From: Lynne <dev@lynne.ee> To: Ffmpeg Devel <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding Date: Fri, 17 Feb 2023 04:43:50 +0100 (CET) Message-ID: <NOST85t--3-9@lynne.ee> (raw) [-- Attachment #1: Type: text/plain, Size: 338 bytes --] This small patchset mostly rewrites Vulkan to enable using multiplane images, and implements video decode support. Numerous bugs and issues were also fixed, along with quite a lot of performance improvements. The patchset can be viewed here as well: https://github.com/cyanreg/FFmpeg/tree/vulkan_staging Patches attached. [-- Attachment #2: 0001-h2645_vui-expose-aspect_ratio_idc.patch --] [-- Type: text/x-diff, Size: 1857 bytes --] From a03d8aa0e2aa961183440e85de3f4922b14f8075 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 00:02:11 +0100 Subject: [PATCH 01/72] h2645_vui: expose aspect_ratio_idc --- libavcodec/h2645_vui.c | 10 +++++----- libavcodec/h2645_vui.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c index 0633fcbddd..93e83a9e1f 100644 --- a/libavcodec/h2645_vui.c +++ b/libavcodec/h2645_vui.c @@ -42,15 +42,15 @@ void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *l aspect_ratio_info_present_flag = get_bits1(gb); if (aspect_ratio_info_present_flag) { - uint8_t aspect_ratio_idc = get_bits(gb, 8); - if (aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) - vui->sar = ff_h2645_pixel_aspect[aspect_ratio_idc]; - else if (aspect_ratio_idc == EXTENDED_SAR) { + vui->aspect_ratio_idc = get_bits(gb, 8); + if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) + vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc]; + else if (vui->aspect_ratio_idc == EXTENDED_SAR) { vui->sar.num = get_bits(gb, 16); vui->sar.den = get_bits(gb, 16); } else av_log(logctx, AV_LOG_WARNING, - "Unknown SAR index: %u.\n", 
aspect_ratio_idc); + "Unknown SAR index: %u.\n", vui->aspect_ratio_idc); } else vui->sar = (AVRational){ 0, 1 }; diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h index 638da7c366..f1aeab7758 100644 --- a/libavcodec/h2645_vui.h +++ b/libavcodec/h2645_vui.h @@ -26,6 +26,7 @@ typedef struct H2645VUI { AVRational sar; + int aspect_ratio_idc; int overscan_info_present_flag; int overscan_appropriate_flag; -- 2.39.2 [-- Attachment #3: 0002-h2645_vui-expose-aspect_ratio_info_present_flag.patch --] [-- Type: text/x-diff, Size: 1469 bytes --] From 42ff928100caea41ffa55ea2c8a8181de39306b7 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 00:03:44 +0100 Subject: [PATCH 02/72] h2645_vui: expose aspect_ratio_info_present_flag --- libavcodec/h2645_vui.c | 6 ++---- libavcodec/h2645_vui.h | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c index 93e83a9e1f..e5c7bf46f9 100644 --- a/libavcodec/h2645_vui.c +++ b/libavcodec/h2645_vui.c @@ -36,12 +36,10 @@ void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *logctx) { - int aspect_ratio_info_present_flag; - av_log(logctx, AV_LOG_DEBUG, "Decoding VUI\n"); - aspect_ratio_info_present_flag = get_bits1(gb); - if (aspect_ratio_info_present_flag) { + vui->aspect_ratio_info_present_flag = get_bits1(gb); + if (vui->aspect_ratio_info_present_flag) { vui->aspect_ratio_idc = get_bits(gb, 8); if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect)) vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc]; diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h index f1aeab7758..2c839f4b01 100644 --- a/libavcodec/h2645_vui.h +++ b/libavcodec/h2645_vui.h @@ -27,6 +27,7 @@ typedef struct H2645VUI { AVRational sar; int aspect_ratio_idc; + int aspect_ratio_info_present_flag; int overscan_info_present_flag; int overscan_appropriate_flag; -- 2.39.2 [-- Attachment #4: 0003-h264_ps-expose-pps_id.patch --] [-- Type: 
text/x-diff, Size: 1226 bytes --] From 5e115cd41e2221cc8048932dfed362be6f80b74b Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 18 Mar 2022 15:11:02 +0100 Subject: [PATCH 03/72] h264_ps: expose pps_id --- libavcodec/h264_ps.c | 1 + libavcodec/h264_ps.h | 1 + 2 files changed, 2 insertions(+) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index d0d1e65903..4ec5bd4e80 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -731,6 +731,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct if (!(bit_length & 7) && pps->data_size < sizeof(pps->data)) pps->data[pps->data_size++] = 0x80; + pps->pps_id = pps_id; pps->sps_id = get_ue_golomb_31(gb); if ((unsigned)pps->sps_id >= MAX_SPS_COUNT || !ps->sps_list[pps->sps_id]) { diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index 5c35761fbc..c3f0888f24 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -103,6 +103,7 @@ typedef struct SPS { * Picture parameter set */ typedef struct PPS { + unsigned int pps_id; unsigned int sps_id; int cabac; ///< entropy_coding_mode_flag int pic_order_present; ///< pic_order_present_flag -- 2.39.2 [-- Attachment #5: 0004-h264_ps-set-pic_scaling_matrix_present_flag.patch --] [-- Type: text/x-diff, Size: 3223 bytes --] From 2720b9ff2a3d95c5d5887c2e06161de1691fc085 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 18 Mar 2022 16:17:33 +0100 Subject: [PATCH 04/72] h264_ps: set pic_scaling_matrix_present_flag --- libavcodec/h264_ps.c | 7 +++++-- libavcodec/h264_ps.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index 4ec5bd4e80..a94f5350c4 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -226,6 +226,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size, /* returns non zero if the provided SPS scaling matrix has been filled */ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, 
const PPS *pps, int is_sps, + int present_flag, uint8_t(*scaling_matrix4)[16], uint8_t(*scaling_matrix8)[64]) { @@ -237,7 +238,7 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1] }; int ret = 0; - if (get_bits1(gb)) { + if (present_flag) { ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]); // Intra, Y ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb @@ -368,7 +369,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx, goto fail; } sps->transform_bypass = get_bits1(gb); - ret = decode_scaling_matrices(gb, sps, NULL, 1, + ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb), sps->scaling_matrix4, sps->scaling_matrix8); if (ret < 0) goto fail; @@ -803,7 +804,9 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct bits_left = bit_length - get_bits_count(gb); if (bits_left > 0 && more_rbsp_data_in_pps(sps, avctx)) { pps->transform_8x8_mode = get_bits1(gb); + pps->pic_scaling_matrix_present_flag = get_bits1(gb); ret = decode_scaling_matrices(gb, sps, pps, 0, + pps->pic_scaling_matrix_present_flag, pps->scaling_matrix4, pps->scaling_matrix8); if (ret < 0) goto fail; diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index c3f0888f24..d2413ae0f8 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -119,6 +119,7 @@ typedef struct PPS { int constrained_intra_pred; ///< constrained_intra_pred_flag int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag int transform_8x8_mode; ///< transform_8x8_mode_flag + int pic_scaling_matrix_present_flag; uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) 
version of qp_table -- 2.39.2 [-- Attachment #6: 0005-h264_parser-expose-idr_pic_id.patch --] [-- Type: text/x-diff, Size: 1437 bytes --] From a9ae85816dfaa8791f974348825fc8ba9209423d Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 10 Mar 2022 18:08:53 +0100 Subject: [PATCH 05/72] h264_parser: expose idr_pic_id Vulkan needs it. --- libavcodec/h264_parse.h | 1 + libavcodec/h264_parser.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/h264_parse.h b/libavcodec/h264_parse.h index 4ee863df66..4ba0add4f2 100644 --- a/libavcodec/h264_parse.h +++ b/libavcodec/h264_parse.h @@ -85,6 +85,7 @@ typedef struct H264POCContext { int delta_poc_bottom; int delta_poc[2]; int frame_num; + int idr_pic_id; int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 int frame_num_offset; ///< for POC type 2 diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c index 46134a1c48..1c330484c1 100644 --- a/libavcodec/h264_parser.c +++ b/libavcodec/h264_parser.c @@ -432,7 +432,7 @@ static inline int parse_nal_units(AVCodecParserContext *s, } if (nal.type == H264_NAL_IDR_SLICE) - get_ue_golomb_long(&nal.gb); /* idr_pic_id */ + p->poc.idr_pic_id = get_ue_golomb_long(&nal.gb); /* idr_pic_id */ if (sps->poc_type == 0) { p->poc.poc_lsb = get_bits(&nal.gb, sps->log2_max_poc_lsb); -- 2.39.2 [-- Attachment #7: 0006-h264_ps-comment-pic_order_present-better.patch --] [-- Type: text/x-diff, Size: 997 bytes --] From e42521563191a899d21fbf24e461bc6cb89661e9 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 15:59:23 +0100 Subject: [PATCH 06/72] h264_ps: comment pic_order_present better The official name which CBS uses is bottom_field_pic_order_in_frame_present_flag. 
--- libavcodec/h264_ps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index d2413ae0f8..de4529b353 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -106,7 +106,7 @@ typedef struct PPS { unsigned int pps_id; unsigned int sps_id; int cabac; ///< entropy_coding_mode_flag - int pic_order_present; ///< pic_order_present_flag + int pic_order_present; ///< bottom_field_pic_order_in_frame_present_flag int slice_group_count; ///< num_slice_groups_minus1 + 1 int mb_slice_group_map_type; unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1 -- 2.39.2 [-- Attachment #8: 0007-h264_ps-expose-max_dec_frame_buffering.patch --] [-- Type: text/x-diff, Size: 1396 bytes --] From e222eaa26f4d8fd36dd04525d754dbf4800c502a Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 00:06:04 +0100 Subject: [PATCH 07/72] h264_ps: expose max_dec_frame_buffering --- libavcodec/h264_ps.c | 2 +- libavcodec/h264_ps.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index a94f5350c4..d9df570718 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -176,7 +176,7 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx, get_ue_golomb_31(gb); /* log2_max_mv_length_horizontal */ get_ue_golomb_31(gb); /* log2_max_mv_length_vertical */ sps->num_reorder_frames = get_ue_golomb_31(gb); - get_ue_golomb_31(gb); /*max_dec_frame_buffering*/ + sps->max_dec_frame_buffering = get_ue_golomb_31(gb); if (get_bits_left(gb) < 0) { sps->num_reorder_frames = 0; diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index de4529b353..906bab7214 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -80,6 +80,7 @@ typedef struct SPS { int32_t offset_for_ref_frame[256]; int bitstream_restriction_flag; int num_reorder_frames; + int max_dec_frame_buffering; int scaling_matrix_present; uint8_t scaling_matrix4[6][16]; 
uint8_t scaling_matrix8[6][64]; -- 2.39.2 [-- Attachment #9: 0008-h264_ps-expose-bit-rate-and-CPB-size-fields.patch --] [-- Type: text/x-diff, Size: 2114 bytes --] From 1279c6011c610fdb054cd9eea7a6f07c94f69f29 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 00:09:08 +0100 Subject: [PATCH 08/72] h264_ps: expose bit rate and CPB size fields --- libavcodec/h264_ps.c | 8 ++++---- libavcodec/h264_ps.h | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index d9df570718..fc8715876a 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -113,12 +113,12 @@ static inline int decode_hrd_parameters(GetBitContext *gb, void *logctx, return AVERROR_INVALIDDATA; } - get_bits(gb, 4); /* bit_rate_scale */ + sps->bit_rate_scale = get_bits(gb, 4); get_bits(gb, 4); /* cpb_size_scale */ for (i = 0; i < cpb_count; i++) { - get_ue_golomb_long(gb); /* bit_rate_value_minus1 */ - get_ue_golomb_long(gb); /* cpb_size_value_minus1 */ - get_bits1(gb); /* cbr_flag */ + sps->bit_rate_value[i] = get_ue_golomb_long(gb) + 1; /* bit_rate_value_minus1 + 1 */ + sps->cpb_size_value[i] = get_ue_golomb_long(gb) + 1; /* cpb_size_value_minus1 + 1 */ + sps->cpr_flag[i] = get_bits1(gb); } sps->initial_cpb_removal_delay_length = get_bits(gb, 5) + 1; sps->cpb_removal_delay_length = get_bits(gb, 5) + 1; diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index 906bab7214..03bd0227d6 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -89,6 +89,10 @@ typedef struct SPS { int pic_struct_present_flag; int time_offset_length; int cpb_cnt; ///< See H.264 E.1.2 + int bit_rate_scale; + uint32_t bit_rate_value[32]; ///< bit_rate_value_minus1 + 1 + uint32_t cpb_size_value[32]; ///< cpb_size_value_minus1 + 1 + uint8_t cpr_flag[32]; int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 + 1 int cpb_removal_delay_length; ///< cpb_removal_delay_length_minus1 + 1 int 
dpb_output_delay_length; ///< dpb_output_delay_length_minus1 + 1 -- 2.39.2 [-- Attachment #10: 0009-h264_ps-expose-scaling_matrix_present_mask.patch --] [-- Type: text/x-diff, Size: 7404 bytes --] From 3ef9965fe2fa33942eb5b5def748f3f6bf9e0afb Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 15 Dec 2022 17:05:35 +0100 Subject: [PATCH 09/72] h264_ps: expose scaling_matrix_present_mask Vulkan requires it. It technically also requires use_default_scaling_matrix_mask, but we can just be explicit and give it the matrix we fill in as non-default. --- libavcodec/h264_ps.c | 37 +++++++++++++++++++++---------------- libavcodec/h264_ps.h | 2 ++ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c index fc8715876a..9f26514167 100644 --- a/libavcodec/h264_ps.c +++ b/libavcodec/h264_ps.c @@ -197,12 +197,14 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx, } static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size, - const uint8_t *jvt_list, - const uint8_t *fallback_list) + const uint8_t *jvt_list, const uint8_t *fallback_list, + uint16_t *mask, int pos) { int i, last = 8, next = 8; const uint8_t *scan = size == 16 ? 
ff_zigzag_scan : ff_zigzag_direct; - if (!get_bits1(gb)) /* matrix not written, we use the predicted one */ + uint16_t seq_scaling_list_present_flag = get_bits1(gb); + *mask |= (seq_scaling_list_present_flag << pos); + if (!seq_scaling_list_present_flag) /* matrix not written, we use the predicted one */ memcpy(factors, fallback_list, size * sizeof(uint8_t)); else for (i = 0; i < size; i++) { @@ -226,7 +228,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size, /* returns non zero if the provided SPS scaling matrix has been filled */ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, const PPS *pps, int is_sps, - int present_flag, + int present_flag, uint16_t *mask, uint8_t(*scaling_matrix4)[16], uint8_t(*scaling_matrix8)[64]) { @@ -238,21 +240,22 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps, fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1] }; int ret = 0; + *mask = 0x0; if (present_flag) { - ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]); // Intra, Y - ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr - ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb - ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1]); // Inter, Y - ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3]); // Inter, Cr - ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4]); // Inter, Cb + ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0], mask, 0); // Intra, Y + ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0], mask, 1); // Intra, Cr + ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1], mask, 2); // Intra, 
Cb + ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1], mask, 3); // Inter, Y + ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3], mask, 4); // Inter, Cr + ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4], mask, 5); // Inter, Cb if (is_sps || pps->transform_8x8_mode) { - ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2]); // Intra, Y - ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3]); // Inter, Y + ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2], mask, 6); // Intra, Y + ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3], mask, 7); // Inter, Y if (sps->chroma_format_idc == 3) { - ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0]); // Intra, Cr - ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3]); // Inter, Cr - ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1]); // Intra, Cb - ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4]); // Inter, Cb + ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0], mask, 8); // Intra, Cr + ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3], mask, 9); // Inter, Cr + ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1], mask, 10); // Intra, Cb + ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4], mask, 11); // Inter, Cb } } if (!ret) @@ -370,6 +373,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx, } sps->transform_bypass = get_bits1(gb); ret = decode_scaling_matrices(gb, sps, 
NULL, 1, get_bits1(gb), + &sps->scaling_matrix_present_mask, sps->scaling_matrix4, sps->scaling_matrix8); if (ret < 0) goto fail; @@ -807,6 +811,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct pps->pic_scaling_matrix_present_flag = get_bits1(gb); ret = decode_scaling_matrices(gb, sps, pps, 0, pps->pic_scaling_matrix_present_flag, + &pps->pic_scaling_matrix_present_mask, pps->scaling_matrix4, pps->scaling_matrix8); if (ret < 0) goto fail; diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h index 03bd0227d6..60ca9b3cd7 100644 --- a/libavcodec/h264_ps.h +++ b/libavcodec/h264_ps.h @@ -82,6 +82,7 @@ typedef struct SPS { int num_reorder_frames; int max_dec_frame_buffering; int scaling_matrix_present; + uint16_t scaling_matrix_present_mask; uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; int nal_hrd_parameters_present_flag; @@ -125,6 +126,7 @@ typedef struct PPS { int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag int transform_8x8_mode; ///< transform_8x8_mode_flag int pic_scaling_matrix_present_flag; + uint16_t pic_scaling_matrix_present_mask; uint8_t scaling_matrix4[6][16]; uint8_t scaling_matrix8[6][64]; uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table -- 2.39.2 [-- Attachment #11: 0010-h264dec-track-picture_structure-in-H264Picture.patch --] [-- Type: text/x-diff, Size: 2132 bytes --] From 52ab3cd8d165a838be92189c87c54915efc1c7e5 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 11 Jan 2023 05:20:32 +0100 Subject: [PATCH 10/72] h264dec: track picture_structure in H264Picture --- libavcodec/h264_picture.c | 1 + libavcodec/h264_slice.c | 1 + libavcodec/h264dec.h | 1 + 3 files changed, 3 insertions(+) diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c index 2661ff4698..0348166c43 100644 --- a/libavcodec/h264_picture.c +++ b/libavcodec/h264_picture.c @@ -80,6 +80,7 @@ static void 
h264_copy_picture_params(H264Picture *dst, const H264Picture *src) dst->mbaff = src->mbaff; dst->field_picture = src->field_picture; dst->reference = src->reference; + dst->picture_structure = src->picture_structure; dst->recovered = src->recovered; dst->invalid_gap = src->invalid_gap; dst->sei_recovery_frame_cnt = src->sei_recovery_frame_cnt; diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 6188c74632..8ac66b343c 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -491,6 +491,7 @@ static int h264_frame_start(H264Context *h) pic->reference = h->droppable ? 0 : h->picture_structure; pic->f->coded_picture_number = h->coded_picture_number++; pic->field_picture = h->picture_structure != PICT_FRAME; + pic->picture_structure = h->picture_structure; pic->frame_num = h->poc.frame_num; /* * Zero key_frame here; IDR markings per slice in frame or fields are ORed diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h index 9a1ec1bace..1b18aba71f 100644 --- a/libavcodec/h264dec.h +++ b/libavcodec/h264dec.h @@ -137,6 +137,7 @@ typedef struct H264Picture { int ref_count[2][2]; ///< number of entries in ref_poc (FIXME need per slice) int mbaff; ///< 1 -> MBAFF frame 0-> not MBAFF int field_picture; ///< whether or not picture was encoded in separate fields + int picture_structure; ///< picture structure /** * H264Picture.reference has this flag set, -- 2.39.2 [-- Attachment #12: 0011-hevc_ps-expose-SPS-and-VPS-headers.patch --] [-- Type: text/x-diff, Size: 9068 bytes --] From d80272e0759b686942f51b1c0c7615edb6a81bc6 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 7 Dec 2022 01:29:57 +0100 Subject: [PATCH 11/72] hevc_ps: expose SPS and VPS headers --- libavcodec/hevc_ps.c | 100 ++++++++++++++++++++++--------------------- libavcodec/hevc_ps.h | 41 ++++++++++++++++++ 2 files changed, 93 insertions(+), 48 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 5fe62ec35b..bd1f278b06 100644 --- 
a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -355,81 +355,84 @@ static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx, } static void decode_sublayer_hrd(GetBitContext *gb, unsigned int nb_cpb, - int subpic_params_present) + HEVCSublayerHdrParams *par, int subpic_params_present) { int i; for (i = 0; i < nb_cpb; i++) { - get_ue_golomb_long(gb); // bit_rate_value_minus1 - get_ue_golomb_long(gb); // cpb_size_value_minus1 + par->bit_rate_value_minus1[i] = get_ue_golomb_long(gb); + par->cpb_size_value_minus1[i] = get_ue_golomb_long(gb); if (subpic_params_present) { - get_ue_golomb_long(gb); // cpb_size_du_value_minus1 - get_ue_golomb_long(gb); // bit_rate_du_value_minus1 + par->cpb_size_du_value_minus1[i] = get_ue_golomb_long(gb); + par->bit_rate_du_value_minus1[i] = get_ue_golomb_long(gb); } - skip_bits1(gb); // cbr_flag + + par->cbr_flag = get_bits1(gb); } } static int decode_hrd(GetBitContext *gb, int common_inf_present, - int max_sublayers) + HEVCHdrParams *hdr, int max_sublayers) { - int nal_params_present = 0, vcl_params_present = 0; - int subpic_params_present = 0; - int i; - if (common_inf_present) { - nal_params_present = get_bits1(gb); - vcl_params_present = get_bits1(gb); - - if (nal_params_present || vcl_params_present) { - subpic_params_present = get_bits1(gb); - - if (subpic_params_present) { - skip_bits(gb, 8); // tick_divisor_minus2 - skip_bits(gb, 5); // du_cpb_removal_delay_increment_length_minus1 - skip_bits(gb, 1); // sub_pic_cpb_params_in_pic_timing_sei_flag - skip_bits(gb, 5); // dpb_output_delay_du_length_minus1 + hdr->flags.nal_hrd_parameters_present_flag = get_bits1(gb); + hdr->flags.vcl_hrd_parameters_present_flag = get_bits1(gb); + + if (hdr->flags.nal_hrd_parameters_present_flag || + hdr->flags.vcl_hrd_parameters_present_flag) { + hdr->flags.sub_pic_hrd_params_present_flag = get_bits1(gb); + + if (hdr->flags.sub_pic_hrd_params_present_flag) { + hdr->tick_divisor_minus2 = get_bits(gb, 8); + 
hdr->du_cpb_removal_delay_increment_length_minus1 = get_bits(gb, 5); + hdr->flags.sub_pic_cpb_params_in_pic_timing_sei_flag = get_bits1(gb); + hdr->dpb_output_delay_du_length_minus1 = get_bits(gb, 5); } - skip_bits(gb, 4); // bit_rate_scale - skip_bits(gb, 4); // cpb_size_scale + hdr->bit_rate_scale = get_bits(gb, 4); + hdr->cpb_size_scale = get_bits(gb, 4); - if (subpic_params_present) - skip_bits(gb, 4); // cpb_size_du_scale + if (hdr->flags.sub_pic_hrd_params_present_flag) + hdr->cpb_size_du_scale = get_bits(gb, 4); - skip_bits(gb, 5); // initial_cpb_removal_delay_length_minus1 - skip_bits(gb, 5); // au_cpb_removal_delay_length_minus1 - skip_bits(gb, 5); // dpb_output_delay_length_minus1 + hdr->initial_cpb_removal_delay_length_minus1 = get_bits(gb, 5); + hdr->au_cpb_removal_delay_length_minus1 = get_bits(gb, 5); + hdr->dpb_output_delay_length_minus1 = get_bits(gb, 5); } } - for (i = 0; i < max_sublayers; i++) { - int low_delay = 0; - unsigned int nb_cpb = 1; - int fixed_rate = get_bits1(gb); + for (int i = 0; i < max_sublayers; i++) { + hdr->flags.fixed_pic_rate_general_flag = get_bits1(gb); + + hdr->cpb_cnt_minus1[i] = 1; - if (!fixed_rate) - fixed_rate = get_bits1(gb); + if (!hdr->flags.fixed_pic_rate_general_flag) + hdr->flags.fixed_pic_rate_within_cvs_flag = get_bits1(gb); - if (fixed_rate) - get_ue_golomb_long(gb); // elemental_duration_in_tc_minus1 + if (hdr->flags.fixed_pic_rate_within_cvs_flag) + hdr->elemental_duration_in_tc_minus1[i] = get_ue_golomb_long(gb); else - low_delay = get_bits1(gb); + hdr->flags.low_delay_hrd_flag = get_bits1(gb); - if (!low_delay) { - nb_cpb = get_ue_golomb_long(gb) + 1; - if (nb_cpb < 1 || nb_cpb > 32) { - av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", nb_cpb); + if (!hdr->flags.low_delay_hrd_flag) { + hdr->cpb_cnt_minus1[i] = get_ue_golomb_long(gb); + if (hdr->cpb_cnt_minus1[i] > 31) { + av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", + hdr->cpb_cnt_minus1[i]); return AVERROR_INVALIDDATA; } } - if (nal_params_present) 
- decode_sublayer_hrd(gb, nb_cpb, subpic_params_present); - if (vcl_params_present) - decode_sublayer_hrd(gb, nb_cpb, subpic_params_present); + if (hdr->flags.nal_hrd_parameters_present_flag) + decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->nal_params[i], + hdr->flags.sub_pic_hrd_params_present_flag); + + if (hdr->flags.vcl_hrd_parameters_present_flag) + decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->vcl_params[i], + hdr->flags.sub_pic_hrd_params_present_flag); } + return 0; } @@ -536,7 +539,8 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx, get_ue_golomb_long(gb); // hrd_layer_set_idx if (i) common_inf_present = get_bits1(gb); - decode_hrd(gb, common_inf_present, vps->vps_max_sub_layers); + decode_hrd(gb, common_inf_present, &vps->hdr[i], + vps->vps_max_sub_layers); } } get_bits1(gb); /* vps_extension_flag */ @@ -655,7 +659,7 @@ timing_info: vui->vui_num_ticks_poc_diff_one_minus1 = get_ue_golomb_long(gb); vui->vui_hrd_parameters_present_flag = get_bits1(gb); if (vui->vui_hrd_parameters_present_flag) - decode_hrd(gb, 1, sps->max_sub_layers); + decode_hrd(gb, 1, &sps->hdr, sps->max_sub_layers); } vui->bitstream_restriction_flag = get_bits1(gb); diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 18894cfed1..b61d3b32b3 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -32,6 +32,43 @@ #include "h2645_vui.h" #include "hevc.h" +typedef struct HEVCSublayerHdrParams { + uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t cpb_size_du_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t bit_rate_du_value_minus1[HEVC_MAX_CPB_CNT]; + uint32_t cbr_flag; +} HEVCSublayerHdrParams; + +typedef struct HEVCHdrFlagParams { + uint32_t nal_hrd_parameters_present_flag; + uint32_t vcl_hrd_parameters_present_flag; + uint32_t sub_pic_hrd_params_present_flag; + uint32_t sub_pic_cpb_params_in_pic_timing_sei_flag; + uint32_t fixed_pic_rate_general_flag; + uint32_t 
fixed_pic_rate_within_cvs_flag; + uint32_t low_delay_hrd_flag; +} HEVCHdrFlagParams; + +typedef struct HEVCHdrParams { + HEVCHdrFlagParams flags; + + uint8_t tick_divisor_minus2; + uint8_t du_cpb_removal_delay_increment_length_minus1; + uint8_t dpb_output_delay_du_length_minus1; + uint8_t bit_rate_scale; + uint8_t cpb_size_scale; + uint8_t cpb_size_du_scale; + uint8_t initial_cpb_removal_delay_length_minus1; + uint8_t au_cpb_removal_delay_length_minus1; + uint8_t dpb_output_delay_length_minus1; + uint8_t cpb_cnt_minus1[HEVC_MAX_SUB_LAYERS]; + uint16_t elemental_duration_in_tc_minus1[HEVC_MAX_SUB_LAYERS]; + + HEVCSublayerHdrParams nal_params[HEVC_MAX_SUB_LAYERS]; + HEVCSublayerHdrParams vcl_params[HEVC_MAX_SUB_LAYERS]; +} HEVCHdrParams; + typedef struct ShortTermRPS { unsigned int num_negative_pics; int num_delta_pocs; @@ -108,6 +145,8 @@ typedef struct PTL { } PTL; typedef struct HEVCVPS { + HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS]; + uint8_t vps_temporal_id_nesting_flag; int vps_max_layers; int vps_max_sub_layers; ///< vps_max_temporal_layers_minus1 + 1 @@ -146,6 +185,8 @@ typedef struct HEVCSPS { HEVCWindow pic_conf_win; + HEVCHdrParams hdr; + int bit_depth; int bit_depth_chroma; int pixel_shift; -- 2.39.2 [-- Attachment #13: 0012-hevc_ps-expose-pps_id.patch --] [-- Type: text/x-diff, Size: 1213 bytes --] From d6e2ac33861642ac5dfa651963874c0f65d9b49b Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 7 Dec 2022 05:33:29 +0100 Subject: [PATCH 12/72] hevc_ps: expose pps_id --- libavcodec/hevc_ps.c | 2 +- libavcodec/hevc_ps.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index bd1f278b06..3242904473 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -1486,7 +1486,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx, pps->log2_max_transform_skip_block_size = 2; // Coded parameters - pps_id = get_ue_golomb_long(gb); + pps_id = pps->pps_id = 
get_ue_golomb_long(gb); if (pps_id >= HEVC_MAX_PPS_COUNT) { av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id); ret = AVERROR_INVALIDDATA; diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index b61d3b32b3..4cfcbcf9ae 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -275,6 +275,7 @@ typedef struct HEVCSPS { } HEVCSPS; typedef struct HEVCPPS { + unsigned int pps_id; unsigned int sps_id; ///< seq_parameter_set_id uint8_t sign_data_hiding_flag; -- 2.39.2 [-- Attachment #14: 0013-hevc_ps-expose-vps_id.patch --] [-- Type: text/x-diff, Size: 1162 bytes --] From a09e6d7611f6e89ea3107c4581b27715a7ca480d Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 7 Dec 2022 06:42:44 +0100 Subject: [PATCH 13/72] hevc_ps: expose vps_id --- libavcodec/hevc_ps.c | 2 +- libavcodec/hevc_ps.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 3242904473..a26f2940fc 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -462,7 +462,7 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx, } memcpy(vps->data, gb->buffer, vps->data_size); - vps_id = get_bits(gb, 4); + vps_id = vps->vps_id = get_bits(gb, 4); if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n"); diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 4cfcbcf9ae..571657d7fd 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -145,6 +145,7 @@ typedef struct PTL { } PTL; typedef struct HEVCVPS { + unsigned int vps_id; HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS]; uint8_t vps_temporal_id_nesting_flag; -- 2.39.2 [-- Attachment #15: 0014-hevc_ps-expose-pps_extension_present_flag.patch --] [-- Type: text/x-diff, Size: 1512 bytes --] From 73a6b7e49ba8f01aefe2b7c152b2e2d04edaa3ee Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 7 Dec 2022 12:49:45 +0100 Subject: [PATCH 14/72] hevc_ps: expose 
pps_extension_present_flag --- libavcodec/hevc_ps.c | 3 ++- libavcodec/hevc_ps.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index a26f2940fc..b1247bad67 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -1659,7 +1659,8 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx, pps->slice_header_extension_present_flag = get_bits1(gb); - if (get_bits1(gb)) { // pps_extension_present_flag + pps->pps_extension_present_flag = get_bits1(gb); + if (pps->pps_extension_present_flag) { pps->pps_range_extensions_flag = get_bits1(gb); skip_bits(gb, 7); // pps_extension_7bits if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) { diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 571657d7fd..f221640531 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -326,6 +326,7 @@ typedef struct HEVCPPS { int num_extra_slice_header_bits; uint8_t slice_header_extension_present_flag; uint8_t log2_max_transform_skip_block_size; + uint8_t pps_extension_present_flag; uint8_t pps_range_extensions_flag; uint8_t cross_component_prediction_enabled_flag; uint8_t chroma_qp_offset_list_enabled_flag; -- 2.39.2 [-- Attachment #16: 0015-hevcdec-expose-bits_used_for_short_term_rps.patch --] [-- Type: text/x-diff, Size: 1228 bytes --] From 68e33940f494112e359f6a0a769083c1dd82a1c4 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 7 Dec 2022 17:11:36 +0100 Subject: [PATCH 15/72] hevcdec: expose bits_used_for_short_term_rps --- libavcodec/hevcdec.c | 1 + libavcodec/hevcdec.h | 1 + 2 files changed, 2 insertions(+) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 567e8d81d4..43cd963175 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -702,6 +702,7 @@ static int hls_slice_header(HEVCContext *s) if (ret < 0) return ret; + sh->bits_used_for_short_term_rps = pos - get_bits_left(gb); sh->short_term_rps = 
&sh->slice_rps; } else { int numbits, rps_idx; diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h index 9d3f4adbb3..15c4113bdd 100644 --- a/libavcodec/hevcdec.h +++ b/libavcodec/hevcdec.h @@ -268,6 +268,7 @@ typedef struct SliceHeader { ///< RPS coded in the slice header itself is stored here int short_term_ref_pic_set_sps_flag; + int bits_used_for_short_term_rps; int short_term_ref_pic_set_size; ShortTermRPS slice_rps; const ShortTermRPS *short_term_rps; -- 2.39.2 [-- Attachment #17: 0016-hevc_ps-expose-vui_present-sublayer_ordering_info-co.patch --] [-- Type: text/x-diff, Size: 4332 bytes --] From 46f18bf6af9e8ed0aaa82085a06b31dc8565e0df Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 00:17:51 +0100 Subject: [PATCH 16/72] hevc_ps: expose vui_present, sublayer_ordering_info, conformance_window_flag --- libavcodec/hevc_ps.c | 18 ++++++++++-------- libavcodec/hevc_ps.h | 4 ++++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index b1247bad67..a740da9f82 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -855,7 +855,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, HEVCWindow *ow; int ret = 0; int log2_diff_max_min_transform_block_size; - int bit_depth_chroma, start, vui_present, sublayer_ordering_info; + int bit_depth_chroma, start; int i; // Coded parameters @@ -904,7 +904,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->height, 0, avctx)) < 0) return ret; - if (get_bits1(gb)) { // pic_conformance_flag + sps->conformance_window_flag = get_bits1(gb); + if (sps->conformance_window_flag) { // pic_conformance_flag int vert_mult = hevc_sub_height_c[sps->chroma_format_idc]; int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc]; sps->pic_conf_win.left_offset = get_ue_golomb_long(gb) * horiz_mult; @@ -951,8 +952,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, 
return AVERROR_INVALIDDATA; } - sublayer_ordering_info = get_bits1(gb); - start = sublayer_ordering_info ? 0 : sps->max_sub_layers - 1; + sps->sublayer_ordering_info_flag = get_bits1(gb); + start = sps->sublayer_ordering_info_flag ? 0 : sps->max_sub_layers - 1; for (i = start; i < sps->max_sub_layers; i++) { sps->temporal_layer[i].max_dec_pic_buffering = get_ue_golomb_long(gb) + 1; sps->temporal_layer[i].num_reorder_pics = get_ue_golomb_long(gb); @@ -973,7 +974,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } - if (!sublayer_ordering_info) { + if (!sps->sublayer_ordering_info_flag) { for (i = 0; i < start; i++) { sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[start].max_dec_pic_buffering; sps->temporal_layer[i].num_reorder_pics = sps->temporal_layer[start].num_reorder_pics; @@ -1015,7 +1016,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, if (sps->scaling_list_enable_flag) { set_default_scaling_list_data(&sps->scaling_list); - if (get_bits1(gb)) { + sps->scaling_list_data_present_flag = get_bits1(gb); + if (sps->scaling_list_data_present_flag) { ret = scaling_list_data(gb, avctx, &sps->scaling_list, sps); if (ret < 0) return ret; @@ -1071,8 +1073,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->sps_temporal_mvp_enabled_flag = get_bits1(gb); sps->sps_strong_intra_smoothing_enable_flag = get_bits1(gb); sps->vui.common.sar = (AVRational){0, 1}; - vui_present = get_bits1(gb); - if (vui_present) + sps->vui_present = get_bits1(gb); + if (sps->vui_present) decode_vui(gb, avctx, apply_defdispwin, sps); if (get_bits1(gb)) { // sps_extension_flag diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index f221640531..549e0bdf57 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -184,6 +184,7 @@ typedef struct HEVCSPS { HEVCWindow output_window; + int conformance_window_flag; HEVCWindow pic_conf_win; HEVCHdrParams hdr; @@ -196,6 
+197,7 @@ typedef struct HEVCSPS { unsigned int log2_max_poc_lsb; int pcm_enabled_flag; + int sublayer_ordering_info_flag; int max_sub_layers; struct { int max_dec_pic_buffering; @@ -204,10 +206,12 @@ typedef struct HEVCSPS { } temporal_layer[HEVC_MAX_SUB_LAYERS]; uint8_t temporal_id_nesting_flag; + int vui_present; VUI vui; PTL ptl; uint8_t scaling_list_enable_flag; + int scaling_list_data_present_flag; ScalingList scaling_list; unsigned int nb_st_rps; -- 2.39.2 [-- Attachment #18: 0017-hevc_ps-expose-and-parse-scc-range-extension-fields.patch --] [-- Type: text/x-diff, Size: 7752 bytes --] From 4645f1fb3249f8249fdebaf9b3edffc848b9af3c Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 00:18:42 +0100 Subject: [PATCH 17/72] hevc_ps: expose and parse scc range extension fields --- libavcodec/hevc.h | 2 ++ libavcodec/hevc_ps.c | 63 ++++++++++++++++++++++++++++++++++++++++---- libavcodec/hevc_ps.h | 26 ++++++++++++++++++ 3 files changed, 86 insertions(+), 5 deletions(-) diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h index 1804755327..913c7d4e2e 100644 --- a/libavcodec/hevc.h +++ b/libavcodec/hevc.h @@ -154,6 +154,8 @@ enum { // get near that, though, so set a lower limit here with the maximum // possible value for 4K video (at most 135 16x16 Ctb rows). 
HEVC_MAX_ENTRY_POINT_OFFSETS = HEVC_MAX_TILE_COLUMNS * 135, + + HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE = 128, }; diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index a740da9f82..b03f59efef 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -856,7 +856,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, int ret = 0; int log2_diff_max_min_transform_block_size; int bit_depth_chroma, start; - int i; + int i, j; // Coded parameters @@ -1077,9 +1077,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, if (sps->vui_present) decode_vui(gb, avctx, apply_defdispwin, sps); - if (get_bits1(gb)) { // sps_extension_flag + sps->sps_extension_present_flag = get_bits1(gb); + if (sps->sps_extension_present_flag) { // sps_extension_flag sps->sps_range_extension_flag = get_bits1(gb); - skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7); + skip_bits(gb, 2); + sps->sps_scc_extension_flag = get_bits1(gb); + skip_bits(gb, 4); if (sps->sps_range_extension_flag) { sps->transform_skip_rotation_enabled_flag = get_bits1(gb); sps->transform_skip_context_enabled_flag = get_bits1(gb); @@ -1105,6 +1108,26 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, av_log(avctx, AV_LOG_WARNING, "cabac_bypass_alignment_enabled_flag not yet implemented\n"); } + if (sps->sps_scc_extension_flag) { + sps->sps_curr_pic_ref_enabled_flag = get_bits1(gb); + sps->palette_mode_enabled_flag = get_bits1(gb); + if (sps->palette_mode_enabled_flag) { + sps->palette_max_size = get_ue_golomb_long(gb); + sps->delta_palette_max_predictor_size = get_ue_golomb_long(gb); + + sps->sps_palette_predictor_initializer_present_flag = get_bits1(gb); + if (sps->sps_palette_predictor_initializer_present_flag) { + sps->sps_num_palette_predictor_initializer_minus1 = get_ue_golomb_long(gb); + for (i = 0; i < (sps->chroma_format_idc ? 
3 : 1); i++) { + for (j = 0; j <= sps->sps_num_palette_predictor_initializer_minus1; j++) + sps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb); + } + } + } + + sps->motion_vector_resolution_control_idc = get_bits(gb, 2); + sps->intra_boundary_filtering_disable_flag = get_bits1(gb); + } } if (apply_defdispwin) { sps->output_window.left_offset += sps->vui.def_disp_win.left_offset; @@ -1446,7 +1469,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx, HEVCParamSets *ps) { HEVCSPS *sps = NULL; - int i, ret = 0; + int i, j, ret = 0; unsigned int pps_id = 0; ptrdiff_t nal_size; unsigned log2_parallel_merge_level_minus2; @@ -1664,11 +1687,41 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx, pps->pps_extension_present_flag = get_bits1(gb); if (pps->pps_extension_present_flag) { pps->pps_range_extensions_flag = get_bits1(gb); - skip_bits(gb, 7); // pps_extension_7bits + skip_bits(gb, 2); + pps->pps_scc_extension_flag = get_bits1(gb); + skip_bits(gb, 4); if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) { if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0) goto err; } + if (pps->pps_scc_extension_flag) { + pps->pps_curr_pic_ref_enabled_flag = get_bits1(gb); + pps->residual_adaptive_colour_transform_enabled_flag = get_bits1(gb); + + if (pps->residual_adaptive_colour_transform_enabled_flag) { + pps->pps_slice_act_qp_offsets_present_flag = get_bits1(gb); + pps->pps_act_y_qp_offset_plus5 = get_se_golomb(gb); + pps->pps_act_cb_qp_offset_plus5 = get_se_golomb(gb); + pps->pps_act_cr_qp_offset_plus3 = get_se_golomb(gb); + } + + pps->pps_palette_predictor_initializer_present_flag = get_bits1(gb); + if (pps->pps_palette_predictor_initializer_present_flag) { + pps->pps_num_palette_predictor_initializer = get_ue_golomb_long(gb); + if (pps->pps_num_palette_predictor_initializer) { + pps->monochrome_palette_flag = get_bits1(gb); + pps->luma_bit_depth_entry_minus8 = 
get_ue_golomb_long(gb); + + if (!pps->monochrome_palette_flag) + pps->chroma_bit_depth_entry_minus8 = get_ue_golomb_long(gb); + + for (i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) { + for (j = 0; j < pps->pps_num_palette_predictor_initializer; j++) + pps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb); + } + } + } + } } ret = setup_pps(avctx, gb, pps, sps); diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 549e0bdf57..8dddf7ef8d 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -210,6 +210,18 @@ typedef struct HEVCSPS { VUI vui; PTL ptl; + int sps_extension_present_flag; + int sps_scc_extension_flag; + int sps_curr_pic_ref_enabled_flag; + int palette_mode_enabled_flag; + uint8_t palette_max_size; + uint8_t delta_palette_max_predictor_size; + uint8_t motion_vector_resolution_control_idc; + uint8_t sps_num_palette_predictor_initializer_minus1; + int sps_palette_predictor_initializer_present_flag; + int intra_boundary_filtering_disable_flag; + uint16_t palette_predictor_initializers[3][HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE]; + uint8_t scaling_list_enable_flag; int scaling_list_data_present_flag; ScalingList scaling_list; @@ -341,6 +353,20 @@ typedef struct HEVCPPS { uint8_t log2_sao_offset_scale_luma; uint8_t log2_sao_offset_scale_chroma; + int pps_scc_extension_flag; + int pps_curr_pic_ref_enabled_flag; + int residual_adaptive_colour_transform_enabled_flag; + int pps_slice_act_qp_offsets_present_flag; + int pps_palette_predictor_initializer_present_flag; + int pps_num_palette_predictor_initializer; + int monochrome_palette_flag; + int luma_bit_depth_entry_minus8; + int chroma_bit_depth_entry_minus8; + int pps_act_y_qp_offset_plus5; + int pps_act_cb_qp_offset_plus5; + int pps_act_cr_qp_offset_plus3; + uint16_t palette_predictor_initializers[3][HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE]; + // Inferred parameters unsigned int *column_width; ///< ColumnWidth unsigned int *row_height; ///< RowHeight -- 
2.39.2 [-- Attachment #19: 0018-hevc_ps-expose-log2_diff_max_min_transform_block_siz.patch --] [-- Type: text/x-diff, Size: 3078 bytes --] From 141df2aaa6e9e256cf5260b919fb9151982dabe0 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 7 Dec 2022 04:30:46 +0100 Subject: [PATCH 18/72] hevc_ps: expose log2_diff_max_min_transform_block_size --- libavcodec/hevc_ps.c | 18 +++++++++--------- libavcodec/hevc_ps.h | 1 + 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index b03f59efef..2f0aff5a97 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -854,7 +854,6 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, { HEVCWindow *ow; int ret = 0; - int log2_diff_max_min_transform_block_size; int bit_depth_chroma, start; int i, j; @@ -982,12 +981,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } - sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3; - sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb); - sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2; - log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb); - sps->log2_max_trafo_size = log2_diff_max_min_transform_block_size + - sps->log2_min_tb_size; + sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3; + sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb); + sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2; + sps->log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb); + sps->log2_max_trafo_size = sps->log2_diff_max_min_transform_block_size + + sps->log2_min_tb_size; if (sps->log2_min_cb_size < 3 || sps->log2_min_cb_size > 30) { av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size", sps->log2_min_cb_size); @@ -1004,8 +1003,9 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, return AVERROR_INVALIDDATA; } - if (log2_diff_max_min_transform_block_size < 0 || 
log2_diff_max_min_transform_block_size > 30) { - av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", log2_diff_max_min_transform_block_size); + if (sps->log2_diff_max_min_transform_block_size > 30) { + av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", + sps->log2_diff_max_min_transform_block_size); return AVERROR_INVALIDDATA; } diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 8dddf7ef8d..88e73e97c8 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -253,6 +253,7 @@ typedef struct HEVCSPS { unsigned int log2_max_trafo_size; unsigned int log2_ctb_size; unsigned int log2_min_pu_size; + unsigned int log2_diff_max_min_transform_block_size; int max_transform_hierarchy_depth_inter; int max_transform_hierarchy_depth_intra; -- 2.39.2 [-- Attachment #20: 0019-hevc_ps-expose-rps-fields.patch --] [-- Type: text/x-diff, Size: 4900 bytes --] From b0e8756c78c95ff93b908612b76d2013f79d5c2b Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 00:25:48 +0100 Subject: [PATCH 19/72] hevc_ps: expose rps fields --- libavcodec/hevc_ps.c | 37 ++++++++++++++++++------------------- libavcodec/hevc_ps.h | 7 +++++++ 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 2f0aff5a97..745a4f270e 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -100,51 +100,50 @@ static void remove_vps(HEVCParamSets *s, int id) int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header) { - uint8_t rps_predict = 0; int delta_poc; int k0 = 0; int k = 0; int i; + rps->rps_predict = 0; + if (rps != sps->st_rps && sps->nb_st_rps) - rps_predict = get_bits1(gb); + rps->rps_predict = get_bits1(gb); - if (rps_predict) { + if (rps->rps_predict) { const ShortTermRPS *rps_ridx; int delta_rps; - unsigned abs_delta_rps; - uint8_t use_delta_flag = 0; - 
uint8_t delta_rps_sign; if (is_slice_header) { - unsigned int delta_idx = get_ue_golomb_long(gb) + 1; - if (delta_idx > sps->nb_st_rps) { + rps->delta_idx = get_ue_golomb_long(gb) + 1; + if (rps->delta_idx > sps->nb_st_rps) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_idx in slice header RPS: %d > %d.\n", - delta_idx, sps->nb_st_rps); + rps->delta_idx, sps->nb_st_rps); return AVERROR_INVALIDDATA; } - rps_ridx = &sps->st_rps[sps->nb_st_rps - delta_idx]; + rps_ridx = &sps->st_rps[sps->nb_st_rps - rps->delta_idx]; rps->rps_idx_num_delta_pocs = rps_ridx->num_delta_pocs; } else rps_ridx = &sps->st_rps[rps - sps->st_rps - 1]; - delta_rps_sign = get_bits1(gb); - abs_delta_rps = get_ue_golomb_long(gb) + 1; - if (abs_delta_rps < 1 || abs_delta_rps > 32768) { + rps->delta_rps_sign = get_bits1(gb); + rps->abs_delta_rps = get_ue_golomb_long(gb) + 1; + if (rps->abs_delta_rps > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of abs_delta_rps: %d\n", - abs_delta_rps); + rps->abs_delta_rps); return AVERROR_INVALIDDATA; } - delta_rps = (1 - (delta_rps_sign << 1)) * abs_delta_rps; + delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps; for (i = 0; i <= rps_ridx->num_delta_pocs; i++) { int used = rps->used[k] = get_bits1(gb); + rps->use_delta_flag = 0; if (!used) - use_delta_flag = get_bits1(gb); + rps->use_delta_flag = get_bits1(gb); - if (used || use_delta_flag) { + if (used || rps->use_delta_flag) { if (i < rps_ridx->num_delta_pocs) delta_poc = delta_rps + rps_ridx->delta_poc[i]; else @@ -210,7 +209,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, if (rps->num_delta_pocs) { prev = 0; for (i = 0; i < rps->num_negative_pics; i++) { - delta_poc = get_ue_golomb_long(gb) + 1; + delta_poc = rps->delta_poc_s0[i] = get_ue_golomb_long(gb) + 1; if (delta_poc < 1 || delta_poc > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_poc: %d\n", @@ -223,7 +222,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, 
AVCodecContext *avctx, } prev = 0; for (i = 0; i < nb_positive_pics; i++) { - delta_poc = get_ue_golomb_long(gb) + 1; + delta_poc = rps->delta_poc_s1[i] = get_ue_golomb_long(gb) + 1; if (delta_poc < 1 || delta_poc > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_poc: %d\n", diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 88e73e97c8..3cdbf6abec 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -70,9 +70,16 @@ typedef struct HEVCHdrParams { } HEVCHdrParams; typedef struct ShortTermRPS { + uint8_t rps_predict; + unsigned int delta_idx; + uint8_t use_delta_flag; + uint8_t delta_rps_sign; + unsigned int abs_delta_rps; unsigned int num_negative_pics; int num_delta_pocs; int rps_idx_num_delta_pocs; + int32_t delta_poc_s0[32]; + int32_t delta_poc_s1[32]; int32_t delta_poc[32]; uint8_t used[32]; } ShortTermRPS; -- 2.39.2 [-- Attachment #21: 0020-hwcontext_vulkan-initialize-and-require-instance-ver.patch --] [-- Type: text/x-diff, Size: 2363 bytes --] From a35cd953f9af8f34836d53006d10e3890a30ebf1 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 15:15:04 +0100 Subject: [PATCH 20/72] hwcontext_vulkan: initialize and require instance version 1.3 --- configure | 4 ++-- libavutil/hwcontext_vulkan.c | 2 +- libavutil/hwcontext_vulkan.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configure b/configure index d38613309d..f0f15b9e87 100755 --- a/configure +++ b/configure @@ -7006,8 +7006,8 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if. "in maintaining it." 
if enabled vulkan; then - check_pkg_config_header_only vulkan "vulkan >= 1.2.189" "vulkan/vulkan.h" "defined VK_VERSION_1_2" || - check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_3) || (defined(VK_VERSION_1_2) && VK_HEADER_VERSION >= 189)" + check_pkg_config_header_only vulkan "vulkan >= 1.3.238" "vulkan/vulkan.h" "defined VK_VERSION_1_3" || + check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) || (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 238)" fi if enabled x86; then diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 2a9b5f4aac..c87f39d072 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -673,7 +673,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) VkApplicationInfo application_info = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pEngineName = "libavutil", - .apiVersion = VK_API_VERSION_1_2, + .apiVersion = VK_API_VERSION_1_3, .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, LIBAVUTIL_VERSION_MINOR, LIBAVUTIL_VERSION_MICRO), diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index df86c85b3c..70c8379dc3 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -53,7 +53,7 @@ typedef struct AVVulkanDeviceContext { PFN_vkGetInstanceProcAddr get_proc_addr; /** - * Vulkan instance. Must be at least version 1.2. + * Vulkan instance. Must be at least version 1.3. 
*/ VkInstance inst; -- 2.39.2 [-- Attachment #22: 0021-hwcontext_vulkan-enable-support-for-YCbCr-samplers.patch --] [-- Type: text/x-diff, Size: 1833 bytes --] From f365b7902693a367d77032e13c2e099306308f44 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 02:37:14 +0100 Subject: [PATCH 21/72] hwcontext_vulkan: enable support for YCbCr samplers --- libavutil/hwcontext_vulkan.c | 1 + libavutil/vulkan_functions.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index c87f39d072..72850c03cf 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1378,6 +1378,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, goto end; } p->device_features_1_2.timelineSemaphore = 1; + p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; /* Setup queue family */ if ((err = setup_queue_families(ctx, &dev_info))) diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index d15a5d9a42..deb77495a2 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -155,6 +155,8 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipeline) \ \ /* Sampler */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSamplerYcbcrConversion) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \ \ -- 2.39.2 [-- Attachment #23: 0022-hwcontext_vulkan-enable-VK_KHR_synchronization2-if-s.patch --] [-- Type: text/x-diff, Size: 5364 bytes --] From b6db2ca65db72b346ba08480df4a201f7e1caea9 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Sun, 13 Mar 2022 09:06:06 +0100 Subject: [PATCH 22/72] hwcontext_vulkan: enable VK_KHR_synchronization2 if supported --- libavutil/hwcontext_vulkan.c | 17 +++++++++++++---- libavutil/vulkan_functions.h | 6 +++++- 2 files changed, 18 insertions(+), 5 
deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 72850c03cf..1d0261c8fe 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -89,6 +89,7 @@ typedef struct VulkanDevicePriv { /* Features */ VkPhysicalDeviceVulkan11Features device_features_1_1; VkPhysicalDeviceVulkan12Features device_features_1_2; + VkPhysicalDeviceVulkan13Features device_features_1_3; /* Queues */ uint32_t qfs[5]; @@ -346,7 +347,7 @@ static const VulkanOptExtension optional_device_exts[] = { /* Misc or required by other extensions */ { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, - { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, + { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_SYNC2 }, /* Imports/exports */ { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, @@ -1326,9 +1327,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, }; + VkPhysicalDeviceVulkan13Features dev_features_1_3 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, + .pNext = &timeline_features, + }; VkPhysicalDeviceVulkan12Features dev_features_1_2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, - .pNext = &timeline_features, + .pNext = &dev_features_1_3, }; VkPhysicalDeviceVulkan11Features dev_features_1_1 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, @@ -1340,8 +1345,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, }; VkDeviceCreateInfo dev_info = { - .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .pNext = &hwctx->device_features, + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, }; hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -1349,6 +1353,8 @@ static int 
vulkan_device_create_internal(AVHWDeviceContext *ctx, p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES; p->device_features_1_1.pNext = &p->device_features_1_2; p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; + p->device_features_1_2.pNext = &p->device_features_1_3; + p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES; ctx->free = vulkan_device_free; /* Create an instance if not given one */ @@ -1379,6 +1385,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, } p->device_features_1_2.timelineSemaphore = 1; p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; + p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2; + + dev_info.pNext = &hwctx->device_features; /* Setup queue family */ if ((err = setup_queue_families(ctx, &dev_info))) diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index deb77495a2..103bff3013 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -37,6 +37,7 @@ typedef enum FFVulkanExtensions { FF_VK_EXT_EXTERNAL_WIN32_MEMORY = 1ULL << 6, /* VK_KHR_external_memory_win32 */ FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */ #endif + FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; @@ -145,7 +146,10 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \ - \ + \ + /* sync2 */ \ + MACRO(1, 1, FF_VK_EXT_SYNC2, CmdPipelineBarrier2KHR) \ + \ /* Pipeline */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \ -- 2.39.2 [-- Attachment #24: 0023-hwcontext_vulkan-support-threadsafe-queue-and-frame-.patch 
--] [-- Type: text/x-diff, Size: 19170 bytes --] From 05e94e06667f305afe181c3b318d08b4e528ce09 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Tue, 15 Mar 2022 23:00:32 +0100 Subject: [PATCH 23/72] hwcontext_vulkan: support threadsafe queue and frame operations --- libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++---------- libavutil/hwcontext_vulkan.h | 40 +++++++- 2 files changed, 167 insertions(+), 49 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 1d0261c8fe..5a06a6872d 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -27,6 +27,7 @@ #include <dlfcn.h> #endif +#include <pthread.h> #include <unistd.h> #include "config.h" @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv { VkPhysicalDeviceVulkan13Features device_features_1_3; /* Queues */ - uint32_t qfs[5]; - int num_qfs; + pthread_mutex_t **qf_mutex; + int nb_tot_qfs; + uint32_t img_qfs[5]; + int nb_img_qfs; /* Debug callback */ VkDebugUtilsMessengerEXT debug_ctx; @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv { } VulkanFramesPriv; typedef struct AVVkFrameInternal { + pthread_mutex_t update_mutex; + #if CONFIG_CUDA /* Importing external memory into cuda is really expensive so we keep the * memory imported all the time */ @@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx) if (p->libvulkan) dlclose(p->libvulkan); + for (int i = 0; i < p->nb_tot_qfs; i++) + av_freep(&p->qf_mutex[i]); + av_freep(&p->qf_mutex); + RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions); RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions); } @@ -1436,13 +1445,26 @@ end: return err; } +static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index) +{ + VulkanDevicePriv *p = ctx->internal->priv; + pthread_mutex_lock(&p->qf_mutex[queue_family][index]); +} + +static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index) +{ + 
VulkanDevicePriv *p = ctx->internal->priv; + pthread_mutex_unlock(&p->qf_mutex[queue_family][index]); +} + static int vulkan_device_init(AVHWDeviceContext *ctx) { int err; - uint32_t queue_num; + uint32_t qf_num; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; + VkQueueFamilyProperties *qf; int graph_index, comp_index, tx_index, enc_index, dec_index; /* Set device extension flags */ @@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de); p->dev_is_intel = (p->props.properties.vendorID == 0x8086); - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL); - if (!queue_num) { + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL); + if (!qf_num) { av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); return AVERROR_EXTERNAL; } + qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties)); + if (!qf) + return AVERROR(ENOMEM); + + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf); + + p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex)); + if (!p->qf_mutex) + return AVERROR(ENOMEM); + p->nb_tot_qfs = qf_num; + + for (int i = 0; i < qf_num; i++) { + p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex)); + if (!p->qf_mutex[i]) + return AVERROR(ENOMEM); + for (int j = 0; j < qf[i].queueCount; j++) + pthread_mutex_init(&p->qf_mutex[i][j], NULL); + } + graph_index = hwctx->queue_family_index; comp_index = hwctx->queue_family_comp_index; tx_index = hwctx->queue_family_tx_index; @@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) return AVERROR(EINVAL); \ } else if (fidx < 0 || ctx_qf < 0) { \ break; \ - } else if (ctx_qf >= queue_num) { \ + } else if (ctx_qf >= qf_num) { \ av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \ - type, ctx_qf, queue_num); \ + type, ctx_qf, qf_num); \ 
return AVERROR(EINVAL); \ } \ \ @@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \ enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \ dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \ - p->qfs[p->num_qfs++] = ctx_qf; \ + p->img_qfs[p->nb_img_qfs++] = ctx_qf; \ } while (0) CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues); @@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) #undef CHECK_QUEUE + if (!hwctx->lock_queue) + hwctx->lock_queue = lock_queue; + if (!hwctx->unlock_queue) + hwctx->unlock_queue = unlock_queue; + /* Get device capabilities */ vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); @@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f) { AVVkFrameInternal *internal = f->internal; - if (!internal) - return; - #if CONFIG_CUDA if (internal->cuda_fc_ref) { AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data; @@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, uint32_t src_qf, dst_qf; VkImageLayout new_layout; VkAccessFlags new_access; + AVVulkanFramesContext *vkfc = hwfc->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; + AVFrame tmp = { .data[0] = (uint8_t *)frame }; uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; @@ -1944,6 +1989,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, }; VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS]; + + if ((err = wait_start_exec_ctx(hwfc, ectx))) + return err; + + vkfc->lock_frame(hwfc, frame); + for (int i = 0; i < planes; i++) { wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; sem_sig_val[i] = frame->sem_value[i] + 1; @@ -1980,9 +2031,6 @@ static int 
prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, break; } - if ((err = wait_start_exec_ctx(hwfc, ectx))) - return err; - /* Change the image layout to something more optimal for writes. * This also signals the newly created semaphore, making it usable * for synchronization */ @@ -2008,7 +2056,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, planes, img_bar); - return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); + err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); + vkfc->unlock_frame(hwfc, frame); + + return err; } static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, @@ -2090,10 +2141,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .usage = usage, .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = p->qfs, - .queueFamilyIndexCount = p->num_qfs, - .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; get_plane_wh(&create_info.extent.width, &create_info.extent.height, @@ -2117,6 +2168,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, return AVERROR_EXTERNAL; } + f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; f->layout[i] = create_info.initialLayout; f->access[i] = 0x0; f->sem_value[i] = 0; @@ -2161,10 +2213,10 @@ static void try_export_flags(AVHWFramesContext *hwfc, VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, .pNext = NULL, - .pQueueFamilyIndices = p->qfs, - .queueFamilyIndexCount = p->num_qfs, - .sharingMode = p->num_qfs > 1 ? 
VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; VkPhysicalDeviceExternalImageFormatInfo enext = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, @@ -2259,6 +2311,16 @@ fail: return NULL; } +static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) +{ + pthread_mutex_lock(&vkf->internal->update_mutex); +} + +static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) +{ + pthread_mutex_unlock(&vkf->internal->update_mutex); +} + static void vulkan_frames_uninit(AVHWFramesContext *hwfc) { VulkanFramesPriv *fp = hwfc->internal->priv; @@ -2421,6 +2483,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) return AVERROR(ENOMEM); } + if (!hwctx->lock_frame) + hwctx->lock_frame = lock_frame; + if (!hwctx->unlock_frame) + hwctx->unlock_frame = unlock_frame; + return 0; } @@ -2727,10 +2794,10 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = p->qfs, - .queueFamilyIndexCount = p->num_qfs, - .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, }; /* Image format verification */ @@ -2809,6 +2876,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f * offer us anything we could import and sync with, so instead * just signal the semaphore we created. */ + f->queue_family[i] = p->nb_img_qfs > 1 ? 
VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; f->layout[i] = create_info.initialLayout; f->access[i] = 0x0; f->sem_value[i] = 0; @@ -3017,20 +3085,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, CU_AD_FORMAT_UNSIGNED_INT8; dst_f = (AVVkFrame *)frame->data[0]; - dst_int = dst_f->internal; - if (!dst_int || !dst_int->cuda_fc_ref) { - if (!dst_f->internal) - dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); - - if (!dst_int) - return AVERROR(ENOMEM); + if (!dst_int->cuda_fc_ref) { dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc); - if (!dst_int->cuda_fc_ref) { - av_freep(&dst_f->internal); + if (!dst_int->cuda_fc_ref) return AVERROR(ENOMEM); - } for (int i = 0; i < planes; i++) { CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { @@ -3704,13 +3764,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, return err; } -static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, +static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, AVBufferRef **bufs, size_t *buf_offsets, const int *buf_stride, int w, int h, enum AVPixelFormat pix_fmt, int to_buf) { int err; AVVkFrame *frame = (AVVkFrame *)f->data[0]; + AVVulkanFramesContext *vkfc = hwfc->hwctx; VulkanFramesPriv *fp = hwfc->internal->priv; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; @@ -3745,11 +3806,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, .waitSemaphoreCount = planes, }; - for (int i = 0; i < planes; i++) - sem_signal_values[i] = frame->sem_value[i] + 1; + vkfc->lock_frame(hwfc, frame); if ((err = wait_start_exec_ctx(hwfc, ectx))) - return err; + goto end; + + for (int i = 0; i < planes; i++) + sem_signal_values[i] = frame->sem_value[i] + 1; /* Change the image layout to something more optimal for transfers */ for (int i = 0; i < planes; i++) { @@ -3824,14 +3887,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, if (!f->buf[ref]) 
break; if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1))) - return err; + goto end; } if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes))) - return err; - return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); + goto end; + err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); } else { - return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1); + err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1); } + +end: + vkfc->unlock_frame(hwfc, frame); + return err; } static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, @@ -3960,8 +4027,9 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, } /* Copy buffers into/from image */ - err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize, - swf->width, swf->height, swf->format, from); + err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets, + tmp.linesize, swf->width, swf->height, swf->format, + from); if (from) { /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */ @@ -4142,7 +4210,19 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc, AVVkFrame *av_vk_frame_alloc(void) { - return av_mallocz(sizeof(AVVkFrame)); + AVVkFrame *f = av_mallocz(sizeof(AVVkFrame)); + if (!f) + return NULL; + + f->internal = av_mallocz(sizeof(*f->internal)); + if (!f->internal) { + av_free(f); + return NULL; + } + + pthread_mutex_init(&f->internal->update_mutex, NULL); + + return f; } const HWContextType ff_hwcontext_type_vulkan = { diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index 70c8379dc3..406d8709c3 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -27,6 +27,8 @@ #include "pixfmt.h" #include "frame.h" +typedef struct AVVkFrame AVVkFrame; + /** * @file * API-specific header for AV_HWDEVICE_TYPE_VULKAN. 
@@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext { */ int queue_family_decode_index; int nb_decode_queues; + + /** + * Locks a queue, preventing other threads from submitting any command + * buffers to this queue. + * If set to NULL, will be set to lavu-internal functions that utilize a + * mutex. + */ + void (*lock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index); + + /** + * Similar to lock_queue(), unlocks a queue. Must only be called after locking. + */ + void (*unlock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index); } AVVulkanDeviceContext; /** @@ -195,6 +210,23 @@ typedef struct AVVulkanFramesContext { * av_hwframe_ctx_init(). */ AVVkFrameFlags flags; + + /** + * Locks a frame, preventing other threads from changing frame properties. + * If set to NULL, will be set to lavu-internal functions that utilize a + * mutex. + * Users SHOULD only ever lock just before command submission in order + * to get accurate frame properties, and unlock immediately after command + * submission without waiting for it to finish. + * + * If unset, will be set to lavu-internal functions that utilize a mutex. + */ + void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); + + /** + * Similar to lock_frame(), unlocks a frame. Must only be called after locking. + */ + void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); } AVVulkanFramesContext; /* @@ -210,7 +242,7 @@ typedef struct AVVulkanFramesContext { * @note the size of this structure is not part of the ABI, to allocate * you must use @av_vk_frame_alloc(). */ -typedef struct AVVkFrame { +struct AVVkFrame { /** * Vulkan images to which the memory is bound to. */ @@ -264,6 +296,12 @@ typedef struct AVVkFrame { * Describes the binding offset of each plane to the VkDeviceMemory. */ ptrdiff_t offset[AV_NUM_DATA_POINTERS]; + + /** + * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if + * the image was allocated with the CONCURRENT sharing mode.
+ */ + uint32_t queue_family[AV_NUM_DATA_POINTERS]; } AVVkFrame; /** -- 2.39.2 [-- Attachment #25: 0024-hwcontext_vulkan-remove-contiguous-memory-hack.patch --] [-- Type: text/x-diff, Size: 2600 bytes --] From 197e5cfa63a2356a64ac6ae20024fa98fda26f43 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 20:32:49 +0100 Subject: [PATCH 24/72] hwcontext_vulkan: remove contiguous memory hack --- libavutil/hwcontext_vulkan.c | 12 ------------ libavutil/hwcontext_vulkan.h | 4 +--- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 5a06a6872d..ab5b24f10c 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -112,9 +112,6 @@ typedef struct VulkanDevicePriv { /* Nvidia */ int dev_is_nvidia; - - /* Intel */ - int dev_is_intel; } VulkanDevicePriv; typedef struct VulkanFramesPriv { @@ -1501,7 +1498,6 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) p->hprops.minImportedHostPointerAlignment); p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de); - p->dev_is_intel = (p->props.properties.vendorID == 0x8086); vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL); if (!qf_num) { @@ -1620,8 +1616,6 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, return AVERROR_EXTERNAL; } - if (strstr(vendor, "Intel")) - dev_select.vendor_id = 0x8086; if (strstr(vendor, "AMD")) dev_select.vendor_id = 0x1002; @@ -2356,12 +2350,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) if (!hwctx->usage) hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS; - if (!(hwctx->flags & AV_VK_FRAME_FLAG_NONE)) { - if (p->contiguous_planes == 1 || - ((p->contiguous_planes == -1) && p->dev_is_intel)) - hwctx->flags |= AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY; - } - modifier_info = vk_find_struct(hwctx->create_pnext, VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index 
406d8709c3..e89fa52927 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -160,9 +160,7 @@ typedef enum AVVkFrameFlags { * device and tiling during av_hwframe_ctx_init(). */ AV_VK_FRAME_FLAG_NONE = (1ULL << 0), - /* Image planes will be allocated in a single VkDeviceMemory, rather - * than as per-plane VkDeviceMemory allocations. Required for exporting - * to VAAPI on Intel devices. */ + /* DEPRECATED: does nothing. */ AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1), } AVVkFrameFlags; -- 2.39.2 [-- Attachment #26: 0025-hwcontext_vulkan-rename-vk_pixfmt_map-to-vk_pixfmt_p.patch --] [-- Type: text/x-diff, Size: 1383 bytes --] From 28903a643a7db85e6eef289a853a03b33b67be41 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 20:35:51 +0100 Subject: [PATCH 25/72] hwcontext_vulkan: rename vk_pixfmt_map to vk_pixfmt_planar_map --- libavutil/hwcontext_vulkan.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index ab5b24f10c..de5575c031 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal { static const struct { enum AVPixelFormat pixfmt; - const VkFormat vkfmts[4]; -} vk_pixfmt_map[] = { + const VkFormat vkfmts[5]; +} vk_pixfmt_planar_map[] = { { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } }, { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } }, { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } }, @@ -244,9 +244,9 @@ static const struct { const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) { - for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++) - if (vk_pixfmt_map[i].pixfmt == p) - return vk_pixfmt_map[i].vkfmts; + for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++) + if (vk_pixfmt_planar_map[i].pixfmt == p) + return vk_pixfmt_planar_map[i].vkfmts; return NULL; } -- 2.39.2 [-- Attachment #27: 
0026-hwcontext_vulkan-fix-minor-type-issue-in-VulkanQueue.patch --] [-- Type: text/x-diff, Size: 772 bytes --] From a62f75557a8b2d64fe88670b823d1e8500504bd2 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 00:52:15 +0100 Subject: [PATCH 26/72] hwcontext_vulkan: fix minor type issue in VulkanQueueCtx.buf_deps_alloc_size --- libavutil/hwcontext_vulkan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index de5575c031..8141e8c310 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -66,7 +66,7 @@ typedef struct VulkanQueueCtx { /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; - int buf_deps_alloc_size; + unsigned int buf_deps_alloc_size; } VulkanQueueCtx; typedef struct VulkanExecCtx { -- 2.39.2 [-- Attachment #28: 0027-hwcontext_vulkan-report-nonCoherentAtomSize.patch --] [-- Type: text/x-diff, Size: 1140 bytes --] From 0dec881653e9c9434a1b06ea212735a4c7b9caf8 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 28 Dec 2022 05:55:17 +0100 Subject: [PATCH 27/72] hwcontext_vulkan: report nonCoherentAtomSize --- libavutil/hwcontext_vulkan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 8141e8c310..7e63c2350c 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1493,6 +1493,8 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) p->props.properties.limits.optimalBufferCopyRowPitchAlignment); av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n", p->props.properties.limits.minMemoryMapAlignment); + av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n", + p->props.properties.limits.nonCoherentAtomSize); if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n", p->hprops.minImportedHostPointerAlignment); -- 2.39.2 [-- 
Attachment #29: 0028-hwcontext_vulkan-add-support-for-descriptor-buffers.patch --] [-- Type: text/x-diff, Size: 6084 bytes --] From a028bdcd05284bfb306558212646a309e2da4c24 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:15:02 +0100 Subject: [PATCH 28/72] hwcontext_vulkan: add support for descriptor buffers --- libavutil/hwcontext_vulkan.c | 13 ++++++++++++- libavutil/vulkan_functions.h | 9 +++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 7e63c2350c..60ff11ad3d 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -91,6 +91,7 @@ typedef struct VulkanDevicePriv { VkPhysicalDeviceVulkan11Features device_features_1_1; VkPhysicalDeviceVulkan12Features device_features_1_2; VkPhysicalDeviceVulkan13Features device_features_1_3; + VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features; /* Queues */ pthread_mutex_t **qf_mutex; @@ -350,6 +351,7 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_SYNC2 }, + { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, /* Imports/exports */ { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, @@ -1333,9 +1335,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, }; + VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT, + .pNext = &timeline_features, + }; VkPhysicalDeviceVulkan13Features dev_features_1_3 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, - .pNext = &timeline_features, + .pNext = 
&desc_buf_features, }; VkPhysicalDeviceVulkan12Features dev_features_1_2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, @@ -1361,6 +1367,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES; p->device_features_1_2.pNext = &p->device_features_1_3; p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES; + p->device_features_1_3.pNext = &p->desc_buf_features; + p->desc_buf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT; ctx->free = vulkan_device_free; /* Create an instance if not given one */ @@ -1390,8 +1398,11 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, goto end; } p->device_features_1_2.timelineSemaphore = 1; + p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress; p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion; p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2; + p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer; + p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors; dev_info.pNext = &hwctx->device_features; diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index 103bff3013..f8739da8e5 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -38,6 +38,7 @@ typedef enum FFVulkanExtensions { FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */ #endif FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */ + FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 9, /* VK_EXT_descriptor_buffer */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; @@ -121,6 +122,7 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateBuffer) \ MACRO(1, 1, 
FF_VK_EXT_NO_FLAG, BindBufferMemory) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferDeviceAddress) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyBuffer) \ \ /* Image */ \ @@ -142,6 +144,13 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorPool) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorSetLayout) \ \ + /* Descriptor buffers */ \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutSizeEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutBindingOffsetEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdBindDescriptorBuffersEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdSetDescriptorBufferOffsetsEXT) \ + \ /* DescriptorUpdateTemplate */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \ -- 2.39.2 [-- Attachment #30: 0029-hwcontext_vulkan-add-functions-for-video-decoding.patch --] [-- Type: text/x-diff, Size: 6637 bytes --] From cc5ef22f90cc48ee604f6a27d28bb05237b9f2b7 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 13:34:36 +0100 Subject: [PATCH 29/72] hwcontext_vulkan: add functions for video decoding --- libavutil/hwcontext_vulkan.c | 6 ++++++ libavutil/vulkan.c | 8 +++++--- libavutil/vulkan_functions.h | 20 ++++++++++++++++++++ libavutil/vulkan_loader.h | 4 ++++ 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 60ff11ad3d..c0e35d8d78 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -363,6 +363,12 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, #endif + + /* Video encoding/decoding */ + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE 
}, + { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, + { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, }; /* Converts return values to strings */ diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 403f0b1f27..6bf2c214b7 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -85,9 +85,11 @@ const char *ff_vk_ret2str(VkResult res) CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); CASE(VK_ERROR_VALIDATION_FAILED_EXT); CASE(VK_ERROR_INVALID_SHADER_NV); - CASE(VK_ERROR_OUT_OF_POOL_MEMORY); - CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); - CASE(VK_ERROR_NOT_PERMITTED_EXT); + CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR); default: return "Unknown error"; } #undef CASE diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index f8739da8e5..65ab560d21 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -39,6 +39,10 @@ typedef enum FFVulkanExtensions { #endif FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */ FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 9, /* VK_EXT_descriptor_buffer */ + FF_VK_EXT_VIDEO_QUEUE = 1ULL << 10, /* VK_KHR_video_queue */ + FF_VK_EXT_VIDEO_DECODE_QUEUE = 1ULL << 11, /* VK_KHR_video_decode_queue */ + FF_VK_EXT_VIDEO_DECODE_H264 = 1ULL << 12, /* VK_KHR_video_decode_h264 */ + FF_VK_EXT_VIDEO_DECODE_H265 = 1ULL << 13, /* VK_KHR_video_decode_h265 */ FF_VK_EXT_NO_FLAG = 1ULL << 31, } FFVulkanExtensions; @@ -60,6 +64,8 @@ typedef enum FFVulkanExtensions { MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDevice) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFeatures2) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceProperties) \ + MACRO(1, 0,
FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoCapabilitiesKHR) \ + MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoFormatPropertiesKHR) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, DeviceWaitIdle) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDevice) \ \ @@ -159,6 +165,20 @@ typedef enum FFVulkanExtensions { /* sync2 */ \ MACRO(1, 1, FF_VK_EXT_SYNC2, CmdPipelineBarrier2KHR) \ \ + /* Video queue */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionParametersKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, GetVideoSessionMemoryRequirementsKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, BindVideoSessionMemoryKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdBeginVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdControlVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdEndVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionParametersKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionKHR) \ + \ + /* Video decoding */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \ + \ /* Pipeline */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \ diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index 3f1ee6aa46..5385e398bf 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -48,6 +48,10 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, #endif + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, + { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, + { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, }; FFVulkanExtensions mask = 0x0; -- 2.39.2 [-- 
Attachment #31: 0030-hwcontext_vulkan-support-PREP_MODE_DECODING-in-prepa.patch --] [-- Type: text/x-diff, Size: 5554 bytes --] From 506c7daa8423efd56296868cce017642235b6186 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 15:18:21 +0100 Subject: [PATCH 30/72] hwcontext_vulkan: support PREP_MODE_DECODING in prepare_frame() --- libavutil/hwcontext_vulkan.c | 70 ++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index c0e35d8d78..e7c14fad74 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1969,7 +1969,9 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, enum PrepMode { PREP_MODE_WRITE, PREP_MODE_EXTERNAL_EXPORT, - PREP_MODE_EXTERNAL_IMPORT + PREP_MODE_EXTERNAL_IMPORT, + PREP_MODE_DECODING_DST, + PREP_MODE_DECODING_DPB, }; static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, @@ -1978,7 +1980,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, int err; uint32_t src_qf, dst_qf; VkImageLayout new_layout; - VkAccessFlags new_access; + VkAccessFlags2 new_access; AVVulkanFramesContext *vkfc = hwfc->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; @@ -1986,7 +1988,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, AVFrame tmp = { .data[0] = (uint8_t *)frame }; uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; - VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + VkDependencyInfo dep_info; VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, @@ -2042,32 +2045,55 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, s_info.pWaitDstStageMask = wait_st; s_info.waitSemaphoreCount = planes; break; + case 
PREP_MODE_DECODING_DST: + new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; + new_access = VK_ACCESS_TRANSFER_WRITE_BIT; + src_qf = VK_QUEUE_FAMILY_IGNORED; + dst_qf = VK_QUEUE_FAMILY_IGNORED; + break; + case PREP_MODE_DECODING_DPB: + new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR; + new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + src_qf = VK_QUEUE_FAMILY_IGNORED; + dst_qf = VK_QUEUE_FAMILY_IGNORED; + break; } /* Change the image layout to something more optimal for writes. * This also signals the newly created semaphore, making it usable * for synchronization */ for (int i = 0; i < planes; i++) { - img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - img_bar[i].srcAccessMask = 0x0; - img_bar[i].dstAccessMask = new_access; - img_bar[i].oldLayout = frame->layout[i]; - img_bar[i].newLayout = new_layout; - img_bar[i].srcQueueFamilyIndex = src_qf; - img_bar[i].dstQueueFamilyIndex = dst_qf; - img_bar[i].image = frame->img[i]; - img_bar[i].subresourceRange.levelCount = 1; - img_bar[i].subresourceRange.layerCount = 1; - img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_bar[i] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, + .srcAccessMask = 0x0, + .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT, + .dstAccessMask = new_access, + .oldLayout = frame->layout[i], + .newLayout = new_layout, + .srcQueueFamilyIndex = src_qf, + .dstQueueFamilyIndex = dst_qf, + .image = frame->img[i], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + }, + }; frame->layout[i] = img_bar[i].newLayout; frame->access[i] = img_bar[i].dstAccessMask; } - vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx), - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, NULL, 0, NULL, planes, img_bar); + dep_info = (VkDependencyInfo) { + .sType = 
VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = planes, + }; + + vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info); err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); vkfc->unlock_frame(hwfc, frame); @@ -2308,7 +2334,13 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) if (err) goto fail; - err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE); + if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && + !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) + err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DPB); + else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR) + err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DST); + else + err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE); if (err) goto fail; -- 2.39.2 [-- Attachment #32: 0031-vulkan-lock-queues-before-submitting-operations.patch --] [-- Type: text/x-diff, Size: 1087 bytes --] From 6da405c60b7b04895a4395f5e226e8cc60e6552e Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 14:04:28 +0100 Subject: [PATCH 31/72] vulkan: lock queues before submitting operations --- libavutil/vulkan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 6bf2c214b7..ad13b8f3cb 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) return AVERROR_EXTERNAL; } + s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); + ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence); + + s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); + if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: 
%s\n", ff_vk_ret2str(ret)); -- 2.39.2 [-- Attachment #33: 0032-vulkan-define-VK_NO_PROTOTYPES.patch --] [-- Type: text/x-diff, Size: 573 bytes --] From 69c6d3dff6040feb9192be9364b064cce340ef3a Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 14:04:48 +0100 Subject: [PATCH 32/72] vulkan: define VK_NO_PROTOTYPES --- libavutil/vulkan.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index d1ea1e24fb..7927b04454 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -19,6 +19,8 @@ #ifndef AVUTIL_VULKAN_H #define AVUTIL_VULKAN_H +#define VK_NO_PROTOTYPES + #include "pixdesc.h" #include "bprint.h" #include "hwcontext.h" -- 2.39.2 [-- Attachment #34: 0033-vulkan-add-additional-error-codes.patch --] [-- Type: text/x-diff, Size: 1553 bytes --] From 3049e9213948926ec2a3f42808f065c336eb0126 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 13:54:35 +0100 Subject: [PATCH 33/72] vulkan: add additional error codes --- libavutil/vulkan.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index ad13b8f3cb..f2846e628a 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -78,6 +78,12 @@ const char *ff_vk_ret2str(VkResult res) CASE(VK_ERROR_TOO_MANY_OBJECTS); CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); CASE(VK_ERROR_FRAGMENTED_POOL); + CASE(VK_ERROR_UNKNOWN); + CASE(VK_ERROR_OUT_OF_POOL_MEMORY); + CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); + CASE(VK_ERROR_FRAGMENTATION); + CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS); + CASE(VK_PIPELINE_COMPILE_REQUIRED); CASE(VK_ERROR_SURFACE_LOST_KHR); CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); CASE(VK_SUBOPTIMAL_KHR); @@ -90,6 +96,13 @@ const char *ff_vk_ret2str(VkResult res) CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR); CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR); CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); + 
CASE(VK_ERROR_NOT_PERMITTED_KHR); + CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); + CASE(VK_THREAD_IDLE_KHR); + CASE(VK_THREAD_DONE_KHR); + CASE(VK_OPERATION_DEFERRED_KHR); + CASE(VK_OPERATION_NOT_DEFERRED_KHR); default: return "Unknown error"; } #undef CASE -- 2.39.2 [-- Attachment #35: 0034-vulkan-fix-comment-statement-about-exec_queue-blocki.patch --] [-- Type: text/x-diff, Size: 919 bytes --] From 630be2276afccbac78976d7c8a0f3662b72de248 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 10 Mar 2022 21:41:59 +0100 Subject: [PATCH 34/72] vulkan: fix comment statement about exec_queue blocking --- libavutil/vulkan.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 7927b04454..a8aa9d8a8b 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -386,9 +386,7 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag); /** - * Submits a command buffer to the queue for execution. - * Will block until execution has finished in order to simplify resource - * management. + * Submits a command buffer to the queue for execution. Will not block. 
*/ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); -- 2.39.2 [-- Attachment #36: 0035-vulkan-add-pNext-argument-to-ff_vk_create_buf.patch --] [-- Type: text/x-diff, Size: 3809 bytes --] From d9c9bfa670126ea72a95a1808beb6bd0883cbb98 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 17 Mar 2022 12:23:56 +0100 Subject: [PATCH 35/72] vulkan: add pNext argument to ff_vk_create_buf() --- libavfilter/vf_gblur_vulkan.c | 2 +- libavfilter/vf_overlay_vulkan.c | 2 +- libavfilter/vf_scale_vulkan.c | 2 +- libavutil/vulkan.c | 4 ++-- libavutil/vulkan.h | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c index d61f3c778c..c6360799a7 100644 --- a/libavfilter/vf_gblur_vulkan.c +++ b/libavfilter/vf_gblur_vulkan.c @@ -174,7 +174,7 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk RET(ff_vk_init_pipeline_layout(&s->vkctx, pl)); RET(ff_vk_init_compute_pipeline(&s->vkctx, pl)); - RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, + RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0)); diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c index e87ee83000..bdf231f4ef 100644 --- a/libavfilter/vf_overlay_vulkan.c +++ b/libavfilter/vf_overlay_vulkan.c @@ -181,7 +181,7 @@ static av_cold int init_filter(AVFilterContext *ctx) } *par; err = ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), + sizeof(*par), NULL, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); if (err) diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c index c140420896..31dc35569b 100644 --- a/libavfilter/vf_scale_vulkan.c +++ b/libavfilter/vf_scale_vulkan.c @@ -253,7 +253,7 @@ static av_cold int init_filter(AVFilterContext *ctx, 
AVFrame *in) } RET(ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), + sizeof(*par), NULL, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index f2846e628a..ae6adc5104 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -205,7 +205,7 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, return 0; } -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) { int err; @@ -215,7 +215,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, VkBufferCreateInfo buf_spawn = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = NULL, + .pNext = pNext, .usage = usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .size = size, /* Gets FFALIGNED during alloc if host visible diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index a8aa9d8a8b..2311928a8c 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -393,7 +393,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); /** * Create a VkBuffer with the specified parameters. 
*/ -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); /** -- 2.39.2 [-- Attachment #37: 0036-vulkan-add-ff_vk_qf_fill.patch --] [-- Type: text/x-diff, Size: 2777 bytes --] From da581e95cea93e9b628263aa28de945828f71967 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 13:03:58 +0100 Subject: [PATCH 36/72] vulkan: add ff_vk_qf_fill() --- libavutil/vulkan.c | 25 +++++++++++++++++++++++++ libavutil/vulkan.h | 9 +++++++++ 2 files changed, 34 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index ae6adc5104..eceef295a8 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -108,6 +108,31 @@ const char *ff_vk_ret2str(VkResult res) #undef CASE } +void ff_vk_qf_fill(FFVulkanContext *s) +{ + s->nb_qfs = 0; + + /* Simply fills in all unique queues into s->qfs */ + if (s->hwctx->queue_family_index >= 0) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; + if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; + if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && + s->qfs[1] != s->hwctx->queue_family_comp_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; + if (s->hwctx->queue_family_decode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_decode_index && + s->qfs[1] != s->hwctx->queue_family_decode_index && + s->qfs[2] != s->hwctx->queue_family_decode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; + if (s->hwctx->queue_family_encode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_encode_index && + s->qfs[1] != s->hwctx->queue_family_encode_index && + s->qfs[2] != s->hwctx->queue_family_encode_index && + s->qfs[3] != s->hwctx->queue_family_encode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; +} + void 
ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, VkQueueFlagBits dev_family, int nb_queues) { diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 2311928a8c..7254c21cf7 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -203,6 +203,9 @@ typedef struct FFVulkanContext { AVHWFramesContext *frames; AVVulkanFramesContext *hwfc; + uint32_t qfs[5]; + int nb_qfs; + FFVkSPIRVCompiler *spirv_compiler; /* Properties */ @@ -245,6 +248,12 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); */ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); +/** + * Setup the queue families from the hardware device context. + * Necessary for image creation to work. + */ +void ff_vk_qf_fill(FFVulkanContext *s); + /** * Initialize a queue family with a specific number of queues. * If nb_queues == 0, use however many queues the queue family has. -- 2.39.2 [-- Attachment #38: 0037-vulkan-add-ff_vk_image_create.patch --] [-- Type: text/x-diff, Size: 4892 bytes --] From 9da56b3fc3169588f97f590abeecb7ead3c18202 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 13:05:59 +0100 Subject: [PATCH 37/72] vulkan: add ff_vk_image_create() --- libavutil/vulkan.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++ libavutil/vulkan.h | 11 ++++++ 2 files changed, 100 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index eceef295a8..212f134466 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -401,6 +401,95 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); } +int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx, + int width, int height, VkFormat fmt, VkImageTiling tiling, + VkImageUsageFlagBits usage, VkImageCreateFlags flags, + void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + AVVulkanDeviceContext *hwctx = s->hwctx; + + VkExportSemaphoreCreateInfo ext_sem_info 
= { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, +#ifdef _WIN32 + .handleTypes = IsWindows8OrGreater() + ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT + : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, +#else + .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, +#endif + }; + + VkSemaphoreTypeCreateInfo sem_type_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, +#ifdef _WIN32 + .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, +#else + .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, +#endif + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = 0, + }; + + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &sem_type_info, + }; + + /* Create the image */ + VkImageCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = create_pnext, + .imageType = VK_IMAGE_TYPE_2D, + .format = fmt, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .flags = flags, + .tiling = tiling, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = usage, + .samples = VK_SAMPLE_COUNT_1_BIT, + .pQueueFamilyIndices = s->qfs, + .queueFamilyIndexCount = s->nb_qfs, + .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, + }; + + ret = vk->CreateImage(hwctx->act_dev, &create_info, + hwctx->alloc, &f->img[0]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR(EINVAL); + goto fail; + } + + /* Create semaphore */ + ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem[0]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + f->queue_family[0] = s->nb_qfs > 1 ? 
VK_QUEUE_FAMILY_IGNORED : s->qfs[0]; + f->layout[0] = create_info.initialLayout; + f->access[0] = 0x0; + f->sem_value[0] = 0; + + f->flags = 0x0; + f->tiling = tiling; + + return 0; + +fail: + return err; +} + int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, VkShaderStageFlagBits stage) { diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 7254c21cf7..69c099fa8f 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -423,6 +423,17 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, */ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); +/** + * Creates an image, allocates and binds memory in the given + * idx value of the dst frame. If mem is non-NULL, then no memory will be + * allocated, but instead the given memory will be bound to the image. + */ +int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx, + int width, int height, VkFormat fmt, VkImageTiling tiling, + VkImageUsageFlagBits usage, VkImageCreateFlags flags, + void *create_pnext, + VkDeviceMemory *mem, void *alloc_pnext); + /** * Frees the main Vulkan context. 
*/ -- 2.39.2 [-- Attachment #39: 0038-vulkan-expose-ff_vk_alloc_mem.patch --] [-- Type: text/x-diff, Size: 2666 bytes --] From 661af851afe7dcb3c2982fab953aff2941b4e5b9 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 23 Nov 2022 14:03:34 +0100 Subject: [PATCH 38/72] vulkan: expose ff_vk_alloc_mem() --- libavutil/vulkan.c | 15 ++++++++------- libavutil/vulkan.h | 7 +++++++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 212f134466..7870de351d 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -174,9 +174,9 @@ void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; } -static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) { VkResult ret; int index = -1; @@ -225,7 +225,8 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, return AVERROR(ENOMEM); } - *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; + if (mem_flags) + *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; return 0; } @@ -279,9 +280,9 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe if (use_ded_mem) ded_alloc.buffer = buf->buf; - err = vk_alloc_mem(s, &req.memoryRequirements, flags, - use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, - &buf->flags, &buf->mem); + err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, + use_ded_mem ? 
&ded_alloc : (void *)ded_alloc.pNext, + &buf->flags, &buf->mem); if (err) return err; diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 69c099fa8f..afc8bce999 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -254,6 +254,13 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); */ void ff_vk_qf_fill(FFVulkanContext *s); +/** + * Allocate device memory. + */ +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); + /** * Initialize a queue family with a specific number of queues. * If nb_queues == 0, use however many queues the queue family has. -- 2.39.2 [-- Attachment #40: 0039-vulkan-support-ignoring-memory-properties-when-alloc.patch --] [-- Type: text/x-diff, Size: 1648 bytes --] From e2a8084132631c8fad25aa5a2850deb904e42847 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Tue, 29 Nov 2022 00:43:19 +0000 Subject: [PATCH 39/72] vulkan: support ignoring memory properties when allocating --- libavutil/vulkan.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 7870de351d..b1553c6537 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -188,7 +188,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, }; /* Align if we need to */ - if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment); alloc_info.allocationSize = req->size; @@ -201,7 +201,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, continue; /* The memory type flags must include our properties */ - if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) + if ((req_flags != UINT32_MAX) && + ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)) 
continue; /* Found a suitable memory type */ @@ -210,7 +211,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, } if (index < 0) { - av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", + av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", req_flags); return AVERROR(EINVAL); } -- 2.39.2 [-- Attachment #41: 0040-vulkan-allow-alloc-pNext-in-ff_vk_create_buf.patch --] [-- Type: text/x-diff, Size: 1878 bytes --] From 6ac7455f51f0ea1d68b4be2c8cf3ef6f5ca9abde Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 15 Dec 2022 17:43:27 +0100 Subject: [PATCH 40/72] vulkan: allow alloc pNext in ff_vk_create_buf --- libavutil/vulkan.c | 5 +++-- libavutil/vulkan.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index b1553c6537..0bb5b1eebf 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, return 0; } -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) { int err; @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe }; VkMemoryDedicatedAllocateInfo ded_alloc = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = NULL, + .pNext = alloc_pNext, }; VkMemoryDedicatedRequirements ded_req = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index afc8bce999..65f24ca138 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -409,7 +409,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); /** * Create a VkBuffer with the specified parameters. 
*/ -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); /** -- 2.39.2 [-- Attachment #42: 0041-vulkan-do-not-wait-for-device-idle-when-destroying-b.patch --] [-- Type: text/x-diff, Size: 786 bytes --] From 8ce981bb551f37d27f9a11a36c4af7eb007011cb Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 16 Dec 2022 00:37:53 +0100 Subject: [PATCH 41/72] vulkan: do not wait for device idle when destroying buffers This should be done explicitly. --- libavutil/vulkan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 0bb5b1eebf..0250f5aa39 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -396,8 +396,6 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) if (!buf || !s->hwctx) return; - vk->DeviceWaitIdle(s->hwctx->act_dev); - if (buf->buf != VK_NULL_HANDLE) vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); if (buf->mem != VK_NULL_HANDLE) -- 2.39.2 [-- Attachment #43: 0042-vulkan-add-size-tracking-to-buffer-structs.patch --] [-- Type: text/x-diff, Size: 964 bytes --] From 0f532a85d9d3fd09d8f35f61911edc8827ed26c0 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 16 Dec 2022 01:47:42 +0100 Subject: [PATCH 42/72] vulkan: add size tracking to buffer structs --- libavutil/vulkan.c | 2 ++ libavutil/vulkan.h | 1 + 2 files changed, 3 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 0250f5aa39..faf5cd5508 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -295,6 +295,8 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, return AVERROR_EXTERNAL; } + buf->size = size; + return 0; } diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 65f24ca138..c993263324 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -94,6 +94,7 @@ 
typedef struct FFVkBuffer { VkBuffer buf; VkDeviceMemory mem; VkMemoryPropertyFlagBits flags; + size_t size; } FFVkBuffer; typedef struct FFVkQueueFamilyCtx { -- 2.39.2 [-- Attachment #44: 0043-vulkan-use-device-properties-2-and-add-a-convenience.patch --] [-- Type: text/x-diff, Size: 2388 bytes --] From 054c1925dd67a5918fd42b894bb4ca966e60aec8 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Mon, 19 Dec 2022 07:57:22 +0100 Subject: [PATCH 43/72] vulkan: use device properties 2 and add a convenience loader function --- libavutil/vulkan.c | 18 +++++++++++++++++- libavutil/vulkan.h | 8 +++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index faf5cd5508..8a583248d1 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -108,6 +108,22 @@ const char *ff_vk_ret2str(VkResult res) #undef CASE } +void ff_vk_load_props(FFVulkanContext *s) +{ + FFVulkanFunctions *vk = &s->vkfn; + + s->driver_props = (VkPhysicalDeviceDriverProperties) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + }; + s->props = (VkPhysicalDeviceProperties2) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = &s->driver_props, + }; + + vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); + vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); +} + void ff_vk_qf_fill(FFVulkanContext *s) { s->nb_qfs = 0; @@ -189,7 +205,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, /* Align if we need to */ if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment); + req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment); alloc_info.allocationSize = req->size; diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index c993263324..0f6efd023e 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -193,7 +193,8 @@ typedef struct 
FFVulkanContext { FFVulkanFunctions vkfn; FFVulkanExtensions extensions; - VkPhysicalDeviceProperties props; + VkPhysicalDeviceProperties2 props; + VkPhysicalDeviceDriverProperties driver_props; VkPhysicalDeviceMemoryProperties mprops; AVBufferRef *device_ref; @@ -239,6 +240,11 @@ extern const VkComponentMapping ff_comp_identity_map; */ const char *ff_vk_ret2str(VkResult res); +/** + * Loads props/mprops/driver_props + */ +void ff_vk_load_props(FFVulkanContext *s); + /** * Returns 1 if the image is any sort of supported RGB */ -- 2.39.2 [-- Attachment #45: 0044-vulkan-minor-indent-fix-add-support-for-synchronous-.patch --] [-- Type: text/x-diff, Size: 2945 bytes --] From 834645640497d6e371fa50c40ee9ef9700494851 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 22 Dec 2022 05:02:50 +0100 Subject: [PATCH 44/72] vulkan: minor indent fix, add support for synchronous submission/waiting --- libavutil/vulkan.c | 20 ++++++++++++++++++-- libavutil/vulkan.h | 9 +++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 8a583248d1..b5e08ecc46 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -564,7 +564,7 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, /* Create command pool */ ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, - s->hwctx->alloc, &e->pool); + s->hwctx->alloc, &e->pool); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", ff_vk_ret2str(ret)); @@ -631,11 +631,13 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e) ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } - } else { + } else if (!q->synchronous) { vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); } + q->synchronous = 0; + /* Discard queue dependencies */ ff_vk_discard_exec_deps(e); @@ -788,9 +790,23 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, 
FFVkExecContext *e) for (int i = 0; i < e->sem_sig_cnt; i++) *e->sem_sig_val_dst[i] += 1; + q->submitted = 1; + return 0; } +void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) +{ + FFVulkanFunctions *vk = &s->vkfn; + FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; + if (!q->submitted) + return; + + vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); + vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); + q->synchronous = 1; +} + int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps) { diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 0f6efd023e..9ee9469305 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -143,6 +143,9 @@ typedef struct FFVkQueueCtx { VkFence fence; VkQueue queue; + int synchronous; + int submitted; + /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; @@ -413,6 +416,12 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, */ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); +/** + * Wait on a command buffer's execution. Mainly useful for debugging and + * development. + */ +void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e); + /** * Create a VkBuffer with the specified parameters. 
*/ -- 2.39.2 [-- Attachment #46: 0045-vulkan-add-support-for-queries.patch --] [-- Type: text/x-diff, Size: 7363 bytes --] From f97d922f523914c0d1e9748876aa3002e0f5811c Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 22 Dec 2022 05:03:32 +0100 Subject: [PATCH 45/72] vulkan: add support for queries --- libavutil/vulkan.c | 118 +++++++++++++++++++++++++++++++++++++++++++++ libavutil/vulkan.h | 30 ++++++++++++ 2 files changed, 148 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index b5e08ecc46..de0c300c0e 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -592,6 +592,114 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, return 0; } +int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, + int nb_queries, VkQueryType type, + int elem_64bits, void *create_pnext) +{ + VkResult ret; + size_t qd_size; + int nb_results = nb_queries; + int nb_statuses = 0 /* Once RADV has support, = nb_queries */; + int status_stride = 2; + int result_elem_size = elem_64bits ? 
8 : 4; + FFVulkanFunctions *vk = &s->vkfn; + VkQueryPoolCreateInfo query_pool_info = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = create_pnext, + .queryType = type, + .queryCount = nb_queries*e->qf->nb_queues, + }; + + if (e->query.pool) + return AVERROR(EINVAL); + + /* Video encode queries produce two results per query */ + if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) { + status_stride = 3; /* skip,skip,result,skip,skip,result */ + nb_results *= 2; + } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { + status_stride = 1; + nb_results *= 0; + } + + qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size; + + e->query.data = av_mallocz(e->qf->nb_queues*qd_size); + if (!e->query.data) + return AVERROR(ENOMEM); + + ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, + s->hwctx->alloc, &e->query.pool); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + e->query.data_per_queue = qd_size; + e->query.nb_queries = nb_queries; + e->query.nb_results = nb_results; + e->query.nb_statuses = nb_statuses; + e->query.elem_64bits = elem_64bits; + e->query.status_stride = status_stride; + + return 0; +} + +int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, + int query_idx, void **data, int64_t *status) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + uint8_t *qd; + int32_t *res32; + int64_t *res64; + int64_t res = 0; + VkQueryResultFlags qf = 0; + FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; + + if (!q->submitted) { + *data = NULL; + return 0; + } + + qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue; + qf |= e->query.nb_results && e->query.nb_statuses ? + VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; + qf |= e->query.elem_64bits ? 
VK_QUERY_RESULT_64_BIT : 0x0; + res32 = (int32_t *)(qd + e->query.nb_results*4); + res64 = (int64_t *)(qd + e->query.nb_results*8); + + ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool, + query_idx, + e->query.nb_queries, + e->query.data_per_queue, qd, + e->query.elem_64bits ? 8 : 4, qf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (e->query.nb_statuses && e->query.elem_64bits) { + for (int i = 0; i < e->query.nb_queries; i++) { + res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? + res64[i] : res; + res64 += e->query.status_stride; + } + } else if (e->query.nb_statuses) { + for (int i = 0; i < e->query.nb_queries; i++) { + res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? + res32[i] : res; + res32 += e->query.status_stride; + } + } + + if (data) + *data = qd; + if (status) + *status = res; + + return 0; +} + void ff_vk_discard_exec_deps(FFVkExecContext *e) { FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; @@ -648,6 +756,12 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e) return AVERROR_EXTERNAL; } + if (e->query.pool) { + e->query.idx = e->qf->cur_queue*e->query.nb_queries; + vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool, + e->query.idx, e->query.nb_queries); + } + return 0; } @@ -790,6 +904,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) for (int i = 0; i < e->sem_sig_cnt; i++) *e->sem_sig_val_dst[i] += 1; + e->query.idx = e->qf->cur_queue*e->query.nb_queries; q->submitted = 1; return 0; @@ -1483,7 +1598,10 @@ static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs); if (e->pool) vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); + if (e->query.pool) + vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc); + av_freep(&e->query.data); 
av_freep(&e->bufs); av_freep(&e->queues); av_freep(&e->sem_sig); diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 9ee9469305..e222f67b5a 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -164,6 +164,19 @@ typedef struct FFVkExecContext { VkCommandBuffer *bufs; FFVkQueueCtx *queues; + struct { + int idx; + VkQueryPool pool; + uint8_t *data; + + int nb_queries; + int nb_results; + int nb_statuses; + int elem_64bits; + size_t data_per_queue; + int status_stride; + } query; + AVBufferRef ***deps; int *nb_deps; int *dep_alloc_size; @@ -367,6 +380,23 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, FFVkQueueFamilyCtx *qf); +/** + * Create a query pool for a command context. + * elem_64bits exists to troll driver devs for compliance. All results + * and statuses returned should be 32 bits, unless this is set, then it's 64bits. + */ +int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, + int nb_queries, VkQueryType type, + int elem_64bits, void *create_pnext); + +/** + * Get results for query. + * Returns the status of the query. + * Sets *status to the status of the queries. + */ +int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, + int query_idx, void **data, int64_t *status); + /** * Begin recording to the command buffer. Previous execution must have been * completed, which ff_vk_submit_exec_queue() will ensure. 
-- 2.39.2 [-- Attachment #47: 0046-vulkan-add-support-for-retrieving-queue-query-and-vi.patch --] [-- Type: text/x-diff, Size: 7602 bytes --] From 5422a554ad592c3b4a68c34490db201577f295ee Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 22 Dec 2022 17:37:51 +0100 Subject: [PATCH 46/72] vulkan: add support for retrieving queue, query and video properties --- libavutil/vulkan.c | 87 ++++++++++++++++++++++++++++++------ libavutil/vulkan.h | 14 ++++-- libavutil/vulkan_functions.h | 1 + 3 files changed, 85 insertions(+), 17 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index de0c300c0e..d045ff83c1 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res) #undef CASE } -void ff_vk_load_props(FFVulkanContext *s) +int ff_vk_load_props(FFVulkanContext *s) { + uint32_t qc = 0; FFVulkanFunctions *vk = &s->vkfn; s->driver_props = (VkPhysicalDeviceDriverProperties) { @@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s) .pNext = &s->driver_props, }; + vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); + + if (s->qf_props) + return 0; + + s->qf_props = av_mallocz(sizeof(*s->qf_props)*qc); + if (!s->qf_props) + return AVERROR(ENOMEM); + + s->query_props = av_mallocz(sizeof(*s->query_props)*qc); + if (!s->qf_props) { + av_freep(&s->qf_props); + return AVERROR(ENOMEM); + } + + s->video_props = av_mallocz(sizeof(*s->video_props)*qc); + if (!s->video_props) { + av_freep(&s->qf_props); + av_freep(&s->query_props); + return AVERROR(ENOMEM); + } + + for (uint32_t i = 0; i < qc; i++) { + s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR, + }; + s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) { + .sType = 
VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, + .pNext = &s->query_props[i], + }; + s->qf_props[i] = (VkQueueFamilyProperties2) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, + .pNext = &s->video_props[i], + }; + } + + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); + + return 0; } void ff_vk_qf_fill(FFVulkanContext *s) @@ -149,40 +190,54 @@ void ff_vk_qf_fill(FFVulkanContext *s) s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; } -void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues) +int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) { + int ret, num; + switch (dev_family) { case VK_QUEUE_GRAPHICS_BIT: - qf->queue_family = s->hwctx->queue_family_index; - qf->actual_queues = s->hwctx->nb_graphics_queues; + ret = s->hwctx->queue_family_index; + num = s->hwctx->nb_graphics_queues; break; case VK_QUEUE_COMPUTE_BIT: - qf->queue_family = s->hwctx->queue_family_comp_index; - qf->actual_queues = s->hwctx->nb_comp_queues; + ret = s->hwctx->queue_family_comp_index; + num = s->hwctx->nb_comp_queues; break; case VK_QUEUE_TRANSFER_BIT: - qf->queue_family = s->hwctx->queue_family_tx_index; - qf->actual_queues = s->hwctx->nb_tx_queues; + ret = s->hwctx->queue_family_tx_index; + num = s->hwctx->nb_tx_queues; break; case VK_QUEUE_VIDEO_ENCODE_BIT_KHR: - qf->queue_family = s->hwctx->queue_family_encode_index; - qf->actual_queues = s->hwctx->nb_encode_queues; + ret = s->hwctx->queue_family_encode_index; + num = s->hwctx->nb_encode_queues; break; case VK_QUEUE_VIDEO_DECODE_BIT_KHR: - qf->queue_family = s->hwctx->queue_family_decode_index; - qf->actual_queues = s->hwctx->nb_decode_queues; + ret = s->hwctx->queue_family_decode_index; + num = s->hwctx->nb_decode_queues; break; default: av_assert0(0); /* Should never happen */ } + if (nb) + *nb = num; + + return ret; +} + +int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + 
VkQueueFlagBits dev_family, int nb_queues) +{ + int ret; + + ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues); + if (!nb_queues) qf->nb_queues = qf->actual_queues; else qf->nb_queues = nb_queues; - return; + return ret; } void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) @@ -1669,6 +1724,10 @@ void ff_vk_uninit(FFVulkanContext *s) { FFVulkanFunctions *vk = &s->vkfn; + av_freep(&s->query_props); + av_freep(&s->qf_props); + av_freep(&s->video_props); + if (s->spirv_compiler) s->spirv_compiler->uninit(&s->spirv_compiler); diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index e222f67b5a..ccfa88f44f 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -212,6 +212,9 @@ typedef struct FFVulkanContext { VkPhysicalDeviceProperties2 props; VkPhysicalDeviceDriverProperties driver_props; VkPhysicalDeviceMemoryProperties mprops; + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; + VkQueueFamilyVideoPropertiesKHR *video_props; + VkQueueFamilyProperties2 *qf_props; AVBufferRef *device_ref; AVHWDeviceContext *device; @@ -259,7 +262,7 @@ const char *ff_vk_ret2str(VkResult res); /** * Loads props/mprops/driver_props */ -void ff_vk_load_props(FFVulkanContext *s); +int ff_vk_load_props(FFVulkanContext *s); /** * Returns 1 if the image is any sort of supported RGB @@ -284,12 +287,17 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, VkMemoryPropertyFlagBits req_flags, void *alloc_extension, VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); +/** + * Get a queue family index and the number of queues. nb is optional. + */ +int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb); + /** * Initialize a queue family with a specific number of queues. * If nb_queues == 0, use however many queues the queue family has. 
*/ -void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues); +int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + VkQueueFlagBits dev_family, int nb_queues); /** * Rotate through the queues in a queue family. diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index 65ab560d21..fa1650e895 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -77,6 +77,7 @@ typedef enum FFVulkanExtensions { MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFormatProperties2) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceImageFormatProperties2) \ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties2) \ \ /* Command pool */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateCommandPool) \ -- 2.39.2 [-- Attachment #48: 0047-vulkan-return-current-queue-index-from-ff_vk_qf_rota.patch --] [-- Type: text/x-diff, Size: 1290 bytes --] From 4632426c65f136ef70c4ab854a1076e1d1c868ff Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 28 Dec 2022 05:55:53 +0100 Subject: [PATCH 47/72] vulkan: return current queue index from ff_vk_qf_rotate() --- libavutil/vulkan.c | 3 ++- libavutil/vulkan.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index d045ff83c1..cb8e08e02f 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -240,9 +240,10 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, return ret; } -void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) +int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) { qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; + return qf->cur_queue; } int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index ccfa88f44f..dd1bc9c440 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -302,7 +302,7 @@ int 
ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, /** * Rotate through the queues in a queue family. */ -void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf); +int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf); /** * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() -- 2.39.2 [-- Attachment #49: 0048-vulkan-rewrite-dependency-handling-code.patch --] [-- Type: text/x-diff, Size: 82373 bytes --] From c1e607011ac764b46875add61c533ab2e49ab00e Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 29 Dec 2022 21:16:21 +0100 Subject: [PATCH 48/72] vulkan: rewrite dependency handling code --- libavutil/vulkan.c | 1350 ++++++++++++++++++++------------------------ libavutil/vulkan.h | 382 +++++-------- 2 files changed, 749 insertions(+), 983 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index cb8e08e02f..17a5bd6f3f 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -27,27 +27,6 @@ #include "vulkan_shaderc.c" #endif -/* Generic macro for creating contexts which need to keep their addresses - * if another context is created. 
*/ -#define FN_CREATING(ctx, type, shortname, array, num) \ -static av_always_inline type *create_ ##shortname(ctx *dctx) \ -{ \ - type **array, *sctx = av_mallocz(sizeof(*sctx)); \ - if (!sctx) \ - return NULL; \ - \ - array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\ - if (!array) { \ - av_free(sctx); \ - return NULL; \ - } \ - \ - dctx->array = array; \ - dctx->array[dctx->num++] = sctx; \ - \ - return sctx; \ -} - const VkComponentMapping ff_comp_identity_map = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -165,32 +144,7 @@ int ff_vk_load_props(FFVulkanContext *s) return 0; } -void ff_vk_qf_fill(FFVulkanContext *s) -{ - s->nb_qfs = 0; - - /* Simply fills in all unique queues into s->qfs */ - if (s->hwctx->queue_family_index >= 0) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; - if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; - if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && - s->qfs[1] != s->hwctx->queue_family_comp_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; - if (s->hwctx->queue_family_decode_index >= 0 && - (s->qfs[0] != s->hwctx->queue_family_decode_index && - s->qfs[1] != s->hwctx->queue_family_decode_index && - s->qfs[2] != s->hwctx->queue_family_decode_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; - if (s->hwctx->queue_family_encode_index >= 0 && - (s->qfs[0] != s->hwctx->queue_family_encode_index && - s->qfs[1] != s->hwctx->queue_family_encode_index && - s->qfs[2] != s->hwctx->queue_family_encode_index && - s->qfs[3] != s->hwctx->queue_family_encode_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; -} - -int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) +static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) { int ret, num; @@ -226,790 +180,760 @@ int 
ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) } int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues) + VkQueueFlagBits dev_family) { - int ret; - - ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues); - - if (!nb_queues) - qf->nb_queues = qf->actual_queues; - else - qf->nb_queues = nb_queues; - - return ret; -} + /* Fill in queue families from context if not done yet */ + if (!s->nb_qfs) { + s->nb_qfs = 0; + + /* Simply fills in all unique queues into s->qfs */ + if (s->hwctx->queue_family_index >= 0) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; + if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; + if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && + s->qfs[1] != s->hwctx->queue_family_comp_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; + if (s->hwctx->queue_family_decode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_decode_index && + s->qfs[1] != s->hwctx->queue_family_decode_index && + s->qfs[2] != s->hwctx->queue_family_decode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; + if (s->hwctx->queue_family_encode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_encode_index && + s->qfs[1] != s->hwctx->queue_family_encode_index && + s->qfs[2] != s->hwctx->queue_family_encode_index && + s->qfs[3] != s->hwctx->queue_family_encode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; + } -int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) -{ - qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; - return qf->cur_queue; + return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues)); } -int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) +void 
ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool) { - VkResult ret; - int index = -1; FFVulkanFunctions *vk = &s->vkfn; - VkMemoryAllocateInfo alloc_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = alloc_extension, - }; - - /* Align if we need to */ - if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment); - - alloc_info.allocationSize = req->size; - - /* The vulkan spec requires memory types to be sorted in the "optimal" - * order, so the first matching type we find will be the best/fastest one */ - for (int i = 0; i < s->mprops.memoryTypeCount; i++) { - /* The memory type must be supported by the requirements (bitfield) */ - if (!(req->memoryTypeBits & (1 << i))) - continue; - - /* The memory type flags must include our properties */ - if ((req_flags != UINT32_MAX) && - ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)) - continue; + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; - /* Found a suitable memory type */ - index = i; - break; - } + if (e->fence) { + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); + } - if (index < 0) { - av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", - req_flags); - return AVERROR(EINVAL); + ff_vk_exec_discard_deps(s, e); + + av_free(e->frame_deps); + av_free(e->buf_deps); + av_free(e->queue_family_dst); + av_free(e->layout_dst); + av_free(e->access_dst); + av_free(e->frame_update); + av_free(e->frame_locked); + av_free(e->sem_sig); + av_free(e->sem_sig_val); + av_free(e->sem_sig_val_dst); + av_free(e->sem_wait); + av_free(e->sem_wait_dst); + av_free(e->sem_wait_val); } - alloc_info.memoryTypeIndex = index; - - ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info, - s->hwctx->alloc, mem); - if (ret != VK_SUCCESS) { - 
av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n", - ff_vk_ret2str(ret)); - return AVERROR(ENOMEM); - } - - if (mem_flags) - *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; - - return 0; + if (pool->cmd_bufs) + vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool, + pool->pool_size, pool->cmd_bufs); + if (pool->cmd_buf_pool) + vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc); + if (pool->query_pool) + vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc); + + av_free(pool->query_data); + av_free(pool->cmd_bufs); + av_free(pool->contexts); } -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, - void *pNext, void *alloc_pNext, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) +int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + void *query_create_pnext) { int err; VkResult ret; - int use_ded_mem; FFVulkanFunctions *vk = &s->vkfn; - VkBufferCreateInfo buf_spawn = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = pNext, - .usage = usage, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .size = size, /* Gets FFALIGNED during alloc if host visible - but should be ok */ - }; + VkCommandPoolCreateInfo cqueue_create; + VkCommandBufferAllocateInfo cbuf_create; - VkBufferMemoryRequirementsInfo2 req_desc = { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, - }; - VkMemoryDedicatedAllocateInfo ded_alloc = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = alloc_pNext, - }; - VkMemoryDedicatedRequirements ded_req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, - }; - VkMemoryRequirements2 req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, - .pNext = &ded_req, - }; + atomic_init(&pool->idx, 0); - ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf); + /* Create command 
pool */ + cqueue_create = (VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = qf->queue_family, + }; + ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, + s->hwctx->alloc, &pool->cmd_buf_pool); if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n", + av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + err = AVERROR_EXTERNAL; + goto fail; } - req_desc.buffer = buf->buf; - - vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req); - - /* In case the implementation prefers/requires dedicated allocation */ - use_ded_mem = ded_req.prefersDedicatedAllocation | - ded_req.requiresDedicatedAllocation; - if (use_ded_mem) - ded_alloc.buffer = buf->buf; - - err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, - use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, - &buf->flags, &buf->mem); - if (err) - return err; + /* Allocate space for command buffers */ + pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs)); + if (!pool->cmd_bufs) { + err = AVERROR(ENOMEM); + goto fail; + } - ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0); + /* Allocate command buffer */ + cbuf_create = (VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandPool = pool->cmd_buf_pool, + .commandBufferCount = nb_contexts, + }; + ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, + pool->cmd_bufs); if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", + av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + err = AVERROR_EXTERNAL; + goto fail; } - buf->size = size; + /* Query pool */ + if (nb_queries) { + 
VkQueryPoolCreateInfo query_pool_info = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = query_create_pnext, + .queryType = query_type, + .queryCount = nb_queries*nb_contexts, + }; + ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, + s->hwctx->alloc, &pool->query_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } - return 0; -} + pool->nb_queries = nb_queries; + pool->query_status_stride = 2; + pool->query_results = nb_queries; + pool->query_statuses = 0; /* if radv supports it, nb_queries; */ + + /* Video encode queries produce two results per query */ + if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) { + pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */ + pool->query_results *= 2; + } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { + pool->query_status_stride = 1; + pool->query_results = 0; + pool->query_statuses = nb_queries; + } -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], - int nb_buffers, int invalidate) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *inval_list = NULL; - int inval_count = 0; + pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ?
8 : 4); - for (int i = 0; i < nb_buffers; i++) { - ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0, - VK_WHOLE_SIZE, 0, (void **)&mem[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + /* Allocate space for the query data */ + pool->query_data = av_mallocz(nb_contexts*pool->qd_size); + if (!pool->query_data) { + err = AVERROR(ENOMEM); + goto fail; } } - if (!invalidate) - return 0; - - for (int i = 0; i < nb_buffers; i++) { - const VkMappedMemoryRange ival_buf = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, - .size = VK_WHOLE_SIZE, - }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) - continue; - inval_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++inval_count)*sizeof(*inval_list)); - if (!inval_list) - return AVERROR(ENOMEM); - inval_list[inval_count - 1] = ival_buf; + /* Allocate space for the contexts */ + pool->contexts = av_mallocz(nb_contexts*sizeof(*pool->contexts)); + if (!pool->contexts) { + err = AVERROR(ENOMEM); + goto fail; } - if (inval_count) { - ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count, - inval_list); + pool->pool_size = nb_contexts; + + /* Init contexts */ + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; + + /* Fence */ + VkFenceCreateInfo fence_create = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }; + ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc, + &e->fence); if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", + av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } + + e->parent = pool; + + /* Query data */ + e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i; + e->query_idx = nb_queries*i; + + /* Command buffer */ + e->buf = pool->cmd_bufs[i]; + 
+ /* Queue index distribution */ + e->qi = i % qf->nb_queues; + e->qf = qf->queue_family; + vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, + e->qi, &e->queue); } return 0; + +fail: + ff_vk_exec_pool_free(s, pool); + return err; } -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, - int flush) +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, int64_t *status) { - int err = 0; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *flush_list = NULL; - int flush_count = 0; + const FFVkExecPool *pool = e->parent; - if (flush) { - for (int i = 0; i < nb_buffers; i++) { - const VkMappedMemoryRange flush_buf = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, - .size = VK_WHOLE_SIZE, - }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) - continue; - flush_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++flush_count)*sizeof(*flush_list)); - if (!flush_list) - return AVERROR(ENOMEM); - flush_list[flush_count - 1] = flush_buf; - } - } + int32_t *res32; + int64_t *res64; + int64_t res = 0; + VkQueryResultFlags qf = 0; - if (flush_count) { - ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count, - flush_list); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ + qf |= pool->query_64bit ? + VK_QUERY_RESULT_64_BIT : 0x0; + qf |= pool->query_statuses ? + VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; + + ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool, + e->query_idx, + pool->nb_queries, + pool->qd_size, e->query_data, + pool->query_64bit ? 8 : 4, qf); + if (ret != VK_SUCCESS) + return ret; + + if (pool->query_statuses && pool->query_64bit) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? 
+ res64[i] : res; + res64 += pool->query_status_stride; + } + } else if (pool->query_statuses) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? + res32[i] : res; + res32 += pool->query_status_stride; } } - for (int i = 0; i < nb_buffers; i++) - vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem); + if (data) + *data = e->query_data; + if (status) + *status = res; - return err; + return VK_SUCCESS; } -void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) +FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool) { - FFVulkanFunctions *vk = &s->vkfn; - - if (!buf || !s->hwctx) - return; + int idx = atomic_fetch_add_explicit(&pool->idx, 1, memory_order_relaxed); + idx %= pool->pool_size; + return &pool->contexts[idx]; +} - if (buf->buf != VK_NULL_HANDLE) - vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); - if (buf->mem != VK_NULL_HANDLE) - vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); } -int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx, - int width, int height, VkFormat fmt, VkImageTiling tiling, - VkImageUsageFlagBits usage, VkImageCreateFlags flags, - void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext) +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e) { - int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - AVVulkanDeviceContext *hwctx = s->hwctx; - - VkExportSemaphoreCreateInfo ext_sem_info = { - .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, -#ifdef _WIN32 - .handleTypes = IsWindows8OrGreater() - ? 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT - : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, -#else - .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, -#endif - }; + const FFVkExecPool *pool = e->parent; - VkSemaphoreTypeCreateInfo sem_type_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, -#ifdef _WIN32 - .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, -#else - .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, -#endif - .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, - .initialValue = 0, + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; - VkSemaphoreCreateInfo sem_spawn = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = &sem_type_info, - }; + /* Wait for this context's fence to be signalled, then reset it for reuse */ + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->ResetFences(s->hwctx->act_dev, 1, &e->fence); - /* Create the image */ - VkImageCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = create_pnext, - .imageType = VK_IMAGE_TYPE_2D, - .format = fmt, - .extent.depth = 1, - .mipLevels = 1, - .arrayLayers = 1, - .flags = flags, - .tiling = tiling, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .usage = usage, - .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = s->qfs, - .queueFamilyIndexCount = s->nb_qfs, - .sharingMode = s->nb_qfs > 1 ?
VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, - }; + /* Discard queue dependencies */ + ff_vk_exec_discard_deps(s, e); - ret = vk->CreateImage(hwctx->act_dev, &create_info, - hwctx->alloc, &f->img[0]); + ret = vk->BeginCommandBuffer(e->buf, &cmd_start); if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n", + av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", ff_vk_ret2str(ret)); - err = AVERROR(EINVAL); - goto fail; + return AVERROR_EXTERNAL; } - /* Create semaphore */ - ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[0]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + if (pool->nb_queries) + vk->CmdResetQueryPool(e->buf, pool->query_pool, + e->query_idx, pool->nb_queries); + + return 0; +} + +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e) +{ + for (int j = 0; j < e->nb_buf_deps; j++) + av_buffer_unref(&e->buf_deps[j]); + e->nb_buf_deps = 0; + + for (int j = 0; j < e->nb_frame_deps; j++) { + if (e->frame_locked[j]) { + AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data; + s->hwfc->unlock_frame(s->frames, f); + e->frame_locked[j] = 0; + e->frame_update[j] = 0; + } + av_buffer_unref(&e->frame_deps[j]); } + e->nb_frame_deps = 0; - f->queue_family[0] = s->nb_qfs > 1 ? 
VK_QUEUE_FAMILY_IGNORED : s->qfs[0]; - f->layout[0] = create_info.initialLayout; - f->access[0] = 0x0; - f->sem_value[0] = 0; + e->sem_wait_cnt = 0; + e->sem_sig_cnt = 0; +} - f->flags = 0x0; - f->tiling = tiling; +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref) +{ + AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size, + (e->nb_buf_deps + nb_deps) * sizeof(*dst)); + if (!dst) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } - return 0; + e->buf_deps = dst; -fail: - return err; + for (int i = 0; i < nb_deps; i++) { + e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i]; + if (!e->buf_deps[e->nb_buf_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + e->nb_buf_deps++; + } + + return 0; } -int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, - VkShaderStageFlagBits stage) +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb, + VkPipelineStageFlagBits in_wait_dst_flag) { - VkPushConstantRange *pc; + uint8_t *frame_locked; + uint8_t *frame_update; + AVBufferRef **frame_deps; + VkImageLayout *layout_dst; + uint32_t *queue_family_dst; + VkAccessFlagBits *access_dst; + + AVVkFrame *f = (AVVkFrame *)vkfb->data; + int nb_images = ff_vk_count_images(f); + +#define ARR_REALLOC(str, arr, alloc_s, cnt) \ + do { \ + arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \ + if (!arr) { \ + ff_vk_exec_discard_deps(s, e); \ + return AVERROR(ENOMEM); \ + } \ + str->arr = arr; \ + } while (0) + + for (int i = 0; i < nb_images; i++) { + VkSemaphore *sem_wait; + uint64_t *sem_wait_val; + VkPipelineStageFlagBits *sem_wait_dst; + VkSemaphore *sem_sig; + uint64_t *sem_sig_val; + uint64_t **sem_sig_val_dst; + + ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); + ARR_REALLOC(e, sem_wait_dst, &e->sem_wait_dst_alloc, e->sem_wait_cnt); + ARR_REALLOC(e, sem_wait_val, 
&e->sem_wait_val_alloc, e->sem_wait_cnt); + ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); + ARR_REALLOC(e, sem_sig_val, &e->sem_sig_val_alloc, e->sem_sig_cnt); + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_cnt); - pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), - pl->push_consts_num + 1); - if (!pl->push_consts) + e->sem_wait[e->sem_wait_cnt] = f->sem[i]; + e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; + e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; + e->sem_wait_cnt++; + + e->sem_sig[e->sem_sig_cnt] = f->sem[i]; + e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1; + e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i]; + e->sem_sig_cnt++; + } + + ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps); + + ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps); + + e->frame_deps[e->nb_frame_deps] = av_buffer_ref(vkfb); + if (!e->frame_deps[e->nb_frame_deps]) { + ff_vk_exec_discard_deps(s, e); return AVERROR(ENOMEM); + } - pc = &pl->push_consts[pl->push_consts_num++]; - memset(pc, 0, sizeof(*pc)); + s->hwfc->lock_frame(s->frames, f); + e->frame_locked[e->nb_frame_deps] = 1; + e->frame_update[e->nb_frame_deps] = 0; - pc->stageFlags = stage; - pc->offset = offset; - pc->size = size; + e->nb_frame_deps++; return 0; } -FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) -int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, - FFVkQueueFamilyCtx *qf) +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef *vkfb, + VkImageMemoryBarrier2 *bar) +{ + int i; + for (i = 0; i < 
e->nb_frame_deps; i++) + if (e->frame_deps[i]->data == vkfb->data) + break; + av_assert0(i < e->nb_frame_deps); + + e->queue_family_dst[i] = bar->dstQueueFamilyIndex; + e->access_dst[i] = bar->dstAccessMask; + e->layout_dst[i] = bar->newLayout; + e->frame_update[i] = 1; +} + +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) { VkResult ret; - FFVkExecContext *e; FFVulkanFunctions *vk = &s->vkfn; - VkCommandPoolCreateInfo cqueue_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = qf->queue_family, - }; - VkCommandBufferAllocateInfo cbuf_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = qf->nb_queues, + VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, + .pWaitSemaphoreValues = e->sem_wait_val, + .pSignalSemaphoreValues = e->sem_sig_val, + .waitSemaphoreValueCount = e->sem_wait_cnt, + .signalSemaphoreValueCount = e->sem_sig_cnt, }; - e = create_exec_ctx(s); - if (!e) - return AVERROR(ENOMEM); + VkSubmitInfo s_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &s_timeline_sem_info, - e->qf = qf; + .commandBufferCount = 1, + .pCommandBuffers = &e->buf, - e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues)); - if (!e->queues) - return AVERROR(ENOMEM); + .pWaitSemaphores = e->sem_wait, + .pWaitDstStageMask = e->sem_wait_dst, + .waitSemaphoreCount = e->sem_wait_cnt, - e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs)); - if (!e->bufs) - return AVERROR(ENOMEM); + .pSignalSemaphores = e->sem_sig, + .signalSemaphoreCount = e->sem_sig_cnt, + }; - /* Create command pool */ - ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, - s->hwctx->alloc, &e->pool); + ret = vk->EndCommandBuffer(e->buf); if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", + 
av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); return AVERROR_EXTERNAL; } - cbuf_create.commandPool = e->pool; + s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, e->qf, e->qi); + ret = vk->QueueSubmit(e->queue, 1, &s_info, e->fence); + s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, e->qf, e->qi); - /* Allocate command buffer */ - ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs); if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", + av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); return AVERROR_EXTERNAL; } - for (int i = 0; i < qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; - vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, - i % qf->actual_queues, &q->queue); - } + for (int i = 0; i < e->sem_sig_cnt; i++) + *e->sem_sig_val_dst[i] += 1; - *ctx = e; + /* Unlock all frames */ + for (int j = 0; j < e->nb_frame_deps; j++) { + if (e->frame_locked[j]) { + AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data; + if (e->frame_update[j]) { + int nb_images = ff_vk_count_images(f); + for (int i = 0; i < nb_images; i++) { + f->layout[i] = e->layout_dst[j]; + f->access[i] = e->access_dst[j]; + f->queue_family[i] = e->queue_family_dst[j]; + } + } + s->hwfc->unlock_frame(s->frames, f); + e->frame_locked[j] = 0; + } + } return 0; } -int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, - int nb_queries, VkQueryType type, - int elem_64bits, void *create_pnext) +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) { VkResult ret; - size_t qd_size; - int nb_results = nb_queries; - int nb_statuses = 0 /* Once RADV has support, = nb_queries */; - int status_stride = 2; - int 
result_elem_size = elem_64bits ? 8 : 4; + int index = -1; FFVulkanFunctions *vk = &s->vkfn; - VkQueryPoolCreateInfo query_pool_info = { - .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, - .pNext = create_pnext, - .queryType = type, - .queryCount = nb_queries*e->qf->nb_queues, - }; - if (e->query.pool) - return AVERROR(EINVAL); - - /* Video encode quieries produce two results per query */ - if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) { - status_stride = 3; /* skip,skip,result,skip,skip,result */ - nb_results *= 2; - } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { - status_stride = 1; - nb_results *= 0; - } - - qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size; - - e->query.data = av_mallocz(e->qf->nb_queues*qd_size); - if (!e->query.data) - return AVERROR(ENOMEM); - - ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, - s->hwctx->alloc, &e->query.pool); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = alloc_extension, + }; - e->query.data_per_queue = qd_size; - e->query.nb_queries = nb_queries; - e->query.nb_results = nb_results; - e->query.nb_statuses = nb_statuses; - e->query.elem_64bits = elem_64bits; - e->query.status_stride = status_stride; + /* Align if we need to */ + if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment); - return 0; -} + alloc_info.allocationSize = req->size; -int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, - int query_idx, void **data, int64_t *status) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - uint8_t *qd; - int32_t *res32; - int64_t *res64; - int64_t res = 0; - VkQueryResultFlags qf = 0; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; + /* The vulkan spec requires memory types to be sorted in the "optimal" + * order, 
so the first matching type we find will be the best/fastest one */ + for (int i = 0; i < s->mprops.memoryTypeCount; i++) { + /* The memory type must be supported by the requirements (bitfield) */ + if (!(req->memoryTypeBits & (1 << i))) + continue; - if (!q->submitted) { - *data = NULL; - return 0; - } + /* The memory type flags must include our properties */ + if ((req_flags != UINT32_MAX) && + ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)) + continue; - qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue; - qf |= e->query.nb_results && e->query.nb_statuses ? - VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; - qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0; - res32 = (int32_t *)(qd + e->query.nb_results*4); - res64 = (int64_t *)(qd + e->query.nb_results*8); - - ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool, - query_idx, - e->query.nb_queries, - e->query.data_per_queue, qd, - e->query.elem_64bits ? 8 : 4, qf); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + /* Found a suitable memory type */ + index = i; + break; } - if (e->query.nb_statuses && e->query.elem_64bits) { - for (int i = 0; i < e->query.nb_queries; i++) { - res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? - res64[i] : res; - res64 += e->query.status_stride; - } - } else if (e->query.nb_statuses) { - for (int i = 0; i < e->query.nb_queries; i++) { - res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? 
- res32[i] : res; - res32 += e->query.status_stride; - } + if (index < 0) { + av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", + req_flags); + return AVERROR(EINVAL); } - if (data) - *data = qd; - if (status) - *status = res; - - return 0; -} - -void ff_vk_discard_exec_deps(FFVkExecContext *e) -{ - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; + alloc_info.memoryTypeIndex = index; - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - q->nb_buf_deps = 0; + ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info, + s->hwctx->alloc, mem); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR(ENOMEM); + } - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - q->nb_frame_deps = 0; + if (mem_flags) + *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; - e->sem_wait_cnt = 0; - e->sem_sig_cnt = 0; + return 0; } -int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e) +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) { + int err; VkResult ret; + int use_ded_mem; FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + VkBufferCreateInfo buf_spawn = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = pNext, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .size = size, /* Gets FFALIGNED during alloc if host visible + but should be ok */ }; - /* Create the fence and don't wait for it initially */ - if (!q->fence) { - VkFenceCreateInfo fence_spawn = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - }; - ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc, - 
&q->fence); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } else if (!q->synchronous) { - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); + VkBufferMemoryRequirementsInfo2 req_desc = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, + }; + VkMemoryDedicatedAllocateInfo ded_alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = alloc_pNext, + }; + VkMemoryDedicatedRequirements ded_req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + }; + VkMemoryRequirements2 req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = &ded_req, + }; + + ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; } - q->synchronous = 0; + req_desc.buffer = buf->buf; - /* Discard queue dependencies */ - ff_vk_discard_exec_deps(e); + vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req); + + /* In case the implementation prefers/requires dedicated allocation */ + use_ded_mem = ded_req.prefersDedicatedAllocation | + ded_req.requiresDedicatedAllocation; + if (use_ded_mem) + ded_alloc.buffer = buf->buf; + + err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, + use_ded_mem ? 
&ded_alloc : (void *)ded_alloc.pNext, + &buf->flags, &buf->mem); + if (err) + return err; - ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start); + ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0); if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", + av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } - if (e->query.pool) { - e->query.idx = e->qf->cur_queue*e->query.nb_queries; - vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool, - e->query.idx, e->query.nb_queries); - } + buf->size = size; return 0; } -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e) -{ - return e->bufs[e->qf->cur_queue]; -} - -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag) +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], + int nb_buffers, int invalidate) { - AVFrame **dst; - AVVkFrame *f = (AVVkFrame *)frame->data[0]; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; - int planes = av_pix_fmt_count_planes(fc->sw_format); - - for (int i = 0; i < planes; i++) { - e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); - if (!e->sem_wait) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); - if (!e->sem_wait_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val)); - if (!e->sem_wait_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, - (e->sem_sig_cnt + 
1)*sizeof(*e->sem_sig)); - if (!e->sem_sig) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val)); - if (!e->sem_sig_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkMappedMemoryRange inval_list[64]; + int inval_count = 0; - e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst)); - if (!e->sem_sig_val_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); + for (int i = 0; i < nb_buffers; i++) { + ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0, + VK_WHOLE_SIZE, 0, (void **)&mem[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; } - - e->sem_wait[e->sem_wait_cnt] = f->sem[i]; - e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; - e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; - e->sem_wait_cnt++; - - e->sem_sig[e->sem_sig_cnt] = f->sem[i]; - e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1; - e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i]; - e->sem_sig_cnt++; } - dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size, - (q->nb_frame_deps + 1) * sizeof(*dst)); - if (!dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); + if (!invalidate) + return 0; + + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange ival_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + inval_list[inval_count++] = ival_buf; } - q->frame_deps = dst; - q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame); - if (!q->frame_deps[q->nb_frame_deps]) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); + if (inval_count) { + 
ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count, + inval_list); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } } - q->nb_frame_deps++; return 0; } -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, + int flush) { + int err = 0; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pWaitSemaphoreValues = e->sem_wait_val, - .pSignalSemaphoreValues = e->sem_sig_val, - .waitSemaphoreValueCount = e->sem_wait_cnt, - .signalSemaphoreValueCount = e->sem_sig_cnt, - }; - - VkSubmitInfo s_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &s_timeline_sem_info, - - .commandBufferCount = 1, - .pCommandBuffers = &e->bufs[e->qf->cur_queue], - - .pWaitSemaphores = e->sem_wait, - .pWaitDstStageMask = e->sem_wait_dst, - .waitSemaphoreCount = e->sem_wait_cnt, - - .pSignalSemaphores = e->sem_sig, - .signalSemaphoreCount = e->sem_sig_cnt, - }; + VkMappedMemoryRange flush_list[64]; + int flush_count = 0; - ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + if (flush) { + for (int i = 0; i < nb_buffers; i++) { + const VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = buf[i].mem, + .size = VK_WHOLE_SIZE, + }; + if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + continue; + flush_list[flush_count++] = flush_buf; + } } - s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, - e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); - - ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence); - 
- s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, - e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); - - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + if (flush_count) { + ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count, + flush_list); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ + } } - for (int i = 0; i < e->sem_sig_cnt; i++) - *e->sem_sig_val_dst[i] += 1; - - e->query.idx = e->qf->cur_queue*e->query.nb_queries; - q->submitted = 1; + for (int i = 0; i < nb_buffers; i++) + vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem); - return 0; + return err; } -void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) +void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) { FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - if (!q->submitted) + + if (!buf || !s->hwctx) return; - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - q->synchronous = 1; + if (buf->buf != VK_NULL_HANDLE) + vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); + if (buf->mem != VK_NULL_HANDLE) + vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); } -int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps) +int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, + VkShaderStageFlagBits stage) { - AVBufferRef **dst; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - if (!deps || !nb_deps) - return 0; + VkPushConstantRange *pc; - dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size, - (q->nb_buf_deps + nb_deps) * sizeof(*dst)); - if (!dst) - goto err; + pl->push_consts = av_realloc_array(pl->push_consts, 
sizeof(*pl->push_consts), + pl->push_consts_num + 1); + if (!pl->push_consts) + return AVERROR(ENOMEM); - q->buf_deps = dst; + pc = &pl->push_consts[pl->push_consts_num++]; + memset(pc, 0, sizeof(*pc)); - for (int i = 0; i < nb_deps; i++) { - q->buf_deps[q->nb_buf_deps] = deps[i]; - if (!q->buf_deps[q->nb_buf_deps]) - goto err; - q->nb_buf_deps++; - } + pc->stageFlags = stage; + pc->offset = offset; + pc->size = size; return 0; - -err: - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); } -FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num) -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, +FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx, int unnorm_coords, VkFilter filt) { VkResult ret; @@ -1030,10 +954,6 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, .unnormalizedCoordinates = unnorm_coords, }; - FFVkSampler *sctx = create_sampler(s); - if (!sctx) - return NULL; - ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, s->hwctx->alloc, &sctx->sampler[0]); if (ret != VK_SUCCESS) { @@ -1048,6 +968,13 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, return sctx; } +void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->DestroySampler(s->hwctx->act_dev, sctx->sampler[0], + s->hwctx->alloc); +} + int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) { if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA || @@ -1122,7 +1049,7 @@ int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, } /* Add to queue dependencies */ - err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1); + err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); if (err) { av_buffer_unref(&buf); return err; @@ -1133,14 +1060,9 @@ int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, return 0; } -FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num) -FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, - VkShaderStageFlags 
stage) +int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, + VkShaderStageFlags stage) { - FFVkSPIRVShader *shd = create_shader(pl); - if (!shd) - return NULL; - av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -1152,10 +1074,10 @@ FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); GLSLC(0, ); - return shd; + return 0; } -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]) +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]) { shd->local_size[0] = local_size[0]; shd->local_size[1] = local_size[1]; @@ -1166,7 +1088,7 @@ void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]) shd->local_size[0], shd->local_size[1], shd->local_size[2]); } -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) +void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio) { int line = 0; const char *p = shd->src.str; @@ -1188,7 +1110,13 @@ void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) av_bprint_finalize(&buf, NULL); } -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, +void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc); +} + +int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd, const char *entrypoint) { int err; @@ -1437,7 +1365,7 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, return; } - set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue; +// set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue; vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, pl->desc_set[set_id], @@ -1446,12 +1374,12 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, } 
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - VkShaderStageFlagBits stage, int offset, - size_t size, void *src) + FFVulkanPipeline *pl, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src) { FFVulkanFunctions *vk = &s->vkfn; - - vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout, + vk->CmdPushConstants(e->buf, pl->pipeline_layout, stage, offset, size, src); } @@ -1558,17 +1486,8 @@ int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) return 0; } -FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num) -FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf) -{ - FFVulkanPipeline *pl = create_pipeline(s); - if (pl) - pl->qf = qf; - - return pl; -} - -int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) +int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkQueueFamilyCtx *qf) { int i; VkResult ret; @@ -1579,6 +1498,8 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) .layout = pl->pipeline_layout, }; + pl->qf = qf; + for (i = 0; i < pl->shaders_num; i++) { if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) { pipe.stage = pl->shaders[i]->shader; @@ -1603,73 +1524,24 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) return 0; } -void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e, +void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; - vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline); + vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); - for (int i = 0; i < pl->descriptor_sets_num; i++) - pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue]; +// for (int i = 0; i < pl->descriptor_sets_num; i++) + // pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + 
pl->qf->cur_queue]; - vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point, + vk->CmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout, 0, pl->descriptor_sets_num, (VkDescriptorSet *)pl->desc_staging, 0, NULL); - - e->bound_pl = pl; -} - -static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) -{ - FFVulkanFunctions *vk = &s->vkfn; - - /* Make sure all queues have finished executing */ - for (int i = 0; i < e->qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; - - if (q->fence) { - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - } - - /* Free the fence */ - if (q->fence) - vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc); - - /* Free buffer dependencies */ - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - av_free(q->buf_deps); - - /* Free frame dependencies */ - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - av_free(q->frame_deps); - } - - if (e->bufs) - vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs); - if (e->pool) - vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); - if (e->query.pool) - vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc); - - av_freep(&e->query.data); - av_freep(&e->bufs); - av_freep(&e->queues); - av_freep(&e->sem_sig); - av_freep(&e->sem_sig_val); - av_freep(&e->sem_sig_val_dst); - av_freep(&e->sem_wait); - av_freep(&e->sem_wait_dst); - av_freep(&e->sem_wait_val); - av_free(e); } -static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; @@ -1723,8 +1595,6 @@ static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) void ff_vk_uninit(FFVulkanContext *s) { - FFVulkanFunctions *vk = &s->vkfn; - av_freep(&s->query_props); av_freep(&s->qf_props); 
av_freep(&s->video_props); @@ -1732,24 +1602,6 @@ void ff_vk_uninit(FFVulkanContext *s) if (s->spirv_compiler) s->spirv_compiler->uninit(&s->spirv_compiler); - for (int i = 0; i < s->exec_ctx_num; i++) - free_exec_ctx(s, s->exec_ctx[i]); - av_freep(&s->exec_ctx); - - for (int i = 0; i < s->samplers_num; i++) { - vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0], - s->hwctx->alloc); - av_free(s->samplers[i]); - } - av_freep(&s->samplers); - - for (int i = 0; i < s->pipelines_num; i++) - free_pipeline(s, s->pipelines[i]); - av_freep(&s->pipelines); - - av_freep(&s->scratch); - s->scratch_size = 0; - av_buffer_unref(&s->device_ref); av_buffer_unref(&s->frames_ref); } diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index dd1bc9c440..a8f3c458fc 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -21,6 +21,8 @@ #define VK_NO_PROTOTYPES +#include <stdatomic.h> + #include "pixdesc.h" #include "bprint.h" #include "hwcontext.h" @@ -100,8 +102,6 @@ typedef struct FFVkBuffer { typedef struct FFVkQueueFamilyCtx { int queue_family; int nb_queues; - int cur_queue; - int actual_queues; } FFVkQueueFamilyCtx; typedef struct FFVulkanPipeline { @@ -139,71 +139,88 @@ typedef struct FFVulkanPipeline { VkDescriptorPoolSize *pool_size_desc; } FFVulkanPipeline; -typedef struct FFVkQueueCtx { - VkFence fence; +typedef struct FFVkExecContext { + const struct FFVkExecPool *parent; + + /* Queue for the execution context */ VkQueue queue; + int qf; + int qi; - int synchronous; - int submitted; + /* Command buffer for the context */ + VkCommandBuffer buf; + + /* Fence for the command buffer */ + VkFence fence; + + void *query_data; + int query_idx; /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; - int buf_deps_alloc_size; + unsigned int buf_deps_alloc_size; /* Frame dependencies */ - AVFrame **frame_deps; + AVBufferRef **frame_deps; + unsigned int frame_deps_alloc_size; int nb_frame_deps; - int frame_deps_alloc_size; -} FFVkQueueCtx; - -typedef 
struct FFVkExecContext { - FFVkQueueFamilyCtx *qf; - - VkCommandPool pool; - VkCommandBuffer *bufs; - FFVkQueueCtx *queues; - - struct { - int idx; - VkQueryPool pool; - uint8_t *data; - - int nb_queries; - int nb_results; - int nb_statuses; - int elem_64bits; - size_t data_per_queue; - int status_stride; - } query; - - AVBufferRef ***deps; - int *nb_deps; - int *dep_alloc_size; - - FFVulkanPipeline *bound_pl; VkSemaphore *sem_wait; - int sem_wait_alloc; /* Allocated sem_wait */ + unsigned int sem_wait_alloc; /* Allocated sem_wait */ int sem_wait_cnt; uint64_t *sem_wait_val; - int sem_wait_val_alloc; + unsigned int sem_wait_val_alloc; VkPipelineStageFlagBits *sem_wait_dst; - int sem_wait_dst_alloc; /* Allocated sem_wait_dst */ + unsigned int sem_wait_dst_alloc; /* Allocated sem_wait_dst */ VkSemaphore *sem_sig; - int sem_sig_alloc; /* Allocated sem_sig */ + unsigned int sem_sig_alloc; /* Allocated sem_sig */ int sem_sig_cnt; uint64_t *sem_sig_val; - int sem_sig_val_alloc; + unsigned int sem_sig_val_alloc; uint64_t **sem_sig_val_dst; - int sem_sig_val_dst_alloc; + unsigned int sem_sig_val_dst_alloc; + + uint8_t *frame_locked; + unsigned int frame_locked_alloc_size; + + VkAccessFlagBits *access_dst; + unsigned int access_dst_alloc; + + VkImageLayout *layout_dst; + unsigned int layout_dst_alloc; + + uint32_t *queue_family_dst; + unsigned int queue_family_dst_alloc; + + uint8_t *frame_update; + unsigned int frame_update_alloc_size; } FFVkExecContext; +typedef struct FFVkExecPool { + FFVkQueueFamilyCtx *qf; + FFVkExecContext *contexts; + atomic_int_least64_t idx; + + VkCommandPool cmd_buf_pool; + VkCommandBuffer *cmd_bufs; + int pool_size; + + VkQueryPool query_pool; + void *query_data; + int query_results; + int query_statuses; + int query_64bit; + int query_status_stride; + int nb_queries; + size_t qd_size; +} FFVkExecPool; + typedef struct FFVulkanContext { const AVClass *class; /* Filters and encoders use this */ @@ -234,21 +251,6 @@ typedef struct FFVulkanContext { 
int output_height; enum AVPixelFormat output_format; enum AVPixelFormat input_format; - - /* Samplers */ - FFVkSampler **samplers; - int samplers_num; - - /* Exec contexts */ - FFVkExecContext **exec_ctx; - int exec_ctx_num; - - /* Pipelines (each can have 1 shader of each type) */ - FFVulkanPipeline **pipelines; - int pipelines_num; - - void *scratch; /* Scratch memory used only in functions */ - unsigned int scratch_size; } FFVulkanContext; /* Identity mapping - r = r, b = b, g = g, a = a */ @@ -260,244 +262,156 @@ extern const VkComponentMapping ff_comp_identity_map; const char *ff_vk_ret2str(VkResult res); /** - * Loads props/mprops/driver_props + * Returns 1 if pixfmt is a usable RGB format. */ -int ff_vk_load_props(FFVulkanContext *s); +int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); /** - * Returns 1 if the image is any sort of supported RGB + * Returns the format to use for images in shaders. */ -int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); +const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); /** - * Gets the glsl format string for a pixel format + * Loads props/mprops/driver_props */ -const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); +int ff_vk_load_props(FFVulkanContext *s); /** - * Setup the queue families from the hardware device context. - * Necessary for image creation to work. + * Loads queue families into the main context. + * Chooses a QF and loads it into a context. */ void ff_vk_qf_fill(FFVulkanContext *s); +int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + VkQueueFlagBits dev_family); /** - * Allocate device memory. + * Allocates/frees an execution pool. 
*/ -int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); +int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + void *query_create_pnext); +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool); /** - * Get a queue family index and the number of queues. nb is optional. + * Retrieve an execution context from the pool. Threadsafe. */ -int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb); +FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool); /** - * Initialize a queue family with a specific number of queues. - * If nb_queues == 0, use however many queues the queue family has. + * Explicitly wait on an execution to be finished. + * Starting via ff_vk_exec_start() also waits on it. */ -int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues); /** - * Rotate through the queues in a queue family. + * Performs nb_queries queries and returns their results and statuses. + * Execution must have been waited on to produce valid results. + */ +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, int64_t *status); + +/** + * Start/submit/wait an execution. + * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait() + * is not necessary (unless using it is just better). */ -int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf); +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e); +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e); +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e); /** - * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() + * Execution dependency management. 
+ * Can attach buffers to executions that will only be unref'd once the + * buffer has finished executing. + * Adding a frame dep will *lock the frame*, until either the dependencies + * are discarded, the execution is submitted, or a failure happens. + * update_frame will update the frame's properties before it is unlocked, + * only if submission was successful. */ -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, int unnorm_coords, - VkFilter filt); +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref); +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef *vkfb, VkPipelineStageFlagBits in_wait_dst_flag); +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb, + VkImageMemoryBarrier2 *bar); +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); /** - * Create an imageview. - * Guaranteed to remain alive until the queue submission has finished executing, - * and will be destroyed after that. + * Create an imageview and add it as a dependency to an execution. */ int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, VkImageView *v, VkImage img, VkFormat fmt, const VkComponentMapping map); /** - * Define a push constant for a given stage into a pipeline. - * Must be called before the pipeline layout has been initialized. + * Memory/buffer/image allocation helpers. */ -int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, - VkShaderStageFlagBits stage); +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); /** - * Inits a pipeline. 
Everything in it will be auto-freed when calling - * ff_vk_filter_uninit(). + * Buffer management code. */ -FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf); +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], + int nb_buffers, int invalidate); +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, + int flush); +void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); /** - * Inits a shader for a specific pipeline. Will be auto-freed on uninit. + * Sampler management. */ -FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, - VkShaderStageFlags stage); +FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx, + int unnorm_coords, VkFilter filt); +void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx); /** - * Writes the workgroup size for a shader. + * Shader management. */ -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]); +int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, + VkShaderStageFlags stage); +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]); +void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio); +int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd, + const char *entrypoint); +void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd); /** - * Adds a descriptor set to the shader and registers them in the pipeline. + * Register a descriptor set. + * Update a descriptor set for execution. */ int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, int num, int only_print_to_shader); - -/** - * Compiles the shader, entrypoint must be set to "main". - */ -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint); - -/** - * Pretty print shader, mainly used by shader compilers. 
- */ -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio); - -/** - * Initializes the pipeline layout after all shaders and descriptor sets have - * been finished. - */ -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl); - -/** - * Initializes a compute pipeline. Will pick the first shader with the - * COMPUTE flag set. - */ -int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl); - -/** - * Updates a descriptor set via the updaters defined. - * Can be called immediately after pipeline creation, but must be called - * at least once before queue submission. - */ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, int set_id); /** - * Init an execution context for command recording and queue submission. - * WIll be auto-freed on uninit. - */ -int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, - FFVkQueueFamilyCtx *qf); - -/** - * Create a query pool for a command context. - * elem_64bits exists to troll driver devs for compliance. All results - * and statuses returned should be 32 bits, unless this is set, then it's 64bits. - */ -int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, - int nb_queries, VkQueryType type, - int elem_64bits, void *create_pnext); - -/** - * Get results for query. - * Returns the status of the query. - * Sets *res to the status of the queries. - */ -int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, - int query_idx, void **data, int64_t *status); - -/** - * Begin recording to the command buffer. Previous execution must have been - * completed, which ff_vk_submit_exec_queue() will ensure. - */ -int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Add a command to bind the completed pipeline and its descriptor sets. - * Must be called after ff_vk_start_exec_recording() and before submission. 
- */ -void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanPipeline *pl); - -/** - * Updates push constants. - * Must be called after binding a pipeline if any push constants were defined. + * Add/update push constants for execution. */ +int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, + VkShaderStageFlagBits stage); void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - VkShaderStageFlagBits stage, int offset, - size_t size, void *src); + FFVulkanPipeline *pl, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src); /** - * Gets the command buffer to use for this submission from the exe context. + * Pipeline management. */ -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e); - -/** - * Adds a generic AVBufferRef as a queue depenency. - */ -int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps); - -/** - * Discards all queue dependencies - */ -void ff_vk_discard_exec_deps(FFVkExecContext *e); - -/** - * Adds a frame as a queue dependency. This also manages semaphore signalling. - * Must be called before submission. - */ -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag); - -/** - * Submits a command buffer to the queue for execution. Will not block. - */ -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Wait on a command buffer's execution. Mainly useful for debugging and - * development. - */ -void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Create a VkBuffer with the specified parameters. - */ -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, - void *pNext, void *alloc_pNext, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); - -/** - * Maps the buffer to userspace. Set invalidate to 1 if reading the contents - * is necessary. 
- */ -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], - int nb_buffers, int invalidate); - -/** - * Unmaps the buffer from userspace. Set flush to 1 to write and sync. - */ -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, - int flush); - -/** - * Frees a buffer. - */ -void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); - -/** - * Creates an image, allocates and binds memory in the given - * idx value of the dst frame. If mem is non-NULL, then no memory will be - * allocated, but instead the given memory will be bound to the image. - */ -int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx, - int width, int height, VkFormat fmt, VkImageTiling tiling, - VkImageUsageFlagBits usage, VkImageCreateFlags flags, - void *create_pnext, - VkDeviceMemory *mem, void *alloc_pnext); +int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkQueueFamilyCtx *qf); +int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl); +void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanPipeline *pl); +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl); /** - * Frees the main Vulkan context. + * Frees main context. 
*/ void ff_vk_uninit(FFVulkanContext *s); -- 2.39.2 [-- Attachment #50: 0049-vulkan-add-ff_vk_count_images.patch --] [-- Type: text/x-diff, Size: 779 bytes --] From 2aad41bb35392d7f2e300857a1b0f73b873ec601 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 11 Jan 2023 09:37:18 +0100 Subject: [PATCH 49/72] vulkan: add ff_vk_count_images() --- libavutil/vulkan.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index a8f3c458fc..e66ca59ef7 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -253,6 +253,15 @@ typedef struct FFVulkanContext { enum AVPixelFormat input_format; } FFVulkanContext; +static inline int ff_vk_count_images(AVVkFrame *f) +{ + int cnt = 0; + while (f->img[cnt]) + cnt++; + + return cnt; +} + /* Identity mapping - r = r, b = b, g = g, a = a */ extern const VkComponentMapping ff_comp_identity_map; -- 2.39.2 [-- Attachment #51: 0050-vulkan-rewrite-image-handling-code.patch --] [-- Type: text/x-diff, Size: 64405 bytes --] From dca500204539da2a17746db4125c476a29851305 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 11 Jan 2023 09:38:10 +0100 Subject: [PATCH 50/72] vulkan: rewrite image handling code --- libavutil/vulkan.c | 919 +++++++++++++++++++++++++-------------------- libavutil/vulkan.h | 166 ++++---- 2 files changed, 612 insertions(+), 473 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 17a5bd6f3f..20ad269b0a 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -21,12 +23,6 @@ #include "vulkan.h" #include "vulkan_loader.h" -#if CONFIG_LIBGLSLANG -#include "vulkan_glslang.c" -#elif CONFIG_LIBSHADERC -#include "vulkan_shaderc.c" -#endif - const VkComponentMapping ff_comp_identity_map = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -92,15 +88,22 @@ int ff_vk_load_props(FFVulkanContext *s) uint32_t qc = 0; FFVulkanFunctions *vk = &s->vkfn; + s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, + }; + s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT, + .pNext = &s->hprops, + }; s->driver_props = (VkPhysicalDeviceDriverProperties) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + .pNext = &s->desc_buf_props, }; s->props = (VkPhysicalDeviceProperties2) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, .pNext = &s->driver_props, }; - vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); @@ -373,6 +376,7 @@ int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, return AVERROR_EXTERNAL; } + e->idx = i; e->parent = pool; /* Query data */ @@ -496,17 +500,21 @@ void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e) for (int j = 0; j < e->nb_frame_deps; j++) { if (e->frame_locked[j]) { - AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data; - s->hwfc->unlock_frame(s->frames, f); + AVFrame *f = e->frame_deps[j]; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + vkfc->unlock_frame(hwfc, vkf); e->frame_locked[j] = 0; 
e->frame_update[j] = 0; } - av_buffer_unref(&e->frame_deps[j]); + av_frame_free(&e->frame_deps[j]); } e->nb_frame_deps = 0; e->sem_wait_cnt = 0; e->sem_sig_cnt = 0; + e->sem_sig_val_dst_cnt = 0; } int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, @@ -533,18 +541,25 @@ int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, return 0; } -int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb, +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits in_wait_dst_flag) { uint8_t *frame_locked; uint8_t *frame_update; - AVBufferRef **frame_deps; + AVFrame **frame_deps; VkImageLayout *layout_dst; uint32_t *queue_family_dst; VkAccessFlagBits *access_dst; - AVVkFrame *f = (AVVkFrame *)vkfb->data; - int nb_images = ff_vk_count_images(f); + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + int nb_images = ff_vk_count_images(vkf); + + /* Don't add duplicates */ + for (int i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + return 1; #define ARR_REALLOC(str, arr, alloc_s, cnt) \ do { \ @@ -569,17 +584,18 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef ARR_REALLOC(e, sem_wait_val, &e->sem_wait_val_alloc, e->sem_wait_cnt); ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); ARR_REALLOC(e, sem_sig_val, &e->sem_sig_val_alloc, e->sem_sig_cnt); - ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_cnt); + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); - e->sem_wait[e->sem_wait_cnt] = f->sem[i]; + e->sem_wait[e->sem_wait_cnt] = vkf->sem[i]; e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; - e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; + e->sem_wait_val[e->sem_wait_cnt] = vkf->sem_value[i]; e->sem_wait_cnt++; - 
e->sem_sig[e->sem_sig_cnt] = f->sem[i]; - e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1; - e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i]; + e->sem_sig[e->sem_sig_cnt] = vkf->sem[i]; + e->sem_sig_val[e->sem_sig_cnt] = vkf->sem_value[i] + 1; + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i]; e->sem_sig_cnt++; + e->sem_sig_val_dst_cnt++; } ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps); @@ -590,13 +606,13 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps); ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps); - e->frame_deps[e->nb_frame_deps] = av_buffer_ref(vkfb); + e->frame_deps[e->nb_frame_deps] = av_frame_clone(f); if (!e->frame_deps[e->nb_frame_deps]) { ff_vk_exec_discard_deps(s, e); return AVERROR(ENOMEM); } - s->hwfc->lock_frame(s->frames, f); + vkfc->lock_frame(hwfc, vkf); e->frame_locked[e->nb_frame_deps] = 1; e->frame_update[e->nb_frame_deps] = 0; @@ -605,22 +621,51 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef return 0; } -void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef *vkfb, - VkImageMemoryBarrier2 *bar) +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar) { int i; for (i = 0; i < e->nb_frame_deps; i++) - if (e->frame_deps[i]->data == vkfb->data) + if (e->frame_deps[i]->data[0] == f->data[0]) break; av_assert0(i < e->nb_frame_deps); + /* Don't update duplicates */ + if (nb_img_bar && !e->frame_update[i]) + (*nb_img_bar)++; + e->queue_family_dst[i] = bar->dstQueueFamilyIndex; e->access_dst[i] = bar->dstAccessMask; e->layout_dst[i] = bar->newLayout; e->frame_update[i] = 1; } +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f) +{ + uint64_t **sem_sig_val_dst; 
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + /* Reject unknown frames */ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + break; + if (i == e->nb_frame_deps) + return AVERROR(EINVAL); + + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + *dst = vkf->sem[0]; + *dst_val = vkf->sem_value[0]; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val; + e->sem_sig_val_dst_cnt++; + + return 0; +} + int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) { VkResult ret; @@ -668,22 +713,26 @@ int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) return AVERROR_EXTERNAL; } - for (int i = 0; i < e->sem_sig_cnt; i++) + for (int i = 0; i < e->sem_sig_val_dst_cnt; i++) *e->sem_sig_val_dst[i] += 1; /* Unlock all frames */ for (int j = 0; j < e->nb_frame_deps; j++) { if (e->frame_locked[j]) { - AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data; + AVFrame *f = e->frame_deps[j]; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + if (e->frame_update[j]) { - int nb_images = ff_vk_count_images(f); + int nb_images = ff_vk_count_images(vkf); for (int i = 0; i < nb_images; i++) { - f->layout[i] = e->layout_dst[j]; - f->access[i] = e->access_dst[j]; - f->queue_family[i] = e->queue_family_dst[j]; + vkf->layout[i] = e->layout_dst[j]; + vkf->access[i] = e->access_dst[j]; + vkf->queue_family[i] = e->queue_family_dst[j]; } } - s->hwfc->unlock_frame(s->frames, f); + vkfc->unlock_frame(hwfc, vkf); e->frame_locked[j] = 0; } } @@ -767,6 +816,10 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, but should be ok */ }; + VkMemoryAllocateFlagsInfo alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + }; VkBufferMemoryRequirementsInfo2 req_desc = { .sType = 
VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, }; @@ -796,11 +849,18 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | ded_req.requiresDedicatedAllocation; - if (use_ded_mem) + if (use_ded_mem) { ded_alloc.buffer = buf->buf; + ded_alloc.pNext = alloc_pNext; + alloc_pNext = &ded_alloc; + } - err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, - use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + alloc_flags.pNext = alloc_pNext; + alloc_pNext = &alloc_flags; + } + + err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext, &buf->flags, &buf->mem); if (err) return err; @@ -812,6 +872,14 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, return AVERROR_EXTERNAL; } + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + VkBufferDeviceAddressInfo address_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .buffer = buf->buf, + }; + buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info); + } + buf->size = size; return 0; @@ -933,8 +1001,8 @@ int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, return 0; } -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx, - int unnorm_coords, VkFilter filt) +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; @@ -955,24 +1023,14 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx, }; ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, - s->hwctx->alloc, &sctx->sampler[0]); + s->hwctx->alloc, sampler); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n", ff_vk_ret2str(ret)); - return NULL; + return AVERROR_EXTERNAL; } - for (int i = 1; i < 4; i++) - 
sctx->sampler[i] = sctx->sampler[0]; - - return sctx; -} - -void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx) -{ - FFVulkanFunctions *vk = &s->vkfn; - vk->DestroySampler(s->hwctx->act_dev, sctx->sampler[0], - s->hwctx->alloc); + return 0; } int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) @@ -995,69 +1053,131 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt) } typedef struct ImageViewCtx { - VkImageView view; + VkImageView views[AV_NUM_DATA_POINTERS]; + int nb_views; } ImageViewCtx; -static void destroy_imageview(void *opaque, uint8_t *data) +static void destroy_imageviews(void *opaque, uint8_t *data) { FFVulkanContext *s = opaque; FFVulkanFunctions *vk = &s->vkfn; ImageViewCtx *iv = (ImageViewCtx *)data; - vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc); + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); } -int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, - VkImageView *v, VkImage img, VkFormat fmt, - const VkComponentMapping map) +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f) { int err; + VkResult ret; AVBufferRef *buf; FFVulkanFunctions *vk = &s->vkfn; - - VkImageViewCreateInfo imgview_spawn = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = NULL, - .image = img, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = fmt, - .components = map, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + const int nb_images = ff_vk_count_images(vkf); + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); ImageViewCtx *iv = 
av_mallocz(sizeof(*iv)); - VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn, - s->hwctx->alloc, &iv->view); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + for (int i = 0; i < nb_planes; i++) { + VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, }; + + VkImageViewCreateInfo view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = NULL, + .image = vkf->img[FFMIN(i, nb_images - 1)], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = rep_fmts[i], + .components = ff_comp_identity_map, + .subresourceRange = { + .aspectMask = plane_aspect[(nb_planes != 1) + i*(nb_planes != 1)], + .levelCount = 1, + .layerCount = 1, + }, + }; + + ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, + s->hwctx->alloc, &iv->views[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + iv->nb_views++; } - buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0); + buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageviews, s, 0); if (!buf) { - destroy_imageview(s, (uint8_t *)iv); - return AVERROR(ENOMEM); + err = AVERROR(ENOMEM); + goto fail; } /* Add to queue dependencies */ err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); - if (err) { + if (err < 0) av_buffer_unref(&buf); - return err; - } - *v = iv->view; + memcpy(views, iv->views, nb_planes*sizeof(*views)); - return 0; + return err; + +fail: + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); + return err; +} + +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + 
VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf) +{ + int i, found; + AVVkFrame *vkf = (AVVkFrame *)pic->data[0]; + const int nb_images = ff_vk_count_images(vkf); + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == pic->data[0]) + break; + found = (i < e->nb_frame_deps) && (e->frame_update[i]) ? i : -1; + + for (int i = 0; i < nb_images; i++) { + bar[*nb_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = src_stage, + .dstStageMask = dst_stage, + .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i], + .dstAccessMask = new_access, + .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0], + .newLayout = new_layout, + .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0], + .dstQueueFamilyIndex = new_qf, + .image = vkf->img[i], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = 1, + .levelCount = 1, + }, + }; + *nb_bar += 1; + } + + ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL); } int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, @@ -1077,11 +1197,11 @@ int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *na return 0; } -void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]) +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z) { - shd->local_size[0] = local_size[0]; - shd->local_size[1] = local_size[1]; - shd->local_size[2] = local_size[2]; + shd->local_size[0] = x; + shd->local_size[1] = y; + shd->local_size[2] = z; av_bprintf(&shd->src, "layout (local_size_x = %i, " "local_size_y = %i, local_size_z = %i) in;\n\n", @@ -1113,39 +1233,21 @@ void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio) void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd) { 
FFVulkanFunctions *vk = &s->vkfn; - vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc); + av_bprint_finalize(&shd->src, NULL); + + if (shd->shader.module) + vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc); } -int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint) +int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, + uint8_t *spirv, size_t spirv_size, const char *entrypoint) { - int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkShaderModuleCreateInfo shader_create; - uint8_t *spirv; - size_t spirv_size; - void *priv; shd->shader.pName = entrypoint; - if (!s->spirv_compiler) { -#if CONFIG_LIBGLSLANG - s->spirv_compiler = ff_vk_glslang_init(); -#elif CONFIG_LIBSHADERC - s->spirv_compiler = ff_vk_shaderc_init(); -#else - return AVERROR(ENOSYS); -#endif - if (!s->spirv_compiler) - return AVERROR(ENOMEM); - } - - err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv, - &spirv_size, entrypoint, &priv); - if (err < 0) - return err; - av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! 
Size: %zu bytes\n", shd->name, spirv_size); @@ -1157,11 +1259,8 @@ int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd, ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL, &shd->shader.module); - - s->spirv_compiler->free_shader(s->spirv_compiler, &priv); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n", + av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -1190,132 +1289,88 @@ static const struct descriptor_props { [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, }; -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader) +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only) { VkResult ret; - VkDescriptorSetLayout *layout; + int has_sampler = 0; FFVulkanFunctions *vk = &s->vkfn; + FFVulkanDescriptorSet *set; + VkDescriptorSetLayoutCreateInfo desc_create_layout; - if (only_print_to_shader) + if (print_to_shader_only) goto print; - pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), - pl->desc_layout_num + pl->qf->nb_queues); - if (!pl->desc_layout) + /* Actual layout allocated for the pipeline */ + set = av_realloc_array(pl->desc_set, sizeof(*pl->desc_set), + pl->nb_descriptor_sets + 1); + if (!set) return AVERROR(ENOMEM); + pl->desc_set = set; + set = &set[pl->nb_descriptor_sets]; + memset(set, 0, sizeof(*set)); - pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized, - sizeof(*pl->desc_set_initialized), - pl->descriptor_sets_num + 1); - if (!pl->desc_set_initialized) + set->binding = av_mallocz(nb*sizeof(*set->binding)); + if (!set->binding) return AVERROR(ENOMEM); - 
pl->desc_set_initialized[pl->descriptor_sets_num] = 0; - layout = &pl->desc_layout[pl->desc_layout_num]; - - { /* Create descriptor set layout descriptions */ - VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; - VkDescriptorSetLayoutBinding *desc_binding; - - desc_binding = av_mallocz(sizeof(*desc_binding)*num); - if (!desc_binding) - return AVERROR(ENOMEM); - - for (int i = 0; i < num; i++) { - desc_binding[i].binding = i; - desc_binding[i].descriptorType = desc[i].type; - desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); - desc_binding[i].stageFlags = desc[i].stages; - desc_binding[i].pImmutableSamplers = desc[i].sampler ? - desc[i].sampler->sampler : - NULL; - } - - desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - desc_create_layout.pBindings = desc_binding; - desc_create_layout.bindingCount = num; - - for (int i = 0; i < pl->qf->nb_queues; i++) { - ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, - s->hwctx->alloc, &layout[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "layout: %s\n", ff_vk_ret2str(ret)); - av_free(desc_binding); - return AVERROR_EXTERNAL; - } - } - - av_free(desc_binding); + set->binding_offset = av_mallocz(nb*sizeof(*set->binding_offset)); + if (!set->binding_offset) { + av_freep(&set->binding); + return AVERROR(ENOMEM); } - { /* Pool each descriptor by type and update pool counts */ - for (int i = 0; i < num; i++) { - int j; - for (j = 0; j < pl->pool_size_desc_num; j++) - if (pl->pool_size_desc[j].type == desc[i].type) - break; - if (j >= pl->pool_size_desc_num) { - pl->pool_size_desc = av_realloc_array(pl->pool_size_desc, - sizeof(*pl->pool_size_desc), - ++pl->pool_size_desc_num); - if (!pl->pool_size_desc) - return AVERROR(ENOMEM); - memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); - } - pl->pool_size_desc[j].type = desc[i].type; - pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 
1)*pl->qf->nb_queues; - } - } + desc_create_layout = (VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = nb, + .pBindings = set->binding, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, + }; - { /* Create template creation struct */ - VkDescriptorUpdateTemplateCreateInfo *dt; - VkDescriptorUpdateTemplateEntry *des_entries; + for (int i = 0; i < nb; i++) { + set->binding[i].binding = i; + set->binding[i].descriptorType = desc[i].type; + set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1); + set->binding[i].stageFlags = desc[i].stages; + set->binding[i].pImmutableSamplers = desc[i].samplers; - /* Freed after descriptor set initialization */ - des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry)); - if (!des_entries) - return AVERROR(ENOMEM); + if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER || + desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + has_sampler |= 1; + } - for (int i = 0; i < num; i++) { - des_entries[i].dstBinding = i; - des_entries[i].descriptorType = desc[i].type; - des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1); - des_entries[i].dstArrayElement = 0; - des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s; - des_entries[i].stride = descriptor_props[desc[i].type].struct_size; - } + set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + if (has_sampler) + set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; - pl->desc_template_info = av_realloc_array(pl->desc_template_info, - sizeof(*pl->desc_template_info), - pl->total_descriptor_sets + pl->qf->nb_queues); - if (!pl->desc_template_info) - return AVERROR(ENOMEM); + ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, + s->hwctx->alloc, &set->layout); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s", + ff_vk_ret2str(ret)); + 
return AVERROR_EXTERNAL; + } - dt = &pl->desc_template_info[pl->total_descriptor_sets]; - memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues); + vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size); - for (int i = 0; i < pl->qf->nb_queues; i++) { - dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; - dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; - dt[i].descriptorSetLayout = layout[i]; - dt[i].pDescriptorUpdateEntries = des_entries; - dt[i].descriptorUpdateEntryCount = num; - } - } + set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment); - pl->descriptor_sets_num++; + for (int i = 0; i < nb; i++) + vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout, + i, &set->binding_offset[i]); - pl->desc_layout_num += pl->qf->nb_queues; - pl->total_descriptor_sets += pl->qf->nb_queues; + set->read_only = read_only; + set->nb_bindings = nb; + pl->nb_descriptor_sets++; print: /* Write shader info */ - for (int i = 0; i < num; i++) { + for (int i = 0; i < nb; i++) { const struct descriptor_props *prop = &descriptor_props[desc[i].type]; - GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i); + GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i); if (desc[i].mem_layout) GLSLA(", %s", desc[i].mem_layout); @@ -1347,171 +1402,260 @@ print: return 0; } -void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - int set_id) +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl) { - FFVulkanFunctions *vk = &s->vkfn; + int err; - /* If a set has never been updated, update all queues' sets. 
*/ - if (!pl->desc_set_initialized[set_id]) { - for (int i = 0; i < pl->qf->nb_queues; i++) { - int idx = set_id*pl->qf->nb_queues + i; - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[idx], - pl->desc_template[idx], - s); - } - pl->desc_set_initialized[set_id] = 1; - return; - } + pl->desc_bind = av_mallocz(pl->nb_descriptor_sets*sizeof(*pl->desc_bind)); + if (!pl->desc_bind) + return AVERROR(ENOMEM); + + pl->bound_buffer_indices = av_mallocz(pl->nb_descriptor_sets* + sizeof(*pl->bound_buffer_indices)); + if (!pl->bound_buffer_indices) + return AVERROR(ENOMEM); -// set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue; + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + int nb = set->read_only ? 1 : pool->pool_size; + + err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb, + NULL, NULL, set->usage, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (err < 0) + return err; + + err = ff_vk_map_buffers(s, &set->buf, &set->desc_mem, 1, 0); + if (err < 0) + return err; + + pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, + .usage = set->usage, + .address = set->buf.address, + }; + + pl->bound_buffer_indices[i] = i; + } - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[set_id], - pl->desc_template[set_id], - s); + return 0; } -void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanPipeline *pl, - VkShaderStageFlagBits stage, - int offset, size_t size, void *src) +static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanDescriptorSet *set, + int bind_idx, int array_idx, + VkDescriptorGetInfoEXT *desc_get_info, + size_t desc_size) { FFVulkanFunctions *vk = &s->vkfn; - vk->CmdPushConstants(e->buf, pl->pipeline_layout, - stage, offset, size, src); + const size_t exec_offset = set->read_only ? 
0 : set->aligned_size*e->idx; + void *desc = set->desc_mem + /* Base */ + exec_offset + /* Execution context */ + set->binding_offset[bind_idx] + /* Descriptor binding */ + array_idx*desc_size; /* Array position */ + + vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); } -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler) { - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; - pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging)); - if (!pl->desc_staging) - return AVERROR(ENOMEM); + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + desc_get_info.data.pSampler = sampler; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - { /* Init descriptor set pool */ - VkDescriptorPoolCreateInfo pool_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .poolSizeCount = pl->pool_size_desc_num, - .pPoolSizes = pl->pool_size_desc, - .maxSets = pl->total_descriptor_sets, - }; + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, + s->desc_buf_props.samplerDescriptorSize); - ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, - s->hwctx->alloc, &pl->desc_pool); - av_freep(&pl->pool_size_desc); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "pool: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + return 0; +} - { /* Allocate descriptor sets */ - VkDescriptorSetAllocateInfo alloc_info = { - .sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = pl->desc_pool, - .descriptorSetCount = pl->total_descriptor_sets, - .pSetLayouts = pl->desc_layout, - }; +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorImageInfo desc_img_info = { + .imageView = view, + .sampler = sampler, + .imageLayout = layout, + }; + size_t desc_size; - pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set)); - if (!pl->desc_set) - return AVERROR(ENOMEM); + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + desc_get_info.data.pSampledImage = &desc_img_info; + desc_size = s->desc_buf_props.sampledImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + desc_get_info.data.pStorageImage = &desc_img_info; + desc_size = s->desc_buf_props.storageImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + desc_get_info.data.pInputAttachmentImage = &desc_img_info; + desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + desc_get_info.data.pCombinedImageSampler = &desc_img_info; + desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info, - pl->desc_set); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + update_set_descriptor(s, e, desc_set, 
bind, offs, &desc_get_info, desc_size); - { /* Finally create the pipeline layout */ - VkPipelineLayoutCreateInfo spawn_pipeline_layout = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging, - .pushConstantRangeCount = pl->push_consts_num, - .pPushConstantRanges = pl->push_consts, - }; + return 0; +} - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) - pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i]; +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorAddressInfoEXT desc_buf_info = { + .address = addr, + .range = len, + .format = fmt, + }; + size_t desc_size; - ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, - s->hwctx->alloc, &pl->pipeline_layout); - av_freep(&pl->push_consts); - pl->push_consts_num = 0; - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + desc_get_info.data.pUniformBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + desc_get_info.data.pStorageBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + 
desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - { /* Descriptor template (for tightly packed descriptors) */ - VkDescriptorUpdateTemplateCreateInfo *dt; + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); - pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template)); - if (!pl->desc_template) - return AVERROR(ENOMEM); + return 0; +} - /* Create update templates for the descriptor sets */ - for (int i = 0; i < pl->total_descriptor_sets; i++) { - dt = &pl->desc_template_info[i]; - dt->pipelineLayout = pl->pipeline_layout; - ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev, - dt, s->hwctx->alloc, - &pl->desc_template[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor " - "template: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } +void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler) +{ + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); - /* Free the duplicated memory used for the template entries */ - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { - dt = &pl->desc_template_info[i]; - av_free((void *)dt->pDescriptorUpdateEntries); - } + for (int i = 0; i < nb_planes; i++) + ff_vk_set_descriptor_image(s, pl, e, set, binding, i, + views[i], layout, sampler); +} + +void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanPipeline *pl, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src) +{ + FFVulkanFunctions *vk = 
&s->vkfn; + vk->CmdPushConstants(e->buf, pl->pipeline_layout, + stage, offset, size, src); +} + +static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkPipelineLayoutCreateInfo pipeline_layout_info; + + VkDescriptorSetLayout *desc_layouts = av_malloc(pl->nb_descriptor_sets* + sizeof(desc_layouts)); + if (!desc_layouts) + return AVERROR(ENOMEM); + + for (int i = 0; i < pl->nb_descriptor_sets; i++) + desc_layouts[i] = pl->desc_set[i].layout; + + /* Finally create the pipeline layout */ + pipeline_layout_info = (VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pSetLayouts = desc_layouts, + .setLayoutCount = pl->nb_descriptor_sets, + .pushConstantRangeCount = pl->push_consts_num, + .pPushConstantRanges = pl->push_consts, + }; - av_freep(&pl->desc_template_info); + ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info, + s->hwctx->alloc, &pl->pipeline_layout); + av_free(desc_layouts); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; } return 0; } int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkQueueFamilyCtx *qf) + FFVkSPIRVShader *shd) { - int i; + int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkComputePipelineCreateInfo pipe = { + VkComputePipelineCreateInfo pipeline_create_info; + + err = init_pipeline_layout(s, pl); + if (err < 0) + return err; + + pipeline_create_info = (VkComputePipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, .layout = pl->pipeline_layout, + .stage = shd->shader, }; - pl->qf = qf; - - for (i = 0; i < pl->shaders_num; i++) { - if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) { - pipe.stage = pl->shaders[i]->shader; - break; - } - } - if (i == pl->shaders_num) { - av_log(s, 
AV_LOG_ERROR, "Can't init compute pipeline, no shader\n"); - return AVERROR(EINVAL); - } - - ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe, + ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, + &pipeline_create_info, s->hwctx->alloc, &pl->pipeline); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", @@ -1520,77 +1664,57 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, } pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + pl->wg_size[0] = shd->local_size[0]; + pl->wg_size[1] = shd->local_size[1]; + pl->wg_size[2] = shd->local_size[2]; return 0; } -void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e, +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; + VkDeviceSize offsets[1024]; - vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); - -// for (int i = 0; i < pl->descriptor_sets_num; i++) - // pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue]; + for (int i = 0; i < pl->nb_descriptor_sets; i++) + offsets[i] = pl->desc_set[i].read_only ? 
0 : pl->desc_set[i].aligned_size*e->idx; - vk->CmdBindDescriptorSets(e->buf, pl->bind_point, - pl->pipeline_layout, 0, - pl->descriptor_sets_num, - (VkDescriptorSet *)pl->desc_staging, - 0, NULL); + /* Bind pipeline */ + vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); + /* Bind descriptor buffers */ + vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind); + /* Binding offsets */ + vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout, + 0, pl->nb_descriptor_sets, + pl->bound_buffer_indices, offsets); } void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; - for (int i = 0; i < pl->shaders_num; i++) { - FFVkSPIRVShader *shd = pl->shaders[i]; - av_bprint_finalize(&shd->src, NULL); - vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, - s->hwctx->alloc); - av_free(shd); - } - - vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); - vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, - s->hwctx->alloc); + if (pl->pipeline) + vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); + if (pl->pipeline_layout) + vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, + s->hwctx->alloc); - for (int i = 0; i < pl->desc_layout_num; i++) { - if (pl->desc_template && pl->desc_template[i]) - vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i], - s->hwctx->alloc); - if (pl->desc_layout && pl->desc_layout[i]) - vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i], + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + ff_vk_unmap_buffers(s, &set->buf, 1, 0); + ff_vk_free_buf(s, &set->buf); + if (set->layout) + vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout, s->hwctx->alloc); + av_free(set->binding); + av_free(set->binding_offset); } - /* Also frees the descriptor sets */ - if (pl->desc_pool) - 
vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, - s->hwctx->alloc); - - av_freep(&pl->desc_staging); av_freep(&pl->desc_set); - av_freep(&pl->shaders); - av_freep(&pl->desc_layout); - av_freep(&pl->desc_template); - av_freep(&pl->desc_set_initialized); + av_freep(&pl->desc_bind); av_freep(&pl->push_consts); pl->push_consts_num = 0; - - /* Only freed in case of failure */ - av_freep(&pl->pool_size_desc); - if (pl->desc_template_info) { - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { - VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i]; - av_free((void *)dt->pDescriptorUpdateEntries); - } - av_freep(&pl->desc_template_info); - } - - av_free(pl); } void ff_vk_uninit(FFVulkanContext *s) @@ -1599,9 +1723,6 @@ void ff_vk_uninit(FFVulkanContext *s) av_freep(&s->qf_props); av_freep(&s->video_props); - if (s->spirv_compiler) - s->spirv_compiler->uninit(&s->spirv_compiler); - av_buffer_unref(&s->device_ref); av_buffer_unref(&s->frames_ref); } diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index e66ca59ef7..1321fb8ba8 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -30,11 +30,6 @@ #include "hwcontext_vulkan.h" #include "vulkan_loader.h" -#define FF_VK_DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \ - VK_IMAGE_USAGE_STORAGE_BIT | \ - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \ - VK_IMAGE_USAGE_TRANSFER_DST_BIT) - /* GLSL management macros */ #define INDENT(N) INDENT_##N #define INDENT_0 @@ -59,6 +54,8 @@ goto fail; \ } while (0) +#define DUP_SAMPLER(x) { x, x, x, x } + typedef struct FFVkSPIRVShader { const char *name; /* Name for id/debugging purposes */ AVBPrint src; @@ -66,19 +63,6 @@ typedef struct FFVkSPIRVShader { VkPipelineShaderStageCreateInfo shader; } FFVkSPIRVShader; -typedef struct FFVkSPIRVCompiler { - void *priv; - int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, - struct FFVkSPIRVShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, void **opaque); - void 
(*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); - void (*uninit)(struct FFVkSPIRVCompiler **ctx); -} FFVkSPIRVCompiler; - -typedef struct FFVkSampler { - VkSampler sampler[4]; -} FFVkSampler; - typedef struct FFVulkanDescriptorSetBinding { const char *name; VkDescriptorType type; @@ -88,8 +72,7 @@ typedef struct FFVulkanDescriptorSetBinding { uint32_t dimensions; /* Needed for e.g. sampler%iD */ uint32_t elems; /* 0 - scalar, 1 or more - vector */ VkShaderStageFlags stages; - FFVkSampler *sampler; /* Sampler to use for all elems */ - void *updater; /* Pointer to VkDescriptor*Info */ + VkSampler samplers[4]; /* Sampler to use for all elems */ } FFVulkanDescriptorSetBinding; typedef struct FFVkBuffer { @@ -97,6 +80,7 @@ typedef struct FFVkBuffer { VkDeviceMemory mem; VkMemoryPropertyFlagBits flags; size_t size; + VkDeviceAddress address; } FFVkBuffer; typedef struct FFVkQueueFamilyCtx { @@ -104,42 +88,45 @@ typedef struct FFVkQueueFamilyCtx { int nb_queues; } FFVkQueueFamilyCtx; -typedef struct FFVulkanPipeline { - FFVkQueueFamilyCtx *qf; +typedef struct FFVulkanDescriptorSet { + VkDescriptorSetLayout layout; + FFVkBuffer buf; + uint8_t *desc_mem; + VkDeviceSize layout_size; + VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ + VkDeviceSize total_size; /* Once registered to an exec context */ + VkBufferUsageFlags usage; + VkDescriptorSetLayoutBinding *binding; + VkDeviceSize *binding_offset; + int nb_bindings; + + int read_only; +} FFVulkanDescriptorSet; + +typedef struct FFVulkanPipeline { VkPipelineBindPoint bind_point; /* Contexts */ VkPipelineLayout pipeline_layout; VkPipeline pipeline; - /* Shaders */ - FFVkSPIRVShader **shaders; - int shaders_num; - /* Push consts */ VkPushConstantRange *push_consts; int push_consts_num; + /* Workgroup */ + int wg_size[3]; + /* Descriptors */ - VkDescriptorSetLayout *desc_layout; - VkDescriptorPool desc_pool; - VkDescriptorSet *desc_set; - void **desc_staging; - VkDescriptorSetLayoutBinding 
**desc_binding; - VkDescriptorUpdateTemplate *desc_template; - int *desc_set_initialized; - int desc_layout_num; - int descriptor_sets_num; - int total_descriptor_sets; - int pool_size_desc_num; - - /* Temporary, used to store data in between initialization stages */ - VkDescriptorUpdateTemplateCreateInfo *desc_template_info; - VkDescriptorPoolSize *pool_size_desc; + FFVulkanDescriptorSet *desc_set; + VkDescriptorBufferBindingInfoEXT *desc_bind; + uint32_t *bound_buffer_indices; + int nb_descriptor_sets; } FFVulkanPipeline; typedef struct FFVkExecContext { + int idx; const struct FFVkExecPool *parent; /* Queue for the execution context */ @@ -162,7 +149,7 @@ typedef struct FFVkExecContext { unsigned int buf_deps_alloc_size; /* Frame dependencies */ - AVBufferRef **frame_deps; + AVFrame **frame_deps; unsigned int frame_deps_alloc_size; int nb_frame_deps; @@ -185,6 +172,7 @@ typedef struct FFVkExecContext { uint64_t **sem_sig_val_dst; unsigned int sem_sig_val_dst_alloc; + int sem_sig_val_dst_cnt; uint8_t *frame_locked; unsigned int frame_locked_alloc_size; @@ -229,6 +217,8 @@ typedef struct FFVulkanContext { VkPhysicalDeviceProperties2 props; VkPhysicalDeviceDriverProperties driver_props; VkPhysicalDeviceMemoryProperties mprops; + VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; + VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props; VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; VkQueueFamilyVideoPropertiesKHR *video_props; VkQueueFamilyProperties2 *qf_props; @@ -244,8 +234,6 @@ typedef struct FFVulkanContext { uint32_t qfs[5]; int nb_qfs; - FFVkSPIRVCompiler *spirv_compiler; - /* Properties */ int output_width; int output_height; @@ -286,15 +274,15 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); int ff_vk_load_props(FFVulkanContext *s); /** - * Loads queue families into the main context. * Chooses a QF and loads it into a context. 
*/ -void ff_vk_qf_fill(FFVulkanContext *s); int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, VkQueueFlagBits dev_family); /** * Allocates/frees an execution pool. + * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add() + * has been called. */ int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, FFVkExecPool *pool, int nb_contexts, @@ -340,17 +328,28 @@ void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e); int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps, int ref); int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef *vkfb, VkPipelineStageFlagBits in_wait_dst_flag); -void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb, - VkImageMemoryBarrier2 *bar); + AVFrame *f, VkPipelineStageFlagBits in_wait_dst_flag); +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar); +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f); void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); /** * Create an imageview and add it as a dependency to an execution. */ -int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, - VkImageView *v, VkImage img, VkFormat fmt, - const VkComponentMapping map); +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f); + +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf); /** * Memory/buffer/image allocation helpers. 
@@ -372,33 +371,22 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); /** - * Sampler management. + * Create a sampler. */ -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx, - int unnorm_coords, VkFilter filt); -void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx); +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt); /** * Shader management. */ int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, VkShaderStageFlags stage); -void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]); +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z); void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio); -int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint); +int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, + uint8_t *spirv, size_t spirv_size, const char *entrypoint); void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd); -/** - * Register a descriptor set. - * Update a descriptor set for execution. - */ -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader); -void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - int set_id); - /** * Add/update push constants for execution. */ @@ -410,15 +398,45 @@ void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, int offset, size_t size, void *src); /** - * Pipeline management. + * Add descriptor to a pipeline. Must be called before pipeline init. */ +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only); + +/* Initialize/free a pipeline. 
*/ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkQueueFamilyCtx *qf); -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl); -void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanPipeline *pl); + FFVkSPIRVShader *shd); void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl); +/** + * Register a pipeline with an exec pool. + * Pool may be NULL if all descriptor sets are read-only. + */ +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl); + +/* Bind pipeline */ +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanPipeline *pl); + +/* Update sampler/image/buffer descriptors. e may be NULL for read-only descriptors. */ +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler); +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler); +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt); + +void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler); + /** * Frees main context. 
*/ -- 2.39.2 [-- Attachment #52: 0051-hwcontext_vulkan-rewrite-to-support-multiplane-surfa.patch --] [-- Type: text/x-diff, Size: 68673 bytes --] From f36680714e0636288dacf687e766a9222fe04867 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 11 Jan 2023 09:37:35 +0100 Subject: [PATCH 51/72] hwcontext_vulkan: rewrite to support multiplane surfaces --- libavutil/hwcontext_vulkan.c | 744 +++++++++++++++++------------------ libavutil/hwcontext_vulkan.h | 69 ++-- 2 files changed, 411 insertions(+), 402 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index e7c14fad74..027ecc76b1 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -62,6 +64,8 @@ typedef struct VulkanQueueCtx { VkFence fence; VkQueue queue; int was_synchronous; + int qf; + int qidx; /* Buffer dependencies */ AVBufferRef **buf_deps; @@ -116,6 +120,11 @@ typedef struct VulkanDevicePriv { } VulkanDevicePriv; typedef struct VulkanFramesPriv { + const VkFormat *fmts; + int nb_images; + VkImageAspectFlags aspect; + const struct FFVkFormatEntry *fmt; + /* Image conversions */ VulkanExecCtx conv_ctx; @@ -145,112 +154,201 @@ typedef struct AVVkFrameInternal { #endif } AVVkFrameInternal; -#define ADD_VAL_TO_LIST(list, count, val) \ - do { \ - list = av_realloc_array(list, sizeof(*list), ++count); \ - if (!list) { \ - err = AVERROR(ENOMEM); \ - goto fail; \ - } \ - list[count - 1] = av_strdup(val); \ - if (!list[count - 1]) { \ - err = AVERROR(ENOMEM); \ - goto fail; \ - } \ - } while(0) - -#define RELEASE_PROPS(props, count) \ - if (props) { \ - for (int i = 0; i < count; i++) \ - av_free((void *)((props)[i])); \ - av_free((void *)props); \ - } +#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) +#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | 
VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) -static const struct { +static const struct FFVkFormatEntry { + VkFormat vkf; enum AVPixelFormat pixfmt; - const VkFormat vkfmts[5]; -} vk_pixfmt_planar_map[] = { - { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } }, - - { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { AV_PIX_FMT_P012, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - - { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - - { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - - { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { 
AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - /* There is no AV_PIX_FMT_YUVA420P12 */ - { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - { AV_PIX_FMT_VUYX, { VK_FORMAT_R8G8B8A8_UNORM } }, - { AV_PIX_FMT_XV36, { VK_FORMAT_R16G16B16A16_UNORM } }, - - { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } }, - { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } }, - { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } }, - { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } }, - { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } }, - { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, - { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } }, - { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, - { AV_PIX_FMT_BGR565, { 
VK_FORMAT_B5G6R5_UNORM_PACK16 } }, - { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } }, - { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } }, - - /* Lower priority as there's an endianess-dependent overlap between these - * and rgba/bgr0, and PACK32 formats are more limited */ - { AV_PIX_FMT_BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, - { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } }, - - { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } }, - - { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, - { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + VkImageAspectFlags aspect; + int vk_planes; + int nb_images; + int nb_images_fallback; + const VkFormat fallback[5]; +} vk_formats_list[] = { + /* Gray formats */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } }, + + /* RGB formats */ + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } }, + { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } }, + { 
VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } }, + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, + { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } }, + + /* Planar RGB */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + + /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, 
VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Two-plane 422 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Two-plane 444 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, 
ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + /* Single plane 422 at 8, 10 and 12 bits */ + { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, }; +static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list); const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) { - for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++) - if (vk_pixfmt_planar_map[i].pixfmt == p) - return vk_pixfmt_planar_map[i].vkfmts; + for (int i = 0; i < nb_vk_formats_list; i++) + if (vk_formats_list[i].pixfmt == 
p) + return vk_formats_list[i].fallback; + return NULL; +} + +static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p) +{ + for (int i = 0; i < nb_vk_formats_list; i++) + if (vk_formats_list[i].pixfmt == p) + return &vk_formats_list[i]; return NULL; } +/* Malitia pura, Khronos */ +#define FN_MAP_TO(dst_t, dst, src_t, src) \ + static dst_t map_ ##src## _to_ ##dst(src_t mask2) \ + { \ + dst_t mask1 = 0x0; \ + MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); \ + MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR); \ + MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR); \ + MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); \ + return mask1; \ + } + +#define MAP_TO(mask1, mask2, flag1, flag2) if (mask2 & flag2) mask1 |= flag1; +FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage) +#undef MAP_TO +#define MAP_TO(mask1, mask2, flag1, flag2) if (mask2 & flag1) mask1 |= flag2; +FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats) +#undef MAP_TO +#undef FN_MAP_TO + +static int av_vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, + VkImageUsageFlags additional_usage, const VkFormat **fmts, + int *nb_images, VkImageAspectFlags *aspect, + VkImageUsageFlags *supported_usage) +{ + AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; + VulkanDevicePriv *priv = dev_ctx->internal->priv; + FFVulkanFunctions *vk = &priv->vkfn; + + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + }; + const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; + const VkFormatFeatureFlagBits2 full_flags = VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT | +
basic_flags; + + const VkFormatFeatureFlagBits2 additional_flags = map_usage_to_feats(additional_usage); + + for (int i = 0; i < nb_vk_formats_list; i++) { + if (vk_formats_list[i].pixfmt == p) { + VkFormatFeatureFlagBits *feat = &prop.formatProperties.optimalTilingFeatures; + VkFormatFeatureFlagBits2 feats_vk1, feats_vk2; + int basics; + int full; + int additional; + + basics = 0; + full = 0; + additional = 0; + vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, vk_formats_list[i].vkf, + &prop); + + /* We want at least the basics supported */ + feats_vk1 = *feat; + basics = !!(*feat & basic_flags); + additional = !!(*feat & additional_flags); + + /* If basics are not supported, OR we have multiplane images, + * check the fallback/single-plane rep for support. */ + if (!basics || vk_formats_list[i].vk_planes > 1) + vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, + vk_formats_list[i].fallback[0], + &prop); + + feats_vk2 = *feat; + full = !!(*feat & full_flags); + + if (additional_flags && !additional) { + return AVERROR(ENOTSUP); + } else if (full && basics) { + if (fmts) + *fmts = &vk_formats_list[i].vkf; + if (nb_images) + *nb_images = 1; + if (aspect) + *aspect = vk_formats_list[i].aspect; + if (supported_usage) + *supported_usage = map_feats_to_usage(feats_vk1); + return 0; + } else if (full && (vk_formats_list[i].vk_planes > 1)) { + if (fmts) + *fmts = vk_formats_list[i].fallback; + if (nb_images) + *nb_images = vk_formats_list[i].nb_images_fallback; + if (aspect) + *aspect = vk_formats_list[i].aspect; + if (supported_usage) + *supported_usage = map_feats_to_usage(feats_vk2); + return 0; + } else { + return AVERROR(ENOTSUP); + } + } + } + + return AVERROR(EINVAL); +} + static const void *vk_find_struct(const void *chain, VkStructureType stype) { const VkBaseInStructure *in = chain; @@ -276,33 +374,6 @@ static void vk_link_struct(void *chain, void *in) out->pNext = in; } -static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat 
p, - int linear) -{ - AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; - VulkanDevicePriv *priv = dev_ctx->internal->priv; - FFVulkanFunctions *vk = &priv->vkfn; - const VkFormat *fmt = av_vkfmt_from_pixfmt(p); - int planes = av_pix_fmt_count_planes(p); - - if (!fmt) - return 0; - - for (int i = 0; i < planes; i++) { - VkFormatFeatureFlags flags; - VkFormatProperties2 prop = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - }; - vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop); - flags = linear ? prop.formatProperties.linearTilingFeatures : - prop.formatProperties.optimalTilingFeatures; - if (!(flags & FF_VK_DEFAULT_USAGE_FLAGS)) - return 0; - } - - return 1; -} - static int load_libvulkan(AVHWDeviceContext *ctx) { AVVulkanDeviceContext *hwctx = ctx->hwctx; @@ -435,6 +506,27 @@ static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, return 0; } +#define ADD_VAL_TO_LIST(list, count, val) \ + do { \ + list = av_realloc_array(list, sizeof(*list), ++count); \ + if (!list) { \ + err = AVERROR(ENOMEM); \ + goto fail; \ + } \ + list[count - 1] = av_strdup(val); \ + if (!list[count - 1]) { \ + err = AVERROR(ENOMEM); \ + goto fail; \ + } \ + } while(0) + +#define RELEASE_PROPS(props, count) \ + if (props) { \ + for (int i = 0; i < count; i++) \ + av_free((void *)((props)[i])); \ + av_free((void *)props); \ + } + static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, const char * const **dst, uint32_t *num, int debug) { @@ -683,6 +775,10 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts) AVVulkanDeviceContext *hwctx = ctx->hwctx; VkApplicationInfo application_info = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "ffmpeg", + .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, + LIBAVUTIL_VERSION_MINOR, + LIBAVUTIL_VERSION_MICRO), .pEngineName = "libavutil", .apiVersion = VK_API_VERSION_1_3, .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, 
@@ -1121,6 +1217,8 @@ static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, VulkanQueueCtx *q = &cmd->queues[i]; vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue); q->was_synchronous = 1; + q->qf = queue_family_index; + q->qidx = i; } return 0; @@ -1256,6 +1354,7 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, VkResult ret; VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx]; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; FFVulkanFunctions *vk = &p->vkfn; ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]); @@ -1269,7 +1368,9 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx]; s_info->commandBufferCount = 1; + hwctx->lock_queue(hwfc->device_ctx, q->qf, q->qidx); ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence); + hwctx->unlock_queue(hwfc->device_ctx, q->qf, q->qidx); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n", vk_ret2str(ret)); @@ -1284,7 +1385,6 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd, q->was_synchronous = synchronous; if (synchronous) { - AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); vk->ResetFences(hwctx->act_dev, 1, &q->fence); unref_exec_ctx_deps(hwfc, cmd); @@ -1446,12 +1546,6 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx, if (opt_d) p->use_linear_images = strtol(opt_d->value, NULL, 10); - opt_d = av_dict_get(opts, "contiguous_planes", NULL, 0); - if (opt_d) - p->contiguous_planes = strtol(opt_d->value, NULL, 10); - else - p->contiguous_planes = -1; - hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames; hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount; @@ -1690,8 +1784,10 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext 
*ctx, int count = 0; VulkanDevicePriv *p = ctx->internal->priv; - for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) - count += pixfmt_is_supported(ctx, i, p->use_linear_images); + for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { + count += av_vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, + 0, NULL, NULL, NULL, NULL) >= 0; + } #if CONFIG_CUDA if (p->dev_is_nvidia) @@ -1704,9 +1800,12 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, return AVERROR(ENOMEM); count = 0; - for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++) - if (pixfmt_is_supported(ctx, i, p->use_linear_images)) - constraints->valid_sw_formats[count++] = i; + for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { + if (av_vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, + 0, NULL, NULL, NULL, NULL) >= 0) { + constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt; + } + } #if CONFIG_CUDA if (p->dev_is_nvidia) @@ -1714,8 +1813,8 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, #endif constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE; - constraints->min_width = 0; - constraints->min_height = 0; + constraints->min_width = 1; + constraints->min_height = 1; constraints->max_width = p->props.properties.limits.maxImageDimension2D; constraints->max_height = p->props.properties.limits.maxImageDimension2D; @@ -1789,7 +1888,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, static void vulkan_free_internal(AVVkFrame *f) { - AVVkFrameInternal *internal = f->internal; + av_unused AVVkFrameInternal *internal = f->internal; #if CONFIG_CUDA if (internal->cuda_fc_ref) { @@ -1829,17 +1928,22 @@ static void vulkan_frame_free(void *opaque, uint8_t *data) AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; - int planes = av_pix_fmt_count_planes(hwfc->sw_format); + int nb_images = ff_vk_count_images(f); + + 
VkSemaphoreWaitInfo sem_wait = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .pSemaphores = f->sem, + .pValues = f->sem_value, + .semaphoreCount = nb_images, + }; - /* We could use vkWaitSemaphores, but the validation layer seems to have - * issues tracking command buffer execution state on uninit. */ - vk->DeviceWaitIdle(hwctx->act_dev); + vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); vulkan_free_internal(f); - for (int i = 0; i < planes; i++) { - vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + for (int i = 0; i < nb_images; i++) { + vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); } @@ -1849,30 +1953,25 @@ static void vulkan_frame_free(void *opaque, uint8_t *data) static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, void *alloc_pnext, size_t alloc_pnext_stride) { - int err; + int img_cnt = 0, err; VkResult ret; AVHWDeviceContext *ctx = hwfc->device_ctx; VulkanDevicePriv *p = ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; AVVulkanFramesContext *hwfctx = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; - VkMemoryRequirements cont_memory_requirements = { 0 }; - int cont_mem_size_list[AV_NUM_DATA_POINTERS] = { 0 }; - int cont_mem_size = 0; - AVVulkanDeviceContext *hwctx = ctx->hwctx; - for (int i = 0; i < planes; i++) { + while (f->img[img_cnt]) { int use_ded_mem; VkImageMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, - .image = f->img[i], + .image = f->img[img_cnt], }; VkMemoryDedicatedAllocateInfo ded_alloc = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride), + .pNext = (void *)(((uint8_t *)alloc_pnext) + 
img_cnt*alloc_pnext_stride), }; VkMemoryDedicatedRequirements ded_req = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, @@ -1884,79 +1983,35 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req); - if (f->tiling == VK_IMAGE_TILING_LINEAR) + if (hwfctx->tiling == VK_IMAGE_TILING_LINEAR) req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size, p->props.properties.limits.minMemoryMapAlignment); - if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { - if (ded_req.requiresDedicatedAllocation) { - av_log(hwfc, AV_LOG_ERROR, "Cannot allocate all planes in a single allocation, " - "device requires dedicated image allocation!\n"); - return AVERROR(EINVAL); - } else if (!i) { - cont_memory_requirements = req.memoryRequirements; - } else if (cont_memory_requirements.memoryTypeBits != - req.memoryRequirements.memoryTypeBits) { - av_log(hwfc, AV_LOG_ERROR, "The memory requirements differ between plane 0 " - "and %i, cannot allocate in a single region!\n", - i); - return AVERROR(EINVAL); - } - - cont_mem_size_list[i] = FFALIGN(req.memoryRequirements.size, - req.memoryRequirements.alignment); - cont_mem_size += cont_mem_size_list[i]; - continue; - } - /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | ded_req.requiresDedicatedAllocation; if (use_ded_mem) - ded_alloc.image = f->img[i]; + ded_alloc.image = f->img[img_cnt]; /* Allocate memory */ if ((err = alloc_mem(ctx, &req.memoryRequirements, - f->tiling == VK_IMAGE_TILING_LINEAR ? + hwfctx->tiling == VK_IMAGE_TILING_LINEAR ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, use_ded_mem ? 
&ded_alloc : (void *)ded_alloc.pNext, - &f->flags, &f->mem[i]))) - return err; - - f->size[i] = req.memoryRequirements.size; - bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; - bind_info[i].image = f->img[i]; - bind_info[i].memory = f->mem[i]; - } - - if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { - cont_memory_requirements.size = cont_mem_size; - - /* Allocate memory */ - if ((err = alloc_mem(ctx, &cont_memory_requirements, - f->tiling == VK_IMAGE_TILING_LINEAR ? - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - (void *)(((uint8_t *)alloc_pnext)), - &f->flags, &f->mem[0]))) + &f->flags, &f->mem[img_cnt]))) return err; - f->size[0] = cont_memory_requirements.size; - - for (int i = 0, offset = 0; i < planes; i++) { - bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; - bind_info[i].image = f->img[i]; - bind_info[i].memory = f->mem[0]; - bind_info[i].memoryOffset = offset; + f->size[img_cnt] = req.memoryRequirements.size; + bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + bind_info[img_cnt].image = f->img[img_cnt]; + bind_info[img_cnt].memory = f->mem[img_cnt]; - f->offset[i] = bind_info[i].memoryOffset; - offset += cont_mem_size_list[i]; - } + img_cnt++; } /* Bind the allocated memory to the images */ - ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info); + ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info); if (ret != VK_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", vk_ret2str(ret)); @@ -1982,11 +2037,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, VkImageLayout new_layout; VkAccessFlags2 new_access; AVVulkanFramesContext *vkfc = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; - AVFrame tmp = { .data[0] = (uint8_t *)frame }; uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; + int nb_images = 
ff_vk_count_images(frame); VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 }; VkDependencyInfo dep_info; @@ -1994,14 +2048,14 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .pSignalSemaphoreValues = sem_sig_val, - .signalSemaphoreValueCount = planes, + .signalSemaphoreValueCount = nb_images, }; VkSubmitInfo s_info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &s_timeline_sem_info, .pSignalSemaphores = frame->sem, - .signalSemaphoreCount = planes, + .signalSemaphoreCount = nb_images, }; VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS]; @@ -2011,7 +2065,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, vkfc->lock_frame(hwfc, frame); - for (int i = 0; i < planes; i++) { + for (int i = 0; i < nb_images; i++) { wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; sem_sig_val[i] = frame->sem_value[i] + 1; } @@ -2029,10 +2083,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; dst_qf = VK_QUEUE_FAMILY_IGNORED; s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value; - s_timeline_sem_info.waitSemaphoreValueCount = planes; + s_timeline_sem_info.waitSemaphoreValueCount = nb_images; s_info.pWaitSemaphores = frame->sem; s_info.pWaitDstStageMask = wait_st; - s_info.waitSemaphoreCount = planes; + s_info.waitSemaphoreCount = nb_images; break; case PREP_MODE_EXTERNAL_EXPORT: new_layout = VK_IMAGE_LAYOUT_GENERAL; @@ -2040,10 +2094,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, src_qf = VK_QUEUE_FAMILY_IGNORED; dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value; - s_timeline_sem_info.waitSemaphoreValueCount = planes; + s_timeline_sem_info.waitSemaphoreValueCount = nb_images; s_info.pWaitSemaphores = frame->sem; s_info.pWaitDstStageMask = wait_st; - 
s_info.waitSemaphoreCount = planes; + s_info.waitSemaphoreCount = nb_images; break; case PREP_MODE_DECODING_DST: new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; @@ -2062,7 +2116,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, /* Change the image layout to something more optimal for writes. * This also signals the newly created semaphore, making it usable * for synchronization */ - for (int i = 0; i < planes; i++) { + for (int i = 0; i < nb_images; i++) { img_bar[i] = (VkImageMemoryBarrier2) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, .pNext = NULL, @@ -2077,8 +2131,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, .image = frame->img[i], .subresourceRange = (VkImageSubresourceRange) { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = VK_REMAINING_ARRAY_LAYERS, .levelCount = 1, - .layerCount = 1, }, }; @@ -2090,7 +2144,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = planes, + .imageMemoryBarrierCount = nb_images, }; vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info); @@ -2101,7 +2155,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, return err; } -static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, +static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format, int frame_w, int frame_h, int plane) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); @@ -2120,17 +2174,17 @@ static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, VkImageTiling tiling, VkImageUsageFlagBits usage, + VkImageCreateFlags flags, int nb_layers, void *create_pnext) { int err; VkResult ret; AVHWDeviceContext *ctx = hwfc->device_ctx; VulkanDevicePriv *p = 
ctx->internal->priv; + VulkanFramesPriv *fp = hwfc->internal->priv; FFVulkanFunctions *vk = &p->vkfn; AVVulkanDeviceContext *hwctx = ctx->hwctx; - enum AVPixelFormat format = hwfc->sw_format; - const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format); - const int planes = av_pix_fmt_count_planes(format); + AVVulkanFramesContext *frames = hwfc->hwctx; VkExportSemaphoreCreateInfo ext_sem_info = { .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, @@ -2165,17 +2219,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, return AVERROR(ENOMEM); } + // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled) + /* Create the images */ - for (int i = 0; i < planes; i++) { + for (int i = 0; i < fp->nb_images; i++) { VkImageCreateInfo create_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = create_pnext, .imageType = VK_IMAGE_TYPE_2D, - .format = img_fmts[i], + .format = fp->fmts[i], .extent.depth = 1, .mipLevels = 1, - .arrayLayers = 1, - .flags = VK_IMAGE_CREATE_ALIAS_BIT, + .arrayLayers = nb_layers, + .flags = flags, .tiling = tiling, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .usage = usage, @@ -2187,7 +2243,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, }; get_plane_wh(&create_info.extent.width, &create_info.extent.height, - format, hwfc->width, hwfc->height, i); + hwfc->sw_format, hwfc->width, hwfc->height, i); ret = vk->CreateImage(hwctx->act_dev, &create_info, hwctx->alloc, &f->img[i]); @@ -2214,7 +2270,9 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, } f->flags = 0x0; +FF_DISABLE_DEPRECATION_WARNINGS f->tiling = tiling; +FF_ENABLE_DEPRECATION_WARNINGS *frame = f; return 0; @@ -2296,41 +2354,23 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) AVVulkanFramesContext *hwctx = hwfc->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; VulkanFramesPriv *fp = hwfc->internal->priv; - VkExportMemoryAllocateInfo
eminfo[AV_NUM_DATA_POINTERS]; VkExternalMemoryHandleTypeFlags e = 0x0; - VkExternalMemoryImageCreateInfo eiinfo = { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, - .pNext = hwctx->create_pnext, - }; - #ifdef _WIN32 if (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater() ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT); #else - if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) - try_export_flags(hwfc, &eiinfo.handleTypes, &e, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); - if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) - try_export_flags(hwfc, &eiinfo.handleTypes, &e, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); #endif - for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) { - eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; - eminfo[i].pNext = hwctx->alloc_pnext[i]; - eminfo[i].handleTypes = e; - } - - err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, - eiinfo.handleTypes ? &eiinfo : NULL); + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, + hwctx->nb_layers, hwctx->create_pnext); if (err) return NULL; - err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo)); + err = alloc_bind_mem(hwfc, f, NULL, 0); if (err) goto fail; @@ -2389,103 +2429,44 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) VulkanFramesPriv *fp = hwfc->internal->priv; AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; - const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; - const int has_modifiers = !!(p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS); - - /* Default tiling flags */ - hwctx->tiling = hwctx->tiling ? hwctx->tiling : - has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT : - p->use_linear_images ? 
VK_IMAGE_TILING_LINEAR : - VK_IMAGE_TILING_OPTIMAL; - - if (!hwctx->usage) - hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS; - - modifier_info = vk_find_struct(hwctx->create_pnext, - VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); - - /* Get the supported modifiers if the user has not given any. */ - if (has_modifiers && !modifier_info) { - const VkFormat *fmt = av_vkfmt_from_pixfmt(hwfc->sw_format); - VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; - FFVulkanFunctions *vk = &p->vkfn; - VkDrmFormatModifierPropertiesEXT *mod_props; - uint64_t *modifiers; - int modifier_count = 0; - - VkDrmFormatModifierPropertiesListEXT mod_props_list = { - .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, - .pNext = NULL, - .drmFormatModifierCount = 0, - .pDrmFormatModifierProperties = NULL, - }; - VkFormatProperties2 prop = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - .pNext = &mod_props_list, - }; - - /* Get all supported modifiers */ - vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop); + VkImageUsageFlagBits supported_usage; - if (!mod_props_list.drmFormatModifierCount) { - av_log(hwfc, AV_LOG_ERROR, "There are no supported modifiers for the given sw_format\n"); - return AVERROR(EINVAL); - } - - /* Createa structure to hold the modifier list info */ - modifier_info = av_mallocz(sizeof(*modifier_info)); - if (!modifier_info) - return AVERROR(ENOMEM); - - modifier_info->pNext = NULL; - modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT; + /* Defaults */ + if (!hwctx->nb_layers) + hwctx->nb_layers = 1; - /* Add structure to the image creation pNext chain */ - if (!hwctx->create_pnext) - hwctx->create_pnext = modifier_info; - else - vk_link_struct(hwctx->create_pnext, (void *)modifier_info); + /* VK_IMAGE_TILING_OPTIMAL == 0, so no need to check */ - /* Backup the allocated struct to be freed later */ - fp->modifier_info = modifier_info; - - /* Allocate list of modifiers 
*/ - modifiers = av_mallocz(mod_props_list.drmFormatModifierCount * - sizeof(*modifiers)); - if (!modifiers) - return AVERROR(ENOMEM); - - modifier_info->pDrmFormatModifiers = modifiers; + if (!hwctx->usage) + hwctx->usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; - /* Allocate a temporary list to hold all modifiers supported */ - mod_props = av_mallocz(mod_props_list.drmFormatModifierCount * - sizeof(*mod_props)); - if (!mod_props) - return AVERROR(ENOMEM); + err = av_vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, 0, /* drivers must fix feats. */ + &fp->fmts, &fp->nb_images, &fp->aspect, &supported_usage); + if (err < 0) + return err; - mod_props_list.pDrmFormatModifierProperties = mod_props; + fp->fmt = vk_find_format_entry(hwfc->sw_format); - /* Finally get all modifiers from the device */ - vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop); + /* Remove comments once drivers properly signal features for formats */ + if (fp->fmt->vk_planes > 1) // || supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) + hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; - /* Reject any modifiers that don't match our requirements */ - for (int i = 0; i < mod_props_list.drmFormatModifierCount; i++) { - if (!(mod_props[i].drmFormatModifierTilingFeatures & hwctx->usage)) - continue; +// fp->fmt = vk_find_format_entry(hwfc->sw_format); - modifiers[modifier_count++] = mod_props[i].drmFormatModifier; - } + if (!hwctx->img_flags) { + hwctx->img_flags = VK_IMAGE_CREATE_ALIAS_BIT; + if ((fp->fmt->vk_planes > 1 && fp->nb_images == 1) || + (fp->fmt->vkf != fp->fmt->fallback[0])) + hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + } - if (!modifier_count) { - av_log(hwfc, AV_LOG_ERROR, "None of the given modifiers supports" - " the usage flags!\n"); - av_freep(&mod_props); - return AVERROR(EINVAL); - } + if (!hwctx->lock_frame) + hwctx->lock_frame = lock_frame; - 
modifier_info->drmFormatModifierCount = modifier_count; - av_freep(&mod_props); - } + if (!hwctx->unlock_frame) + hwctx->unlock_frame = unlock_frame; err = create_exec_ctx(hwfc, &fp->conv_ctx, dev_hwctx->queue_family_comp_index, @@ -2505,8 +2486,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) return err; /* Test to see if allocation will fail */ - err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, - hwctx->create_pnext); + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, + hwctx->nb_layers, hwctx->create_pnext); if (err) return err; @@ -2522,11 +2503,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) return AVERROR(ENOMEM); } - if (!hwctx->lock_frame) - hwctx->lock_frame = lock_frame; - if (!hwctx->unlock_frame) - hwctx->unlock_frame = unlock_frame; - return 0; } @@ -2602,7 +2578,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, const AVFrame *src, int flags) { VkResult ret; - int err, mapped_mem_count = 0, mem_planes = 0; + int err, nb_mem = 0, mapped_mem_count = 0, mem_planes = 0; AVVkFrame *f = (AVVkFrame *)src->data[0]; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; AVVulkanFramesContext *hwfctx = hwfc->hwctx; @@ -2622,7 +2598,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, } if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) || - !(f->tiling == VK_IMAGE_TILING_LINEAR)) { + !(hwfctx->tiling == VK_IMAGE_TILING_LINEAR)) { av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible " "and linear!\n"); err = AVERROR(EINVAL); @@ -2632,35 +2608,35 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, dst->width = src->width; dst->height = src->height; - mem_planes = hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY ? 
1 : planes; - for (int i = 0; i < mem_planes; i++) { + for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) + nb_mem += !!f->mem[i]; + + for (int i = 0; i < nb_mem; i++) { ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0, VK_WHOLE_SIZE, 0, (void **)&dst->data[i]); if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n", - vk_ret2str(ret)); + av_log(hwfc, AV_LOG_ERROR, "Failed to map %ith frame memory: %s\n", + i, vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } mapped_mem_count++; } - if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) { - for (int i = 0; i < planes; i++) - dst->data[i] = dst->data[0] + f->offset[i]; - } + for (int i = 0; i < planes; i++) + dst->data[i] = dst->data[i] + f->offset[i]; /* Check if the memory contents matter */ if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) && !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } }; - for (int i = 0; i < planes; i++) { + for (int i = 0; i < nb_mem; i++) { map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; map_mem_ranges[i].size = VK_WHOLE_SIZE; map_mem_ranges[i].memory = f->mem[i]; } - ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes, + ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, nb_mem, map_mem_ranges); if (ret != VK_SUCCESS) { av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", @@ -2702,25 +2678,25 @@ static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwma { AVVkFrame *f = hwmap->priv; AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; + const int nb_images = ff_vk_count_images(f); VkSemaphoreWaitInfo wait_info = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, .flags = 0x0, .pSemaphores = f->sem, .pValues = f->sem_value, - .semaphoreCount 
= planes, + .semaphoreCount = nb_images, }; vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX); vulkan_free_internal(f); - for (int i = 0; i < planes; i++) { - vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + for (int i = 0; i < nb_images; i++) { + vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); } @@ -2790,7 +2766,9 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f goto fail; } +FF_DISABLE_DEPRECATION_WARNINGS f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; +FF_ENABLE_DEPRECATION_WARNINGS for (int i = 0; i < desc->nb_layers; i++) { const int planes = desc->layers[i].nb_planes; @@ -2828,7 +2806,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f .mipLevels = 1, .arrayLayers = 1, .flags = 0x0, /* ALIAS flag is implicit for imported images */ - .tiling = f->tiling, + .tiling = hwfctx->tiling, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, @@ -3498,7 +3476,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1); - if (f->tiling == VK_IMAGE_TILING_OPTIMAL) + if (hwfctx ->tiling == VK_IMAGE_TILING_OPTIMAL) continue; vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); @@ -3818,7 +3796,10 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, int bar_num = 0; VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS]; - const int planes = av_pix_fmt_count_planes(pix_fmt); + const int img_planes = fp->fmt->vk_planes; + const int nb_images = ff_vk_count_images(frame); + int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt); + const AVPixFmtDescriptor *desc = 
av_pix_fmt_desc_get(pix_fmt); VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; @@ -3831,8 +3812,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .pWaitSemaphoreValues = frame->sem_value, .pSignalSemaphoreValues = sem_signal_values, - .waitSemaphoreValueCount = planes, - .signalSemaphoreValueCount = planes, + .waitSemaphoreValueCount = nb_images, + .signalSemaphoreValueCount = nb_images, }; VkSubmitInfo s_info = { @@ -3841,8 +3822,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, .pSignalSemaphores = frame->sem, .pWaitSemaphores = frame->sem, .pWaitDstStageMask = sem_wait_dst, - .signalSemaphoreCount = planes, - .waitSemaphoreCount = planes, + .signalSemaphoreCount = nb_images, + .waitSemaphoreCount = nb_images, }; vkfc->lock_frame(hwfc, frame); @@ -3850,11 +3831,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, if ((err = wait_start_exec_ctx(hwfc, ectx))) goto end; - for (int i = 0; i < planes; i++) + for (int i = 0; i < nb_images; i++) sem_signal_values[i] = frame->sem_value[i] + 1; /* Change the image layout to something more optimal for transfers */ - for (int i = 0; i < planes; i++) { + for (int i = 0; i < nb_images; i++) { VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; VkAccessFlags new_access = to_buf ? 
VK_ACCESS_TRANSFER_READ_BIT : @@ -3890,13 +3871,19 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, 0, NULL, 0, NULL, bar_num, img_bar); /* Schedule a copy for each plane */ - for (int i = 0; i < planes; i++) { + for (int i = 0; i < pixfmt_planes; i++) { + int idx = FFMIN(i, nb_images - 1); + VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, }; + ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; VkBufferImageCopy buf_reg = { .bufferOffset = buf_offsets[i], .bufferRowLength = buf_stride[i] / desc->comp[i].step, .imageSubresource.layerCount = 1, - .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.aspectMask = plane_aspect[(img_planes != 1) + i*(img_planes != 1)], .imageOffset = { 0, 0, 0, }, }; @@ -3907,11 +3894,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, }; if (to_buf) - vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i], + vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], frame->layout[idx], vkbuf->buf, 1, &buf_reg); else - vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i], - frame->layout[i], 1, &buf_reg); + vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx], + frame->layout[idx], 1, &buf_reg); } /* When uploading, do this asynchronously if the source is refcounted by @@ -3928,7 +3915,7 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1))) goto end; } - if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes))) + if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, pixfmt_planes))) goto end; err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); } else { @@ -3948,6 +3935,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, AVVkFrame *f = (AVVkFrame *)vkf->data[0]; 
AVHWDeviceContext *dev_ctx = hwfc->device_ctx; AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; + AVVulkanFramesContext *fc = hwfc->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; @@ -3970,7 +3958,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, return AVERROR(EINVAL); /* For linear, host visiable images */ - if (f->tiling == VK_IMAGE_TILING_LINEAR && + if (fc->tiling == VK_IMAGE_TILING_LINEAR && f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { AVFrame *map = av_frame_alloc(); if (!map) diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index e89fa52927..13a40fa563 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -169,26 +169,31 @@ typedef enum AVVkFrameFlags { */ typedef struct AVVulkanFramesContext { /** - * Controls the tiling of allocated frames. If left as optimal tiling, - * then during av_hwframe_ctx_init() will decide based on whether the device - * supports DRM modifiers, or if the linear_images flag is set, otherwise - * will allocate optimally-tiled images. + * Controls the tiling of allocated frames. + * If left as VK_IMAGE_TILING_OPTIMAL (0), will use optimal tiling. + * Can be set to VK_IMAGE_TILING_LINEAR to force linear images, + * or VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT to force DMABUF-backed + * images. */ VkImageTiling tiling; /** - * Defines extra usage of output frames. If left as 0, the following bits - * are set: TRANSFER_SRC, TRANSFER_DST. SAMPLED and STORAGE. + * Defines extra usage of output frames. If non-zero, all flags MUST be + * supported by the VkFormat. Otherwise, will use supported flags amongst: + * - VK_IMAGE_USAGE_SAMPLED_BIT + * - VK_IMAGE_USAGE_STORAGE_BIT + * - VK_IMAGE_USAGE_TRANSFER_SRC_BIT + * - VK_IMAGE_USAGE_TRANSFER_DST_BIT */ VkImageUsageFlagBits usage; /** * Extension data for image creation. 
- * If VkImageDrmFormatModifierListCreateInfoEXT is present in the chain, - * and the device supports DRM modifiers, then images will be allocated - * with the specific requested DRM modifiers. + * If DRM tiling is used, a VkImageDrmFormatModifierListCreateInfoEXT structure + * can be added to specify the exact modifier to use. + * * Additional structures may be added at av_hwframe_ctx_init() time, - * which will be freed automatically on uninit(), so users need only free + * which will be freed automatically on uninit(), so users must only free * any structures they've allocated themselves. */ void *create_pnext; @@ -209,6 +214,25 @@ typedef struct AVVulkanFramesContext { */ AVVkFrameFlags flags; + /** + * Flags to set during image creation. If unset, defaults to + * VK_IMAGE_CREATE_ALIAS_BIT. + */ + VkImageCreateFlags img_flags; + + /** + * Vulkan format for each image. MUST be compatible with the pixel format. + * If unset, will be automatically set. + * There are at most two compatible formats for a frame - a multiplane + * format, and a single-plane multi-image format. + */ + VkFormat format[AV_NUM_DATA_POINTERS]; + + /** + * Number of layers each image will have. + */ + int nb_layers; + /** * Locks a frame, preventing other threads from changing frame properties. * If set to NULL, will be set to lavu-internal functions that utilize a @@ -228,14 +252,7 @@ typedef struct AVVulkanFramesContext { } AVVulkanFramesContext; /* - * Frame structure, the VkFormat of the image will always match - * the pool's sw_format. - * All frames, imported or allocated, will be created with the - * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed. - * - * If all queue family indices in the device context are the same, - * images will be created with the EXCLUSIVE sharing mode. Otherwise, all images - * will be created using the CONCURRENT sharing mode. + * Frame structure. 
* * @note the size of this structure is not part of the ABI, to allocate * you must use @av_vk_frame_alloc(). @@ -248,8 +265,9 @@ struct AVVkFrame { /** * The same tiling must be used for all images in the frame. + * DEPRECATED: use AVVulkanFramesContext.tiling instead. */ - VkImageTiling tiling; + attribute_deprecated VkImageTiling tiling; /** * Memory backing the images. Could be less than the amount of planes, @@ -265,13 +283,13 @@ struct AVVkFrame { VkMemoryPropertyFlagBits flags; /** - * Updated after every barrier + * Updated after every barrier. One per VkImage. */ VkAccessFlagBits access[AV_NUM_DATA_POINTERS]; VkImageLayout layout[AV_NUM_DATA_POINTERS]; /** - * Synchronization timeline semaphores, one for each sw_format plane. + * Synchronization timeline semaphores, one for each VkImage. * Must not be freed manually. Must be waited on at every submission using * the value in sem_value, and must be signalled at every submission, * using an incremented value. @@ -280,6 +298,7 @@ struct AVVkFrame { /** * Up to date semaphore value at which each image becomes accessible. + * One per VkImage. * Clients must wait on this value when submitting a command queue, * and increment it when signalling. */ @@ -291,16 +310,18 @@ struct AVVkFrame { struct AVVkFrameInternal *internal; /** - * Describes the binding offset of each plane to the VkDeviceMemory. + * Describes the binding offset of each image to the VkDeviceMemory. + * One per VkImage. */ ptrdiff_t offset[AV_NUM_DATA_POINTERS]; /** * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if * the image was allocated with the CONCURRENT concurrency option. + * One per VkImage. */ uint32_t queue_family[AV_NUM_DATA_POINTERS]; -} AVVkFrame; +}; /** * Allocates a single AVVkFrame and initializes everything as 0. @@ -309,7 +330,7 @@ struct AVVkFrame { AVVkFrame *av_vk_frame_alloc(void); /** - * Returns the format of each image up to the number of planes for a given sw_format. 
+ * Returns the optimal format for a given sw_format, one for each plane. * Returns NULL on unsupported formats. */ const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p); -- 2.39.2 [-- Attachment #53: 0052-hwcontext_vulkan-don-t-change-properties-if-prepare_.patch --] [-- Type: text/x-diff, Size: 2638 bytes --] From a9ac0aa322a3369ccb5167ae1a8a984faf2e24d1 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 04:14:08 +0100 Subject: [PATCH 52/72] hwcontext_vulkan: don't change properties if prepare_frame fails --- libavutil/hwcontext_vulkan.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 027ecc76b1..75004037da 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -2113,16 +2113,13 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, break; } - /* Change the image layout to something more optimal for writes. 
- * This also signals the newly created semaphore, making it usable - * for synchronization */ for (int i = 0; i < nb_images; i++) { img_bar[i] = (VkImageMemoryBarrier2) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, .pNext = NULL, .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, - .srcAccessMask = 0x0, .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT, + .srcAccessMask = frame->access[i], .dstAccessMask = new_access, .oldLayout = frame->layout[i], .newLayout = new_layout, @@ -2135,21 +2132,23 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, .levelCount = 1, }, }; - - frame->layout[i] = img_bar[i].newLayout; - frame->access[i] = img_bar[i].dstAccessMask; } - dep_info = (VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_images, - }; - - vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info); + vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_images, + }); err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); + if (err >= 0) { + for (int i = 0; i < nb_images; i++) { + frame->layout[i] = img_bar[i].newLayout; + frame->access[i] = img_bar[i].dstAccessMask; + frame->queue_family[i] = img_bar[i].dstQueueFamilyIndex; + } + } vkfc->unlock_frame(hwfc, frame); return err; -- 2.39.2 [-- Attachment #54: 0053-hwcontext_vulkan-disable-host-mapping-frames-for-tra.patch --] [-- Type: text/x-diff, Size: 1033 bytes --] From 51c352d34c0ab2ae5eea1df1753d2a8d615c33d8 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 04:14:24 +0100 Subject: [PATCH 53/72] hwcontext_vulkan: disable host-mapping frames for transfers Currently broken for multiplane surfaces. 
--- libavutil/hwcontext_vulkan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 75004037da..647a072bdd 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -3946,7 +3946,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, const int planes = av_pix_fmt_count_planes(swf->format); int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; - const int map_host = !!(p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY); + const int map_host = 0; if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); -- 2.39.2 [-- Attachment #55: 0054-hwcontext_vulkan-disable-all-mapping-code.patch --] [-- Type: text/x-diff, Size: 5612 bytes --] From a871a7d4ffe3f94488cd5091794e683c720bc5df Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 04:30:00 +0100 Subject: [PATCH 54/72] hwcontext_vulkan: disable all mapping code Multiplane formats are currently not easy to map. 
--- libavutil/hwcontext_vulkan.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 647a072bdd..761a63ddd7 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -44,7 +44,7 @@ #include "vulkan.h" #include "vulkan_loader.h" -#if CONFIG_LIBDRM +#if 0 #include <xf86drm.h> #include <drm_fourcc.h> #include "hwcontext_drm.h" @@ -54,7 +54,7 @@ #endif #endif -#if CONFIG_CUDA +#if 0 #include "hwcontext_cuda_internal.h" #include "cuda_check.h" #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) @@ -139,7 +139,7 @@ typedef struct VulkanFramesPriv { typedef struct AVVkFrameInternal { pthread_mutex_t update_mutex; -#if CONFIG_CUDA +#if 0 /* Importing external memory into cuda is really expensive so we keep the * memory imported all the time */ AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */ @@ -1718,7 +1718,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, * by the following checks (e.g. non-PCIe ARM GPU), having an empty * dev_select will mean it'll get picked. 
*/ switch(src_ctx->type) { -#if CONFIG_LIBDRM +#if 0 #if CONFIG_VAAPI case AV_HWDEVICE_TYPE_VAAPI: { AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; @@ -1753,7 +1753,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx, return vulkan_device_create_internal(ctx, &dev_select, opts, flags); } #endif -#if CONFIG_CUDA +#if 0 case AV_HWDEVICE_TYPE_CUDA: { AVHWDeviceContext *cuda_cu = src_ctx; AVCUDADeviceContext *src_hwctx = src_ctx->hwctx; @@ -1789,7 +1789,7 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, 0, NULL, NULL, NULL, NULL) >= 0; } -#if CONFIG_CUDA +#if 0 if (p->dev_is_nvidia) count++; #endif @@ -1807,7 +1807,7 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, } } -#if CONFIG_CUDA +#if 0 if (p->dev_is_nvidia) constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA; #endif @@ -1890,7 +1890,7 @@ static void vulkan_free_internal(AVVkFrame *f) { av_unused AVVkFrameInternal *internal = f->internal; -#if CONFIG_CUDA +#if 0 if (internal->cuda_fc_ref) { AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data; int planes = av_pix_fmt_count_planes(cuda_fc->sw_format); @@ -2672,7 +2672,7 @@ fail: return err; } -#if CONFIG_LIBDRM +#if 0 static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) { AVVkFrame *f = hwmap->priv; @@ -2746,6 +2746,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; + AVVulkanFramesContext *hwfctx = hwfc->hwctx; VulkanFramesPriv *fp = hwfc->internal->priv; const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES]; @@ -3076,7 +3077,7 @@ fail: #endif #endif -#if CONFIG_CUDA +#if 0 static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, AVBufferRef *cuda_hwfc, const AVFrame *frame) @@ -3346,7 +3347,7 @@ static int 
vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; switch (src->format) { -#if CONFIG_LIBDRM +#if 0 #if CONFIG_VAAPI case AV_PIX_FMT_VAAPI: if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) @@ -3365,7 +3366,7 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, } } -#if CONFIG_LIBDRM +#if 0 typedef struct VulkanDRMMapping { AVDRMFrameDescriptor drm_desc; AVVkFrame *source; @@ -3533,7 +3534,7 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; switch (dst->format) { -#if CONFIG_LIBDRM +#if 0 case AV_PIX_FMT_DRM_PRIME: if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS)) return vulkan_map_to_drm(hwfc, dst, src, flags); @@ -4091,7 +4092,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; switch (src->format) { -#if CONFIG_CUDA +#if 0 case AV_PIX_FMT_CUDA: #ifdef _WIN32 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && @@ -4110,7 +4111,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, } } -#if CONFIG_CUDA +#if 0 static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, const AVFrame *src) { @@ -4209,7 +4210,7 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; switch (dst->format) { -#if CONFIG_CUDA +#if 0 case AV_PIX_FMT_CUDA: #ifdef _WIN32 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && -- 2.39.2 [-- Attachment #56: 0055-lavfi-add-lavfi-only-Vulkan-infrastructure.patch --] [-- Type: text/x-diff, Size: 21753 bytes --] From 6bd109733484568c98c2d08935d9c7f05ad7803c Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:10:58 +0100 Subject: [PATCH 55/72] lavfi: add 
lavfi-only Vulkan infrastructure --- libavfilter/Makefile | 6 + libavfilter/vulkan_filter.c | 241 +++++++++++++++++++- libavfilter/vulkan_filter.h | 25 ++ {libavutil => libavfilter}/vulkan_glslang.c | 19 +- {libavutil => libavfilter}/vulkan_shaderc.c | 8 +- libavfilter/vulkan_spirv.h | 45 ++++ 6 files changed, 330 insertions(+), 14 deletions(-) rename {libavutil => libavfilter}/vulkan_glslang.c (95%) rename {libavutil => libavfilter}/vulkan_shaderc.c (96%) create mode 100644 libavfilter/vulkan_spirv.h diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 0173b11870..f02e787d61 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -615,6 +615,10 @@ OBJS-$(CONFIG_AVSYNCTEST_FILTER) += src_avsynctest.o OBJS-$(CONFIG_AMOVIE_FILTER) += src_movie.o OBJS-$(CONFIG_MOVIE_FILTER) += src_movie.o +# vulkan libs +OBJS-$(CONFIG_LIBGLSLANG) += vulkan_glslang.o +OBJS-$(CONFIG_LIBSHADERC) += vulkan_shaderc.o + # Objects duplicated from other libraries for shared builds SHLIBOBJS += log2_tab.o @@ -628,6 +632,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_filter.h +SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h +SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h TOOLS = graph2dot TESTPROGS = drawutils filtfmts formats integral diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c index e22541bd23..ad88931c4b 100644 --- a/libavfilter/vulkan_filter.c +++ b/libavfilter/vulkan_filter.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -54,7 +56,6 @@ int ff_vk_filter_config_input(AVFilterLink *inlink) int err; AVFilterContext *avctx = inlink->dst; FFVulkanContext *s = avctx->priv; - FFVulkanFunctions *vk = &s->vkfn; AVHWFramesContext *input_frames; if (!inlink->hw_frames_ctx) { @@ -85,8 +86,7 @@ int ff_vk_filter_config_input(AVFilterLink *inlink) if (err < 0) return err; - vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props); - vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + ff_vk_load_props(s); /* Default output parameters match input parameters. */ s->input_format = input_frames->sw_format; @@ -189,3 +189,238 @@ int ff_vk_filter_init(AVFilterContext *avctx) return 0; } + +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)); + + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f)); + + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, + NULL); + + 
ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; +} + +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pls[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView tmp_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)); + + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in)); + RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp)); + 
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + + ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + for (int i = 0; i < 2; i++) { + FFVulkanPipeline *pl = pls[i]; + AVFrame *src_f = !i ? in : tmp; + AVFrame *dst_f = !i ? tmp : out; + VkImageView *src_views = !i ? in_views : tmp_views; + VkImageView *dst_views = !i ? tmp_views : out_views; + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0, + !i ? 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : + VK_IMAGE_LAYOUT_GENERAL, + sampler); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, + NULL); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); + } + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; +} + +int ff_vk_filter_process_2in(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, + AVFrame *out, AVFrame *in1, AVFrame *in2, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in1_views[AV_NUM_DATA_POINTERS]; + VkImageView in2_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in1, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in2, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)); + + RET(ff_vk_create_imageviews(vkctx, exec, in1_views, in1)); + RET(ff_vk_create_imageviews(vkctx, exec, in2_views, in2)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + + ff_vk_frame_barrier(vkctx, exec, in1, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, in2, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + 
VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in1, in1_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in2, in2_views, 0, 1, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, 2, + VK_IMAGE_LAYOUT_GENERAL, + NULL); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; +} diff --git a/libavfilter/vulkan_filter.h b/libavfilter/vulkan_filter.h index bfdb9b2d7d..2a2a0e6e97 100644 --- a/libavfilter/vulkan_filter.h +++ b/libavfilter/vulkan_filter.h @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -31,4 +33,27 @@ int ff_vk_filter_config_input (AVFilterLink *inlink); int ff_vk_filter_config_output (AVFilterLink *outlink); int ff_vk_filter_config_output_inplace(AVFilterLink *outlink); +/** + * Submit a compute shader with a single in and single out for execution. 
+ */ +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Submit a compute shader with a single in and single out with 2 stages. + */ +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pls[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Two inputs, one output + */ +int ff_vk_filter_process_2in(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, + AVFrame *out, AVFrame *in1, AVFrame *in2, + VkSampler sampler, void *push_src, size_t push_size); + #endif /* AVFILTER_VULKAN_FILTER_H */ diff --git a/libavutil/vulkan_glslang.c b/libavfilter/vulkan_glslang.c similarity index 95% rename from libavutil/vulkan_glslang.c rename to libavfilter/vulkan_glslang.c index e7785f6d40..845a530ee0 100644 --- a/libavutil/vulkan_glslang.c +++ b/libavfilter/vulkan_glslang.c @@ -21,8 +21,9 @@ #include <glslang/build_info.h> #include <glslang/Include/glslang_c_interface.h> -#include "mem.h" -#include "avassert.h" +#include "vulkan_spirv.h" +#include "libavutil/mem.h" +#include "libavutil/avassert.h" static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER; static int glslc_refcount = 0; @@ -176,11 +177,13 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, av_assert0(glslc_refcount); + *opaque = NULL; + if (!(glslc_shader = glslang_shader_create(&glslc_input))) return AVERROR(ENOMEM); if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n", glslang_shader_get_info_log(glslc_shader), glslang_shader_get_info_debug_log(glslc_shader)); @@ -189,7 +192,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, } if 
(!glslang_shader_parse(glslc_shader, &glslc_input)) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n", glslang_shader_get_info_log(glslc_shader), glslang_shader_get_info_debug_log(glslc_shader)); @@ -206,7 +209,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT | GLSLANG_MSG_VULKAN_RULES_BIT)) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n", glslang_program_get_info_log(glslc_program), glslang_program_get_info_debug_log(glslc_program)); @@ -219,10 +222,10 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, messages = glslang_program_SPIRV_get_messages(glslc_program); if (messages) { - ff_vk_print_shader(avctx, shd, AV_LOG_WARNING); + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); av_log(avctx, AV_LOG_WARNING, "%s\n", messages); } else { - ff_vk_print_shader(avctx, shd, AV_LOG_VERBOSE); + ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE); } glslang_shader_delete(glslc_shader); @@ -257,7 +260,7 @@ static void glslc_uninit(FFVkSPIRVCompiler **ctx) av_freep(ctx); } -static FFVkSPIRVCompiler *ff_vk_glslang_init(void) +FFVkSPIRVCompiler *ff_vk_glslang_init(void) { FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); if (!ret) diff --git a/libavutil/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c similarity index 96% rename from libavutil/vulkan_shaderc.c rename to libavfilter/vulkan_shaderc.c index bd40edf187..38be1030ad 100644 --- a/libavutil/vulkan_shaderc.c +++ b/libavfilter/vulkan_shaderc.c @@ -18,7 +18,8 @@ #include <shaderc/shaderc.h> -#include "mem.h" +#include "libavutil/mem.h" +#include "vulkan_spirv.h" static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, FFVkSPIRVShader *shd, uint8_t **data, @@ -43,6 +44,7 @@ 
static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, }; shaderc_compile_options_t opts = shaderc_compile_options_initialize(); + *opaque = NULL; if (!opts) return AVERROR(ENOMEM); @@ -65,7 +67,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE; - ff_vk_print_shader(avctx, shd, loglevel); + ff_vk_shader_print(avctx, shd, loglevel); if (message && (err || warn)) av_log(avctx, loglevel, "%s\n", message); status = ret < FF_ARRAY_ELEMS(shdc_result) ? shdc_result[ret] : "unknown"; @@ -104,7 +106,7 @@ static void shdc_uninit(FFVkSPIRVCompiler **ctx) av_freep(ctx); } -static FFVkSPIRVCompiler *ff_vk_shaderc_init(void) +FFVkSPIRVCompiler *ff_vk_shaderc_init(void) { FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); if (!ret) diff --git a/libavfilter/vulkan_spirv.h b/libavfilter/vulkan_spirv.h new file mode 100644 index 0000000000..5638cd9696 --- /dev/null +++ b/libavfilter/vulkan_spirv.h @@ -0,0 +1,45 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VULKAN_SPIRV_H +#define AVFILTER_VULKAN_SPIRV_H + +#include "libavutil/vulkan.h" + +#include "vulkan.h" +#include "config.h" + +typedef struct FFVkSPIRVCompiler { + void *priv; + int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, + struct FFVkSPIRVShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, void **opaque); + void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); + void (*uninit)(struct FFVkSPIRVCompiler **ctx); +} FFVkSPIRVCompiler; + +#if CONFIG_LIBGLSLANG +FFVkSPIRVCompiler *ff_vk_glslang_init(void); +#define ff_vk_spirv_init ff_vk_glslang_init +#endif +#if CONFIG_LIBSHADERC +FFVkSPIRVCompiler *ff_vk_shaderc_init(void); +#define ff_vk_spirv_init ff_vk_shaderc_init +#endif + +#endif /* AVFILTER_VULKAN_SPIRV_H */ -- 2.39.2 [-- Attachment #57: 0056-avgblur_vulkan-port-for-the-rewrite.patch --] [-- Type: text/x-diff, Size: 18269 bytes --] From b14473b21aa057181ec85e0ea3bac3e5fa053875 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:11:19 +0100 Subject: [PATCH 56/72] avgblur_vulkan: port for the rewrite --- libavfilter/vf_avgblur_vulkan.c | 339 ++++++++++---------------------- 1 file changed, 108 insertions(+), 231 deletions(-) diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c index d118ce802c..17b2167951 100644 --- a/libavfilter/vf_avgblur_vulkan.c +++ b/libavfilter/vf_avgblur_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -19,23 +21,20 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" -#define CGS 32 - typedef struct AvgBlurVulkanContext { FFVulkanContext vkctx; int initialized; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl_hor; - FFVulkanPipeline *pl_ver; - - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo tmp_images[3]; - VkDescriptorImageInfo output_images[3]; + VkSampler sampler; + FFVulkanPipeline pl_hor; + FFVkSPIRVShader shd_hor; + FFVulkanPipeline pl_ver; + FFVkSPIRVShader shd_ver; int size_x; int size_y; @@ -71,18 +70,41 @@ static const char blur_kernel[] = { static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - FFVkSPIRVShader *shd; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; AvgBlurVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; - FFVulkanDescriptorSetBinding desc_i[2] = { + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "avgblur_hor_compute", + VK_SHADER_STAGE_COMPUTE_BIT)); + RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "avgblur_ver_compute", + VK_SHADER_STAGE_COMPUTE_BIT)); + shd = &s->shd_hor; + + ff_vk_shader_set_compute_sizes(shd, 32, 1, 1); + + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_img", .type = 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_img", @@ -95,238 +117,79 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) }, }; - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0)); - desc_i[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!desc_i[0].sampler) - return AVERROR_EXTERNAL; - - { /* Create shader for the horizontal pass */ - desc_i[0].updater = s->input_images; - desc_i[1].updater = s->tmp_images; - - s->pl_hor = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl_hor) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl_hor, "avgblur_compute_hor", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl_hor, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); - - GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x - 1); - GLSLC(0, #define INC(x) (ivec2(x, 0)) ); - GLSLC(0, #define DIR(var) (var.x) ); - GLSLD( blur_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->planes & (1 << i)) { - GLSLF(2, distort(pos, %i); ,i); - } else { - GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - } - GLSLC(1, } ); + GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x - 1); + GLSLC(0, #define INC(x) (ivec2(x, 0)) ); + GLSLC(0, #define DIR(var) (var.x) ); + GLSLD( blur_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int 
i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->planes & (1 << i)) { + GLSLF(2, distort(pos, %i); ,i); + } else { + GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - - RET(ff_vk_init_pipeline_layout(vkctx, s->pl_hor)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl_hor)); + GLSLC(1, } ); } - - { /* Create shader for the vertical pass */ - desc_i[0].updater = s->tmp_images; - desc_i[1].updater = s->output_images; - - s->pl_ver = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl_ver) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl_ver, "avgblur_compute_ver", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 }); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl_ver, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); - - GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y - 1); - GLSLC(0, #define INC(x) (ivec2(0, x)) ); - GLSLC(0, #define DIR(var) (var.y) ); - GLSLD( blur_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->planes & (1 << i)) { - GLSLF(2, distort(pos, %i); ,i); - } else { - GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - } - GLSLC(1, } ); + GLSLC(0, } ); + + shd = &s->shd_ver; + ff_vk_shader_set_compute_sizes(shd, 1, 32, 1); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0)); + + GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y - 1); + GLSLC(0, #define INC(x) (ivec2(0, x)) ); + GLSLC(0, #define DIR(var) (var.y) ); + GLSLD( blur_kernel 
); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->planes & (1 << i)) { + GLSLF(2, distort(pos, %i); ,i); + } else { + GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - - RET(ff_vk_init_pipeline_layout(vkctx, s->pl_ver)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl_ver)); + GLSLC(1, } ); } + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, &s->shd_hor, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, &s->shd_hor, spv_data, spv_len, "main")); + RET(spv->compile_shader(spv, ctx, &s->shd_ver, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, &s->shd_ver, spv_data, spv_len, "main")); - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl_hor, &s->shd_hor)); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl_ver, &s->shd_ver)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl_hor)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl_ver)); s->initialized = 1; return 0; fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f, AVFrame *in_f) -{ - int err; - VkCommandBuffer cmd_buf; - AvgBlurVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - AVVkFrame *in = (AVVkFrame *)in_f->data[0]; - AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0]; - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - int planes = 
av_pix_fmt_count_planes(s->vkctx.output_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->tmp_images[i].imageView, tmp->img[i], - output_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl_hor, 0); - ff_vk_update_descriptor_set(vkctx, s->pl_ver, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, - .oldLayout = tmp->layout[i], - .newLayout = s->tmp_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = tmp->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = 
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); - - in->layout[i] = bar[0].newLayout; - in->access[i] = bar[0].dstAccessMask; - - tmp->layout[i] = bar[1].newLayout; - tmp->access[i] = bar[1].dstAccessMask; - - out->layout[i] = bar[2].newLayout; - out->access[i] = bar[2].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_hor); - - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_ver); - - vk->CmdDispatch(cmd_buf, s->vkctx.output_width, - FFALIGN(s->vkctx.output_height, CGS)/CGS, 1); - - ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx,s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); return err; - -fail: - ff_vk_discard_exec_deps(s->exec); - return err; } static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) @@ -352,7 +215,9 @@ static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, tmp, in)); + RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e, + (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver }, + out, tmp, in, s->sampler, NULL, 0)); err = 
av_frame_copy_props(out, in); if (err < 0) @@ -373,6 +238,18 @@ fail: static void avgblur_vulkan_uninit(AVFilterContext *avctx) { AvgBlurVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl_hor); + ff_vk_pipeline_free(vkctx, &s->pl_ver); + ff_vk_shader_free(vkctx, &s->shd_hor); + ff_vk_shader_free(vkctx, &s->shd_ver); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); ff_vk_uninit(&s->vkctx); -- 2.39.2 [-- Attachment #58: 0057-blend_vulkan-port-for-the-rewrite.patch --] [-- Type: text/x-diff, Size: 16613 bytes --] From 83edf3b91ffaed33b2103a6ba743487850f5325c Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:11:43 +0100 Subject: [PATCH 57/72] blend_vulkan: port for the rewrite --- libavfilter/vf_blend_vulkan.c | 315 +++++++++++----------------------- 1 file changed, 102 insertions(+), 213 deletions(-) diff --git a/libavfilter/vf_blend_vulkan.c b/libavfilter/vf_blend_vulkan.c index fcc21cbc8d..7ffdc9f3bd 100644 --- a/libavfilter/vf_blend_vulkan.c +++ b/libavfilter/vf_blend_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com> + * Copyright (c) Lynne + * * The blend modes are based on the blend.c. * * This file is part of FFmpeg. 
@@ -22,12 +24,11 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #include "framesync.h" #include "blend.h" -#define CGS 32 - #define IN_TOP 0 #define IN_BOTTOM 1 @@ -40,20 +41,18 @@ typedef struct FilterParamsVulkan { typedef struct BlendVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; FFFrameSync fs; - VkDescriptorImageInfo top_images[3]; - VkDescriptorImageInfo bottom_images[3]; - VkDescriptorImageInfo output_images[3]; + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + VkSampler sampler; FilterParamsVulkan params[4]; double all_opacity; enum BlendMode all_mode; - - int initialized; } BlendVulkanContext; #define DEFINE_BLEND_MODE(MODE, EXPR) \ @@ -125,223 +124,102 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar static av_cold int init_filter(AVFilterContext *avctx) { int err = 0; - FFVkSampler *sampler; - FFVkSPIRVShader *shd; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; BlendVulkanContext *s = avctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!sampler) + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); return AVERROR_EXTERNAL; - - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - { - FFVulkanDescriptorSetBinding image_descs[] = { - { - .name = "top_images", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->top_images, 
- .sampler = sampler, - }, - { - .name = "bottom_images", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->bottom_images, - .sampler = sampler, - }, - { - .name = "output_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - shd = ff_vk_init_shader(s->pl, "blend_compute", image_descs[0].stages); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, CGS, 1 }); - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); - - for (int i = 0, j = 0; i < planes; i++) { - for (j = 0; j < i; j++) - if (s->params[i].blend_func == s->params[j].blend_func) - break; - /* note: the bracket is needed, for GLSLD is a macro with multiple statements. 
*/ - if (j == i) { - GLSLD(s->params[i].blend_func); - } - } - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i); - GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i); - GLSLF(2, const float opacity = %f; ,s->params[i].opacity); - GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend); - GLSLC(0, ); - GLSLF(2, imageStore(output_images[%i], pos, dst); ,i); - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); } - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); - - s->initialized = 1; - -fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_frame, AVFrame *top_frame, AVFrame *bottom_frame) -{ - int err = 0; - VkCommandBuffer cmd_buf; - BlendVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *out = (AVVkFrame *)out_frame->data[0]; - AVVkFrame *top = (AVVkFrame *)top_frame->data[0]; - AVVkFrame *bottom = (AVVkFrame *)bottom_frame->data[0]; - - AVHWFramesContext *top_fc = (AVHWFramesContext*)top_frame->hw_frames_ctx->data; - AVHWFramesContext *bottom_fc = (AVHWFramesContext*)bottom_frame->hw_frames_ctx->data; - - const VkFormat *top_formats = av_vkfmt_from_pixfmt(top_fc->sw_format); - const VkFormat *bottom_formats = av_vkfmt_from_pixfmt(bottom_fc->sw_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; 
i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->top_images[i].imageView, top->img[i], - top_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->bottom_images[i].imageView, bottom->img[i], - bottom_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->top_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->bottom_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "blend_compute", VK_SHADER_STAGE_COMPUTE_BIT)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "top_images", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "bottom_images", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0)); + + for (int i = 0, j = 0; i < planes; i++) { + for (j = 0; j < i; j++) + if (s->params[i].blend_func == s->params[j].blend_func) + break; + /* note: the bracket is needed, for GLSLD is a macro with 
multiple statements. */ + if (j == i) { + GLSLD(s->params[i].blend_func); + } } - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = top->layout[i], - .newLayout = s->top_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = top->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = bottom->layout[i], - .newLayout = s->bottom_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = bottom->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - top->layout[i] = barriers[0].newLayout; - top->access[i] = barriers[0].dstAccessMask; - - 
bottom->layout[i] = barriers[1].newLayout; - bottom->access[i] = barriers[1].dstAccessMask; - - out->layout[i] = barriers[2].newLayout; - out->access[i] = barriers[2].dstAccessMask; + GLSLC(0, ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i); + GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i); + GLSLF(2, const float opacity = %f; ,s->params[i].opacity); + GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend); + GLSLC(0, ); + GLSLF(2, imageStore(output_images[%i], pos, dst); ,i); + GLSLC(1, } ); } + GLSLC(0, } ); - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS) / CGS, - FFALIGN(s->vkctx.output_height, CGS) / CGS, 1); - - ff_vk_add_exec_dep(vkctx, s->exec, top_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, bottom_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + RET(spv->compile_shader(spv, avctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); - ff_vk_qf_rotate(&s->qf); - - return 0; + s->initialized = 1; fail: - ff_vk_discard_exec_deps(s->exec); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + return err; } @@ -375,7 +253,9 @@ static int blend_frame(FFFrameSync *fs) RET(init_filter(avctx)); } - RET(process_frames(avctx, out, top, bottom)); + RET(ff_vk_filter_process_2in(&s->vkctx, &s->e, &s->pl, + out, top, bottom, + s->sampler, NULL, 0)); return ff_filter_frame(outlink, out); @@ -396,10 +276,19 @@ static av_cold int init(AVFilterContext *avctx) static 
av_cold void uninit(AVFilterContext *avctx) { BlendVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; - ff_framesync_uninit(&s->fs); + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); ff_vk_uninit(&s->vkctx); + ff_framesync_uninit(&s->fs); s->initialized = 0; } -- 2.39.2 [-- Attachment #59: 0058-chromaber_vulkan-port-for-the-rewrite.patch --] [-- Type: text/x-diff, Size: 14904 bytes --] From 3328104c3ec2aa1412b5c8ea33ef8a96249acdd9 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:11:53 +0100 Subject: [PATCH 58/72] chromaber_vulkan: port for the rewrite --- libavfilter/vf_chromaber_vulkan.c | 288 ++++++++++-------------------- 1 file changed, 99 insertions(+), 189 deletions(-) diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c index b9423e417e..24649f7b25 100644 --- a/libavfilter/vf_chromaber_vulkan.c +++ b/libavfilter/vf_chromaber_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -19,21 +21,18 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" -#define CGROUPS (int [3]){ 32, 32, 1 } - typedef struct ChromaticAberrationVulkanContext { FFVulkanContext vkctx; int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; + FFVkSPIRVShader shd; + VkSampler sampler; /* Push constants / options */ struct { @@ -68,205 +67,105 @@ static const char distort_chroma_kernel[] = { static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err; - FFVkSampler *sampler; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; ChromaticAberrationVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - /* Create a sampler */ - sampler = ff_vk_init_sampler(vkctx, 0, VK_FILTER_LINEAR); - if (!sampler) - return AVERROR_EXTERNAL; - - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; /* Normalize options */ s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f; s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f; - { /* Create the shader */ - FFVulkanDescriptorSetBinding desc_i[2] = { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, - .sampler = sampler, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = 
ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "chromaber_compute", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, CGROUPS); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, vec2 dist; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_add_push_constant(s->pl, 0, sizeof(s->opts), - VK_SHADER_STAGE_COMPUTE_BIT); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */ - - GLSLD( distort_chroma_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - if (planes == 1) { - GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); - } else { - GLSLC(1, ivec2 size = imageSize(output_img[0]); ); - GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); - GLSLC(1, vec4 res = texture(input_img[0], npos); ); - GLSLC(1, imageStore(output_img[0], pos, res); ); - for (int i = 1; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - GLSLF(2, distort_chroma(%i, size, pos); ,i); - GLSLC(1, } else { ); - GLSLC(2, npos = vec2(pos)/vec2(size); ); - GLSLF(2, res = texture(input_img[%i], npos); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - GLSLC(1, } ); - } - } - GLSLC(0, } ); + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } - RET(ff_vk_compile_shader(vkctx, shd, "main")); + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "chromaber_compute", 
VK_SHADER_STAGE_COMPUTE_BIT)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec2 dist; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); + + GLSLD( distort_chroma_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + if (planes == 1) { + GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); + } else { + GLSLC(1, ivec2 size = imageSize(output_img[0]); ); + GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); + GLSLC(1, vec4 res = texture(input_img[0], npos); ); + GLSLC(1, imageStore(output_img[0], pos, res); ); + for (int i = 1; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + GLSLF(2, distort_chroma(%i, size, pos); ,i); + GLSLC(1, } else { ); + GLSLC(2, npos = vec2(pos)/vec2(size); ); + GLSLF(2, res = texture(input_img[%i], npos); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + GLSLC(1, } ); + } } + GLSLC(0, } ); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); - /* Execution context */ - 
RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); s->initialized = 1; return 0; fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) -{ - int err = 0; - VkCommandBuffer cmd_buf; - ChromaticAberrationVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - AVVkFrame *in = (AVVkFrame *)in_f->data[0]; - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *ouput_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - ouput_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar[2] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = 
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); - - in->layout[i] = bar[0].newLayout; - in->access[i] = bar[0].dstAccessMask; - - out->layout[i] = bar[1].newLayout; - out->access[i] = bar[1].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - - ff_vk_update_push_exec(vkctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(s->opts), &s->opts); - - vk->CmdDispatch(cmd_buf, - FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], - FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); - - ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); - ff_vk_qf_rotate(&s->qf); - - return err; - -fail: - ff_vk_discard_exec_deps(s->exec); return err; } @@ -286,7 +185,8 @@ static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, in); if (err < 0) @@ -305,6 +205,16 @@ fail: static void chromaber_vulkan_uninit(AVFilterContext *avctx) { ChromaticAberrationVulkanContext *s = avctx->priv; + 
FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); ff_vk_uninit(&s->vkctx); -- 2.39.2 [-- Attachment #60: 0059-flip_vulkan-port-for-the-rewrite.patch --] [-- Type: text/x-diff, Size: 13075 bytes --] From f69abda00b625c1f9d69421e7c6bef6713a43f76 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:12:42 +0100 Subject: [PATCH 59/72] flip_vulkan: port for the rewrite --- libavfilter/vf_flip_vulkan.c | 229 ++++++++++++----------------------- 1 file changed, 78 insertions(+), 151 deletions(-) diff --git a/libavfilter/vf_flip_vulkan.c b/libavfilter/vf_flip_vulkan.c index 0223786ef1..0330dce257 100644 --- a/libavfilter/vf_flip_vulkan.c +++ b/libavfilter/vf_flip_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com> + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -20,10 +22,9 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" -#define CGS 32 - enum FlipType { FLIP_VERTICAL, FLIP_HORIZONTAL, @@ -32,32 +33,49 @@ enum FlipType { typedef struct FlipVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + VkSampler sampler; } FlipVulkanContext; static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType type) { int err = 0; - FFVkSPIRVShader *shd; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; FlipVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "flip_compute", VK_SHADER_STAGE_COMPUTE_BIT)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); - FFVulkanDescriptorSetBinding image_descs[] = { + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_image", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_image", @@ -67,167 +85,75 @@ static av_cold int 
init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, }, }; - image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!image_descs[0].sampler) - return AVERROR_EXTERNAL; + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - { - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl, "flip_compute", image_descs[0].stages); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_image[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - switch (type) - { - case FLIP_HORIZONTAL: - GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i); - break; - case FLIP_VERTICAL: - GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i); - break; - case FLIP_BOTH: - GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i); - break; - default: - GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i); - break; - } - GLSLF(2, imageStore(output_image[%i], pos, res); ,i); - GLSLC(1, } ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_image[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + switch (type) + { + case FLIP_HORIZONTAL: + GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); 
,i); + break; + case FLIP_VERTICAL: + GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i); + break; + case FLIP_BOTH: + GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i); + break; + default: + GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i); + break; } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); + GLSLF(2, imageStore(output_image[%i], pos, res); ,i); + GLSLC(1, } ); } + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); s->initialized = 1; fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + return err; } static av_cold void flip_vulkan_uninit(AVFilterContext *avctx) { FlipVulkanContext *s = avctx->priv; - ff_vk_uninit(&s->vkctx); - s->initialized = 0; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe) -{ - int err = 0; - VkCommandBuffer cmd_buf; - FlipVulkanContext *s = avctx->priv; FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - AVVkFrame *in = (AVVkFrame *)inframe->data[0]; - AVVkFrame *out = (AVVkFrame *)outframe->data[0]; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], 
- ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } + FFVulkanFunctions *vk = &vkctx->vkfn; - ff_vk_update_descriptor_set(vkctx, s->pl, 0); + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - in->layout[i] = barriers[0].newLayout; - in->access[i] = barriers[0].dstAccessMask; - - out->layout[i] = barriers[1].newLayout; - out->access[i] = barriers[1].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - 
s->vkctx.output_height, 1); - - ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_qf_rotate(&s->qf); + ff_vk_uninit(&s->vkctx); - return 0; -fail: - ff_vk_discard_exec_deps(s->exec); - return err; + s->initialized = 0; } static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type) @@ -247,7 +173,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type) if (!s->initialized) RET(init_filter(ctx, in, type)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, NULL, 0)); RET(av_frame_copy_props(out, in)); -- 2.39.2 [-- Attachment #61: 0060-gblur_vulkan-port-for-the-rewrite.patch --] [-- Type: text/x-diff, Size: 17658 bytes --] From 369e41818f25c68097764dd417cd03b6984e3ce6 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:12:55 +0100 Subject: [PATCH 60/72] gblur_vulkan: port for the rewrite --- libavfilter/vf_gblur_vulkan.c | 314 ++++++++++------------------------ 1 file changed, 95 insertions(+), 219 deletions(-) diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c index c6360799a7..72308ffe83 100644 --- a/libavfilter/vf_gblur_vulkan.c +++ b/libavfilter/vf_gblur_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com> + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -20,6 +22,7 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #define CGS 32 @@ -27,26 +30,23 @@ typedef struct GBlurVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl_hor; - FFVulkanPipeline *pl_ver; - FFVkBuffer params_buf_hor; - FFVkBuffer params_buf_ver; - - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo tmp_images[3]; - VkDescriptorImageInfo output_images[3]; - VkDescriptorBufferInfo params_desc_hor; - VkDescriptorBufferInfo params_desc_ver; int initialized; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + VkSampler sampler; + FFVulkanPipeline pl_hor; + FFVkSPIRVShader shd_hor; + FFVkBuffer params_hor; + FFVulkanPipeline pl_ver; + FFVkSPIRVShader shd_ver; + FFVkBuffer params_ver; + int size; int sizeV; int planes; float sigma; float sigmaV; - AVFrame *tmpframe; } GBlurVulkanContext; static const char gblur_func[] = { @@ -118,16 +118,17 @@ static av_cold void init_gaussian_params(GBlurVulkanContext *s) s->sizeV = s->size; else init_kernel_size(s, &s->sizeV); - - s->tmpframe = NULL; } -static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd, - FFVkBuffer *params_buf, VkDescriptorBufferInfo *params_desc, - int ksize, float sigma) +static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, FFVkBuffer *params_buf, + int ksize, float sigma, FFVkSPIRVCompiler *spv) { int err = 0; uint8_t *kernel_mapped; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); @@ -137,7 +138,6 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk .mem_quali = "readonly", .mem_layout = "std430", .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = NULL, .buf_content = NULL, }; @@ 
-145,10 +145,9 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk if (!kernel_def) return AVERROR(ENOMEM); - buf_desc.updater = params_desc; buf_desc.buf_content = kernel_def; - RET(ff_vk_add_descriptor_set(&s->vkctx, pl, shd, &buf_desc, 1, 0)); + RET(ff_vk_pipeline_descriptor_set_add(&s->vkctx, pl, shd, &buf_desc, 1, 1, 0)); GLSLD( gblur_func ); GLSLC(0, void main() ); @@ -169,26 +168,31 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk } GLSLC(0, } ); - RET(ff_vk_compile_shader(&s->vkctx, shd, "main")); + RET(spv->compile_shader(spv, s, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(&s->vkctx, shd, spv_data, spv_len, "main")); - RET(ff_vk_init_pipeline_layout(&s->vkctx, pl)); - RET(ff_vk_init_compute_pipeline(&s->vkctx, pl)); + RET(ff_vk_init_compute_pipeline(&s->vkctx, pl, shd)); + RET(ff_vk_exec_pipeline_register(&s->vkctx, &s->e, pl)); - RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0)); init_gaussian_kernel((float *)kernel_mapped, sigma, ksize); RET(ff_vk_unmap_buffers(&s->vkctx, params_buf, 1, 1)); - params_desc->buffer = params_buf->buf; - params_desc->range = VK_WHOLE_SIZE; - - ff_vk_update_descriptor_set(&s->vkctx, pl, 1); + RET(ff_vk_set_descriptor_buffer(&s->vkctx, pl, NULL, 1, 0, 0, + params_buf->address, params_buf->size, + VK_FORMAT_UNDEFINED)); fail: av_free(kernel_def); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); return err; } @@ -196,16 +200,35 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { int err = 0; GBlurVulkanContext *s = ctx->priv; - 
FFVkSPIRVShader *shd; + FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanDescriptorSetBinding image_descs[] = { + FFVkSPIRVShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "gblur_hor_compute", + VK_SHADER_STAGE_COMPUTE_BIT)); + RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "gblur_ver_compute", + VK_SHADER_STAGE_COMPUTE_BIT)); + + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_images", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_images", @@ -218,215 +241,64 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) }, }; - image_descs[0].sampler = ff_vk_init_sampler(&s->vkctx, 1, VK_FILTER_LINEAR); - if (!image_descs[0].sampler) - return AVERROR_EXTERNAL; - init_gaussian_params(s); - ff_vk_qf_init(&s->vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - { - /* Create shader for the horizontal pass */ - image_descs[0].updater = s->input_images; - image_descs[1].updater = s->tmp_images; - - s->pl_hor = ff_vk_create_pipeline(&s->vkctx, &s->qf); - if (!s->pl_hor) { - err = AVERROR(ENOMEM); - goto fail; - } - - shd = ff_vk_init_shader(s->pl_hor, "gblur_compute_hor", image_descs[0].stages); - if (!shd) { - err = AVERROR(ENOMEM); - goto fail; - } + shd = &s->shd_hor; + ff_vk_shader_set_compute_sizes(shd, 32, 1, 1); - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_hor, shd, 
image_descs, FF_ARRAY_ELEMS(image_descs), 0)); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0)); GLSLC(0, #define OFFSET (vec2(i, 0.0))); - RET(init_gblur_pipeline(s, s->pl_hor, shd, &s->params_buf_hor, &s->params_desc_hor, - s->size, s->sigma)); + RET(init_gblur_pipeline(s, &s->pl_hor, shd, &s->params_hor, s->size, s->sigma, spv)); } { - /* Create shader for the vertical pass */ - image_descs[0].updater = s->tmp_images; - image_descs[1].updater = s->output_images; - - s->pl_ver = ff_vk_create_pipeline(&s->vkctx, &s->qf); - if (!s->pl_ver) { - err = AVERROR(ENOMEM); - goto fail; - } + shd = &s->shd_ver; + ff_vk_shader_set_compute_sizes(shd, 1, 32, 1); - shd = ff_vk_init_shader(s->pl_ver, "gblur_compute_ver", image_descs[0].stages); - if (!shd) { - err = AVERROR(ENOMEM); - goto fail; - } - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 }); - RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_ver, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0)); GLSLC(0, #define OFFSET (vec2(0.0, i))); - RET(init_gblur_pipeline(s, s->pl_ver, shd, &s->params_buf_ver, &s->params_desc_ver, - s->sizeV, s->sigmaV)); + RET(init_gblur_pipeline(s, &s->pl_ver, shd, &s->params_ver, s->sizeV, s->sigmaV, spv)); } - RET(ff_vk_create_exec_ctx(&s->vkctx, &s->exec, &s->qf)); - s->initialized = 1; fail: + if (spv) + spv->uninit(&spv); + return err; } static av_cold void gblur_vulkan_uninit(AVFilterContext *avctx) { GBlurVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; - av_frame_free(&s->tmpframe); + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl_hor); + ff_vk_pipeline_free(vkctx, &s->pl_ver); + ff_vk_shader_free(vkctx, &s->shd_hor); + ff_vk_shader_free(vkctx, &s->shd_ver); + ff_vk_free_buf(vkctx, &s->params_hor); + ff_vk_free_buf(vkctx, &s->params_ver); + + if (s->sampler) + 
vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_free_buf(&s->vkctx, &s->params_buf_hor); - ff_vk_free_buf(&s->vkctx, &s->params_buf_ver); ff_vk_uninit(&s->vkctx); s->initialized = 0; } -static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe) -{ - int err; - VkCommandBuffer cmd_buf; - GBlurVulkanContext *s = avctx->priv; - FFVulkanFunctions *vk = &s->vkctx.vkfn; - - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *in = (AVVkFrame *)inframe->data[0]; - AVVkFrame *out = (AVVkFrame *)outframe->data[0]; - AVVkFrame *tmp = (AVVkFrame *)s->tmpframe->data[0]; - - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(&s->vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->input_images[i].imageView, - in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->tmp_images[i].imageView, - tmp->img[i], - output_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->output_images[i].imageView, - out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(&s->vkctx, s->pl_hor, 0); - ff_vk_update_descriptor_set(&s->vkctx, s->pl_ver, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, - .oldLayout = tmp->layout[i], - .newLayout = s->tmp_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = tmp->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - in->layout[i] = barriers[0].newLayout; - in->access[i] = barriers[0].dstAccessMask; - - tmp->layout[i] = barriers[1].newLayout; - tmp->access[i] = barriers[1].dstAccessMask; - - out->layout[i] = barriers[2].newLayout; - out->access[i] = barriers[2].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_hor); - - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); - - ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_ver); - - vk->CmdDispatch(cmd_buf,s->vkctx.output_width, - FFALIGN(s->vkctx.output_height, CGS)/CGS, 1); - - ff_vk_add_exec_dep(&s->vkctx, s->exec, 
inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(&s->vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(&s->vkctx, s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); - - return 0; - -fail: - ff_vk_discard_exec_deps(s->exec); - return err; -} - static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) { int err; - AVFrame *out = NULL; + AVFrame *tmp = NULL, *out = NULL; AVFilterContext *ctx = link->dst; GBlurVulkanContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; @@ -437,28 +309,32 @@ static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - if (!s->initialized) { - RET(init_filter(ctx, in)); - s->tmpframe = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!s->tmpframe) { - err = AVERROR(ENOMEM); - goto fail; - } + tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!tmp) { + err = AVERROR(ENOMEM); + goto fail; } - RET(process_frames(ctx, out, in)); + if (!s->initialized) + RET(init_filter(ctx, in)); - RET(av_frame_copy_props(out, in)); + RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e, + (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver }, + out, tmp, in, s->sampler, NULL, 0)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; av_frame_free(&in); + av_frame_free(&tmp); return ff_filter_frame(outlink, out); fail: av_frame_free(&in); + av_frame_free(&tmp); av_frame_free(&out); - av_frame_free(&s->tmpframe); - return err; } -- 2.39.2 [-- Attachment #62: 0061-overlay_vulkan-port-for-the-rewrite.patch --] [-- Type: text/x-diff, Size: 18798 bytes --] From 1a4987ea3171409cc15b7ea85c2d483cf155378e Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:13:05 +0100 Subject: [PATCH 61/72] overlay_vulkan: port for the rewrite --- libavfilter/vf_overlay_vulkan.c | 397 ++++++++++---------------------- 1 file changed, 122 insertions(+), 275 deletions(-) diff --git 
a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c index bdf231f4ef..694cb666d8 100644 --- a/libavfilter/vf_overlay_vulkan.c +++ b/libavfilter/vf_overlay_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -19,26 +21,26 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #include "framesync.h" -#define CGROUPS (int [3]){ 32, 32, 1 } - typedef struct OverlayVulkanContext { FFVulkanContext vkctx; + FFFrameSync fs; int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - FFFrameSync fs; - FFVkBuffer params_buf; + FFVkSPIRVShader shd; + VkSampler sampler; - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo main_images[3]; - VkDescriptorImageInfo overlay_images[3]; - VkDescriptorImageInfo output_images[3]; - VkDescriptorBufferInfo params_desc; + /* Push constants / options */ + struct { + int32_t o_offset[2*3]; + int32_t o_size[2*3]; + } opts; int overlay_x; int overlay_y; @@ -80,279 +82,113 @@ static const char overlay_alpha[] = { static av_cold int init_filter(AVFilterContext *ctx) { int err; - FFVkSampler *sampler; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; OverlayVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_NEAREST); - if (!sampler) + const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { 
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); return AVERROR_EXTERNAL; - - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - { /* Create the shader */ - const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; - - FFVulkanDescriptorSetBinding desc_i[3] = { - { - .name = "main_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->main_images, - .sampler = sampler, - }, - { - .name = "overlay_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->overlay_images, - .sampler = sampler, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - FFVulkanDescriptorSetBinding desc_b = { - .name = "params", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .mem_layout = "std430", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = &s->params_desc, - .buf_content = "ivec2 o_offset[3], o_size[3];", - }; - - FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "overlay_compute", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, CGROUPS); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */ - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */ - - GLSLD( overlay_noalpha ); - GLSLD( overlay_alpha ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, int planes = %i; ,planes); - GLSLC(1, for (int i = 0; i < planes; i++) { ); - if (ialpha) - 
GLSLC(2, overlay_alpha_opaque(i, pos); ); - else - GLSLC(2, overlay_noalpha(i, pos); ); - GLSLC(1, } ); - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - } - - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); - - { /* Create and update buffer */ - const AVPixFmtDescriptor *desc; - - /* NOTE: std430 requires the same identical struct layout, padding and - * alignment as C, so we're allowed to do this, as this will map - * exactly to what the shader recieves */ - struct { - int32_t o_offset[2*3]; - int32_t o_size[2*3]; - } *par; - - err = ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), NULL, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - if (err) - return err; - - err = ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0); - if (err) - return err; - - desc = av_pix_fmt_desc_get(s->vkctx.output_format); - - par->o_offset[0] = s->overlay_x; - par->o_offset[1] = s->overlay_y; - par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w; - par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h; - par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w; - par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h; - - par->o_size[0] = s->overlay_w; - par->o_size[1] = s->overlay_h; - par->o_size[2] = par->o_size[0] >> desc->log2_chroma_w; - par->o_size[3] = par->o_size[1] >> desc->log2_chroma_h; - par->o_size[4] = par->o_size[0] >> desc->log2_chroma_w; - par->o_size[5] = par->o_size[1] >> desc->log2_chroma_h; - - err = ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1); - if (err) - return err; - - s->params_desc.buffer = s->params_buf.buf; - s->params_desc.range = VK_WHOLE_SIZE; - - ff_vk_update_descriptor_set(vkctx, s->pl, 1); } - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + 
RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "overlay_compute", VK_SHADER_STAGE_COMPUTE_BIT)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, ivec2 o_offset[3]; ); + GLSLC(1, ivec2 o_size[3]; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "main_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "overlay_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0)); + + GLSLD( overlay_noalpha ); + GLSLD( overlay_alpha ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, int planes = %i; ,planes); + GLSLC(1, for (int i = 0; i < planes; i++) { ); + if (ialpha) + GLSLC(2, overlay_alpha_opaque(i, pos); ); + else + GLSLC(2, overlay_noalpha(i, pos); ); + GLSLC(1, } ); + GLSLC(0, } ); + + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); + + s->opts.o_offset[0] = s->overlay_x; + s->opts.o_offset[1] = s->overlay_y; + 
s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; + s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; + s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; + s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; + + s->opts.o_size[0] = s->overlay_w; + s->opts.o_size[1] = s->overlay_h; + s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; + s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; + s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; + s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; s->initialized = 1; - return 0; - fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, - AVFrame *main_f, AVFrame *overlay_f) -{ - int err; - VkCommandBuffer cmd_buf; - OverlayVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - AVVkFrame *main = (AVVkFrame *)main_f->data[0]; - AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0]; - - AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data; - AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data; - - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - const VkFormat *main_sw_formats = av_vkfmt_from_pixfmt(main_fc->sw_format); - const VkFormat *overlay_sw_formats = av_vkfmt_from_pixfmt(overlay_fc->sw_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->main_images[i].imageView, main->img[i], - main_sw_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->overlay_images[i].imageView, 
overlay->img[i], - overlay_sw_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->main_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar[3] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = main->layout[i], - .newLayout = s->main_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = main->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = overlay->layout[i], - .newLayout = s->overlay_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = overlay->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, 
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar); - - main->layout[i] = bar[0].newLayout; - main->access[i] = bar[0].dstAccessMask; - - overlay->layout[i] = bar[1].newLayout; - overlay->access[i] = bar[1].dstAccessMask; - - out->layout[i] = bar[2].newLayout; - out->access[i] = bar[2].dstAccessMask; - } - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - - vk->CmdDispatch(cmd_buf, - FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0], - FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1); - - ff_vk_add_exec_dep(vkctx, s->exec, main_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, overlay_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); - ff_vk_qf_rotate(&s->qf); - - return err; - -fail: - ff_vk_discard_exec_deps(s->exec); return err; } @@ -394,7 +230,9 @@ static int overlay_vulkan_blend(FFFrameSync *fs) goto fail; } - RET(process_frames(ctx, out, input_main, input_overlay)); + RET(ff_vk_filter_process_2in(&s->vkctx, &s->e, &s->pl, + out, input_main, input_overlay, + s->sampler, &s->opts, sizeof(s->opts))); err = av_frame_copy_props(out, input_main); if (err < 0) @@ -443,8 +281,17 @@ static av_cold int overlay_vulkan_init(AVFilterContext *avctx) static void overlay_vulkan_uninit(AVFilterContext *avctx) { OverlayVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_free_buf(&s->vkctx, &s->params_buf); ff_vk_uninit(&s->vkctx); 
ff_framesync_uninit(&s->fs); -- 2.39.2 [-- Attachment #63: 0062-scale_vulkan-port-for-the-rewrite.patch --] [-- Type: text/x-diff, Size: 18951 bytes --] From 4ec8834fa164e172420cd162d4a51735fbddd986 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:13:32 +0100 Subject: [PATCH 62/72] scale_vulkan: port for the rewrite --- libavfilter/vf_scale_vulkan.c | 365 ++++++++++++---------------------- 1 file changed, 124 insertions(+), 241 deletions(-) diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c index 31dc35569b..84bd19c012 100644 --- a/libavfilter/vf_scale_vulkan.c +++ b/libavfilter/vf_scale_vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -19,12 +21,11 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "scale_eval.h" #include "internal.h" #include "colorspace.h" -#define CGROUPS (int [3]){ 32, 32, 1 } - enum ScalerFunc { F_BILINEAR = 0, F_NEAREST, @@ -35,15 +36,17 @@ enum ScalerFunc { typedef struct ScaleVulkanContext { FFVulkanContext vkctx; + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - FFVkBuffer params_buf; + FFVkSPIRVShader shd; + VkSampler sampler; - /* Shader updators, must be in the main filter struct */ - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; - VkDescriptorBufferInfo params_desc; + /* Push constants / options */ + struct { + float yuv_matrix[4][4]; + } opts; char *out_format_string; char *w_expr; @@ -51,8 +54,6 @@ typedef struct ScaleVulkanContext { enum ScalerFunc scaler; enum AVColorRange out_range; - - int initialized; } ScaleVulkanContext; static const char scale_bilinear[] = { @@ -110,10 +111,15 @@ static const char write_444[] = { static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { 
int err; - FFVkSampler *sampler; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; VkFilter sampler_mode; ScaleVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; int crop_x = in->crop_left; int crop_y = in->crop_top; @@ -121,8 +127,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) int crop_h = in->height - (in->crop_top + in->crop_bottom); int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format); - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - switch (s->scaler) { case F_NEAREST: sampler_mode = VK_FILTER_NEAREST; @@ -132,264 +136,133 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) break; }; - /* Create a sampler */ - sampler = ff_vk_init_sampler(vkctx, 0, sampler_mode); - if (!sampler) + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); return AVERROR_EXTERNAL; + } - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - { /* Create the shader */ - FFVulkanDescriptorSetBinding desc_i[2] = { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = in_planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, - .sampler = sampler, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = av_pix_fmt_count_planes(s->vkctx.output_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, - }, - }; - - FFVulkanDescriptorSetBinding desc_b = { - .name = "params", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .mem_layout = "std430", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = &s->params_desc, - .buf_content = "mat4 yuv_matrix;", - }; 
- - FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "scale_compute", - VK_SHADER_STAGE_COMPUTE_BIT); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, CGROUPS); - - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */ - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */ - - GLSLD( scale_bilinear ); - - if (s->vkctx.output_format != s->vkctx.input_format) { - GLSLD( rgb2yuv ); - } + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "scale_compute", VK_SHADER_STAGE_COMPUTE_BIT)); + + ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, mat4 yuv_matrix; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = in_planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.output_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; - switch (s->vkctx.output_format) { - case AV_PIX_FMT_NV12: GLSLD(write_nv12); break; - case AV_PIX_FMT_YUV420P: GLSLD( write_420); break; - case AV_PIX_FMT_YUV444P: GLSLD( write_444); break; - default: break; - } + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, ivec2 pos = 
ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height); - GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h); - GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y); - GLSLC(0, ); - - if (s->vkctx.output_format == s->vkctx.input_format) { - for (int i = 0; i < desc_i[1].elems; i++) { - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - switch (s->scaler) { - case F_NEAREST: - case F_BILINEAR: - GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i); - GLSLF(2, imageStore(output_img[%i], pos, res); ,i); - break; - }; - GLSLC(1, } ); - } - } else { - GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); ); - GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG); - switch (s->vkctx.output_format) { - case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break; - case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break; - case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break; - default: return AVERROR(EINVAL); - } - } + GLSLD( scale_bilinear ); + + if (s->vkctx.output_format != s->vkctx.input_format) { + GLSLD( rgb2yuv ); + } - GLSLC(0, } ); + switch (s->vkctx.output_format) { + case AV_PIX_FMT_NV12: GLSLD(write_nv12); break; + case AV_PIX_FMT_YUV420P: GLSLD( write_420); break; + case AV_PIX_FMT_YUV444P: GLSLD( write_444); break; + default: break; + } - RET(ff_vk_compile_shader(vkctx, shd, "main")); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height); + GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h); + GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y); + GLSLC(0, ); + + if (s->vkctx.output_format == s->vkctx.input_format) { + for (int i = 0; i < desc[i].elems; i++) { + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + switch (s->scaler) { + case 
F_NEAREST: + case F_BILINEAR: + GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + break; + }; + GLSLC(1, } ); + } + } else { + GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); ); + GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG); + switch (s->vkctx.output_format) { + case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break; + case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break; + case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break; + default: return AVERROR(EINVAL); + } } - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); + GLSLC(0, } ); if (s->vkctx.output_format != s->vkctx.input_format) { const AVLumaCoefficients *lcoeffs; double tmp_mat[3][3]; - struct { - float yuv_matrix[4][4]; - } *par; - lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace); if (!lcoeffs) { av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n"); return AVERROR(EINVAL); } - RET(ff_vk_create_buf(vkctx, &s->params_buf, - sizeof(*par), NULL, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - - RET(ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0)); - ff_fill_rgb2yuv_table(lcoeffs, tmp_mat); - memset(par, 0, sizeof(*par)); - for (int y = 0; y < 3; y++) for (int x = 0; x < 3; x++) - par->yuv_matrix[x][y] = tmp_mat[x][y]; - - par->yuv_matrix[3][3] = 1.0; - - RET(ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1)); - - s->params_desc.buffer = s->params_buf.buf; - s->params_desc.range = VK_WHOLE_SIZE; - - ff_vk_update_descriptor_set(vkctx, s->pl, 1); + s->opts.yuv_matrix[x][y] = tmp_mat[x][y]; + s->opts.yuv_matrix[3][3] = 1.0; } - /* Execution context */ - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); + + 
RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); s->initialized = 1; return 0; fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f) -{ - int err = 0; - VkCommandBuffer cmd_buf; - ScaleVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - AVVkFrame *in = (AVVkFrame *)in_f->data[0]; - AVVkFrame *out = (AVVkFrame *)out_f->data[0]; - VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2]; - int barrier_count = 0; - const int planes = av_pix_fmt_count_planes(s->vkctx.input_format); - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - /* Update descriptors and init the exec context */ - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); - - for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier bar = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }; - - 
memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier)); - - in->layout[i] = bar.newLayout; - in->access[i] = bar.dstAccessMask; - } - - for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) { - VkImageMemoryBarrier bar = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }; - - memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier)); - - out->layout[i] = bar.newLayout; - out->access[i] = bar.dstAccessMask; - } - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, barrier_count, barriers); - - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - - vk->CmdDispatch(cmd_buf, - FFALIGN(vkctx->output_width, CGROUPS[0])/CGROUPS[0], - FFALIGN(vkctx->output_height, CGROUPS[1])/CGROUPS[1], 1); - - ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); return err; - -fail: - ff_vk_discard_exec_deps(s->exec); - return err; } static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) @@ -408,7 +281,8 @@ static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, &s->opts, 
sizeof(s->opts))); err = av_frame_copy_props(out, in); if (err < 0) @@ -475,8 +349,17 @@ static int scale_vulkan_config_output(AVFilterLink *outlink) static void scale_vulkan_uninit(AVFilterContext *avctx) { ScaleVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); - ff_vk_free_buf(&s->vkctx, &s->params_buf); ff_vk_uninit(&s->vkctx); s->initialized = 0; -- 2.39.2 [-- Attachment #64: 0063-transpose_vulkan-port-for-the-rewrite.patch --] [-- Type: text/x-diff, Size: 12391 bytes --] From ec245a2b213f82a52b9a5120062ab4f620519100 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 17 Feb 2023 03:13:43 +0100 Subject: [PATCH 63/72] transpose_vulkan: port for the rewrite --- libavfilter/vf_transpose_vulkan.c | 223 ++++++++++-------------------- 1 file changed, 75 insertions(+), 148 deletions(-) diff --git a/libavfilter/vf_transpose_vulkan.c b/libavfilter/vf_transpose_vulkan.c index 30d052e08c..36f286b219 100644 --- a/libavfilter/vf_transpose_vulkan.c +++ b/libavfilter/vf_transpose_vulkan.c @@ -1,5 +1,7 @@ /* * copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com> + * Copyright (c) Lynne + * * This file is part of FFmpeg. 
* * FFmpeg is free software; you can redistribute it and/or @@ -20,41 +22,59 @@ #include "libavutil/random_seed.h" #include "libavutil/opt.h" #include "vulkan_filter.h" +#include "vulkan_spirv.h" #include "internal.h" #include "transpose.h" -#define CGS 32 - typedef struct TransposeVulkanContext { FFVulkanContext vkctx; - FFVkQueueFamilyCtx qf; - FFVkExecContext *exec; - FFVulkanPipeline *pl; - VkDescriptorImageInfo input_images[3]; - VkDescriptorImageInfo output_images[3]; + int initialized; + FFVulkanPipeline pl; + FFVkExecPool e; + FFVkQueueFamilyCtx qf; + FFVkSPIRVShader shd; + VkSampler sampler; int dir; int passthrough; - int initialized; } TransposeVulkanContext; static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) { - int err = 0; - FFVkSPIRVShader *shd; + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque; TransposeVulkanContext *s = ctx->priv; FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT); + RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(&s->pl, &s->shd, "transpose_compute", VK_SHADER_STAGE_COMPUTE_BIT)); - FFVulkanDescriptorSetBinding image_descs[] = { + ff_vk_shader_set_compute_sizes(&s->shd, 32, 1, 1); + + desc = (FFVulkanDescriptorSetBinding []) { { .name = "input_images", .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->input_images, + .samplers = DUP_SAMPLER(s->sampler), }, { .name = "output_images", @@ -64,154 +84,49 @@ static av_cold int 
init_filter(AVFilterContext *ctx, AVFrame *in) .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .updater = s->output_images, }, }; - image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR); - if (!image_descs[0].sampler) - return AVERROR_EXTERNAL; - - ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0); - - { - s->pl = ff_vk_create_pipeline(vkctx, &s->qf); - if (!s->pl) - return AVERROR(ENOMEM); - - shd = ff_vk_init_shader(s->pl, "transpose_compute", image_descs[0].stages); - if (!shd) - return AVERROR(ENOMEM); - - ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 }); - RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->dir == TRANSPOSE_CCLOCK) - GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i); - else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) { - GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i); - if (s->dir == TRANSPOSE_CLOCK) - GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); ); - } else - GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i); - GLSLF(2, imageStore(output_images[%i], pos, res); ,i); - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(ff_vk_compile_shader(vkctx, shd, "main")); - RET(ff_vk_init_pipeline_layout(vkctx, s->pl)); - RET(ff_vk_init_compute_pipeline(vkctx, s->pl)); - } - - RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf)); - s->initialized = 1; - -fail: - return err; -} - -static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe) -{ - int err = 0; - VkCommandBuffer cmd_buf; - TransposeVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - 
FFVulkanFunctions *vk = &s->vkctx.vkfn; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - AVVkFrame *in = (AVVkFrame *)inframe->data[0]; - AVVkFrame *out = (AVVkFrame *)outframe->data[0]; - - const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format); - const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format); - - ff_vk_start_exec_recording(vkctx, s->exec); - cmd_buf = ff_vk_get_exec_buf(s->exec); - - for (int i = 0; i < planes; i++) { - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->input_images[i].imageView, in->img[i], - input_formats[i], - ff_comp_identity_map)); - - RET(ff_vk_create_imageview(vkctx, s->exec, - &s->output_images[i].imageView, out->img[i], - output_formats[i], - ff_comp_identity_map)); - - s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - ff_vk_update_descriptor_set(vkctx, s->pl, 0); + RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0)); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); for (int i = 0; i < planes; i++) { - VkImageMemoryBarrier barriers[] = { - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = in->layout[i], - .newLayout = s->input_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = in->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = out->layout[i], - .newLayout = s->output_images[i].imageLayout, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, - .image = out->img[i], - .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .subresourceRange.levelCount = 1, - .subresourceRange.layerCount = 1, - }, - }; - - vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, - 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers); - - in->layout[i] = barriers[0].newLayout; - in->access[i] = barriers[0].dstAccessMask; - - out->layout[i] = barriers[1].newLayout; - out->access[i] = barriers[1].dstAccessMask; + GLSLC(0, ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->dir == TRANSPOSE_CCLOCK) + GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i); + else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) { + GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i); + if (s->dir == TRANSPOSE_CLOCK) + GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); ); + } else + GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i); + GLSLF(2, imageStore(output_images[%i], pos, res); ,i); + GLSLC(1, } ); } + GLSLC(0, } ); - ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl); - vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS, - s->vkctx.output_height, 1); + RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); - ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); - ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd)); + RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl)); - err = ff_vk_submit_exec_queue(vkctx, s->exec); - if (err) - return err; - - ff_vk_qf_rotate(&s->qf); + s->initialized = 1; return 0; fail: - ff_vk_discard_exec_deps(s->exec); + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + 
spv->uninit(&spv); + return err; } @@ -235,7 +150,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) if (!s->initialized) RET(init_filter(ctx, in)); - RET(process_frames(ctx, out, in)); + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in, + s->sampler, NULL, 0)); RET(av_frame_copy_props(out, in)); @@ -259,6 +175,17 @@ fail: static av_cold void transpose_vulkan_uninit(AVFilterContext *avctx) { TransposeVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_pipeline_free(vkctx, &s->pl); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + ff_vk_uninit(&s->vkctx); s->initialized = 0; -- 2.39.2 [-- Attachment #65: 0064-avcodec-add-AVHWAccel.free_frame_priv-callback.patch --] [-- Type: text/x-diff, Size: 7769 bytes --] From dbf81f602283527ea27d7ddac58e8ff648fc5557 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 10 Mar 2022 18:03:05 +0100 Subject: [PATCH 64/72] avcodec: add AVHWAccel.free_frame_priv callback --- libavcodec/av1dec.c | 4 ++-- libavcodec/avcodec.h | 8 ++++++++ libavcodec/decode.c | 19 +++++++++++++++++++ libavcodec/decode.h | 11 +++++++++++ libavcodec/h264_slice.c | 3 ++- libavcodec/hevc_refs.c | 3 ++- libavcodec/mpegpicture.c | 4 +++- libavcodec/vp8.c | 2 +- libavcodec/vp9.c | 2 +- 9 files changed, 49 insertions(+), 7 deletions(-) diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c index d83c902f1f..d105835d51 100644 --- a/libavcodec/av1dec.c +++ b/libavcodec/av1dec.c @@ -24,6 +24,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/opt.h" #include "avcodec.h" +#include "decode.h" #include "av1dec.h" #include "bytestream.h" #include "codec_internal.h" @@ -836,8 +837,7 @@ static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f) if (avctx->hwaccel) { const AVHWAccel *hwaccel = avctx->hwaccel; if 
(hwaccel->frame_priv_data_size) { - f->hwaccel_priv_buf = - av_buffer_allocz(hwaccel->frame_priv_data_size); + f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, hwaccel); if (!f->hwaccel_priv_buf) { ret = AVERROR(ENOMEM); goto fail; diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 17416791a6..6babfc7132 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2206,6 +2206,14 @@ typedef struct AVHWAccel { * that avctx->hwaccel_priv_data is invalid. */ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + + /** + * Callback to free the hwaccel-specific frame data. + * + * @param avctx the codec context + * @param data the per-frame hardware accelerator private data to be freed. + */ + void (*free_frame_priv)(AVCodecContext *avctx, void *data); } AVHWAccel; /** diff --git a/libavcodec/decode.c b/libavcodec/decode.c index 93ecd36c2b..b9a2ec84f6 100644 --- a/libavcodec/decode.c +++ b/libavcodec/decode.c @@ -1675,3 +1675,22 @@ int ff_copy_palette(void *dst, const AVPacket *src, void *logctx) } return 0; } + +AVBufferRef *ff_alloc_hwaccel_frame_priv_data(AVCodecContext *avctx, + const AVHWAccel *hwaccel) +{ + AVBufferRef *ref; + uint8_t *data = av_mallocz(hwaccel->frame_priv_data_size); + if (!data) + return NULL; + + ref = av_buffer_create(data, hwaccel->frame_priv_data_size, + (void (*)(void *, uint8_t *))hwaccel->free_frame_priv, + avctx, 0); + if (!ref) { + av_free(data); + return NULL; + } + + return ref; +} diff --git a/libavcodec/decode.h b/libavcodec/decode.h index 8430ffbd66..aa40baafc0 100644 --- a/libavcodec/decode.h +++ b/libavcodec/decode.h @@ -150,4 +150,15 @@ int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame, int flags); int ff_side_data_update_matrix_encoding(AVFrame *frame, enum AVMatrixEncoding matrix_encoding); +/** + * Allocate a hwaccel frame private data and create an AVBufferRef + * from it. 
+ * + * @param avctx The codec context which to attach as an opaque value + * @param hwaccel The hwaccel for which to allocate + * @return The allocated buffer + */ +AVBufferRef *ff_alloc_hwaccel_frame_priv_data(AVCodecContext *avctx, + const AVHWAccel *hwaccel); + #endif /* AVCODEC_DECODE_H */ diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 8ac66b343c..c0aa31bcd9 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -33,6 +33,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/timecode.h" #include "internal.h" +#include "decode.h" #include "cabac.h" #include "cabac_functions.h" #include "decode.h" @@ -212,7 +213,7 @@ static int alloc_picture(H264Context *h, H264Picture *pic) const AVHWAccel *hwaccel = h->avctx->hwaccel; av_assert0(!pic->hwaccel_picture_private); if (hwaccel->frame_priv_data_size) { - pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + pic->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(h->avctx, hwaccel); if (!pic->hwaccel_priv_buf) return AVERROR(ENOMEM); pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data; diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c index 811e8feff8..30cbb8b37a 100644 --- a/libavcodec/hevc_refs.c +++ b/libavcodec/hevc_refs.c @@ -23,6 +23,7 @@ #include "libavutil/avassert.h" +#include "decode.h" #include "thread.h" #include "hevc.h" #include "hevcdec.h" @@ -118,7 +119,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s) const AVHWAccel *hwaccel = s->avctx->hwaccel; av_assert0(!frame->hwaccel_picture_private); if (hwaccel->frame_priv_data_size) { - frame->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + frame->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(s->avctx, hwaccel); if (!frame->hwaccel_priv_buf) goto fail; frame->hwaccel_picture_private = frame->hwaccel_priv_buf->data; diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c index 977bc65191..a1d58f04b3 100644 --- a/libavcodec/mpegpicture.c +++ 
b/libavcodec/mpegpicture.c @@ -27,6 +27,8 @@ #include "avcodec.h" #include "encode.h" +#include "internal.h" +#include "decode.h" #include "motion_est.h" #include "mpegpicture.h" #include "mpegutils.h" @@ -172,7 +174,7 @@ static int alloc_frame_buffer(AVCodecContext *avctx, Picture *pic, if (avctx->hwaccel) { assert(!pic->hwaccel_picture_private); if (avctx->hwaccel->frame_priv_data_size) { - pic->hwaccel_priv_buf = av_buffer_allocz(avctx->hwaccel->frame_priv_data_size); + pic->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, avctx->hwaccel); if (!pic->hwaccel_priv_buf) { av_log(avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n"); return -1; diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index db2419deaf..4c23eb5672 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -109,7 +109,7 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref) if (s->avctx->hwaccel) { const AVHWAccel *hwaccel = s->avctx->hwaccel; if (hwaccel->frame_priv_data_size) { - f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(s->avctx, hwaccel); if (!f->hwaccel_priv_buf) goto fail; f->hwaccel_picture_private = f->hwaccel_priv_buf->data; diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index 7c0a246446..4f345f18db 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -136,7 +136,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f) const AVHWAccel *hwaccel = avctx->hwaccel; av_assert0(!f->hwaccel_picture_private); if (hwaccel->frame_priv_data_size) { - f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size); + f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, hwaccel); if (!f->hwaccel_priv_buf) goto fail; f->hwaccel_picture_private = f->hwaccel_priv_buf->data; -- 2.39.2 [-- Attachment #66: 0065-avcodec-add-AVHWAccel.flush-callback.patch --] [-- Type: text/x-diff, Size: 3020 bytes --] From 
93223fa95389c60c015cfcee22784a1bf0fdb05b Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Fri, 6 Jan 2023 03:32:56 +0100 Subject: [PATCH 65/72] avcodec: add AVHWAccel.flush callback --- libavcodec/av1dec.c | 3 +++ libavcodec/avcodec.h | 5 +++++ libavcodec/h264dec.c | 3 +++ libavcodec/hevcdec.c | 3 +++ libavcodec/vp8.c | 3 +++ libavcodec/vp9.c | 3 +++ 6 files changed, 20 insertions(+) diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c index d105835d51..3cbb80bcb5 100644 --- a/libavcodec/av1dec.c +++ b/libavcodec/av1dec.c @@ -1228,6 +1228,9 @@ static void av1_decode_flush(AVCodecContext *avctx) s->raw_seq = NULL; ff_cbs_flush(s->cbc); + + if (avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } #define OFFSET(x) offsetof(AV1DecContext, x) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 6babfc7132..531998a78c 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -2214,6 +2214,11 @@ typedef struct AVHWAccel { * @param data the per-frame hardware accelerator private data to be freed. */ void (*free_frame_priv)(AVCodecContext *avctx, void *data); + + /** + * Callback to flush the hwaccel state. 
+ */ + void (*flush)(AVCodecContext *avctx); } AVHWAccel; /** diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 2d691731c5..995bf17a8f 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -480,6 +480,9 @@ static void h264_decode_flush(AVCodecContext *avctx) ff_h264_free_tables(h); h->context_initialized = 0; + + if (avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } static int get_last_needed_nal(H264Context *h) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 43cd963175..7c9b46240c 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -3682,6 +3682,9 @@ static void hevc_decode_flush(AVCodecContext *avctx) av_buffer_unref(&s->rpu_buf); s->max_ra = INT_MAX; s->eos = 1; + + if (avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } #define OFFSET(x) offsetof(HEVCContext, x) diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 4c23eb5672..b591b82ad1 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -167,6 +167,9 @@ static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem) if (free_mem) free_buffers(s); + + if (avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } static void vp8_decode_flush(AVCodecContext *avctx) diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index 4f345f18db..18c2b09f64 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -1791,6 +1791,9 @@ static void vp9_decode_flush(AVCodecContext *avctx) vp9_frame_unref(avctx, &s->s.frames[i]); for (i = 0; i < 8; i++) ff_thread_release_ext_buffer(avctx, &s->s.refs[i]); + + if (avctx->hwaccel->flush) + avctx->hwaccel->flush(avctx); } static av_cold int vp9_decode_init(AVCodecContext *avctx) -- 2.39.2 [-- Attachment #67: 0066-hwconfig-add-a-new-HWACCEL_CAP_THREAD_SAFE-for-threa.patch --] [-- Type: text/x-diff, Size: 1369 bytes --] From 99ce9693bcb6218ffe82bb5780827c1dca614092 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Thu, 15 Dec 2022 01:06:52 +0100 Subject: [PATCH 66/72] hwconfig: add a new HWACCEL_CAP_THREAD_SAFE 
for threadsafe hwaccels Vulkan is fully threadsafe and stateless, so we can benefit from this. --- libavcodec/hwconfig.h | 1 + libavcodec/pthread_frame.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index 721424912c..e6b78f0160 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -24,6 +24,7 @@ #define HWACCEL_CAP_ASYNC_SAFE (1 << 0) +#define HWACCEL_CAP_THREAD_SAFE (1 << 1) typedef struct AVCodecHWConfigInternal { diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c index 71edd6b3ec..15e8d96a79 100644 --- a/libavcodec/pthread_frame.c +++ b/libavcodec/pthread_frame.c @@ -204,7 +204,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg) /* if the previous thread uses hwaccel then we take the lock to ensure * the threads don't run concurrently */ - if (avctx->hwaccel) { + if (avctx->hwaccel && !(avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE)) { pthread_mutex_lock(&p->parent->hwaccel_mutex); p->hwaccel_serializing = 1; } -- 2.39.2 [-- Attachment #68: 0067-libavcodec-add-Vulkan-common-video-code.patch --] [-- Type: text/x-diff, Size: 23311 bytes --] From 2f30e4ddaf855b53cd3d8fd95a863b240bae0047 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Sun, 18 Dec 2022 08:31:03 +0100 Subject: [PATCH 67/72] libavcodec: add Vulkan common video code --- configure | 2 +- libavcodec/Makefile | 2 + libavcodec/hwconfig.h | 2 + libavcodec/vulkan.c | 19 ++ libavcodec/vulkan.h | 24 +++ libavcodec/vulkan_video.c | 417 ++++++++++++++++++++++++++++++++++++++ libavcodec/vulkan_video.h | 98 +++++++++ 7 files changed, 563 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vulkan.c create mode 100644 libavcodec/vulkan.h create mode 100644 libavcodec/vulkan_video.c create mode 100644 libavcodec/vulkan_video.h diff --git a/configure b/configure index f0f15b9e87..91f715351c 100755 --- a/configure +++ b/configure @@ -326,7 +326,6 @@ External library support: 
--disable-securetransport disable Secure Transport, needed for TLS support on OSX if openssl and gnutls are not used [autodetect] --enable-vapoursynth enable VapourSynth demuxer [no] - --disable-vulkan disable Vulkan code [autodetect] --disable-xlib disable xlib [autodetect] --disable-zlib disable zlib [autodetect] @@ -353,6 +352,7 @@ External library support: --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] --disable-videotoolbox disable VideoToolbox code [autodetect] + --disable-vulkan disable Vulkan code [autodetect] Toolchain options: --arch=ARCH select architecture [$arch] diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 4971832ff4..a45c32e564 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -976,6 +976,7 @@ OBJS-$(CONFIG_NVDEC) += nvdec.o OBJS-$(CONFIG_VAAPI) += vaapi_decode.o OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o OBJS-$(CONFIG_VDPAU) += vdpau.o +OBJS-$(CONFIG_VULKAN) += vulkan.o vulkan_video.o OBJS-$(CONFIG_AV1_D3D11VA_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_DXVA2_HWACCEL) += dxva2_av1.o @@ -1284,6 +1285,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h index e6b78f0160..220b8a1e95 100644 --- a/libavcodec/hwconfig.h +++ b/libavcodec/hwconfig.h @@ -77,6 +77,8 @@ typedef struct AVCodecHWConfigInternal { HW_CONFIG_HWACCEL(1, 1, 1, VDPAU, VDPAU, ff_ ## codec ## _vdpau_hwaccel) #define HWACCEL_VIDEOTOOLBOX(codec) \ HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## 
_videotoolbox_hwaccel) +#define HWACCEL_VULKAN(codec) \ + HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## _vulkan_hwaccel) #define HWACCEL_D3D11VA(codec) \ HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel) diff --git a/libavcodec/vulkan.c b/libavcodec/vulkan.c new file mode 100644 index 0000000000..fc8a1fa47b --- /dev/null +++ b/libavcodec/vulkan.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/vulkan.c" diff --git a/libavcodec/vulkan.h b/libavcodec/vulkan.h new file mode 100644 index 0000000000..b15efd4add --- /dev/null +++ b/libavcodec/vulkan.h @@ -0,0 +1,24 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_H +#define AVCODEC_VULKAN_H + +#include "libavutil/vulkan.h" + +#endif /* AVCODEC_VULKAN_H */ diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c new file mode 100644 index 0000000000..3e76109b26 --- /dev/null +++ b/libavcodec/vulkan_video.c @@ -0,0 +1,417 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "codec_id.h" + +#include "vulkan_video.h" + +const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO] = { + [AV_CODEC_ID_H264] = { +#if CONFIG_VULKAN_ENCODE + FF_VK_EXT_VIDEO_ENCODE_H264 | FF_VK_EXT_SYNC2, + VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_EXT, +#else + 0, + 0, +#endif + FF_VK_EXT_VIDEO_DECODE_H264 | FF_VK_EXT_SYNC2, + VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR, + }, + [AV_CODEC_ID_HEVC] = { +#if CONFIG_VULKAN_ENCODE + FF_VK_EXT_VIDEO_ENCODE_H265 | FF_VK_EXT_SYNC2, + VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_EXT, +#else + 0, + 0, +#endif + FF_VK_EXT_VIDEO_DECODE_H265 | FF_VK_EXT_SYNC2, + VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR + }, +}; + +enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf, int *score) +{ + switch (vkf) { + /* Mono */ + case VK_FORMAT_R8_UNORM: + *score = 1; + return AV_PIX_FMT_GRAY8; + case VK_FORMAT_R10X6_UNORM_PACK16: + case VK_FORMAT_R12X4_UNORM_PACK16: + *score = 2; + return AV_PIX_FMT_GRAY16; + case VK_FORMAT_R16_UNORM: + *score = 1; + return AV_PIX_FMT_GRAY16; + + /* RGB */ + case VK_FORMAT_B8G8R8A8_UNORM: + *score = 1; + return AV_PIX_FMT_BGRA; + case VK_FORMAT_R8G8B8A8_UNORM: + *score = 1; + return AV_PIX_FMT_RGBA; + case VK_FORMAT_R8G8B8_UNORM: + *score = 1; + return AV_PIX_FMT_RGB24; + case VK_FORMAT_B8G8R8_UNORM: + *score = 1; + return AV_PIX_FMT_BGR24; + + /* 420 */ + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + *score = 1; + return AV_PIX_FMT_NV12; + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + *score = 1; + return AV_PIX_FMT_YUV420P; + case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: + *score = 2; + return AV_PIX_FMT_P010; + case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16: + *score = 2; + return AV_PIX_FMT_YUV420P16; + /* No support for 
VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16 */ + case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16: + *score = 2; + return AV_PIX_FMT_YUV420P12; + case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM: + *score = 1; + return AV_PIX_FMT_YUV420P16; + + /* 422 */ + case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM: + *score = 1; + return AV_PIX_FMT_NV16; + case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM: + *score = 1; + return AV_PIX_FMT_YUV422P; + case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16: + *score = 2; + return AV_PIX_FMT_NV20; + case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16: + *score = 2; + return AV_PIX_FMT_YUV422P10; + /* No support for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16 */ + case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16: + *score = 2; + return AV_PIX_FMT_YUV422P12; + case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM: + *score = 1; + return AV_PIX_FMT_YUV422P16; + + /* 444 */ + case VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT: + *score = 1; + return AV_PIX_FMT_NV24; + case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM: + *score = 1; + return AV_PIX_FMT_YUV444P; + /* No support for VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT */ + case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16: + *score = 2; + return AV_PIX_FMT_YUV444P10; + /* No support for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT */ + case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16: + *score = 2; + return AV_PIX_FMT_YUV444P12; + case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM: + *score = 1; + return AV_PIX_FMT_YUV444P16; + default: + break; + } + + return AV_PIX_FMT_NONE; +} + +VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf) +{ + switch (vkf) { + case VK_FORMAT_R8_UNORM: + case VK_FORMAT_R10X6_UNORM_PACK16: + case VK_FORMAT_R12X4_UNORM_PACK16: + case VK_FORMAT_R16_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_R8G8B8_UNORM: + case VK_FORMAT_B8G8R8_UNORM: + return VK_IMAGE_ASPECT_COLOR_BIT; 
+ + /* 2-plane formats */ + case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: + case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM: + case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16: + case VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT: + return VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT; + + case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16: + case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16: + case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM: + case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM: + case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16: + case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16: + case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM: + case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM: + case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16: + case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16: + case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM: + return VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT; + + default: + break; + } + + return VK_IMAGE_ASPECT_NONE; +} + +VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc) +{ + if (desc->nb_components == 1) + return VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR; + else if (!desc->log2_chroma_w && !desc->log2_chroma_h) + return VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR; + else if (desc->log2_chroma_w == 1 && !desc->log2_chroma_h) /* 4:2:2 halves the chroma width only */ + return VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR; + else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1) + return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR; + return VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR; +} + +VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth) +{ + switch (depth) { + case 8: return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR; + case 10: return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR; + case 12: return VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR; + default: 
break; + } + return VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR; +} + +static void free_data_buf(void *opaque, uint8_t *data) +{ + FFVulkanContext *ctx = opaque; + FFVkVideoBuffer *buf = (FFVkVideoBuffer *)data; + ff_vk_unmap_buffers(ctx, &buf->buf, 1, 0); + ff_vk_free_buf(ctx, &buf->buf); + av_free(data); +} + +static AVBufferRef *alloc_data_buf(void *opaque, size_t size) +{ + uint8_t *buf = av_mallocz(size); + if (!buf) + return NULL; + + return av_buffer_create(buf, size, free_data_buf, opaque, 0); +} + +int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size) +{ + int err; + AVBufferRef *ref; + FFVkVideoBuffer *data; + + if (!s->buf_pool) { + s->buf_pool = av_buffer_pool_init2(sizeof(FFVkVideoBuffer), ctx, + alloc_data_buf, NULL); + if (!s->buf_pool) + return AVERROR(ENOMEM); + } + + *buf = ref = av_buffer_pool_get(s->buf_pool); + if (!ref) + return AVERROR(ENOMEM); + + data = (FFVkVideoBuffer *)ref->data; + + if (data->buf.size >= size) + return 0; + + /* No point in requesting anything smaller. */ + size = FFMAX(size, 1024*1024); + size = FFALIGN(size, s->caps.minBitstreamBufferSizeAlignment); + + /* Align buffer to nearest power of two. Makes fragmentation management + * easier, and gives us ample headroom. 
*/ + size--; + size |= size >> 1; + size |= size >> 2; + size |= size >> 4; + size |= size >> 8; + size |= size >> 16; + size++; + + ff_vk_free_buf(ctx, &data->buf); + memset(data, 0, sizeof(FFVkVideoBuffer)); + + err = ff_vk_create_buf(ctx, &data->buf, size, + create_pNext, NULL, usage, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) { + av_buffer_unref(buf); /* *buf == ref; also clears the caller's pointer */ + return err; + } + + /* Map the buffer */ + err = ff_vk_map_buffers(ctx, &data->buf, &data->mem, 1, 0); + if (err < 0) { + av_buffer_unref(buf); /* *buf == ref; also clears the caller's pointer */ + return err; + } + + return 0; +} + +av_cold void ff_vk_video_common_uninit(FFVulkanContext *s, + FFVkVideoCommon *common) +{ + FFVulkanFunctions *vk = &s->vkfn; + + if (common->session) { + vk->DestroyVideoSessionKHR(s->hwctx->act_dev, common->session, + s->hwctx->alloc); + common->session = NULL; + } + + if (common->nb_mem && common->mem) + for (int i = 0; i < common->nb_mem; i++) + vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc); + + av_freep(&common->mem); + + av_buffer_pool_uninit(&common->buf_pool); +} + +av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s, + FFVkVideoCommon *common, + VkVideoSessionCreateInfoKHR *session_create) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkMemoryRequirements2 *mem_req = NULL; + VkVideoSessionMemoryRequirementsKHR *mem = NULL; + VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL; + + /* Create session */ + ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create, + s->hwctx->alloc, &common->session); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + /* Get memory requirements */ + ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev, + common->session, + &common->nb_mem, + NULL); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Allocate all memory needed to actually allocate memory */ + common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem); + if (!common->mem) { + err = AVERROR(ENOMEM); + goto fail; 
+ } + mem = av_mallocz(sizeof(*mem)*common->nb_mem); + if (!mem) { + err = AVERROR(ENOMEM); + goto fail; + } + mem_req = av_mallocz(sizeof(*mem_req)*common->nb_mem); + if (!mem_req) { + err = AVERROR(ENOMEM); + goto fail; + } + bind_mem = av_mallocz(sizeof(*bind_mem)*common->nb_mem); + if (!bind_mem) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Set the needed fields to get the memory requirements */ + for (int i = 0; i < common->nb_mem; i++) { + mem_req[i] = (VkMemoryRequirements2) { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + }; + mem[i] = (VkVideoSessionMemoryRequirementsKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_MEMORY_REQUIREMENTS_KHR, + .memoryRequirements = mem_req[i].memoryRequirements, + }; + } + + /* Finally get the memory requirements */ + ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev, + common->session, &common->nb_mem, + mem); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Now allocate each requested memory. + * For ricing, could pool together memory that ends up in the same index. 
*/ + for (int i = 0; i < common->nb_mem; i++) { + err = ff_vk_alloc_mem(s, &mem[i].memoryRequirements, + UINT32_MAX, NULL, NULL, &common->mem[i]); + if (err < 0) + goto fail; + + bind_mem[i] = (VkBindVideoSessionMemoryInfoKHR) { + .sType = VK_STRUCTURE_TYPE_BIND_VIDEO_SESSION_MEMORY_INFO_KHR, + .memory = common->mem[i], + .memoryBindIndex = mem[i].memoryBindIndex, + .memoryOffset = 0, + .memorySize = mem[i].memoryRequirements.size, + }; + + av_log(log, AV_LOG_VERBOSE, "Allocating %lu bytes in bind index %i for video session\n", + bind_mem[i].memorySize, bind_mem[i].memoryBindIndex); + } + + /* Bind the allocated memory */ + ret = vk->BindVideoSessionMemoryKHR(s->hwctx->act_dev, common->session, + common->nb_mem, bind_mem); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + av_freep(&mem); + av_freep(&mem_req); + av_freep(&bind_mem); + + return 0; + +fail: + av_freep(&mem); + av_freep(&mem_req); + av_freep(&bind_mem); + + ff_vk_video_common_uninit(s, common); + return err; +} diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h new file mode 100644 index 0000000000..5e2676a282 --- /dev/null +++ b/libavcodec/vulkan_video.h @@ -0,0 +1,98 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_VIDEO_H +#define AVCODEC_VULKAN_VIDEO_H + +#include "codec_id.h" +#include "vulkan.h" + +#include <vk_video/vulkan_video_codecs_common.h> + +#define CODEC_VER_MAJ(ver) (ver >> 22) +#define CODEC_VER_MIN(ver) ((ver >> 12) & ((1 << 10) - 1)) +#define CODEC_VER_PAT(ver) (ver & ((1 << 12) - 1)) +#define CODEC_VER(ver) CODEC_VER_MAJ(ver), CODEC_VER_MIN(ver), CODEC_VER_PAT(ver) + +typedef struct FFVkCodecMap { + FFVulkanExtensions encode_extension; + VkVideoCodecOperationFlagBitsKHR encode_op; + FFVulkanExtensions decode_extension; + VkVideoCodecOperationFlagBitsKHR decode_op; +} FFVkCodecMap; + +typedef struct FFVkVideoSession { + VkVideoSessionKHR session; + VkDeviceMemory *mem; + uint32_t nb_mem; + VkVideoCapabilitiesKHR caps; + + AVBufferPool *buf_pool; +} FFVkVideoCommon; + +/** + * Index is codec_id. + */ +extern const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO]; + +/** + * Get pixfmt from a Vulkan format. + */ +enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf, int *score); + +/** + * Get aspect bits which include all planes from a VkFormat. + */ +VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf); + +/** + * Get Vulkan's chroma subsampling from a pixfmt descriptor. + */ +VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc); + +/** + * Get Vulkan's bit depth from an [8:12] integer. + */ +VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth); + +typedef struct FFVkVideoBuffer { + FFVkBuffer buf; + uint8_t *mem; +} FFVkVideoBuffer; + +/** + * Get a mapped FFVkPooledBuffer with a specific guaranteed minimum size + * from a pool. 
+ */ +int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size); + +/** + * Initialize video session, allocating and binding necessary memory. + */ +int ff_vk_video_common_init(void *log, FFVulkanContext *s, + FFVkVideoCommon *common, + VkVideoSessionCreateInfoKHR *session_create); + +/** + * Free video session and required resources. + */ +void ff_vk_video_common_uninit(FFVulkanContext *s, FFVkVideoCommon *common); + +#endif /* AVCODEC_VULKAN_VIDEO_H */ -- 2.39.2 [-- Attachment #69: 0068-libavcodec-add-Vulkan-common-video-decoding-code.patch --] [-- Type: text/x-diff, Size: 53050 bytes --] From d3f2fa8e530dc94c9058149a2cee92196c7adb33 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Mon, 16 Jan 2023 07:23:27 +0100 Subject: [PATCH 68/72] libavcodec: add Vulkan common video decoding code --- libavcodec/Makefile | 2 +- libavcodec/vulkan_decode.c | 1135 ++++++++++++++++++++++++++++++++++++ libavcodec/vulkan_decode.h | 163 ++++++ 3 files changed, 1299 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vulkan_decode.c create mode 100644 libavcodec/vulkan_decode.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index a45c32e564..eabf4eb43e 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1285,7 +1285,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h -SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_decode.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c new file mode 100644 index 0000000000..582968e1da --- /dev/null +++ b/libavcodec/vulkan_decode.c 
@@ -0,0 +1,1135 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vulkan_video.h" +#include "vulkan_decode.h" +#include "config_components.h" + +#if CONFIG_H264_VULKAN_HWACCEL +extern const VkExtensionProperties ff_vk_dec_h264_ext; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL +extern const VkExtensionProperties ff_vk_dec_hevc_ext; +#endif + +static const VkExtensionProperties *dec_ext[] = { +#if CONFIG_H264_VULKAN_HWACCEL + [AV_CODEC_ID_H264] = &ff_vk_dec_h264_ext, +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + [AV_CODEC_ID_HEVC] = &ff_vk_dec_hevc_ext, +#endif +}; + +static int vk_decode_create_view(FFVulkanDecodeContext *ctx, VkImageView *dst_view, + VkImageAspectFlags *aspect, AVVkFrame *src) +{ + VkResult ret; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(ctx->pic_format); + + VkSamplerYcbcrConversionInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, + .conversion = ctx->yuv_sampler, + }; + VkImageViewCreateInfo img_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &yuv_sampler_info, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = ctx->pic_format, + .image = src->img[0], + .components = (VkComponentMapping) { + .r 
= VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + .levelCount = 1, + }, + }; + + ret = vk->CreateImageView(ctx->s.hwctx->act_dev, &img_view_create_info, + ctx->s.hwctx->alloc, dst_view); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + *aspect = aspect_mask; + + return 0; +} + +/** + * Allocate an AVFrame whose buf[0]/data[0] come from the DPB frames pool. + * Returns NULL if the frame, the frames context reference or the pool + * buffer cannot be obtained. + */ +static AVFrame *vk_get_dpb_pool(FFVulkanDecodeContext *ctx) +{ + AVFrame *avf = av_frame_alloc(); + AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data; + if (!avf) + return NULL; + + avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref); + if (!avf->hw_frames_ctx) + goto fail; + avf->buf[0] = av_buffer_pool_get(dpb_frames->pool); + if (!avf->buf[0]) + goto fail; + avf->data[0] = avf->buf[0]->data; + + return avf; + +fail: + /* av_frame_free() sets avf to NULL, so it must not be touched afterwards */ + av_frame_free(&avf); + return NULL; +} + +int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + int alloc_dpb) +{ + int err; + + vkpic->nb_slices = 0; + vkpic->slices_size = 0; + + /* If the decoder made a blank frame to make up for a missing ref, or the + * frame is the current frame so it's missing one, create a re-representation */ + if (vkpic->img_view_ref) + return 0; + + /* Pre-allocate slice buffer with a reasonable default */ + if (is_current) { + uint64_t min_alloc = 4096; + if (0) + min_alloc = 2*ctx->s.hprops.minImportedHostPointerAlignment; + + vkpic->slices = av_fast_realloc(NULL, &vkpic->slices_size_max, min_alloc); + if (!vkpic->slices) + return AVERROR(ENOMEM); + + if (0) + vkpic->slices_size += ctx->s.hprops.minImportedHostPointerAlignment; + } + + vkpic->dpb_frame = NULL; + vkpic->dpb_vkf = NULL; + vkpic->img_view_ref = NULL; + vkpic->img_view_out = NULL; + + if (ctx->layered_dpb && alloc_dpb) { + vkpic->img_view_ref = 
ctx->layered_view; + vkpic->img_aspect_ref = ctx->layered_aspect; + } else if (alloc_dpb) { + vkpic->dpb_frame = vk_get_dpb_pool(ctx); + if (!vkpic->dpb_frame) + return AVERROR(ENOMEM); + + vkpic->dpb_vkf = (AVVkFrame *)vkpic->dpb_frame->data[0]; + + err = vk_decode_create_view(ctx, &vkpic->img_view_ref, + &vkpic->img_aspect_ref, + vkpic->dpb_vkf); + if (err < 0) + return err; + } + + if (!alloc_dpb || is_current) { + err = vk_decode_create_view(ctx, &vkpic->img_view_out, + &vkpic->img_aspect, + (AVVkFrame *)pic->buf[0]->data); + if (err < 0) + return err; + + if (!alloc_dpb) { + vkpic->img_view_ref = vkpic->img_view_out; + vkpic->img_aspect_ref = vkpic->img_aspect; + } + } + + return 0; +} + +/** + * Append one slice (optionally prefixed with a 00 00 01 startcode) to the + * picture's slice buffer and record its byte offset in the offsets array. + * On success *nb_slices is incremented and *offsets points to the + * (possibly reallocated) offsets array. + */ +int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp, + const uint8_t *data, size_t size, int add_startcode, + uint32_t *nb_slices, const uint32_t **offsets) +{ + static const uint8_t startcode_prefix[3] = { 0x0, 0x0, 0x1 }; + const size_t startcode_len = add_startcode ? sizeof(startcode_prefix) : 0; + const int nb = *nb_slices; + uint8_t *slices; + uint32_t *slice_off; + + /* Size by element (uint32_t), not by the size of the pointer */ + slice_off = av_fast_realloc(vp->slice_off, &vp->slice_off_max, + (nb + 1)*sizeof(*slice_off)); + if (!slice_off) + return AVERROR(ENOMEM); + + *offsets = vp->slice_off = slice_off; + slice_off[nb] = vp->slices_size; + + slices = av_fast_realloc(vp->slices, &vp->slices_size_max, + vp->slices_size + size + startcode_len); + if (!slices) + return AVERROR(ENOMEM); + + vp->slices = slices; + + /* Startcode */ + memcpy(slices + vp->slices_size, startcode_prefix, startcode_len); + + /* Slice data */ + memcpy(slices + vp->slices_size + startcode_len, data, size); + + *nb_slices = nb + 1; + vp->nb_slices++; + vp->slices_size += startcode_len + size; + + return 0; +} + +void ff_vk_decode_flush(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoBeginCodingInfoKHR decode_start = { + .sType = 
VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = ctx->empty_session_params, + }; + VkVideoCodingControlInfoKHR decode_ctrl = { + .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, + .flags = VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR, + }; + VkVideoEndCodingInfoKHR decode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkCommandBuffer cmd_buf; + FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool); + ff_vk_exec_start(&ctx->s, exec); + cmd_buf = exec->buf; + + vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); + vk->CmdControlVideoCodingKHR(cmd_buf, &decode_ctrl); + vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); + ff_vk_exec_submit(&ctx->s, exec); +} + +static void host_map_buf_free(void *opaque, uint8_t *data) +{ + FFVulkanContext *ctx = opaque; + FFVkVideoBuffer *buf = (FFVkVideoBuffer *)data; + ff_vk_free_buf(ctx, &buf->buf); + av_free(data); +} + +int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]) +{ + int err; + VkResult ret; + VkCommandBuffer cmd_buf; + FFVkVideoBuffer *sd_buf; + + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Output */ + AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data; + + /* Quirks */ + const int layered_dpb = ctx->layered_dpb; + + VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)vp->session_params->data; + VkVideoBeginCodingInfoKHR decode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = *par, + .referenceSlotCount = vp->decode_info.referenceSlotCount, + .pReferenceSlots = vp->decode_info.pReferenceSlots, + }; + VkVideoEndCodingInfoKHR decode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + AVBufferRef *sd_ref = 
NULL; + size_t data_size = FFALIGN(vp->slices_size, ctx->common.caps.minBitstreamBufferSizeAlignment); + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool); + + if (ctx->exec_pool.nb_queries) { + int64_t prev_sub_res = 0; + ff_vk_exec_wait(&ctx->s, exec); + ret = ff_vk_exec_get_query(&ctx->s, exec, NULL, &prev_sub_res); + if (ret != VK_NOT_READY && ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (ret == VK_SUCCESS) + av_log(avctx, prev_sub_res < 0 ? AV_LOG_ERROR : AV_LOG_DEBUG, + "Result of previous frame decoding: %li\n", prev_sub_res); + } + + if (0) { + size_t req_size; + VkExternalMemoryBufferCreateInfo create_desc = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + .pNext = &ctx->profile_list, + }; + + VkImportMemoryHostPointerInfoEXT import_desc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + }; + + VkMemoryHostPointerPropertiesEXT p_props = { + .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, + }; + + /* Align slices pointer */ + import_desc.pHostPointer = (void *)FFALIGN((uintptr_t)vp->slices, + ctx->s.hprops.minImportedHostPointerAlignment); + + req_size = FFALIGN(data_size, + ctx->s.hprops.minImportedHostPointerAlignment); + + ret = vk->GetMemoryHostPointerPropertiesEXT(ctx->s.hwctx->act_dev, + import_desc.handleType, + import_desc.pHostPointer, + &p_props); + + if (ret == VK_SUCCESS) { + sd_buf = av_mallocz(sizeof(*sd_buf)); + if (!sd_buf) + return AVERROR(ENOMEM); + + err = ff_vk_create_buf(&ctx->s, &sd_buf->buf, req_size, + &create_desc, &import_desc, + VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) { + av_free(sd_buf); + return err; /* This shouldn't error out, unless it's critical */ + } else { + 
size_t neg_offs = (uint8_t *)import_desc.pHostPointer - vp->slices; + + sd_ref = av_buffer_create((uint8_t *)sd_buf, sizeof(*sd_buf), + host_map_buf_free, &ctx->s, 0); + if (!sd_ref) { + ff_vk_free_buf(&ctx->s, &sd_buf->buf); + av_free(sd_buf); + return AVERROR(ENOMEM); + } + + for (int i = 0; i < vp->nb_slices; i++) + vp->slice_off[i] -= neg_offs; + + sd_buf->mem = vp->slices; + } + } + } + + if (!sd_ref) { + err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &sd_ref, + VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, + &ctx->profile_list, data_size); + if (err < 0) + return err; + + sd_buf = (FFVkVideoBuffer *)sd_ref->data; + + /* Copy the slices data to the buffer */ + memcpy(sd_buf->mem, vp->slices, vp->slices_size); + } + + /* Flush if needed */ + if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = sd_buf->buf.mem, + .offset = 0, + .size = FFALIGN(vp->slices_size, + ctx->s.props.properties.limits.nonCoherentAtomSize), + }; + + ret = vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &flush_buf); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", + ff_vk_ret2str(ret)); + av_buffer_unref(&sd_ref); + return AVERROR_EXTERNAL; + } + } + + vp->decode_info.srcBuffer = sd_buf->buf.buf; + vp->decode_info.srcBufferOffset = 0; + vp->decode_info.srcBufferRange = data_size; + + /* Start command buffer recording */ + ff_vk_exec_start(&ctx->s, exec); + cmd_buf = exec->buf; + + /* Slices */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &sd_ref, 1, 0); + if (err < 0) + return err; + + /* Parameters */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->session_params, 1, 0); + if (err < 0) + return err; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + if (err < 0) + return err; + + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, + pic); + if (err < 0) + return 
err; + + /* Output image - change layout, as it comes from a pool */ + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .srcAccessMask = vkf->access[0], + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = vkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR, + .srcQueueFamilyIndex = vkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = vkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = vp->img_aspect, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, pic, + &img_bar[nb_img_bar], &nb_img_bar); + + /* Reference for the current image, if existing and not layered */ + if (vp->dpb_frame) { + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + if (err < 0) + return err; + } + + if (!layered_dpb) { + /* All references (apart from the current) for non-layered refs */ + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + AVFrame *ref_frame = rpic[i]; + FFVulkanDecodePicture *rvp = rvkp[i]; + AVFrame *ref = rvp->dpb_frame ? 
rvp->dpb_frame : ref_frame; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + if (err < 0) + return err; + + if (err == 0) { + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, + &rvp->sem, &rvp->sem_value, + ref); + if (err < 0) + return err; + } + + if (!rvp->dpb_frame) { + AVVkFrame *rvkf = (AVVkFrame *)ref->data[0]; + + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .srcAccessMask = rvkf->access[0], + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR | + VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = rvkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, + .srcQueueFamilyIndex = rvkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = rvkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = rvp->img_aspect_ref, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, ref, + &img_bar[nb_img_bar], &nb_img_bar); + } + } + } else if (vp->decode_info.referenceSlotCount || + vp->img_view_out != vp->img_view_ref) { + /* Single barrier for a single layered ref */ + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->layered_frame, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + if (err < 0) + return err; + } + + /* Change image layout */ + vk->CmdPipelineBarrier2KHR(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Start, use parameters, decode and end decoding */ + vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); + + /* Start status query TODO: remove check when radv gets support */ + if (ctx->exec_pool.nb_queries) + vk->CmdBeginQuery(cmd_buf, ctx->exec_pool.query_pool,
exec->query_idx + 0, 0); + + vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info); + + /* End status query */ + if (ctx->exec_pool.nb_queries) + vk->CmdEndQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0); + + vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); + + /* End recording and submit for execution */ + return ff_vk_exec_submit(&ctx->s, exec); +} + +void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp) +{ + FFVulkanFunctions *vk; + VkSemaphoreWaitInfo sem_wait; + + // TODO: investigate why this happens + if (!ctx) { + av_freep(&vp->slices); + av_freep(&vp->slice_off); + av_frame_free(&vp->dpb_frame); + return; + } + + vk = &ctx->s.vkfn; + + /* We do not have to lock the frame here because we're not interested + * in the actual current semaphore value, but only that it's later than + * the time we submitted the image for decoding. */ + sem_wait = (VkSemaphoreWaitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .pSemaphores = &vp->sem, + .pValues = &vp->sem_value, + .semaphoreCount = 1, + }; + + if (vp->sem) + vk->WaitSemaphores(ctx->s.hwctx->act_dev, &sem_wait, UINT64_MAX); + + /* Free slices data + * TODO: use a pool in the decode context instead to avoid per-frame allocs. */ + av_freep(&vp->slices); + av_freep(&vp->slice_off); + + /* Destroy image view (out) */ + if (vp->img_view_out != vp->img_view_ref && vp->img_view_out) + vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_out, ctx->s.hwctx->alloc); + + /* Destroy image view (ref, unlayered) */ + if (vp->dpb_vkf && vp->img_view_ref) + vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_ref, ctx->s.hwctx->alloc); + + av_frame_free(&vp->dpb_frame); +} + +/* Since to even get decoder capabilities, we have to initialize quite a lot, + * this function does initialization and saves it to hwaccel_priv_data if + * available. 
*/ +static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_ref, + int *width_align, int *height_align, + enum AVPixelFormat *pix_fmt, int *dpb_dedicate) +{ + VkResult ret; + int err, max_level, score = INT32_MAX; + const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id]; + AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; + AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data; + AVVulkanDeviceContext *hwctx = device->hwctx; + enum AVPixelFormat context_format = frames->sw_format; + int context_format_was_found = 0; + int base_profile, cur_profile = avctx->profile; + + int dedicated_dpb; + int layered_dpb; + + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanExtensions local_extensions = 0x0; + FFVulkanExtensions *extensions = ctx ? &ctx->s.extensions : &local_extensions; + FFVulkanFunctions local_vk = { 0 }; + FFVulkanFunctions *vk = ctx ? &ctx->s.vkfn : &local_vk; + VkVideoCapabilitiesKHR local_caps = { 0 }; + VkVideoCapabilitiesKHR *caps = ctx ? &ctx->common.caps : &local_caps; + VkVideoDecodeCapabilitiesKHR local_dec_caps = { 0 }; + VkVideoDecodeCapabilitiesKHR *dec_caps = ctx ? &ctx->dec_caps : &local_dec_caps; + VkVideoDecodeUsageInfoKHR local_usage = { 0 }; + VkVideoDecodeUsageInfoKHR *usage = ctx ? &ctx->usage : &local_usage; + VkVideoProfileInfoKHR local_profile = { 0 }; + VkVideoProfileInfoKHR *profile = ctx ? &ctx->profile : &local_profile; + VkVideoProfileListInfoKHR local_profile_list = { 0 }; + VkVideoProfileListInfoKHR *profile_list = ctx ? &ctx->profile_list : &local_profile_list; + + VkVideoDecodeH264ProfileInfoKHR local_h264_profile = { 0 }; + VkVideoDecodeH264ProfileInfoKHR *h264_profile = ctx ? &ctx->h264_profile : &local_h264_profile; + + VkVideoDecodeH264ProfileInfoKHR local_h265_profile = { 0 }; + VkVideoDecodeH264ProfileInfoKHR *h265_profile = ctx ? 
&ctx->h265_profile : &local_h265_profile; + + VkPhysicalDeviceVideoFormatInfoKHR fmt_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR, + .pNext = profile_list, + }; + VkVideoDecodeH264CapabilitiesKHR h264_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, + }; + VkVideoDecodeH265CapabilitiesKHR h265_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, + }; + VkVideoFormatPropertiesKHR *ret_info; + uint32_t nb_out_fmts = 0; + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + if (!desc) + return AVERROR(EINVAL); + + if (ctx && ctx->init) + return 0; + + if (!vk_codec->decode_op) + return AVERROR(EINVAL); + + *extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions, + hwctx->nb_enabled_dev_extensions); + + if (!(*extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME); + return AVERROR(ENOSYS); + } else if (!vk_codec->decode_extension) { + av_log(avctx, AV_LOG_ERROR, "Unsupported codec for Vulkan decoding: %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } else if (!(vk_codec->decode_extension & *extensions)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } + + err = ff_vk_load_functions(device, vk, *extensions, 1, 1); + if (err < 0) + return err; + +repeat: + if (avctx->codec_id == AV_CODEC_ID_H264) { + base_profile = FF_PROFILE_H264_CONSTRAINED_BASELINE; + dec_caps->pNext = &h264_caps; + usage->pNext = h264_profile; + h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR; + h264_profile->stdProfileIdc = cur_profile; + h264_profile->pictureLayout = avctx->field_order == AV_FIELD_PROGRESSIVE ? 
+ VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR : + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR; + } else if (avctx->codec_id == AV_CODEC_ID_H265) { + base_profile = FF_PROFILE_HEVC_MAIN; + dec_caps->pNext = &h265_caps; + usage->pNext = h265_profile; + h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR; + h265_profile->stdProfileIdc = cur_profile; + } + + usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR; + usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR; + + profile->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR; + /* NOTE: NVIDIA's implementation fails if the USAGE hint is inserted. + * Remove this once it's fixed. */ + profile->pNext = usage->pNext; + profile->videoCodecOperation = vk_codec->decode_op; + profile->chromaSubsampling = ff_vk_subsampling_from_av_desc(desc); + profile->lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth); + profile->chromaBitDepth = profile->lumaBitDepth; + + profile_list->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR; + profile_list->profileCount = 1; + profile_list->pProfiles = profile; + + /* Get the capabilities of the decoder for the given profile */ + caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + caps->pNext = dec_caps; + dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR; + /* dec_caps->pNext already filled in */ + + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile, + caps); + if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR && + avctx->hwaccel_flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH && + cur_profile != base_profile) { + cur_profile = base_profile; + av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting " + "again with profile %s\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, avctx->profile), + avcodec_profile_name(avctx->codec_id, base_profile)); + goto repeat; + } else if (ret ==
VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " + "%s profile \"%s\" not supported!\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, cur_profile)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " + "format (%s) not supported!\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT || + ret == VK_ERROR_FORMAT_NOT_SUPPORTED) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + return AVERROR_EXTERNAL; + } + + max_level = avctx->codec_id == AV_CODEC_ID_H264 ? h264_caps.maxLevelIdc : + avctx->codec_id == AV_CODEC_ID_H265 ? h265_caps.maxLevelIdc : + 0; + + if (ctx) { + av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile \"%s\":\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, avctx->profile)); + av_log(avctx, AV_LOG_VERBOSE, " Maximum level: %i\n", + max_level); + av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n", + caps->minCodedExtent.width, caps->maxCodedExtent.width); + av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n", + caps->minCodedExtent.height, caps->maxCodedExtent.height); + av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n", + caps->pictureAccessGranularity.width); + av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n", + caps->pictureAccessGranularity.height); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n", + caps->minBitstreamBufferOffsetAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n", + caps->minBitstreamBufferSizeAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n", + caps->maxDpbSlots); + av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n", + caps->maxActiveReferencePictures); + av_log(avctx, 
AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n", + CODEC_VER(caps->stdHeaderVersion.specVersion), + CODEC_VER(dec_ext[avctx->codec_id]->specVersion)); + av_log(avctx, AV_LOG_VERBOSE, " Decode modes:%s%s%s\n", + dec_caps->flags ? "" : + " invalid", + dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR ? + " reuse_dst_dpb" : "", + dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR ? + " dedicated_dpb" : ""); + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n", + caps->flags ? "" : + " none", + caps->flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ? + " protected" : "", + caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ? + " separate_references" : ""); + } + + /* Check if decoding is possible with the given parameters */ + if (avctx->coded_width < caps->minCodedExtent.width || + avctx->coded_height < caps->minCodedExtent.height || + avctx->coded_width > caps->maxCodedExtent.width || + avctx->coded_height > caps->maxCodedExtent.height) + return AVERROR(EINVAL); + + if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) && + avctx->level > max_level) + return AVERROR(EINVAL); + + /* Some basic sanity checking */ + if (!(dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR))) { + av_log(avctx, AV_LOG_ERROR, "Buggy driver signals invalid decoding mode: neither " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR nor " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR are set!\n"); + return AVERROR_EXTERNAL; + } else if (((dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR)) == + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) && + !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR)) { + av_log(avctx, AV_LOG_ERROR, "Cannot
initialize Vulkan decoding session, buggy driver: " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set " + "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n"); + return AVERROR_EXTERNAL; + } + + /* TODO: make dedicated_dpb tunable */ + dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR); + layered_dpb = !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); + + if (dedicated_dpb) { + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + } else { + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + } + + /* Get the format of the images necessary */ + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, NULL); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts); + if (!ret_info) + return AVERROR(ENOMEM); + + for (int i = 0; i < nb_out_fmts; i++) + ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; + + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, ret_info); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + av_free(ret_info); + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + av_free(ret_info); + return AVERROR_EXTERNAL; + } + + if (ctx) { + ctx->dedicated_dpb = dedicated_dpb; + ctx->layered_dpb = layered_dpb; + ctx->init = 1; + } + + *pix_fmt = AV_PIX_FMT_NONE; + + av_log(avctx, AV_LOG_DEBUG, "Pixel 
format list for decoding:\n"); + for (int i = 0; i < nb_out_fmts; i++) { + int tmp_score; + enum AVPixelFormat tmp = ff_vk_pix_fmt_from_vkfmt(ret_info[i].format, + &tmp_score); + const AVPixFmtDescriptor *tmp_desc = av_pix_fmt_desc_get(tmp); + if (tmp == AV_PIX_FMT_NONE || !tmp_desc) + continue; + + av_log(avctx, AV_LOG_DEBUG, " %i - %s (%i), score %i\n", i, + av_get_pix_fmt_name(tmp), ret_info[i].format, tmp_score); + + if (context_format == tmp || tmp_score < score) { + if (ctx) + ctx->pic_format = ret_info[i].format; + *pix_fmt = tmp; + context_format_was_found |= context_format == tmp; + if (context_format_was_found) + break; + } + } + + if (*pix_fmt == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "No valid pixel format for decoding!\n"); + return AVERROR(EINVAL); + } + + if (width_align) + *width_align = caps->pictureAccessGranularity.width; + if (height_align) + *height_align = caps->pictureAccessGranularity.height; + if (dpb_dedicate) + *dpb_dedicate = dedicated_dpb; + + av_free(ret_info); + + av_log(avctx, AV_LOG_VERBOSE, "Chosen frames format: %s\n", + av_get_pix_fmt_name(*pix_fmt)); + + if (context_format != AV_PIX_FMT_NONE && !context_format_was_found) { + av_log(avctx, AV_LOG_ERROR, "Frames context had a pixel format set which " + "was not available for decoding into!\n"); + return AVERROR(EINVAL); + } + + return *pix_fmt == AV_PIX_FMT_NONE ? 
AVERROR(EINVAL) : 0; +} + +int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +{ + int err, width_align, height_align, dedicated_dpb; + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; + AVVulkanFramesContext *hwfc = frames_ctx->hwctx; + + err = vulkan_decode_check_init(avctx, hw_frames_ctx, &width_align, &height_align, + &frames_ctx->sw_format, &dedicated_dpb); + if (err < 0) + return err; + + frames_ctx->width = FFALIGN(avctx->coded_width, width_align); + frames_ctx->height = FFALIGN(avctx->coded_height, height_align); + frames_ctx->format = AV_PIX_FMT_VULKAN; + + hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + + if (!dedicated_dpb) + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + + return err; +} + +void ff_vk_decode_free_params(void *opaque, uint8_t *data) +{ + FFVulkanDecodeContext *ctx = opaque; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)data; + vk->DestroyVideoSessionParametersKHR(ctx->s.hwctx->act_dev, *par, + ctx->s.hwctx->alloc); + av_free(par); +} + +int ff_vk_decode_uninit(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Wait on and free execution pool */ + ff_vk_exec_pool_free(s, &ctx->exec_pool); + + /* Destroy layered view */ + if (ctx->layered_view) + vk->DestroyImageView(s->hwctx->act_dev, ctx->layered_view, s->hwctx->alloc); + + /* This also frees all references from this pool */ + av_frame_free(&ctx->layered_frame); + av_buffer_unref(&ctx->dpb_hwfc_ref); + + /* Destroy parameters */ + if (ctx->empty_session_params) + vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, + ctx->empty_session_params, + s->hwctx->alloc); + + ff_vk_video_common_uninit(s, &ctx->common); + + 
vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler, + s->hwctx->alloc); + + av_buffer_pool_uninit(&ctx->tmp_pool); + + ff_vk_uninit(s); + + return 0; +} + +int ff_vk_decode_init(AVCodecContext *avctx) +{ + int err, qf, cxpos = 0, cypos = 0, nb_q = 0; + VkResult ret; + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, + }; + VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params : + avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params : + NULL, + }; + VkVideoSessionCreateInfoKHR session_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR, + }; + VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, + .components = ff_comp_identity_map, + .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY, + .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */ + }; + + err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN); + if (err < 0) + return err; + + s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx); + s->frames = (AVHWFramesContext *)s->frames_ref->data; + s->hwfc = s->frames->hwctx; + + s->device_ref = av_buffer_ref(s->frames->device_ref); + s->device = (AVHWDeviceContext *)s->device_ref->data; + s->hwctx = s->device->hwctx; + + /* Get parameters, capabilities and final pixel/vulkan format */ + err = vulkan_decode_check_init(avctx, s->frames_ref, NULL, NULL, + &ctx->sw_format, NULL); + if (err < 
0) + goto fail; + + /* Load all properties */ + err = ff_vk_load_props(s); + if (err < 0) + goto fail; + + /* Create queue context */ + qf = ff_vk_qf_init(s, &ctx->qf_dec, VK_QUEUE_VIDEO_DECODE_BIT_KHR); + + /* Check for support */ + if (!(s->video_props[qf].videoCodecOperations & + ff_vk_codec_map[avctx->codec_id].decode_op)) { + av_log(avctx, AV_LOG_ERROR, "Decoding %s not supported on the given " + "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf); + return AVERROR(EINVAL); + } + + /* TODO: enable when stable and tested. */ + if (s->query_props[qf].queryResultStatusSupport) + nb_q = 1; + + /* Create decode exec context. + * 4 async contexts per thread seems like a good number. */ + err = ff_vk_exec_pool_init(s, &ctx->qf_dec, &ctx->exec_pool, 4*avctx->thread_count, + nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0, + &ctx->profile); + if (err < 0) + goto fail; + + session_create.pVideoProfile = &ctx->profile; + session_create.flags = 0x0; + session_create.queueFamilyIndex = s->hwctx->queue_family_decode_index; + session_create.maxCodedExtent = ctx->common.caps.maxCodedExtent; + session_create.maxDpbSlots = ctx->common.caps.maxDpbSlots; + session_create.maxActiveReferencePictures = ctx->common.caps.maxActiveReferencePictures; + session_create.pictureFormat = ctx->pic_format; + session_create.referencePictureFormat = session_create.pictureFormat; + session_create.pStdHeaderVersion = dec_ext[avctx->codec_id]; + + err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); + if (err < 0) + goto fail; + + /* Get sampler */ + av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location); + yuv_sampler_info.xChromaOffset = cxpos >> 7; + yuv_sampler_info.yChromaOffset = cypos >> 7; + yuv_sampler_info.format = ctx->pic_format; + ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info, + s->hwctx->alloc, &ctx->yuv_sampler); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + /* If doing an 
out-of-place decoding, create a DPB pool */ + if (ctx->dedicated_dpb) { + AVHWFramesContext *dpb_frames; + AVVulkanFramesContext *dpb_hwfc; + + ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->device_ref); + if (!ctx->dpb_hwfc_ref) { + err = AVERROR(ENOMEM); + goto fail; + } + + dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data; + dpb_frames->format = s->frames->format; + dpb_frames->sw_format = s->frames->sw_format; + dpb_frames->width = s->frames->width; + dpb_frames->height = s->frames->height; + + dpb_hwfc = dpb_frames->hwctx; + dpb_hwfc->create_pnext = &ctx->profile_list; + dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */ + + if (ctx->layered_dpb) + dpb_hwfc->nb_layers = ctx->common.caps.maxDpbSlots; + + err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref); + if (err < 0) + goto fail; + + if (ctx->layered_dpb) { + ctx->layered_frame = vk_get_dpb_pool(ctx); + if (!ctx->layered_frame) { + err = AVERROR(ENOMEM); + goto fail; + } + + err = vk_decode_create_view(ctx, &ctx->layered_view, &ctx->layered_aspect, + (AVVkFrame *)ctx->layered_frame->data[0]); + if (err < 0) + goto fail; + } + } + + session_params_create.videoSession = ctx->common.session; + ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, + s->hwctx->alloc, &ctx->empty_session_params); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ff_vk_decode_flush(avctx); + + av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization sucessful\n"); + + return 0; + +fail: + ff_vk_decode_uninit(avctx); + + return err; +} diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h new file mode 100644 index 0000000000..9f9676bbfa --- /dev/null +++ b/libavcodec/vulkan_decode.h @@ -0,0 +1,163 @@ +/* + * This file is part of FFmpeg.
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_DECODE_H +#define AVCODEC_VULKAN_DECODE_H + +#include "decode.h" +#include "hwconfig.h" +#include "internal.h" + +#include "vulkan_video.h" + +typedef struct FFVulkanDecodeContext { + FFVulkanContext s; + FFVkVideoCommon common; + + int dedicated_dpb; /* Oddity #1 - separate DPB images */ + int layered_dpb; /* Madness #1 - layered DPB images */ + + AVBufferRef *dpb_hwfc_ref; /* Only used for dedicated_dpb */ + + AVFrame *layered_frame; /* Only used for layered_dpb */ + VkImageView layered_view; + VkImageAspectFlags layered_aspect; + + VkVideoDecodeH264ProfileInfoKHR h264_profile; + VkVideoDecodeH264ProfileInfoKHR h265_profile; + VkVideoSessionParametersKHR empty_session_params; + + VkSamplerYcbcrConversion yuv_sampler; + VkVideoDecodeUsageInfoKHR usage; + VkVideoProfileInfoKHR profile; + VkVideoDecodeCapabilitiesKHR dec_caps; + VkVideoProfileListInfoKHR profile_list; + VkFormat pic_format; + enum AVPixelFormat sw_format; + int init; + + AVBufferRef *session_params; + + FFVkQueueFamilyCtx qf_dec; + FFVkExecPool exec_pool; + + AVBufferPool *tmp_pool; /* Pool for temporary data, if needed (HEVC) */ + size_t tmp_pool_ele_size; + + uint16_t last_ref_frames_in_use; +} FFVulkanDecodeContext; + +typedef struct 
FFVulkanDecodePicture { + AVFrame *dpb_frame; /* Only used for out-of-place decoding. */ + AVVkFrame *dpb_vkf; /* Only used for out-of-place decoding. */ + + VkImageView img_view_ref; /* Image representation view (reference) */ + VkImageView img_view_out; /* Image representation view (output-only) */ + VkImageAspectFlags img_aspect; /* Image plane mask bits */ + VkImageAspectFlags img_aspect_ref; /* Only used for out-of-place decoding */ + + VkSemaphore sem; + uint64_t sem_value; + + /* State */ + int update_params; + AVBufferRef *session_params; + + /* Current picture */ + VkVideoPictureResourceInfoKHR ref; + VkVideoReferenceSlotInfoKHR ref_slot; + + /* Picture refs. H264 has the maximum number of refs (36) of any supported codec. */ + VkVideoPictureResourceInfoKHR refs [36]; + VkVideoReferenceSlotInfoKHR ref_slots[36]; + + /* Main decoding struct */ + AVBufferRef *params_buf; + VkVideoDecodeInfoKHR decode_info; + + /* Slice data */ + uint8_t *slices; + size_t slices_size; + unsigned int slices_size_max; + uint32_t *slice_off; + unsigned int slice_off_max; + uint32_t nb_slices; +} FFVulkanDecodePicture; + +/** + * Initialize decoder. + */ +int ff_vk_decode_init(AVCodecContext *avctx); + +/** + * Initialize hw_frames_ctx with the parameters needed to decode the stream + * using the parameters from avctx. + * + * NOTE: if avctx->internal->hwaccel_priv_data exists, will partially initialize + * the context. + */ +int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + +/** + * Prepare a frame, creates the image view, and sets up the dpb fields. + */ +int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + int alloc_dpb); + +/** + * Add slice data to frame. + */ +int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp, + const uint8_t *data, size_t size, int add_startcode, + uint32_t *nb_slices, const uint32_t **offsets); + +/** + * Decode a frame. 
+ */ +int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]); + +/** + * Free a frame and its state. + */ +void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp); + +/** + * Get an FFVkBuffer suitable for decoding from. + */ +int ff_vk_get_decode_buffer(FFVulkanDecodeContext *ctx, AVBufferRef **buf, + void *create_pNext, size_t size); + +/** + * Free VkVideoSessionParametersKHR. + */ +void ff_vk_decode_free_params(void *opaque, uint8_t *data); + +/** + * Flush decoder. + */ +void ff_vk_decode_flush(AVCodecContext *avctx); + +/** + * Free decoder. + */ +int ff_vk_decode_uninit(AVCodecContext *avctx); + +#endif /* AVCODEC_VULKAN_DECODE_H */ -- 2.39.2 [-- Attachment #70: 0069-h264dec-add-hwaccel_params_buf.patch --] [-- Type: text/x-diff, Size: 2737 bytes --] From e26c514b35f5c87321a8fa6c6eb70b54220a92ed Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Tue, 17 Jan 2023 05:01:45 +0100 Subject: [PATCH 69/72] h264dec: add hwaccel_params_buf --- libavcodec/h264_slice.c | 4 ++++ libavcodec/h264dec.c | 4 ++++ libavcodec/h264dec.h | 2 ++ 3 files changed, 10 insertions(+) diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index c0aa31bcd9..0c7f80c018 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -347,6 +347,10 @@ int ff_h264_update_thread_context(AVCodecContext *dst, return ret; } + ret = av_buffer_replace(&h->hwaccel_params_buf, h1->hwaccel_params_buf); + if (ret < 0) + return ret; + ret = av_buffer_replace(&h->ps.pps_ref, h1->ps.pps_ref); if (ret < 0) return ret; diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index 995bf17a8f..f6059da950 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -341,6 +341,7 @@ static av_cold int h264_decode_end(AVCodecContext *avctx) H264Context *h = avctx->priv_data; int i; + av_buffer_unref(&h->hwaccel_params_buf); ff_h264_remove_all_refs(h); ff_h264_free_tables(h); 
@@ -470,6 +471,7 @@ static void h264_decode_flush(AVCodecContext *avctx) ff_h264_flush_change(h); ff_h264_sei_uninit(&h->sei); + av_buffer_unref(&h->hwaccel_params_buf); for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) ff_h264_unref_picture(h, &h->DPB[i]); @@ -669,6 +671,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size) avpriv_request_sample(avctx, "data partitioning"); break; case H264_NAL_SEI: + av_buffer_unref(&h->hwaccel_params_buf); if (h->setup_finished) { avpriv_request_sample(avctx, "Late SEI"); break; @@ -682,6 +685,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size) break; case H264_NAL_SPS: { GetBitContext tmp_gb = nal->gb; + av_buffer_unref(&h->hwaccel_params_buf); if (avctx->hwaccel && avctx->hwaccel->decode_params) { ret = avctx->hwaccel->decode_params(avctx, nal->type, diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h index 1b18aba71f..5b1620c3f1 100644 --- a/libavcodec/h264dec.h +++ b/libavcodec/h264dec.h @@ -342,6 +342,8 @@ typedef struct H264Context { H264Picture cur_pic; H264Picture last_pic_for_ec; + AVBufferRef *hwaccel_params_buf; + H264SliceContext *slice_ctx; int nb_slice_ctx; int nb_slice_ctx_queued; -- 2.39.2 [-- Attachment #71: 0070-h264dec-add-Vulkan-hwaccel.patch --] [-- Type: text/x-diff, Size: 27544 bytes --] From b5ff58808482bedf12b981ee1c03dd95099a9332 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 01:13:01 +0100 Subject: [PATCH 70/72] h264dec: add Vulkan hwaccel Thanks to Dave Airlie for figuring out a lot of the parameters. 
--- configure | 2 + libavcodec/Makefile | 1 + libavcodec/h264_slice.c | 12 +- libavcodec/h264dec.c | 3 + libavcodec/hwaccels.h | 1 + libavcodec/vulkan_h264.c | 521 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 539 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vulkan_h264.c diff --git a/configure b/configure index 91f715351c..60973c38b3 100755 --- a/configure +++ b/configure @@ -3034,6 +3034,8 @@ h264_vdpau_hwaccel_deps="vdpau" h264_vdpau_hwaccel_select="h264_decoder" h264_videotoolbox_hwaccel_deps="videotoolbox" h264_videotoolbox_hwaccel_select="h264_decoder" +h264_vulkan_hwaccel_deps="vulkan" +h264_vulkan_hwaccel_select="h264_decoder" hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" hevc_d3d11va_hwaccel_select="hevc_decoder" hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index eabf4eb43e..4c9db167a5 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -992,6 +992,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o +OBJS-$(CONFIG_H264_VULKAN_HWACCEL) += vulkan_decode.o vulkan_h264.o OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 0c7f80c018..50d138e2a9 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -782,7 +782,8 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) CONFIG_H264_NVDEC_HWACCEL + \ CONFIG_H264_VAAPI_HWACCEL + \ CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \ - CONFIG_H264_VDPAU_HWACCEL) + CONFIG_H264_VDPAU_HWACCEL + \ + CONFIG_H264_VULKAN_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; const enum AVPixelFormat *choices = pix_fmts; int i; @@ -803,6 +804,9 @@ static 
enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL if (h->avctx->colorspace != AVCOL_SPC_RGB) *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_H264_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif if (CHROMA444(h)) { if (h->avctx->colorspace == AVCOL_SPC_RGB) { @@ -815,6 +819,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) *fmt++ = AV_PIX_FMT_YUV420P10; break; case 12: +#if CONFIG_H264_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif if (CHROMA444(h)) { if (h->avctx->colorspace == AVCOL_SPC_RGB) { *fmt++ = AV_PIX_FMT_GBRP12; @@ -840,6 +847,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback) #if CONFIG_H264_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif +#if CONFIG_H264_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif #if CONFIG_H264_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; #endif diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index f6059da950..15a6e74829 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -1100,6 +1100,9 @@ const FFCodec ff_h264_decoder = { #endif #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL HWACCEL_VIDEOTOOLBOX(h264), +#endif +#if CONFIG_H264_VULKAN_HWACCEL + HWACCEL_VULKAN(h264), #endif NULL }, diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index aca55831f3..23d0843c76 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -36,6 +36,7 @@ extern const AVHWAccel ff_h264_nvdec_hwaccel; extern const AVHWAccel ff_h264_vaapi_hwaccel; extern const AVHWAccel ff_h264_vdpau_hwaccel; extern const AVHWAccel ff_h264_videotoolbox_hwaccel; +extern const AVHWAccel ff_h264_vulkan_hwaccel; extern const AVHWAccel ff_hevc_d3d11va_hwaccel; extern const AVHWAccel ff_hevc_d3d11va2_hwaccel; extern const AVHWAccel ff_hevc_dxva2_hwaccel; diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c new file mode 100644 index 0000000000..241a7d8f5b --- /dev/null +++ b/libavcodec/vulkan_h264.c 
@@ -0,0 +1,521 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "h264dec.h" +#include "h264_ps.h" + +#include "vulkan_decode.h" + +const VkExtensionProperties ff_vk_dec_h264_ext = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION, +}; + +typedef struct H264VulkanDecodePicture { + FFVulkanDecodeContext *ctx; + FFVulkanDecodePicture vp; + + /* Current picture */ + StdVideoDecodeH264ReferenceInfo h264_ref; + VkVideoDecodeH264DpbSlotInfoKHR vkh264_ref; + + /* Picture refs */ + H264Picture *ref_src [H264_MAX_PICTURE_COUNT]; + StdVideoDecodeH264ReferenceInfo h264_refs [H264_MAX_PICTURE_COUNT]; + VkVideoDecodeH264DpbSlotInfoKHR vkh264_refs[H264_MAX_PICTURE_COUNT]; + + /* Current picture (contd.) 
*/ + StdVideoDecodeH264PictureInfo h264pic; + VkVideoDecodeH264PictureInfoKHR h264_pic_info; +} H264VulkanDecodePicture; + +static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + VkVideoDecodeH264DpbSlotInfoKHR *vkh264_ref, /* Goes in ^ */ + StdVideoDecodeH264ReferenceInfo *h264_ref, /* Goes in ^ */ + H264Picture *pic, int is_current, int picture_structure, + int dpb_slot_index) +{ + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(ctx, pic->f, vkpic, is_current, + ctx->dedicated_dpb); + if (err < 0) + return err; + + *h264_ref = (StdVideoDecodeH264ReferenceInfo) { + .FrameNum = pic->long_ref ? pic->pic_id : pic->frame_num, /* TODO: kinda sure */ + .PicOrderCnt = { pic->field_poc[0], pic->field_poc[1] }, + .flags = (StdVideoDecodeH264ReferenceInfoFlags) { + .top_field_flag = !!(picture_structure & PICT_TOP_FIELD), + .bottom_field_flag = !!(picture_structure & PICT_BOTTOM_FIELD), + .used_for_long_term_reference = pic->reference && pic->long_ref, + .is_non_existing = 0, + }, + }; + + *vkh264_ref = (VkVideoDecodeH264DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = h264_ref, + }; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = ctx->layered_dpb ? 
dpb_slot_index : 0, + .imageViewBinding = vkpic->img_view_ref, + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = vkh264_ref, + .slotIndex = dpb_slot_index, /* TODO: kinda sure */ + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static void set_sps(const SPS *sps, + StdVideoH264ScalingLists *vksps_scaling, + StdVideoH264HrdParameters *vksps_vui_header, + StdVideoH264SequenceParameterSetVui *vksps_vui, + StdVideoH264SequenceParameterSet *vksps) +{ + *vksps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = sps->scaling_matrix_present_mask, + .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */ + }; + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_matrix4[i], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**sps->scaling_matrix4)); + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_matrix8[i], + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**sps->scaling_matrix8)); + + *vksps_vui_header = (StdVideoH264HrdParameters) { + .cpb_cnt_minus1 = sps->cpb_cnt - 1, + .bit_rate_scale = sps->bit_rate_scale, + .initial_cpb_removal_delay_length_minus1 = sps->initial_cpb_removal_delay_length - 1, + .cpb_removal_delay_length_minus1 = sps->cpb_removal_delay_length - 1, + .dpb_output_delay_length_minus1 = sps->dpb_output_delay_length - 1, + .time_offset_length = sps->time_offset_length, + }; + + for (int i = 0; i < sps->cpb_cnt; i++) { + vksps_vui_header->bit_rate_value_minus1[i] = sps->bit_rate_value[i] - 1; + vksps_vui_header->cpb_size_value_minus1[i] = sps->cpb_size_value[i] - 1; + vksps_vui_header->cbr_flag[i] = sps->cpr_flag[i]; + } + + *vksps_vui = (StdVideoH264SequenceParameterSetVui) { + .aspect_ratio_idc = sps->vui.aspect_ratio_idc, + .sar_width = 
sps->vui.sar.num, + .sar_height = sps->vui.sar.den, + .video_format = sps->vui.video_format, + .colour_primaries = sps->vui.colour_primaries, + .transfer_characteristics = sps->vui.transfer_characteristics, + .matrix_coefficients = sps->vui.matrix_coeffs, + .num_units_in_tick = sps->num_units_in_tick, + .time_scale = sps->time_scale, + .pHrdParameters = vksps_vui_header, + .max_num_reorder_frames = sps->num_reorder_frames, + .max_dec_frame_buffering = sps->max_dec_frame_buffering, + .flags = (StdVideoH264SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag, + .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.video_full_range_flag, + .color_description_present_flag = sps->vui.colour_description_present_flag, + /* Presence flag, not the decoded AVChromaLocation enum value */ + .chroma_loc_info_present_flag = sps->vui.chroma_loc_info_present_flag, + .timing_info_present_flag = sps->timing_info_present_flag, + .fixed_frame_rate_flag = sps->fixed_frame_rate_flag, + .bitstream_restriction_flag = sps->bitstream_restriction_flag, + .nal_hrd_parameters_present_flag = sps->nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->vcl_hrd_parameters_present_flag, + }, + }; + + *vksps = (StdVideoH264SequenceParameterSet) { + .profile_idc = sps->profile_idc, + .level_idc = sps->level_idc, + .seq_parameter_set_id = sps->sps_id, + .chroma_format_idc = sps->chroma_format_idc, + .bit_depth_luma_minus8 = sps->bit_depth_luma - 8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, + .log2_max_frame_num_minus4 = sps->log2_max_frame_num - 4, + .pic_order_cnt_type = sps->poc_type, + .log2_max_pic_order_cnt_lsb_minus4 = sps->poc_type ? 
0 : sps->log2_max_poc_lsb - 4, + .offset_for_non_ref_pic = sps->offset_for_non_ref_pic, + .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field, + .num_ref_frames_in_pic_order_cnt_cycle = sps->poc_cycle_length, + .max_num_ref_frames = sps->ref_frame_count, + .pic_width_in_mbs_minus1 = sps->mb_width - 1, + .pic_height_in_map_units_minus1 = (sps->mb_height/(2 - sps->frame_mbs_only_flag)) - 1, + .frame_crop_left_offset = sps->crop_left, + .frame_crop_right_offset = sps->crop_right, + .frame_crop_top_offset = sps->crop_top, + .frame_crop_bottom_offset = sps->crop_bottom, + .flags = (StdVideoH264SpsFlags) { + .constraint_set0_flag = (sps->constraint_set_flags >> 0) & 0x1, + .constraint_set1_flag = (sps->constraint_set_flags >> 1) & 0x1, + .constraint_set2_flag = (sps->constraint_set_flags >> 2) & 0x1, + .constraint_set3_flag = (sps->constraint_set_flags >> 3) & 0x1, + .constraint_set4_flag = (sps->constraint_set_flags >> 4) & 0x1, + .constraint_set5_flag = (sps->constraint_set_flags >> 5) & 0x1, + .direct_8x8_inference_flag = sps->direct_8x8_inference_flag, + .mb_adaptive_frame_field_flag = sps->mb_aff, + .frame_mbs_only_flag = sps->frame_mbs_only_flag, + .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag, + .separate_colour_plane_flag = sps->residual_color_transform_flag, + .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag, + .qpprime_y_zero_transform_bypass_flag = sps->transform_bypass, + .frame_cropping_flag = sps->crop, + .seq_scaling_matrix_present_flag = sps->scaling_matrix_present, + .vui_parameters_present_flag = sps->vui_parameters_present_flag, + }, + .pOffsetForRefFrame = sps->offset_for_ref_frame, + .pScalingLists = vksps_scaling, + .pSequenceParameterSetVui = vksps_vui, + }; +} + +static void set_pps(const PPS *pps, const SPS *sps, + StdVideoH264ScalingLists *vkpps_scaling, + StdVideoH264PictureParameterSet *vkpps) +{ + *vkpps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask 
= pps->pic_scaling_matrix_present_mask, + .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */ + }; + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_matrix4[i], + STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**pps->scaling_matrix4)); + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_matrix8[i], + STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**pps->scaling_matrix8)); + + *vkpps = (StdVideoH264PictureParameterSet) { + .seq_parameter_set_id = pps->sps_id, + .pic_parameter_set_id = pps->pps_id, + .num_ref_idx_l0_default_active_minus1 = pps->ref_count[0] - 1, + .num_ref_idx_l1_default_active_minus1 = pps->ref_count[1] - 1, + .weighted_bipred_idc = pps->weighted_bipred_idc, + .pic_init_qp_minus26 = pps->init_qp - 26, + .pic_init_qs_minus26 = pps->init_qs - 26, + .chroma_qp_index_offset = pps->chroma_qp_index_offset[0], + .second_chroma_qp_index_offset = pps->chroma_qp_index_offset[1], + .flags = (StdVideoH264PpsFlags) { + .transform_8x8_mode_flag = pps->transform_8x8_mode, + .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present, + .constrained_intra_pred_flag = pps->constrained_intra_pred, + .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present, + .weighted_pred_flag = pps->weighted_pred, + .bottom_field_pic_order_in_frame_present_flag = pps->pic_order_present, + .entropy_coding_mode_flag = pps->cabac, + .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag, + }, + .pScalingLists = vkpps_scaling, + }; +} + +static int vk_h264_create_params(AVCodecContext *avctx, AVBufferRef **buf) +{ + VkResult ret; + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + const H264Context *h = avctx->priv_data; + + /* SPS */ + StdVideoH264ScalingLists 
vksps_scaling[MAX_SPS_COUNT]; + StdVideoH264HrdParameters vksps_vui_header[MAX_SPS_COUNT]; + StdVideoH264SequenceParameterSetVui vksps_vui[MAX_SPS_COUNT]; + StdVideoH264SequenceParameterSet vksps[MAX_SPS_COUNT]; + + /* PPS */ + StdVideoH264ScalingLists vkpps_scaling[MAX_PPS_COUNT]; + StdVideoH264PictureParameterSet vkpps[MAX_PPS_COUNT]; + + VkVideoDecodeH264SessionParametersAddInfoKHR h264_params_info = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR, + .pStdSPSs = vksps, + .stdSPSCount = 0, + .pStdPPSs = vkpps, + .stdPPSCount = 0, + }; + VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pParametersAddInfo = &h264_params_info, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &h264_params, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = NULL, + }; + + AVBufferRef *tmp; + VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par)); + if (!par) + return AVERROR(ENOMEM); + + /* SPS list: scan the whole table, skip empty slots, pack the output densely */ + for (int i = 0; i < MAX_SPS_COUNT; i++) { + if (h->ps.sps_list[i]) { + const SPS *sps_l = (const SPS *)h->ps.sps_list[i]->data; + int idx = h264_params_info.stdSPSCount; + set_sps(sps_l, &vksps_scaling[idx], &vksps_vui_header[idx], &vksps_vui[idx], &vksps[idx]); + h264_params_info.stdSPSCount++; + } + } + + /* PPS list: scan the whole table, skip empty slots, pack the output densely */ + for (int i = 0; i < MAX_PPS_COUNT; i++) { + if (h->ps.pps_list[i]) { + const PPS *pps_l = (const PPS *)h->ps.pps_list[i]->data; + int idx = h264_params_info.stdPPSCount; + set_pps(pps_l, pps_l->sps, &vkpps_scaling[idx], &vkpps[idx]); + h264_params_info.stdPPSCount++; + } + } + + h264_params.maxStdSPSCount = h264_params_info.stdSPSCount; + h264_params.maxStdPPSCount = h264_params_info.stdPPSCount; + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create, + ctx->s.hwctx->alloc, par); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: 
%s!\n", + ff_vk_ret2str(ret)); + av_free(par); /* not yet owned by an AVBufferRef, release it here */ + return AVERROR_EXTERNAL; + } + + tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params, + ctx, 0); + if (!tmp) { + ff_vk_decode_free_params(ctx, (uint8_t *)par); + return AVERROR(ENOMEM); + } + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS\n", + h264_params_info.stdSPSCount, h264_params_info.stdPPSCount); + + *buf = tmp; + + return 0; +} + +static int vk_h264_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + int dpb_slot_index = 0; + H264Context *h = avctx->priv_data; + H264Picture *pic = h->cur_pic_ptr; + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + if (!h->hwaccel_params_buf) { + err = vk_h264_create_params(avctx, &h->hwaccel_params_buf); + if (err < 0) + return err; + } + + vp->session_params = av_buffer_ref(h->hwaccel_params_buf); + if (!vp->session_params) + return AVERROR(ENOMEM); + + /* Fill in main slot */ + dpb_slot_index = 0; + for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { + if (pic == &h->DPB[slot]) { + dpb_slot_index = slot; + break; + } + } + + err = vk_h264_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &hp->vkh264_ref, &hp->h264_ref, pic, 1, + h->picture_structure, dpb_slot_index); + if (err < 0) + return err; + + /* Fill in short-term references */ + for (int i = 0; i < h->short_ref_count; i++) { + dpb_slot_index = 0; + for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { + if (h->short_ref[i] == &h->DPB[slot]) { + dpb_slot_index = slot; + break; + } + } + err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i], + &vp->refs[i], &hp->vkh264_refs[i], + &hp->h264_refs[i], h->short_ref[i], 0, + h->DPB[dpb_slot_index].picture_structure, + dpb_slot_index); + if (err < 0) + return err; + } + + /* Fill in long-term refs */ + for (int r = 0, i = 
h->short_ref_count; r < FF_ARRAY_ELEMS(h->long_ref) && + i < h->short_ref_count + h->long_ref_count; r++) { + /* r walks the long-term table, i is the next output reference slot; + * table entries may be empty, so only advance i for real pictures */ + if (!h->long_ref[r]) + continue; + + dpb_slot_index = 0; + for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { + if (h->long_ref[r] == &h->DPB[slot]) { + dpb_slot_index = slot; + break; + } + } + err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i], + &vp->refs[i], &hp->vkh264_refs[i], + &hp->h264_refs[i], h->long_ref[r], 0, + h->DPB[dpb_slot_index].picture_structure, + dpb_slot_index); + if (err < 0) + return err; + i++; + } + + hp->h264pic = (StdVideoDecodeH264PictureInfo) { + .seq_parameter_set_id = pic->pps->sps_id, + .pic_parameter_set_id = pic->pps->pps_id, + .frame_num = h->poc.frame_num, + .idr_pic_id = h->poc.idr_pic_id, + .PicOrderCnt[0] = pic->field_poc[0], + .PicOrderCnt[1] = pic->field_poc[1], + .flags = (StdVideoDecodeH264PictureInfoFlags) { + .field_pic_flag = FIELD_PICTURE(h), + .is_intra = 1, + .IdrPicFlag = h->picture_idr, + .bottom_field_flag = !!(h->picture_structure & PICT_BOTTOM_FIELD), + .is_reference = h->nal_ref_idc != 0, + + // TODO: Not sure about this + .complementary_field_pair = h->first_field && FIELD_PICTURE(h), + }, + }; + + hp->h264_pic_info = (VkVideoDecodeH264PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR, + .pStdPictureInfo = &hp->h264pic, + .sliceCount = 0, + }; + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &hp->h264_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = h->short_ref_count + h->long_ref_count, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->img_view_out, + }, + }; + + hp->ctx = ctx; + + return 0; +} + +static int vk_h264_decode_slice(AVCodecContext *avctx, + const 
uint8_t *data, + uint32_t size) +{ + const H264Context *h = avctx->priv_data; + const H264SliceContext *sl = &h->slice_ctx[0]; + H264VulkanDecodePicture *hp = h->cur_pic_ptr->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + int err = ff_vk_decode_add_slice(vp, data, size, 1, + &hp->h264_pic_info.sliceCount, + &hp->h264_pic_info.pSliceOffsets); + if (err < 0) + return err; + + /* Frame is only intra if all slices are marked as intra */ + if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI) + hp->h264pic.flags.is_intra = 0; + + return 0; +} + +static int vk_h264_end_frame(AVCodecContext *avctx) +{ + const H264Context *h = avctx->priv_data; + H264Picture *pic = h->cur_pic_ptr; + H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + FFVulkanDecodePicture *rvp[H264_MAX_PICTURE_COUNT] = { 0 }; + AVFrame *rav[H264_MAX_PICTURE_COUNT] = { 0 }; + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + H264Picture *rp = hp->ref_src[i]; + H264VulkanDecodePicture *rhp = rp->hwaccel_picture_private; + + rvp[i] = &rhp->vp; + rav[i] = hp->ref_src[i]->f; + } + + /* Cast so the argument always matches %lu, whatever the width of slices_size */ + av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n", + (unsigned long)vp->slices_size, hp->h264_pic_info.sliceCount); + + return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); +} + +static void vk_h264_free_frame_priv(AVCodecContext *avctx, void *data) +{ + H264VulkanDecodePicture *hp = data; + + /* Free frame resources, this also destroys the session parameters. 
*/ + ff_vk_decode_free_frame(hp->ctx, &hp->vp); + + /* Free frame context */ + av_free(hp); +} + +const AVHWAccel ff_h264_vulkan_hwaccel = { + .name = "h264_vulkan", + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_H264, + .pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_h264_start_frame, + .decode_slice = &vk_h264_decode_slice, + .end_frame = &vk_h264_end_frame, + .free_frame_priv = &vk_h264_free_frame_priv, + .frame_priv_data_size = sizeof(H264VulkanDecodePicture), + .init = &ff_vk_decode_init, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, +}; -- 2.39.2 [-- Attachment #72: 0071-hevcdec-add-hwaccel_params_buf.patch --] [-- Type: text/x-diff, Size: 2828 bytes --] From 756f3a7daf18f402ec56a7f52ea8742d905edf18 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Tue, 17 Jan 2023 05:02:02 +0100 Subject: [PATCH 71/72] hevcdec: add hwaccel_params_buf --- libavcodec/hevcdec.c | 9 +++++++++ libavcodec/hevcdec.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 7c9b46240c..5df831688c 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -2969,6 +2969,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) switch (s->nal_unit_type) { case HEVC_NAL_VPS: + av_buffer_unref(&s->hwaccel_params_buf); if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) { ret = s->avctx->hwaccel->decode_params(s->avctx, nal->type, @@ -2982,6 +2983,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) goto fail; break; case HEVC_NAL_SPS: + av_buffer_unref(&s->hwaccel_params_buf); if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) { ret = s->avctx->hwaccel->decode_params(s->avctx, nal->type, @@ -2996,6 +2998,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) goto fail; break; case HEVC_NAL_PPS: 
+ av_buffer_unref(&s->hwaccel_params_buf); if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) { ret = s->avctx->hwaccel->decode_params(s->avctx, nal->type, @@ -3455,6 +3458,7 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ff_dovi_ctx_unref(&s->dovi_ctx); av_buffer_unref(&s->rpu_buf); + av_buffer_unref(&s->hwaccel_params_buf); av_freep(&s->md5_ctx); @@ -3606,6 +3610,10 @@ static int hevc_update_thread_context(AVCodecContext *dst, if (ret < 0) return ret; + ret = av_buffer_replace(&s->hwaccel_params_buf, s0->hwaccel_params_buf); + if (ret < 0) + return ret; + ret = av_buffer_replace(&s->rpu_buf, s0->rpu_buf); if (ret < 0) return ret; @@ -3683,6 +3691,7 @@ static void hevc_decode_flush(AVCodecContext *avctx) s->max_ra = INT_MAX; s->eos = 1; + av_buffer_unref(&s->hwaccel_params_buf); if (avctx->hwaccel->flush) avctx->hwaccel->flush(avctx); } diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h index 15c4113bdd..774cd95947 100644 --- a/libavcodec/hevcdec.h +++ b/libavcodec/hevcdec.h @@ -509,6 +509,8 @@ typedef struct HEVCContext { uint8_t *sao_pixel_buffer_h[3]; uint8_t *sao_pixel_buffer_v[3]; + AVBufferRef *hwaccel_params_buf; + HEVCParamSets ps; HEVCSEI sei; struct AVMD5 *md5_ctx; -- 2.39.2 [-- Attachment #73: 0072-hevcdec-add-Vulkan-hwaccel.patch --] [-- Type: text/x-diff, Size: 50457 bytes --] From d47cb5940bc4808fea572b530eb1b9bf11159540 Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Wed, 14 Dec 2022 08:27:18 +0100 Subject: [PATCH 72/72] hevcdec: add Vulkan hwaccel Thanks to Dave Airlie for figuring out a lot of the parameters. 
--- configure | 2 + libavcodec/Makefile | 1 + libavcodec/hevcdec.c | 27 +- libavcodec/hwaccels.h | 1 + libavcodec/vulkan_hevc.c | 904 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 934 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vulkan_hevc.c diff --git a/configure b/configure index 60973c38b3..8f7b918565 100755 --- a/configure +++ b/configure @@ -3050,6 +3050,8 @@ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" hevc_vdpau_hwaccel_select="hevc_decoder" hevc_videotoolbox_hwaccel_deps="videotoolbox" hevc_videotoolbox_hwaccel_select="hevc_decoder" +hevc_vulkan_hwaccel_deps="vulkan" +hevc_vulkan_hwaccel_select="hevc_decoder" mjpeg_nvdec_hwaccel_deps="nvdec" mjpeg_nvdec_hwaccel_select="mjpeg_decoder" mjpeg_vaapi_hwaccel_deps="vaapi" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 4c9db167a5..6aa304071a 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -999,6 +999,7 @@ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o +OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL) += vulkan_decode.o vulkan_hevc.o OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o OBJS-$(CONFIG_MJPEG_VAAPI_HWACCEL) += vaapi_mjpeg.o OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL) += nvdec_mpeg12.o diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 5df831688c..0ad6418f8d 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -405,7 +405,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) CONFIG_HEVC_NVDEC_HWACCEL + \ CONFIG_HEVC_VAAPI_HWACCEL + \ CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ - CONFIG_HEVC_VDPAU_HWACCEL) + CONFIG_HEVC_VDPAU_HWACCEL + \ + CONFIG_HEVC_VULKAN_HWACCEL) enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; switch (sps->pix_fmt) { @@ -429,6 +430,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) 
#endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV420P10: @@ -445,6 +449,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; #endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif #if CONFIG_HEVC_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif @@ -464,6 +471,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV422P: @@ -473,11 +483,17 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; case AV_PIX_FMT_YUV444P10: #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif case AV_PIX_FMT_YUV420P12: case AV_PIX_FMT_YUV444P12: @@ -487,6 +503,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #if CONFIG_HEVC_VDPAU_HWACCEL *fmt++ = AV_PIX_FMT_VDPAU; #endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif #if CONFIG_HEVC_NVDEC_HWACCEL *fmt++ = AV_PIX_FMT_CUDA; #endif @@ -494,6 +513,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) case AV_PIX_FMT_YUV422P12: #if CONFIG_HEVC_VAAPI_HWACCEL *fmt++ = AV_PIX_FMT_VAAPI; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; #endif break; } @@ -3752,6 +3774,9 @@ const FFCodec ff_hevc_decoder = { #endif #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL HWACCEL_VIDEOTOOLBOX(hevc), +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + 
HWACCEL_VULKAN(hevc), #endif NULL }, diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 23d0843c76..a7c74d07cb 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -44,6 +44,7 @@ extern const AVHWAccel ff_hevc_nvdec_hwaccel; extern const AVHWAccel ff_hevc_vaapi_hwaccel; extern const AVHWAccel ff_hevc_vdpau_hwaccel; extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; +extern const AVHWAccel ff_hevc_vulkan_hwaccel; extern const AVHWAccel ff_mjpeg_nvdec_hwaccel; extern const AVHWAccel ff_mjpeg_vaapi_hwaccel; extern const AVHWAccel ff_mpeg1_nvdec_hwaccel; diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c new file mode 100644 index 0000000000..f4991d8f82 --- /dev/null +++ b/libavcodec/vulkan_hevc.c @@ -0,0 +1,904 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "hevcdec.h" +#include "hevc_ps.h" + +#include "vulkan_decode.h" + +const VkExtensionProperties ff_vk_dec_hevc_ext = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION, +}; + +typedef struct HEVCHeaderSPS { + StdVideoH265ScalingLists scaling; + StdVideoH265HrdParameters vui_header; + StdVideoH265SequenceParameterSetVui vui; + StdVideoH265ProfileTierLevel ptl; + StdVideoH265DecPicBufMgr dpbm; + StdVideoH265PredictorPaletteEntries pal; + StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS]; + StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS]; + StdVideoH265ShortTermRefPicSet str[HEVC_MAX_SHORT_TERM_REF_PIC_SETS]; + StdVideoH265LongTermRefPicsSps ltr[HEVC_MAX_LONG_TERM_REF_PICS]; +} HEVCHeaderSPS; + +typedef struct HEVCHeaderPPS { + StdVideoH265ScalingLists scaling; + StdVideoH265PredictorPaletteEntries pal; +} HEVCHeaderPPS; + +typedef struct HEVCHeaderVPSSet { + StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS]; + StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS]; +} HEVCHeaderVPSSet; + +typedef struct HEVCHeaderVPS { + StdVideoH265ProfileTierLevel ptl; + StdVideoH265DecPicBufMgr dpbm; + StdVideoH265HrdParameters hdr[HEVC_MAX_LAYER_SETS]; + HEVCHeaderVPSSet sls[]; +} HEVCHeaderVPS; + +typedef struct HEVCHeaderSet { + StdVideoH265SequenceParameterSet sps[HEVC_MAX_SPS_COUNT]; + HEVCHeaderSPS hsps[HEVC_MAX_SPS_COUNT]; + + StdVideoH265PictureParameterSet pps[HEVC_MAX_PPS_COUNT]; + HEVCHeaderPPS hpps[HEVC_MAX_PPS_COUNT]; + + /* Sized to match the HEVC_MAX_VPS_COUNT entries budgeted in get_data_set_buf() */ + StdVideoH265VideoParameterSet vps[HEVC_MAX_VPS_COUNT]; + HEVCHeaderVPS hvps[]; +} HEVCHeaderSet; + +static int get_data_set_buf(FFVulkanDecodeContext *s, AVBufferRef **data_buf, + 
int nb_vps, AVBufferRef * const vps_list[HEVC_MAX_VPS_COUNT]) +{ + size_t buf_size = sizeof(HEVCHeaderSPS)*HEVC_MAX_SPS_COUNT + + sizeof(HEVCHeaderPPS)*HEVC_MAX_PPS_COUNT + + sizeof(StdVideoH265SequenceParameterSet)*HEVC_MAX_SPS_COUNT + + sizeof(StdVideoH265PictureParameterSet)*HEVC_MAX_PPS_COUNT + + sizeof(StdVideoH265VideoParameterSet)*HEVC_MAX_VPS_COUNT; + + buf_size += (sizeof(StdVideoH265ProfileTierLevel) + + sizeof(StdVideoH265DecPicBufMgr) + + sizeof(StdVideoH265HrdParameters)*HEVC_MAX_LAYER_SETS)*nb_vps; + + for (int i = 0; i < nb_vps; i++) { + const HEVCVPS *vps = (const HEVCVPS *)vps_list[i]->data; + buf_size += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters; + } + + if (buf_size > s->tmp_pool_ele_size) { + av_buffer_pool_uninit(&s->tmp_pool); + s->tmp_pool_ele_size = 0; + s->tmp_pool = av_buffer_pool_init(buf_size, NULL); + if (!s->tmp_pool) + return AVERROR(ENOMEM); + s->tmp_pool_ele_size = buf_size; + } + + *data_buf = av_buffer_pool_get(s->tmp_pool); + if (!(*data_buf)) + return AVERROR(ENOMEM); + + return 0; +} + +typedef struct HEVCVulkanDecodePicture { + FFVulkanDecodeContext *ctx; + FFVulkanDecodePicture vp; + + /* Current picture */ + StdVideoDecodeH265ReferenceInfo h265_ref; + VkVideoDecodeH265DpbSlotInfoKHR vkh265_ref; + + /* Picture refs */ + HEVCFrame *ref_src [HEVC_MAX_REFS]; + StdVideoDecodeH265ReferenceInfo h265_refs [HEVC_MAX_REFS]; + VkVideoDecodeH265DpbSlotInfoKHR vkh265_refs[HEVC_MAX_REFS]; + + /* Current picture (contd.) 
*/ + StdVideoDecodeH265PictureInfo h265pic; + VkVideoDecodeH265PictureInfoKHR h265_pic_info; +} HEVCVulkanDecodePicture; + +static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + VkVideoDecodeH265DpbSlotInfoKHR *vkh265_ref, /* Goes in ^ */ + StdVideoDecodeH265ReferenceInfo *h265_ref, /* Goes in ^ */ + HEVCFrame *pic, int is_current, int pic_id) +{ + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(ctx, pic->frame, vkpic, is_current, + ctx->dedicated_dpb); + if (err < 0) + return err; + + *h265_ref = (StdVideoDecodeH265ReferenceInfo) { + .flags = (StdVideoDecodeH265ReferenceInfoFlags) { + .used_for_long_term_reference = pic->flags & HEVC_FRAME_FLAG_LONG_REF, + .unused_for_reference = 0, + }, + .PicOrderCntVal = pic->poc, + }; + + *vkh265_ref = (VkVideoDecodeH265DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = h265_ref, + }; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height }, + .baseArrayLayer = ctx->layered_dpb ? 
pic_id : 0, + .imageViewBinding = vkpic->img_view_ref, + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = vkh265_ref, + .slotIndex = pic_id, + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static void set_sps(const HEVCSPS *sps, int sps_idx, + StdVideoH265ScalingLists *vksps_scaling, + StdVideoH265HrdParameters *vksps_vui_header, + StdVideoH265SequenceParameterSetVui *vksps_vui, + StdVideoH265SequenceParameterSet *vksps, + StdVideoH265SubLayerHrdParameters *slhdrnal, + StdVideoH265SubLayerHrdParameters *slhdrvcl, + StdVideoH265ProfileTierLevel *ptl, + StdVideoH265DecPicBufMgr *dpbm, + StdVideoH265PredictorPaletteEntries *pal, + StdVideoH265ShortTermRefPicSet *str, + StdVideoH265LongTermRefPicsSps *ltr) +{ + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_list.sl[0][i], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList4x4)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_list.sl[1][i], + STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList8x8)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList16x16[i], sps->scaling_list.sl[2][i], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList16x16)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++) + memcpy(vksps_scaling->ScalingList32x32[i], sps->scaling_list.sl[3][i], + STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList32x32)); + + memcpy(vksps_scaling->ScalingListDCCoef16x16, sps->scaling_list.sl_dc[0], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef16x16)); + + 
memcpy(vksps_scaling->ScalingListDCCoef32x32, sps->scaling_list.sl_dc[1], + STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef32x32)); + + *vksps_vui_header = (StdVideoH265HrdParameters) { + .flags = (StdVideoH265HrdFlags) { + .nal_hrd_parameters_present_flag = sps->hdr.flags.nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->hdr.flags.vcl_hrd_parameters_present_flag, + .sub_pic_hrd_params_present_flag = sps->hdr.flags.sub_pic_hrd_params_present_flag, + .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->hdr.flags.sub_pic_cpb_params_in_pic_timing_sei_flag, + .fixed_pic_rate_general_flag = sps->hdr.flags.fixed_pic_rate_general_flag, + .fixed_pic_rate_within_cvs_flag = sps->hdr.flags.fixed_pic_rate_within_cvs_flag, + .low_delay_hrd_flag = sps->hdr.flags.low_delay_hrd_flag, + }, + .tick_divisor_minus2 = sps->hdr.tick_divisor_minus2, + .du_cpb_removal_delay_increment_length_minus1 = sps->hdr.du_cpb_removal_delay_increment_length_minus1, + .dpb_output_delay_du_length_minus1 = sps->hdr.dpb_output_delay_du_length_minus1, + .bit_rate_scale = sps->hdr.bit_rate_scale, + .cpb_size_scale = sps->hdr.cpb_size_scale, + .cpb_size_du_scale = sps->hdr.cpb_size_du_scale, + .initial_cpb_removal_delay_length_minus1 = sps->hdr.initial_cpb_removal_delay_length_minus1, + .au_cpb_removal_delay_length_minus1 = sps->hdr.au_cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = sps->hdr.dpb_output_delay_length_minus1, + /* Reserved - 3*16 bits */ + .pSubLayerHrdParametersNal = slhdrnal, + .pSubLayerHrdParametersNal = slhdrvcl, + }; + + memcpy(vksps_vui_header->cpb_cnt_minus1, sps->hdr.cpb_cnt_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->cpb_cnt_minus1)); + memcpy(vksps_vui_header->elemental_duration_in_tc_minus1, sps->hdr.elemental_duration_in_tc_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->elemental_duration_in_tc_minus1)); + + memcpy(slhdrnal, sps->hdr.nal_params, 
HEVC_MAX_SUB_LAYERS*sizeof(*slhdrnal)); + memcpy(slhdrvcl, sps->hdr.vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrvcl)); + + *vksps_vui = (StdVideoH265SequenceParameterSetVui) { + .flags = (StdVideoH265SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.common.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.common.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.common.overscan_appropriate_flag, + .video_signal_type_present_flag = sps->vui.common.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.common.video_full_range_flag, + .colour_description_present_flag = sps->vui.common.colour_description_present_flag, + .chroma_loc_info_present_flag = sps->vui.common.chroma_loc_info_present_flag, + .neutral_chroma_indication_flag = sps->vui.neutra_chroma_indication_flag, + .field_seq_flag = sps->vui.field_seq_flag, + .frame_field_info_present_flag = sps->vui.frame_field_info_present_flag, + .default_display_window_flag = sps->vui.default_display_window_flag, + .vui_timing_info_present_flag = sps->vui.vui_timing_info_present_flag, + .vui_poc_proportional_to_timing_flag = sps->vui.vui_poc_proportional_to_timing_flag, + .vui_hrd_parameters_present_flag = sps->vui.vui_hrd_parameters_present_flag, + .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag, + .tiles_fixed_structure_flag = sps->vui.tiles_fixed_structure_flag, + .motion_vectors_over_pic_boundaries_flag = sps->vui.motion_vectors_over_pic_boundaries_flag, + .restricted_ref_pic_lists_flag = sps->vui.restricted_ref_pic_lists_flag, + }, + .aspect_ratio_idc = sps->vui.common.aspect_ratio_idc, + .sar_width = sps->vui.common.sar.num, + .sar_height = sps->vui.common.sar.den, + .video_format = sps->vui.common.video_format, + .colour_primaries = sps->vui.common.colour_primaries, + .transfer_characteristics = sps->vui.common.transfer_characteristics, + .matrix_coeffs = sps->vui.common.matrix_coeffs, + .chroma_sample_loc_type_top_field = 
sps->vui.common.chroma_sample_loc_type_top_field, + .chroma_sample_loc_type_bottom_field = sps->vui.common.chroma_sample_loc_type_bottom_field, + /* Reserved */ + /* Reserved */ + .def_disp_win_left_offset = sps->vui.def_disp_win.left_offset, + .def_disp_win_right_offset = sps->vui.def_disp_win.right_offset, + .def_disp_win_top_offset = sps->vui.def_disp_win.top_offset, + .def_disp_win_bottom_offset = sps->vui.def_disp_win.bottom_offset, + .vui_num_units_in_tick = sps->vui.vui_num_units_in_tick, + .vui_time_scale = sps->vui.vui_time_scale, + .vui_num_ticks_poc_diff_one_minus1 = sps->vui.vui_num_ticks_poc_diff_one_minus1, + .min_spatial_segmentation_idc = sps->vui.min_spatial_segmentation_idc, + .max_bytes_per_pic_denom = sps->vui.max_bytes_per_pic_denom, + .max_bits_per_min_cu_denom = sps->vui.max_bits_per_min_cu_denom, + .log2_max_mv_length_horizontal = sps->vui.log2_max_mv_length_horizontal, + .log2_max_mv_length_vertical = sps->vui.log2_max_mv_length_vertical, + .pHrdParameters = vksps_vui_header, + }; + + *ptl = (StdVideoH265ProfileTierLevel) { + .flags = (StdVideoH265ProfileTierLevelFlags) { + .general_tier_flag = sps->ptl.general_ptl.tier_flag, + .general_progressive_source_flag = sps->ptl.general_ptl.progressive_source_flag, + .general_interlaced_source_flag = sps->ptl.general_ptl.interlaced_source_flag, + .general_non_packed_constraint_flag = sps->ptl.general_ptl.non_packed_constraint_flag, + .general_frame_only_constraint_flag = sps->ptl.general_ptl.frame_only_constraint_flag, + }, + .general_profile_idc = sps->ptl.general_ptl.profile_idc, + .general_level_idc = sps->ptl.general_ptl.level_idc, + }; + + for (int i = 0; i < sps->max_sub_layers; i++) { + dpbm->max_latency_increase_plus1[i] = sps->temporal_layer[i].max_latency_increase + 1; + dpbm->max_dec_pic_buffering_minus1[i] = sps->temporal_layer[i].max_dec_pic_buffering - 1; + dpbm->max_num_reorder_pics[i] = sps->temporal_layer[i].num_reorder_pics; + } + + for (int i = 0; i < (sps->chroma_format_idc ? 
3 : 1); i++) + for (int j = 0; j <= sps->sps_num_palette_predictor_initializer_minus1; j++) + pal->PredictorPaletteEntries[i][j] = sps->palette_predictor_initializers[i][j]; + + for (int i = 0; i < sps->nb_st_rps; i++) { + str[i] = (StdVideoH265ShortTermRefPicSet) { + .flags = (StdVideoH265ShortTermRefPicSetFlags) { + .inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict, + .delta_rps_sign = sps->st_rps[i].delta_rps_sign, + }, + .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1, + .use_delta_flag = sps->st_rps[i].use_delta_flag, + .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1, + /* Spec fucked this up + .used_by_curr_pic_flag = + .used_by_curr_pic_s0_flag = + .used_by_curr_pic_s1_flag = + */ + /* Reserved */ + /* Reserved */ + /* Reserved */ + .num_negative_pics = sps->st_rps[i].num_negative_pics, + .num_positive_pics = sps->st_rps[i].num_delta_pocs - sps->st_rps[i].num_negative_pics, + }; + + for (int j = 0; j < str[i].num_negative_pics; j++) + str[i].delta_poc_s0_minus1[j] = sps->st_rps[i].delta_poc_s0[j] - 1; + + for (int j = 0; j < str[i].num_positive_pics; j++) + str[i].delta_poc_s1_minus1[j] = sps->st_rps[i].delta_poc_s1[j] - 1; + } + + for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) { + ltr[i] = (StdVideoH265LongTermRefPicsSps) { + .used_by_curr_pic_lt_sps_flag = sps->used_by_curr_pic_lt_sps_flag[i], + /* Spec fucked this up too*/ + .lt_ref_pic_poc_lsb_sps[0] = sps->lt_ref_pic_poc_lsb_sps[i], + }; + } + + *vksps = (StdVideoH265SequenceParameterSet) { + .flags = (StdVideoH265SpsFlags) { + .sps_temporal_id_nesting_flag = sps->temporal_id_nesting_flag, + .separate_colour_plane_flag = sps->separate_colour_plane_flag, + .conformance_window_flag = sps->conformance_window_flag, + .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info_flag, + .scaling_list_enabled_flag = sps->scaling_list_enable_flag, + .sps_scaling_list_data_present_flag = sps->scaling_list_data_present_flag, + .amp_enabled_flag = 
sps->amp_enabled_flag, + .sample_adaptive_offset_enabled_flag = sps->sao_enabled, + .pcm_enabled_flag = sps->pcm_enabled_flag, + .pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag, + .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag, + .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag, + .strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag, + .vui_parameters_present_flag = sps->vui_present, + .sps_extension_present_flag = sps->sps_extension_present_flag, + .sps_range_extension_flag = sps->sps_range_extension_flag, + .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag, + .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag, + .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag, + .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag, + .extended_precision_processing_flag = sps->extended_precision_processing_flag, + .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag, + .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag, + .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag, + .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag, + .sps_scc_extension_flag = sps->sps_scc_extension_flag, + .sps_curr_pic_ref_enabled_flag = sps->sps_curr_pic_ref_enabled_flag, + .palette_mode_enabled_flag = sps->palette_mode_enabled_flag, + .sps_palette_predictor_initializers_present_flag = sps->sps_palette_predictor_initializer_present_flag, + .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disable_flag, + }, + .chroma_format_idc = sps->chroma_format_idc, + .pic_width_in_luma_samples = sps->width, + .pic_height_in_luma_samples = sps->height, + .sps_video_parameter_set_id = sps->vps_id, + .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, + .sps_seq_parameter_set_id = sps_idx, + 
.bit_depth_luma_minus8 = sps->bit_depth - 8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, + .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, + .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, + .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, + .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_transform_block_size, + .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, + .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, + .num_short_term_ref_pic_sets = sps->nb_st_rps, + .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, + .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, + .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, + .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, + .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, + /* Reserved */ + /* Reserved */ + .palette_max_size = sps->palette_max_size, + .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size, + .motion_vector_resolution_control_idc = sps->motion_vector_resolution_control_idc, + .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializer_minus1, + .conf_win_left_offset = sps->pic_conf_win.left_offset, + .conf_win_right_offset = sps->pic_conf_win.right_offset, + .conf_win_top_offset = sps->pic_conf_win.top_offset, + .conf_win_bottom_offset = sps->pic_conf_win.bottom_offset, + .pProfileTierLevel = ptl, + .pDecPicBufMgr = dpbm, + .pScalingLists = vksps_scaling, + .pShortTermRefPicSet = str, + .pLongTermRefPicsSps = ltr, + .pSequenceParameterSetVui = vksps_vui, + .pPredictorPaletteEntries = pal, + }; +} + +static void set_pps(const HEVCPPS *pps, const HEVCSPS *sps, + StdVideoH265ScalingLists 
*vkpps_scaling, + StdVideoH265PictureParameterSet *vkpps, + StdVideoH265PredictorPaletteEntries *pal) +{ + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_list.sl[0][i], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList4x4)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_list.sl[1][i], + STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList8x8)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList16x16[i], pps->scaling_list.sl[2][i], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList16x16)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++) + memcpy(vkpps_scaling->ScalingList32x32[i], pps->scaling_list.sl[3][i], + STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList32x32)); + + memcpy(vkpps_scaling->ScalingListDCCoef16x16, pps->scaling_list.sl_dc[0], + STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(*vkpps_scaling->ScalingListDCCoef16x16)); + + memcpy(vkpps_scaling->ScalingListDCCoef32x32, pps->scaling_list.sl_dc[1], + STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(*vkpps_scaling->ScalingListDCCoef32x32)); + + *vkpps = (StdVideoH265PictureParameterSet) { + .flags = (StdVideoH265PpsFlags) { + .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag, + .output_flag_present_flag = pps->output_flag_present_flag, + .sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag, + .cabac_init_present_flag = pps->cabac_init_present_flag, + .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, + .transform_skip_enabled_flag = pps->transform_skip_enabled_flag, + .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag, + 
.pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag, + .weighted_pred_flag = pps->weighted_pred_flag, + .weighted_bipred_flag = pps->weighted_bipred_flag, + .transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag, + .tiles_enabled_flag = pps->tiles_enabled_flag, + .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag, + .uniform_spacing_flag = pps->uniform_spacing_flag, + .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag, + .pps_loop_filter_across_slices_enabled_flag = pps->seq_loop_filter_across_slices_enabled_flag, + .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, + .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag, + .pps_deblocking_filter_disabled_flag = pps->disable_dbf, + .pps_scaling_list_data_present_flag = pps->scaling_list_data_present_flag, + .lists_modification_present_flag = pps->lists_modification_present_flag, + .slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag, + .pps_extension_present_flag = pps->pps_extension_present_flag, + .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag, + .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag, + .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag, + .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag, + .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag, + .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializer_present_flag, + .monochrome_palette_flag = pps->monochrome_palette_flag, + .pps_range_extension_flag = pps->pps_range_extensions_flag, + }, + .pps_pic_parameter_set_id = pps->pps_id, + .pps_seq_parameter_set_id = pps->sps_id, + .sps_video_parameter_set_id = sps->vps_id, + 
.num_extra_slice_header_bits = pps->num_extra_slice_header_bits, + .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1, + .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1, + .init_qp_minus26 = pps->pic_init_qp_minus26, + .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, + .pps_cb_qp_offset = pps->cb_qp_offset, + .pps_cr_qp_offset = pps->cr_qp_offset, + .pps_beta_offset_div2 = pps->beta_offset >> 1, + .pps_tc_offset_div2 = pps->tc_offset >> 1, + .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, + .log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size - 2, + .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth, + .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1, + .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma, + .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma, + .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset_plus5, + .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset_plus5, + .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset_plus3, + .pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializer, + .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry_minus8, + .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry_minus8, + .num_tile_columns_minus1 = pps->num_tile_columns - 1, + .num_tile_rows_minus1 = pps->num_tile_rows - 1, + .pScalingLists = vkpps_scaling, + .pPredictorPaletteEntries = pal, + }; + + for (int i = 0; i < (pps->monochrome_palette_flag ? 
1 : 3); i++) { + for (int j = 0; j < pps->pps_num_palette_predictor_initializer; j++) + pal->PredictorPaletteEntries[i][j] = pps->palette_predictor_initializers[i][j]; + } + + for (int i = 0; i < pps->num_tile_columns - 1; i++) + vkpps->column_width_minus1[i] = pps->column_width[i] - 1; + + for (int i = 0; i < pps->num_tile_rows - 1; i++) + vkpps->row_height_minus1[i] = pps->row_height[i] - 1; + + for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) { + vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i]; + vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i]; + } +} + +static void set_vps(const HEVCVPS *vps, + StdVideoH265VideoParameterSet *vkvps, + StdVideoH265ProfileTierLevel *ptl, + StdVideoH265DecPicBufMgr *dpbm, + StdVideoH265HrdParameters *sls_hdr, + HEVCHeaderVPSSet sls[]) +{ + for (int i = 0; i < vps->vps_num_hrd_parameters; i++) { + const HEVCHdrParams *src = &vps->hdr[i]; + + sls_hdr[i] = (StdVideoH265HrdParameters) { + .flags = (StdVideoH265HrdFlags) { + .nal_hrd_parameters_present_flag = src->flags.nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = src->flags.vcl_hrd_parameters_present_flag, + .sub_pic_hrd_params_present_flag = src->flags.sub_pic_hrd_params_present_flag, + .sub_pic_cpb_params_in_pic_timing_sei_flag = src->flags.sub_pic_cpb_params_in_pic_timing_sei_flag, + .fixed_pic_rate_general_flag = src->flags.fixed_pic_rate_general_flag, + .fixed_pic_rate_within_cvs_flag = src->flags.fixed_pic_rate_within_cvs_flag, + .low_delay_hrd_flag = src->flags.low_delay_hrd_flag, + }, + .tick_divisor_minus2 = src->tick_divisor_minus2, + .du_cpb_removal_delay_increment_length_minus1 = src->du_cpb_removal_delay_increment_length_minus1, + .dpb_output_delay_du_length_minus1 = src->dpb_output_delay_du_length_minus1, + .bit_rate_scale = src->bit_rate_scale, + .cpb_size_scale = src->cpb_size_scale, + .cpb_size_du_scale = src->cpb_size_du_scale, + .initial_cpb_removal_delay_length_minus1 = 
src->initial_cpb_removal_delay_length_minus1, + .au_cpb_removal_delay_length_minus1 = src->au_cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = src->dpb_output_delay_length_minus1, + /* Reserved - 3*16 bits */ + .pSubLayerHrdParametersNal = sls[i].nal_hdr, + .pSubLayerHrdParametersNal = sls[i].vcl_hdr, + }; + + memcpy(sls_hdr[i].cpb_cnt_minus1, src->cpb_cnt_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].cpb_cnt_minus1)); + memcpy(sls_hdr[i].elemental_duration_in_tc_minus1, src->elemental_duration_in_tc_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].elemental_duration_in_tc_minus1)); + + memcpy(sls[i].nal_hdr, src->nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].nal_hdr)); + memcpy(sls[i].vcl_hdr, src->vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].vcl_hdr)); + } + + *ptl = (StdVideoH265ProfileTierLevel) { + .flags = (StdVideoH265ProfileTierLevelFlags) { + .general_tier_flag = vps->ptl.general_ptl.tier_flag, + .general_progressive_source_flag = vps->ptl.general_ptl.progressive_source_flag, + .general_interlaced_source_flag = vps->ptl.general_ptl.interlaced_source_flag, + .general_non_packed_constraint_flag = vps->ptl.general_ptl.non_packed_constraint_flag, + .general_frame_only_constraint_flag = vps->ptl.general_ptl.frame_only_constraint_flag, + }, + .general_profile_idc = vps->ptl.general_ptl.profile_idc, + .general_level_idc = vps->ptl.general_ptl.level_idc, + }; + + for (int i = 0; i < vps->vps_max_sub_layers; i++) { + dpbm->max_latency_increase_plus1[i] = vps->vps_max_latency_increase[i] + 1; + dpbm->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering[i] - 1; + dpbm->max_num_reorder_pics[i] = vps->vps_num_reorder_pics[i]; + } + + *vkvps = (StdVideoH265VideoParameterSet) { + .flags = (StdVideoH265VpsFlags) { + .vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag, + .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag, + .vps_timing_info_present_flag 
= vps->vps_timing_info_present_flag, + .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag, + }, + .vps_video_parameter_set_id = vps->vps_id, + .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers - 1, + /* Reserved */ + /* Reserved */ + .vps_num_units_in_tick = vps->vps_num_units_in_tick, + .vps_time_scale = vps->vps_time_scale, + .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one - 1, + /* Reserved */ + .pDecPicBufMgr = dpbm, + .pHrdParameters = sls_hdr, + .pProfileTierLevel = ptl, + }; +} + +static int vk_hevc_create_params(AVCodecContext *avctx, AVBufferRef **buf) +{ + int err; + VkResult ret; + const HEVCContext *h = avctx->priv_data; + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VkVideoDecodeH265SessionParametersAddInfoKHR h265_params_info = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR, + .stdSPSCount = 0, + .stdPPSCount = 0, + .stdVPSCount = 0, + }; + VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pParametersAddInfo = &h265_params_info, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &h265_params, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = NULL, + }; + + int nb_vps = 0; + AVBufferRef *data_set; + HEVCHeaderSet *hdr; + + AVBufferRef *tmp; + VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par)); + if (!par) + return AVERROR(ENOMEM); + + for (int i = 0; h->ps.vps_list[i]; i++) + nb_vps++; + + err = get_data_set_buf(ctx, &data_set, nb_vps, h->ps.vps_list); + if (err < 0) + return err; + + hdr = (HEVCHeaderSet *)data_set->data; + + h265_params_info.pStdSPSs = hdr->sps; + h265_params_info.pStdPPSs = hdr->pps; + h265_params_info.pStdVPSs = hdr->vps; + + /* SPS list */ + for (int i 
= 0; h->ps.sps_list[i]; i++) { + const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[i]->data; + set_sps(sps_l, i, &hdr->hsps[i].scaling, &hdr->hsps[i].vui_header, + &hdr->hsps[i].vui, &hdr->sps[i], hdr->hsps[i].nal_hdr, + hdr->hsps[i].vcl_hdr, &hdr->hsps[i].ptl, &hdr->hsps[i].dpbm, + &hdr->hsps[i].pal, hdr->hsps[i].str, hdr->hsps[i].ltr); + h265_params_info.stdSPSCount++; + } + + /* PPS list */ + for (int i = 0; h->ps.pps_list[i]; i++) { + const HEVCPPS *pps_l = (const HEVCPPS *)h->ps.pps_list[i]->data; + const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[pps_l->sps_id]->data; + set_pps(pps_l, sps_l, &hdr->hpps[i].scaling, &hdr->pps[i], &hdr->hpps[i].pal); + h265_params_info.stdPPSCount++; + } + + /* VPS list */ + for (int i = 0; i < nb_vps; i++) { + const HEVCVPS *vps_l = (const HEVCVPS *)h->ps.vps_list[i]->data; + set_vps(vps_l, &hdr->vps[i], &hdr->hvps[i].ptl, &hdr->hvps[i].dpbm, + hdr->hvps[i].hdr, hdr->hvps[i].sls); + h265_params_info.stdVPSCount++; + } + + h265_params.maxStdSPSCount = h265_params_info.stdSPSCount; + h265_params.maxStdPPSCount = h265_params_info.stdPPSCount; + h265_params.maxStdVPSCount = h265_params_info.stdVPSCount; + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create, + ctx->s.hwctx->alloc, par); + av_buffer_unref(&data_set); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params, + ctx, 0); + if (!tmp) { + ff_vk_decode_free_params(ctx, (uint8_t *)par); + return AVERROR(ENOMEM); + } + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS %i VPS\n", + h265_params_info.stdSPSCount, h265_params_info.stdPPSCount, + h265_params_info.stdVPSCount); + + *buf = tmp; + + return 0; +} + +static int vk_hevc_start_frame(AVCodecContext *avctx, + av_unused const 
uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + HEVCContext *h = avctx->priv_data; + HEVCFrame *pic = h->ref; + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + const HEVCSPS *sps = h->ps.sps; + const HEVCPPS *pps = h->ps.pps; + int nb_refs = 0; + + if (!h->hwaccel_params_buf) { + err = vk_hevc_create_params(avctx, &h->hwaccel_params_buf); + if (err < 0) + return err; + } + + vp->session_params = av_buffer_ref(h->hwaccel_params_buf); + if (!vp->session_params) + return AVERROR(ENOMEM); + + hp->h265pic = (StdVideoDecodeH265PictureInfo) { + .flags = (StdVideoDecodeH265PictureInfoFlags) { + .IrapPicFlag = IS_IRAP(h), + .IdrPicFlag = IS_IDR(h), + .IsReference = h->nal_unit_type < 16 ? h->nal_unit_type & 1 : 1, + .short_term_ref_pic_set_sps_flag = h->sh.short_term_ref_pic_set_sps_flag, + }, + .sps_video_parameter_set_id = sps->vps_id, + .pps_seq_parameter_set_id = pps->sps_id, + .pps_pic_parameter_set_id = pps->pps_id, + .NumDeltaPocsOfRefRpsIdx = h->sh.short_term_rps ? h->sh.short_term_rps->rps_idx_num_delta_pocs : 0, + .PicOrderCntVal = h->poc, + .NumBitsForSTRefPicSetInSlice = !h->sh.short_term_ref_pic_set_sps_flag ? 
+ h->sh.bits_used_for_short_term_rps : 0, + }; + + /* Fill in references */ + for (int i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) { + const HEVCFrame *ref = &h->DPB[i]; + int idx = nb_refs; + + if (!(ref->flags & (HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF))) + continue; + + if (ref == pic) { + err = vk_hevc_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &hp->vkh265_ref, &hp->h265_ref, pic, 1, i); + if (err < 0) + return err; + + continue; + } + + err = vk_hevc_fill_pict(avctx, &hp->ref_src[idx], &vp->ref_slots[idx], + &vp->refs[idx], &hp->vkh265_refs[idx], + &hp->h265_refs[idx], (HEVCFrame *)ref, 0, i); + if (err < 0) + return err; + + nb_refs++; + } + + memset(hp->h265pic.RefPicSetStCurrBefore, 0xff, 8); + for (int i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { + HEVCFrame *frame = h->rps[ST_CURR_BEF].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) { + const HEVCFrame *ref = &h->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetStCurrBefore[i] = j; + break; + } + } + } + memset(hp->h265pic.RefPicSetStCurrAfter, 0xff, 8); + for (int i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { + HEVCFrame *frame = h->rps[ST_CURR_AFT].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) { + const HEVCFrame *ref = &h->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetStCurrAfter[i] = j; + break; + } + } + } + memset(hp->h265pic.RefPicSetLtCurr, 0xff, 8); + for (int i = 0; i < h->rps[LT_CURR].nb_refs; i++) { + HEVCFrame *frame = h->rps[LT_CURR].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) { + const HEVCFrame *ref = &h->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetLtCurr[i] = j; + break; + } + } + } + + hp->h265_pic_info = (VkVideoDecodeH265PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR, + .pStdPictureInfo = &hp->h265pic, + .sliceSegmentCount = 0, + .pSliceSegmentOffsets = vp->slice_off, + }; + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + 
.pNext = &hp->h265_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = nb_refs, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->img_view_out, + }, + }; + + hp->ctx = ctx; + + return 0; +} + +static int vk_hevc_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + const HEVCContext *h = avctx->priv_data; + HEVCVulkanDecodePicture *hp = h->ref->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + int err = ff_vk_decode_add_slice(vp, data, size, 1, + &hp->h265_pic_info.sliceSegmentCount, + &hp->h265_pic_info.pSliceSegmentOffsets); + if (err < 0) + return err; + + return 0; +} + +static int vk_hevc_end_frame(AVCodecContext *avctx) +{ + const HEVCContext *h = avctx->priv_data; + HEVCFrame *pic = h->ref; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + FFVulkanDecodePicture *rvp[HEVC_MAX_REFS] = { 0 }; + AVFrame *rav[HEVC_MAX_REFS] = { 0 }; + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + HEVCVulkanDecodePicture *rfhp = hp->ref_src[i]->hwaccel_picture_private; + rav[i] = hp->ref_src[i]->frame; + rvp[i] = &rfhp->vp; + } + + av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n", + vp->slices_size, hp->h265_pic_info.sliceSegmentCount); + + return ff_vk_decode_frame(avctx, pic->frame, vp, rav, rvp); +} + +static void vk_hevc_free_frame_priv(AVCodecContext *avctx, void *data) +{ + HEVCVulkanDecodePicture *hp = data; + + /* Free frame resources */ + ff_vk_decode_free_frame(hp->ctx, &hp->vp); + + /* Free frame context */ + av_free(hp); +} + +const AVHWAccel ff_hevc_vulkan_hwaccel = { + .name = "hevc_vulkan", + .type = AVMEDIA_TYPE_VIDEO, + .id = 
AV_CODEC_ID_HEVC, + .pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_hevc_start_frame, + .decode_slice = &vk_hevc_decode_slice, + .end_frame = &vk_hevc_end_frame, + .free_frame_priv = &vk_hevc_free_frame_priv, + .frame_priv_data_size = sizeof(HEVCVulkanDecodePicture), + .init = &ff_vk_decode_init, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, +}; -- 2.39.2 [-- Attachment #74: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2023-02-17 3:44 UTC|newest] Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-02-17 3:43 Lynne [this message] 2023-02-17 9:08 ` Jean-Baptiste Kempf 2023-02-17 9:45 ` Hendrik Leppkes 2023-02-17 10:45 ` Lynne 2023-02-17 11:04 ` Kieran Kunhya [not found] ` <CAK+ULv780c=z_dig_FAhPJ2poZ8u2_QOnnPUmV3SSiYoaQZ+tw@mail.gmail.com-NOU29aV----9> 2023-02-17 11:52 ` Lynne 2023-02-17 15:45 ` Michael Niedermayer 2023-02-17 16:35 ` Lynne 2023-02-18 19:02 ` Michael Niedermayer 2023-02-19 0:08 ` Lynne 2023-02-19 15:40 ` Michael Niedermayer 2023-02-19 15:44 ` Kieran Kunhya 2023-02-19 16:53 ` Lynne 2023-02-19 16:56 ` Jean-Baptiste Kempf 2023-02-19 16:58 ` Lynne 2023-02-19 17:02 ` Jean-Baptiste Kempf 2023-02-19 19:32 ` Niklas Haas 2023-02-19 18:50 ` Michael Niedermayer 2023-02-19 19:02 ` Lynne 2023-02-19 19:44 ` Michael Niedermayer 2023-02-19 20:00 ` Lynne 2023-02-19 20:14 ` Michael Niedermayer [not found] ` <NOea74V--3-9@lynne.ee-NOeaB9K--R-9> 2023-02-19 16:57 ` Lynne 2023-02-19 17:36 ` Kieran Kunhya 2023-02-19 17:42 ` Kieran Kunhya 2023-02-19 18:46 ` Lynne 2023-02-19 21:59 ` Kieran Kunhya 2023-02-19 23:50 ` Neal Gompa 2023-02-20 5:13 ` Jean-Baptiste Kempf 2023-02-20 9:18 ` Hendrik Leppkes 2023-02-20 16:51 ` Anton Khirnov 2023-02-20 16:56 ` Anton Khirnov 2023-02-20 17:21 ` Anton Khirnov 2023-02-20 17:40 ` Anton Khirnov
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=NOST85t--3-9@lynne.ee \ --to=dev@lynne.ee \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git