[FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding

* [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
@ 2023-02-17  3:43 Lynne
  2023-02-17  9:08 ` Jean-Baptiste Kempf
                   ` (5 more replies)
  0 siblings, 6 replies; 34+ messages in thread
From: Lynne @ 2023-02-17  3:43 UTC (permalink / raw)
  To: Ffmpeg Devel

[-- Attachment #1: Type: text/plain, Size: 338 bytes --]

This small patchset mostly rewrites Vulkan to enable using multiplane images,
and implements video decode support. Numerous bugs and issues were also
fixed, and there are quite a lot of performance improvements.

The patchset can be viewed here as well:
https://github.com/cyanreg/FFmpeg/tree/vulkan_staging

Patches attached.


[-- Attachment #2: 0001-h2645_vui-expose-aspect_ratio_idc.patch --]
[-- Type: text/x-diff, Size: 1857 bytes --]

From a03d8aa0e2aa961183440e85de3f4922b14f8075 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:02:11 +0100
Subject: [PATCH 01/72] h2645_vui: expose aspect_ratio_idc

---
 libavcodec/h2645_vui.c | 10 +++++-----
 libavcodec/h2645_vui.h |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c
index 0633fcbddd..93e83a9e1f 100644
--- a/libavcodec/h2645_vui.c
+++ b/libavcodec/h2645_vui.c
@@ -42,15 +42,15 @@ void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *l
 
     aspect_ratio_info_present_flag = get_bits1(gb);
     if (aspect_ratio_info_present_flag) {
-        uint8_t aspect_ratio_idc = get_bits(gb, 8);
-        if (aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect))
-            vui->sar = ff_h2645_pixel_aspect[aspect_ratio_idc];
-        else if (aspect_ratio_idc == EXTENDED_SAR) {
+        vui->aspect_ratio_idc = get_bits(gb, 8);
+        if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect))
+            vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc];
+        else if (vui->aspect_ratio_idc == EXTENDED_SAR) {
             vui->sar.num = get_bits(gb, 16);
             vui->sar.den = get_bits(gb, 16);
         } else
             av_log(logctx, AV_LOG_WARNING,
-                   "Unknown SAR index: %u.\n", aspect_ratio_idc);
+                   "Unknown SAR index: %u.\n", vui->aspect_ratio_idc);
     } else
         vui->sar = (AVRational){ 0, 1 };
 
diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h
index 638da7c366..f1aeab7758 100644
--- a/libavcodec/h2645_vui.h
+++ b/libavcodec/h2645_vui.h
@@ -26,6 +26,7 @@
 
 typedef struct H2645VUI {
     AVRational sar;
+    int aspect_ratio_idc;
 
     int overscan_info_present_flag;
     int overscan_appropriate_flag;
-- 
2.39.2


[-- Attachment #3: 0002-h2645_vui-expose-aspect_ratio_info_present_flag.patch --]
[-- Type: text/x-diff, Size: 1469 bytes --]

From 42ff928100caea41ffa55ea2c8a8181de39306b7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:03:44 +0100
Subject: [PATCH 02/72] h2645_vui: expose aspect_ratio_info_present_flag

---
 libavcodec/h2645_vui.c | 6 ++----
 libavcodec/h2645_vui.h | 1 +
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c
index 93e83a9e1f..e5c7bf46f9 100644
--- a/libavcodec/h2645_vui.c
+++ b/libavcodec/h2645_vui.c
@@ -36,12 +36,10 @@
 
 void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *logctx)
 {
-    int aspect_ratio_info_present_flag;
-
     av_log(logctx, AV_LOG_DEBUG, "Decoding VUI\n");
 
-    aspect_ratio_info_present_flag = get_bits1(gb);
-    if (aspect_ratio_info_present_flag) {
+    vui->aspect_ratio_info_present_flag = get_bits1(gb);
+    if (vui->aspect_ratio_info_present_flag) {
         vui->aspect_ratio_idc = get_bits(gb, 8);
         if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect))
             vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc];
diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h
index f1aeab7758..2c839f4b01 100644
--- a/libavcodec/h2645_vui.h
+++ b/libavcodec/h2645_vui.h
@@ -27,6 +27,7 @@
 typedef struct H2645VUI {
     AVRational sar;
     int aspect_ratio_idc;
+    int aspect_ratio_info_present_flag;
 
     int overscan_info_present_flag;
     int overscan_appropriate_flag;
-- 
2.39.2


[-- Attachment #4: 0003-h264_ps-expose-pps_id.patch --]
[-- Type: text/x-diff, Size: 1226 bytes --]

From 5e115cd41e2221cc8048932dfed362be6f80b74b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 18 Mar 2022 15:11:02 +0100
Subject: [PATCH 03/72] h264_ps: expose pps_id

---
 libavcodec/h264_ps.c | 1 +
 libavcodec/h264_ps.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index d0d1e65903..4ec5bd4e80 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -731,6 +731,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct
     if (!(bit_length & 7) && pps->data_size < sizeof(pps->data))
         pps->data[pps->data_size++] = 0x80;
 
+    pps->pps_id = pps_id;
     pps->sps_id = get_ue_golomb_31(gb);
     if ((unsigned)pps->sps_id >= MAX_SPS_COUNT ||
         !ps->sps_list[pps->sps_id]) {
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 5c35761fbc..c3f0888f24 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -103,6 +103,7 @@ typedef struct SPS {
  * Picture parameter set
  */
 typedef struct PPS {
+    unsigned int pps_id;
     unsigned int sps_id;
     int cabac;                  ///< entropy_coding_mode_flag
     int pic_order_present;      ///< pic_order_present_flag
-- 
2.39.2


[-- Attachment #5: 0004-h264_ps-set-pic_scaling_matrix_present_flag.patch --]
[-- Type: text/x-diff, Size: 3223 bytes --]

From 2720b9ff2a3d95c5d5887c2e06161de1691fc085 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 18 Mar 2022 16:17:33 +0100
Subject: [PATCH 04/72] h264_ps: set pic_scaling_matrix_present_flag

---
 libavcodec/h264_ps.c | 7 +++++--
 libavcodec/h264_ps.h | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index 4ec5bd4e80..a94f5350c4 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -226,6 +226,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size,
 /* returns non zero if the provided SPS scaling matrix has been filled */
 static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
                                     const PPS *pps, int is_sps,
+                                    int present_flag,
                                     uint8_t(*scaling_matrix4)[16],
                                     uint8_t(*scaling_matrix8)[64])
 {
@@ -237,7 +238,7 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
         fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1]
     };
     int ret = 0;
-    if (get_bits1(gb)) {
+    if (present_flag) {
         ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]);        // Intra, Y
         ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr
         ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb
@@ -368,7 +369,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx,
             goto fail;
         }
         sps->transform_bypass = get_bits1(gb);
-        ret = decode_scaling_matrices(gb, sps, NULL, 1,
+        ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb),
                                       sps->scaling_matrix4, sps->scaling_matrix8);
         if (ret < 0)
             goto fail;
@@ -803,7 +804,9 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct
     bits_left = bit_length - get_bits_count(gb);
     if (bits_left > 0 && more_rbsp_data_in_pps(sps, avctx)) {
         pps->transform_8x8_mode = get_bits1(gb);
+        pps->pic_scaling_matrix_present_flag = get_bits1(gb);
         ret = decode_scaling_matrices(gb, sps, pps, 0,
+                                pps->pic_scaling_matrix_present_flag,
                                 pps->scaling_matrix4, pps->scaling_matrix8);
         if (ret < 0)
             goto fail;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index c3f0888f24..d2413ae0f8 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -119,6 +119,7 @@ typedef struct PPS {
     int constrained_intra_pred;     ///< constrained_intra_pred_flag
     int redundant_pic_cnt_present;  ///< redundant_pic_cnt_present_flag
     int transform_8x8_mode;         ///< transform_8x8_mode_flag
+    int pic_scaling_matrix_present_flag;
     uint8_t scaling_matrix4[6][16];
     uint8_t scaling_matrix8[6][64];
     uint8_t chroma_qp_table[2][QP_MAX_NUM+1];  ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
-- 
2.39.2


[-- Attachment #6: 0005-h264_parser-expose-idr_pic_id.patch --]
[-- Type: text/x-diff, Size: 1437 bytes --]

From a9ae85816dfaa8791f974348825fc8ba9209423d Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 18:08:53 +0100
Subject: [PATCH 05/72] h264_parser: expose idr_pic_id

Vulkan needs it.
---
 libavcodec/h264_parse.h  | 1 +
 libavcodec/h264_parser.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_parse.h b/libavcodec/h264_parse.h
index 4ee863df66..4ba0add4f2 100644
--- a/libavcodec/h264_parse.h
+++ b/libavcodec/h264_parse.h
@@ -85,6 +85,7 @@ typedef struct H264POCContext {
     int delta_poc_bottom;
     int delta_poc[2];
     int frame_num;
+    int idr_pic_id;
     int prev_poc_msb;           ///< poc_msb of the last reference pic for POC type 0
     int prev_poc_lsb;           ///< poc_lsb of the last reference pic for POC type 0
     int frame_num_offset;       ///< for POC type 2
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index 46134a1c48..1c330484c1 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -432,7 +432,7 @@ static inline int parse_nal_units(AVCodecParserContext *s,
             }
 
             if (nal.type == H264_NAL_IDR_SLICE)
-                get_ue_golomb_long(&nal.gb); /* idr_pic_id */
+                p->poc.idr_pic_id = get_ue_golomb_long(&nal.gb); /* idr_pic_id */
             if (sps->poc_type == 0) {
                 p->poc.poc_lsb = get_bits(&nal.gb, sps->log2_max_poc_lsb);
 
-- 
2.39.2


[-- Attachment #7: 0006-h264_ps-comment-pic_order_present-better.patch --]
[-- Type: text/x-diff, Size: 997 bytes --]

From e42521563191a899d21fbf24e461bc6cb89661e9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:59:23 +0100
Subject: [PATCH 06/72] h264_ps: comment pic_order_present better

The official name which CBS uses is bottom_field_pic_order_in_frame_present_flag.
---
 libavcodec/h264_ps.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index d2413ae0f8..de4529b353 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -106,7 +106,7 @@ typedef struct PPS {
     unsigned int pps_id;
     unsigned int sps_id;
     int cabac;                  ///< entropy_coding_mode_flag
-    int pic_order_present;      ///< pic_order_present_flag
+    int pic_order_present;      ///< bottom_field_pic_order_in_frame_present_flag
     int slice_group_count;      ///< num_slice_groups_minus1 + 1
     int mb_slice_group_map_type;
     unsigned int ref_count[2];  ///< num_ref_idx_l0/1_active_minus1 + 1
-- 
2.39.2


[-- Attachment #8: 0007-h264_ps-expose-max_dec_frame_buffering.patch --]
[-- Type: text/x-diff, Size: 1396 bytes --]

From e222eaa26f4d8fd36dd04525d754dbf4800c502a Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:06:04 +0100
Subject: [PATCH 07/72] h264_ps: expose max_dec_frame_buffering

---
 libavcodec/h264_ps.c | 2 +-
 libavcodec/h264_ps.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index a94f5350c4..d9df570718 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -176,7 +176,7 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx,
         get_ue_golomb_31(gb); /* log2_max_mv_length_horizontal */
         get_ue_golomb_31(gb); /* log2_max_mv_length_vertical */
         sps->num_reorder_frames = get_ue_golomb_31(gb);
-        get_ue_golomb_31(gb); /*max_dec_frame_buffering*/
+        sps->max_dec_frame_buffering = get_ue_golomb_31(gb);
 
         if (get_bits_left(gb) < 0) {
             sps->num_reorder_frames         = 0;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index de4529b353..906bab7214 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -80,6 +80,7 @@ typedef struct SPS {
     int32_t offset_for_ref_frame[256];
     int bitstream_restriction_flag;
     int num_reorder_frames;
+    int max_dec_frame_buffering;
     int scaling_matrix_present;
     uint8_t scaling_matrix4[6][16];
     uint8_t scaling_matrix8[6][64];
-- 
2.39.2


[-- Attachment #9: 0008-h264_ps-expose-bit-rate-and-CPB-size-fields.patch --]
[-- Type: text/x-diff, Size: 2114 bytes --]

From 1279c6011c610fdb054cd9eea7a6f07c94f69f29 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:09:08 +0100
Subject: [PATCH 08/72] h264_ps: expose bit rate and CPB size fields

---
 libavcodec/h264_ps.c | 8 ++++----
 libavcodec/h264_ps.h | 4 ++++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index d9df570718..fc8715876a 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -113,12 +113,12 @@ static inline int decode_hrd_parameters(GetBitContext *gb, void *logctx,
         return AVERROR_INVALIDDATA;
     }
 
-    get_bits(gb, 4); /* bit_rate_scale */
+    sps->bit_rate_scale = get_bits(gb, 4);
     get_bits(gb, 4); /* cpb_size_scale */
     for (i = 0; i < cpb_count; i++) {
-        get_ue_golomb_long(gb); /* bit_rate_value_minus1 */
-        get_ue_golomb_long(gb); /* cpb_size_value_minus1 */
-        get_bits1(gb);          /* cbr_flag */
+        sps->bit_rate_value[i] = get_ue_golomb_long(gb) + 1; /* bit_rate_value_minus1 + 1 */
+        sps->cpb_size_value[i] = get_ue_golomb_long(gb) + 1; /* cpb_size_value_minus1 + 1 */
+        sps->cpr_flag[i]       = get_bits1(gb);
     }
     sps->initial_cpb_removal_delay_length = get_bits(gb, 5) + 1;
     sps->cpb_removal_delay_length         = get_bits(gb, 5) + 1;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 906bab7214..03bd0227d6 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -89,6 +89,10 @@ typedef struct SPS {
     int pic_struct_present_flag;
     int time_offset_length;
     int cpb_cnt;                          ///< See H.264 E.1.2
+    int bit_rate_scale;
+    uint32_t bit_rate_value[32];          ///< bit_rate_value_minus1 + 1
+    uint32_t cpb_size_value[32];          ///< cpb_size_value_minus1 + 1
+    uint8_t cpr_flag[32];
     int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 + 1
     int cpb_removal_delay_length;         ///< cpb_removal_delay_length_minus1 + 1
     int dpb_output_delay_length;          ///< dpb_output_delay_length_minus1 + 1
-- 
2.39.2


[-- Attachment #10: 0009-h264_ps-expose-scaling_matrix_present_mask.patch --]
[-- Type: text/x-diff, Size: 7404 bytes --]

From 3ef9965fe2fa33942eb5b5def748f3f6bf9e0afb Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 17:05:35 +0100
Subject: [PATCH 09/72] h264_ps: expose scaling_matrix_present_mask

Vulkan requires it.
It technically also requires use_default_scaling_matrix_mask,
but we can just be explicit and give it the matrix we fill in as
non-default.
---
 libavcodec/h264_ps.c | 37 +++++++++++++++++++++----------------
 libavcodec/h264_ps.h |  2 ++
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index fc8715876a..9f26514167 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -197,12 +197,14 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx,
 }
 
 static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size,
-                                const uint8_t *jvt_list,
-                                const uint8_t *fallback_list)
+                               const uint8_t *jvt_list, const uint8_t *fallback_list,
+                               uint16_t *mask, int pos)
 {
     int i, last = 8, next = 8;
     const uint8_t *scan = size == 16 ? ff_zigzag_scan : ff_zigzag_direct;
-    if (!get_bits1(gb)) /* matrix not written, we use the predicted one */
+    uint16_t seq_scaling_list_present_flag = get_bits1(gb);
+    *mask |= (seq_scaling_list_present_flag << pos);
+    if (!seq_scaling_list_present_flag) /* matrix not written, we use the predicted one */
         memcpy(factors, fallback_list, size * sizeof(uint8_t));
     else
         for (i = 0; i < size; i++) {
@@ -226,7 +228,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size,
 /* returns non zero if the provided SPS scaling matrix has been filled */
 static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
                                     const PPS *pps, int is_sps,
-                                    int present_flag,
+                                    int present_flag, uint16_t *mask,
                                     uint8_t(*scaling_matrix4)[16],
                                     uint8_t(*scaling_matrix8)[64])
 {
@@ -238,21 +240,22 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
         fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1]
     };
     int ret = 0;
+    *mask = 0x0;
     if (present_flag) {
-        ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]);        // Intra, Y
-        ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr
-        ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb
-        ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1]);        // Inter, Y
-        ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3]); // Inter, Cr
-        ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4]); // Inter, Cb
+        ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0], mask, 0);        // Intra, Y
+        ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0], mask, 1); // Intra, Cr
+        ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1], mask, 2); // Intra, Cb
+        ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1], mask, 3);        // Inter, Y
+        ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3], mask, 4); // Inter, Cr
+        ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4], mask, 5); // Inter, Cb
         if (is_sps || pps->transform_8x8_mode) {
-            ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2]); // Intra, Y
-            ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3]); // Inter, Y
+            ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2], mask, 6); // Intra, Y
+            ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3], mask, 7); // Inter, Y
             if (sps->chroma_format_idc == 3) {
-                ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0]); // Intra, Cr
-                ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3]); // Inter, Cr
-                ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1]); // Intra, Cb
-                ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4]); // Inter, Cb
+                ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0], mask,  8); // Intra, Cr
+                ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3], mask,  9); // Inter, Cr
+                ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1], mask, 10); // Intra, Cb
+                ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4], mask, 11); // Inter, Cb
             }
         }
         if (!ret)
@@ -370,6 +373,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx,
         }
         sps->transform_bypass = get_bits1(gb);
         ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb),
+                                      &sps->scaling_matrix_present_mask,
                                       sps->scaling_matrix4, sps->scaling_matrix8);
         if (ret < 0)
             goto fail;
@@ -807,6 +811,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct
         pps->pic_scaling_matrix_present_flag = get_bits1(gb);
         ret = decode_scaling_matrices(gb, sps, pps, 0,
                                 pps->pic_scaling_matrix_present_flag,
+                                &pps->pic_scaling_matrix_present_mask,
                                 pps->scaling_matrix4, pps->scaling_matrix8);
         if (ret < 0)
             goto fail;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 03bd0227d6..60ca9b3cd7 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -82,6 +82,7 @@ typedef struct SPS {
     int num_reorder_frames;
     int max_dec_frame_buffering;
     int scaling_matrix_present;
+    uint16_t scaling_matrix_present_mask;
     uint8_t scaling_matrix4[6][16];
     uint8_t scaling_matrix8[6][64];
     int nal_hrd_parameters_present_flag;
@@ -125,6 +126,7 @@ typedef struct PPS {
     int redundant_pic_cnt_present;  ///< redundant_pic_cnt_present_flag
     int transform_8x8_mode;         ///< transform_8x8_mode_flag
     int pic_scaling_matrix_present_flag;
+    uint16_t pic_scaling_matrix_present_mask;
     uint8_t scaling_matrix4[6][16];
     uint8_t scaling_matrix8[6][64];
     uint8_t chroma_qp_table[2][QP_MAX_NUM+1];  ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
-- 
2.39.2


[-- Attachment #11: 0010-h264dec-track-picture_structure-in-H264Picture.patch --]
[-- Type: text/x-diff, Size: 2132 bytes --]

From 52ab3cd8d165a838be92189c87c54915efc1c7e5 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 05:20:32 +0100
Subject: [PATCH 10/72] h264dec: track picture_structure in H264Picture

---
 libavcodec/h264_picture.c | 1 +
 libavcodec/h264_slice.c   | 1 +
 libavcodec/h264dec.h      | 1 +
 3 files changed, 3 insertions(+)

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index 2661ff4698..0348166c43 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -80,6 +80,7 @@ static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src)
     dst->mbaff         = src->mbaff;
     dst->field_picture = src->field_picture;
     dst->reference     = src->reference;
+    dst->picture_structure = src->picture_structure;
     dst->recovered     = src->recovered;
     dst->invalid_gap   = src->invalid_gap;
     dst->sei_recovery_frame_cnt = src->sei_recovery_frame_cnt;
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 6188c74632..8ac66b343c 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -491,6 +491,7 @@ static int h264_frame_start(H264Context *h)
     pic->reference              = h->droppable ? 0 : h->picture_structure;
     pic->f->coded_picture_number = h->coded_picture_number++;
     pic->field_picture          = h->picture_structure != PICT_FRAME;
+    pic->picture_structure       = h->picture_structure;
     pic->frame_num               = h->poc.frame_num;
     /*
      * Zero key_frame here; IDR markings per slice in frame or fields are ORed
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 9a1ec1bace..1b18aba71f 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -137,6 +137,7 @@ typedef struct H264Picture {
     int ref_count[2][2];    ///< number of entries in ref_poc         (FIXME need per slice)
     int mbaff;              ///< 1 -> MBAFF frame 0-> not MBAFF
     int field_picture;      ///< whether or not picture was encoded in separate fields
+    int picture_structure;  ///< picture structure
 
 /**
  * H264Picture.reference has this flag set,
-- 
2.39.2


[-- Attachment #12: 0011-hevc_ps-expose-SPS-and-VPS-headers.patch --]
[-- Type: text/x-diff, Size: 9068 bytes --]

From d80272e0759b686942f51b1c0c7615edb6a81bc6 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 01:29:57 +0100
Subject: [PATCH 11/72] hevc_ps: expose SPS and VPS headers

---
 libavcodec/hevc_ps.c | 100 ++++++++++++++++++++++---------------------
 libavcodec/hevc_ps.h |  41 ++++++++++++++++++
 2 files changed, 93 insertions(+), 48 deletions(-)

diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 5fe62ec35b..bd1f278b06 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -355,81 +355,84 @@ static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx,
 }
 
 static void decode_sublayer_hrd(GetBitContext *gb, unsigned int nb_cpb,
-                                int subpic_params_present)
+                                HEVCSublayerHdrParams *par, int subpic_params_present)
 {
     int i;
 
     for (i = 0; i < nb_cpb; i++) {
-        get_ue_golomb_long(gb); // bit_rate_value_minus1
-        get_ue_golomb_long(gb); // cpb_size_value_minus1
+        par->bit_rate_value_minus1[i] = get_ue_golomb_long(gb);
+        par->cpb_size_value_minus1[i] = get_ue_golomb_long(gb);
 
         if (subpic_params_present) {
-            get_ue_golomb_long(gb); // cpb_size_du_value_minus1
-            get_ue_golomb_long(gb); // bit_rate_du_value_minus1
+            par->cpb_size_du_value_minus1[i] = get_ue_golomb_long(gb);
+            par->bit_rate_du_value_minus1[i] = get_ue_golomb_long(gb);
         }
-        skip_bits1(gb); // cbr_flag
+
+        par->cbr_flag = get_bits1(gb);
     }
 }
 
 static int decode_hrd(GetBitContext *gb, int common_inf_present,
-                       int max_sublayers)
+                      HEVCHdrParams *hdr, int max_sublayers)
 {
-    int nal_params_present = 0, vcl_params_present = 0;
-    int subpic_params_present = 0;
-    int i;
-
     if (common_inf_present) {
-        nal_params_present = get_bits1(gb);
-        vcl_params_present = get_bits1(gb);
-
-        if (nal_params_present || vcl_params_present) {
-            subpic_params_present = get_bits1(gb);
-
-            if (subpic_params_present) {
-                skip_bits(gb, 8); // tick_divisor_minus2
-                skip_bits(gb, 5); // du_cpb_removal_delay_increment_length_minus1
-                skip_bits(gb, 1); // sub_pic_cpb_params_in_pic_timing_sei_flag
-                skip_bits(gb, 5); // dpb_output_delay_du_length_minus1
+        hdr->flags.nal_hrd_parameters_present_flag = get_bits1(gb);
+        hdr->flags.vcl_hrd_parameters_present_flag = get_bits1(gb);
+
+        if (hdr->flags.nal_hrd_parameters_present_flag ||
+            hdr->flags.vcl_hrd_parameters_present_flag) {
+            hdr->flags.sub_pic_hrd_params_present_flag = get_bits1(gb);
+
+            if (hdr->flags.sub_pic_hrd_params_present_flag) {
+                hdr->tick_divisor_minus2 = get_bits(gb, 8);
+                hdr->du_cpb_removal_delay_increment_length_minus1 = get_bits(gb, 5);
+                hdr->flags.sub_pic_cpb_params_in_pic_timing_sei_flag = get_bits1(gb);
+                hdr->dpb_output_delay_du_length_minus1 = get_bits(gb, 5);
             }
 
-            skip_bits(gb, 4); // bit_rate_scale
-            skip_bits(gb, 4); // cpb_size_scale
+            hdr->bit_rate_scale = get_bits(gb, 4);
+            hdr->cpb_size_scale = get_bits(gb, 4);
 
-            if (subpic_params_present)
-                skip_bits(gb, 4);  // cpb_size_du_scale
+            if (hdr->flags.sub_pic_hrd_params_present_flag)
+                hdr->cpb_size_du_scale = get_bits(gb, 4);
 
-            skip_bits(gb, 5); // initial_cpb_removal_delay_length_minus1
-            skip_bits(gb, 5); // au_cpb_removal_delay_length_minus1
-            skip_bits(gb, 5); // dpb_output_delay_length_minus1
+            hdr->initial_cpb_removal_delay_length_minus1 = get_bits(gb, 5);
+            hdr->au_cpb_removal_delay_length_minus1 = get_bits(gb, 5);
+            hdr->dpb_output_delay_length_minus1 = get_bits(gb, 5);
         }
     }
 
-    for (i = 0; i < max_sublayers; i++) {
-        int low_delay = 0;
-        unsigned int nb_cpb = 1;
-        int fixed_rate = get_bits1(gb);
+    for (int i = 0; i < max_sublayers; i++) {
+        hdr->flags.fixed_pic_rate_general_flag = get_bits1(gb);
+
+        hdr->cpb_cnt_minus1[i] = 1;
 
-        if (!fixed_rate)
-            fixed_rate = get_bits1(gb);
+        if (!hdr->flags.fixed_pic_rate_general_flag)
+            hdr->flags.fixed_pic_rate_within_cvs_flag = get_bits1(gb);
 
-        if (fixed_rate)
-            get_ue_golomb_long(gb);  // elemental_duration_in_tc_minus1
+        if (hdr->flags.fixed_pic_rate_within_cvs_flag)
+            hdr->elemental_duration_in_tc_minus1[i] = get_ue_golomb_long(gb);
         else
-            low_delay = get_bits1(gb);
+            hdr->flags.low_delay_hrd_flag = get_bits1(gb);
 
-        if (!low_delay) {
-            nb_cpb = get_ue_golomb_long(gb) + 1;
-            if (nb_cpb < 1 || nb_cpb > 32) {
-                av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", nb_cpb);
+        if (!hdr->flags.low_delay_hrd_flag) {
+            hdr->cpb_cnt_minus1[i] = get_ue_golomb_long(gb);
+            if (hdr->cpb_cnt_minus1[i] > 31) {
+                av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n",
+                       hdr->cpb_cnt_minus1[i]);
                 return AVERROR_INVALIDDATA;
             }
         }
 
-        if (nal_params_present)
-            decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
-        if (vcl_params_present)
-            decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
+        if (hdr->flags.nal_hrd_parameters_present_flag)
+            decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->nal_params[i],
+                                hdr->flags.sub_pic_hrd_params_present_flag);
+
+        if (hdr->flags.vcl_hrd_parameters_present_flag)
+            decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->vcl_params[i],
+                                hdr->flags.sub_pic_hrd_params_present_flag);
     }
+
     return 0;
 }
 
@@ -536,7 +539,8 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
             get_ue_golomb_long(gb); // hrd_layer_set_idx
             if (i)
                 common_inf_present = get_bits1(gb);
-            decode_hrd(gb, common_inf_present, vps->vps_max_sub_layers);
+            decode_hrd(gb, common_inf_present, &vps->hdr[i],
+                       vps->vps_max_sub_layers);
         }
     }
     get_bits1(gb); /* vps_extension_flag */
@@ -655,7 +659,7 @@ timing_info:
             vui->vui_num_ticks_poc_diff_one_minus1 = get_ue_golomb_long(gb);
         vui->vui_hrd_parameters_present_flag = get_bits1(gb);
         if (vui->vui_hrd_parameters_present_flag)
-            decode_hrd(gb, 1, sps->max_sub_layers);
+            decode_hrd(gb, 1, &sps->hdr, sps->max_sub_layers);
     }
 
     vui->bitstream_restriction_flag = get_bits1(gb);
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 18894cfed1..b61d3b32b3 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -32,6 +32,43 @@
 #include "h2645_vui.h"
 #include "hevc.h"
 
+typedef struct HEVCSublayerHdrParams {
+    uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t cpb_size_du_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t bit_rate_du_value_minus1[HEVC_MAX_CPB_CNT];
+    uint32_t cbr_flag;
+} HEVCSublayerHdrParams;
+
+typedef struct HEVCHdrFlagParams {
+    uint32_t nal_hrd_parameters_present_flag;
+    uint32_t vcl_hrd_parameters_present_flag;
+    uint32_t sub_pic_hrd_params_present_flag;
+    uint32_t sub_pic_cpb_params_in_pic_timing_sei_flag;
+    uint32_t fixed_pic_rate_general_flag;
+    uint32_t fixed_pic_rate_within_cvs_flag;
+    uint32_t low_delay_hrd_flag;
+} HEVCHdrFlagParams;
+
+typedef struct HEVCHdrParams {
+    HEVCHdrFlagParams flags;
+
+    uint8_t tick_divisor_minus2;
+    uint8_t du_cpb_removal_delay_increment_length_minus1;
+    uint8_t dpb_output_delay_du_length_minus1;
+    uint8_t bit_rate_scale;
+    uint8_t cpb_size_scale;
+    uint8_t cpb_size_du_scale;
+    uint8_t initial_cpb_removal_delay_length_minus1;
+    uint8_t au_cpb_removal_delay_length_minus1;
+    uint8_t dpb_output_delay_length_minus1;
+    uint8_t cpb_cnt_minus1[HEVC_MAX_SUB_LAYERS];
+    uint16_t elemental_duration_in_tc_minus1[HEVC_MAX_SUB_LAYERS];
+
+    HEVCSublayerHdrParams nal_params[HEVC_MAX_SUB_LAYERS];
+    HEVCSublayerHdrParams vcl_params[HEVC_MAX_SUB_LAYERS];
+} HEVCHdrParams;
+
 typedef struct ShortTermRPS {
     unsigned int num_negative_pics;
     int num_delta_pocs;
@@ -108,6 +145,8 @@ typedef struct PTL {
 } PTL;
 
 typedef struct HEVCVPS {
+    HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS];
+
     uint8_t vps_temporal_id_nesting_flag;
     int vps_max_layers;
     int vps_max_sub_layers; ///< vps_max_temporal_layers_minus1 + 1
@@ -146,6 +185,8 @@ typedef struct HEVCSPS {
 
     HEVCWindow pic_conf_win;
 
+    HEVCHdrParams hdr;
+
     int bit_depth;
     int bit_depth_chroma;
     int pixel_shift;
-- 
2.39.2


[-- Attachment #13: 0012-hevc_ps-expose-pps_id.patch --]
[-- Type: text/x-diff, Size: 1213 bytes --]

From d6e2ac33861642ac5dfa651963874c0f65d9b49b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 05:33:29 +0100
Subject: [PATCH 12/72] hevc_ps: expose pps_id

---
 libavcodec/hevc_ps.c | 2 +-
 libavcodec/hevc_ps.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index bd1f278b06..3242904473 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -1486,7 +1486,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
     pps->log2_max_transform_skip_block_size    = 2;
 
     // Coded parameters
-    pps_id = get_ue_golomb_long(gb);
+    pps_id = pps->pps_id = get_ue_golomb_long(gb);
     if (pps_id >= HEVC_MAX_PPS_COUNT) {
         av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
         ret = AVERROR_INVALIDDATA;
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index b61d3b32b3..4cfcbcf9ae 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -275,6 +275,7 @@ typedef struct HEVCSPS {
 } HEVCSPS;
 
 typedef struct HEVCPPS {
+    unsigned int pps_id;
     unsigned int sps_id; ///< seq_parameter_set_id
 
     uint8_t sign_data_hiding_flag;
-- 
2.39.2


[-- Attachment #14: 0013-hevc_ps-expose-vps_id.patch --]
[-- Type: text/x-diff, Size: 1162 bytes --]

From a09e6d7611f6e89ea3107c4581b27715a7ca480d Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 06:42:44 +0100
Subject: [PATCH 13/72] hevc_ps: expose vps_id

---
 libavcodec/hevc_ps.c | 2 +-
 libavcodec/hevc_ps.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 3242904473..a26f2940fc 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -462,7 +462,7 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
     }
     memcpy(vps->data, gb->buffer, vps->data_size);
 
-    vps_id = get_bits(gb, 4);
+    vps_id = vps->vps_id = get_bits(gb, 4);
 
     if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits
         av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n");
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 4cfcbcf9ae..571657d7fd 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -145,6 +145,7 @@ typedef struct PTL {
 } PTL;
 
 typedef struct HEVCVPS {
+    unsigned int vps_id;
     HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS];
 
     uint8_t vps_temporal_id_nesting_flag;
-- 
2.39.2


[-- Attachment #15: 0014-hevc_ps-expose-pps_extension_present_flag.patch --]
[-- Type: text/x-diff, Size: 1512 bytes --]

From 73a6b7e49ba8f01aefe2b7c152b2e2d04edaa3ee Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 12:49:45 +0100
Subject: [PATCH 14/72] hevc_ps: expose pps_extension_present_flag

---
 libavcodec/hevc_ps.c | 3 ++-
 libavcodec/hevc_ps.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index a26f2940fc..b1247bad67 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -1659,7 +1659,8 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
 
     pps->slice_header_extension_present_flag = get_bits1(gb);
 
-    if (get_bits1(gb)) { // pps_extension_present_flag
+    pps->pps_extension_present_flag = get_bits1(gb);
+    if (pps->pps_extension_present_flag) {
         pps->pps_range_extensions_flag = get_bits1(gb);
         skip_bits(gb, 7); // pps_extension_7bits
         if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) {
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 571657d7fd..f221640531 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -326,6 +326,7 @@ typedef struct HEVCPPS {
     int num_extra_slice_header_bits;
     uint8_t slice_header_extension_present_flag;
     uint8_t log2_max_transform_skip_block_size;
+    uint8_t pps_extension_present_flag;
     uint8_t pps_range_extensions_flag;
     uint8_t cross_component_prediction_enabled_flag;
     uint8_t chroma_qp_offset_list_enabled_flag;
-- 
2.39.2


[-- Attachment #16: 0015-hevcdec-expose-bits_used_for_short_term_rps.patch --]
[-- Type: text/x-diff, Size: 1228 bytes --]

From 68e33940f494112e359f6a0a769083c1dd82a1c4 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 17:11:36 +0100
Subject: [PATCH 15/72] hevcdec: expose bits_used_for_short_term_rps

---
 libavcodec/hevcdec.c | 1 +
 libavcodec/hevcdec.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 567e8d81d4..43cd963175 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -702,6 +702,7 @@ static int hls_slice_header(HEVCContext *s)
                 if (ret < 0)
                     return ret;
 
+                sh->bits_used_for_short_term_rps = pos - get_bits_left(gb);
                 sh->short_term_rps = &sh->slice_rps;
             } else {
                 int numbits, rps_idx;
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 9d3f4adbb3..15c4113bdd 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -268,6 +268,7 @@ typedef struct SliceHeader {
 
     ///< RPS coded in the slice header itself is stored here
     int short_term_ref_pic_set_sps_flag;
+    int bits_used_for_short_term_rps;
     int short_term_ref_pic_set_size;
     ShortTermRPS slice_rps;
     const ShortTermRPS *short_term_rps;
-- 
2.39.2


[-- Attachment #17: 0016-hevc_ps-expose-vui_present-sublayer_ordering_info-co.patch --]
[-- Type: text/x-diff, Size: 4332 bytes --]

From 46f18bf6af9e8ed0aaa82085a06b31dc8565e0df Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:17:51 +0100
Subject: [PATCH 16/72] hevc_ps: expose vui_present, sublayer_ordering_info,
 conformance_window_flag

---
 libavcodec/hevc_ps.c | 18 ++++++++++--------
 libavcodec/hevc_ps.h |  4 ++++
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index b1247bad67..a740da9f82 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -855,7 +855,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
     HEVCWindow *ow;
     int ret = 0;
     int log2_diff_max_min_transform_block_size;
-    int bit_depth_chroma, start, vui_present, sublayer_ordering_info;
+    int bit_depth_chroma, start;
     int i;
 
     // Coded parameters
@@ -904,7 +904,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
                                    sps->height, 0, avctx)) < 0)
         return ret;
 
-    if (get_bits1(gb)) { // pic_conformance_flag
+    sps->conformance_window_flag = get_bits1(gb);
+    if (sps->conformance_window_flag) { // pic_conformance_flag
         int vert_mult  = hevc_sub_height_c[sps->chroma_format_idc];
         int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc];
         sps->pic_conf_win.left_offset   = get_ue_golomb_long(gb) * horiz_mult;
@@ -951,8 +952,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
         return AVERROR_INVALIDDATA;
     }
 
-    sublayer_ordering_info = get_bits1(gb);
-    start = sublayer_ordering_info ? 0 : sps->max_sub_layers - 1;
+    sps->sublayer_ordering_info_flag = get_bits1(gb);
+    start = sps->sublayer_ordering_info_flag ? 0 : sps->max_sub_layers - 1;
     for (i = start; i < sps->max_sub_layers; i++) {
         sps->temporal_layer[i].max_dec_pic_buffering = get_ue_golomb_long(gb) + 1;
         sps->temporal_layer[i].num_reorder_pics      = get_ue_golomb_long(gb);
@@ -973,7 +974,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
         }
     }
 
-    if (!sublayer_ordering_info) {
+    if (!sps->sublayer_ordering_info_flag) {
         for (i = 0; i < start; i++) {
             sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[start].max_dec_pic_buffering;
             sps->temporal_layer[i].num_reorder_pics      = sps->temporal_layer[start].num_reorder_pics;
@@ -1015,7 +1016,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
     if (sps->scaling_list_enable_flag) {
         set_default_scaling_list_data(&sps->scaling_list);
 
-        if (get_bits1(gb)) {
+        sps->scaling_list_data_present_flag = get_bits1(gb);
+        if (sps->scaling_list_data_present_flag) {
             ret = scaling_list_data(gb, avctx, &sps->scaling_list, sps);
             if (ret < 0)
                 return ret;
@@ -1071,8 +1073,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
     sps->sps_temporal_mvp_enabled_flag          = get_bits1(gb);
     sps->sps_strong_intra_smoothing_enable_flag = get_bits1(gb);
     sps->vui.common.sar = (AVRational){0, 1};
-    vui_present = get_bits1(gb);
-    if (vui_present)
+    sps->vui_present = get_bits1(gb);
+    if (sps->vui_present)
         decode_vui(gb, avctx, apply_defdispwin, sps);
 
     if (get_bits1(gb)) { // sps_extension_flag
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index f221640531..549e0bdf57 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -184,6 +184,7 @@ typedef struct HEVCSPS {
 
     HEVCWindow output_window;
 
+    int conformance_window_flag;
     HEVCWindow pic_conf_win;
 
     HEVCHdrParams hdr;
@@ -196,6 +197,7 @@ typedef struct HEVCSPS {
     unsigned int log2_max_poc_lsb;
     int pcm_enabled_flag;
 
+    int sublayer_ordering_info_flag;
     int max_sub_layers;
     struct {
         int max_dec_pic_buffering;
@@ -204,10 +206,12 @@ typedef struct HEVCSPS {
     } temporal_layer[HEVC_MAX_SUB_LAYERS];
     uint8_t temporal_id_nesting_flag;
 
+    int vui_present;
     VUI vui;
     PTL ptl;
 
     uint8_t scaling_list_enable_flag;
+    int scaling_list_data_present_flag;
     ScalingList scaling_list;
 
     unsigned int nb_st_rps;
-- 
2.39.2


[-- Attachment #18: 0017-hevc_ps-expose-and-parse-scc-range-extension-fields.patch --]
[-- Type: text/x-diff, Size: 7752 bytes --]

From 4645f1fb3249f8249fdebaf9b3edffc848b9af3c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:18:42 +0100
Subject: [PATCH 17/72] hevc_ps: expose and parse scc range extension fields

---
 libavcodec/hevc.h    |  2 ++
 libavcodec/hevc_ps.c | 63 ++++++++++++++++++++++++++++++++++++++++----
 libavcodec/hevc_ps.h | 26 ++++++++++++++++++
 3 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index 1804755327..913c7d4e2e 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@@ -154,6 +154,8 @@ enum {
     // get near that, though, so set a lower limit here with the maximum
     // possible value for 4K video (at most 135 16x16 Ctb rows).
     HEVC_MAX_ENTRY_POINT_OFFSETS = HEVC_MAX_TILE_COLUMNS * 135,
+
+    HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE = 128,
 };
 
 
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index a740da9f82..b03f59efef 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -856,7 +856,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
     int ret = 0;
     int log2_diff_max_min_transform_block_size;
     int bit_depth_chroma, start;
-    int i;
+    int i, j;
 
     // Coded parameters
 
@@ -1077,9 +1077,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
     if (sps->vui_present)
         decode_vui(gb, avctx, apply_defdispwin, sps);
 
-    if (get_bits1(gb)) { // sps_extension_flag
+    sps->sps_extension_present_flag = get_bits1(gb);
+    if (sps->sps_extension_present_flag) { // sps_extension_flag
         sps->sps_range_extension_flag = get_bits1(gb);
-        skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7);
+        skip_bits(gb, 2);
+        sps->sps_scc_extension_flag = get_bits1(gb);
+        skip_bits(gb, 4);
         if (sps->sps_range_extension_flag) {
             sps->transform_skip_rotation_enabled_flag = get_bits1(gb);
             sps->transform_skip_context_enabled_flag  = get_bits1(gb);
@@ -1105,6 +1108,26 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
                 av_log(avctx, AV_LOG_WARNING,
                    "cabac_bypass_alignment_enabled_flag not yet implemented\n");
         }
+        if (sps->sps_scc_extension_flag) {
+            sps->sps_curr_pic_ref_enabled_flag = get_bits1(gb);
+            sps->palette_mode_enabled_flag = get_bits1(gb);
+            if (sps->palette_mode_enabled_flag) {
+                sps->palette_max_size = get_ue_golomb_long(gb);
+                sps->delta_palette_max_predictor_size = get_ue_golomb_long(gb);
+
+                sps->sps_palette_predictor_initializer_present_flag = get_bits1(gb);
+                if (sps->sps_palette_predictor_initializer_present_flag) {
+                    sps->sps_num_palette_predictor_initializer_minus1 = get_ue_golomb_long(gb);
+                    for (i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++) {
+                        for (j = 0; j <= sps->sps_num_palette_predictor_initializer_minus1; j++)
+                            sps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb);
+                    }
+                }
+            }
+
+            sps->motion_vector_resolution_control_idc = get_bits(gb, 2);
+            sps->intra_boundary_filtering_disable_flag = get_bits1(gb);
+        }
     }
     if (apply_defdispwin) {
         sps->output_window.left_offset   += sps->vui.def_disp_win.left_offset;
@@ -1446,7 +1469,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
                            HEVCParamSets *ps)
 {
     HEVCSPS      *sps = NULL;
-    int i, ret = 0;
+    int i, j, ret = 0;
     unsigned int pps_id = 0;
     ptrdiff_t nal_size;
     unsigned log2_parallel_merge_level_minus2;
@@ -1664,11 +1687,41 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
     pps->pps_extension_present_flag = get_bits1(gb);
     if (pps->pps_extension_present_flag) {
         pps->pps_range_extensions_flag = get_bits1(gb);
-        skip_bits(gb, 7); // pps_extension_7bits
+        skip_bits(gb, 2);
+        pps->pps_scc_extension_flag = get_bits1(gb);
+        skip_bits(gb, 4);
         if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) {
             if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0)
                 goto err;
         }
+        if (pps->pps_scc_extension_flag) {
+            pps->pps_curr_pic_ref_enabled_flag = get_bits1(gb);
+            pps->residual_adaptive_colour_transform_enabled_flag = get_bits1(gb);
+
+            if (pps->residual_adaptive_colour_transform_enabled_flag) {
+                pps->pps_slice_act_qp_offsets_present_flag = get_bits1(gb);
+                pps->pps_act_y_qp_offset_plus5 = get_se_golomb(gb);
+                pps->pps_act_cb_qp_offset_plus5 = get_se_golomb(gb);
+                pps->pps_act_cr_qp_offset_plus3 = get_se_golomb(gb);
+            }
+
+            pps->pps_palette_predictor_initializer_present_flag = get_bits1(gb);
+            if (pps->pps_palette_predictor_initializer_present_flag) {
+                pps->pps_num_palette_predictor_initializer = get_ue_golomb_long(gb);
+                if (pps->pps_num_palette_predictor_initializer) {
+                    pps->monochrome_palette_flag = get_bits1(gb);
+                    pps->luma_bit_depth_entry_minus8 = get_ue_golomb_long(gb);
+
+                    if (!pps->monochrome_palette_flag)
+                        pps->chroma_bit_depth_entry_minus8 = get_ue_golomb_long(gb);
+
+                    for (i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) {
+                        for (j = 0; j < pps->pps_num_palette_predictor_initializer; j++)
+                            pps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb);
+                    }
+                }
+            }
+        }
     }
 
     ret = setup_pps(avctx, gb, pps, sps);
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 549e0bdf57..8dddf7ef8d 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -210,6 +210,18 @@ typedef struct HEVCSPS {
     VUI vui;
     PTL ptl;
 
+    int sps_extension_present_flag;
+    int sps_scc_extension_flag;
+    int sps_curr_pic_ref_enabled_flag;
+    int palette_mode_enabled_flag;
+    uint8_t palette_max_size;
+    uint8_t delta_palette_max_predictor_size;
+    uint8_t motion_vector_resolution_control_idc;
+    uint8_t sps_num_palette_predictor_initializer_minus1;
+    int sps_palette_predictor_initializer_present_flag;
+    int intra_boundary_filtering_disable_flag;
+    uint16_t palette_predictor_initializers[3][HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE];
+
     uint8_t scaling_list_enable_flag;
     int scaling_list_data_present_flag;
     ScalingList scaling_list;
@@ -341,6 +353,20 @@ typedef struct HEVCPPS {
     uint8_t log2_sao_offset_scale_luma;
     uint8_t log2_sao_offset_scale_chroma;
 
+    int pps_scc_extension_flag;
+    int pps_curr_pic_ref_enabled_flag;
+    int residual_adaptive_colour_transform_enabled_flag;
+    int pps_slice_act_qp_offsets_present_flag;
+    int pps_palette_predictor_initializer_present_flag;
+    int pps_num_palette_predictor_initializer;
+    int monochrome_palette_flag;
+    int luma_bit_depth_entry_minus8;
+    int chroma_bit_depth_entry_minus8;
+    int pps_act_y_qp_offset_plus5;
+    int pps_act_cb_qp_offset_plus5;
+    int pps_act_cr_qp_offset_plus3;
+    uint16_t palette_predictor_initializers[3][HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE];
+
     // Inferred parameters
     unsigned int *column_width;  ///< ColumnWidth
     unsigned int *row_height;    ///< RowHeight
-- 
2.39.2


[-- Attachment #19: 0018-hevc_ps-expose-log2_diff_max_min_transform_block_siz.patch --]
[-- Type: text/x-diff, Size: 3078 bytes --]

From 141df2aaa6e9e256cf5260b919fb9151982dabe0 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 04:30:46 +0100
Subject: [PATCH 18/72] hevc_ps: expose log2_diff_max_min_transform_block_size

---
 libavcodec/hevc_ps.c | 18 +++++++++---------
 libavcodec/hevc_ps.h |  1 +
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index b03f59efef..2f0aff5a97 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -854,7 +854,6 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
 {
     HEVCWindow *ow;
     int ret = 0;
-    int log2_diff_max_min_transform_block_size;
     int bit_depth_chroma, start;
     int i, j;
 
@@ -982,12 +981,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
         }
     }
 
-    sps->log2_min_cb_size                    = get_ue_golomb_long(gb) + 3;
-    sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb);
-    sps->log2_min_tb_size                    = get_ue_golomb_long(gb) + 2;
-    log2_diff_max_min_transform_block_size   = get_ue_golomb_long(gb);
-    sps->log2_max_trafo_size                 = log2_diff_max_min_transform_block_size +
-                                               sps->log2_min_tb_size;
+    sps->log2_min_cb_size                       = get_ue_golomb_long(gb) + 3;
+    sps->log2_diff_max_min_coding_block_size    = get_ue_golomb_long(gb);
+    sps->log2_min_tb_size                       = get_ue_golomb_long(gb) + 2;
+    sps->log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb);
+    sps->log2_max_trafo_size                    = sps->log2_diff_max_min_transform_block_size +
+                                                  sps->log2_min_tb_size;
 
     if (sps->log2_min_cb_size < 3 || sps->log2_min_cb_size > 30) {
         av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size", sps->log2_min_cb_size);
@@ -1004,8 +1003,9 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
         return AVERROR_INVALIDDATA;
     }
 
-    if (log2_diff_max_min_transform_block_size < 0 || log2_diff_max_min_transform_block_size > 30) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", log2_diff_max_min_transform_block_size);
+    if (sps->log2_diff_max_min_transform_block_size > 30) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size",
+               sps->log2_diff_max_min_transform_block_size);
         return AVERROR_INVALIDDATA;
     }
 
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 8dddf7ef8d..88e73e97c8 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -253,6 +253,7 @@ typedef struct HEVCSPS {
     unsigned int log2_max_trafo_size;
     unsigned int log2_ctb_size;
     unsigned int log2_min_pu_size;
+    unsigned int log2_diff_max_min_transform_block_size;
 
     int max_transform_hierarchy_depth_inter;
     int max_transform_hierarchy_depth_intra;
-- 
2.39.2


[-- Attachment #20: 0019-hevc_ps-expose-rps-fields.patch --]
[-- Type: text/x-diff, Size: 4900 bytes --]

From b0e8756c78c95ff93b908612b76d2013f79d5c2b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:25:48 +0100
Subject: [PATCH 19/72] hevc_ps: expose rps fields

---
 libavcodec/hevc_ps.c | 37 ++++++++++++++++++-------------------
 libavcodec/hevc_ps.h |  7 +++++++
 2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 2f0aff5a97..745a4f270e 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -100,51 +100,50 @@ static void remove_vps(HEVCParamSets *s, int id)
 int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
                                   ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header)
 {
-    uint8_t rps_predict = 0;
     int delta_poc;
     int k0 = 0;
     int k  = 0;
     int i;
 
+    rps->rps_predict = 0;
+
     if (rps != sps->st_rps && sps->nb_st_rps)
-        rps_predict = get_bits1(gb);
+        rps->rps_predict = get_bits1(gb);
 
-    if (rps_predict) {
+    if (rps->rps_predict) {
         const ShortTermRPS *rps_ridx;
         int delta_rps;
-        unsigned abs_delta_rps;
-        uint8_t use_delta_flag = 0;
-        uint8_t delta_rps_sign;
 
         if (is_slice_header) {
-            unsigned int delta_idx = get_ue_golomb_long(gb) + 1;
-            if (delta_idx > sps->nb_st_rps) {
+            rps->delta_idx = get_ue_golomb_long(gb) + 1;
+            if (rps->delta_idx > sps->nb_st_rps) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Invalid value of delta_idx in slice header RPS: %d > %d.\n",
-                       delta_idx, sps->nb_st_rps);
+                       rps->delta_idx, sps->nb_st_rps);
                 return AVERROR_INVALIDDATA;
             }
-            rps_ridx = &sps->st_rps[sps->nb_st_rps - delta_idx];
+            rps_ridx = &sps->st_rps[sps->nb_st_rps - rps->delta_idx];
             rps->rps_idx_num_delta_pocs = rps_ridx->num_delta_pocs;
         } else
             rps_ridx = &sps->st_rps[rps - sps->st_rps - 1];
 
-        delta_rps_sign = get_bits1(gb);
-        abs_delta_rps  = get_ue_golomb_long(gb) + 1;
-        if (abs_delta_rps < 1 || abs_delta_rps > 32768) {
+        rps->delta_rps_sign = get_bits1(gb);
+        rps->abs_delta_rps  = get_ue_golomb_long(gb) + 1;
+        if (rps->abs_delta_rps > 32768) {
             av_log(avctx, AV_LOG_ERROR,
                    "Invalid value of abs_delta_rps: %d\n",
-                   abs_delta_rps);
+                   rps->abs_delta_rps);
             return AVERROR_INVALIDDATA;
         }
-        delta_rps      = (1 - (delta_rps_sign << 1)) * abs_delta_rps;
+        delta_rps      = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps;
         for (i = 0; i <= rps_ridx->num_delta_pocs; i++) {
             int used = rps->used[k] = get_bits1(gb);
 
+            rps->use_delta_flag = 0;
             if (!used)
-                use_delta_flag = get_bits1(gb);
+                rps->use_delta_flag = get_bits1(gb);
 
-            if (used || use_delta_flag) {
+            if (used || rps->use_delta_flag) {
                 if (i < rps_ridx->num_delta_pocs)
                     delta_poc = delta_rps + rps_ridx->delta_poc[i];
                 else
@@ -210,7 +209,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
         if (rps->num_delta_pocs) {
             prev = 0;
             for (i = 0; i < rps->num_negative_pics; i++) {
-                delta_poc = get_ue_golomb_long(gb) + 1;
+                delta_poc = rps->delta_poc_s0[i] = get_ue_golomb_long(gb) + 1;
                 if (delta_poc < 1 || delta_poc > 32768) {
                     av_log(avctx, AV_LOG_ERROR,
                         "Invalid value of delta_poc: %d\n",
@@ -223,7 +222,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
             }
             prev = 0;
             for (i = 0; i < nb_positive_pics; i++) {
-                delta_poc = get_ue_golomb_long(gb) + 1;
+                delta_poc = rps->delta_poc_s1[i] = get_ue_golomb_long(gb) + 1;
                 if (delta_poc < 1 || delta_poc > 32768) {
                     av_log(avctx, AV_LOG_ERROR,
                         "Invalid value of delta_poc: %d\n",
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 88e73e97c8..3cdbf6abec 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -70,9 +70,16 @@ typedef struct HEVCHdrParams {
 } HEVCHdrParams;
 
 typedef struct ShortTermRPS {
+    uint8_t rps_predict;
+    unsigned int delta_idx;
+    uint8_t use_delta_flag;
+    uint8_t delta_rps_sign;
+    unsigned int abs_delta_rps;
     unsigned int num_negative_pics;
     int num_delta_pocs;
     int rps_idx_num_delta_pocs;
+    int32_t delta_poc_s0[32];
+    int32_t delta_poc_s1[32];
     int32_t delta_poc[32];
     uint8_t used[32];
 } ShortTermRPS;
-- 
2.39.2


[-- Attachment #21: 0020-hwcontext_vulkan-initialize-and-require-instance-ver.patch --]
[-- Type: text/x-diff, Size: 2363 bytes --]

From a35cd953f9af8f34836d53006d10e3890a30ebf1 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:15:04 +0100
Subject: [PATCH 20/72] hwcontext_vulkan: initialize and require instance
 version 1.3

---
 configure                    | 4 ++--
 libavutil/hwcontext_vulkan.c | 2 +-
 libavutil/hwcontext_vulkan.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index d38613309d..f0f15b9e87 100755
--- a/configure
+++ b/configure
@@ -7006,8 +7006,8 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.
          "in maintaining it."
 
 if enabled vulkan; then
-    check_pkg_config_header_only vulkan "vulkan >= 1.2.189" "vulkan/vulkan.h" "defined VK_VERSION_1_2" ||
-        check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_3) || (defined(VK_VERSION_1_2) && VK_HEADER_VERSION >= 189)"
+    check_pkg_config_header_only vulkan "vulkan >= 1.3.238" "vulkan/vulkan.h" "defined VK_VERSION_1_3" ||
+        check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) || (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 238)"
 fi
 
 if enabled x86; then
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 2a9b5f4aac..c87f39d072 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -673,7 +673,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
     VkApplicationInfo application_info = {
         .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
         .pEngineName        = "libavutil",
-        .apiVersion         = VK_API_VERSION_1_2,
+        .apiVersion         = VK_API_VERSION_1_3,
         .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
                                               LIBAVUTIL_VERSION_MINOR,
                                               LIBAVUTIL_VERSION_MICRO),
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index df86c85b3c..70c8379dc3 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -53,7 +53,7 @@ typedef struct AVVulkanDeviceContext {
     PFN_vkGetInstanceProcAddr get_proc_addr;
 
     /**
-     * Vulkan instance. Must be at least version 1.2.
+     * Vulkan instance. Must be at least version 1.3.
      */
     VkInstance inst;
 
-- 
2.39.2


[-- Attachment #22: 0021-hwcontext_vulkan-enable-support-for-YCbCr-samplers.patch --]
[-- Type: text/x-diff, Size: 1833 bytes --]

From f365b7902693a367d77032e13c2e099306308f44 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 02:37:14 +0100
Subject: [PATCH 21/72] hwcontext_vulkan: enable support for YCbCr samplers

---
 libavutil/hwcontext_vulkan.c | 1 +
 libavutil/vulkan_functions.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c87f39d072..72850c03cf 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1378,6 +1378,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
         goto end;
     }
     p->device_features_1_2.timelineSemaphore = 1;
+    p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
 
     /* Setup queue family */
     if ((err = setup_queue_families(ctx, &dev_info)))
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index d15a5d9a42..deb77495a2 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -155,6 +155,8 @@ typedef enum FFVulkanExtensions {
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              DestroyPipeline)                         \
                                                                                          \
     /* Sampler */                                                                        \
+    MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CreateSamplerYcbcrConversion)              \
+    MACRO(1, 1, FF_VK_EXT_NO_FLAG,              DestroySamplerYcbcrConversion)             \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CreateSampler)                           \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              DestroySampler)                          \
                                                                                          \
-- 
2.39.2


[-- Attachment #23: 0022-hwcontext_vulkan-enable-VK_KHR_synchronization2-if-s.patch --]
[-- Type: text/x-diff, Size: 5364 bytes --]

From b6db2ca65db72b346ba08480df4a201f7e1caea9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 13 Mar 2022 09:06:06 +0100
Subject: [PATCH 22/72] hwcontext_vulkan: enable VK_KHR_synchronization2 if
 supported

---
 libavutil/hwcontext_vulkan.c | 17 +++++++++++++----
 libavutil/vulkan_functions.h |  6 +++++-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 72850c03cf..1d0261c8fe 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -89,6 +89,7 @@ typedef struct VulkanDevicePriv {
     /* Features */
     VkPhysicalDeviceVulkan11Features device_features_1_1;
     VkPhysicalDeviceVulkan12Features device_features_1_2;
+    VkPhysicalDeviceVulkan13Features device_features_1_3;
 
     /* Queues */
     uint32_t qfs[5];
@@ -346,7 +347,7 @@ static const VulkanOptExtension optional_device_exts[] = {
     /* Misc or required by other extensions */
     { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,                  FF_VK_EXT_NO_FLAG                },
     { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME,         FF_VK_EXT_NO_FLAG                },
-    { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,                FF_VK_EXT_NO_FLAG                },
+    { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,                FF_VK_EXT_SYNC2                  },
 
     /* Imports/exports */
     { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,               FF_VK_EXT_EXTERNAL_FD_MEMORY     },
@@ -1326,9 +1327,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
     };
+    VkPhysicalDeviceVulkan13Features dev_features_1_3 = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
+        .pNext = &timeline_features,
+    };
     VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
-        .pNext = &timeline_features,
+        .pNext = &dev_features_1_3,
     };
     VkPhysicalDeviceVulkan11Features dev_features_1_1 = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
@@ -1340,8 +1345,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     };
 
     VkDeviceCreateInfo dev_info = {
-        .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
-        .pNext                = &hwctx->device_features,
+        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
     };
 
     hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -1349,6 +1353,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
     p->device_features_1_1.pNext = &p->device_features_1_2;
     p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
+    p->device_features_1_2.pNext = &p->device_features_1_3;
+    p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
     ctx->free = vulkan_device_free;
 
     /* Create an instance if not given one */
@@ -1379,6 +1385,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     }
     p->device_features_1_2.timelineSemaphore = 1;
     p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
+    p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
+
+    dev_info.pNext = &hwctx->device_features;
 
     /* Setup queue family */
     if ((err = setup_queue_families(ctx, &dev_info)))
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index deb77495a2..103bff3013 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -37,6 +37,7 @@ typedef enum FFVulkanExtensions {
     FF_VK_EXT_EXTERNAL_WIN32_MEMORY  = 1ULL <<  6, /* VK_KHR_external_memory_win32 */
     FF_VK_EXT_EXTERNAL_WIN32_SEM     = 1ULL <<  7, /* VK_KHR_external_semaphore_win32 */
 #endif
+    FF_VK_EXT_SYNC2                  = 1ULL <<  8, /* VK_KHR_synchronization2 */
 
     FF_VK_EXT_NO_FLAG                = 1ULL << 31,
 } FFVulkanExtensions;
@@ -145,7 +146,10 @@ typedef enum FFVulkanExtensions {
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              UpdateDescriptorSetWithTemplate)         \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CreateDescriptorUpdateTemplate)          \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              DestroyDescriptorUpdateTemplate)         \
-                                                                                         \
+                                                                                           \
+    /* sync2 */                                                                            \
+    MACRO(1, 1, FF_VK_EXT_SYNC2,                CmdPipelineBarrier2KHR)                    \
+                                                                                           \
     /* Pipeline */                                                                       \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CreatePipelineLayout)                    \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              DestroyPipelineLayout)                   \
-- 
2.39.2


[-- Attachment #24: 0023-hwcontext_vulkan-support-threadsafe-queue-and-frame-.patch --]
[-- Type: text/x-diff, Size: 19170 bytes --]

From 05e94e06667f305afe181c3b318d08b4e528ce09 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 15 Mar 2022 23:00:32 +0100
Subject: [PATCH 23/72] hwcontext_vulkan: support threadsafe queue and frame
 operations

---
 libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
 libavutil/hwcontext_vulkan.h |  40 +++++++-
 2 files changed, 167 insertions(+), 49 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 1d0261c8fe..5a06a6872d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -27,6 +27,7 @@
 #include <dlfcn.h>
 #endif
 
+#include <pthread.h>
 #include <unistd.h>
 
 #include "config.h"
@@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
     VkPhysicalDeviceVulkan13Features device_features_1_3;
 
     /* Queues */
-    uint32_t qfs[5];
-    int num_qfs;
+    pthread_mutex_t **qf_mutex;
+    int nb_tot_qfs;
+    uint32_t img_qfs[5];
+    int nb_img_qfs;
 
     /* Debug callback */
     VkDebugUtilsMessengerEXT debug_ctx;
@@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
 } VulkanFramesPriv;
 
 typedef struct AVVkFrameInternal {
+    pthread_mutex_t update_mutex;
+
 #if CONFIG_CUDA
     /* Importing external memory into cuda is really expensive so we keep the
      * memory imported all the time */
@@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
     if (p->libvulkan)
         dlclose(p->libvulkan);
 
+    for (int i = 0; i < p->nb_tot_qfs; i++)
+        av_freep(&p->qf_mutex[i]);
+    av_freep(&p->qf_mutex);
+
     RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
     RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
 }
@@ -1436,13 +1445,26 @@ end:
     return err;
 }
 
+static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+    VulkanDevicePriv *p = ctx->internal->priv;
+    pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
+}
+
+static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+    VulkanDevicePriv *p = ctx->internal->priv;
+    pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
+}
+
 static int vulkan_device_init(AVHWDeviceContext *ctx)
 {
     int err;
-    uint32_t queue_num;
+    uint32_t qf_num;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VulkanDevicePriv *p = ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
+    VkQueueFamilyProperties *qf;
     int graph_index, comp_index, tx_index, enc_index, dec_index;
 
     /* Set device extension flags */
@@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
     p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
     p->dev_is_intel  = (p->props.properties.vendorID == 0x8086);
 
-    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
-    if (!queue_num) {
+    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
+    if (!qf_num) {
         av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
         return AVERROR_EXTERNAL;
     }
 
+    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
+    if (!qf)
+        return AVERROR(ENOMEM);
+
+    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
+
+    p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
+    if (!p->qf_mutex)
+        return AVERROR(ENOMEM);
+    p->nb_tot_qfs = qf_num;
+
+    for (int i = 0; i < qf_num; i++) {
+        p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
+        if (!p->qf_mutex[i])
+            return AVERROR(ENOMEM);
+        for (int j = 0; j < qf[i].queueCount; j++)
+            pthread_mutex_init(&p->qf_mutex[i][j], NULL);
+    }
+
     graph_index = hwctx->queue_family_index;
     comp_index  = hwctx->queue_family_comp_index;
     tx_index    = hwctx->queue_family_tx_index;
@@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
             return AVERROR(EINVAL);                                                             \
         } else if (fidx < 0 || ctx_qf < 0) {                                                    \
             break;                                                                              \
-        } else if (ctx_qf >= queue_num) {                                                       \
+        } else if (ctx_qf >= qf_num) {                                                          \
             av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
-                   type, ctx_qf, queue_num);                                                    \
+                   type, ctx_qf, qf_num);                                                       \
             return AVERROR(EINVAL);                                                             \
         }                                                                                       \
                                                                                                 \
@@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
         tx_index    = (ctx_qf == tx_index)    ? -1 : tx_index;                                  \
         enc_index   = (ctx_qf == enc_index)   ? -1 : enc_index;                                 \
         dec_index   = (ctx_qf == dec_index)   ? -1 : dec_index;                                 \
-        p->qfs[p->num_qfs++] = ctx_qf;                                                          \
+        p->img_qfs[p->nb_img_qfs++] = ctx_qf;                                                   \
     } while (0)
 
     CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,        hwctx->nb_graphics_queues);
@@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
 
 #undef CHECK_QUEUE
 
+    if (!hwctx->lock_queue)
+        hwctx->lock_queue = lock_queue;
+    if (!hwctx->unlock_queue)
+        hwctx->unlock_queue = unlock_queue;
+
     /* Get device capabilities */
     vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
 
@@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
 {
     AVVkFrameInternal *internal = f->internal;
 
-    if (!internal)
-        return;
-
 #if CONFIG_CUDA
     if (internal->cuda_fc_ref) {
         AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
@@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     uint32_t src_qf, dst_qf;
     VkImageLayout new_layout;
     VkAccessFlags new_access;
+    AVVulkanFramesContext *vkfc = hwfc->hwctx;
     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
+    AVFrame tmp = { .data[0] = (uint8_t *)frame };
     uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
 
     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@@ -1944,6 +1989,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     };
 
     VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
+
+    if ((err = wait_start_exec_ctx(hwfc, ectx)))
+        return err;
+
+    vkfc->lock_frame(hwfc, frame);
+
     for (int i = 0; i < planes; i++) {
         wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
         sem_sig_val[i] = frame->sem_value[i] + 1;
@@ -1980,9 +2031,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         break;
     }
 
-    if ((err = wait_start_exec_ctx(hwfc, ectx)))
-        return err;
-
     /* Change the image layout to something more optimal for writes.
      * This also signals the newly created semaphore, making it usable
      * for synchronization */
@@ -2008,7 +2056,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                            VK_PIPELINE_STAGE_TRANSFER_BIT,
                            0, 0, NULL, 0, NULL, planes, img_bar);
 
-    return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+    err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+    vkfc->unlock_frame(hwfc, frame);
+
+    return err;
 }
 
 static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
@@ -2090,10 +2141,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
             .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
             .usage                 = usage,
             .samples               = VK_SAMPLE_COUNT_1_BIT,
-            .pQueueFamilyIndices   = p->qfs,
-            .queueFamilyIndexCount = p->num_qfs,
-            .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
-                                                      VK_SHARING_MODE_EXCLUSIVE,
+            .pQueueFamilyIndices   = p->img_qfs,
+            .queueFamilyIndexCount = p->nb_img_qfs,
+            .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+                                                         VK_SHARING_MODE_EXCLUSIVE,
         };
 
         get_plane_wh(&create_info.extent.width, &create_info.extent.height,
@@ -2117,6 +2168,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
             return AVERROR_EXTERNAL;
         }
 
+        f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
         f->layout[i] = create_info.initialLayout;
         f->access[i] = 0x0;
         f->sem_value[i] = 0;
@@ -2161,10 +2213,10 @@ static void try_export_flags(AVHWFramesContext *hwfc,
     VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
         .pNext = NULL,
-        .pQueueFamilyIndices   = p->qfs,
-        .queueFamilyIndexCount = p->num_qfs,
-        .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
-                                                  VK_SHARING_MODE_EXCLUSIVE,
+        .pQueueFamilyIndices   = p->img_qfs,
+        .queueFamilyIndexCount = p->nb_img_qfs,
+        .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+                                                     VK_SHARING_MODE_EXCLUSIVE,
     };
     VkPhysicalDeviceExternalImageFormatInfo enext = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
@@ -2259,6 +2311,16 @@ fail:
     return NULL;
 }
 
+static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+    pthread_mutex_lock(&vkf->internal->update_mutex);
+}
+
+static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+    pthread_mutex_unlock(&vkf->internal->update_mutex);
+}
+
 static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
 {
     VulkanFramesPriv *fp = hwfc->internal->priv;
@@ -2421,6 +2483,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
             return AVERROR(ENOMEM);
     }
 
+    if (!hwctx->lock_frame)
+        hwctx->lock_frame = lock_frame;
+    if (!hwctx->unlock_frame)
+        hwctx->unlock_frame = unlock_frame;
+
     return 0;
 }
 
@@ -2727,10 +2794,10 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
             .usage                 = VK_IMAGE_USAGE_SAMPLED_BIT |
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
             .samples               = VK_SAMPLE_COUNT_1_BIT,
-            .pQueueFamilyIndices   = p->qfs,
-            .queueFamilyIndexCount = p->num_qfs,
-            .sharingMode           = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
-                                                      VK_SHARING_MODE_EXCLUSIVE,
+            .pQueueFamilyIndices   = p->img_qfs,
+            .queueFamilyIndexCount = p->nb_img_qfs,
+            .sharingMode           = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+                                                         VK_SHARING_MODE_EXCLUSIVE,
         };
 
         /* Image format verification */
@@ -2809,6 +2876,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
          * offer us anything we could import and sync with, so instead
          * just signal the semaphore we created. */
 
+        f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
         f->layout[i] = create_info.initialLayout;
         f->access[i] = 0x0;
         f->sem_value[i] = 0;
@@ -3017,20 +3085,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                                                      CU_AD_FORMAT_UNSIGNED_INT8;
 
     dst_f = (AVVkFrame *)frame->data[0];
-
     dst_int = dst_f->internal;
-    if (!dst_int || !dst_int->cuda_fc_ref) {
-        if (!dst_f->internal)
-            dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
-
-        if (!dst_int)
-            return AVERROR(ENOMEM);
 
+    if (!dst_int->cuda_fc_ref) {
         dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
-        if (!dst_int->cuda_fc_ref) {
-            av_freep(&dst_f->internal);
+        if (!dst_int->cuda_fc_ref)
             return AVERROR(ENOMEM);
-        }
 
         for (int i = 0; i < planes; i++) {
             CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
@@ -3704,13 +3764,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
     return err;
 }
 
-static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
+static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
                               AVBufferRef **bufs, size_t *buf_offsets,
                               const int *buf_stride, int w,
                               int h, enum AVPixelFormat pix_fmt, int to_buf)
 {
     int err;
     AVVkFrame *frame = (AVVkFrame *)f->data[0];
+    AVVulkanFramesContext *vkfc = hwfc->hwctx;
     VulkanFramesPriv *fp = hwfc->internal->priv;
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
@@ -3745,11 +3806,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
         .waitSemaphoreCount   = planes,
     };
 
-    for (int i = 0; i < planes; i++)
-        sem_signal_values[i] = frame->sem_value[i] + 1;
+    vkfc->lock_frame(hwfc, frame);
 
     if ((err = wait_start_exec_ctx(hwfc, ectx)))
-        return err;
+        goto end;
+
+    for (int i = 0; i < planes; i++)
+        sem_signal_values[i] = frame->sem_value[i] + 1;
 
     /* Change the image layout to something more optimal for transfers */
     for (int i = 0; i < planes; i++) {
@@ -3824,14 +3887,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
             if (!f->buf[ref])
                 break;
             if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
-                return err;
+                goto end;
         }
         if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
-            return err;
-        return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
+            goto end;
+        err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
     } else {
-        return submit_exec_ctx(hwfc, ectx, &s_info, frame,    1);
+        err = submit_exec_ctx(hwfc, ectx, &s_info, frame,    1);
     }
+
+end:
+    vkfc->unlock_frame(hwfc, frame);
+    return err;
 }
 
 static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
@@ -3960,8 +4027,9 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
     }
 
     /* Copy buffers into/from image */
-    err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
-                             swf->width, swf->height, swf->format, from);
+    err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets,
+                             tmp.linesize, swf->width, swf->height, swf->format,
+                             from);
 
     if (from) {
         /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
@@ -4142,7 +4210,19 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
 
 AVVkFrame *av_vk_frame_alloc(void)
 {
-    return av_mallocz(sizeof(AVVkFrame));
+    AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
+    if (!f)
+        return NULL;
+
+    f->internal = av_mallocz(sizeof(*f->internal));
+    if (!f->internal) {
+        av_free(f);
+        return NULL;
+    }
+
+    pthread_mutex_init(&f->internal->update_mutex, NULL);
+
+    return f;
 }
 
 const HWContextType ff_hwcontext_type_vulkan = {
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 70c8379dc3..406d8709c3 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -27,6 +27,8 @@
 #include "pixfmt.h"
 #include "frame.h"
 
+typedef struct AVVkFrame AVVkFrame;
+
 /**
  * @file
  * API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext {
      */
     int queue_family_decode_index;
     int nb_decode_queues;
+
+    /**
+     * Locks a queue, preventing other threads from submitting any command
+     * buffers to this queue.
+     * If set to NULL, will be set to lavu-internal functions that utilize a
+     * mutex.
+     */
+    void (*lock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index);
+
+    /**
+     * Similar to lock_queue(), unlocks a queue. Must only be called after locking.
+     */
+    void (*unlock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index);
 } AVVulkanDeviceContext;
 
 /**
@@ -195,6 +210,23 @@ typedef struct AVVulkanFramesContext {
      * av_hwframe_ctx_init().
      */
     AVVkFrameFlags flags;
+
+    /**
+     * Locks a frame, preventing other threads from changing frame properties.
+     *
+     * Users SHOULD only ever lock just before command submission in order
+     * to get accurate frame properties, and unlock immediately after command
+     * submission without waiting for it to finish.
+     *
+     * If set to NULL, will be set to lavu-internal functions that utilize a
+     * mutex.
+     */
+    void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
+
+    /**
+     * Similar to lock_frame(), unlocks a frame. Must only be called after locking.
+     */
+    void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
 } AVVulkanFramesContext;
 
 /*
@@ -210,7 +242,7 @@ typedef struct AVVulkanFramesContext {
  * @note the size of this structure is not part of the ABI, to allocate
  * you must use @av_vk_frame_alloc().
  */
-typedef struct AVVkFrame {
+struct AVVkFrame {
     /**
      * Vulkan images to which the memory is bound to.
      */
@@ -264,6 +296,12 @@ typedef struct AVVkFrame {
      * Describes the binding offset of each plane to the VkDeviceMemory.
      */
     ptrdiff_t offset[AV_NUM_DATA_POINTERS];
+
+    /**
+     * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
+     * the image was allocated with VK_SHARING_MODE_CONCURRENT.
+     */
+    uint32_t queue_family[AV_NUM_DATA_POINTERS];
 } AVVkFrame;
 
 /**
-- 
2.39.2


[-- Attachment #25: 0024-hwcontext_vulkan-remove-contiguous-memory-hack.patch --]
[-- Type: text/x-diff, Size: 2600 bytes --]

From 197e5cfa63a2356a64ac6ae20024fa98fda26f43 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 20:32:49 +0100
Subject: [PATCH 24/72] hwcontext_vulkan: remove contiguous memory hack

---
 libavutil/hwcontext_vulkan.c | 12 ------------
 libavutil/hwcontext_vulkan.h |  4 +---
 2 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 5a06a6872d..ab5b24f10c 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -112,9 +112,6 @@ typedef struct VulkanDevicePriv {
 
     /* Nvidia */
     int dev_is_nvidia;
-
-    /* Intel */
-    int dev_is_intel;
 } VulkanDevicePriv;
 
 typedef struct VulkanFramesPriv {
@@ -1501,7 +1498,6 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
                p->hprops.minImportedHostPointerAlignment);
 
     p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
-    p->dev_is_intel  = (p->props.properties.vendorID == 0x8086);
 
     vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
     if (!qf_num) {
@@ -1620,8 +1616,6 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
             return AVERROR_EXTERNAL;
         }
 
-        if (strstr(vendor, "Intel"))
-            dev_select.vendor_id = 0x8086;
         if (strstr(vendor, "AMD"))
             dev_select.vendor_id = 0x1002;
 
@@ -2356,12 +2350,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
     if (!hwctx->usage)
         hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS;
 
-    if (!(hwctx->flags & AV_VK_FRAME_FLAG_NONE)) {
-        if (p->contiguous_planes == 1 ||
-           ((p->contiguous_planes == -1) && p->dev_is_intel))
-           hwctx->flags |= AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY;
-    }
-
     modifier_info = vk_find_struct(hwctx->create_pnext,
                                    VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
 
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 406d8709c3..e89fa52927 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -160,9 +160,7 @@ typedef enum AVVkFrameFlags {
      * device and tiling during av_hwframe_ctx_init(). */
     AV_VK_FRAME_FLAG_NONE              = (1ULL << 0),
 
-    /* Image planes will be allocated in a single VkDeviceMemory, rather
-     * than as per-plane VkDeviceMemory allocations. Required for exporting
-     * to VAAPI on Intel devices. */
+    /* DEPRECATED: does nothing. */
     AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1),
 } AVVkFrameFlags;
 
-- 
2.39.2


[-- Attachment #26: 0025-hwcontext_vulkan-rename-vk_pixfmt_map-to-vk_pixfmt_p.patch --]
[-- Type: text/x-diff, Size: 1383 bytes --]

From 28903a643a7db85e6eef289a853a03b33b67be41 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 20:35:51 +0100
Subject: [PATCH 25/72] hwcontext_vulkan: rename vk_pixfmt_map to
 vk_pixfmt_planar_map

---
 libavutil/hwcontext_vulkan.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index ab5b24f10c..de5575c031 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal {
 
 static const struct {
     enum AVPixelFormat pixfmt;
-    const VkFormat vkfmts[4];
-} vk_pixfmt_map[] = {
+    const VkFormat vkfmts[5];
+} vk_pixfmt_planar_map[] = {
     { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
     { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
     { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
@@ -244,9 +244,9 @@ static const struct {
 
 const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
 {
-    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
-        if (vk_pixfmt_map[i].pixfmt == p)
-            return vk_pixfmt_map[i].vkfmts;
+    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++)
+        if (vk_pixfmt_planar_map[i].pixfmt == p)
+            return vk_pixfmt_planar_map[i].vkfmts;
     return NULL;
 }
 
-- 
2.39.2


[-- Attachment #27: 0026-hwcontext_vulkan-fix-minor-type-issue-in-VulkanQueue.patch --]
[-- Type: text/x-diff, Size: 772 bytes --]

From a62f75557a8b2d64fe88670b823d1e8500504bd2 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:52:15 +0100
Subject: [PATCH 26/72] hwcontext_vulkan: fix minor type issue in
 VulkanQueueCtx.buf_deps_alloc_size

---
 libavutil/hwcontext_vulkan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index de5575c031..8141e8c310 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -66,7 +66,7 @@ typedef struct VulkanQueueCtx {
     /* Buffer dependencies */
     AVBufferRef **buf_deps;
     int nb_buf_deps;
-    int buf_deps_alloc_size;
+    unsigned int buf_deps_alloc_size;
 } VulkanQueueCtx;
 
 typedef struct VulkanExecCtx {
-- 
2.39.2


[-- Attachment #28: 0027-hwcontext_vulkan-report-nonCoherentAtomSize.patch --]
[-- Type: text/x-diff, Size: 1140 bytes --]

From 0dec881653e9c9434a1b06ea212735a4c7b9caf8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 28 Dec 2022 05:55:17 +0100
Subject: [PATCH 27/72] hwcontext_vulkan: report nonCoherentAtomSize

---
 libavutil/hwcontext_vulkan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 8141e8c310..7e63c2350c 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1493,6 +1493,8 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
            p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
     av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %"SIZE_SPECIFIER"\n",
            p->props.properties.limits.minMemoryMapAlignment);
+    av_log(ctx, AV_LOG_VERBOSE, "    nonCoherentAtomSize:                %"PRIu64"\n",
+           p->props.properties.limits.nonCoherentAtomSize);
     if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
         av_log(ctx, AV_LOG_VERBOSE, "    minImportedHostPointerAlignment:    %"PRIu64"\n",
                p->hprops.minImportedHostPointerAlignment);
-- 
2.39.2


[-- Attachment #29: 0028-hwcontext_vulkan-add-support-for-descriptor-buffers.patch --]
[-- Type: text/x-diff, Size: 6084 bytes --]

From a028bdcd05284bfb306558212646a309e2da4c24 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:15:02 +0100
Subject: [PATCH 28/72] hwcontext_vulkan: add support for descriptor buffers

---
 libavutil/hwcontext_vulkan.c | 13 ++++++++++++-
 libavutil/vulkan_functions.h |  9 +++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 7e63c2350c..60ff11ad3d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -91,6 +91,7 @@ typedef struct VulkanDevicePriv {
     VkPhysicalDeviceVulkan11Features device_features_1_1;
     VkPhysicalDeviceVulkan12Features device_features_1_2;
     VkPhysicalDeviceVulkan13Features device_features_1_3;
+    VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features;
 
     /* Queues */
     pthread_mutex_t **qf_mutex;
@@ -350,6 +351,7 @@ static const VulkanOptExtension optional_device_exts[] = {
     { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,                  FF_VK_EXT_NO_FLAG                },
     { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME,         FF_VK_EXT_NO_FLAG                },
     { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME,                FF_VK_EXT_SYNC2                  },
+    { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,                FF_VK_EXT_DESCRIPTOR_BUFFER,     },
 
     /* Imports/exports */
     { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,               FF_VK_EXT_EXTERNAL_FD_MEMORY     },
@@ -1333,9 +1335,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
     };
+    VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT,
+        .pNext = &timeline_features,
+    };
     VkPhysicalDeviceVulkan13Features dev_features_1_3 = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
-        .pNext = &timeline_features,
+        .pNext = &desc_buf_features,
     };
     VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
@@ -1361,6 +1367,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
     p->device_features_1_2.pNext = &p->device_features_1_3;
     p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
+    p->device_features_1_3.pNext = &p->desc_buf_features;
+    p->desc_buf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT;
     ctx->free = vulkan_device_free;
 
     /* Create an instance if not given one */
@@ -1390,8 +1398,11 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
         goto end;
     }
     p->device_features_1_2.timelineSemaphore = 1;
+    p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress;
     p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
     p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
+    p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer;
+    p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors;
 
     dev_info.pNext = &hwctx->device_features;
 
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 103bff3013..f8739da8e5 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -38,6 +38,7 @@ typedef enum FFVulkanExtensions {
     FF_VK_EXT_EXTERNAL_WIN32_SEM     = 1ULL <<  7, /* VK_KHR_external_semaphore_win32 */
 #endif
     FF_VK_EXT_SYNC2                  = 1ULL <<  8, /* VK_KHR_synchronization2 */
+    FF_VK_EXT_DESCRIPTOR_BUFFER      = 1ULL <<  9, /* VK_EXT_descriptor_buffer */
 
     FF_VK_EXT_NO_FLAG                = 1ULL << 31,
 } FFVulkanExtensions;
@@ -121,6 +122,7 @@ typedef enum FFVulkanExtensions {
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              GetBufferMemoryRequirements2)            \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CreateBuffer)                            \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              BindBufferMemory)                        \
+    MACRO(1, 1, FF_VK_EXT_NO_FLAG,              GetBufferDeviceAddress)                    \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              DestroyBuffer)                           \
                                                                                          \
     /* Image */                                                                          \
@@ -142,6 +144,13 @@ typedef enum FFVulkanExtensions {
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              DestroyDescriptorPool)                   \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              DestroyDescriptorSetLayout)              \
                                                                                          \
+    /* Descriptor buffers */                                                               \
+    MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER,    GetDescriptorSetLayoutSizeEXT)             \
+    MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER,    GetDescriptorSetLayoutBindingOffsetEXT)    \
+    MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER,    GetDescriptorEXT)                          \
+    MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER,    CmdBindDescriptorBuffersEXT)               \
+    MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER,    CmdSetDescriptorBufferOffsetsEXT)          \
+                                                                                           \
     /* DescriptorUpdateTemplate */                                                       \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              UpdateDescriptorSetWithTemplate)         \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CreateDescriptorUpdateTemplate)          \
-- 
2.39.2


[-- Attachment #30: 0029-hwcontext_vulkan-add-functions-for-video-decoding.patch --]
[-- Type: text/x-diff, Size: 6637 bytes --]

From cc5ef22f90cc48ee604f6a27d28bb05237b9f2b7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:34:36 +0100
Subject: [PATCH 29/72] hwcontext_vulkan: add functions for video decoding

---
 libavutil/hwcontext_vulkan.c |  6 ++++++
 libavutil/vulkan.c           |  8 +++++---
 libavutil/vulkan_functions.h | 20 ++++++++++++++++++++
 libavutil/vulkan_loader.h    |  4 ++++
 4 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 60ff11ad3d..c0e35d8d78 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -363,6 +363,12 @@ static const VulkanOptExtension optional_device_exts[] = {
     { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,            FF_VK_EXT_EXTERNAL_WIN32_MEMORY  },
     { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,         FF_VK_EXT_EXTERNAL_WIN32_SEM     },
 #endif
+
+    /* Video encoding/decoding */
+    { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,                      FF_VK_EXT_VIDEO_QUEUE            },
+    { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,               FF_VK_EXT_VIDEO_DECODE_QUEUE     },
+    { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,                FF_VK_EXT_VIDEO_DECODE_H264      },
+    { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,                FF_VK_EXT_VIDEO_DECODE_H265      },
 };
 
 /* Converts return values to strings */
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 403f0b1f27..6bf2c214b7 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -85,9 +85,11 @@ const char *ff_vk_ret2str(VkResult res)
     CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
     CASE(VK_ERROR_VALIDATION_FAILED_EXT);
     CASE(VK_ERROR_INVALID_SHADER_NV);
-    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
-    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
-    CASE(VK_ERROR_NOT_PERMITTED_EXT);
+    CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR);
+    CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR);
+    CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
+    CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
+    CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
     default: return "Unknown error";
     }
 #undef CASE
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index f8739da8e5..65ab560d21 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -39,6 +39,10 @@ typedef enum FFVulkanExtensions {
 #endif
     FF_VK_EXT_SYNC2                  = 1ULL <<  8, /* VK_KHR_synchronization2 */
     FF_VK_EXT_DESCRIPTOR_BUFFER      = 1ULL <<  9, /* VK_EXT_descriptor_buffer */
+    FF_VK_EXT_VIDEO_QUEUE            = 1ULL << 10, /* VK_KHR_video_queue */
+    FF_VK_EXT_VIDEO_DECODE_QUEUE     = 1ULL << 11, /* VK_KHR_video_decode_queue */
+    FF_VK_EXT_VIDEO_DECODE_H264      = 1ULL << 12, /* VK_KHR_video_decode_h264 */
+    FF_VK_EXT_VIDEO_DECODE_H265      = 1ULL << 13, /* VK_KHR_video_decode_h265 */
 
     FF_VK_EXT_NO_FLAG                = 1ULL << 31,
 } FFVulkanExtensions;
@@ -60,6 +64,8 @@ typedef enum FFVulkanExtensions {
     MACRO(1, 0, FF_VK_EXT_NO_FLAG,              CreateDevice)                            \
     MACRO(1, 0, FF_VK_EXT_NO_FLAG,              GetPhysicalDeviceFeatures2)              \
     MACRO(1, 0, FF_VK_EXT_NO_FLAG,              GetPhysicalDeviceProperties)             \
+    MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE,          GetPhysicalDeviceVideoCapabilitiesKHR)     \
+    MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE,          GetPhysicalDeviceVideoFormatPropertiesKHR) \
     MACRO(1, 0, FF_VK_EXT_NO_FLAG,              DeviceWaitIdle)                          \
     MACRO(1, 0, FF_VK_EXT_NO_FLAG,              DestroyDevice)                           \
                                                                                          \
@@ -159,6 +165,20 @@ typedef enum FFVulkanExtensions {
     /* sync2 */                                                                            \
     MACRO(1, 1, FF_VK_EXT_SYNC2,                CmdPipelineBarrier2KHR)                    \
                                                                                            \
+    /* Video queue */                                                                      \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE,          CreateVideoSessionKHR)                     \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE,          CreateVideoSessionParametersKHR)           \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE,          GetVideoSessionMemoryRequirementsKHR)      \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE,          BindVideoSessionMemoryKHR)                 \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE,          CmdBeginVideoCodingKHR)                    \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE,          CmdControlVideoCodingKHR)                  \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE,          CmdEndVideoCodingKHR)                      \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE,          DestroyVideoSessionParametersKHR)          \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE,          DestroyVideoSessionKHR)                    \
+                                                                                           \
+    /* Video decoding */                                                                   \
+    MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE,   CmdDecodeVideoKHR)                         \
+                                                                                         \
     /* Pipeline */                                                                       \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CreatePipelineLayout)                    \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              DestroyPipelineLayout)                   \
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 3f1ee6aa46..5385e398bf 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -48,6 +48,10 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
         { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,     FF_VK_EXT_EXTERNAL_WIN32_MEMORY  },
         { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,  FF_VK_EXT_EXTERNAL_WIN32_SEM     },
 #endif
+        { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,               FF_VK_EXT_VIDEO_QUEUE            },
+        { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,        FF_VK_EXT_VIDEO_DECODE_QUEUE     },
+        { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_H264      },
+        { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_H265      },
     };
 
     FFVulkanExtensions mask = 0x0;
-- 
2.39.2


[-- Attachment #31: 0030-hwcontext_vulkan-support-PREP_MODE_DECODING-in-prepa.patch --]
[-- Type: text/x-diff, Size: 5554 bytes --]

From 506c7daa8423efd56296868cce017642235b6186 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:18:21 +0100
Subject: [PATCH 30/72] hwcontext_vulkan: support PREP_MODE_DECODING in
 prepare_frame()

---
 libavutil/hwcontext_vulkan.c | 70 ++++++++++++++++++++++++++----------
 1 file changed, 51 insertions(+), 19 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c0e35d8d78..e7c14fad74 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1969,7 +1969,9 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
 enum PrepMode {
     PREP_MODE_WRITE,
     PREP_MODE_EXTERNAL_EXPORT,
-    PREP_MODE_EXTERNAL_IMPORT
+    PREP_MODE_EXTERNAL_IMPORT,
+    PREP_MODE_DECODING_DST,
+    PREP_MODE_DECODING_DPB,
 };
 
 static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
@@ -1978,7 +1980,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     int err;
     uint32_t src_qf, dst_qf;
     VkImageLayout new_layout;
-    VkAccessFlags new_access;
+    VkAccessFlags2 new_access;
     AVVulkanFramesContext *vkfc = hwfc->hwctx;
     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
@@ -1986,7 +1988,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     AVFrame tmp = { .data[0] = (uint8_t *)frame };
     uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
 
-    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+    VkDependencyInfo dep_info;
 
     VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -2042,32 +2045,55 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         s_info.pWaitDstStageMask = wait_st;
         s_info.waitSemaphoreCount = planes;
         break;
+    case PREP_MODE_DECODING_DST:
+        new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
+        new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+        src_qf     = VK_QUEUE_FAMILY_IGNORED;
+        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
+        break;
+    case PREP_MODE_DECODING_DPB:
+        new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
+        new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
+        src_qf     = VK_QUEUE_FAMILY_IGNORED;
+        dst_qf     = VK_QUEUE_FAMILY_IGNORED;
+        break;
     }
 
     /* Change the image layout to something more optimal for writes.
      * This also signals the newly created semaphore, making it usable
      * for synchronization */
     for (int i = 0; i < planes; i++) {
-        img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
-        img_bar[i].srcAccessMask = 0x0;
-        img_bar[i].dstAccessMask = new_access;
-        img_bar[i].oldLayout = frame->layout[i];
-        img_bar[i].newLayout = new_layout;
-        img_bar[i].srcQueueFamilyIndex = src_qf;
-        img_bar[i].dstQueueFamilyIndex = dst_qf;
-        img_bar[i].image = frame->img[i];
-        img_bar[i].subresourceRange.levelCount = 1;
-        img_bar[i].subresourceRange.layerCount = 1;
-        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        img_bar[i] = (VkImageMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+            .pNext = NULL,
+            .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
+            .srcAccessMask = 0x0,
+            .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+            .dstAccessMask = new_access,
+            .oldLayout = frame->layout[i],
+            .newLayout = new_layout,
+            .srcQueueFamilyIndex = src_qf,
+            .dstQueueFamilyIndex = dst_qf,
+            .image = frame->img[i],
+            .subresourceRange = (VkImageSubresourceRange) {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .levelCount = 1,
+                .layerCount = 1,
+            },
+        };
 
         frame->layout[i] = img_bar[i].newLayout;
         frame->access[i] = img_bar[i].dstAccessMask;
     }
 
-    vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
-                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                           VK_PIPELINE_STAGE_TRANSFER_BIT,
-                           0, 0, NULL, 0, NULL, planes, img_bar);
+    dep_info = (VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = planes,
+    };
+
+    vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
 
     err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
     vkfc->unlock_frame(hwfc, frame);
@@ -2308,7 +2334,13 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
     if (err)
         goto fail;
 
-    err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
+    if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
+        !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
+        err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DPB);
+    else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
+        err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DST);
+    else
+        err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
     if (err)
         goto fail;
 
-- 
2.39.2


[-- Attachment #32: 0031-vulkan-lock-queues-before-submitting-operations.patch --]
[-- Type: text/x-diff, Size: 1087 bytes --]

From 6da405c60b7b04895a4395f5e226e8cc60e6552e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:04:28 +0100
Subject: [PATCH 31/72] vulkan: lock queues before submitting operations

---
 libavutil/vulkan.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 6bf2c214b7..ad13b8f3cb 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
         return AVERROR_EXTERNAL;
     }
 
+    s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
+                         e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
+
     ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
+
+    s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
+                           e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
+
     if (ret != VK_SUCCESS) {
         av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
                ff_vk_ret2str(ret));
-- 
2.39.2


[-- Attachment #33: 0032-vulkan-define-VK_NO_PROTOTYPES.patch --]
[-- Type: text/x-diff, Size: 573 bytes --]

From 69c6d3dff6040feb9192be9364b064cce340ef3a Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:04:48 +0100
Subject: [PATCH 32/72] vulkan: define VK_NO_PROTOTYPES

---
 libavutil/vulkan.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index d1ea1e24fb..7927b04454 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -19,6 +19,8 @@
 #ifndef AVUTIL_VULKAN_H
 #define AVUTIL_VULKAN_H
 
+#define VK_NO_PROTOTYPES
+
 #include "pixdesc.h"
 #include "bprint.h"
 #include "hwcontext.h"
-- 
2.39.2


[-- Attachment #34: 0033-vulkan-add-additional-error-codes.patch --]
[-- Type: text/x-diff, Size: 1553 bytes --]

From 3049e9213948926ec2a3f42808f065c336eb0126 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:54:35 +0100
Subject: [PATCH 33/72] vulkan: add additional error codes

---
 libavutil/vulkan.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ad13b8f3cb..f2846e628a 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -78,6 +78,12 @@ const char *ff_vk_ret2str(VkResult res)
     CASE(VK_ERROR_TOO_MANY_OBJECTS);
     CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
     CASE(VK_ERROR_FRAGMENTED_POOL);
+    CASE(VK_ERROR_UNKNOWN);
+    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+    CASE(VK_ERROR_FRAGMENTATION);
+    CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
+    CASE(VK_PIPELINE_COMPILE_REQUIRED);
     CASE(VK_ERROR_SURFACE_LOST_KHR);
     CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
     CASE(VK_SUBOPTIMAL_KHR);
@@ -90,6 +96,13 @@ const char *ff_vk_ret2str(VkResult res)
     CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
     CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
     CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
+    CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
+    CASE(VK_ERROR_NOT_PERMITTED_KHR);
+    CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
+    CASE(VK_THREAD_IDLE_KHR);
+    CASE(VK_THREAD_DONE_KHR);
+    CASE(VK_OPERATION_DEFERRED_KHR);
+    CASE(VK_OPERATION_NOT_DEFERRED_KHR);
     default: return "Unknown error";
     }
 #undef CASE
-- 
2.39.2


[-- Attachment #35: 0034-vulkan-fix-comment-statement-about-exec_queue-blocki.patch --]
[-- Type: text/x-diff, Size: 919 bytes --]

From 630be2276afccbac78976d7c8a0f3662b72de248 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 21:41:59 +0100
Subject: [PATCH 34/72] vulkan: fix comment statement about exec_queue blocking

---
 libavutil/vulkan.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 7927b04454..a8aa9d8a8b 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -386,9 +386,7 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
                        VkPipelineStageFlagBits in_wait_dst_flag);
 
 /**
- * Submits a command buffer to the queue for execution.
- * Will block until execution has finished in order to simplify resource
- * management.
+ * Submits a command buffer to the queue for execution. Will not block.
  */
 int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
 
-- 
2.39.2


[-- Attachment #36: 0035-vulkan-add-pNext-argument-to-ff_vk_create_buf.patch --]
[-- Type: text/x-diff, Size: 3809 bytes --]

From d9c9bfa670126ea72a95a1808beb6bd0883cbb98 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 17 Mar 2022 12:23:56 +0100
Subject: [PATCH 35/72] vulkan: add pNext argument to ff_vk_create_buf()

---
 libavfilter/vf_gblur_vulkan.c   | 2 +-
 libavfilter/vf_overlay_vulkan.c | 2 +-
 libavfilter/vf_scale_vulkan.c   | 2 +-
 libavutil/vulkan.c              | 4 ++--
 libavutil/vulkan.h              | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c
index d61f3c778c..c6360799a7 100644
--- a/libavfilter/vf_gblur_vulkan.c
+++ b/libavfilter/vf_gblur_vulkan.c
@@ -174,7 +174,7 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
     RET(ff_vk_init_pipeline_layout(&s->vkctx, pl));
     RET(ff_vk_init_compute_pipeline(&s->vkctx, pl));
 
-    RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize,
+    RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL,
                          VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
     RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0));
 
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index e87ee83000..bdf231f4ef 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -181,7 +181,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
         } *par;
 
         err = ff_vk_create_buf(vkctx, &s->params_buf,
-                               sizeof(*par),
+                               sizeof(*par), NULL,
                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
         if (err)
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index c140420896..31dc35569b 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -253,7 +253,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
         }
 
         RET(ff_vk_create_buf(vkctx, &s->params_buf,
-                             sizeof(*par),
+                             sizeof(*par), NULL,
                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
 
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index f2846e628a..ae6adc5104 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -205,7 +205,7 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
     return 0;
 }
 
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
                      VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
 {
     int err;
@@ -215,7 +215,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
 
     VkBufferCreateInfo buf_spawn = {
         .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-        .pNext       = NULL,
+        .pNext       = pNext,
         .usage       = usage,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .size        = size, /* Gets FFALIGNED during alloc if host visible
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index a8aa9d8a8b..2311928a8c 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -393,7 +393,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
 /**
  * Create a VkBuffer with the specified parameters.
  */
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
                      VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
 
 /**
-- 
2.39.2


[-- Attachment #37: 0036-vulkan-add-ff_vk_qf_fill.patch --]
[-- Type: text/x-diff, Size: 2777 bytes --]

From da581e95cea93e9b628263aa28de945828f71967 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:03:58 +0100
Subject: [PATCH 36/72] vulkan: add ff_vk_qf_fill()

---
 libavutil/vulkan.c | 25 +++++++++++++++++++++++++
 libavutil/vulkan.h |  9 +++++++++
 2 files changed, 34 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ae6adc5104..eceef295a8 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,6 +108,31 @@ const char *ff_vk_ret2str(VkResult res)
 #undef CASE
 }
 
+/* Fills s->qfs/s->nb_qfs with the distinct queue family indices taken from
+ * s->hwctx; a negative index is treated as "not available" and skipped. */
+void ff_vk_qf_fill(FFVulkanContext *s)
+{
+    /* Candidates, in the order they should appear in s->qfs */
+    const int cand[] = {
+        s->hwctx->queue_family_index,
+        s->hwctx->queue_family_tx_index,
+        s->hwctx->queue_family_comp_index,
+        s->hwctx->queue_family_decode_index,
+        s->hwctx->queue_family_encode_index,
+    };
+
+    s->nb_qfs = 0;
+
+    for (size_t i = 0; i < sizeof(cand) / sizeof(cand[0]); i++) {
+        int j = 0;
+        /* Compare only against slots filled so far, never stale entries */
+        while (j < s->nb_qfs && s->qfs[j] != cand[i])
+            j++;
+        if (cand[i] >= 0 && j == s->nb_qfs)
+            s->qfs[s->nb_qfs++] = cand[i];
+    }
+}
+
 void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                    VkQueueFlagBits dev_family, int nb_queues)
 {
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 2311928a8c..7254c21cf7 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -203,6 +203,9 @@ typedef struct FFVulkanContext {
     AVHWFramesContext     *frames;
     AVVulkanFramesContext *hwfc;
 
+    uint32_t               qfs[5];
+    int                    nb_qfs;
+
     FFVkSPIRVCompiler     *spirv_compiler;
 
     /* Properties */
@@ -245,6 +248,12 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
  */
 const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
 
+/**
+ * Set up the queue families from the hardware device context.
+ * Necessary for image creation to work.
+ */
+void ff_vk_qf_fill(FFVulkanContext *s);
+
 /**
  * Initialize a queue family with a specific number of queues.
  * If nb_queues == 0, use however many queues the queue family has.
-- 
2.39.2


[-- Attachment #38: 0037-vulkan-add-ff_vk_image_create.patch --]
[-- Type: text/x-diff, Size: 4892 bytes --]

From 9da56b3fc3169588f97f590abeecb7ead3c18202 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:05:59 +0100
Subject: [PATCH 37/72] vulkan: add ff_vk_image_create()

---
 libavutil/vulkan.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++
 libavutil/vulkan.h | 11 ++++++
 2 files changed, 100 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index eceef295a8..212f134466 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -401,6 +401,95 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
         vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
 }
 
+int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx,
+                       int width, int height, VkFormat fmt, VkImageTiling tiling,
+                       VkImageUsageFlagBits usage, VkImageCreateFlags flags,
+                       void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext)
+{
+    /* NOTE(review): idx, mem and alloc_pnext are currently unused here */
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    AVVulkanDeviceContext *hwctx = s->hwctx;
+
+    VkExportSemaphoreCreateInfo ext_sem_info = {
+        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
+#ifdef _WIN32
+        .handleTypes = IsWindows8OrGreater()
+            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
+            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
+#else
+        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+#endif
+    };
+
+    VkSemaphoreTypeCreateInfo sem_type_info = {
+        .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
+#ifdef _WIN32
+        .pNext         = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
+#else
+        .pNext         = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
+#endif
+        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
+        .initialValue  = 0,
+    };
+
+    VkSemaphoreCreateInfo sem_spawn = {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+        .pNext = &sem_type_info,
+    };
+
+    /* Create the image; the extent must be filled in from width/height */
+    VkImageCreateInfo create_info = {
+        .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .pNext                 = create_pnext,
+        .imageType             = VK_IMAGE_TYPE_2D,
+        .format                = fmt,
+        .extent.width          = width,
+        .extent.height         = height,
+        .extent.depth          = 1,
+        .mipLevels             = 1,
+        .arrayLayers           = 1,
+        .flags                 = flags,
+        .tiling                = tiling,
+        .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage                 = usage,
+        .samples               = VK_SAMPLE_COUNT_1_BIT,
+        .pQueueFamilyIndices   = s->qfs,
+        .queueFamilyIndexCount = s->nb_qfs,
+        .sharingMode           = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+                                                 VK_SHARING_MODE_EXCLUSIVE,
+    };
+
+    ret = vk->CreateImage(hwctx->act_dev, &create_info,
+                          hwctx->alloc, &f->img[0]);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR(EINVAL);
+    }
+
+    /* Create semaphore; on failure, release the image created above */
+    ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
+                              hwctx->alloc, &f->sem[0]);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
+               ff_vk_ret2str(ret));
+        vk->DestroyImage(hwctx->act_dev, f->img[0], hwctx->alloc);
+        f->img[0] = VK_NULL_HANDLE;
+        return AVERROR_EXTERNAL;
+    }
+
+    f->queue_family[0] = s->nb_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : s->qfs[0];
+    f->layout[0] = create_info.initialLayout;
+    f->access[0] = 0x0;
+    f->sem_value[0] = 0;
+
+    f->flags  = 0x0;
+    f->tiling = tiling;
+
+    return 0;
+}
+
 int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
                             VkShaderStageFlagBits stage)
 {
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 7254c21cf7..69c099fa8f 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -423,6 +423,17 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
  */
 void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
 
+/**
+ * Creates an image, allocates and binds memory in the given
+ * idx value of the dst frame. If mem is non-NULL, then no memory will be
+ * allocated, but instead the given memory will be bound to the image.
+ */
+int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx,
+                       int width, int height, VkFormat fmt, VkImageTiling tiling,
+                       VkImageUsageFlagBits usage, VkImageCreateFlags flags,
+                       void *create_pnext,
+                       VkDeviceMemory *mem, void *alloc_pnext);
+
 /**
  * Frees the main Vulkan context.
  */
-- 
2.39.2


[-- Attachment #39: 0038-vulkan-expose-ff_vk_alloc_mem.patch --]
[-- Type: text/x-diff, Size: 2666 bytes --]

From 661af851afe7dcb3c2982fab953aff2941b4e5b9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:03:34 +0100
Subject: [PATCH 38/72] vulkan: expose ff_vk_alloc_mem()

---
 libavutil/vulkan.c | 15 ++++++++-------
 libavutil/vulkan.h |  7 +++++++
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 212f134466..7870de351d 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -174,9 +174,9 @@ void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
     qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
 }
 
-static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
-                        VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
-                        VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
 {
     VkResult ret;
     int index = -1;
@@ -225,7 +225,8 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
         return AVERROR(ENOMEM);
     }
 
-    *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
+    if (mem_flags)
+        *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
 
     return 0;
 }
@@ -279,9 +280,9 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
     if (use_ded_mem)
         ded_alloc.buffer = buf->buf;
 
-    err = vk_alloc_mem(s, &req.memoryRequirements, flags,
-                       use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
-                       &buf->flags, &buf->mem);
+    err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
+                          use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+                          &buf->flags, &buf->mem);
     if (err)
         return err;
 
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 69c099fa8f..afc8bce999 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -254,6 +254,13 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
  */
 void ff_vk_qf_fill(FFVulkanContext *s);
 
+/**
+ * Allocate device memory.
+ */
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+
 /**
  * Initialize a queue family with a specific number of queues.
  * If nb_queues == 0, use however many queues the queue family has.
-- 
2.39.2


[-- Attachment #40: 0039-vulkan-support-ignoring-memory-properties-when-alloc.patch --]
[-- Type: text/x-diff, Size: 1648 bytes --]

From e2a8084132631c8fad25aa5a2850deb904e42847 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 29 Nov 2022 00:43:19 +0000
Subject: [PATCH 39/72] vulkan: support ignoring memory properties when
 allocating

---
 libavutil/vulkan.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7870de351d..b1553c6537 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -188,7 +188,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
     };
 
     /* Align if we need to */
-    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+    if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
         req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
 
     alloc_info.allocationSize = req->size;
@@ -201,7 +201,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
             continue;
 
         /* The memory type flags must include our properties */
-        if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+        if ((req_flags != UINT32_MAX) &&
+            ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
             continue;
 
         /* Found a suitable memory type */
@@ -210,7 +211,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
     }
 
     if (index < 0) {
-        av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+        av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
                req_flags);
         return AVERROR(EINVAL);
     }
-- 
2.39.2


[-- Attachment #41: 0040-vulkan-allow-alloc-pNext-in-ff_vk_create_buf.patch --]
[-- Type: text/x-diff, Size: 1878 bytes --]

From 6ac7455f51f0ea1d68b4be2c8cf3ef6f5ca9abde Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 17:43:27 +0100
Subject: [PATCH 40/72] vulkan: allow alloc pNext in ff_vk_create_buf

---
 libavutil/vulkan.c | 5 +++--
 libavutil/vulkan.h | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index b1553c6537..0bb5b1eebf 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
     return 0;
 }
 
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+                     void *pNext, void *alloc_pNext,
                      VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
 {
     int err;
@@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
     };
     VkMemoryDedicatedAllocateInfo ded_alloc = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
-        .pNext = NULL,
+        .pNext = alloc_pNext,
     };
     VkMemoryDedicatedRequirements ded_req = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index afc8bce999..65f24ca138 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -409,7 +409,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
 /**
  * Create a VkBuffer with the specified parameters.
  */
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+                     void *pNext, void *alloc_pNext,
                      VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
 
 /**
-- 
2.39.2


[-- Attachment #42: 0041-vulkan-do-not-wait-for-device-idle-when-destroying-b.patch --]
[-- Type: text/x-diff, Size: 786 bytes --]

From 8ce981bb551f37d27f9a11a36c4af7eb007011cb Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 16 Dec 2022 00:37:53 +0100
Subject: [PATCH 41/72] vulkan: do not wait for device idle when destroying
 buffers

This should be done explicitly.
---
 libavutil/vulkan.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 0bb5b1eebf..0250f5aa39 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -396,8 +396,6 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
     if (!buf || !s->hwctx)
         return;
 
-    vk->DeviceWaitIdle(s->hwctx->act_dev);
-
     if (buf->buf != VK_NULL_HANDLE)
         vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
     if (buf->mem != VK_NULL_HANDLE)
-- 
2.39.2


[-- Attachment #43: 0042-vulkan-add-size-tracking-to-buffer-structs.patch --]
[-- Type: text/x-diff, Size: 964 bytes --]

From 0f532a85d9d3fd09d8f35f61911edc8827ed26c0 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 16 Dec 2022 01:47:42 +0100
Subject: [PATCH 42/72] vulkan: add size tracking to buffer structs

---
 libavutil/vulkan.c | 2 ++
 libavutil/vulkan.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 0250f5aa39..faf5cd5508 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -295,6 +295,8 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
         return AVERROR_EXTERNAL;
     }
 
+    buf->size = size;
+
     return 0;
 }
 
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 65f24ca138..c993263324 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -94,6 +94,7 @@ typedef struct FFVkBuffer {
     VkBuffer buf;
     VkDeviceMemory mem;
     VkMemoryPropertyFlagBits flags;
+    size_t size;
 } FFVkBuffer;
 
 typedef struct FFVkQueueFamilyCtx {
-- 
2.39.2


[-- Attachment #44: 0043-vulkan-use-device-properties-2-and-add-a-convenience.patch --]
[-- Type: text/x-diff, Size: 2388 bytes --]

From 054c1925dd67a5918fd42b894bb4ca966e60aec8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 19 Dec 2022 07:57:22 +0100
Subject: [PATCH 43/72] vulkan: use device properties 2 and add a convenience
 loader function

---
 libavutil/vulkan.c | 18 +++++++++++++++++-
 libavutil/vulkan.h |  8 +++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index faf5cd5508..8a583248d1 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,6 +108,22 @@ const char *ff_vk_ret2str(VkResult res)
 #undef CASE
 }
 
+void ff_vk_load_props(FFVulkanContext *s)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    s->driver_props = (VkPhysicalDeviceDriverProperties) {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+    };
+    s->props = (VkPhysicalDeviceProperties2) {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
+        .pNext = &s->driver_props,
+    };
+
+    vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
+    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+}
+
 void ff_vk_qf_fill(FFVulkanContext *s)
 {
     s->nb_qfs = 0;
@@ -189,7 +205,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
 
     /* Align if we need to */
     if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
-        req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
+        req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
 
     alloc_info.allocationSize = req->size;
 
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index c993263324..0f6efd023e 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -193,7 +193,8 @@ typedef struct FFVulkanContext {
 
     FFVulkanFunctions     vkfn;
     FFVulkanExtensions    extensions;
-    VkPhysicalDeviceProperties props;
+    VkPhysicalDeviceProperties2 props;
+    VkPhysicalDeviceDriverProperties driver_props;
     VkPhysicalDeviceMemoryProperties mprops;
 
     AVBufferRef           *device_ref;
@@ -239,6 +240,11 @@ extern const VkComponentMapping ff_comp_identity_map;
  */
 const char *ff_vk_ret2str(VkResult res);
 
+/**
+ * Loads props/mprops/driver_props
+ */
+void ff_vk_load_props(FFVulkanContext *s);
+
 /**
  * Returns 1 if the image is any sort of supported RGB
  */
-- 
2.39.2


[-- Attachment #45: 0044-vulkan-minor-indent-fix-add-support-for-synchronous-.patch --]
[-- Type: text/x-diff, Size: 2945 bytes --]

From 834645640497d6e371fa50c40ee9ef9700494851 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 05:02:50 +0100
Subject: [PATCH 44/72] vulkan: minor indent fix, add support for synchronous
 submission/waiting

---
 libavutil/vulkan.c | 20 ++++++++++++++++++--
 libavutil/vulkan.h |  9 +++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 8a583248d1..b5e08ecc46 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -564,7 +564,7 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
 
     /* Create command pool */
     ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
-                              s->hwctx->alloc, &e->pool);
+                                s->hwctx->alloc, &e->pool);
     if (ret != VK_SUCCESS) {
         av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
                ff_vk_ret2str(ret));
@@ -631,11 +631,13 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
                    ff_vk_ret2str(ret));
             return AVERROR_EXTERNAL;
         }
-    } else {
+    } else if (!q->synchronous) {
         vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
         vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
     }
 
+    q->synchronous = 0;
+
     /* Discard queue dependencies */
     ff_vk_discard_exec_deps(e);
 
@@ -788,9 +790,23 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
     for (int i = 0; i < e->sem_sig_cnt; i++)
         *e->sem_sig_val_dst[i] += 1;
 
+    q->submitted = 1;
+
     return 0;
 }
 
+void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+    /* Nothing submitted, or already waited on (the fence is reset below) */
+    if (!q->submitted || q->synchronous)
+        return;
+    vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+    vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
+    q->synchronous = 1;
+}
+
 int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
                            AVBufferRef **deps, int nb_deps)
 {
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 0f6efd023e..9ee9469305 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -143,6 +143,9 @@ typedef struct FFVkQueueCtx {
     VkFence fence;
     VkQueue queue;
 
+    int synchronous;
+    int submitted;
+
     /* Buffer dependencies */
     AVBufferRef **buf_deps;
     int nb_buf_deps;
@@ -413,6 +416,12 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
  */
 int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
 
+/**
+ * Wait on a command buffer's execution. Mainly useful for debugging and
+ * development.
+ */
+void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e);
+
 /**
  * Create a VkBuffer with the specified parameters.
  */
-- 
2.39.2


[-- Attachment #46: 0045-vulkan-add-support-for-queries.patch --]
[-- Type: text/x-diff, Size: 7363 bytes --]

From f97d922f523914c0d1e9748876aa3002e0f5811c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 05:03:32 +0100
Subject: [PATCH 45/72] vulkan: add support for queries

---
 libavutil/vulkan.c | 118 +++++++++++++++++++++++++++++++++++++++++++++
 libavutil/vulkan.h |  30 ++++++++++++
 2 files changed, 148 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index b5e08ecc46..de0c300c0e 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -592,6 +592,114 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
     return 0;
 }
 
+int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
+                                     int nb_queries, VkQueryType type,
+                                     int elem_64bits, void *create_pnext)
+{
+    size_t qd_size;
+    int nb_results = nb_queries;
+    int nb_statuses = 0 /* Once RADV has support, = nb_queries */;
+    int status_stride = 2;
+    int result_elem_size = elem_64bits ? 8 : 4;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkQueryPoolCreateInfo query_pool_info = {
+        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+        .pNext = create_pnext,
+        .queryType = type,
+        .queryCount = nb_queries*e->qf->nb_queues,
+    };
+
+    if (e->query.pool)
+        return AVERROR(EINVAL);
+
+    /* Video encode queries produce two results per query */
+    if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
+        status_stride = 3; /* skip,skip,result,skip,skip,result */
+        nb_results *= 2;
+    } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
+        status_stride = 1;
+        nb_results *= 0;
+    }
+
+    qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size;
+
+    e->query.data = av_mallocz(e->qf->nb_queues*qd_size);
+    if (!e->query.data)
+        return AVERROR(ENOMEM);
+
+    if (vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
+                            s->hwctx->alloc, &e->query.pool) != VK_SUCCESS) {
+        av_freep(&e->query.data); /* a retry would otherwise leak this */
+        return AVERROR_EXTERNAL;
+    }
+
+    e->query.data_per_queue = qd_size;
+    e->query.nb_queries     = nb_queries;
+    e->query.nb_results     = nb_results;
+    e->query.nb_statuses    = nb_statuses;
+    e->query.elem_64bits    = elem_64bits;
+    e->query.status_stride  = status_stride;
+
+    return 0;
+}
+
+int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
+                                     int query_idx, void **data, int64_t *status)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    uint8_t *qd;
+    int32_t *res32;
+    int64_t *res64;
+    int64_t res = 0;
+    VkQueryResultFlags qf = 0;
+    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+
+    if (!q->submitted) {
+        if (data) /* data is optional, exactly as on the success path */
+            *data = NULL;
+        return 0;
+    }
+
+    qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue;
+    qf |= e->query.nb_results && e->query.nb_statuses ?
+          VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
+    qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0;
+    res32 = (int32_t *)(qd + e->query.nb_results*4);
+    res64 = (int64_t *)(qd + e->query.nb_results*8);
+
+    ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool,
+                                  query_idx, e->query.nb_queries,
+                                  e->query.data_per_queue, qd,
+                                  e->query.elem_64bits ? 8 : 4, qf);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (e->query.nb_statuses && e->query.elem_64bits) {
+        for (int i = 0; i < e->query.nb_queries; i++) {
+            res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
+                  res64[i] : res;
+            res64 += e->query.status_stride;
+        }
+    } else if (e->query.nb_statuses) {
+        for (int i = 0; i < e->query.nb_queries; i++) {
+            res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
+                  res32[i] : res;
+            res32 += e->query.status_stride;
+        }
+    }
+
+    if (data)
+        *data = qd;
+    if (status)
+        *status = res;
+
+    return 0;
+}
+
 void ff_vk_discard_exec_deps(FFVkExecContext *e)
 {
     FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
@@ -648,6 +756,12 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
         return AVERROR_EXTERNAL;
     }
 
+    if (e->query.pool) {
+        e->query.idx = e->qf->cur_queue*e->query.nb_queries;
+        vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool,
+                              e->query.idx, e->query.nb_queries);
+    }
+
     return 0;
 }
 
@@ -790,6 +904,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
     for (int i = 0; i < e->sem_sig_cnt; i++)
         *e->sem_sig_val_dst[i] += 1;
 
+    e->query.idx = e->qf->cur_queue*e->query.nb_queries;
     q->submitted = 1;
 
     return 0;
@@ -1483,7 +1598,10 @@ static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
         vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
     if (e->pool)
         vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+    if (e->query.pool)
+        vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc);
 
+    av_freep(&e->query.data);
     av_freep(&e->bufs);
     av_freep(&e->queues);
     av_freep(&e->sem_sig);
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 9ee9469305..e222f67b5a 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -164,6 +164,19 @@ typedef struct FFVkExecContext {
     VkCommandBuffer *bufs;
     FFVkQueueCtx *queues;
 
+    struct {
+        int           idx;
+        VkQueryPool   pool;
+        uint8_t      *data;
+
+        int           nb_queries;
+        int           nb_results;
+        int           nb_statuses;
+        int           elem_64bits;
+        size_t        data_per_queue;
+        int           status_stride;
+    } query;
+
     AVBufferRef ***deps;
     int *nb_deps;
     int *dep_alloc_size;
@@ -367,6 +380,23 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
 int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
                           FFVkQueueFamilyCtx *qf);
 
+/**
+ * Create a query pool for a command context.
+ * elem_64bits exists to troll driver devs for compliance. All results
+ * and statuses returned should be 32 bits, unless this is set, then it's 64bits.
+ */
+int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
+                                     int nb_queries, VkQueryType type,
+                                     int elem_64bits, void *create_pnext);
+
+/**
+ * Get results for query.
+ * Returns 0 on success, or a negative AVERROR code on failure.
+ * Sets *status to the status of the queries.
+ */
+int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
+                                     int query_idx, void **data, int64_t *status);
+
 /**
  * Begin recording to the command buffer. Previous execution must have been
  * completed, which ff_vk_submit_exec_queue() will ensure.
-- 
2.39.2


[-- Attachment #47: 0046-vulkan-add-support-for-retrieving-queue-query-and-vi.patch --]
[-- Type: text/x-diff, Size: 7602 bytes --]

From 5422a554ad592c3b4a68c34490db201577f295ee Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 17:37:51 +0100
Subject: [PATCH 46/72] vulkan: add support for retrieving queue, query and
 video properties

---
 libavutil/vulkan.c           | 87 ++++++++++++++++++++++++++++++------
 libavutil/vulkan.h           | 14 ++++--
 libavutil/vulkan_functions.h |  1 +
 3 files changed, 85 insertions(+), 17 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index de0c300c0e..d045ff83c1 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res)
 #undef CASE
 }
 
-void ff_vk_load_props(FFVulkanContext *s)
+int ff_vk_load_props(FFVulkanContext *s)
 {
+    uint32_t qc = 0;
     FFVulkanFunctions *vk = &s->vkfn;
 
     s->driver_props = (VkPhysicalDeviceDriverProperties) {
@@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s)
         .pNext = &s->driver_props,
     };
 
+
     vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
     vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
+
+    if (s->qf_props)
+        return 0;
+
+    s->qf_props = av_mallocz(sizeof(*s->qf_props)*qc);
+    if (!s->qf_props)
+        return AVERROR(ENOMEM);
+
+    s->query_props = av_mallocz(sizeof(*s->query_props)*qc);
+    if (!s->query_props) {
+        av_freep(&s->qf_props);
+        return AVERROR(ENOMEM);
+    }
+
+    s->video_props = av_mallocz(sizeof(*s->video_props)*qc);
+    if (!s->video_props) {
+        av_freep(&s->qf_props);
+        av_freep(&s->query_props);
+        return AVERROR(ENOMEM);
+    }
+
+    for (uint32_t i = 0; i < qc; i++) {
+        s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) {
+            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR,
+        };
+        s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) {
+            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+            .pNext = &s->query_props[i],
+        };
+        s->qf_props[i] = (VkQueueFamilyProperties2) {
+            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+            .pNext = &s->video_props[i],
+        };
+    }
+
+    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
+
+    return 0;
 }
 
 void ff_vk_qf_fill(FFVulkanContext *s)
@@ -149,40 +190,54 @@ void ff_vk_qf_fill(FFVulkanContext *s)
         s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
 }
 
-void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
-                   VkQueueFlagBits dev_family, int nb_queues)
+int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
 {
+    int ret, num;
+
     switch (dev_family) {
     case VK_QUEUE_GRAPHICS_BIT:
-        qf->queue_family = s->hwctx->queue_family_index;
-        qf->actual_queues = s->hwctx->nb_graphics_queues;
+        ret = s->hwctx->queue_family_index;
+        num = s->hwctx->nb_graphics_queues;
         break;
     case VK_QUEUE_COMPUTE_BIT:
-        qf->queue_family = s->hwctx->queue_family_comp_index;
-        qf->actual_queues = s->hwctx->nb_comp_queues;
+        ret = s->hwctx->queue_family_comp_index;
+        num = s->hwctx->nb_comp_queues;
         break;
     case VK_QUEUE_TRANSFER_BIT:
-        qf->queue_family = s->hwctx->queue_family_tx_index;
-        qf->actual_queues = s->hwctx->nb_tx_queues;
+        ret = s->hwctx->queue_family_tx_index;
+        num = s->hwctx->nb_tx_queues;
         break;
     case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
-        qf->queue_family = s->hwctx->queue_family_encode_index;
-        qf->actual_queues = s->hwctx->nb_encode_queues;
+        ret = s->hwctx->queue_family_encode_index;
+        num = s->hwctx->nb_encode_queues;
         break;
     case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
-        qf->queue_family = s->hwctx->queue_family_decode_index;
-        qf->actual_queues = s->hwctx->nb_decode_queues;
+        ret = s->hwctx->queue_family_decode_index;
+        num = s->hwctx->nb_decode_queues;
         break;
     default:
         av_assert0(0); /* Should never happen */
     }
 
+    if (nb)
+        *nb = num;
+
+    return ret;
+}
+
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+                  VkQueueFlagBits dev_family, int nb_queues)
+{
+    int ret;
+
+    ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues);
+
     if (!nb_queues)
         qf->nb_queues = qf->actual_queues;
     else
         qf->nb_queues = nb_queues;
 
-    return;
+    return ret;
 }
 
 void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
@@ -1669,6 +1724,10 @@ void ff_vk_uninit(FFVulkanContext *s)
 {
     FFVulkanFunctions *vk = &s->vkfn;
 
+    av_freep(&s->query_props);
+    av_freep(&s->qf_props);
+    av_freep(&s->video_props);
+
     if (s->spirv_compiler)
         s->spirv_compiler->uninit(&s->spirv_compiler);
 
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index e222f67b5a..ccfa88f44f 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -212,6 +212,9 @@ typedef struct FFVulkanContext {
     VkPhysicalDeviceProperties2 props;
     VkPhysicalDeviceDriverProperties driver_props;
     VkPhysicalDeviceMemoryProperties mprops;
+    VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
+    VkQueueFamilyVideoPropertiesKHR *video_props;
+    VkQueueFamilyProperties2 *qf_props;
 
     AVBufferRef           *device_ref;
     AVHWDeviceContext     *device;
@@ -259,7 +262,7 @@ const char *ff_vk_ret2str(VkResult res);
 /**
  * Loads props/mprops/driver_props
  */
-void ff_vk_load_props(FFVulkanContext *s);
+int ff_vk_load_props(FFVulkanContext *s);
 
 /**
  * Returns 1 if the image is any sort of supported RGB
@@ -284,12 +287,17 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
                     VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
 
+/**
+ * Get a queue family index and the number of queues. nb is optional.
+ */
+int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb);
+
 /**
  * Initialize a queue family with a specific number of queues.
  * If nb_queues == 0, use however many queues the queue family has.
  */
-void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
-                   VkQueueFlagBits dev_family, int nb_queues);
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+                  VkQueueFlagBits dev_family, int nb_queues);
 
 /**
  * Rotate through the queues in a queue family.
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 65ab560d21..fa1650e895 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -77,6 +77,7 @@ typedef enum FFVulkanExtensions {
     MACRO(1, 0, FF_VK_EXT_NO_FLAG,              GetPhysicalDeviceFormatProperties2)      \
     MACRO(1, 0, FF_VK_EXT_NO_FLAG,              GetPhysicalDeviceImageFormatProperties2) \
     MACRO(1, 0, FF_VK_EXT_NO_FLAG,              GetPhysicalDeviceQueueFamilyProperties)  \
+    MACRO(1, 0, FF_VK_EXT_NO_FLAG,              GetPhysicalDeviceQueueFamilyProperties2) \
                                                                                          \
     /* Command pool */                                                                   \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG,              CreateCommandPool)                       \
-- 
2.39.2


[-- Attachment #48: 0047-vulkan-return-current-queue-index-from-ff_vk_qf_rota.patch --]
[-- Type: text/x-diff, Size: 1290 bytes --]

From 4632426c65f136ef70c4ab854a1076e1d1c868ff Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 28 Dec 2022 05:55:53 +0100
Subject: [PATCH 47/72] vulkan: return current queue index from
 ff_vk_qf_rotate()

---
 libavutil/vulkan.c | 3 ++-
 libavutil/vulkan.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index d045ff83c1..cb8e08e02f 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -240,9 +240,10 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
     return ret;
 }
 
-void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
+int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
 {
     qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
+    return qf->cur_queue;
 }
 
 int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index ccfa88f44f..dd1bc9c440 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -302,7 +302,7 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
 /**
  * Rotate through the queues in a queue family.
  */
-void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
+int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
 
 /**
  * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
-- 
2.39.2


[-- Attachment #49: 0048-vulkan-rewrite-dependency-handling-code.patch --]
[-- Type: text/x-diff, Size: 82373 bytes --]

From c1e607011ac764b46875add61c533ab2e49ab00e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 29 Dec 2022 21:16:21 +0100
Subject: [PATCH 48/72] vulkan: rewrite dependency handling code

---
 libavutil/vulkan.c | 1350 ++++++++++++++++++++------------------------
 libavutil/vulkan.h |  382 +++++--------
 2 files changed, 749 insertions(+), 983 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index cb8e08e02f..17a5bd6f3f 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -27,27 +27,6 @@
 #include "vulkan_shaderc.c"
 #endif
 
-/* Generic macro for creating contexts which need to keep their addresses
- * if another context is created. */
-#define FN_CREATING(ctx, type, shortname, array, num)                          \
-static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
-{                                                                              \
-    type **array, *sctx = av_mallocz(sizeof(*sctx));                           \
-    if (!sctx)                                                                 \
-        return NULL;                                                           \
-                                                                               \
-    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
-    if (!array) {                                                              \
-        av_free(sctx);                                                         \
-        return NULL;                                                           \
-    }                                                                          \
-                                                                               \
-    dctx->array = array;                                                       \
-    dctx->array[dctx->num++] = sctx;                                           \
-                                                                               \
-    return sctx;                                                               \
-}
-
 const VkComponentMapping ff_comp_identity_map = {
     .r = VK_COMPONENT_SWIZZLE_IDENTITY,
     .g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -165,32 +144,7 @@ int ff_vk_load_props(FFVulkanContext *s)
     return 0;
 }
 
-void ff_vk_qf_fill(FFVulkanContext *s)
-{
-    s->nb_qfs = 0;
-
-    /* Simply fills in all unique queues into s->qfs */
-    if (s->hwctx->queue_family_index >= 0)
-        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
-    if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
-        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
-    if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
-                       s->qfs[1] != s->hwctx->queue_family_comp_index))
-        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
-    if (s->hwctx->queue_family_decode_index >= 0 &&
-         (s->qfs[0] != s->hwctx->queue_family_decode_index &&
-          s->qfs[1] != s->hwctx->queue_family_decode_index &&
-          s->qfs[2] != s->hwctx->queue_family_decode_index))
-        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
-    if (s->hwctx->queue_family_encode_index >= 0 &&
-         (s->qfs[0] != s->hwctx->queue_family_encode_index &&
-          s->qfs[1] != s->hwctx->queue_family_encode_index &&
-          s->qfs[2] != s->hwctx->queue_family_encode_index &&
-          s->qfs[3] != s->hwctx->queue_family_encode_index))
-        s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
-}
-
-int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
+static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
 {
     int ret, num;
 
@@ -226,790 +180,760 @@ int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
 }
 
 int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
-                  VkQueueFlagBits dev_family, int nb_queues)
+                  VkQueueFlagBits dev_family)
 {
-    int ret;
-
-    ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues);
-
-    if (!nb_queues)
-        qf->nb_queues = qf->actual_queues;
-    else
-        qf->nb_queues = nb_queues;
-
-    return ret;
-}
+    /* Fill in queue families from context if not done yet */
+    if (!s->nb_qfs) {
+        s->nb_qfs = 0;
+
+        /* Simply fills in all unique queues into s->qfs */
+        if (s->hwctx->queue_family_index >= 0)
+            s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
+        if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
+            s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
+        if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
+                           s->qfs[1] != s->hwctx->queue_family_comp_index))
+            s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
+        if (s->hwctx->queue_family_decode_index >= 0 &&
+             (s->qfs[0] != s->hwctx->queue_family_decode_index &&
+              s->qfs[1] != s->hwctx->queue_family_decode_index &&
+              s->qfs[2] != s->hwctx->queue_family_decode_index))
+            s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
+        if (s->hwctx->queue_family_encode_index >= 0 &&
+             (s->qfs[0] != s->hwctx->queue_family_encode_index &&
+              s->qfs[1] != s->hwctx->queue_family_encode_index &&
+              s->qfs[2] != s->hwctx->queue_family_encode_index &&
+              s->qfs[3] != s->hwctx->queue_family_encode_index))
+            s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
+    }
 
-int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
-{
-    qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
-    return qf->cur_queue;
+    return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues));
 }
 
-int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
-                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
-                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
 {
-    VkResult ret;
-    int index = -1;
     FFVulkanFunctions *vk = &s->vkfn;
 
-    VkMemoryAllocateInfo alloc_info = {
-        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-        .pNext           = alloc_extension,
-    };
-
-    /* Align if we need to */
-    if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
-        req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
-
-    alloc_info.allocationSize = req->size;
-
-    /* The vulkan spec requires memory types to be sorted in the "optimal"
-     * order, so the first matching type we find will be the best/fastest one */
-    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
-        /* The memory type must be supported by the requirements (bitfield) */
-        if (!(req->memoryTypeBits & (1 << i)))
-            continue;
-
-        /* The memory type flags must include our properties */
-        if ((req_flags != UINT32_MAX) &&
-            ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
-            continue;
+    for (int i = 0; i < pool->pool_size; i++) {
+        FFVkExecContext *e = &pool->contexts[i];
 
-        /* Found a suitable memory type */
-        index = i;
-        break;
-    }
+        if (e->fence) {
+            vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+            vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
+        }
 
-    if (index < 0) {
-        av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
-               req_flags);
-        return AVERROR(EINVAL);
+        ff_vk_exec_discard_deps(s, e);
+
+        av_free(e->frame_deps);
+        av_free(e->buf_deps);
+        av_free(e->queue_family_dst);
+        av_free(e->layout_dst);
+        av_free(e->access_dst);
+        av_free(e->frame_update);
+        av_free(e->frame_locked);
+        av_free(e->sem_sig);
+        av_free(e->sem_sig_val);
+        av_free(e->sem_sig_val_dst);
+        av_free(e->sem_wait);
+        av_free(e->sem_wait_dst);
+        av_free(e->sem_wait_val);
     }
 
-    alloc_info.memoryTypeIndex = index;
-
-    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
-                             s->hwctx->alloc, mem);
-    if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
-               ff_vk_ret2str(ret));
-        return AVERROR(ENOMEM);
-    }
-
-    if (mem_flags)
-        *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
-
-    return 0;
+    if (pool->cmd_bufs)
+        vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool,
+                               pool->pool_size, pool->cmd_bufs);
+    if (pool->cmd_buf_pool)
+        vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc);
+    if (pool->query_pool)
+        vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc);
+
+    av_free(pool->query_data);
+    av_free(pool->cmd_bufs);
+    av_free(pool->contexts);
 }
 
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
-                     void *pNext, void *alloc_pNext,
-                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+                         FFVkExecPool *pool, int nb_contexts,
+                         int nb_queries, VkQueryType query_type, int query_64bit,
+                         void *query_create_pnext)
 {
     int err;
     VkResult ret;
-    int use_ded_mem;
     FFVulkanFunctions *vk = &s->vkfn;
 
-    VkBufferCreateInfo buf_spawn = {
-        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-        .pNext       = pNext,
-        .usage       = usage,
-        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-        .size        = size, /* Gets FFALIGNED during alloc if host visible
-                                but should be ok */
-    };
+    VkCommandPoolCreateInfo cqueue_create;
+    VkCommandBufferAllocateInfo cbuf_create;
 
-    VkBufferMemoryRequirementsInfo2 req_desc = {
-        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
-    };
-    VkMemoryDedicatedAllocateInfo ded_alloc = {
-        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
-        .pNext = alloc_pNext,
-    };
-    VkMemoryDedicatedRequirements ded_req = {
-        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
-    };
-    VkMemoryRequirements2 req = {
-        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
-        .pNext = &ded_req,
-    };
+    atomic_init(&pool->idx, 0);
 
-    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
+    /* Create command pool */
+    cqueue_create = (VkCommandPoolCreateInfo) {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+        .flags              = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+                              VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+        .queueFamilyIndex   = qf->queue_family,
+    };
+    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+                                s->hwctx->alloc, &pool->cmd_buf_pool);
     if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
                ff_vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
+        err = AVERROR_EXTERNAL;
+        goto fail;
     }
 
-    req_desc.buffer = buf->buf;
-
-    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
-
-    /* In case the implementation prefers/requires dedicated allocation */
-    use_ded_mem = ded_req.prefersDedicatedAllocation |
-                  ded_req.requiresDedicatedAllocation;
-    if (use_ded_mem)
-        ded_alloc.buffer = buf->buf;
-
-    err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
-                          use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
-                          &buf->flags, &buf->mem);
-    if (err)
-        return err;
+    /* Allocate space for command buffers */
+    pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs));
+    if (!pool->cmd_bufs) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
 
-    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
+    /* Allocate command buffer */
+    cbuf_create = (VkCommandBufferAllocateInfo) {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+        .commandPool        = pool->cmd_buf_pool,
+        .commandBufferCount = nb_contexts,
+    };
+    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create,
+                                     pool->cmd_bufs);
     if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
                ff_vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
+        err = AVERROR_EXTERNAL;
+        goto fail;
     }
 
-    buf->size = size;
+    /* Query pool */
+    if (nb_queries) {
+        VkQueryPoolCreateInfo query_pool_info = {
+            .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+            .pNext = query_create_pnext,
+            .queryType = query_type,
+            .queryCount = nb_queries*nb_contexts,
+        };
+        ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
+                                  s->hwctx->alloc, &pool->query_pool);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n",
+                   ff_vk_ret2str(ret));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
 
-    return 0;
-}
+        pool->nb_queries = nb_queries;
+        pool->query_status_stride = 2;
+        pool->query_results = nb_queries;
+        pool->query_statuses = 0; /* if radv supports it, nb_queries; */
+
+        /* Video encode queries produce two results per query */
+        if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
+            pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */
+            pool->query_results *= 2;
+        } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
+            pool->query_status_stride = 1;
+            pool->query_results = 0;
+            pool->query_statuses = nb_queries;
+        }
 
-int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
-                      int nb_buffers, int invalidate)
-{
-    VkResult ret;
-    FFVulkanFunctions *vk = &s->vkfn;
-    VkMappedMemoryRange *inval_list = NULL;
-    int inval_count = 0;
+        pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4);
 
-    for (int i = 0; i < nb_buffers; i++) {
-        ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
-                            VK_WHOLE_SIZE, 0, (void **)&mem[i]);
-        if (ret != VK_SUCCESS) {
-            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
-                   ff_vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
+        /* Allocate space for the query data */
+        pool->query_data = av_mallocz(nb_contexts*pool->qd_size);
+        if (!pool->query_data) {
+            err = AVERROR(ENOMEM);
+            goto fail;
         }
     }
 
-    if (!invalidate)
-        return 0;
-
-    for (int i = 0; i < nb_buffers; i++) {
-        const VkMappedMemoryRange ival_buf = {
-            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
-            .memory = buf[i].mem,
-            .size   = VK_WHOLE_SIZE,
-        };
-        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
-            continue;
-        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
-                                     (++inval_count)*sizeof(*inval_list));
-        if (!inval_list)
-            return AVERROR(ENOMEM);
-        inval_list[inval_count - 1] = ival_buf;
+    /* Allocate space for the contexts */
+    pool->contexts = av_mallocz(nb_contexts*sizeof(*pool->contexts));
+    if (!pool->contexts) {
+        err = AVERROR(ENOMEM);
+        goto fail;
     }
 
-    if (inval_count) {
-        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
-                                               inval_list);
+    pool->pool_size = nb_contexts;
+
+    /* Init contexts */
+    for (int i = 0; i < pool->pool_size; i++) {
+        FFVkExecContext *e = &pool->contexts[i];
+
+        /* Fence */
+        VkFenceCreateInfo fence_create = {
+            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+            .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+        };
+        ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc,
+                              &e->fence);
         if (ret != VK_SUCCESS) {
-            av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+            av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n",
                    ff_vk_ret2str(ret));
             return AVERROR_EXTERNAL;
         }
+
+        e->parent = pool;
+
+        /* Query data */
+        e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i;
+        e->query_idx = nb_queries*i;
+
+        /* Command buffer */
+        e->buf = pool->cmd_bufs[i];
+
+        /* Queue index distribution */
+        e->qi = i % qf->nb_queues;
+        e->qf = qf->queue_family;
+        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
+                           e->qi, &e->queue);
     }
 
     return 0;
+
+fail:
+    ff_vk_exec_pool_free(s, pool);
+    return err;
 }
 
-int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
-                        int flush)
+VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
+                              void **data, int64_t *status)
 {
-    int err = 0;
     VkResult ret;
     FFVulkanFunctions *vk = &s->vkfn;
-    VkMappedMemoryRange *flush_list = NULL;
-    int flush_count = 0;
+    const FFVkExecPool *pool = e->parent;
 
-    if (flush) {
-        for (int i = 0; i < nb_buffers; i++) {
-            const VkMappedMemoryRange flush_buf = {
-                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
-                .memory = buf[i].mem,
-                .size   = VK_WHOLE_SIZE,
-            };
-            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
-                continue;
-            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
-                                         (++flush_count)*sizeof(*flush_list));
-            if (!flush_list)
-                return AVERROR(ENOMEM);
-            flush_list[flush_count - 1] = flush_buf;
-        }
-    }
+    int32_t *res32;
+    int64_t *res64;
+    int64_t res = 0;
+    VkQueryResultFlags qf = 0;
 
-    if (flush_count) {
-        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
-                                          flush_list);
-        if (ret != VK_SUCCESS) {
-            av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
-                   ff_vk_ret2str(ret));
-            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+    qf |= pool->query_64bit ?
+          VK_QUERY_RESULT_64_BIT : 0x0;
+    qf |= pool->query_statuses ?
+          VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
+
+    ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool,
+                                  e->query_idx,
+                                  pool->nb_queries,
+                                  pool->qd_size, e->query_data,
+                                  pool->query_64bit ? 8 : 4, qf);
+    if (ret != VK_SUCCESS)
+        return ret;
+
+    if (pool->query_statuses && pool->query_64bit) {
+        for (int i = 0; i < pool->query_statuses; i++) {
+            res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
+                  res64[i] : res;
+            res64 += pool->query_status_stride;
+        }
+    } else if (pool->query_statuses) {
+        for (int i = 0; i < pool->query_statuses; i++) {
+            res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
+                  res32[i] : res;
+            res32 += pool->query_status_stride;
         }
     }
 
-    for (int i = 0; i < nb_buffers; i++)
-        vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
+    if (data)
+        *data = e->query_data;
+    if (status)
+        *status = res;
 
-    return err;
+    return VK_SUCCESS;
 }
 
-void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool)
 {
-    FFVulkanFunctions *vk = &s->vkfn;
-
-    if (!buf || !s->hwctx)
-        return;
+    int idx = atomic_fetch_add_explicit(&pool->idx, 1, memory_order_relaxed);
+    idx %= pool->pool_size;
+    return &pool->contexts[idx];
+}
 
-    if (buf->buf != VK_NULL_HANDLE)
-        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
-    if (buf->mem != VK_NULL_HANDLE)
-        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
 }
 
-int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx,
-                       int width, int height, VkFormat fmt, VkImageTiling tiling,
-                       VkImageUsageFlagBits usage, VkImageCreateFlags flags,
-                       void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext)
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
 {
-    int err;
     VkResult ret;
     FFVulkanFunctions *vk = &s->vkfn;
-    AVVulkanDeviceContext *hwctx = s->hwctx;
-
-    VkExportSemaphoreCreateInfo ext_sem_info = {
-        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
-#ifdef _WIN32
-        .handleTypes = IsWindows8OrGreater()
-            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
-            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
-#else
-        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
-#endif
-    };
+    const FFVkExecPool *pool = e->parent;
 
-    VkSemaphoreTypeCreateInfo sem_type_info = {
-        .sType         = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
-#ifdef _WIN32
-        .pNext         = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
-#else
-        .pNext         = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
-#endif
-        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
-        .initialValue  = 0,
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
     };
 
-    VkSemaphoreCreateInfo sem_spawn = {
-        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
-        .pNext = &sem_type_info,
-    };
+    /* Wait for any previous use of the fence to finish, then reset it */
+    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+    vk->ResetFences(s->hwctx->act_dev, 1, &e->fence);
 
-    /* Create the image */
-    VkImageCreateInfo create_info = {
-        .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
-        .pNext                 = create_pnext,
-        .imageType             = VK_IMAGE_TYPE_2D,
-        .format                = fmt,
-        .extent.depth          = 1,
-        .mipLevels             = 1,
-        .arrayLayers           = 1,
-        .flags                 = flags,
-        .tiling                = tiling,
-        .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
-        .usage                 = usage,
-        .samples               = VK_SAMPLE_COUNT_1_BIT,
-        .pQueueFamilyIndices   = s->qfs,
-        .queueFamilyIndexCount = s->nb_qfs,
-        .sharingMode           = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
-                                                 VK_SHARING_MODE_EXCLUSIVE,
-    };
+    /* Discard queue dependencies */
+    ff_vk_exec_discard_deps(s, e);
 
-    ret = vk->CreateImage(hwctx->act_dev, &create_info,
-                          hwctx->alloc, &f->img[0]);
+    ret = vk->BeginCommandBuffer(e->buf, &cmd_start);
     if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n",
+        av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
                ff_vk_ret2str(ret));
-        err = AVERROR(EINVAL);
-        goto fail;
+        return AVERROR_EXTERNAL;
     }
 
-    /* Create semaphore */
-    ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
-                              hwctx->alloc, &f->sem[0]);
-    if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
-               ff_vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
+    if (pool->nb_queries)
+        vk->CmdResetQueryPool(e->buf, pool->query_pool,
+                              e->query_idx, pool->nb_queries);
+
+    return 0;
+}
+
+void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
+{
+    for (int j = 0; j < e->nb_buf_deps; j++)
+        av_buffer_unref(&e->buf_deps[j]);
+    e->nb_buf_deps = 0;
+
+    for (int j = 0; j < e->nb_frame_deps; j++) {
+        if (e->frame_locked[j]) {
+            AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
+            s->hwfc->unlock_frame(s->frames, f);
+            e->frame_locked[j] = 0;
+            e->frame_update[j] = 0;
+        }
+        av_buffer_unref(&e->frame_deps[j]);
     }
+    e->nb_frame_deps = 0;
 
-    f->queue_family[0] = s->nb_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : s->qfs[0];
-    f->layout[0] = create_info.initialLayout;
-    f->access[0] = 0x0;
-    f->sem_value[0] = 0;
+    e->sem_wait_cnt = 0;
+    e->sem_sig_cnt = 0;
+}
 
-    f->flags  = 0x0;
-    f->tiling = tiling;
+int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
+                           AVBufferRef **deps, int nb_deps, int ref)
+{
+    AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size,
+                                        (e->nb_buf_deps + nb_deps) * sizeof(*dst));
+    if (!dst) {
+        ff_vk_exec_discard_deps(s, e);
+        return AVERROR(ENOMEM);
+    }
 
-    return 0;
+    e->buf_deps = dst;
 
-fail:
-    return err;
+    for (int i = 0; i < nb_deps; i++) {
+        e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i];
+        if (!e->buf_deps[e->nb_buf_deps]) {
+            ff_vk_exec_discard_deps(s, e);
+            return AVERROR(ENOMEM);
+        }
+        e->nb_buf_deps++;
+    }
+
+    return 0;
 }
 
-int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
-                            VkShaderStageFlagBits stage)
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
+                             VkPipelineStageFlagBits in_wait_dst_flag)
 {
-    VkPushConstantRange *pc;
+    uint8_t *frame_locked;
+    uint8_t *frame_update;
+    AVBufferRef **frame_deps;
+    VkImageLayout *layout_dst;
+    uint32_t *queue_family_dst;
+    VkAccessFlagBits *access_dst;
+
+    AVVkFrame *f = (AVVkFrame *)vkfb->data;
+    int nb_images = ff_vk_count_images(f);
+
+#define ARR_REALLOC(str, arr, alloc_s, cnt)                               \
+    do {                                                                  \
+        arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \
+        if (!arr) {                                                       \
+            ff_vk_exec_discard_deps(s, e);                                \
+            return AVERROR(ENOMEM);                                       \
+        }                                                                 \
+        str->arr = arr;                                                   \
+    } while (0)
+
+    for (int i = 0; i < nb_images; i++) {
+        VkSemaphore *sem_wait;
+        uint64_t *sem_wait_val;
+        VkPipelineStageFlagBits *sem_wait_dst;
+        VkSemaphore *sem_sig;
+        uint64_t *sem_sig_val;
+        uint64_t **sem_sig_val_dst;
+
+        ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
+        ARR_REALLOC(e, sem_wait_dst, &e->sem_wait_dst_alloc, e->sem_wait_cnt);
+        ARR_REALLOC(e, sem_wait_val, &e->sem_wait_val_alloc, e->sem_wait_cnt);
+        ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
+        ARR_REALLOC(e, sem_sig_val, &e->sem_sig_val_alloc, e->sem_sig_cnt);
+        ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_cnt);
 
-    pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
-                                       pl->push_consts_num + 1);
-    if (!pl->push_consts)
+        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
+        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
+        e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
+        e->sem_wait_cnt++;
+
+        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
+        e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
+        e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
+        e->sem_sig_cnt++;
+    }
+
+    ARR_REALLOC(e, layout_dst,       &e->layout_dst_alloc,       e->nb_frame_deps);
+    ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps);
+    ARR_REALLOC(e, access_dst,       &e->access_dst_alloc,       e->nb_frame_deps);
+
+    ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps);
+    ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
+    ARR_REALLOC(e, frame_deps,   &e->frame_deps_alloc_size,   e->nb_frame_deps);
+
+    e->frame_deps[e->nb_frame_deps] = av_buffer_ref(vkfb);
+    if (!e->frame_deps[e->nb_frame_deps]) {
+        ff_vk_exec_discard_deps(s, e);
         return AVERROR(ENOMEM);
+    }
 
-    pc = &pl->push_consts[pl->push_consts_num++];
-    memset(pc, 0, sizeof(*pc));
+    s->hwfc->lock_frame(s->frames, f);
+    e->frame_locked[e->nb_frame_deps] = 1;
+    e->frame_update[e->nb_frame_deps] = 0;
 
-    pc->stageFlags = stage;
-    pc->offset = offset;
-    pc->size = size;
+    e->nb_frame_deps++;
 
     return 0;
 }
 
-FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
-int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
-                          FFVkQueueFamilyCtx *qf)
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e,
+                             AVBufferRef *vkfb,
+                             VkImageMemoryBarrier2 *bar)
+{
+    int i;
+    for (i = 0; i < e->nb_frame_deps; i++) /* look vkfb up by its data pointer */
+        if (e->frame_deps[i]->data == vkfb->data)
+            break;
+    av_assert0(i < e->nb_frame_deps); /* vkfb must already be a frame dep of e */
+
+    e->queue_family_dst[i] = bar->dstQueueFamilyIndex;
+    e->access_dst[i] = bar->dstAccessMask;
+    e->layout_dst[i] = bar->newLayout;
+    e->frame_update[i] = 1; /* ff_vk_exec_submit() copies the *_dst values into the frame */
+}
+
+int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
 {
     VkResult ret;
-    FFVkExecContext *e;
     FFVulkanFunctions *vk = &s->vkfn;
 
-    VkCommandPoolCreateInfo cqueue_create = {
-        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
-        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
-        .queueFamilyIndex   = qf->queue_family,
-    };
-    VkCommandBufferAllocateInfo cbuf_create = {
-        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
-        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
-        .commandBufferCount = qf->nb_queues,
+    VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
+        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+        .pWaitSemaphoreValues = e->sem_wait_val,
+        .pSignalSemaphoreValues = e->sem_sig_val,
+        .waitSemaphoreValueCount = e->sem_wait_cnt,
+        .signalSemaphoreValueCount = e->sem_sig_cnt,
     };
 
-    e = create_exec_ctx(s);
-    if (!e)
-        return AVERROR(ENOMEM);
+    VkSubmitInfo s_info = {
+        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .pNext                = &s_timeline_sem_info,
 
-    e->qf = qf;
+        .commandBufferCount   = 1,
+        .pCommandBuffers      = &e->buf,
 
-    e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
-    if (!e->queues)
-        return AVERROR(ENOMEM);
+        .pWaitSemaphores      = e->sem_wait,
+        .pWaitDstStageMask    = e->sem_wait_dst,
+        .waitSemaphoreCount   = e->sem_wait_cnt,
 
-    e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
-    if (!e->bufs)
-        return AVERROR(ENOMEM);
+        .pSignalSemaphores    = e->sem_sig,
+        .signalSemaphoreCount = e->sem_sig_cnt,
+    };
 
-    /* Create command pool */
-    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
-                                s->hwctx->alloc, &e->pool);
+    ret = vk->EndCommandBuffer(e->buf);
     if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
                ff_vk_ret2str(ret));
+        ff_vk_exec_discard_deps(s, e);
         return AVERROR_EXTERNAL;
     }
 
-    cbuf_create.commandPool = e->pool;
+    s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, e->qf, e->qi);
+    ret = vk->QueueSubmit(e->queue, 1, &s_info, e->fence);
+    s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, e->qf, e->qi);
 
-    /* Allocate command buffer */
-    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
     if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
                ff_vk_ret2str(ret));
+        ff_vk_exec_discard_deps(s, e);
         return AVERROR_EXTERNAL;
     }
 
-    for (int i = 0; i < qf->nb_queues; i++) {
-        FFVkQueueCtx *q = &e->queues[i];
-        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
-                           i % qf->actual_queues, &q->queue);
-    }
+    for (int i = 0; i < e->sem_sig_cnt; i++)
+        *e->sem_sig_val_dst[i] += 1;
 
-    *ctx = e;
+    /* Unlock all frames */
+    for (int j = 0; j < e->nb_frame_deps; j++) {
+        if (e->frame_locked[j]) {
+            AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
+            if (e->frame_update[j]) {
+                int nb_images = ff_vk_count_images(f);
+                for (int i = 0; i < nb_images; i++) {
+                    f->layout[i] = e->layout_dst[j];
+                    f->access[i] = e->access_dst[j];
+                    f->queue_family[i] = e->queue_family_dst[j];
+                }
+            }
+            s->hwfc->unlock_frame(s->frames, f);
+            e->frame_locked[j] = 0;
+        }
+    }
 
     return 0;
 }
 
-int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
-                                     int nb_queries, VkQueryType type,
-                                     int elem_64bits, void *create_pnext)
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
 {
     VkResult ret;
-    size_t qd_size;
-    int nb_results = nb_queries;
-    int nb_statuses = 0 /* Once RADV has support, = nb_queries */;
-    int status_stride = 2;
-    int result_elem_size = elem_64bits ? 8 : 4;
+    int index = -1;
     FFVulkanFunctions *vk = &s->vkfn;
-    VkQueryPoolCreateInfo query_pool_info = {
-        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
-        .pNext = create_pnext,
-        .queryType = type,
-        .queryCount = nb_queries*e->qf->nb_queues,
-    };
 
-    if (e->query.pool)
-        return AVERROR(EINVAL);
-
-    /* Video encode quieries produce two results per query */
-    if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
-        status_stride = 3; /* skip,skip,result,skip,skip,result */
-        nb_results *= 2;
-    } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
-        status_stride = 1;
-        nb_results *= 0;
-    }
-
-    qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size;
-
-    e->query.data = av_mallocz(e->qf->nb_queues*qd_size);
-    if (!e->query.data)
-        return AVERROR(ENOMEM);
-
-    ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
-                              s->hwctx->alloc, &e->query.pool);
-    if (ret != VK_SUCCESS)
-        return AVERROR_EXTERNAL;
+    VkMemoryAllocateInfo alloc_info = {
+        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext           = alloc_extension,
+    };
 
-    e->query.data_per_queue = qd_size;
-    e->query.nb_queries     = nb_queries;
-    e->query.nb_results     = nb_results;
-    e->query.nb_statuses    = nb_statuses;
-    e->query.elem_64bits    = elem_64bits;
-    e->query.status_stride  = status_stride;
+    /* Align if we need to */
+    if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+        req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
 
-    return 0;
-}
+    alloc_info.allocationSize = req->size;
 
-int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
-                                     int query_idx, void **data, int64_t *status)
-{
-    VkResult ret;
-    FFVulkanFunctions *vk = &s->vkfn;
-    uint8_t *qd;
-    int32_t *res32;
-    int64_t *res64;
-    int64_t res = 0;
-    VkQueryResultFlags qf = 0;
-    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+    /* The vulkan spec requires memory types to be sorted in the "optimal"
+     * order, so the first matching type we find will be the best/fastest one */
+    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
+        /* The memory type must be supported by the requirements (bitfield) */
+        if (!(req->memoryTypeBits & (1U << i))) /* unsigned: i may reach 31 */
+            continue;
 
-    if (!q->submitted) {
-        *data = NULL;
-        return 0;
-    }
+        /* The memory type flags must include our properties */
+        if ((req_flags != UINT32_MAX) &&
+            ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
+            continue;
 
-    qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue;
-    qf |= e->query.nb_results && e->query.nb_statuses ?
-          VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
-    qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0;
-    res32 = (int32_t *)(qd + e->query.nb_results*4);
-    res64 = (int64_t *)(qd + e->query.nb_results*8);
-
-    ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool,
-                                  query_idx,
-                                  e->query.nb_queries,
-                                  e->query.data_per_queue, qd,
-                                  e->query.elem_64bits ? 8 : 4, qf);
-    if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n",
-               ff_vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
+        /* Found a suitable memory type */
+        index = i;
+        break;
     }
 
-    if (e->query.nb_statuses && e->query.elem_64bits) {
-        for (int i = 0; i < e->query.nb_queries; i++) {
-            res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
-                  res64[i] : res;
-            res64 += e->query.status_stride;
-        }
-    } else if (e->query.nb_statuses) {
-        for (int i = 0; i < e->query.nb_queries; i++) {
-            res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
-                  res32[i] : res;
-            res32 += e->query.status_stride;
-        }
+    if (index < 0) {
+        av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+               req_flags);
+        return AVERROR(EINVAL);
     }
 
-    if (data)
-        *data = qd;
-    if (status)
-        *status = res;
-
-    return 0;
-}
-
-void ff_vk_discard_exec_deps(FFVkExecContext *e)
-{
-    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+    alloc_info.memoryTypeIndex = index;
 
-    for (int j = 0; j < q->nb_buf_deps; j++)
-        av_buffer_unref(&q->buf_deps[j]);
-    q->nb_buf_deps = 0;
+    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
+                             s->hwctx->alloc, mem);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR(ENOMEM);
+    }
 
-    for (int j = 0; j < q->nb_frame_deps; j++)
-        av_frame_free(&q->frame_deps[j]);
-    q->nb_frame_deps = 0;
+    if (mem_flags) /* flags are OR-ed in, so *mem_flags must be initialised by the caller */
+        *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
 
-    e->sem_wait_cnt = 0;
-    e->sem_sig_cnt = 0;
+    return 0;
 }
 
-int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+                     void *pNext, void *alloc_pNext,
+                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
 {
+    int err;
     VkResult ret;
+    int use_ded_mem;
     FFVulkanFunctions *vk = &s->vkfn;
-    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
 
-    VkCommandBufferBeginInfo cmd_start = {
-        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
-        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    VkBufferCreateInfo buf_spawn = {
+        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext       = pNext,
+        .usage       = usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .size        = size, /* Gets FFALIGNED during alloc if host visible
+                                but should be ok */
     };
 
-    /* Create the fence and don't wait for it initially */
-    if (!q->fence) {
-        VkFenceCreateInfo fence_spawn = {
-            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
-        };
-        ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
-                              &q->fence);
-        if (ret != VK_SUCCESS) {
-            av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
-                   ff_vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
-        }
-    } else if (!q->synchronous) {
-        vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
-        vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
+    VkBufferMemoryRequirementsInfo2 req_desc = {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
+    };
+    VkMemoryDedicatedAllocateInfo ded_alloc = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+        .pNext = alloc_pNext,
+    };
+    VkMemoryDedicatedRequirements ded_req = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+    };
+    VkMemoryRequirements2 req = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+        .pNext = &ded_req,
+    };
+
+    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf); /* same allocator as ff_vk_free_buf() */
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
     }
 
-    q->synchronous = 0;
+    req_desc.buffer = buf->buf;
 
-    /* Discard queue dependencies */
-    ff_vk_discard_exec_deps(e);
+    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
+
+    /* In case the implementation prefers/requires dedicated allocation */
+    use_ded_mem = ded_req.prefersDedicatedAllocation |
+                  ded_req.requiresDedicatedAllocation;
+    if (use_ded_mem)
+        ded_alloc.buffer = buf->buf;
+
+    err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
+                          use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+                          &buf->flags, &buf->mem);
+    if (err)
+        return err; /* NOTE(review): buf->buf is not destroyed on this path; caller presumably must ff_vk_free_buf() */
 
-    ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
+    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
     if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
+        av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
                ff_vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
 
-    if (e->query.pool) {
-        e->query.idx = e->qf->cur_queue*e->query.nb_queries;
-        vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool,
-                              e->query.idx, e->query.nb_queries);
-    }
+    buf->size = size; /* requested size, not the (possibly aligned) allocation size */
 
     return 0;
 }
 
-VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
-{
-    return e->bufs[e->qf->cur_queue];
-}
-
-int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
-                       VkPipelineStageFlagBits in_wait_dst_flag)
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
+                      int nb_buffers, int invalidate)
 {
-    AVFrame **dst;
-    AVVkFrame *f = (AVVkFrame *)frame->data[0];
-    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
-    int planes = av_pix_fmt_count_planes(fc->sw_format);
-
-    for (int i = 0; i < planes; i++) {
-        e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
-                                      (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
-        if (!e->sem_wait) {
-            ff_vk_discard_exec_deps(e);
-            return AVERROR(ENOMEM);
-        }
-
-        e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
-                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
-        if (!e->sem_wait_dst) {
-            ff_vk_discard_exec_deps(e);
-            return AVERROR(ENOMEM);
-        }
-
-        e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc,
-                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val));
-        if (!e->sem_wait_val) {
-            ff_vk_discard_exec_deps(e);
-            return AVERROR(ENOMEM);
-        }
-
-        e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
-                                     (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
-        if (!e->sem_sig) {
-            ff_vk_discard_exec_deps(e);
-            return AVERROR(ENOMEM);
-        }
-
-        e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc,
-                                         (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val));
-        if (!e->sem_sig_val) {
-            ff_vk_discard_exec_deps(e);
-            return AVERROR(ENOMEM);
-        }
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkMappedMemoryRange inval_list[64];
+    int inval_count = 0;
 
-        e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
-                                             (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
-        if (!e->sem_sig_val_dst) {
-            ff_vk_discard_exec_deps(e);
-            return AVERROR(ENOMEM);
+    for (int i = 0; i < nb_buffers; i++) {
+        ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
+                            VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
         }
-
-        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
-        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
-        e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
-        e->sem_wait_cnt++;
-
-        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
-        e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
-        e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
-        e->sem_sig_cnt++;
     }
 
-    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
-                          (q->nb_frame_deps + 1) * sizeof(*dst));
-    if (!dst) {
-        ff_vk_discard_exec_deps(e);
-        return AVERROR(ENOMEM);
+    if (!invalidate)
+        return 0;
+
+    for (int i = 0; i < nb_buffers; i++) {
+        const VkMappedMemoryRange ival_buf = {
+            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = buf[i].mem,
+            .size   = VK_WHOLE_SIZE,
+        };
+        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+            continue;
+        inval_list[inval_count++] = ival_buf;
     }
 
-    q->frame_deps = dst;
-    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
-    if (!q->frame_deps[q->nb_frame_deps]) {
-        ff_vk_discard_exec_deps(e);
-        return AVERROR(ENOMEM);
+    if (inval_count) {
+        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+                                               inval_list);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
     }
-    q->nb_frame_deps++;
 
     return 0;
 }
 
-int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
+                        int flush)
 {
+    int err = 0;
     VkResult ret;
     FFVulkanFunctions *vk = &s->vkfn;
-    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
-    VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
-        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
-        .pWaitSemaphoreValues = e->sem_wait_val,
-        .pSignalSemaphoreValues = e->sem_sig_val,
-        .waitSemaphoreValueCount = e->sem_wait_cnt,
-        .signalSemaphoreValueCount = e->sem_sig_cnt,
-    };
-
-    VkSubmitInfo s_info = {
-        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
-        .pNext                = &s_timeline_sem_info,
-
-        .commandBufferCount   = 1,
-        .pCommandBuffers      = &e->bufs[e->qf->cur_queue],
-
-        .pWaitSemaphores      = e->sem_wait,
-        .pWaitDstStageMask    = e->sem_wait_dst,
-        .waitSemaphoreCount   = e->sem_wait_cnt,
-
-        .pSignalSemaphores    = e->sem_sig,
-        .signalSemaphoreCount = e->sem_sig_cnt,
-    };
+    VkMappedMemoryRange flush_list[64];
+    int flush_count = 0;
 
-    ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
-    if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
-               ff_vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
+    if (flush) {
+        for (int i = 0; i < nb_buffers; i++) {
+            const VkMappedMemoryRange flush_buf = {
+                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+                .memory = buf[i].mem,
+                .size   = VK_WHOLE_SIZE,
+            };
+            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+                continue;
+            flush_list[flush_count++] = flush_buf;
+        }
     }
 
-    s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
-                         e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
-
-    ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
-
-    s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
-                           e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
-
-    if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
-               ff_vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
+    if (flush_count) {
+        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+                                          flush_list);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   ff_vk_ret2str(ret));
+            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+        }
     }
 
-    for (int i = 0; i < e->sem_sig_cnt; i++)
-        *e->sem_sig_val_dst[i] += 1;
-
-    e->query.idx = e->qf->cur_queue*e->query.nb_queries;
-    q->submitted = 1;
+    for (int i = 0; i < nb_buffers; i++)
+        vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
 
-    return 0;
+    return err;
 }
 
-void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
 {
     FFVulkanFunctions *vk = &s->vkfn;
-    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-    if (!q->submitted)
+
+    if (!buf || !s->hwctx)
         return;
 
-    vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
-    vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
-    q->synchronous = 1;
+    if (buf->buf != VK_NULL_HANDLE)
+        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
+    if (buf->mem != VK_NULL_HANDLE)
+        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
 }
 
-int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
-                           AVBufferRef **deps, int nb_deps)
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+                            VkShaderStageFlagBits stage)
 {
-    AVBufferRef **dst;
-    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
-    if (!deps || !nb_deps)
-        return 0;
+    VkPushConstantRange *pc;
 
-    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
-                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
-    if (!dst)
-        goto err;
+    pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
+                                       pl->push_consts_num + 1);
+    if (!pl->push_consts)
+        return AVERROR(ENOMEM);
 
-    q->buf_deps = dst;
+    pc = &pl->push_consts[pl->push_consts_num++];
+    memset(pc, 0, sizeof(*pc));
 
-    for (int i = 0; i < nb_deps; i++) {
-        q->buf_deps[q->nb_buf_deps] = deps[i];
-        if (!q->buf_deps[q->nb_buf_deps])
-            goto err;
-        q->nb_buf_deps++;
-    }
+    pc->stageFlags = stage;
+    pc->offset = offset;
+    pc->size = size;
 
     return 0;
-
-err:
-    ff_vk_discard_exec_deps(e);
-    return AVERROR(ENOMEM);
 }
 
-FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
+FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
                                 int unnorm_coords, VkFilter filt)
 {
     VkResult ret;
@@ -1030,10 +954,6 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
         .unnormalizedCoordinates = unnorm_coords,
     };
 
-    FFVkSampler *sctx = create_sampler(s);
-    if (!sctx)
-        return NULL;
-
     ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
                             s->hwctx->alloc, &sctx->sampler[0]);
     if (ret != VK_SUCCESS) {
@@ -1048,6 +968,13 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
     return sctx;
 }
 
+void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    vk->DestroySampler(s->hwctx->act_dev, sctx->sampler[0], /* only sampler[0]; sctx itself is not freed */
+                       s->hwctx->alloc);
+}
+
 int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
 {
     if (pix_fmt == AV_PIX_FMT_ABGR   || pix_fmt == AV_PIX_FMT_BGRA   ||
@@ -1122,7 +1049,7 @@ int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
     }
 
     /* Add to queue dependencies */
-    err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
+    err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
     if (err) {
         av_buffer_unref(&buf);
         return err;
@@ -1133,14 +1060,9 @@ int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
     return 0;
 }
 
-FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)
-FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
-                                   VkShaderStageFlags stage)
+int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
+                      VkShaderStageFlags stage)
 {
-    FFVkSPIRVShader *shd = create_shader(pl);
-    if (!shd)
-        return NULL;
-
     av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
 
     shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
@@ -1152,10 +1074,10 @@ FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
     GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y))       );
     GLSLC(0,                                                                  );
 
-    return shd;
+    return 0;
 }
 
-void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3])
 {
     shd->local_size[0] = local_size[0];
     shd->local_size[1] = local_size[1];
@@ -1166,7 +1088,7 @@ void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
                shd->local_size[0], shd->local_size[1], shd->local_size[2]);
 }
 
-void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
+void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio)
 {
     int line = 0;
     const char *p = shd->src.str;
@@ -1188,7 +1110,13 @@ void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
     av_bprint_finalize(&buf, NULL);
 }
 
-int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
+void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc); /* NOTE(review): shd->src (AVBPrint) is not finalized here - confirm it is released elsewhere */
+}
+
+int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
                          const char *entrypoint)
 {
     int err;
@@ -1437,7 +1365,7 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
         return;
     }
 
-    set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
+//    set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
 
     vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                         pl->desc_set[set_id],
@@ -1446,12 +1374,12 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
 }
 
 void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
-                            VkShaderStageFlagBits stage, int offset,
-                            size_t size, void *src)
+                            FFVulkanPipeline *pl,
+                            VkShaderStageFlagBits stage,
+                            int offset, size_t size, void *src)
 {
     FFVulkanFunctions *vk = &s->vkfn;
-
-    vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
+    vk->CmdPushConstants(e->buf, pl->pipeline_layout,
                          stage, offset, size, src);
 }
 
@@ -1558,17 +1486,8 @@ int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
     return 0;
 }
 
-FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
-{
-    FFVulkanPipeline *pl = create_pipeline(s);
-    if (pl)
-        pl->qf = qf;
-
-    return pl;
-}
-
-int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                FFVkQueueFamilyCtx *qf)
 {
     int i;
     VkResult ret;
@@ -1579,6 +1498,8 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
         .layout = pl->pipeline_layout,
     };
 
+    pl->qf = qf;
+
     for (i = 0; i < pl->shaders_num; i++) {
         if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
             pipe.stage = pl->shaders[i]->shader;
@@ -1603,73 +1524,24 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
     return 0;
 }
 
-void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
+void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
                               FFVulkanPipeline *pl)
 {
     FFVulkanFunctions *vk = &s->vkfn;
 
-    vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);
+    vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
 
-    for (int i = 0; i < pl->descriptor_sets_num; i++)
-        pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
+//    for (int i = 0; i < pl->descriptor_sets_num; i++)
+ //       pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
 
-    vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
+    vk->CmdBindDescriptorSets(e->buf, pl->bind_point,
                               pl->pipeline_layout, 0,
                               pl->descriptor_sets_num,
                               (VkDescriptorSet *)pl->desc_staging,
                               0, NULL);
-
-    e->bound_pl = pl;
-}
-
-static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
-{
-    FFVulkanFunctions *vk = &s->vkfn;
-
-    /* Make sure all queues have finished executing */
-    for (int i = 0; i < e->qf->nb_queues; i++) {
-        FFVkQueueCtx *q = &e->queues[i];
-
-        if (q->fence) {
-            vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
-            vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
-        }
-
-        /* Free the fence */
-        if (q->fence)
-            vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
-
-        /* Free buffer dependencies */
-        for (int j = 0; j < q->nb_buf_deps; j++)
-            av_buffer_unref(&q->buf_deps[j]);
-        av_free(q->buf_deps);
-
-        /* Free frame dependencies */
-        for (int j = 0; j < q->nb_frame_deps; j++)
-            av_frame_free(&q->frame_deps[j]);
-        av_free(q->frame_deps);
-    }
-
-    if (e->bufs)
-        vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
-    if (e->pool)
-        vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
-    if (e->query.pool)
-        vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc);
-
-    av_freep(&e->query.data);
-    av_freep(&e->bufs);
-    av_freep(&e->queues);
-    av_freep(&e->sem_sig);
-    av_freep(&e->sem_sig_val);
-    av_freep(&e->sem_sig_val_dst);
-    av_freep(&e->sem_wait);
-    av_freep(&e->sem_wait_dst);
-    av_freep(&e->sem_wait_val);
-    av_free(e);
 }
 
-static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl)
 {
     FFVulkanFunctions *vk = &s->vkfn;
 
@@ -1723,8 +1595,6 @@ static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
 
 void ff_vk_uninit(FFVulkanContext *s)
 {
-    FFVulkanFunctions *vk = &s->vkfn;
-
     av_freep(&s->query_props);
     av_freep(&s->qf_props);
     av_freep(&s->video_props);
@@ -1732,24 +1602,6 @@ void ff_vk_uninit(FFVulkanContext *s)
     if (s->spirv_compiler)
         s->spirv_compiler->uninit(&s->spirv_compiler);
 
-    for (int i = 0; i < s->exec_ctx_num; i++)
-        free_exec_ctx(s, s->exec_ctx[i]);
-    av_freep(&s->exec_ctx);
-
-    for (int i = 0; i < s->samplers_num; i++) {
-        vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
-                           s->hwctx->alloc);
-        av_free(s->samplers[i]);
-    }
-    av_freep(&s->samplers);
-
-    for (int i = 0; i < s->pipelines_num; i++)
-        free_pipeline(s, s->pipelines[i]);
-    av_freep(&s->pipelines);
-
-    av_freep(&s->scratch);
-    s->scratch_size = 0;
-
     av_buffer_unref(&s->device_ref);
     av_buffer_unref(&s->frames_ref);
 }
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index dd1bc9c440..a8f3c458fc 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -21,6 +21,8 @@
 
 #define VK_NO_PROTOTYPES
 
+#include <stdatomic.h>
+
 #include "pixdesc.h"
 #include "bprint.h"
 #include "hwcontext.h"
@@ -100,8 +102,6 @@ typedef struct FFVkBuffer {
 typedef struct FFVkQueueFamilyCtx {
     int queue_family;
     int nb_queues;
-    int cur_queue;
-    int actual_queues;
 } FFVkQueueFamilyCtx;
 
 typedef struct FFVulkanPipeline {
@@ -139,71 +139,88 @@ typedef struct FFVulkanPipeline {
     VkDescriptorPoolSize *pool_size_desc;
 } FFVulkanPipeline;
 
-typedef struct FFVkQueueCtx {
-    VkFence fence;
+typedef struct FFVkExecContext {
+    const struct FFVkExecPool *parent;
+
+    /* Queue for the execution context */
     VkQueue queue;
+    int qf;
+    int qi;
 
-    int synchronous;
-    int submitted;
+    /* Command buffer for the context */
+    VkCommandBuffer buf;
+
+    /* Fence for the command buffer */
+    VkFence fence;
+
+    void *query_data;
+    int query_idx;
 
     /* Buffer dependencies */
     AVBufferRef **buf_deps;
     int nb_buf_deps;
-    int buf_deps_alloc_size;
+    unsigned int buf_deps_alloc_size;
 
     /* Frame dependencies */
-    AVFrame **frame_deps;
+    AVBufferRef **frame_deps;
+    unsigned int frame_deps_alloc_size;
     int nb_frame_deps;
-    int frame_deps_alloc_size;
-} FFVkQueueCtx;
-
-typedef struct FFVkExecContext {
-    FFVkQueueFamilyCtx *qf;
-
-    VkCommandPool pool;
-    VkCommandBuffer *bufs;
-    FFVkQueueCtx *queues;
-
-    struct {
-        int           idx;
-        VkQueryPool   pool;
-        uint8_t      *data;
-
-        int           nb_queries;
-        int           nb_results;
-        int           nb_statuses;
-        int           elem_64bits;
-        size_t        data_per_queue;
-        int           status_stride;
-    } query;
-
-    AVBufferRef ***deps;
-    int *nb_deps;
-    int *dep_alloc_size;
-
-    FFVulkanPipeline *bound_pl;
 
     VkSemaphore *sem_wait;
-    int sem_wait_alloc; /* Allocated sem_wait */
+    unsigned int sem_wait_alloc; /* Allocated sem_wait */
     int sem_wait_cnt;
 
     uint64_t *sem_wait_val;
-    int sem_wait_val_alloc;
+    unsigned int sem_wait_val_alloc;
 
     VkPipelineStageFlagBits *sem_wait_dst;
-    int sem_wait_dst_alloc; /* Allocated sem_wait_dst */
+    unsigned int sem_wait_dst_alloc; /* Allocated sem_wait_dst */
 
     VkSemaphore *sem_sig;
-    int sem_sig_alloc; /* Allocated sem_sig */
+    unsigned int sem_sig_alloc; /* Allocated sem_sig */
     int sem_sig_cnt;
 
     uint64_t *sem_sig_val;
-    int sem_sig_val_alloc;
+    unsigned int sem_sig_val_alloc;
 
     uint64_t **sem_sig_val_dst;
-    int sem_sig_val_dst_alloc;
+    unsigned int sem_sig_val_dst_alloc;
+
+    uint8_t *frame_locked;
+    unsigned int frame_locked_alloc_size;
+
+    VkAccessFlagBits *access_dst;
+    unsigned int access_dst_alloc;
+
+    VkImageLayout *layout_dst;
+    unsigned int layout_dst_alloc;
+
+    uint32_t *queue_family_dst;
+    unsigned int queue_family_dst_alloc;
+
+    uint8_t *frame_update;
+    unsigned int frame_update_alloc_size;
 } FFVkExecContext;
 
+typedef struct FFVkExecPool {
+    FFVkQueueFamilyCtx *qf;
+    FFVkExecContext *contexts;
+    atomic_int_least64_t idx;
+
+    VkCommandPool cmd_buf_pool;
+    VkCommandBuffer *cmd_bufs;
+    int pool_size;
+
+    VkQueryPool query_pool;
+    void *query_data;
+    int query_results;
+    int query_statuses;
+    int query_64bit;
+    int query_status_stride;
+    int nb_queries;
+    size_t qd_size;
+} FFVkExecPool;
+
 typedef struct FFVulkanContext {
     const AVClass *class; /* Filters and encoders use this */
 
@@ -234,21 +251,6 @@ typedef struct FFVulkanContext {
     int                output_height;
     enum AVPixelFormat output_format;
     enum AVPixelFormat  input_format;
-
-    /* Samplers */
-    FFVkSampler **samplers;
-    int samplers_num;
-
-    /* Exec contexts */
-    FFVkExecContext **exec_ctx;
-    int exec_ctx_num;
-
-    /* Pipelines (each can have 1 shader of each type) */
-    FFVulkanPipeline **pipelines;
-    int pipelines_num;
-
-    void *scratch; /* Scratch memory used only in functions */
-    unsigned int scratch_size;
 } FFVulkanContext;
 
 /* Identity mapping - r = r, b = b, g = g, a = a */
@@ -260,244 +262,156 @@ extern const VkComponentMapping ff_comp_identity_map;
 const char *ff_vk_ret2str(VkResult res);
 
 /**
- * Loads props/mprops/driver_props
+ * Returns 1 if pixfmt is a usable RGB format.
  */
-int ff_vk_load_props(FFVulkanContext *s);
+int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
 
 /**
- * Returns 1 if the image is any sort of supported RGB
+ * Returns the format to use for images in shaders.
  */
-int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
 
 /**
- * Gets the glsl format string for a pixel format
+ * Loads props/mprops/driver_props
  */
-const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
+int ff_vk_load_props(FFVulkanContext *s);
 
 /**
- * Setup the queue families from the hardware device context.
- * Necessary for image creation to work.
+ * ff_vk_qf_fill() loads queue families into the main context.
+ * ff_vk_qf_init() chooses a QF and loads it into a queue family context.
  */
 void ff_vk_qf_fill(FFVulkanContext *s);
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+                  VkQueueFlagBits dev_family);
 
 /**
- * Allocate device memory.
+ * Allocates/frees an execution pool.
  */
-int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
-                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
-                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+                         FFVkExecPool *pool, int nb_contexts,
+                         int nb_queries, VkQueryType query_type, int query_64bit,
+                         void *query_create_pnext);
+void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool);
 
 /**
- * Get a queue family index and the number of queues. nb is optional.
+ * Retrieve an execution context from a pool. Threadsafe.
  */
-int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb);
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool);
 
 /**
- * Initialize a queue family with a specific number of queues.
- * If nb_queues == 0, use however many queues the queue family has.
+ * Explicitly wait on an execution to be finished.
+ * Starting via ff_vk_exec_start() also waits on it.
  */
-int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
-                  VkQueueFlagBits dev_family, int nb_queues);
 
 /**
- * Rotate through the queues in a queue family.
+ * Performs nb_queries queries and returns their results and statuses.
+ * Execution must have been waited on to produce valid results.
+ */
+VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
+                              void **data, int64_t *status);
+
+/**
+ * Start/submit/wait an execution.
+ * ff_vk_exec_start() always waits on a submission, so calling
+ * ff_vk_exec_wait() explicitly is optional.
  */
-int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e);
+int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e);
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
 
 /**
- * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
+ * Execution dependency management.
+ * Can attach buffers to executions that will only be unref'd once the
+ * command buffer has finished executing.
+ * Adding a frame dep will *lock the frame*, until either the dependencies
+ * are discarded, the execution is submitted, or a failure happens.
+ * update_frame will update the frame's properties before it is unlocked,
+ * only if submission was successful.
  */
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, int unnorm_coords,
-                                VkFilter filt);
+int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
+                           AVBufferRef **deps, int nb_deps, int ref);
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e,
+                             AVBufferRef *vkfb, VkPipelineStageFlagBits in_wait_dst_flag);
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
+                             VkImageMemoryBarrier2 *bar);
+void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e);
 
 /**
- * Create an imageview.
- * Guaranteed to remain alive until the queue submission has finished executing,
- * and will be destroyed after that.
+ * Create an imageview and add it as a dependency to an execution.
  */
 int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
                            VkImageView *v, VkImage img, VkFormat fmt,
                            const VkComponentMapping map);
 
 /**
- * Define a push constant for a given stage into a pipeline.
- * Must be called before the pipeline layout has been initialized.
+ * Memory/buffer/image allocation helpers.
  */
-int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
-                            VkShaderStageFlagBits stage);
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+                     void *pNext, void *alloc_pNext,
+                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
 
 /**
- * Inits a pipeline. Everything in it will be auto-freed when calling
- * ff_vk_filter_uninit().
+ * Buffer management code.
  */
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf);
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
+                      int nb_buffers, int invalidate);
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
+                        int flush);
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
 
 /**
- * Inits a shader for a specific pipeline. Will be auto-freed on uninit.
+ * Sampler management.
  */
-FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
-                                   VkShaderStageFlags stage);
+FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
+                                int unnorm_coords, VkFilter filt);
+void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx);
 
 /**
- * Writes the workgroup size for a shader.
+ * Shader management.
  */
-void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
+                      VkShaderStageFlags stage);
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio);
+int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
+                         const char *entrypoint);
+void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd);
 
 /**
- * Adds a descriptor set to the shader and registers them in the pipeline.
+ * Register a descriptor set.
+ * Update a descriptor set for execution.
  */
 int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                              FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
                              int num, int only_print_to_shader);
-
-/**
- * Compiles the shader, entrypoint must be set to "main".
- */
-int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
-                         const char *entrypoint);
-
-/**
- * Pretty print shader, mainly used by shader compilers.
- */
-void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio);
-
-/**
- * Initializes the pipeline layout after all shaders and descriptor sets have
- * been finished.
- */
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
-
-/**
- * Initializes a compute pipeline. Will pick the first shader with the
- * COMPUTE flag set.
- */
-int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl);
-
-/**
- * Updates a descriptor set via the updaters defined.
- * Can be called immediately after pipeline creation, but must be called
- * at least once before queue submission.
- */
 void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                                  int set_id);
 
 /**
- * Init an execution context for command recording and queue submission.
- * WIll be auto-freed on uninit.
- */
-int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
-                          FFVkQueueFamilyCtx *qf);
-
-/**
- * Create a query pool for a command context.
- * elem_64bits exists to troll driver devs for compliance. All results
- * and statuses returned should be 32 bits, unless this is set, then it's 64bits.
- */
-int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
-                                     int nb_queries, VkQueryType type,
-                                     int elem_64bits, void *create_pnext);
-
-/**
- * Get results for query.
- * Returns the status of the query.
- * Sets *res to the status of the queries.
- */
-int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
-                                     int query_idx, void **data, int64_t *status);
-
-/**
- * Begin recording to the command buffer. Previous execution must have been
- * completed, which ff_vk_submit_exec_queue() will ensure.
- */
-int ff_vk_start_exec_recording(FFVulkanContext *s,  FFVkExecContext *e);
-
-/**
- * Add a command to bind the completed pipeline and its descriptor sets.
- * Must be called after ff_vk_start_exec_recording() and before submission.
- */
-void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
-                              FFVulkanPipeline *pl);
-
-/**
- * Updates push constants.
- * Must be called after binding a pipeline if any push constants were defined.
+ * Add/update push constants for execution.
  */
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+                            VkShaderStageFlagBits stage);
 void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
-                            VkShaderStageFlagBits stage, int offset,
-                            size_t size, void *src);
+                            FFVulkanPipeline *pl,
+                            VkShaderStageFlagBits stage,
+                            int offset, size_t size, void *src);
 
 /**
- * Gets the command buffer to use for this submission from the exe context.
+ * Pipeline management.
  */
-VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e);
-
-/**
- * Adds a generic AVBufferRef as a queue depenency.
- */
-int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
-                           AVBufferRef **deps, int nb_deps);
-
-/**
- * Discards all queue dependencies
- */
-void ff_vk_discard_exec_deps(FFVkExecContext *e);
-
-/**
- * Adds a frame as a queue dependency. This also manages semaphore signalling.
- * Must be called before submission.
- */
-int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
-                       VkPipelineStageFlagBits in_wait_dst_flag);
-
-/**
- * Submits a command buffer to the queue for execution. Will not block.
- */
-int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Wait on a command buffer's execution. Mainly useful for debugging and
- * development.
- */
-void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Create a VkBuffer with the specified parameters.
- */
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
-                     void *pNext, void *alloc_pNext,
-                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
-
-/**
- * Maps the buffer to userspace. Set invalidate to 1 if reading the contents
- * is necessary.
- */
-int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
-                      int nb_buffers, int invalidate);
-
-/**
- * Unmaps the buffer from userspace. Set flush to 1 to write and sync.
- */
-int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
-                        int flush);
-
-/**
- * Frees a buffer.
- */
-void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
-
-/**
- * Creates an image, allocates and binds memory in the given
- * idx value of the dst frame. If mem is non-NULL, then no memory will be
- * allocated, but instead the given memory will be bound to the image.
- */
-int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx,
-                       int width, int height, VkFormat fmt, VkImageTiling tiling,
-                       VkImageUsageFlagBits usage, VkImageCreateFlags flags,
-                       void *create_pnext,
-                       VkDeviceMemory *mem, void *alloc_pnext);
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                FFVkQueueFamilyCtx *qf);
+int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
+void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
+                              FFVulkanPipeline *pl);
+void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl);
 
 /**
- * Frees the main Vulkan context.
+ * Frees main context.
  */
 void ff_vk_uninit(FFVulkanContext *s);
 
-- 
2.39.2


[-- Attachment #50: 0049-vulkan-add-ff_vk_count_images.patch --]
[-- Type: text/x-diff, Size: 779 bytes --]

From 2aad41bb35392d7f2e300857a1b0f73b873ec601 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:37:18 +0100
Subject: [PATCH 49/72] vulkan: add ff_vk_count_images()

---
 libavutil/vulkan.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index a8f3c458fc..e66ca59ef7 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -253,6 +253,15 @@ typedef struct FFVulkanContext {
     enum AVPixelFormat  input_format;
 } FFVulkanContext;
 
+static inline int ff_vk_count_images(AVVkFrame *f)
+{
+    int cnt = 0;
+    while (f->img[cnt])
+        cnt++;
+
+    return cnt;
+}
+
 /* Identity mapping - r = r, b = b, g = g, a = a */
 extern const VkComponentMapping ff_comp_identity_map;
 
-- 
2.39.2


[-- Attachment #51: 0050-vulkan-rewrite-image-handling-code.patch --]
[-- Type: text/x-diff, Size: 64405 bytes --]

From dca500204539da2a17746db4125c476a29851305 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:38:10 +0100
Subject: [PATCH 50/72] vulkan: rewrite image handling code

---
 libavutil/vulkan.c | 919 +++++++++++++++++++++++++--------------------
 libavutil/vulkan.h | 166 ++++----
 2 files changed, 612 insertions(+), 473 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 17a5bd6f3f..20ad269b0a 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -21,12 +23,6 @@
 #include "vulkan.h"
 #include "vulkan_loader.h"
 
-#if CONFIG_LIBGLSLANG
-#include "vulkan_glslang.c"
-#elif CONFIG_LIBSHADERC
-#include "vulkan_shaderc.c"
-#endif
-
 const VkComponentMapping ff_comp_identity_map = {
     .r = VK_COMPONENT_SWIZZLE_IDENTITY,
     .g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -92,15 +88,22 @@ int ff_vk_load_props(FFVulkanContext *s)
     uint32_t qc = 0;
     FFVulkanFunctions *vk = &s->vkfn;
 
+    s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT,
+    };
+    s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT,
+        .pNext = &s->hprops,
+    };
     s->driver_props = (VkPhysicalDeviceDriverProperties) {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+        .pNext = &s->desc_buf_props,
     };
     s->props = (VkPhysicalDeviceProperties2) {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
         .pNext = &s->driver_props,
     };
 
-
     vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
     vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
     vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
@@ -373,6 +376,7 @@ int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
             return AVERROR_EXTERNAL;
         }
 
+        e->idx = i;
         e->parent = pool;
 
         /* Query data */
@@ -496,17 +500,21 @@ void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
 
     for (int j = 0; j < e->nb_frame_deps; j++) {
         if (e->frame_locked[j]) {
-            AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
-            s->hwfc->unlock_frame(s->frames, f);
+            AVFrame *f = e->frame_deps[j];
+            AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+            AVVulkanFramesContext *vkfc = hwfc->hwctx;
+            AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+            vkfc->unlock_frame(hwfc, vkf);
             e->frame_locked[j] = 0;
             e->frame_update[j] = 0;
         }
-        av_buffer_unref(&e->frame_deps[j]);
+        av_frame_free(&e->frame_deps[j]);
     }
     e->nb_frame_deps = 0;
 
     e->sem_wait_cnt = 0;
     e->sem_sig_cnt = 0;
+    e->sem_sig_val_dst_cnt = 0;
 }
 
 int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
@@ -533,18 +541,25 @@ int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
     return 0;
 }
 
-int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
                              VkPipelineStageFlagBits in_wait_dst_flag)
 {
     uint8_t *frame_locked;
     uint8_t *frame_update;
-    AVBufferRef **frame_deps;
+    AVFrame **frame_deps;
     VkImageLayout *layout_dst;
     uint32_t *queue_family_dst;
     VkAccessFlagBits *access_dst;
 
-    AVVkFrame *f = (AVVkFrame *)vkfb->data;
-    int nb_images = ff_vk_count_images(f);
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+    AVVulkanFramesContext *vkfc = hwfc->hwctx;
+    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+    int nb_images = ff_vk_count_images(vkf);
+
+    /* Don't add duplicates */
+    for (int i = 0; i < e->nb_frame_deps; i++)
+        if (e->frame_deps[i]->data[0] == f->data[0])
+            return 1;
 
 #define ARR_REALLOC(str, arr, alloc_s, cnt)                               \
     do {                                                                  \
@@ -569,17 +584,18 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef
         ARR_REALLOC(e, sem_wait_val, &e->sem_wait_val_alloc, e->sem_wait_cnt);
         ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
         ARR_REALLOC(e, sem_sig_val, &e->sem_sig_val_alloc, e->sem_sig_cnt);
-        ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_cnt);
+        ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);
 
-        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
+        e->sem_wait[e->sem_wait_cnt] = vkf->sem[i];
         e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
-        e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
+        e->sem_wait_val[e->sem_wait_cnt] = vkf->sem_value[i];
         e->sem_wait_cnt++;
 
-        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
-        e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
-        e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
+        e->sem_sig[e->sem_sig_cnt] = vkf->sem[i];
+        e->sem_sig_val[e->sem_sig_cnt] = vkf->sem_value[i] + 1;
+        e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i];
         e->sem_sig_cnt++;
+        e->sem_sig_val_dst_cnt++;
     }
 
     ARR_REALLOC(e, layout_dst,       &e->layout_dst_alloc,       e->nb_frame_deps);
@@ -590,13 +606,13 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef
     ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
     ARR_REALLOC(e, frame_deps,   &e->frame_deps_alloc_size,   e->nb_frame_deps);
 
-    e->frame_deps[e->nb_frame_deps] = av_buffer_ref(vkfb);
+    e->frame_deps[e->nb_frame_deps] = av_frame_clone(f);
     if (!e->frame_deps[e->nb_frame_deps]) {
         ff_vk_exec_discard_deps(s, e);
         return AVERROR(ENOMEM);
     }
 
-    s->hwfc->lock_frame(s->frames, f);
+    vkfc->lock_frame(hwfc, vkf);
     e->frame_locked[e->nb_frame_deps] = 1;
     e->frame_update[e->nb_frame_deps] = 0;
 
@@ -605,22 +621,51 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef
     return 0;
 }
 
-void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e,
-                             AVBufferRef *vkfb,
-                             VkImageMemoryBarrier2 *bar)
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+                             VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar)
 {
     int i;
     for (i = 0; i < e->nb_frame_deps; i++)
-        if (e->frame_deps[i]->data == vkfb->data)
+        if (e->frame_deps[i]->data[0] == f->data[0])
             break;
     av_assert0(i < e->nb_frame_deps);
 
+    /* Don't update duplicates */
+    if (nb_img_bar && !e->frame_update[i])
+        (*nb_img_bar)++;
+
     e->queue_family_dst[i] = bar->dstQueueFamilyIndex;
     e->access_dst[i] = bar->dstAccessMask;
     e->layout_dst[i] = bar->newLayout;
     e->frame_update[i] = 1;
 }
 
+int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
+                                VkSemaphore *dst, uint64_t *dst_val,
+                                AVFrame *f)
+{
+    uint64_t **sem_sig_val_dst;
+    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+
+    /* Reject unknown frames */
+    int i;
+    for (i = 0; i < e->nb_frame_deps; i++)
+        if (e->frame_deps[i]->data[0] == f->data[0])
+            break;
+    if (i == e->nb_frame_deps)
+        return AVERROR(EINVAL);
+
+    ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);
+
+    *dst     = vkf->sem[0];
+    *dst_val = vkf->sem_value[0];
+
+    e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val;
+    e->sem_sig_val_dst_cnt++;
+
+    return 0;
+}
+
 int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
 {
     VkResult ret;
@@ -668,22 +713,26 @@ int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
         return AVERROR_EXTERNAL;
     }
 
-    for (int i = 0; i < e->sem_sig_cnt; i++)
+    for (int i = 0; i < e->sem_sig_val_dst_cnt; i++)
         *e->sem_sig_val_dst[i] += 1;
 
     /* Unlock all frames */
     for (int j = 0; j < e->nb_frame_deps; j++) {
         if (e->frame_locked[j]) {
-            AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
+            AVFrame *f = e->frame_deps[j];
+            AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+            AVVulkanFramesContext *vkfc = hwfc->hwctx;
+            AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+
             if (e->frame_update[j]) {
-                int nb_images = ff_vk_count_images(f);
+                int nb_images = ff_vk_count_images(vkf);
                 for (int i = 0; i < nb_images; i++) {
-                    f->layout[i] = e->layout_dst[j];
-                    f->access[i] = e->access_dst[j];
-                    f->queue_family[i] = e->queue_family_dst[j];
+                    vkf->layout[i] = e->layout_dst[j];
+                    vkf->access[i] = e->access_dst[j];
+                    vkf->queue_family[i] = e->queue_family_dst[j];
                 }
             }
-            s->hwfc->unlock_frame(s->frames, f);
+            vkfc->unlock_frame(hwfc, vkf);
             e->frame_locked[j] = 0;
         }
     }
@@ -767,6 +816,10 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
                                 but should be ok */
     };
 
+    VkMemoryAllocateFlagsInfo alloc_flags = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
+        .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
+    };
     VkBufferMemoryRequirementsInfo2 req_desc = {
         .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
     };
@@ -796,11 +849,18 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
     /* In case the implementation prefers/requires dedicated allocation */
     use_ded_mem = ded_req.prefersDedicatedAllocation |
                   ded_req.requiresDedicatedAllocation;
-    if (use_ded_mem)
+    if (use_ded_mem) {
         ded_alloc.buffer = buf->buf;
+        ded_alloc.pNext = alloc_pNext;
+        alloc_pNext = &ded_alloc;
+    }
 
-    err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
-                          use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+        alloc_flags.pNext = alloc_pNext;
+        alloc_pNext = &alloc_flags;
+    }
+
+    err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext,
                           &buf->flags, &buf->mem);
     if (err)
         return err;
@@ -812,6 +872,14 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
         return AVERROR_EXTERNAL;
     }
 
+    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+        VkBufferDeviceAddressInfo address_info = {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+            .buffer = buf->buf,
+        };
+        buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
+    }
+
     buf->size = size;
 
     return 0;
@@ -933,8 +1001,8 @@ int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
     return 0;
 }
 
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
-                                int unnorm_coords, VkFilter filt)
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+                       int unnorm_coords, VkFilter filt)
 {
     VkResult ret;
     FFVulkanFunctions *vk = &s->vkfn;
@@ -955,24 +1023,14 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
     };
 
     ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
-                            s->hwctx->alloc, &sctx->sampler[0]);
+                            s->hwctx->alloc, sampler);
     if (ret != VK_SUCCESS) {
         av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
                ff_vk_ret2str(ret));
-        return NULL;
+        return AVERROR_EXTERNAL;
     }
 
-    for (int i = 1; i < 4; i++)
-        sctx->sampler[i] = sctx->sampler[0];
-
-    return sctx;
-}
-
-void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx)
-{
-    FFVulkanFunctions *vk = &s->vkfn;
-    vk->DestroySampler(s->hwctx->act_dev, sctx->sampler[0],
-                       s->hwctx->alloc);
+    return 0;
 }
 
 int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
@@ -995,69 +1053,131 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
 }
 
 typedef struct ImageViewCtx {
-    VkImageView view;
+    VkImageView views[AV_NUM_DATA_POINTERS];
+    int nb_views;
 } ImageViewCtx;
 
-static void destroy_imageview(void *opaque, uint8_t *data)
+static void destroy_imageviews(void *opaque, uint8_t *data)
 {
     FFVulkanContext *s = opaque;
     FFVulkanFunctions *vk = &s->vkfn;
     ImageViewCtx *iv = (ImageViewCtx *)data;
 
-    vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
+    for (int i = 0; i < iv->nb_views; i++)
+        vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
+
     av_free(iv);
 }
 
-int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
-                           VkImageView *v, VkImage img, VkFormat fmt,
-                           const VkComponentMapping map)
+int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
+                            VkImageView views[AV_NUM_DATA_POINTERS],
+                            AVFrame *f)
 {
     int err;
+    VkResult ret;
     AVBufferRef *buf;
     FFVulkanFunctions *vk = &s->vkfn;
-
-    VkImageViewCreateInfo imgview_spawn = {
-        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-        .pNext      = NULL,
-        .image      = img,
-        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
-        .format     = fmt,
-        .components = map,
-        .subresourceRange = {
-            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
-            .baseMipLevel   = 0,
-            .levelCount     = 1,
-            .baseArrayLayer = 0,
-            .layerCount     = 1,
-        },
-    };
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+    const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
+    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+    const int nb_images = ff_vk_count_images(vkf);
+    const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
 
     ImageViewCtx *iv = av_mallocz(sizeof(*iv));
 
-    VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
-                                       s->hwctx->alloc, &iv->view);
-    if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
-               ff_vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
+    for (int i = 0; i < nb_planes; i++) {
+        VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+                                              VK_IMAGE_ASPECT_PLANE_0_BIT,
+                                              VK_IMAGE_ASPECT_PLANE_1_BIT,
+                                              VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
+        VkImageViewCreateInfo view_create_info = {
+            .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+            .pNext      = NULL,
+            .image      = vkf->img[FFMIN(i, nb_images - 1)],
+            .viewType   = VK_IMAGE_VIEW_TYPE_2D,
+            .format     = rep_fmts[i],
+            .components = ff_comp_identity_map,
+            .subresourceRange = {
+                .aspectMask = plane_aspect[(nb_planes != 1) + i*(nb_planes != 1)],
+                .levelCount = 1,
+                .layerCount = 1,
+            },
+        };
+
+        ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
+                                  s->hwctx->alloc, &iv->views[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+                   ff_vk_ret2str(ret));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+
+        iv->nb_views++;
     }
 
-    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
+    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageviews, s, 0);
     if (!buf) {
-        destroy_imageview(s, (uint8_t *)iv);
-        return AVERROR(ENOMEM);
+        err = AVERROR(ENOMEM);
+        goto fail;
     }
 
     /* Add to queue dependencies */
     err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
-    if (err) {
+    if (err < 0)
         av_buffer_unref(&buf);
-        return err;
-    }
-
-    *v = iv->view;
+    else /* on failure the unref above frees iv via destroy_imageviews() */
+        memcpy(views, iv->views, nb_planes*sizeof(*views));
 
-    return 0;
+    return err;
+
+fail:
+    for (int i = 0; i < iv->nb_views; i++)
+        vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
+    av_free(iv);
+    return err;
+}
+
+/* Append a barrier for each VkImage of pic to bar[]/nb_bar, then pass them to ff_vk_exec_update_frame(). */
+void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
+                         AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
+                         VkPipelineStageFlags src_stage,
+                         VkPipelineStageFlags dst_stage,
+                         VkAccessFlagBits     new_access,
+                         VkImageLayout        new_layout,
+                         uint32_t             new_qf)
+{
+    int dep, found; /* found: index of pic in e's frame deps if it has a pending update, else -1 */
+    AVVkFrame *vkf = (AVVkFrame *)pic->data[0];
+    const int nb_images = ff_vk_count_images(vkf);
+    for (dep = 0; dep < e->nb_frame_deps; dep++)
+        if (e->frame_deps[dep]->data[0] == pic->data[0])
+            break;
+    found = (dep < e->nb_frame_deps) && (e->frame_update[dep]) ? dep : -1;
+
+    for (int i = 0; i < nb_images; i++) { /* source state is read per image when nothing is pending */
+        bar[*nb_bar] = (VkImageMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+            .pNext = NULL,
+            .srcStageMask = src_stage,
+            .dstStageMask = dst_stage,
+            .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i],
+            .dstAccessMask = new_access,
+            .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[i],
+            .newLayout = new_layout,
+            .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[i],
+            .dstQueueFamilyIndex = new_qf,
+            .image = vkf->img[i],
+            .subresourceRange = (VkImageSubresourceRange) {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .layerCount = 1,
+                .levelCount = 1,
+            },
+        };
+        *nb_bar += 1;
+    }
+    ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL);
 }
 
 int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
@@ -1077,11 +1197,11 @@ int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *na
     return 0;
 }
 
-void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3])
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z)
 {
-    shd->local_size[0] = local_size[0];
-    shd->local_size[1] = local_size[1];
-    shd->local_size[2] = local_size[2];
+    shd->local_size[0] = x;
+    shd->local_size[1] = y;
+    shd->local_size[2] = z;
 
     av_bprintf(&shd->src, "layout (local_size_x = %i, "
                "local_size_y = %i, local_size_z = %i) in;\n\n",
@@ -1113,39 +1233,21 @@ void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio)
 void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
 {
     FFVulkanFunctions *vk = &s->vkfn;
-    vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
+    av_bprint_finalize(&shd->src, NULL);
+
+    if (shd->shader.module)
+        vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
 }
 
-int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
-                         const char *entrypoint)
+int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
+                        uint8_t *spirv, size_t spirv_size, const char *entrypoint)
 {
-    int err;
     VkResult ret;
     FFVulkanFunctions *vk = &s->vkfn;
     VkShaderModuleCreateInfo shader_create;
-    uint8_t *spirv;
-    size_t spirv_size;
-    void *priv;
 
     shd->shader.pName = entrypoint;
 
-    if (!s->spirv_compiler) {
-#if CONFIG_LIBGLSLANG
-        s->spirv_compiler = ff_vk_glslang_init();
-#elif CONFIG_LIBSHADERC
-        s->spirv_compiler = ff_vk_shaderc_init();
-#else
-        return AVERROR(ENOSYS);
-#endif
-        if (!s->spirv_compiler)
-            return AVERROR(ENOMEM);
-    }
-
-    err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv,
-                                            &spirv_size, entrypoint, &priv);
-    if (err < 0)
-        return err;
-
     av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
            shd->name, spirv_size);
 
@@ -1157,11 +1259,8 @@ int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
 
     ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                                  &shd->shader.module);
-
-    s->spirv_compiler->free_shader(s->spirv_compiler, &priv);
-
     if (ret != VK_SUCCESS) {
-        av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
+        av_log(s, AV_LOG_ERROR, "Error creating shader module: %s\n",
                ff_vk_ret2str(ret));
         return AVERROR_EXTERNAL;
     }
@@ -1190,132 +1289,88 @@ static const struct descriptor_props {
     [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
 };
 
-int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
-                             FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
-                             int num, int only_print_to_shader)
+int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                      FFVkSPIRVShader *shd,
+                                      FFVulkanDescriptorSetBinding *desc, int nb,
+                                      int read_only, int print_to_shader_only)
 {
     VkResult ret;
-    VkDescriptorSetLayout *layout;
+    int has_sampler = 0;
     FFVulkanFunctions *vk = &s->vkfn;
+    FFVulkanDescriptorSet *set;
+    VkDescriptorSetLayoutCreateInfo desc_create_layout;
 
-    if (only_print_to_shader)
+    if (print_to_shader_only)
         goto print;
 
-    pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
-                                       pl->desc_layout_num + pl->qf->nb_queues);
-    if (!pl->desc_layout)
+    /* Actual layout allocated for the pipeline */
+    set = av_realloc_array(pl->desc_set, sizeof(*pl->desc_set),
+                           pl->nb_descriptor_sets + 1);
+    if (!set)
         return AVERROR(ENOMEM);
+    pl->desc_set = set;
+    set = &set[pl->nb_descriptor_sets];
+    memset(set, 0, sizeof(*set));
 
-    pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized,
-                                                sizeof(*pl->desc_set_initialized),
-                                                pl->descriptor_sets_num + 1);
-    if (!pl->desc_set_initialized)
+    set->binding = av_mallocz(nb*sizeof(*set->binding));
+    if (!set->binding)
         return AVERROR(ENOMEM);
 
-    pl->desc_set_initialized[pl->descriptor_sets_num] = 0;
-    layout = &pl->desc_layout[pl->desc_layout_num];
-
-    { /* Create descriptor set layout descriptions */
-        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
-        VkDescriptorSetLayoutBinding *desc_binding;
-
-        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
-        if (!desc_binding)
-            return AVERROR(ENOMEM);
-
-        for (int i = 0; i < num; i++) {
-            desc_binding[i].binding            = i;
-            desc_binding[i].descriptorType     = desc[i].type;
-            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
-            desc_binding[i].stageFlags         = desc[i].stages;
-            desc_binding[i].pImmutableSamplers = desc[i].sampler ?
-                                                 desc[i].sampler->sampler :
-                                                 NULL;
-        }
-
-        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
-        desc_create_layout.pBindings = desc_binding;
-        desc_create_layout.bindingCount = num;
-
-        for (int i = 0; i < pl->qf->nb_queues; i++) {
-            ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
-                                                s->hwctx->alloc, &layout[i]);
-            if (ret != VK_SUCCESS) {
-                av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
-                       "layout: %s\n", ff_vk_ret2str(ret));
-                av_free(desc_binding);
-                return AVERROR_EXTERNAL;
-            }
-        }
-
-        av_free(desc_binding);
+    set->binding_offset = av_mallocz(nb*sizeof(*set->binding_offset));
+    if (!set->binding_offset) {
+        av_freep(&set->binding);
+        return AVERROR(ENOMEM);
     }
 
-    { /* Pool each descriptor by type and update pool counts */
-        for (int i = 0; i < num; i++) {
-            int j;
-            for (j = 0; j < pl->pool_size_desc_num; j++)
-                if (pl->pool_size_desc[j].type == desc[i].type)
-                    break;
-            if (j >= pl->pool_size_desc_num) {
-                pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
-                                                      sizeof(*pl->pool_size_desc),
-                                                      ++pl->pool_size_desc_num);
-                if (!pl->pool_size_desc)
-                    return AVERROR(ENOMEM);
-                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
-            }
-            pl->pool_size_desc[j].type             = desc[i].type;
-            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
-        }
-    }
+    desc_create_layout = (VkDescriptorSetLayoutCreateInfo) {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .bindingCount = nb,
+        .pBindings = set->binding,
+        .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
+    };
 
-    { /* Create template creation struct */
-        VkDescriptorUpdateTemplateCreateInfo *dt;
-        VkDescriptorUpdateTemplateEntry *des_entries;
+    for (int i = 0; i < nb; i++) {
+        set->binding[i].binding            = i;
+        set->binding[i].descriptorType     = desc[i].type;
+        set->binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
+        set->binding[i].stageFlags         = desc[i].stages;
+        set->binding[i].pImmutableSamplers = desc[i].samplers;
 
-        /* Freed after descriptor set initialization */
-        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
-        if (!des_entries)
-            return AVERROR(ENOMEM);
+        if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+            desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+            has_sampler |= 1;
+    }
 
-        for (int i = 0; i < num; i++) {
-            des_entries[i].dstBinding      = i;
-            des_entries[i].descriptorType  = desc[i].type;
-            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
-            des_entries[i].dstArrayElement = 0;
-            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
-            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
-        }
+    set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
+                 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+    if (has_sampler)
+        set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;
 
-        pl->desc_template_info = av_realloc_array(pl->desc_template_info,
-                                                  sizeof(*pl->desc_template_info),
-                                                  pl->total_descriptor_sets + pl->qf->nb_queues);
-        if (!pl->desc_template_info)
-            return AVERROR(ENOMEM);
+    ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+                                        s->hwctx->alloc, &set->layout);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
 
-        dt = &pl->desc_template_info[pl->total_descriptor_sets];
-        memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);
+    vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size);
 
-        for (int i = 0; i < pl->qf->nb_queues; i++) {
-            dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
-            dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
-            dt[i].descriptorSetLayout = layout[i];
-            dt[i].pDescriptorUpdateEntries = des_entries;
-            dt[i].descriptorUpdateEntryCount = num;
-        }
-    }
+    set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment);
 
-    pl->descriptor_sets_num++;
+    for (int i = 0; i < nb; i++)
+        vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout,
+                                                   i, &set->binding_offset[i]);
 
-    pl->desc_layout_num += pl->qf->nb_queues;
-    pl->total_descriptor_sets += pl->qf->nb_queues;
+    set->read_only = read_only;
+    set->nb_bindings = nb;
+    pl->nb_descriptor_sets++;
 
 print:
     /* Write shader info */
-    for (int i = 0; i < num; i++) {
+    for (int i = 0; i < nb; i++) {
         const struct descriptor_props *prop = &descriptor_props[desc[i].type];
-        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
+        GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i);
 
         if (desc[i].mem_layout)
             GLSLA(", %s", desc[i].mem_layout);
@@ -1347,171 +1402,260 @@ print:
     return 0;
 }
 
-void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
-                                 int set_id)
+int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
+                                 FFVulkanPipeline *pl)
 {
-    FFVulkanFunctions *vk = &s->vkfn;
+    int err;
 
-    /* If a set has never been updated, update all queues' sets. */
-    if (!pl->desc_set_initialized[set_id]) {
-        for (int i = 0; i < pl->qf->nb_queues; i++) {
-            int idx = set_id*pl->qf->nb_queues + i;
-            vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
-                                                pl->desc_set[idx],
-                                                pl->desc_template[idx],
-                                                s);
-        }
-        pl->desc_set_initialized[set_id] = 1;
-        return;
-    }
+    pl->desc_bind = av_mallocz(pl->nb_descriptor_sets*sizeof(*pl->desc_bind));
+    if (!pl->desc_bind)
+        return AVERROR(ENOMEM);
+
+    pl->bound_buffer_indices = av_mallocz(pl->nb_descriptor_sets*
+                                          sizeof(*pl->bound_buffer_indices));
+    if (!pl->bound_buffer_indices)
+        return AVERROR(ENOMEM);
 
-//    set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
+    for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+        FFVulkanDescriptorSet *set = &pl->desc_set[i];
+        int nb = set->read_only ? 1 : pool->pool_size;
+
+        err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb,
+                               NULL, NULL, set->usage,
+                               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+        if (err < 0)
+            return err;
+
+        err = ff_vk_map_buffers(s, &set->buf, &set->desc_mem, 1, 0);
+        if (err < 0)
+            return err;
+
+        pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) {
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
+            .usage = set->usage,
+            .address = set->buf.address,
+        };
+
+        pl->bound_buffer_indices[i] = i;
+    }
 
-    vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
-                                        pl->desc_set[set_id],
-                                        pl->desc_template[set_id],
-                                        s);
+    return 0;
 }
 
-void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
-                            FFVulkanPipeline *pl,
-                            VkShaderStageFlagBits stage,
-                            int offset, size_t size, void *src)
+static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e,
+                                         FFVulkanDescriptorSet *set,
+                                         int bind_idx, int array_idx,
+                                         VkDescriptorGetInfoEXT *desc_get_info,
+                                         size_t desc_size)
 {
     FFVulkanFunctions *vk = &s->vkfn;
-    vk->CmdPushConstants(e->buf, pl->pipeline_layout,
-                         stage, offset, size, src);
+    const size_t exec_offset = set->read_only ? 0 : set->aligned_size*e->idx;
+    void *desc = set->desc_mem +                 /* Base */
+                 exec_offset +                   /* Execution context */
+                 set->binding_offset[bind_idx] + /* Descriptor binding */
+                 array_idx*desc_size;            /* Array position */
+
+    vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc);
 }
 
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
+int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                 FFVkExecContext *e, int set, int bind, int offs,
+                                 VkSampler *sampler)
 {
-    VkResult ret;
-    FFVulkanFunctions *vk = &s->vkfn;
+    FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+    VkDescriptorGetInfoEXT desc_get_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+        .type = desc_set->binding[bind].descriptorType,
+    };
 
-    pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
-    if (!pl->desc_staging)
-        return AVERROR(ENOMEM);
+    switch (desc_get_info.type) {
+    case VK_DESCRIPTOR_TYPE_SAMPLER:
+        desc_get_info.data.pSampler = sampler;
+        break;
+    default:
+        av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+               set, bind, desc_get_info.type);
+        return AVERROR(EINVAL);
+        break;
+    };
 
-    { /* Init descriptor set pool */
-        VkDescriptorPoolCreateInfo pool_create_info = {
-            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
-            .poolSizeCount = pl->pool_size_desc_num,
-            .pPoolSizes    = pl->pool_size_desc,
-            .maxSets       = pl->total_descriptor_sets,
-        };
+    update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info,
+                          s->desc_buf_props.samplerDescriptorSize);
 
-        ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
-                                       s->hwctx->alloc, &pl->desc_pool);
-        av_freep(&pl->pool_size_desc);
-        if (ret != VK_SUCCESS) {
-            av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
-                   "pool: %s\n", ff_vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
-        }
-    }
+    return 0;
+}
 
-    { /* Allocate descriptor sets */
-        VkDescriptorSetAllocateInfo alloc_info = {
-            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
-            .descriptorPool     = pl->desc_pool,
-            .descriptorSetCount = pl->total_descriptor_sets,
-            .pSetLayouts        = pl->desc_layout,
-        };
+int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl,
+                               FFVkExecContext *e, int set, int bind, int offs,
+                               VkImageView view, VkImageLayout layout, VkSampler sampler)
+{
+    FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+    VkDescriptorGetInfoEXT desc_get_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+        .type = desc_set->binding[bind].descriptorType,
+    };
+    VkDescriptorImageInfo desc_img_info = {
+        .imageView = view,
+        .sampler = sampler,
+        .imageLayout = layout,
+    };
+    size_t desc_size;
 
-        pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
-        if (!pl->desc_set)
-            return AVERROR(ENOMEM);
+    switch (desc_get_info.type) {
+    case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+        desc_get_info.data.pSampledImage = &desc_img_info;
+        desc_size = s->desc_buf_props.sampledImageDescriptorSize;
+        break;
+    case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+        desc_get_info.data.pStorageImage = &desc_img_info;
+        desc_size = s->desc_buf_props.storageImageDescriptorSize;
+        break;
+    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+        desc_get_info.data.pInputAttachmentImage = &desc_img_info;
+        desc_size = s->desc_buf_props.inputAttachmentDescriptorSize;
+        break;
+    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+        desc_get_info.data.pCombinedImageSampler = &desc_img_info;
+        desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize;
+        break;
+    default:
+        av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+               set, bind, desc_get_info.type);
+        return AVERROR(EINVAL);
+        break;
+    };
 
-        ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
-                                         pl->desc_set);
-        if (ret != VK_SUCCESS) {
-            av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
-                   ff_vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
-        }
-    }
+    update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);
 
-    { /* Finally create the pipeline layout */
-        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
-            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
-            .pSetLayouts            = (VkDescriptorSetLayout *)pl->desc_staging,
-            .pushConstantRangeCount = pl->push_consts_num,
-            .pPushConstantRanges    = pl->push_consts,
-        };
+    return 0;
+}
 
-        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
-            pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];
+int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                FFVkExecContext *e, int set, int bind, int offs,
+                                VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt)
+{
+    FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+    VkDescriptorGetInfoEXT desc_get_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+        .type = desc_set->binding[bind].descriptorType,
+    };
+    VkDescriptorAddressInfoEXT desc_buf_info = {
+        .address = addr,
+        .range = len,
+        .format = fmt,
+    };
+    size_t desc_size;
 
-        ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
-                                       s->hwctx->alloc, &pl->pipeline_layout);
-        av_freep(&pl->push_consts);
-        pl->push_consts_num = 0;
-        if (ret != VK_SUCCESS) {
-            av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
-                   ff_vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
-        }
-    }
+    switch (desc_get_info.type) {
+    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+        desc_get_info.data.pUniformBuffer = &desc_buf_info;
+        desc_size = s->desc_buf_props.uniformBufferDescriptorSize;
+        break;
+    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+        desc_get_info.data.pStorageBuffer = &desc_buf_info;
+        desc_size = s->desc_buf_props.storageBufferDescriptorSize;
+        break;
+    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+        desc_get_info.data.pUniformTexelBuffer = &desc_buf_info;
+        desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize;
+        break;
+    case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+        desc_get_info.data.pStorageTexelBuffer = &desc_buf_info;
+        desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize;
+        break;
+    default:
+        av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+               set, bind, desc_get_info.type);
+        return AVERROR(EINVAL);
+        break;
+    };
 
-    { /* Descriptor template (for tightly packed descriptors) */
-        VkDescriptorUpdateTemplateCreateInfo *dt;
+    update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);
 
-        pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
-        if (!pl->desc_template)
-            return AVERROR(ENOMEM);
+    return 0;
+}
 
-        /* Create update templates for the descriptor sets */
-        for (int i = 0; i < pl->total_descriptor_sets; i++) {
-            dt = &pl->desc_template_info[i];
-            dt->pipelineLayout = pl->pipeline_layout;
-            ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
-                                                     dt, s->hwctx->alloc,
-                                                     &pl->desc_template[i]);
-            if (ret != VK_SUCCESS) {
-                av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
-                       "template: %s\n", ff_vk_ret2str(ret));
-                return AVERROR_EXTERNAL;
-            }
-        }
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                       FFVkExecContext *e, AVFrame *f,
+                                       VkImageView *views, int set, int binding,
+                                       VkImageLayout layout, VkSampler sampler)
+{
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+    const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
 
-        /* Free the duplicated memory used for the template entries */
-        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
-            dt = &pl->desc_template_info[i];
-            av_free((void *)dt->pDescriptorUpdateEntries);
-        }
+    for (int i = 0; i < nb_planes; i++)
+        ff_vk_set_descriptor_image(s, pl, e, set, binding, i,
+                                   views[i], layout, sampler);
+}
+
+void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
+                            FFVulkanPipeline *pl,
+                            VkShaderStageFlagBits stage,
+                            int offset, size_t size, void *src)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    vk->CmdPushConstants(e->buf, pl->pipeline_layout,
+                         stage, offset, size, src);
+}
+
+static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkPipelineLayoutCreateInfo pipeline_layout_info;
+
+    VkDescriptorSetLayout *desc_layouts = av_malloc_array(pl->nb_descriptor_sets,
+                                                          sizeof(*desc_layouts));
+    if (!desc_layouts)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < pl->nb_descriptor_sets; i++)
+        desc_layouts[i] = pl->desc_set[i].layout;
+
+    /* Finally create the pipeline layout */
+    pipeline_layout_info = (VkPipelineLayoutCreateInfo) {
+        .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+        .pSetLayouts            = desc_layouts,
+        .setLayoutCount         = pl->nb_descriptor_sets,
+        .pushConstantRangeCount = pl->push_consts_num,
+        .pPushConstantRanges    = pl->push_consts,
+    };
 
-        av_freep(&pl->desc_template_info);
+    ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info,
+                                   s->hwctx->alloc, &pl->pipeline_layout);
+    av_free(desc_layouts);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
     }
 
     return 0;
 }
 
 int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
-                                FFVkQueueFamilyCtx *qf)
+                                FFVkSPIRVShader *shd)
 {
-    int i;
+    int err;
     VkResult ret;
     FFVulkanFunctions *vk = &s->vkfn;
 
-    VkComputePipelineCreateInfo pipe = {
+    VkComputePipelineCreateInfo pipeline_create_info;
+
+    err = init_pipeline_layout(s, pl);
+    if (err < 0)
+        return err;
+
+    pipeline_create_info = (VkComputePipelineCreateInfo) {
         .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
         .layout = pl->pipeline_layout,
+        .stage = shd->shader,
     };
 
-    pl->qf = qf;
-
-    for (i = 0; i < pl->shaders_num; i++) {
-        if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
-            pipe.stage = pl->shaders[i]->shader;
-            break;
-        }
-    }
-    if (i == pl->shaders_num) {
-        av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
-        return AVERROR(EINVAL);
-    }
-
-    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
+                                     &pipeline_create_info,
                                      s->hwctx->alloc, &pl->pipeline);
     if (ret != VK_SUCCESS) {
         av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
@@ -1520,77 +1664,57 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
     }
 
     pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
+    pl->wg_size[0] = shd->local_size[0];
+    pl->wg_size[1] = shd->local_size[1];
+    pl->wg_size[2] = shd->local_size[2];
 
     return 0;
 }
 
-void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
+void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
                               FFVulkanPipeline *pl)
 {
     FFVulkanFunctions *vk = &s->vkfn;
+    VkDeviceSize offsets[1024];
 
-    vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
-
-//    for (int i = 0; i < pl->descriptor_sets_num; i++)
- //       pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
+    for (int i = 0; i < pl->nb_descriptor_sets; i++)
+        offsets[i] = pl->desc_set[i].read_only ? 0 : pl->desc_set[i].aligned_size*e->idx;
 
-    vk->CmdBindDescriptorSets(e->buf, pl->bind_point,
-                              pl->pipeline_layout, 0,
-                              pl->descriptor_sets_num,
-                              (VkDescriptorSet *)pl->desc_staging,
-                              0, NULL);
+    /* Bind pipeline */
+    vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
+    /* Bind descriptor buffers */
+    vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind);
+    /* Binding offsets */
+    vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout,
+                                         0, pl->nb_descriptor_sets,
+                                         pl->bound_buffer_indices, offsets);
 }
 
 void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl)
 {
     FFVulkanFunctions *vk = &s->vkfn;
 
-    for (int i = 0; i < pl->shaders_num; i++) {
-        FFVkSPIRVShader *shd = pl->shaders[i];
-        av_bprint_finalize(&shd->src, NULL);
-        vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
-                                s->hwctx->alloc);
-        av_free(shd);
-    }
-
-    vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
-    vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
-                              s->hwctx->alloc);
+    if (pl->pipeline)
+        vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
+    if (pl->pipeline_layout)
+        vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
+                                  s->hwctx->alloc);
 
-    for (int i = 0; i < pl->desc_layout_num; i++) {
-        if (pl->desc_template && pl->desc_template[i])
-            vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
-                                                s->hwctx->alloc);
-        if (pl->desc_layout && pl->desc_layout[i])
-            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
+    for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+        FFVulkanDescriptorSet *set = &pl->desc_set[i];
+        ff_vk_unmap_buffers(s, &set->buf, 1, 0);
+        ff_vk_free_buf(s, &set->buf);
+        if (set->layout)
+            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout,
                                            s->hwctx->alloc);
+        av_free(set->binding);
+        av_free(set->binding_offset);
     }
 
-    /* Also frees the descriptor sets */
-    if (pl->desc_pool)
-        vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
-                                  s->hwctx->alloc);
-
-    av_freep(&pl->desc_staging);
     av_freep(&pl->desc_set);
-    av_freep(&pl->shaders);
-    av_freep(&pl->desc_layout);
-    av_freep(&pl->desc_template);
-    av_freep(&pl->desc_set_initialized);
+    av_freep(&pl->desc_bind);
     av_freep(&pl->push_consts);
     pl->push_consts_num = 0;
-
-    /* Only freed in case of failure */
-    av_freep(&pl->pool_size_desc);
-    if (pl->desc_template_info) {
-        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
-            VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
-            av_free((void *)dt->pDescriptorUpdateEntries);
-        }
-        av_freep(&pl->desc_template_info);
-    }
-
-    av_free(pl);
 }
 
 void ff_vk_uninit(FFVulkanContext *s)
@@ -1599,9 +1723,6 @@ void ff_vk_uninit(FFVulkanContext *s)
     av_freep(&s->qf_props);
     av_freep(&s->video_props);
 
-    if (s->spirv_compiler)
-        s->spirv_compiler->uninit(&s->spirv_compiler);
-
     av_buffer_unref(&s->device_ref);
     av_buffer_unref(&s->frames_ref);
 }
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index e66ca59ef7..1321fb8ba8 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -30,11 +30,6 @@
 #include "hwcontext_vulkan.h"
 #include "vulkan_loader.h"
 
-#define FF_VK_DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT      |           \
-                                   VK_IMAGE_USAGE_STORAGE_BIT      |           \
-                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT |           \
-                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT)
-
 /* GLSL management macros */
 #define INDENT(N) INDENT_##N
 #define INDENT_0
@@ -59,6 +54,8 @@
             goto fail;                                                         \
     } while (0)
 
+#define DUP_SAMPLER(x) { x, x, x, x }
+
 typedef struct FFVkSPIRVShader {
     const char *name;                       /* Name for id/debugging purposes */
     AVBPrint src;
@@ -66,19 +63,6 @@ typedef struct FFVkSPIRVShader {
     VkPipelineShaderStageCreateInfo shader;
 } FFVkSPIRVShader;
 
-typedef struct FFVkSPIRVCompiler {
-    void *priv;
-    int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx,
-                          struct FFVkSPIRVShader *shd, uint8_t **data,
-                          size_t *size, const char *entrypoint, void **opaque);
-    void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque);
-    void (*uninit)(struct FFVkSPIRVCompiler **ctx);
-} FFVkSPIRVCompiler;
-
-typedef struct FFVkSampler {
-    VkSampler sampler[4];
-} FFVkSampler;
-
 typedef struct FFVulkanDescriptorSetBinding {
     const char         *name;
     VkDescriptorType    type;
@@ -88,8 +72,7 @@ typedef struct FFVulkanDescriptorSetBinding {
     uint32_t            dimensions;  /* Needed for e.g. sampler%iD */
     uint32_t            elems;       /* 0 - scalar, 1 or more - vector */
     VkShaderStageFlags  stages;
-    FFVkSampler        *sampler;     /* Sampler to use for all elems */
-    void               *updater;     /* Pointer to VkDescriptor*Info */
+    VkSampler           samplers[4]; /* Sampler to use for all elems */
 } FFVulkanDescriptorSetBinding;
 
 typedef struct FFVkBuffer {
@@ -97,6 +80,7 @@ typedef struct FFVkBuffer {
     VkDeviceMemory mem;
     VkMemoryPropertyFlagBits flags;
     size_t size;
+    VkDeviceAddress address;
 } FFVkBuffer;
 
 typedef struct FFVkQueueFamilyCtx {
@@ -104,42 +88,45 @@ typedef struct FFVkQueueFamilyCtx {
     int nb_queues;
 } FFVkQueueFamilyCtx;
 
-typedef struct FFVulkanPipeline {
-    FFVkQueueFamilyCtx *qf;
+typedef struct FFVulkanDescriptorSet {
+    VkDescriptorSetLayout  layout;
+    FFVkBuffer             buf;
+    uint8_t               *desc_mem;
+    VkDeviceSize           layout_size;
+    VkDeviceSize           aligned_size; /* descriptorBufferOffsetAlignment */
+    VkDeviceSize           total_size; /* Once registered to an exec context */
+    VkBufferUsageFlags     usage;
 
+    VkDescriptorSetLayoutBinding *binding;
+    VkDeviceSize *binding_offset;
+    int nb_bindings;
+
+    int read_only;
+} FFVulkanDescriptorSet;
+
+typedef struct FFVulkanPipeline {
     VkPipelineBindPoint bind_point;
 
     /* Contexts */
     VkPipelineLayout pipeline_layout;
     VkPipeline       pipeline;
 
-    /* Shaders */
-    FFVkSPIRVShader **shaders;
-    int shaders_num;
-
     /* Push consts */
     VkPushConstantRange *push_consts;
     int push_consts_num;
 
+    /* Workgroup */
+    int wg_size[3];
+
     /* Descriptors */
-    VkDescriptorSetLayout         *desc_layout;
-    VkDescriptorPool               desc_pool;
-    VkDescriptorSet               *desc_set;
-    void                         **desc_staging;
-    VkDescriptorSetLayoutBinding **desc_binding;
-    VkDescriptorUpdateTemplate    *desc_template;
-    int                           *desc_set_initialized;
-    int                            desc_layout_num;
-    int                            descriptor_sets_num;
-    int                            total_descriptor_sets;
-    int                            pool_size_desc_num;
-
-    /* Temporary, used to store data in between initialization stages */
-    VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
-    VkDescriptorPoolSize *pool_size_desc;
+    FFVulkanDescriptorSet *desc_set;
+    VkDescriptorBufferBindingInfoEXT *desc_bind;
+    uint32_t *bound_buffer_indices;
+    int nb_descriptor_sets;
 } FFVulkanPipeline;
 
 typedef struct FFVkExecContext {
+    int idx;
     const struct FFVkExecPool *parent;
 
     /* Queue for the execution context */
@@ -162,7 +149,7 @@ typedef struct FFVkExecContext {
     unsigned int buf_deps_alloc_size;
 
     /* Frame dependencies */
-    AVBufferRef **frame_deps;
+    AVFrame **frame_deps;
     unsigned int frame_deps_alloc_size;
     int nb_frame_deps;
 
@@ -185,6 +172,7 @@ typedef struct FFVkExecContext {
 
     uint64_t **sem_sig_val_dst;
     unsigned int sem_sig_val_dst_alloc;
+    int sem_sig_val_dst_cnt;
 
     uint8_t *frame_locked;
     unsigned int frame_locked_alloc_size;
@@ -229,6 +217,8 @@ typedef struct FFVulkanContext {
     VkPhysicalDeviceProperties2 props;
     VkPhysicalDeviceDriverProperties driver_props;
     VkPhysicalDeviceMemoryProperties mprops;
+    VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;
+    VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props;
     VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
     VkQueueFamilyVideoPropertiesKHR *video_props;
     VkQueueFamilyProperties2 *qf_props;
@@ -244,8 +234,6 @@ typedef struct FFVulkanContext {
     uint32_t               qfs[5];
     int                    nb_qfs;
 
-    FFVkSPIRVCompiler     *spirv_compiler;
-
     /* Properties */
     int                 output_width;
     int                output_height;
@@ -286,15 +274,15 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
 int ff_vk_load_props(FFVulkanContext *s);
 
 /**
- * Loads queue families into the main context.
  * Chooses a QF and loads it into a context.
  */
-void ff_vk_qf_fill(FFVulkanContext *s);
 int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                   VkQueueFlagBits dev_family);
 
 /**
  * Allocates/frees an execution pool.
+ * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add()
+ * has been called.
  */
 int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                          FFVkExecPool *pool, int nb_contexts,
@@ -340,17 +328,28 @@ void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
 int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
                            AVBufferRef **deps, int nb_deps, int ref);
 int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e,
-                             AVBufferRef *vkfb, VkPipelineStageFlagBits in_wait_dst_flag);
-void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
-                             VkImageMemoryBarrier2 *bar);
+                             AVFrame *f, VkPipelineStageFlagBits in_wait_dst_flag);
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+                             VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar);
+int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
+                                VkSemaphore *dst, uint64_t *dst_val,
+                                AVFrame *f);
 void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e);
 
 /**
  * Create an imageview and add it as a dependency to an execution.
  */
-int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
-                           VkImageView *v, VkImage img, VkFormat fmt,
-                           const VkComponentMapping map);
+int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
+                            VkImageView views[AV_NUM_DATA_POINTERS],
+                            AVFrame *f);
+
+void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
+                         AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
+                         VkPipelineStageFlags src_stage,
+                         VkPipelineStageFlags dst_stage,
+                         VkAccessFlagBits     new_access,
+                         VkImageLayout        new_layout,
+                         uint32_t             new_qf);
 
 /**
  * Memory/buffer/image allocation helpers.
@@ -372,33 +371,22 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
 void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
 
 /**
- * Sampler management.
+ * Create a sampler.
  */
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
-                                int unnorm_coords, VkFilter filt);
-void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx);
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+                       int unnorm_coords, VkFilter filt);
 
 /**
  * Shader management.
  */
 int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
                       VkShaderStageFlags stage);
-void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z);
 void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio);
-int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
-                         const char *entrypoint);
+int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
+                        uint8_t *spirv, size_t spirv_size, const char *entrypoint);
 void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd);
 
-/**
- * Register a descriptor set.
- * Update a descriptor set for execution.
- */
-int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
-                             FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
-                             int num, int only_print_to_shader);
-void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
-                                 int set_id);
-
 /**
  * Add/update push constants for execution.
  */
@@ -410,15 +398,45 @@ void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
                             int offset, size_t size, void *src);
 
 /**
- * Pipeline management.
+ * Add descriptor to a pipeline. Must be called before pipeline init.
  */
+int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                      FFVkSPIRVShader *shd,
+                                      FFVulkanDescriptorSetBinding *desc, int nb,
+                                      int read_only, int print_to_shader_only);
+
+/* Initialize/free a pipeline. */
 int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
-                                FFVkQueueFamilyCtx *qf);
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
-void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
-                              FFVulkanPipeline *pl);
+                                FFVkSPIRVShader *shd);
 void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl);
 
+/**
+ * Register a pipeline with an exec pool.
+ * Pool may be NULL if all descriptor sets are read-only.
+ */
+int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
+                                 FFVulkanPipeline *pl);
+
+/* Bind pipeline */
+void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
+                              FFVulkanPipeline *pl);
+
+/* Update sampler/image/buffer descriptors. e may be NULL for read-only descriptors. */
+int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                 FFVkExecContext *e, int set, int bind, int offs,
+                                 VkSampler *sampler);
+int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl,
+                               FFVkExecContext *e, int set, int bind, int offs,
+                               VkImageView view, VkImageLayout layout, VkSampler sampler);
+int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                FFVkExecContext *e, int set, int bind, int offs,
+                                VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt);
+
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                       FFVkExecContext *e, AVFrame *f,
+                                       VkImageView *views, int set, int binding,
+                                       VkImageLayout layout, VkSampler sampler);
+
 /**
  * Frees main context.
  */
-- 
2.39.2


[-- Attachment #52: 0051-hwcontext_vulkan-rewrite-to-support-multiplane-surfa.patch --]
[-- Type: text/x-diff, Size: 68673 bytes --]

From f36680714e0636288dacf687e766a9222fe04867 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:37:35 +0100
Subject: [PATCH 51/72] hwcontext_vulkan: rewrite to support multiplane
 surfaces

---
 libavutil/hwcontext_vulkan.c | 744 +++++++++++++++++------------------
 libavutil/hwcontext_vulkan.h |  69 ++--
 2 files changed, 411 insertions(+), 402 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index e7c14fad74..027ecc76b1 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -62,6 +64,8 @@ typedef struct VulkanQueueCtx {
     VkFence fence;
     VkQueue queue;
     int was_synchronous;
+    int qf;
+    int qidx;
 
     /* Buffer dependencies */
     AVBufferRef **buf_deps;
@@ -116,6 +120,11 @@ typedef struct VulkanDevicePriv {
 } VulkanDevicePriv;
 
 typedef struct VulkanFramesPriv {
+    const VkFormat *fmts;
+    int nb_images;
+    VkImageAspectFlags aspect;
+    const struct FFVkFormatEntry *fmt;
+
     /* Image conversions */
     VulkanExecCtx conv_ctx;
 
@@ -145,112 +154,201 @@ typedef struct AVVkFrameInternal {
 #endif
 } AVVkFrameInternal;
 
-#define ADD_VAL_TO_LIST(list, count, val)                                      \
-    do {                                                                       \
-        list = av_realloc_array(list, sizeof(*list), ++count);                 \
-        if (!list) {                                                           \
-            err = AVERROR(ENOMEM);                                             \
-            goto fail;                                                         \
-        }                                                                      \
-        list[count - 1] = av_strdup(val);                                      \
-        if (!list[count - 1]) {                                                \
-            err = AVERROR(ENOMEM);                                             \
-            goto fail;                                                         \
-        }                                                                      \
-    } while(0)
-
-#define RELEASE_PROPS(props, count)                                            \
-    if (props) {                                                               \
-        for (int i = 0; i < count; i++)                                        \
-            av_free((void *)((props)[i]));                                     \
-        av_free((void *)props);                                                \
-    }
+#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT)
+#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)
 
-static const struct {
+static const struct FFVkFormatEntry {
+    VkFormat vkf;
     enum AVPixelFormat pixfmt;
-    const VkFormat vkfmts[5];
-} vk_pixfmt_planar_map[] = {
-    { AV_PIX_FMT_GRAY8,   { VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_GRAY16,  { VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
-
-    { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-    { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-    { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
-    { AV_PIX_FMT_P012, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
-    { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
-
-    { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-
-    { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-    { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-
-    { AV_PIX_FMT_YUV420P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
-    { AV_PIX_FMT_YUV422P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
-    { AV_PIX_FMT_YUV444P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
-    { AV_PIX_FMT_YUVA420P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    /* There is no AV_PIX_FMT_YUVA420P12 */
-    { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
-    { AV_PIX_FMT_YUVA422P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
-    { AV_PIX_FMT_YUVA444P,   {  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
-    { AV_PIX_FMT_VUYX,   { VK_FORMAT_R8G8B8A8_UNORM } },
-    { AV_PIX_FMT_XV36,   { VK_FORMAT_R16G16B16A16_UNORM } },
-
-    { AV_PIX_FMT_BGRA,   { VK_FORMAT_B8G8R8A8_UNORM } },
-    { AV_PIX_FMT_RGBA,   { VK_FORMAT_R8G8B8A8_UNORM } },
-    { AV_PIX_FMT_RGB24,  { VK_FORMAT_R8G8B8_UNORM } },
-    { AV_PIX_FMT_BGR24,  { VK_FORMAT_B8G8R8_UNORM } },
-    { AV_PIX_FMT_RGB48,  { VK_FORMAT_R16G16B16_UNORM } },
-    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
-    { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
-    { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
-    { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
-    { AV_PIX_FMT_BGR0,   { VK_FORMAT_B8G8R8A8_UNORM } },
-    { AV_PIX_FMT_RGB0,   { VK_FORMAT_R8G8B8A8_UNORM } },
-
-    /* Lower priority as there's an endianess-dependent overlap between these
-     * and rgba/bgr0, and PACK32 formats are more limited */
-    { AV_PIX_FMT_BGR32,  { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
-    { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
-
-    { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
-
-    { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
-    { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-    { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
-    { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+    VkImageAspectFlags aspect;
+    int vk_planes;
+    int nb_images;
+    int nb_images_fallback;
+    const VkFormat fallback[5];
+} vk_formats_list[] = {
+    /* Gray formats */
+    { VK_FORMAT_R8_UNORM,   AV_PIX_FMT_GRAY8,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM   } },
+    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GRAY16,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM  } },
+    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } },
+
+    /* RGB formats */
+    { VK_FORMAT_R16G16B16A16_UNORM,       AV_PIX_FMT_XV36,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM       } },
+    { VK_FORMAT_B8G8R8A8_UNORM,           AV_PIX_FMT_BGRA,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM           } },
+    { VK_FORMAT_R8G8B8A8_UNORM,           AV_PIX_FMT_RGBA,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM           } },
+    { VK_FORMAT_R8G8B8_UNORM,             AV_PIX_FMT_RGB24,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM             } },
+    { VK_FORMAT_B8G8R8_UNORM,             AV_PIX_FMT_BGR24,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM             } },
+    { VK_FORMAT_R16G16B16_UNORM,          AV_PIX_FMT_RGB48,   VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM          } },
+    { VK_FORMAT_R16G16B16A16_UNORM,       AV_PIX_FMT_RGBA64,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM       } },
+    { VK_FORMAT_R5G6B5_UNORM_PACK16,      AV_PIX_FMT_RGB565,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16      } },
+    { VK_FORMAT_B5G6R5_UNORM_PACK16,      AV_PIX_FMT_BGR565,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16      } },
+    { VK_FORMAT_B8G8R8A8_UNORM,           AV_PIX_FMT_BGR0,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM           } },
+    { VK_FORMAT_R8G8B8A8_UNORM,           AV_PIX_FMT_RGB0,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM           } },
+    { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
+
+    /* Planar RGB */
+    { VK_FORMAT_R8_UNORM,   AV_PIX_FMT_GBRAP,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM,   VK_FORMAT_R8_UNORM   } },
+    { VK_FORMAT_R16_UNORM,  AV_PIX_FMT_GBRAP16,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM,  VK_FORMAT_R16_UNORM  } },
+    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32,  VK_IMAGE_ASPECT_COLOR_BIT, 1, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT                       } },
+    { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+
+    /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
+    { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,                  AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM   } },
+    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+    { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+    { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,               AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+    /* Two-plane 422 YUV at 8, 10 and 16 bits */
+    { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,                  AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM   } },
+    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+    { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,               AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+    /* Two-plane 444 YUV at 8, 10 and 16 bits */
+    { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM,                  AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8G8_UNORM   } },
+    { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+    { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM,               AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+    /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */
+    { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,    AV_PIX_FMT_YUV420P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
+    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,    AV_PIX_FMT_YUV422P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
+    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,    AV_PIX_FMT_YUV444P,   ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM,  VK_FORMAT_R8_UNORM  } },
+    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+    { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+    /* Single plane 422 at 8, 10 and 12 bits */
+    { VK_FORMAT_G8B8G8R8_422_UNORM,                     AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM     } },
+    { VK_FORMAT_B8G8R8G8_422_UNORM,                     AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM     } },
+    { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+    { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212,    VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
 };
+static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list);
 
 const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
 {
-    for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++)
-        if (vk_pixfmt_planar_map[i].pixfmt == p)
-            return vk_pixfmt_planar_map[i].vkfmts;
+    for (int i = 0; i < nb_vk_formats_list; i++)
+        if (vk_formats_list[i].pixfmt == p)
+            return vk_formats_list[i].fallback;
+    return NULL;
+}
+
+static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p)
+{
+    for (int i = 0; i < nb_vk_formats_list; i++)
+        if (vk_formats_list[i].pixfmt == p)
+            return &vk_formats_list[i];
     return NULL;
 }
 
+/* Malitia pura, Khronos: translate the video bits between usage and format feature flags */
+#define FN_MAP_TO(dst_t, dst, src_t, src)                                     \
+    static dst_t map_ ##src## _to_ ##dst(src_t mask2)                         \
+    {                                                                         \
+        dst_t mask1 = 0x0;                                                    \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \
+                             VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR);        \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR,    \
+                             VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR);        \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR,    \
+                             VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR);        \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR,  \
+                             VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR);        \
+        return mask1;                                                         \
+    }
+/* In both expansions mask2 is the input and mask1 the result; flag1 is the feature bit, flag2 the usage bit */
+#define MAP_TO(mask1, mask2, flag1, flag2) if (mask2 & flag2) mask1 |= flag1;
+FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage) /* map_usage_to_feats() */
+#undef MAP_TO
+#define MAP_TO(mask1, mask2, flag1, flag2) if (mask2 & flag1) mask1 |= flag2;
+FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats) /* map_feats_to_usage() */
+#undef MAP_TO
+#undef FN_MAP_TO
+
+/* Select how pixel format p is stored: as its native (possibly multiplane)
+ * VkFormat in one image, or as the per-plane fallback formats. The outputs
+ * fmts, nb_images, aspect and supported_usage are each skipped when NULL.
+ * Returns 0, AVERROR(ENOTSUP) if unusable, AVERROR(EINVAL) if p is unlisted. */
+static int av_vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
+                          VkImageUsageFlags additional_usage, const VkFormat **fmts,
+                          int *nb_images, VkImageAspectFlags *aspect,
+                          VkImageUsageFlags *supported_usage)
+{
+    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+    VulkanDevicePriv *priv = dev_ctx->internal->priv;
+    FFVulkanFunctions *vk = &priv->vkfn;
+
+    VkFormatProperties2 prop = {
+        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+    };
+    const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
+                                                 VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT  |
+                                                 VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
+    const VkFormatFeatureFlagBits2 full_flags = VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT |
+                                                basic_flags;
+
+    const VkFormatFeatureFlagBits2 additional_flags = map_usage_to_feats(additional_usage);
+
+    for (int i = 0; i < nb_vk_formats_list; i++) {
+        if (vk_formats_list[i].pixfmt == p) {
+            VkFormatFeatureFlags *feat = &prop.formatProperties.optimalTilingFeatures;
+            VkFormatFeatureFlagBits2 feats_vk1, feats_vk2;
+            int basics, full, additional;
+
+            vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, vk_formats_list[i].vkf,
+                                                   &prop);
+
+            /* We want at least the basics supported. Every bit of a mask has
+             * to be present: a partial match must not count as support. */
+            feats_vk1 = *feat;
+            basics = (feats_vk1 & basic_flags) == basic_flags;
+            additional = (feats_vk1 & additional_flags) == additional_flags;
+
+            /* If basics are not supported, OR we have multiplane images,
+             * check the fallback/single-plane rep for support. */
+            if (!basics || vk_formats_list[i].vk_planes > 1)
+                vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
+                                                       vk_formats_list[i].fallback[0],
+                                                       &prop);
+
+            feats_vk2 = *feat;
+            full = (feats_vk2 & full_flags) == full_flags;
+
+            if (additional_flags && !additional) {
+                return AVERROR(ENOTSUP);
+            } else if (full && basics) {
+                if (fmts)
+                    *fmts = &vk_formats_list[i].vkf;
+                if (nb_images)
+                    *nb_images = 1;
+                if (aspect)
+                    *aspect = vk_formats_list[i].aspect;
+                if (supported_usage)
+                    *supported_usage = map_feats_to_usage(feats_vk1);
+                return 0;
+            } else if (full && (vk_formats_list[i].vk_planes > 1)) {
+                if (fmts)
+                    *fmts = vk_formats_list[i].fallback;
+                if (nb_images)
+                    *nb_images = vk_formats_list[i].nb_images_fallback;
+                if (aspect)
+                    *aspect = vk_formats_list[i].aspect;
+                if (supported_usage)
+                    *supported_usage = map_feats_to_usage(feats_vk2);
+                return 0;
+            } else {
+                return AVERROR(ENOTSUP);
+            }
+        }
+    }
+
+    return AVERROR(EINVAL);
+}
+
 static const void *vk_find_struct(const void *chain, VkStructureType stype)
 {
     const VkBaseInStructure *in = chain;
@@ -276,33 +374,6 @@ static void vk_link_struct(void *chain, void *in)
     out->pNext = in;
 }
 
-static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
-                               int linear)
-{
-    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
-    VulkanDevicePriv *priv = dev_ctx->internal->priv;
-    FFVulkanFunctions *vk = &priv->vkfn;
-    const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
-    int planes = av_pix_fmt_count_planes(p);
-
-    if (!fmt)
-        return 0;
-
-    for (int i = 0; i < planes; i++) {
-        VkFormatFeatureFlags flags;
-        VkFormatProperties2 prop = {
-            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
-        };
-        vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
-        flags = linear ? prop.formatProperties.linearTilingFeatures :
-                         prop.formatProperties.optimalTilingFeatures;
-        if (!(flags & FF_VK_DEFAULT_USAGE_FLAGS))
-            return 0;
-    }
-
-    return 1;
-}
-
 static int load_libvulkan(AVHWDeviceContext *ctx)
 {
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
@@ -435,6 +506,27 @@ static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
     return 0;
 }
 
+#define ADD_VAL_TO_LIST(list, count, val)                                      \
+    do {                                                                       \
+        list = av_realloc_array(list, sizeof(*list), ++count);                 \
+        if (!list) {                                                           \
+            err = AVERROR(ENOMEM);                                             \
+            goto fail;                                                         \
+        }                                                                      \
+        list[count - 1] = av_strdup(val);                                      \
+        if (!list[count - 1]) {                                                \
+            err = AVERROR(ENOMEM);                                             \
+            goto fail;                                                         \
+        }                                                                      \
+    } while(0)
+
+#define RELEASE_PROPS(props, count)                                            \
+    if (props) {                                                               \
+        for (int i = 0; i < count; i++)                                        \
+            av_free((void *)((props)[i]));                                     \
+        av_free((void *)props);                                                \
+    }
+
 static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
                             const char * const **dst, uint32_t *num, int debug)
 {
@@ -683,6 +775,10 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VkApplicationInfo application_info = {
         .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+        .pApplicationName   = "ffmpeg",
+        .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
+                                              LIBAVUTIL_VERSION_MINOR,
+                                              LIBAVUTIL_VERSION_MICRO),
         .pEngineName        = "libavutil",
         .apiVersion         = VK_API_VERSION_1_3,
         .engineVersion      = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
@@ -1121,6 +1217,8 @@ static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
         VulkanQueueCtx *q = &cmd->queues[i];
         vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
         q->was_synchronous = 1;
+        q->qf = queue_family_index;
+        q->qidx = i;
     }
 
     return 0;
@@ -1256,6 +1354,7 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
     VkResult ret;
     VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
     FFVulkanFunctions *vk = &p->vkfn;
 
     ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
@@ -1269,7 +1368,9 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
     s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
     s_info->commandBufferCount = 1;
 
+    hwctx->lock_queue(hwfc->device_ctx, q->qf, q->qidx);
     ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence);
+    hwctx->unlock_queue(hwfc->device_ctx, q->qf, q->qidx);
     if (ret != VK_SUCCESS) {
         av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n",
                vk_ret2str(ret));
@@ -1284,7 +1385,6 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
     q->was_synchronous = synchronous;
 
     if (synchronous) {
-        AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
         vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
         vk->ResetFences(hwctx->act_dev, 1, &q->fence);
         unref_exec_ctx_deps(hwfc, cmd);
@@ -1446,12 +1546,6 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
     if (opt_d)
         p->use_linear_images = strtol(opt_d->value, NULL, 10);
 
-    opt_d = av_dict_get(opts, "contiguous_planes", NULL, 0);
-    if (opt_d)
-        p->contiguous_planes = strtol(opt_d->value, NULL, 10);
-    else
-        p->contiguous_planes = -1;
-
     hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
     hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;
 
@@ -1690,8 +1784,10 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
     int count = 0;
     VulkanDevicePriv *p = ctx->internal->priv;
 
-    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
-        count += pixfmt_is_supported(ctx, i, p->use_linear_images);
+    for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
+        count += av_vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
+                                       0, NULL, NULL, NULL, NULL) >= 0;
+    }
 
 #if CONFIG_CUDA
     if (p->dev_is_nvidia)
@@ -1704,9 +1800,12 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
         return AVERROR(ENOMEM);
 
     count = 0;
-    for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
-        if (pixfmt_is_supported(ctx, i, p->use_linear_images))
-            constraints->valid_sw_formats[count++] = i;
+    for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
+        if (av_vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
+                                  0, NULL, NULL, NULL, NULL) >= 0) {
+            constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt;
+        }
+    }
 
 #if CONFIG_CUDA
     if (p->dev_is_nvidia)
@@ -1714,8 +1813,8 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
 #endif
     constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
 
-    constraints->min_width  = 0;
-    constraints->min_height = 0;
+    constraints->min_width  = 1;
+    constraints->min_height = 1;
     constraints->max_width  = p->props.properties.limits.maxImageDimension2D;
     constraints->max_height = p->props.properties.limits.maxImageDimension2D;
 
@@ -1789,7 +1888,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
 
 static void vulkan_free_internal(AVVkFrame *f)
 {
-    AVVkFrameInternal *internal = f->internal;
+    av_unused AVVkFrameInternal *internal = f->internal;
 
 #if CONFIG_CUDA
     if (internal->cuda_fc_ref) {
@@ -1829,17 +1928,22 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
-    int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+    int nb_images = ff_vk_count_images(f);
+
+    VkSemaphoreWaitInfo sem_wait = {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+        .pSemaphores = f->sem,
+        .pValues = f->sem_value,
+        .semaphoreCount = nb_images,
+    };
 
-    /* We could use vkWaitSemaphores, but the validation layer seems to have
-     * issues tracking command buffer execution state on uninit. */
-    vk->DeviceWaitIdle(hwctx->act_dev);
+    vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
 
     vulkan_free_internal(f);
 
-    for (int i = 0; i < planes; i++) {
-        vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
-        vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+    for (int i = 0; i < nb_images; i++) {
+        vk->DestroyImage(hwctx->act_dev,     f->img[i], hwctx->alloc);
+        vk->FreeMemory(hwctx->act_dev,       f->mem[i], hwctx->alloc);
         vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
     }
 
@@ -1849,30 +1953,25 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
 static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
                           void *alloc_pnext, size_t alloc_pnext_stride)
 {
-    int err;
+    int img_cnt = 0, err;
     VkResult ret;
     AVHWDeviceContext *ctx = hwfc->device_ctx;
     VulkanDevicePriv *p = ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
     AVVulkanFramesContext *hwfctx = hwfc->hwctx;
-    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
 
-    VkMemoryRequirements cont_memory_requirements = { 0 };
-    int cont_mem_size_list[AV_NUM_DATA_POINTERS] = { 0 };
-    int cont_mem_size = 0;
-
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
 
-    for (int i = 0; i < planes; i++) {
+    while (f->img[img_cnt]) {
         int use_ded_mem;
         VkImageMemoryRequirementsInfo2 req_desc = {
             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
-            .image = f->img[i],
+            .image = f->img[img_cnt],
         };
         VkMemoryDedicatedAllocateInfo ded_alloc = {
             .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
-            .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
+            .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride),
         };
         VkMemoryDedicatedRequirements ded_req = {
             .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
@@ -1884,79 +1983,35 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
 
         vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
 
-        if (f->tiling == VK_IMAGE_TILING_LINEAR)
+        if (hwfctx->tiling == VK_IMAGE_TILING_LINEAR)
             req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
                                                   p->props.properties.limits.minMemoryMapAlignment);
 
-        if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
-            if (ded_req.requiresDedicatedAllocation) {
-                av_log(hwfc, AV_LOG_ERROR, "Cannot allocate all planes in a single allocation, "
-                                           "device requires dedicated image allocation!\n");
-                return AVERROR(EINVAL);
-            } else if (!i) {
-                cont_memory_requirements = req.memoryRequirements;
-            } else if (cont_memory_requirements.memoryTypeBits !=
-                       req.memoryRequirements.memoryTypeBits) {
-                av_log(hwfc, AV_LOG_ERROR, "The memory requirements differ between plane 0 "
-                                           "and %i, cannot allocate in a single region!\n",
-                                           i);
-                return AVERROR(EINVAL);
-            }
-
-            cont_mem_size_list[i] = FFALIGN(req.memoryRequirements.size,
-                                            req.memoryRequirements.alignment);
-            cont_mem_size += cont_mem_size_list[i];
-            continue;
-        }
-
         /* In case the implementation prefers/requires dedicated allocation */
         use_ded_mem = ded_req.prefersDedicatedAllocation |
                       ded_req.requiresDedicatedAllocation;
         if (use_ded_mem)
-            ded_alloc.image = f->img[i];
+            ded_alloc.image = f->img[img_cnt];
 
         /* Allocate memory */
         if ((err = alloc_mem(ctx, &req.memoryRequirements,
-                             f->tiling == VK_IMAGE_TILING_LINEAR ?
+                             hwfctx->tiling == VK_IMAGE_TILING_LINEAR ?
                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
                              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                              use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
-                             &f->flags, &f->mem[i])))
-            return err;
-
-        f->size[i] = req.memoryRequirements.size;
-        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
-        bind_info[i].image  = f->img[i];
-        bind_info[i].memory = f->mem[i];
-    }
-
-    if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
-        cont_memory_requirements.size = cont_mem_size;
-
-        /* Allocate memory */
-        if ((err = alloc_mem(ctx, &cont_memory_requirements,
-                                f->tiling == VK_IMAGE_TILING_LINEAR ?
-                                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
-                                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-                                (void *)(((uint8_t *)alloc_pnext)),
-                                &f->flags, &f->mem[0])))
+                             &f->flags, &f->mem[img_cnt])))
             return err;
 
-        f->size[0] = cont_memory_requirements.size;
-
-        for (int i = 0, offset = 0; i < planes; i++) {
-            bind_info[i].sType        = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
-            bind_info[i].image        = f->img[i];
-            bind_info[i].memory       = f->mem[0];
-            bind_info[i].memoryOffset = offset;
+        f->size[img_cnt] = req.memoryRequirements.size;
+        bind_info[img_cnt].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
+        bind_info[img_cnt].image  = f->img[img_cnt];
+        bind_info[img_cnt].memory = f->mem[img_cnt];
 
-            f->offset[i] = bind_info[i].memoryOffset;
-            offset += cont_mem_size_list[i];
-        }
+        img_cnt++;
     }
 
     /* Bind the allocated memory to the images */
-    ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info);
+    ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info);
     if (ret != VK_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
                vk_ret2str(ret));
@@ -1982,11 +2037,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     VkImageLayout new_layout;
     VkAccessFlags2 new_access;
     AVVulkanFramesContext *vkfc = hwfc->hwctx;
-    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
-    AVFrame tmp = { .data[0] = (uint8_t *)frame };
     uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
+    int nb_images = ff_vk_count_images(frame);
 
     VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
     VkDependencyInfo dep_info;
@@ -1994,14 +2048,14 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
         .pSignalSemaphoreValues = sem_sig_val,
-        .signalSemaphoreValueCount = planes,
+        .signalSemaphoreValueCount = nb_images,
     };
 
     VkSubmitInfo s_info = {
         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
         .pNext                = &s_timeline_sem_info,
         .pSignalSemaphores    = frame->sem,
-        .signalSemaphoreCount = planes,
+        .signalSemaphoreCount = nb_images,
     };
 
     VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
@@ -2011,7 +2065,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
 
     vkfc->lock_frame(hwfc, frame);
 
-    for (int i = 0; i < planes; i++) {
+    for (int i = 0; i < nb_images; i++) {
         wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
         sem_sig_val[i] = frame->sem_value[i] + 1;
     }
@@ -2029,10 +2083,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         src_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
         dst_qf     = VK_QUEUE_FAMILY_IGNORED;
         s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
-        s_timeline_sem_info.waitSemaphoreValueCount = planes;
+        s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
         s_info.pWaitSemaphores = frame->sem;
         s_info.pWaitDstStageMask = wait_st;
-        s_info.waitSemaphoreCount = planes;
+        s_info.waitSemaphoreCount = nb_images;
         break;
     case PREP_MODE_EXTERNAL_EXPORT:
         new_layout = VK_IMAGE_LAYOUT_GENERAL;
@@ -2040,10 +2094,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         src_qf     = VK_QUEUE_FAMILY_IGNORED;
         dst_qf     = VK_QUEUE_FAMILY_EXTERNAL_KHR;
         s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
-        s_timeline_sem_info.waitSemaphoreValueCount = planes;
+        s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
         s_info.pWaitSemaphores = frame->sem;
         s_info.pWaitDstStageMask = wait_st;
-        s_info.waitSemaphoreCount = planes;
+        s_info.waitSemaphoreCount = nb_images;
         break;
     case PREP_MODE_DECODING_DST:
         new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
@@ -2062,7 +2116,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     /* Change the image layout to something more optimal for writes.
      * This also signals the newly created semaphore, making it usable
      * for synchronization */
-    for (int i = 0; i < planes; i++) {
+    for (int i = 0; i < nb_images; i++) {
         img_bar[i] = (VkImageMemoryBarrier2) {
             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
             .pNext = NULL,
@@ -2077,8 +2131,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
             .image = frame->img[i],
             .subresourceRange = (VkImageSubresourceRange) {
                 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
                 .levelCount = 1,
-                .layerCount = 1,
             },
         };
 
@@ -2090,7 +2144,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
         .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
         .pImageMemoryBarriers = img_bar,
-        .imageMemoryBarrierCount = planes,
+        .imageMemoryBarrierCount = nb_images,
     };
 
     vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
@@ -2101,7 +2155,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     return err;
 }
 
-static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
+static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format,
                                 int frame_w, int frame_h, int plane)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
@@ -2120,17 +2174,17 @@ static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
 
 static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
                         VkImageTiling tiling, VkImageUsageFlagBits usage,
+                        VkImageCreateFlags flags, int nb_layers,
                         void *create_pnext)
 {
     int err;
     VkResult ret;
     AVHWDeviceContext *ctx = hwfc->device_ctx;
     VulkanDevicePriv *p = ctx->internal->priv;
+    VulkanFramesPriv *fp = hwfc->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
-    enum AVPixelFormat format = hwfc->sw_format;
-    const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
-    const int planes = av_pix_fmt_count_planes(format);
+    AVVulkanFramesContext *frames = hwfc->hwctx;
 
     VkExportSemaphoreCreateInfo ext_sem_info = {
         .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
@@ -2165,17 +2219,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
         return AVERROR(ENOMEM);
     }
 
+    // TODO: check width and height alignment for multiplanar formats (must be a multiple of 2 if subsampled)
+
     /* Create the images */
-    for (int i = 0; i < planes; i++) {
+    for (int i = 0; i < fp->nb_images; i++) {
         VkImageCreateInfo create_info = {
             .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
             .pNext                 = create_pnext,
             .imageType             = VK_IMAGE_TYPE_2D,
-            .format                = img_fmts[i],
+            .format                = fp->fmts[i],
             .extent.depth          = 1,
             .mipLevels             = 1,
-            .arrayLayers           = 1,
-            .flags                 = VK_IMAGE_CREATE_ALIAS_BIT,
+            .arrayLayers           = nb_layers,
+            .flags                 = flags,
             .tiling                = tiling,
             .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
             .usage                 = usage,
@@ -2187,7 +2243,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
         };
 
         get_plane_wh(&create_info.extent.width, &create_info.extent.height,
-                     format, hwfc->width, hwfc->height, i);
+                     hwfc->sw_format, hwfc->width, hwfc->height, i);
 
         ret = vk->CreateImage(hwctx->act_dev, &create_info,
                               hwctx->alloc, &f->img[i]);
@@ -2214,7 +2270,9 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
     }
 
     f->flags     = 0x0;
+FF_DISABLE_DEPRECATION_WARNINGS
     f->tiling    = tiling;
+FF_ENABLE_DEPRECATION_WARNINGS
 
     *frame = f;
     return 0;
@@ -2296,41 +2354,23 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
     AVVulkanFramesContext *hwctx = hwfc->hwctx;
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     VulkanFramesPriv *fp = hwfc->internal->priv;
-    VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
     VkExternalMemoryHandleTypeFlags e = 0x0;
 
-    VkExternalMemoryImageCreateInfo eiinfo = {
-        .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
-        .pNext       = hwctx->create_pnext,
-    };
-
 #ifdef _WIN32
     if (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
         try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater()
                              ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
                              : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
 #else
-    if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
-        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
-                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
 
-    if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
-        try_export_flags(hwfc, &eiinfo.handleTypes, &e,
-                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
 #endif
 
-    for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
-        eminfo[i].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
-        eminfo[i].pNext       = hwctx->alloc_pnext[i];
-        eminfo[i].handleTypes = e;
-    }
-
-    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
-                       eiinfo.handleTypes ? &eiinfo : NULL);
+    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
+                       hwctx->nb_layers, hwctx->create_pnext);
     if (err)
         return NULL;
 
-    err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
+    err = alloc_bind_mem(hwfc, f, NULL, 0);
     if (err)
         goto fail;
 
@@ -2389,103 +2429,44 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
     VulkanFramesPriv *fp = hwfc->internal->priv;
     AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
-    const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
-    const int has_modifiers = !!(p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS);
-
-    /* Default tiling flags */
-    hwctx->tiling = hwctx->tiling ? hwctx->tiling :
-                    has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
-                    p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
-                    VK_IMAGE_TILING_OPTIMAL;
-
-    if (!hwctx->usage)
-        hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS;
-
-    modifier_info = vk_find_struct(hwctx->create_pnext,
-                                   VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
-
-    /* Get the supported modifiers if the user has not given any. */
-    if (has_modifiers && !modifier_info) {
-        const VkFormat *fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
-        VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
-        FFVulkanFunctions *vk = &p->vkfn;
-        VkDrmFormatModifierPropertiesEXT *mod_props;
-        uint64_t *modifiers;
-        int modifier_count = 0;
-
-        VkDrmFormatModifierPropertiesListEXT mod_props_list = {
-            .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
-            .pNext = NULL,
-            .drmFormatModifierCount = 0,
-            .pDrmFormatModifierProperties = NULL,
-        };
-        VkFormatProperties2 prop = {
-            .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
-            .pNext = &mod_props_list,
-        };
-
-        /* Get all supported modifiers */
-        vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);
+    VkImageUsageFlagBits supported_usage;
 
-        if (!mod_props_list.drmFormatModifierCount) {
-            av_log(hwfc, AV_LOG_ERROR, "There are no supported modifiers for the given sw_format\n");
-            return AVERROR(EINVAL);
-        }
-
-        /* Createa structure to hold the modifier list info */
-        modifier_info = av_mallocz(sizeof(*modifier_info));
-        if (!modifier_info)
-            return AVERROR(ENOMEM);
-
-        modifier_info->pNext = NULL;
-        modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
+    /* Defaults */
+    if (!hwctx->nb_layers)
+        hwctx->nb_layers = 1;
 
-        /* Add structure to the image creation pNext chain */
-        if (!hwctx->create_pnext)
-            hwctx->create_pnext = modifier_info;
-        else
-            vk_link_struct(hwctx->create_pnext, (void *)modifier_info);
+    /* VK_IMAGE_TILING_OPTIMAL == 0, so no need to check */
 
-        /* Backup the allocated struct to be freed later */
-        fp->modifier_info = modifier_info;
-
-        /* Allocate list of modifiers */
-        modifiers = av_mallocz(mod_props_list.drmFormatModifierCount *
-                               sizeof(*modifiers));
-        if (!modifiers)
-            return AVERROR(ENOMEM);
-
-        modifier_info->pDrmFormatModifiers = modifiers;
+    if (!hwctx->usage)
+        hwctx->usage = VK_IMAGE_USAGE_SAMPLED_BIT      |
+                       VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+                       VK_IMAGE_USAGE_TRANSFER_DST_BIT;
 
-        /* Allocate a temporary list to hold all modifiers supported */
-        mod_props = av_mallocz(mod_props_list.drmFormatModifierCount *
-                               sizeof(*mod_props));
-        if (!mod_props)
-            return AVERROR(ENOMEM);
+    err = av_vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, 0, /* drivers must fix feats. */
+                                &fp->fmts, &fp->nb_images, &fp->aspect, &supported_usage);
+    if (err < 0)
+        return err;
 
-        mod_props_list.pDrmFormatModifierProperties = mod_props;
+    fp->fmt = vk_find_format_entry(hwfc->sw_format);
 
-        /* Finally get all modifiers from the device */
-        vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);
+    /* Remove comments once drivers properly signal features for formats */
+    if (fp->fmt->vk_planes > 1) // || supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)
+        hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
 
-        /* Reject any modifiers that don't match our requirements */
-        for (int i = 0; i < mod_props_list.drmFormatModifierCount; i++) {
-            if (!(mod_props[i].drmFormatModifierTilingFeatures & hwctx->usage))
-                continue;
+//    fp->fmt = vk_find_format_entry(hwfc->sw_format);
 
-            modifiers[modifier_count++] = mod_props[i].drmFormatModifier;
-        }
+    if (!hwctx->img_flags) {
+        hwctx->img_flags = VK_IMAGE_CREATE_ALIAS_BIT;
+        if ((fp->fmt->vk_planes > 1 && fp->nb_images == 1) ||
+            (fp->fmt->vkf != fp->fmt->fallback[0]))
+            hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+    }
 
-        if (!modifier_count) {
-            av_log(hwfc, AV_LOG_ERROR, "None of the given modifiers supports"
-                                       " the usage flags!\n");
-            av_freep(&mod_props);
-            return AVERROR(EINVAL);
-        }
+    if (!hwctx->lock_frame)
+        hwctx->lock_frame = lock_frame;
 
-        modifier_info->drmFormatModifierCount = modifier_count;
-        av_freep(&mod_props);
-    }
+    if (!hwctx->unlock_frame)
+        hwctx->unlock_frame = unlock_frame;
 
     err = create_exec_ctx(hwfc, &fp->conv_ctx,
                           dev_hwctx->queue_family_comp_index,
@@ -2505,8 +2486,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
         return err;
 
     /* Test to see if allocation will fail */
-    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
-                       hwctx->create_pnext);
+    err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
+                       hwctx->nb_layers, hwctx->create_pnext);
     if (err)
         return err;
 
@@ -2522,11 +2503,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
             return AVERROR(ENOMEM);
     }
 
-    if (!hwctx->lock_frame)
-        hwctx->lock_frame = lock_frame;
-    if (!hwctx->unlock_frame)
-        hwctx->unlock_frame = unlock_frame;
-
     return 0;
 }
 
@@ -2602,7 +2578,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
                                    const AVFrame *src, int flags)
 {
     VkResult ret;
-    int err, mapped_mem_count = 0, mem_planes = 0;
+    int err, nb_mem = 0, mapped_mem_count = 0, mem_planes = 0;
     AVVkFrame *f = (AVVkFrame *)src->data[0];
     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
     AVVulkanFramesContext *hwfctx = hwfc->hwctx;
@@ -2622,7 +2598,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     }
 
     if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
-        !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
+        !(hwfctx->tiling == VK_IMAGE_TILING_LINEAR)) {
         av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
                "and linear!\n");
         err = AVERROR(EINVAL);
@@ -2632,35 +2608,35 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     dst->width  = src->width;
     dst->height = src->height;
 
-    mem_planes = hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY ? 1 : planes;
-    for (int i = 0; i < mem_planes; i++) {
+    for (int i = 0; i < AV_NUM_DATA_POINTERS; i++)
+        nb_mem += !!f->mem[i];
+
+    for (int i = 0; i < nb_mem; i++) {
         ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0,
                             VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
         if (ret != VK_SUCCESS) {
-            av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
-                vk_ret2str(ret));
+            av_log(hwfc, AV_LOG_ERROR, "Failed to map %ith frame memory: %s\n",
+                   i, vk_ret2str(ret));
             err = AVERROR_EXTERNAL;
             goto fail;
         }
         mapped_mem_count++;
     }
 
-    if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
-        for (int i = 0; i < planes; i++)
-            dst->data[i] = dst->data[0] + f->offset[i];
-    }
+    for (int i = 0; i < planes; i++)
+        dst->data[i] = dst->data[i] + f->offset[i];
 
     /* Check if the memory contents matter */
     if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
         !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
         VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
-        for (int i = 0; i < planes; i++) {
+        for (int i = 0; i < nb_mem; i++) {
             map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
             map_mem_ranges[i].size = VK_WHOLE_SIZE;
             map_mem_ranges[i].memory = f->mem[i];
         }
 
-        ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes,
+        ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, nb_mem,
                                                map_mem_ranges);
         if (ret != VK_SUCCESS) {
             av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
@@ -2702,25 +2678,25 @@ static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwma
 {
     AVVkFrame *f = hwmap->priv;
     AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
-    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
+    const int nb_images = ff_vk_count_images(f);
 
     VkSemaphoreWaitInfo wait_info = {
         .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
         .flags          = 0x0,
         .pSemaphores    = f->sem,
         .pValues        = f->sem_value,
-        .semaphoreCount = planes,
+        .semaphoreCount = nb_images,
     };
 
     vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);
 
     vulkan_free_internal(f);
 
-    for (int i = 0; i < planes; i++) {
-        vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
-        vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+    for (int i = 0; i < nb_images; i++) {
+        vk->DestroyImage(hwctx->act_dev,     f->img[i], hwctx->alloc);
+        vk->FreeMemory(hwctx->act_dev,       f->mem[i], hwctx->alloc);
         vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
     }
 
@@ -2790,7 +2766,9 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
         goto fail;
     }
 
+FF_DISABLE_DEPRECATION_WARNINGS
     f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
+FF_ENABLE_DEPRECATION_WARNINGS
 
     for (int i = 0; i < desc->nb_layers; i++) {
         const int planes = desc->layers[i].nb_planes;
@@ -2828,7 +2806,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
             .mipLevels             = 1,
             .arrayLayers           = 1,
             .flags                 = 0x0, /* ALIAS flag is implicit for imported images */
-            .tiling                = f->tiling,
+            .tiling                = hwfctx->tiling,
             .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
             .usage                 = VK_IMAGE_USAGE_SAMPLED_BIT |
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
@@ -3498,7 +3476,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
 
         drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
 
-        if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
+        if (hwfctx ->tiling == VK_IMAGE_TILING_OPTIMAL)
             continue;
 
         vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
@@ -3818,7 +3796,10 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
     int bar_num = 0;
     VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
 
-    const int planes = av_pix_fmt_count_planes(pix_fmt);
+    const int img_planes = fp->fmt->vk_planes;
+    const int nb_images = ff_vk_count_images(frame);
+    int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt);
+
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 
     VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@@ -3831,8 +3812,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
         .pWaitSemaphoreValues = frame->sem_value,
         .pSignalSemaphoreValues = sem_signal_values,
-        .waitSemaphoreValueCount = planes,
-        .signalSemaphoreValueCount = planes,
+        .waitSemaphoreValueCount = nb_images,
+        .signalSemaphoreValueCount = nb_images,
     };
 
     VkSubmitInfo s_info = {
@@ -3841,8 +3822,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
         .pSignalSemaphores    = frame->sem,
         .pWaitSemaphores      = frame->sem,
         .pWaitDstStageMask    = sem_wait_dst,
-        .signalSemaphoreCount = planes,
-        .waitSemaphoreCount   = planes,
+        .signalSemaphoreCount = nb_images,
+        .waitSemaphoreCount   = nb_images,
     };
 
     vkfc->lock_frame(hwfc, frame);
@@ -3850,11 +3831,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
     if ((err = wait_start_exec_ctx(hwfc, ectx)))
         goto end;
 
-    for (int i = 0; i < planes; i++)
+    for (int i = 0; i < nb_images; i++)
         sem_signal_values[i] = frame->sem_value[i] + 1;
 
     /* Change the image layout to something more optimal for transfers */
-    for (int i = 0; i < planes; i++) {
+    for (int i = 0; i < nb_images; i++) {
         VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
                                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
         VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
@@ -3890,13 +3871,19 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
                                0, NULL, 0, NULL, bar_num, img_bar);
 
     /* Schedule a copy for each plane */
-    for (int i = 0; i < planes; i++) {
+    for (int i = 0; i < pixfmt_planes; i++) {
+        int idx = FFMIN(i, nb_images - 1);
+        VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+                                              VK_IMAGE_ASPECT_PLANE_0_BIT,
+                                              VK_IMAGE_ASPECT_PLANE_1_BIT,
+                                              VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
         VkBufferImageCopy buf_reg = {
             .bufferOffset = buf_offsets[i],
             .bufferRowLength = buf_stride[i] / desc->comp[i].step,
             .imageSubresource.layerCount = 1,
-            .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+            .imageSubresource.aspectMask = plane_aspect[(img_planes != 1) + i*(img_planes != 1)],
             .imageOffset = { 0, 0, 0, },
         };
 
@@ -3907,11 +3894,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
         buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
 
         if (to_buf)
-            vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
+            vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], frame->layout[idx],
                                      vkbuf->buf, 1, &buf_reg);
         else
-            vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
-                                     frame->layout[i], 1, &buf_reg);
+            vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx],
+                                     frame->layout[idx], 1, &buf_reg);
     }
 
     /* When uploading, do this asynchronously if the source is refcounted by
@@ -3928,7 +3915,7 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
             if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
                 goto end;
         }
-        if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
+        if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, pixfmt_planes)))
             goto end;
         err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
     } else {
@@ -3948,6 +3935,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
     AVVkFrame *f = (AVVkFrame *)vkf->data[0];
     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
     AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+    AVVulkanFramesContext *fc = hwfc->hwctx;
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
 
@@ -3970,7 +3958,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
         return AVERROR(EINVAL);
 
     /* For linear, host visiable images */
-    if (f->tiling == VK_IMAGE_TILING_LINEAR &&
+    if (fc->tiling == VK_IMAGE_TILING_LINEAR &&
         f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
         AVFrame *map = av_frame_alloc();
         if (!map)
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index e89fa52927..13a40fa563 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -169,26 +169,31 @@ typedef enum AVVkFrameFlags {
  */
 typedef struct AVVulkanFramesContext {
     /**
-     * Controls the tiling of allocated frames. If left as optimal tiling,
-     * then during av_hwframe_ctx_init() will decide based on whether the device
-     * supports DRM modifiers, or if the linear_images flag is set, otherwise
-     * will allocate optimally-tiled images.
+     * Controls the tiling of allocated frames.
+     * If left as VK_IMAGE_TILING_OPTIMAL (0), will use optimal tiling.
+     * Can be set to VK_IMAGE_TILING_LINEAR to force linear images,
+     * or VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT to force DMABUF-backed
+     * images.
      */
     VkImageTiling tiling;
 
     /**
-     * Defines extra usage of output frames. If left as 0, the following bits
-     * are set: TRANSFER_SRC, TRANSFER_DST. SAMPLED and STORAGE.
+     * Defines extra usage of output frames. If non-zero, all flags MUST be
+     * supported by the VkFormat. Otherwise, will use supported flags amongst:
+     * - VK_IMAGE_USAGE_SAMPLED_BIT
+     * - VK_IMAGE_USAGE_STORAGE_BIT
+     * - VK_IMAGE_USAGE_TRANSFER_SRC_BIT
+     * - VK_IMAGE_USAGE_TRANSFER_DST_BIT
      */
     VkImageUsageFlagBits usage;
 
     /**
      * Extension data for image creation.
-     * If VkImageDrmFormatModifierListCreateInfoEXT is present in the chain,
-     * and the device supports DRM modifiers, then images will be allocated
-     * with the specific requested DRM modifiers.
+     * If DRM tiling is used, a VkImageDrmFormatModifierListCreateInfoEXT structure
+     * can be added to specify the exact modifier to use.
+     *
      * Additional structures may be added at av_hwframe_ctx_init() time,
-     * which will be freed automatically on uninit(), so users need only free
+     * which will be freed automatically on uninit(), so users must only free
      * any structures they've allocated themselves.
      */
     void *create_pnext;
@@ -209,6 +214,25 @@ typedef struct AVVulkanFramesContext {
      */
     AVVkFrameFlags flags;
 
+    /**
+     * Flags to set during image creation. If unset, defaults to
+     * VK_IMAGE_CREATE_ALIAS_BIT.
+     */
+    VkImageCreateFlags img_flags;
+
+    /**
+     * Vulkan format for each image. MUST be compatible with the pixel format.
+     * If unset, will be automatically set.
+     * There are at most two compatible formats for a frame - a multiplane
+     * format, and a single-plane multi-image format.
+     */
+    VkFormat format[AV_NUM_DATA_POINTERS];
+
+    /**
+     * Number of layers each image will have.
+     */
+    int nb_layers;
+
     /**
      * Locks a frame, preventing other threads from changing frame properties.
      * If set to NULL, will be set to lavu-internal functions that utilize a
@@ -228,14 +252,7 @@ typedef struct AVVulkanFramesContext {
 } AVVulkanFramesContext;
 
 /*
- * Frame structure, the VkFormat of the image will always match
- * the pool's sw_format.
- * All frames, imported or allocated, will be created with the
- * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed.
- *
- * If all queue family indices in the device context are the same,
- * images will be created with the EXCLUSIVE sharing mode. Otherwise, all images
- * will be created using the CONCURRENT sharing mode.
+ * Frame structure.
  *
  * @note the size of this structure is not part of the ABI, to allocate
  * you must use @av_vk_frame_alloc().
@@ -248,8 +265,9 @@ struct AVVkFrame {
 
     /**
      * The same tiling must be used for all images in the frame.
+     * DEPRECATED: use AVVulkanFramesContext.tiling instead.
      */
-    VkImageTiling tiling;
+    attribute_deprecated VkImageTiling tiling;
 
     /**
      * Memory backing the images. Could be less than the amount of planes,
@@ -265,13 +283,13 @@ struct AVVkFrame {
     VkMemoryPropertyFlagBits flags;
 
     /**
-     * Updated after every barrier
+     * Updated after every barrier. One per VkImage.
      */
     VkAccessFlagBits access[AV_NUM_DATA_POINTERS];
     VkImageLayout layout[AV_NUM_DATA_POINTERS];
 
     /**
-     * Synchronization timeline semaphores, one for each sw_format plane.
+     * Synchronization timeline semaphores, one for each VkImage.
      * Must not be freed manually. Must be waited on at every submission using
      * the value in sem_value, and must be signalled at every submission,
      * using an incremented value.
@@ -280,6 +298,7 @@ struct AVVkFrame {
 
     /**
      * Up to date semaphore value at which each image becomes accessible.
+     * One per VkImage.
      * Clients must wait on this value when submitting a command queue,
      * and increment it when signalling.
      */
@@ -291,16 +310,18 @@ struct AVVkFrame {
     struct AVVkFrameInternal *internal;
 
     /**
-     * Describes the binding offset of each plane to the VkDeviceMemory.
+     * Describes the binding offset of each image to the VkDeviceMemory.
+     * One per VkImage.
      */
     ptrdiff_t offset[AV_NUM_DATA_POINTERS];
 
     /**
      * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
      * the image was allocated with the CONCURRENT concurrency option.
+     * One per VkImage.
      */
     uint32_t queue_family[AV_NUM_DATA_POINTERS];
-} AVVkFrame;
+};
 
 /**
  * Allocates a single AVVkFrame and initializes everything as 0.
@@ -309,7 +330,7 @@ struct AVVkFrame {
 AVVkFrame *av_vk_frame_alloc(void);
 
 /**
- * Returns the format of each image up to the number of planes for a given sw_format.
+ * Returns the optimal format for a given sw_format, one for each plane.
  * Returns NULL on unsupported formats.
  */
 const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p);
-- 
2.39.2


[-- Attachment #53: 0052-hwcontext_vulkan-don-t-change-properties-if-prepare_.patch --]
[-- Type: text/x-diff, Size: 2638 bytes --]

From a9ac0aa322a3369ccb5167ae1a8a984faf2e24d1 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:14:08 +0100
Subject: [PATCH 52/72] hwcontext_vulkan: don't change properties if
 prepare_frame fails

---
 libavutil/hwcontext_vulkan.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 027ecc76b1..75004037da 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2113,16 +2113,13 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         break;
     }
 
-    /* Change the image layout to something more optimal for writes.
-     * This also signals the newly created semaphore, making it usable
-     * for synchronization */
     for (int i = 0; i < nb_images; i++) {
         img_bar[i] = (VkImageMemoryBarrier2) {
             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
             .pNext = NULL,
             .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
-            .srcAccessMask = 0x0,
             .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+            .srcAccessMask = frame->access[i],
             .dstAccessMask = new_access,
             .oldLayout = frame->layout[i],
             .newLayout = new_layout,
@@ -2135,21 +2132,23 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                 .levelCount = 1,
             },
         };
-
-        frame->layout[i] = img_bar[i].newLayout;
-        frame->access[i] = img_bar[i].dstAccessMask;
     }
 
-    dep_info = (VkDependencyInfo) {
-        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
-        .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
-        .pImageMemoryBarriers = img_bar,
-        .imageMemoryBarrierCount = nb_images,
-    };
-
-    vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
+    vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_images,
+        });
 
     err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+    if (err >= 0) {
+        for (int i = 0; i < nb_images; i++) {
+            frame->layout[i] = img_bar[i].newLayout;
+            frame->access[i] = img_bar[i].dstAccessMask;
+            frame->queue_family[i] = img_bar[i].dstQueueFamilyIndex;
+        }
+    }
     vkfc->unlock_frame(hwfc, frame);
 
     return err;
-- 
2.39.2


[-- Attachment #54: 0053-hwcontext_vulkan-disable-host-mapping-frames-for-tra.patch --]
[-- Type: text/x-diff, Size: 1033 bytes --]

From 51c352d34c0ab2ae5eea1df1753d2a8d615c33d8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:14:24 +0100
Subject: [PATCH 53/72] hwcontext_vulkan: disable host-mapping frames for
 transfers

Currently broken for multiplane surfaces.
---
 libavutil/hwcontext_vulkan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 75004037da..647a072bdd 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -3946,7 +3946,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
     const int planes = av_pix_fmt_count_planes(swf->format);
 
     int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
-    const int map_host = !!(p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY);
+    const int map_host = 0;
 
     if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
         av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
-- 
2.39.2


[-- Attachment #55: 0054-hwcontext_vulkan-disable-all-mapping-code.patch --]
[-- Type: text/x-diff, Size: 5612 bytes --]

From a871a7d4ffe3f94488cd5091794e683c720bc5df Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:30:00 +0100
Subject: [PATCH 54/72] hwcontext_vulkan: disable all mapping code

Multiplane formats are currently not easy to map.
---
 libavutil/hwcontext_vulkan.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 647a072bdd..761a63ddd7 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -44,7 +44,7 @@
 #include "vulkan.h"
 #include "vulkan_loader.h"
 
-#if CONFIG_LIBDRM
+#if 0
 #include <xf86drm.h>
 #include <drm_fourcc.h>
 #include "hwcontext_drm.h"
@@ -54,7 +54,7 @@
 #endif
 #endif
 
-#if CONFIG_CUDA
+#if 0
 #include "hwcontext_cuda_internal.h"
 #include "cuda_check.h"
 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
@@ -139,7 +139,7 @@ typedef struct VulkanFramesPriv {
 typedef struct AVVkFrameInternal {
     pthread_mutex_t update_mutex;
 
-#if CONFIG_CUDA
+#if 0
     /* Importing external memory into cuda is really expensive so we keep the
      * memory imported all the time */
     AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
@@ -1718,7 +1718,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
      * by the following checks (e.g. non-PCIe ARM GPU), having an empty
      * dev_select will mean it'll get picked. */
     switch(src_ctx->type) {
-#if CONFIG_LIBDRM
+#if 0
 #if CONFIG_VAAPI
     case AV_HWDEVICE_TYPE_VAAPI: {
         AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
@@ -1753,7 +1753,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
         return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
     }
 #endif
-#if CONFIG_CUDA
+#if 0
     case AV_HWDEVICE_TYPE_CUDA: {
         AVHWDeviceContext *cuda_cu = src_ctx;
         AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
@@ -1789,7 +1789,7 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
                                        0, NULL, NULL, NULL, NULL) >= 0;
     }
 
-#if CONFIG_CUDA
+#if 0
     if (p->dev_is_nvidia)
         count++;
 #endif
@@ -1807,7 +1807,7 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
         }
     }
 
-#if CONFIG_CUDA
+#if 0
     if (p->dev_is_nvidia)
         constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
 #endif
@@ -1890,7 +1890,7 @@ static void vulkan_free_internal(AVVkFrame *f)
 {
     av_unused AVVkFrameInternal *internal = f->internal;
 
-#if CONFIG_CUDA
+#if 0
     if (internal->cuda_fc_ref) {
         AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
         int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
@@ -2672,7 +2672,7 @@ fail:
     return err;
 }
 
-#if CONFIG_LIBDRM
+#if 0
 static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
 {
     AVVkFrame *f = hwmap->priv;
@@ -2746,6 +2746,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VulkanDevicePriv *p = ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
+    AVVulkanFramesContext *hwfctx = hwfc->hwctx;
     VulkanFramesPriv *fp = hwfc->internal->priv;
     const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
     VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
@@ -3076,7 +3077,7 @@ fail:
 #endif
 #endif
 
-#if CONFIG_CUDA
+#if 0
 static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                                  AVBufferRef *cuda_hwfc,
                                  const AVFrame *frame)
@@ -3346,7 +3347,7 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
 
     switch (src->format) {
-#if CONFIG_LIBDRM
+#if 0
 #if CONFIG_VAAPI
     case AV_PIX_FMT_VAAPI:
         if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
@@ -3365,7 +3366,7 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
     }
 }
 
-#if CONFIG_LIBDRM
+#if 0
 typedef struct VulkanDRMMapping {
     AVDRMFrameDescriptor drm_desc;
     AVVkFrame *source;
@@ -3533,7 +3534,7 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
 
     switch (dst->format) {
-#if CONFIG_LIBDRM
+#if 0
     case AV_PIX_FMT_DRM_PRIME:
         if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
             return vulkan_map_to_drm(hwfc, dst, src, flags);
@@ -4091,7 +4092,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
 
     switch (src->format) {
-#if CONFIG_CUDA
+#if 0
     case AV_PIX_FMT_CUDA:
 #ifdef _WIN32
         if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
@@ -4110,7 +4111,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
     }
 }
 
-#if CONFIG_CUDA
+#if 0
 static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
                                         const AVFrame *src)
 {
@@ -4209,7 +4210,7 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
     av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
 
     switch (dst->format) {
-#if CONFIG_CUDA
+#if 0
     case AV_PIX_FMT_CUDA:
 #ifdef _WIN32
         if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
-- 
2.39.2


[-- Attachment #56: 0055-lavfi-add-lavfi-only-Vulkan-infrastructure.patch --]
[-- Type: text/x-diff, Size: 21753 bytes --]

From 6bd109733484568c98c2d08935d9c7f05ad7803c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:10:58 +0100
Subject: [PATCH 55/72] lavfi: add lavfi-only Vulkan infrastructure

---
 libavfilter/Makefile                        |   6 +
 libavfilter/vulkan_filter.c                 | 241 +++++++++++++++++++-
 libavfilter/vulkan_filter.h                 |  25 ++
 {libavutil => libavfilter}/vulkan_glslang.c |  19 +-
 {libavutil => libavfilter}/vulkan_shaderc.c |   8 +-
 libavfilter/vulkan_spirv.h                  |  45 ++++
 6 files changed, 330 insertions(+), 14 deletions(-)
 rename {libavutil => libavfilter}/vulkan_glslang.c (95%)
 rename {libavutil => libavfilter}/vulkan_shaderc.c (96%)
 create mode 100644 libavfilter/vulkan_spirv.h

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 0173b11870..f02e787d61 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -615,6 +615,10 @@ OBJS-$(CONFIG_AVSYNCTEST_FILTER)             += src_avsynctest.o
 OBJS-$(CONFIG_AMOVIE_FILTER)                 += src_movie.o
 OBJS-$(CONFIG_MOVIE_FILTER)                  += src_movie.o
 
+# vulkan libs
+OBJS-$(CONFIG_LIBGLSLANG)                    += vulkan_glslang.o
+OBJS-$(CONFIG_LIBSHADERC)                    += vulkan_shaderc.o
+
 # Objects duplicated from other libraries for shared builds
 SHLIBOBJS                                    += log2_tab.o
 
@@ -628,6 +632,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP)                 += qsvvpp.h
 SKIPHEADERS-$(CONFIG_OPENCL)                 += opencl.h
 SKIPHEADERS-$(CONFIG_VAAPI)                  += vaapi_vpp.h
 SKIPHEADERS-$(CONFIG_VULKAN)                 += vulkan.h vulkan_filter.h
+SKIPHEADERS-$(CONFIG_LIBSHADERC)             += vulkan_spirv.h
+SKIPHEADERS-$(CONFIG_LIBGLSLANG)             += vulkan_spirv.h
 
 TOOLS     = graph2dot
 TESTPROGS = drawutils filtfmts formats integral
diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c
index e22541bd23..ad88931c4b 100644
--- a/libavfilter/vulkan_filter.c
+++ b/libavfilter/vulkan_filter.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -54,7 +56,6 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
     int err;
     AVFilterContext *avctx = inlink->dst;
     FFVulkanContext *s = avctx->priv;
-    FFVulkanFunctions *vk = &s->vkfn;
     AVHWFramesContext *input_frames;
 
     if (!inlink->hw_frames_ctx) {
@@ -85,8 +86,7 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
     if (err < 0)
         return err;
 
-    vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props);
-    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+    ff_vk_load_props(s);
 
     /* Default output parameters match input parameters. */
     s->input_format = input_frames->sw_format;
@@ -189,3 +189,238 @@ int ff_vk_filter_init(AVFilterContext *avctx)
 
     return 0;
 }
+
+int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
+                                FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
+                                VkSampler sampler, void *push_src, size_t push_size)
+{
+    int err = 0;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    VkImageView in_views[AV_NUM_DATA_POINTERS];
+    VkImageView out_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Take an execution context from the pool and start recording into it */
+    FFVkExecContext *exec = ff_vk_exec_get(e);
+    ff_vk_exec_start(vkctx, exec);
+
+    /* Bind the pipeline; push constants are uploaded only when supplied */
+    ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+    if (push_src)
+        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                               0, push_size, push_src);
+
+    /* Attach both frames to the submission and create views of their images */
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_create_imageviews(vkctx, exec, in_views,  in_f));
+    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f));
+
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec,  in_f,  in_views, 0, 0,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      sampler);
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, 1,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      NULL);
+
+    /* Collect barriers using the layouts given to the descriptors, then record */
+    ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    /* Round the output up to whole workgroups in X/Y; Z comes from wg_size[2] */
+    vk->CmdDispatch(exec->buf,
+                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
+                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+                    pl->wg_size[2]);
+    return ff_vk_exec_submit(vkctx, exec);
+fail:
+    ff_vk_exec_discard_deps(vkctx, exec);
+    return err;
+}
+
+int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
+                               FFVulkanPipeline *pls[2],
+                               AVFrame *out, AVFrame *tmp, AVFrame *in,
+                               VkSampler sampler, void *push_src, size_t push_size)
+{
+    int err = 0;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    VkImageView in_views[AV_NUM_DATA_POINTERS];
+    VkImageView tmp_views[AV_NUM_DATA_POINTERS];
+    VkImageView out_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Take an execution context from the pool and start recording into it */
+    FFVkExecContext *exec = ff_vk_exec_get(e);
+    ff_vk_exec_start(vkctx, exec);
+
+    /* Attach all three frames to the submission and create views of their images */
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_create_imageviews(vkctx, exec, in_views,  in));
+    RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp));
+    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
+
+    /* Collect one set of barriers for all frames (tmp is read and written), then record */
+    ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    /* Pass 0 runs pls[0] from in to tmp, pass 1 runs pls[1] from tmp to out.
+     * NOTE(review): no barrier separates the two dispatches - confirm. */
+    for (int i = 0; i < 2; i++) {
+        FFVulkanPipeline *pl = pls[i];
+        AVFrame *src_f = !i ? in : tmp;
+        AVFrame *dst_f = !i ? tmp : out;
+        VkImageView *src_views = !i ? in_views : tmp_views;
+        VkImageView *dst_views = !i ? tmp_views : out_views;
+
+        ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+        if (push_src)
+            ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, push_size, push_src);
+        ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0,
+                                          !i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
+                                               VK_IMAGE_LAYOUT_GENERAL,
+                                          sampler);
+        ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1,
+                                          VK_IMAGE_LAYOUT_GENERAL,
+                                          NULL);
+
+        /* Round the output up to whole workgroups in X/Y; Z comes from wg_size[2] */
+        vk->CmdDispatch(exec->buf,
+                        FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
+                        FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+                        pl->wg_size[2]);
+    }
+    return ff_vk_exec_submit(vkctx, exec);
+fail:
+    ff_vk_exec_discard_deps(vkctx, exec);
+    return err;
+}
+
+int ff_vk_filter_process_2in(FFVulkanContext *vkctx, FFVkExecPool *e,
+                             FFVulkanPipeline *pl,
+                             AVFrame *out, AVFrame *in1, AVFrame *in2,
+                             VkSampler sampler, void *push_src, size_t push_size)
+{
+    int err = 0;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    VkImageView in1_views[AV_NUM_DATA_POINTERS];
+    VkImageView in2_views[AV_NUM_DATA_POINTERS];
+    VkImageView out_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Take an execution context from the pool and start recording into it */
+    FFVkExecContext *exec = ff_vk_exec_get(e);
+    ff_vk_exec_start(vkctx, exec);
+
+    /* Attach all three frames to the submission and create views of their images */
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in1,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in2,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_create_imageviews(vkctx, exec, in1_views, in1));
+    RET(ff_vk_create_imageviews(vkctx, exec, in2_views, in2));
+    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
+
+    /* Collect barriers (inputs shader-read, output shader-write), then record them */
+    ff_vk_frame_barrier(vkctx, exec, in1, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, in2, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    /* Bind the pipeline; push constants are uploaded only when supplied */
+    ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+    if (push_src)
+        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                               0, push_size, push_src);
+
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, in1, in1_views, 0, 0,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      sampler);
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, in2, in2_views, 0, 1,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      sampler);
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, 2,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      NULL);
+
+    /* Round the output up to whole workgroups in X/Y; Z comes from wg_size[2] */
+    vk->CmdDispatch(exec->buf,
+                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
+                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+                    pl->wg_size[2]);
+    return ff_vk_exec_submit(vkctx, exec);
+fail:
+    ff_vk_exec_discard_deps(vkctx, exec);
+    return err;
+}
diff --git a/libavfilter/vulkan_filter.h b/libavfilter/vulkan_filter.h
index bfdb9b2d7d..2a2a0e6e97 100644
--- a/libavfilter/vulkan_filter.h
+++ b/libavfilter/vulkan_filter.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -31,4 +33,27 @@ int  ff_vk_filter_config_input         (AVFilterLink   *inlink);
 int  ff_vk_filter_config_output        (AVFilterLink  *outlink);
 int  ff_vk_filter_config_output_inplace(AVFilterLink  *outlink);
 
+/**
+ * Submit a compute shader with a single in and single out for execution.
+ */
+int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
+                                FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
+                                VkSampler sampler, void *push_src, size_t push_size);
+
+/**
+ * Submit a two-pass compute job: pls[0] processes in into tmp, then pls[1] processes tmp into out.
+ */
+int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
+                               FFVulkanPipeline *pls[2],
+                               AVFrame *out, AVFrame *tmp, AVFrame *in,
+                               VkSampler sampler, void *push_src, size_t push_size);
+
+/**
+ * Submit a compute shader with two inputs and a single output for execution.
+ */
+int ff_vk_filter_process_2in(FFVulkanContext *vkctx, FFVkExecPool *e,
+                             FFVulkanPipeline *pl,
+                             AVFrame *out, AVFrame *in1, AVFrame *in2,
+                             VkSampler sampler, void *push_src, size_t push_size);
+
 #endif /* AVFILTER_VULKAN_FILTER_H */
diff --git a/libavutil/vulkan_glslang.c b/libavfilter/vulkan_glslang.c
similarity index 95%
rename from libavutil/vulkan_glslang.c
rename to libavfilter/vulkan_glslang.c
index e7785f6d40..845a530ee0 100644
--- a/libavutil/vulkan_glslang.c
+++ b/libavfilter/vulkan_glslang.c
@@ -21,8 +21,9 @@
 #include <glslang/build_info.h>
 #include <glslang/Include/glslang_c_interface.h>
 
-#include "mem.h"
-#include "avassert.h"
+#include "vulkan_spirv.h"
+#include "libavutil/mem.h"
+#include "libavutil/avassert.h"
 
 static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int glslc_refcount = 0;
@@ -176,11 +177,13 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
 
     av_assert0(glslc_refcount);
 
+    *opaque = NULL;
+
     if (!(glslc_shader = glslang_shader_create(&glslc_input)))
         return AVERROR(ENOMEM);
 
     if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) {
-        ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+        ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
         av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n",
                glslang_shader_get_info_log(glslc_shader),
                glslang_shader_get_info_debug_log(glslc_shader));
@@ -189,7 +192,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
     }
 
     if (!glslang_shader_parse(glslc_shader, &glslc_input)) {
-        ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+        ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
         av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n",
                glslang_shader_get_info_log(glslc_shader),
                glslang_shader_get_info_debug_log(glslc_shader));
@@ -206,7 +209,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
 
     if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT |
                                              GLSLANG_MSG_VULKAN_RULES_BIT)) {
-        ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+        ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
         av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n",
                glslang_program_get_info_log(glslc_program),
                glslang_program_get_info_debug_log(glslc_program));
@@ -219,10 +222,10 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
 
     messages = glslang_program_SPIRV_get_messages(glslc_program);
     if (messages) {
-        ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+        ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
         av_log(avctx, AV_LOG_WARNING, "%s\n", messages);
     } else {
-        ff_vk_print_shader(avctx, shd, AV_LOG_VERBOSE);
+        ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE);
     }
 
     glslang_shader_delete(glslc_shader);
@@ -257,7 +260,7 @@ static void glslc_uninit(FFVkSPIRVCompiler **ctx)
     av_freep(ctx);
 }
 
-static FFVkSPIRVCompiler *ff_vk_glslang_init(void)
+FFVkSPIRVCompiler *ff_vk_glslang_init(void)
 {
     FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret));
     if (!ret)
diff --git a/libavutil/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c
similarity index 96%
rename from libavutil/vulkan_shaderc.c
rename to libavfilter/vulkan_shaderc.c
index bd40edf187..38be1030ad 100644
--- a/libavutil/vulkan_shaderc.c
+++ b/libavfilter/vulkan_shaderc.c
@@ -18,7 +18,8 @@
 
 #include <shaderc/shaderc.h>
 
-#include "mem.h"
+#include "libavutil/mem.h"
+#include "vulkan_spirv.h"
 
 static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
                                FFVkSPIRVShader *shd, uint8_t **data,
@@ -43,6 +44,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
     };
 
     shaderc_compile_options_t opts = shaderc_compile_options_initialize();
+    *opaque = NULL;
     if (!opts)
         return AVERROR(ENOMEM);
 
@@ -65,7 +67,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
 
     loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE;
 
-    ff_vk_print_shader(avctx, shd, loglevel);
+    ff_vk_shader_print(avctx, shd, loglevel);
     if (message && (err || warn))
         av_log(avctx, loglevel, "%s\n", message);
     status = ret < FF_ARRAY_ELEMS(shdc_result) ? shdc_result[ret] : "unknown";
@@ -104,7 +106,7 @@ static void shdc_uninit(FFVkSPIRVCompiler **ctx)
     av_freep(ctx);
 }
 
-static FFVkSPIRVCompiler *ff_vk_shaderc_init(void)
+FFVkSPIRVCompiler *ff_vk_shaderc_init(void)
 {
     FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret));
     if (!ret)
diff --git a/libavfilter/vulkan_spirv.h b/libavfilter/vulkan_spirv.h
new file mode 100644
index 0000000000..5638cd9696
--- /dev/null
+++ b/libavfilter/vulkan_spirv.h
@@ -0,0 +1,45 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VULKAN_SPIRV_H
+#define AVFILTER_VULKAN_SPIRV_H
+
+#include "libavutil/vulkan.h"
+
+#include "vulkan.h"
+#include "config.h"
+
+typedef struct FFVkSPIRVCompiler {
+    void *priv;
+    int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx,
+                          struct FFVkSPIRVShader *shd, uint8_t **data,
+                          size_t *size, const char *entrypoint, void **opaque);
+    void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque);
+    void (*uninit)(struct FFVkSPIRVCompiler **ctx);
+} FFVkSPIRVCompiler;
+
+#if CONFIG_LIBGLSLANG
+FFVkSPIRVCompiler *ff_vk_glslang_init(void);
+#define ff_vk_spirv_init ff_vk_glslang_init
+#endif
+#if CONFIG_LIBSHADERC
+FFVkSPIRVCompiler *ff_vk_shaderc_init(void);
+#define ff_vk_spirv_init ff_vk_shaderc_init
+#endif
+
+#endif /* AVFILTER_VULKAN_SPIRV_H */
-- 
2.39.2


[-- Attachment #57: 0056-avgblur_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18269 bytes --]

From b14473b21aa057181ec85e0ea3bac3e5fa053875 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:19 +0100
Subject: [PATCH 56/72] avgblur_vulkan: port for the rewrite

---
 libavfilter/vf_avgblur_vulkan.c | 339 ++++++++++----------------------
 1 file changed, 108 insertions(+), 231 deletions(-)

diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
index d118ce802c..17b2167951 100644
--- a/libavfilter/vf_avgblur_vulkan.c
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -19,23 +21,20 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
 #include "vulkan_filter.h"
+#include "vulkan_spirv.h"
 #include "internal.h"
 
-#define CGS 32
-
 typedef struct AvgBlurVulkanContext {
     FFVulkanContext vkctx;
 
     int initialized;
+    FFVkExecPool e;
     FFVkQueueFamilyCtx qf;
-    FFVkExecContext *exec;
-    FFVulkanPipeline *pl_hor;
-    FFVulkanPipeline *pl_ver;
-
-    /* Shader updators, must be in the main filter struct */
-    VkDescriptorImageInfo input_images[3];
-    VkDescriptorImageInfo tmp_images[3];
-    VkDescriptorImageInfo output_images[3];
+    VkSampler sampler;
+    FFVulkanPipeline pl_hor;
+    FFVkSPIRVShader shd_hor;
+    FFVulkanPipeline pl_ver;
+    FFVkSPIRVShader shd_ver;
 
     int size_x;
     int size_y;
@@ -71,18 +70,41 @@ static const char blur_kernel[] = {
 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 {
     int err;
-    FFVkSPIRVShader *shd;
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL; /* fail: tests this even if no compile ran */
     AvgBlurVulkanContext *s = ctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
     const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+    FFVkSPIRVShader *shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc;
 
-    FFVulkanDescriptorSetBinding desc_i[2] = {
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+    RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "avgblur_hor_compute",
+                          VK_SHADER_STAGE_COMPUTE_BIT));
+    RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "avgblur_ver_compute",
+                          VK_SHADER_STAGE_COMPUTE_BIT));
+    shd = &s->shd_hor;
+
+    ff_vk_shader_set_compute_sizes(shd, 32, 1, 1);
+
+    desc = (FFVulkanDescriptorSetBinding []) {
         {
             .name       = "input_img",
             .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers   = DUP_SAMPLER(s->sampler),
         },
         {
             .name       = "output_img",
@@ -95,238 +117,79 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
         },
     };
 
-    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0));
 
-    desc_i[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
-    if (!desc_i[0].sampler)
-        return AVERROR_EXTERNAL;
-
-    { /* Create shader for the horizontal pass */
-        desc_i[0].updater = s->input_images;
-        desc_i[1].updater = s->tmp_images;
-
-        s->pl_hor = ff_vk_create_pipeline(vkctx, &s->qf);
-        if (!s->pl_hor)
-            return AVERROR(ENOMEM);
-
-        shd = ff_vk_init_shader(s->pl_hor, "avgblur_compute_hor",
-                                VK_SHADER_STAGE_COMPUTE_BIT);
-        if (!shd)
-            return AVERROR(ENOMEM);
-
-        ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
-
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl_hor, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0));
-
-        GLSLF(0, #define FILTER_RADIUS (%i)                     ,s->size_x - 1);
-        GLSLC(0, #define INC(x) (ivec2(x, 0))                                 );
-        GLSLC(0, #define DIR(var) (var.x)                                     );
-        GLSLD(   blur_kernel                                                  );
-        GLSLC(0, void main()                                                  );
-        GLSLC(0, {                                                            );
-        GLSLC(1,     ivec2 size;                                              );
-        GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);       );
-        for (int i = 0; i < planes; i++) {
-            GLSLC(0,                                                          );
-            GLSLF(1,  size = imageSize(output_img[%i]);                     ,i);
-            GLSLC(1,  if (IS_WITHIN(pos, size)) {                             );
-            if (s->planes & (1 << i)) {
-                GLSLF(2, distort(pos, %i);                                  ,i);
-            } else {
-                GLSLF(2, vec4 res = texture(input_img[%i], pos);            ,i);
-                GLSLF(2, imageStore(output_img[%i], pos, res);              ,i);
-            }
-            GLSLC(1, }                                                        );
+    GLSLF(0, #define FILTER_RADIUS (%i)                     ,s->size_x - 1);
+    GLSLC(0, #define INC(x) (ivec2(x, 0))                                 );
+    GLSLC(0, #define DIR(var) (var.x)                                     );
+    GLSLD(   blur_kernel                                                  );
+    GLSLC(0, void main()                                                  );
+    GLSLC(0, {                                                            );
+    GLSLC(1,     ivec2 size;                                              );
+    GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);       );
+    for (int i = 0; i < planes; i++) {
+        GLSLC(0,                                                          );
+        GLSLF(1,  size = imageSize(output_img[%i]);                     ,i);
+        GLSLC(1,  if (IS_WITHIN(pos, size)) {                             );
+        if (s->planes & (1 << i)) {
+            GLSLF(2, distort(pos, %i);                                  ,i);
+        } else {
+            GLSLF(2, vec4 res = texture(input_img[%i], pos);            ,i);
+            GLSLF(2, imageStore(output_img[%i], pos, res);              ,i);
         }
-        GLSLC(0, }                                                            );
-
-        RET(ff_vk_compile_shader(vkctx, shd, "main"));
-
-        RET(ff_vk_init_pipeline_layout(vkctx, s->pl_hor));
-        RET(ff_vk_init_compute_pipeline(vkctx, s->pl_hor));
+        GLSLC(1, }                                                        );
     }
-
-    { /* Create shader for the vertical pass */
-        desc_i[0].updater = s->tmp_images;
-        desc_i[1].updater = s->output_images;
-
-        s->pl_ver = ff_vk_create_pipeline(vkctx, &s->qf);
-        if (!s->pl_ver)
-            return AVERROR(ENOMEM);
-
-        shd = ff_vk_init_shader(s->pl_ver, "avgblur_compute_ver",
-                                VK_SHADER_STAGE_COMPUTE_BIT);
-        if (!shd)
-            return AVERROR(ENOMEM);
-
-        ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 });
-
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl_ver, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0));
-
-        GLSLF(0, #define FILTER_RADIUS (%i)                     ,s->size_y - 1);
-        GLSLC(0, #define INC(x) (ivec2(0, x))                                 );
-        GLSLC(0, #define DIR(var) (var.y)                                     );
-        GLSLD(   blur_kernel                                                  );
-        GLSLC(0, void main()                                                  );
-        GLSLC(0, {                                                            );
-        GLSLC(1,     ivec2 size;                                              );
-        GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);       );
-        for (int i = 0; i < planes; i++) {
-            GLSLC(0,                                                          );
-            GLSLF(1,  size = imageSize(output_img[%i]);                     ,i);
-            GLSLC(1,  if (IS_WITHIN(pos, size)) {                             );
-            if (s->planes & (1 << i)) {
-                GLSLF(2, distort(pos, %i);                                  ,i);
-            } else {
-                GLSLF(2, vec4 res = texture(input_img[%i], pos);            ,i);
-                GLSLF(2, imageStore(output_img[%i], pos, res);              ,i);
-            }
-            GLSLC(1, }                                                        );
+    GLSLC(0, }                                                            );
+
+    shd = &s->shd_ver;
+    ff_vk_shader_set_compute_sizes(shd, 1, 32, 1);
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0));
+
+    GLSLF(0, #define FILTER_RADIUS (%i)                     ,s->size_y - 1);
+    GLSLC(0, #define INC(x) (ivec2(0, x))                                 );
+    GLSLC(0, #define DIR(var) (var.y)                                     );
+    GLSLD(   blur_kernel                                                  );
+    GLSLC(0, void main()                                                  );
+    GLSLC(0, {                                                            );
+    GLSLC(1,     ivec2 size;                                              );
+    GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);       );
+    for (int i = 0; i < planes; i++) {
+        GLSLC(0,                                                          );
+        GLSLF(1,  size = imageSize(output_img[%i]);                     ,i);
+        GLSLC(1,  if (IS_WITHIN(pos, size)) {                             );
+        if (s->planes & (1 << i)) {
+            GLSLF(2, distort(pos, %i);                                  ,i);
+        } else {
+            GLSLF(2, vec4 res = texture(input_img[%i], pos);            ,i);
+            GLSLF(2, imageStore(output_img[%i], pos, res);              ,i);
         }
-        GLSLC(0, }                                                            );
-
-        RET(ff_vk_compile_shader(vkctx, shd, "main"));
-
-        RET(ff_vk_init_pipeline_layout(vkctx, s->pl_ver));
-        RET(ff_vk_init_compute_pipeline(vkctx, s->pl_ver));
+        GLSLC(1, }                                                        );
     }
+    GLSLC(0, }                                                            );
+
+    RET(spv->compile_shader(spv, ctx, &s->shd_hor, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, &s->shd_hor, spv_data, spv_len, "main"));
+    RET(spv->compile_shader(spv, ctx, &s->shd_ver, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, &s->shd_ver, spv_data, spv_len, "main"));
 
-    /* Execution context */
-    RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl_hor, &s->shd_hor));
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl_ver, &s->shd_ver));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl_hor));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl_ver));
 
     s->initialized = 1;
 
     return 0;
 
 fail:
-    return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f, AVFrame *in_f)
-{
-    int err;
-    VkCommandBuffer cmd_buf;
-    AvgBlurVulkanContext *s = avctx->priv;
-    FFVulkanContext *vkctx = &s->vkctx;
-    FFVulkanFunctions *vk = &vkctx->vkfn;
-    AVVkFrame *in = (AVVkFrame *)in_f->data[0];
-    AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0];
-    AVVkFrame *out = (AVVkFrame *)out_f->data[0];
-
-    const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
-    const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
-    int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
-    /* Update descriptors and init the exec context */
-    ff_vk_start_exec_recording(vkctx, s->exec);
-    cmd_buf = ff_vk_get_exec_buf(s->exec);
-
-    for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->input_images[i].imageView, in->img[i],
-                                   input_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->tmp_images[i].imageView, tmp->img[i],
-                                   output_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->output_images[i].imageView, out->img[i],
-                                   output_formats[i],
-                                   ff_comp_identity_map));
-
-        s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-    }
-
-    ff_vk_update_descriptor_set(vkctx, s->pl_hor, 0);
-    ff_vk_update_descriptor_set(vkctx, s->pl_ver, 0);
-
-    for (int i = 0; i < planes; i++) {
-        VkImageMemoryBarrier bar[] = {
-            {
-                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask = 0,
-                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout = in->layout[i],
-                .newLayout = s->input_images[i].imageLayout,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = in->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask = 0,
-                .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout = tmp->layout[i],
-                .newLayout = s->tmp_images[i].imageLayout,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = tmp->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask = 0,
-                .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
-                .oldLayout = out->layout[i],
-                .newLayout = s->output_images[i].imageLayout,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = out->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-        };
-
-        vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
-                               0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
-        in->layout[i]  = bar[0].newLayout;
-        in->access[i]  = bar[0].dstAccessMask;
-
-        tmp->layout[i] = bar[1].newLayout;
-        tmp->access[i] = bar[1].dstAccessMask;
-
-        out->layout[i] = bar[2].newLayout;
-        out->access[i] = bar[2].dstAccessMask;
-    }
-
-    ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_hor);
-
-    vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
-                  s->vkctx.output_height, 1);
-
-    ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_ver);
-
-    vk->CmdDispatch(cmd_buf, s->vkctx.output_width,
-                    FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
-
-    ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
-    err = ff_vk_submit_exec_queue(vkctx,s->exec);
-    if (err)
-        return err;
-
-    ff_vk_qf_rotate(&s->qf);
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    if (spv)
+        spv->uninit(&spv);
 
     return err;
-
-fail:
-    ff_vk_discard_exec_deps(s->exec);
-    return err;
 }
 
 static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
@@ -352,7 +215,9 @@ static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
     if (!s->initialized)
         RET(init_filter(ctx, in));
 
-    RET(process_frames(ctx, out, tmp, in));
+    RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e,
+                                   (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver },
+                                   out, tmp, in, s->sampler, NULL, 0));
 
     err = av_frame_copy_props(out, in);
     if (err < 0)
@@ -373,6 +238,18 @@ fail:
 static void avgblur_vulkan_uninit(AVFilterContext *avctx)
 {
     AvgBlurVulkanContext *s = avctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_pipeline_free(vkctx, &s->pl_hor);
+    ff_vk_pipeline_free(vkctx, &s->pl_ver);
+    ff_vk_shader_free(vkctx, &s->shd_hor);
+    ff_vk_shader_free(vkctx, &s->shd_ver);
+
+    if (s->sampler)
+        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+                           vkctx->hwctx->alloc);
 
     ff_vk_uninit(&s->vkctx);
 
-- 
2.39.2


[-- Attachment #58: 0057-blend_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 16613 bytes --]

From 83edf3b91ffaed33b2103a6ba743487850f5325c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:43 +0100
Subject: [PATCH 57/72] blend_vulkan: port for the rewrite

---
 libavfilter/vf_blend_vulkan.c | 315 +++++++++++-----------------------
 1 file changed, 102 insertions(+), 213 deletions(-)

diff --git a/libavfilter/vf_blend_vulkan.c b/libavfilter/vf_blend_vulkan.c
index fcc21cbc8d..7ffdc9f3bd 100644
--- a/libavfilter/vf_blend_vulkan.c
+++ b/libavfilter/vf_blend_vulkan.c
@@ -1,5 +1,7 @@
 /*
  * copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
  * The blend modes are based on the blend.c.
  *
  * This file is part of FFmpeg.
@@ -22,12 +24,11 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
 #include "vulkan_filter.h"
+#include "vulkan_spirv.h"
 #include "internal.h"
 #include "framesync.h"
 #include "blend.h"
 
-#define CGS 32
-
 #define IN_TOP    0
 #define IN_BOTTOM 1
 
@@ -40,20 +41,18 @@ typedef struct FilterParamsVulkan {
 
 typedef struct BlendVulkanContext {
     FFVulkanContext vkctx;
-    FFVkQueueFamilyCtx qf;
-    FFVkExecContext *exec;
-    FFVulkanPipeline *pl;
     FFFrameSync fs;
 
-    VkDescriptorImageInfo top_images[3];
-    VkDescriptorImageInfo bottom_images[3];
-    VkDescriptorImageInfo output_images[3];
+    int initialized;
+    FFVulkanPipeline pl;
+    FFVkExecPool e;
+    FFVkQueueFamilyCtx qf;
+    FFVkSPIRVShader shd;
+    VkSampler sampler;
 
     FilterParamsVulkan params[4];
     double all_opacity;
     enum BlendMode all_mode;
-
-    int initialized;
 } BlendVulkanContext;
 
 #define DEFINE_BLEND_MODE(MODE, EXPR) \
@@ -125,223 +124,102 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
 static av_cold int init_filter(AVFilterContext *avctx)
 {
     int err = 0;
-    FFVkSampler *sampler;
-    FFVkSPIRVShader *shd;
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */
     BlendVulkanContext *s = avctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
     const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+    FFVkSPIRVShader *shd = &s->shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc;
 
-    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
-    sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
-    if (!sampler)
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
         return AVERROR_EXTERNAL;
-
-    s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
-    if (!s->pl)
-        return AVERROR(ENOMEM);
-
-    {
-        FFVulkanDescriptorSetBinding image_descs[] = {
-            {
-                .name       = "top_images",
-                .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-                .dimensions = 2,
-                .elems      = planes,
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->top_images,
-                .sampler    = sampler,
-            },
-            {
-                .name       = "bottom_images",
-                .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-                .dimensions = 2,
-                .elems      = planes,
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->bottom_images,
-                .sampler    = sampler,
-            },
-            {
-                .name       = "output_images",
-                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
-                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
-                .mem_quali  = "writeonly",
-                .dimensions = 2,
-                .elems      = planes,
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->output_images,
-            },
-        };
-
-        shd = ff_vk_init_shader(s->pl, "blend_compute", image_descs[0].stages);
-        if (!shd)
-            return AVERROR(ENOMEM);
-
-        ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, CGS, 1 });
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
-        for (int i = 0, j = 0; i < planes; i++) {
-            for (j = 0; j < i; j++)
-                if (s->params[i].blend_func == s->params[j].blend_func)
-                    break;
-            /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */
-            if (j == i) {
-                GLSLD(s->params[i].blend_func);
-            }
-        }
-
-        GLSLC(0, void main()                                                    );
-        GLSLC(0, {                                                              );
-        GLSLC(1,     ivec2 size;                                                );
-        GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);         );
-        for (int i = 0; i < planes; i++) {
-            GLSLC(0,                                                            );
-            GLSLF(1, size = imageSize(output_images[%i]);                     ,i);
-            GLSLC(1, if (IS_WITHIN(pos, size)) {                                );
-            GLSLF(2,     const vec4 top = texture(top_images[%i], pos);       ,i);
-            GLSLF(2,     const vec4 bottom = texture(bottom_images[%i], pos); ,i);
-            GLSLF(2,     const float opacity = %f;                            ,s->params[i].opacity);
-            GLSLF(2,     vec4 dst = %s(top, bottom, opacity);                 ,s->params[i].blend);
-            GLSLC(0,                                                            );
-            GLSLF(2,     imageStore(output_images[%i], pos, dst);             ,i);
-            GLSLC(1, }                                                          );
-        }
-        GLSLC(0, }                                                              );
-
-        RET(ff_vk_compile_shader(vkctx, shd, "main"));
-        RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
-        RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
     }
 
-    RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
-
-    s->initialized = 1;
-
-fail:
-    return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_frame, AVFrame *top_frame, AVFrame *bottom_frame)
-{
-    int err = 0;
-    VkCommandBuffer cmd_buf;
-    BlendVulkanContext *s = avctx->priv;
-    FFVulkanContext *vkctx = &s->vkctx;
-    FFVulkanFunctions *vk = &s->vkctx.vkfn;
-    const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
-    AVVkFrame *out    = (AVVkFrame *)out_frame->data[0];
-    AVVkFrame *top    = (AVVkFrame *)top_frame->data[0];
-    AVVkFrame *bottom = (AVVkFrame *)bottom_frame->data[0];
-
-    AVHWFramesContext *top_fc    = (AVHWFramesContext*)top_frame->hw_frames_ctx->data;
-    AVHWFramesContext *bottom_fc = (AVHWFramesContext*)bottom_frame->hw_frames_ctx->data;
-
-    const VkFormat *top_formats    = av_vkfmt_from_pixfmt(top_fc->sw_format);
-    const VkFormat *bottom_formats = av_vkfmt_from_pixfmt(bottom_fc->sw_format);
-    const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
-    ff_vk_start_exec_recording(vkctx, s->exec);
-    cmd_buf = ff_vk_get_exec_buf(s->exec);
-
-    for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->top_images[i].imageView, top->img[i],
-                                   top_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->bottom_images[i].imageView, bottom->img[i],
-                                   bottom_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->output_images[i].imageView, out->img[i],
-                                   output_formats[i],
-                                   ff_comp_identity_map));
-
-        s->top_images[i].imageLayout    = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->bottom_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
+    RET(ff_vk_shader_init(&s->pl, &s->shd, "blend_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+    ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+    desc = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name       = "top_images",
+            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .dimensions = 2,
+            .elems      = planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers   = DUP_SAMPLER(s->sampler),
+        },
+        {
+            .name       = "bottom_images",
+            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .dimensions = 2,
+            .elems      = planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers   = DUP_SAMPLER(s->sampler),
+        },
+        {
+            .name       = "output_images",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+            .mem_quali  = "writeonly",
+            .dimensions = 2,
+            .elems      = planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0));
+
+    for (int i = 0, j = 0; i < planes; i++) {
+        for (j = 0; j < i; j++)
+            if (s->params[i].blend_func == s->params[j].blend_func)
+                break;
+        /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */
+        if (j == i) {
+            GLSLD(s->params[i].blend_func);
+        }
     }
 
-    ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
+    GLSLC(0, void main()                                                    );
+    GLSLC(0, {                                                              );
+    GLSLC(1,     ivec2 size;                                                );
+    GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);         );
     for (int i = 0; i < planes; i++) {
-        VkImageMemoryBarrier barriers[] = {
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout                   = top->layout[i],
-                .newLayout                   = s->top_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = top->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout                   = bottom->layout[i],
-                .newLayout                   = s->bottom_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = bottom->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_WRITE_BIT,
-                .oldLayout                   = out->layout[i],
-                .newLayout                   = s->output_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = out->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-        };
-
-        vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
-                               0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
-        top->layout[i] = barriers[0].newLayout;
-        top->access[i] = barriers[0].dstAccessMask;
-
-        bottom->layout[i] = barriers[1].newLayout;
-        bottom->access[i] = barriers[1].dstAccessMask;
-
-        out->layout[i] = barriers[2].newLayout;
-        out->access[i] = barriers[2].dstAccessMask;
+        GLSLC(0,                                                            );
+        GLSLF(1, size = imageSize(output_images[%i]);                     ,i);
+        GLSLC(1, if (IS_WITHIN(pos, size)) {                                );
+        GLSLF(2,     const vec4 top = texture(top_images[%i], pos);       ,i);
+        GLSLF(2,     const vec4 bottom = texture(bottom_images[%i], pos); ,i);
+        GLSLF(2,     const float opacity = %f;                            ,s->params[i].opacity);
+        GLSLF(2,     vec4 dst = %s(top, bottom, opacity);                 ,s->params[i].blend);
+        GLSLC(0,                                                            );
+        GLSLF(2,     imageStore(output_images[%i], pos, dst);             ,i);
+        GLSLC(1, }                                                          );
     }
+    GLSLC(0, }                                                              );
 
-    ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-    vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS) / CGS,
-                    FFALIGN(s->vkctx.output_height, CGS) / CGS, 1);
-
-    ff_vk_add_exec_dep(vkctx, s->exec, top_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(vkctx, s->exec, bottom_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(vkctx, s->exec, out_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+    RET(spv->compile_shader(spv, avctx, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
 
-    err = ff_vk_submit_exec_queue(vkctx, s->exec);
-    if (err)
-        return err;
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
 
-    ff_vk_qf_rotate(&s->qf);
-
-    return 0;
+    s->initialized = 1;
 
 fail:
-    ff_vk_discard_exec_deps(s->exec);
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    if (spv)
+        spv->uninit(&spv);
+
     return err;
 }
 
@@ -375,7 +253,9 @@ static int blend_frame(FFFrameSync *fs)
         RET(init_filter(avctx));
     }
 
-    RET(process_frames(avctx, out, top, bottom));
+    RET(ff_vk_filter_process_2in(&s->vkctx, &s->e, &s->pl,
+                                 out, top, bottom,
+                                 s->sampler, NULL, 0));
 
     return ff_filter_frame(outlink, out);
 
@@ -396,10 +276,19 @@ static av_cold int init(AVFilterContext *avctx)
 static av_cold void uninit(AVFilterContext *avctx)
 {
     BlendVulkanContext *s = avctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
 
-    ff_framesync_uninit(&s->fs);
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_pipeline_free(vkctx, &s->pl);
+    ff_vk_shader_free(vkctx, &s->shd);
+
+    if (s->sampler)
+        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+                           vkctx->hwctx->alloc);
 
     ff_vk_uninit(&s->vkctx);
+    ff_framesync_uninit(&s->fs);
 
     s->initialized = 0;
 }
-- 
2.39.2


[-- Attachment #59: 0058-chromaber_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 14904 bytes --]

From 3328104c3ec2aa1412b5c8ea33ef8a96249acdd9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:53 +0100
Subject: [PATCH 58/72] chromaber_vulkan: port for the rewrite

---
 libavfilter/vf_chromaber_vulkan.c | 288 ++++++++++--------------------
 1 file changed, 99 insertions(+), 189 deletions(-)

diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c
index b9423e417e..24649f7b25 100644
--- a/libavfilter/vf_chromaber_vulkan.c
+++ b/libavfilter/vf_chromaber_vulkan.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -19,21 +21,18 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
 #include "vulkan_filter.h"
+#include "vulkan_spirv.h"
 #include "internal.h"
 
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
 typedef struct ChromaticAberrationVulkanContext {
     FFVulkanContext vkctx;
 
     int initialized;
+    FFVulkanPipeline pl;
+    FFVkExecPool e;
     FFVkQueueFamilyCtx qf;
-    FFVkExecContext *exec;
-    FFVulkanPipeline *pl;
-
-    /* Shader updators, must be in the main filter struct */
-    VkDescriptorImageInfo input_images[3];
-    VkDescriptorImageInfo output_images[3];
+    FFVkSPIRVShader shd;
+    VkSampler sampler;
 
     /* Push constants / options */
     struct {
@@ -68,205 +67,105 @@ static const char distort_chroma_kernel[] = {
 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 {
     int err;
-    FFVkSampler *sampler;
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */
     ChromaticAberrationVulkanContext *s = ctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
     const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
-    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
-    /* Create a sampler */
-    sampler = ff_vk_init_sampler(vkctx, 0, VK_FILTER_LINEAR);
-    if (!sampler)
-        return AVERROR_EXTERNAL;
-
-    s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
-    if (!s->pl)
-        return AVERROR(ENOMEM);
+    FFVkSPIRVShader *shd = &s->shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc;
 
     /* Normalize options */
     s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f;
     s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f;
 
-    { /* Create the shader */
-        FFVulkanDescriptorSetBinding desc_i[2] = {
-            {
-                .name       = "input_img",
-                .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-                .dimensions = 2,
-                .elems      = planes,
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->input_images,
-                .sampler    = sampler,
-            },
-            {
-                .name       = "output_img",
-                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
-                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
-                .mem_quali  = "writeonly",
-                .dimensions = 2,
-                .elems      = planes,
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->output_images,
-            },
-        };
-
-        FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "chromaber_compute",
-                                                 VK_SHADER_STAGE_COMPUTE_BIT);
-        if (!shd)
-            return AVERROR(ENOMEM);
-
-        ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
-        GLSLC(0, layout(push_constant, std430) uniform pushConstants {        );
-        GLSLC(1,    vec2 dist;                                                );
-        GLSLC(0, };                                                           );
-        GLSLC(0,                                                              );
-
-        ff_vk_add_push_constant(s->pl, 0, sizeof(s->opts),
-                                VK_SHADER_STAGE_COMPUTE_BIT);
-
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
-
-        GLSLD(   distort_chroma_kernel                                        );
-        GLSLC(0, void main()                                                  );
-        GLSLC(0, {                                                            );
-        GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);             );
-        if (planes == 1) {
-            GLSLC(1, distort_rgb(imageSize(output_img[0]), pos);              );
-        } else {
-            GLSLC(1, ivec2 size = imageSize(output_img[0]);                   );
-            GLSLC(1, vec2 npos = vec2(pos)/vec2(size);                        );
-            GLSLC(1, vec4 res = texture(input_img[0], npos);                  );
-            GLSLC(1, imageStore(output_img[0], pos, res);                     );
-            for (int i = 1; i < planes; i++) {
-                GLSLC(0,                                                      );
-                GLSLF(1,  size = imageSize(output_img[%i]);                 ,i);
-                GLSLC(1,  if (IS_WITHIN(pos, size)) {                         );
-                GLSLF(2,      distort_chroma(%i, size, pos);                ,i);
-                GLSLC(1,  } else {                                            );
-                GLSLC(2,    npos = vec2(pos)/vec2(size);                      );
-                GLSLF(2,    res = texture(input_img[%i], npos);             ,i);
-                GLSLF(2,    imageStore(output_img[%i], pos, res);           ,i);
-                GLSLC(1, }                                                    );
-            }
-        }
-        GLSLC(0, }                                                            );
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
 
-        RET(ff_vk_compile_shader(vkctx, shd, "main"));
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, VK_FILTER_LINEAR));
+    RET(ff_vk_shader_init(&s->pl, &s->shd, "chromaber_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+    ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+    GLSLC(0, layout(push_constant, std430) uniform pushConstants {        );
+    GLSLC(1,    vec2 dist;                                                );
+    GLSLC(0, };                                                           );
+    GLSLC(0,                                                              );
+
+    ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+                            VK_SHADER_STAGE_COMPUTE_BIT);
+
+    desc = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name       = "input_img",
+            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .dimensions = 2,
+            .elems      = planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers   = DUP_SAMPLER(s->sampler),
+        },
+        {
+            .name       = "output_img",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+            .mem_quali  = "writeonly",
+            .dimensions = 2,
+            .elems      = planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
+
+    GLSLD(   distort_chroma_kernel                                        );
+    GLSLC(0, void main()                                                  );
+    GLSLC(0, {                                                            );
+    GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);             );
+    if (planes == 1) {
+        GLSLC(1, distort_rgb(imageSize(output_img[0]), pos);              );
+    } else {
+        GLSLC(1, ivec2 size = imageSize(output_img[0]);                   );
+        GLSLC(1, vec2 npos = vec2(pos)/vec2(size);                        );
+        GLSLC(1, vec4 res = texture(input_img[0], npos);                  );
+        GLSLC(1, imageStore(output_img[0], pos, res);                     );
+        for (int i = 1; i < planes; i++) {
+            GLSLC(0,                                                      );
+            GLSLF(1,  size = imageSize(output_img[%i]);                 ,i);
+            GLSLC(1,  if (IS_WITHIN(pos, size)) {                         );
+            GLSLF(2,      distort_chroma(%i, size, pos);                ,i);
+            GLSLC(1,  } else {                                            );
+            GLSLC(2,    npos = vec2(pos)/vec2(size);                      );
+            GLSLF(2,    res = texture(input_img[%i], npos);             ,i);
+            GLSLF(2,    imageStore(output_img[%i], pos, res);           ,i);
+            GLSLC(1, }                                                    );
+        }
     }
+    GLSLC(0, }                                                            );
 
-    RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
-    RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+    RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
 
-    /* Execution context */
-    RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
 
     s->initialized = 1;
 
     return 0;
 
 fail:
-    return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
-{
-    int err = 0;
-    VkCommandBuffer cmd_buf;
-    ChromaticAberrationVulkanContext *s = avctx->priv;
-    FFVulkanContext *vkctx = &s->vkctx;
-    FFVulkanFunctions *vk = &vkctx->vkfn;
-    AVVkFrame *in = (AVVkFrame *)in_f->data[0];
-    AVVkFrame *out = (AVVkFrame *)out_f->data[0];
-    int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-    const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
-    const VkFormat *ouput_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
-    /* Update descriptors and init the exec context */
-    ff_vk_start_exec_recording(vkctx, s->exec);
-    cmd_buf = ff_vk_get_exec_buf(s->exec);
-
-    for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->input_images[i].imageView, in->img[i],
-                                   input_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->output_images[i].imageView, out->img[i],
-                                   ouput_formats[i],
-                                   ff_comp_identity_map));
-
-        s->input_images[i].imageLayout  = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-    }
-
-    ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
-    for (int i = 0; i < planes; i++) {
-        VkImageMemoryBarrier bar[2] = {
-            {
-                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask = 0,
-                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout = in->layout[i],
-                .newLayout = s->input_images[i].imageLayout,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = in->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask = 0,
-                .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
-                .oldLayout = out->layout[i],
-                .newLayout = s->output_images[i].imageLayout,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = out->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-        };
-
-        vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
-                               0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
-        in->layout[i]  = bar[0].newLayout;
-        in->access[i]  = bar[0].dstAccessMask;
-
-        out->layout[i] = bar[1].newLayout;
-        out->access[i] = bar[1].dstAccessMask;
-    }
-
-    ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
-    ff_vk_update_push_exec(vkctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT,
-                           0, sizeof(s->opts), &s->opts);
-
-    vk->CmdDispatch(cmd_buf,
-                    FFALIGN(s->vkctx.output_width,  CGROUPS[0])/CGROUPS[0],
-                    FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
-
-    ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
-    err = ff_vk_submit_exec_queue(vkctx, s->exec);
-    if (err)
-        return err;
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    if (spv)
+        spv->uninit(&spv);
 
-    ff_vk_qf_rotate(&s->qf);
-
-    return err;
-
-fail:
-    ff_vk_discard_exec_deps(s->exec);
     return err;
 }
 
@@ -286,7 +185,8 @@ static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
     if (!s->initialized)
         RET(init_filter(ctx, in));
 
-    RET(process_frames(ctx, out, in));
+    RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+                                    s->sampler, &s->opts, sizeof(s->opts)));
 
     err = av_frame_copy_props(out, in);
     if (err < 0)
@@ -305,6 +205,16 @@ fail:
 static void chromaber_vulkan_uninit(AVFilterContext *avctx)
 {
     ChromaticAberrationVulkanContext *s = avctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_pipeline_free(vkctx, &s->pl);
+    ff_vk_shader_free(vkctx, &s->shd);
+
+    if (s->sampler)
+        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+                           vkctx->hwctx->alloc);
 
     ff_vk_uninit(&s->vkctx);
 
-- 
2.39.2


[-- Attachment #60: 0059-flip_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 13075 bytes --]

From f69abda00b625c1f9d69421e7c6bef6713a43f76 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:12:42 +0100
Subject: [PATCH 59/72] flip_vulkan: port for the rewrite

---
 libavfilter/vf_flip_vulkan.c | 229 ++++++++++++-----------------------
 1 file changed, 78 insertions(+), 151 deletions(-)

diff --git a/libavfilter/vf_flip_vulkan.c b/libavfilter/vf_flip_vulkan.c
index 0223786ef1..0330dce257 100644
--- a/libavfilter/vf_flip_vulkan.c
+++ b/libavfilter/vf_flip_vulkan.c
@@ -1,5 +1,7 @@
 /*
  * copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -20,10 +22,9 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
 #include "vulkan_filter.h"
+#include "vulkan_spirv.h"
 #include "internal.h"
 
-#define CGS 32
-
 enum FlipType {
     FLIP_VERTICAL,
     FLIP_HORIZONTAL,
@@ -32,32 +33,49 @@ enum FlipType {
 
 typedef struct FlipVulkanContext {
     FFVulkanContext vkctx;
-    FFVkQueueFamilyCtx qf;
-    FFVkExecContext *exec;
-    FFVulkanPipeline *pl;
-
-    VkDescriptorImageInfo input_images[3];
-    VkDescriptorImageInfo output_images[3];
 
     int initialized;
+    FFVulkanPipeline pl;
+    FFVkExecPool e;
+    FFVkQueueFamilyCtx qf;
+    FFVkSPIRVShader shd;
+    VkSampler sampler;
 } FlipVulkanContext;
 
 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType type)
 {
     int err = 0;
-    FFVkSPIRVShader *shd;
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque;
     FlipVulkanContext *s = ctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
     const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+    FFVkSPIRVShader *shd = &s->shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc;
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+    RET(ff_vk_shader_init(&s->pl, &s->shd, "flip_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+    ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
 
-    FFVulkanDescriptorSetBinding image_descs[] = {
+    desc = (FFVulkanDescriptorSetBinding []) {
         {
             .name       = "input_image",
             .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-            .updater    = s->input_images,
+            .samplers   = DUP_SAMPLER(s->sampler),
         },
         {
             .name       = "output_image",
@@ -67,167 +85,75 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-            .updater    = s->output_images,
         },
     };
 
-    image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
-    if (!image_descs[0].sampler)
-            return AVERROR_EXTERNAL;
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
 
-    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
-    {
-        s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
-        if (!s->pl)
-            return AVERROR(ENOMEM);
-
-        shd = ff_vk_init_shader(s->pl, "flip_compute", image_descs[0].stages);
-        if (!shd)
-            return AVERROR(ENOMEM);
-
-        ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
-        GLSLC(0, void main()                                                                    );
-        GLSLC(0, {                                                                              );
-        GLSLC(1,     ivec2 size;                                                                );
-        GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);                         );
-        for (int i = 0; i < planes; i++) {
-            GLSLC(0,                                                                            );
-            GLSLF(1, size = imageSize(output_image[%i]);                                      ,i);
-            GLSLC(1, if (IS_WITHIN(pos, size)) {                                                );
-            switch (type)
-            {
-            case FLIP_HORIZONTAL:
-                GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y));   ,i);
-                break;
-            case FLIP_VERTICAL:
-                GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y));   ,i);
-                break;
-            case FLIP_BOTH:
-                GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));,         i);
-                break;
-            default:
-                GLSLF(2, vec4 res = texture(input_image[%i], pos);                            ,i);
-                break;
-            }
-            GLSLF(2,     imageStore(output_image[%i], pos, res);                              ,i);
-            GLSLC(1, }                                                                          );
+    GLSLC(0, void main()                                                                    );
+    GLSLC(0, {                                                                              );
+    GLSLC(1,     ivec2 size;                                                                );
+    GLSLC(1,     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);                         );
+    for (int i = 0; i < planes; i++) {
+        GLSLC(0,                                                                            );
+        GLSLF(1, size = imageSize(output_image[%i]);                                      ,i);
+        GLSLC(1, if (IS_WITHIN(pos, size)) {                                                );
+        switch (type)
+        {
+        case FLIP_HORIZONTAL:
+            GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y));   ,i);
+            break;
+        case FLIP_VERTICAL:
+            GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y));   ,i);
+            break;
+        case FLIP_BOTH:
+            GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));,         i);
+            break;
+        default:
+            GLSLF(2, vec4 res = texture(input_image[%i], pos);                            ,i);
+            break;
         }
-        GLSLC(0, }                                                                              );
-
-        RET(ff_vk_compile_shader(vkctx, shd, "main"));
-        RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
-        RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+        GLSLF(2,     imageStore(output_image[%i], pos, res);                              ,i);
+        GLSLC(1, }                                                                          );
     }
+    GLSLC(0, }                                                                              );
+
+    RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
 
-    RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
     s->initialized = 1;
 
 fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    if (spv)
+        spv->uninit(&spv);
+
     return err;
 }
 
 static av_cold void flip_vulkan_uninit(AVFilterContext *avctx)
 {
     FlipVulkanContext *s = avctx->priv;
-    ff_vk_uninit(&s->vkctx);
 
-    s->initialized = 0;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
-    int err = 0;
-    VkCommandBuffer cmd_buf;
-    FlipVulkanContext *s = avctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
-    FFVulkanFunctions *vk = &s->vkctx.vkfn;
-    AVVkFrame *in = (AVVkFrame *)inframe->data[0];
-    AVVkFrame *out = (AVVkFrame *)outframe->data[0];
-    const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-    const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
-    const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
-    ff_vk_start_exec_recording(vkctx, s->exec);
-    cmd_buf = ff_vk_get_exec_buf(s->exec);
-
-    for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->input_images[i].imageView, in->img[i],
-                                   input_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->output_images[i].imageView, out->img[i],
-                                   output_formats[i],
-                                   ff_comp_identity_map));
-
-        s->input_images[i].imageLayout  = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-    }
+    FFVulkanFunctions *vk = &vkctx->vkfn;
 
-    ff_vk_update_descriptor_set(vkctx, s->pl, 0);
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_pipeline_free(vkctx, &s->pl);
+    ff_vk_shader_free(vkctx, &s->shd);
 
-    for (int i = 0; i < planes; i++) {
-        VkImageMemoryBarrier barriers[] = {
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout                   = in->layout[i],
-                .newLayout                   = s->input_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = in->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_WRITE_BIT,
-                .oldLayout                   = out->layout[i],
-                .newLayout                   = s->output_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = out->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-        };
-
-        vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
-                               0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
-        in->layout[i]  = barriers[0].newLayout;
-        in->access[i]  = barriers[0].dstAccessMask;
-
-        out->layout[i] = barriers[1].newLayout;
-        out->access[i] = barriers[1].dstAccessMask;
-    }
-
-    ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-    vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
-                    s->vkctx.output_height, 1);
-
-    ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
-    err = ff_vk_submit_exec_queue(vkctx, s->exec);
-    if (err)
-        return err;
+    if (s->sampler)
+        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+                           vkctx->hwctx->alloc);
 
-    ff_vk_qf_rotate(&s->qf);
+    ff_vk_uninit(&s->vkctx);
 
-    return 0;
-fail:
-    ff_vk_discard_exec_deps(s->exec);
-    return err;
+    s->initialized = 0;
 }
 
 static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type)
@@ -247,7 +173,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type)
     if (!s->initialized)
         RET(init_filter(ctx, in, type));
 
-    RET(process_frames(ctx, out, in));
+    RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+                                    s->sampler, NULL, 0));
 
     RET(av_frame_copy_props(out, in));
 
-- 
2.39.2


[-- Attachment #61: 0060-gblur_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 17658 bytes --]

From 369e41818f25c68097764dd417cd03b6984e3ce6 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:12:55 +0100
Subject: [PATCH 60/72] gblur_vulkan: port for the rewrite

---
 libavfilter/vf_gblur_vulkan.c | 314 ++++++++++------------------------
 1 file changed, 95 insertions(+), 219 deletions(-)

diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c
index c6360799a7..72308ffe83 100644
--- a/libavfilter/vf_gblur_vulkan.c
+++ b/libavfilter/vf_gblur_vulkan.c
@@ -1,5 +1,7 @@
 /*
  * copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -20,6 +22,7 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
 #include "vulkan_filter.h"
+#include "vulkan_spirv.h"
 #include "internal.h"
 
 #define CGS 32
@@ -27,26 +30,23 @@
 
 typedef struct GBlurVulkanContext {
     FFVulkanContext vkctx;
-    FFVkQueueFamilyCtx qf;
-    FFVkExecContext *exec;
-    FFVulkanPipeline *pl_hor;
-    FFVulkanPipeline *pl_ver;
-    FFVkBuffer params_buf_hor;
-    FFVkBuffer params_buf_ver;
-
-    VkDescriptorImageInfo input_images[3];
-    VkDescriptorImageInfo tmp_images[3];
-    VkDescriptorImageInfo output_images[3];
-    VkDescriptorBufferInfo params_desc_hor;
-    VkDescriptorBufferInfo params_desc_ver;
 
     int initialized;
+    FFVkExecPool e;
+    FFVkQueueFamilyCtx qf;
+    VkSampler sampler;
+    FFVulkanPipeline pl_hor;
+    FFVkSPIRVShader shd_hor;
+    FFVkBuffer params_hor;
+    FFVulkanPipeline pl_ver;
+    FFVkSPIRVShader shd_ver;
+    FFVkBuffer params_ver;
+
     int size;
     int sizeV;
     int planes;
     float sigma;
     float sigmaV;
-    AVFrame *tmpframe;
 } GBlurVulkanContext;
 
 static const char gblur_func[] = {
@@ -118,16 +118,17 @@ static av_cold void init_gaussian_params(GBlurVulkanContext *s)
         s->sizeV = s->size;
     else
         init_kernel_size(s, &s->sizeV);
-
-    s->tmpframe = NULL;
 }
 
-static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd,
-                               FFVkBuffer *params_buf, VkDescriptorBufferInfo *params_desc,
-                               int ksize, float sigma)
+static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl,
+                               FFVkSPIRVShader *shd, FFVkBuffer *params_buf,
+                               int ksize, float sigma, FFVkSPIRVCompiler *spv)
 {
     int err = 0;
     uint8_t *kernel_mapped;
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque;
 
     const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
 
@@ -137,7 +138,6 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
         .mem_quali   = "readonly",
         .mem_layout  = "std430",
         .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
-        .updater     = NULL,
         .buf_content = NULL,
     };
 
@@ -145,10 +145,9 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
     if (!kernel_def)
         return AVERROR(ENOMEM);
 
-    buf_desc.updater = params_desc;
     buf_desc.buf_content = kernel_def;
 
-    RET(ff_vk_add_descriptor_set(&s->vkctx, pl, shd, &buf_desc, 1, 0));
+    RET(ff_vk_pipeline_descriptor_set_add(&s->vkctx, pl, shd, &buf_desc, 1, 1, 0));
 
     GLSLD(   gblur_func                                               );
     GLSLC(0, void main()                                              );
@@ -169,26 +168,31 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
     }
     GLSLC(0, }                                                        );
 
-    RET(ff_vk_compile_shader(&s->vkctx, shd, "main"));
+    RET(spv->compile_shader(spv, s, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(&s->vkctx, shd, spv_data, spv_len, "main"));
 
-    RET(ff_vk_init_pipeline_layout(&s->vkctx, pl));
-    RET(ff_vk_init_compute_pipeline(&s->vkctx, pl));
+    RET(ff_vk_init_compute_pipeline(&s->vkctx, pl, shd));
+    RET(ff_vk_exec_pipeline_register(&s->vkctx, &s->e, pl));
 
-    RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL,
-                         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+    RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
     RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0));
 
     init_gaussian_kernel((float *)kernel_mapped, sigma, ksize);
 
     RET(ff_vk_unmap_buffers(&s->vkctx, params_buf, 1, 1));
 
-    params_desc->buffer = params_buf->buf;
-    params_desc->range  = VK_WHOLE_SIZE;
-
-    ff_vk_update_descriptor_set(&s->vkctx, pl, 1);
+    RET(ff_vk_set_descriptor_buffer(&s->vkctx, pl, NULL, 1, 0, 0,
+                                    params_buf->address, params_buf->size,
+                                    VK_FORMAT_UNDEFINED));
 
 fail:
     av_free(kernel_def);
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
     return err;
 }
 
@@ -196,16 +200,35 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 {
     int err = 0;
     GBlurVulkanContext *s = ctx->priv;
-    FFVkSPIRVShader *shd;
+    FFVulkanContext *vkctx = &s->vkctx;
     const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
 
-    FFVulkanDescriptorSetBinding image_descs[] = {
+    FFVkSPIRVShader *shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc;
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+    RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "gblur_hor_compute",
+                          VK_SHADER_STAGE_COMPUTE_BIT));
+    RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "gblur_ver_compute",
+                          VK_SHADER_STAGE_COMPUTE_BIT));
+
+    desc = (FFVulkanDescriptorSetBinding []) {
         {
             .name       = "input_images",
             .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers   = DUP_SAMPLER(s->sampler),
         },
         {
             .name       = "output_images",
@@ -218,215 +241,64 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
         },
     };
 
-    image_descs[0].sampler = ff_vk_init_sampler(&s->vkctx, 1, VK_FILTER_LINEAR);
-    if (!image_descs[0].sampler)
-        return AVERROR_EXTERNAL;
-
     init_gaussian_params(s);
 
-    ff_vk_qf_init(&s->vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
     {
-        /* Create shader for the horizontal pass */
-        image_descs[0].updater = s->input_images;
-        image_descs[1].updater = s->tmp_images;
-
-        s->pl_hor = ff_vk_create_pipeline(&s->vkctx, &s->qf);
-        if (!s->pl_hor) {
-            err = AVERROR(ENOMEM);
-            goto fail;
-        }
-
-        shd = ff_vk_init_shader(s->pl_hor, "gblur_compute_hor", image_descs[0].stages);
-        if (!shd) {
-            err = AVERROR(ENOMEM);
-            goto fail;
-        }
+        shd = &s->shd_hor;
+        ff_vk_shader_set_compute_sizes(shd, 32, 1, 1);
 
-        ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
-        RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_hor, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
+        RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0));
 
         GLSLC(0, #define OFFSET (vec2(i, 0.0)));
-        RET(init_gblur_pipeline(s, s->pl_hor, shd, &s->params_buf_hor, &s->params_desc_hor,
-                                s->size, s->sigma));
+        RET(init_gblur_pipeline(s, &s->pl_hor, shd, &s->params_hor, s->size, s->sigma, spv));
     }
 
     {
-        /* Create shader for the vertical pass */
-        image_descs[0].updater = s->tmp_images;
-        image_descs[1].updater = s->output_images;
-
-        s->pl_ver = ff_vk_create_pipeline(&s->vkctx, &s->qf);
-        if (!s->pl_ver) {
-            err = AVERROR(ENOMEM);
-            goto fail;
-        }
+        shd = &s->shd_ver;
+        ff_vk_shader_set_compute_sizes(shd, 1, 32, 1);
 
-        shd = ff_vk_init_shader(s->pl_ver, "gblur_compute_ver", image_descs[0].stages);
-        if (!shd) {
-            err = AVERROR(ENOMEM);
-            goto fail;
-        }
-
-        ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 });
-        RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_ver, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
+        RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0));
 
         GLSLC(0, #define OFFSET (vec2(0.0, i)));
-        RET(init_gblur_pipeline(s, s->pl_ver, shd, &s->params_buf_ver, &s->params_desc_ver,
-                                s->sizeV, s->sigmaV));
+        RET(init_gblur_pipeline(s, &s->pl_ver, shd, &s->params_ver, s->sizeV, s->sigmaV, spv));
     }
 
-    RET(ff_vk_create_exec_ctx(&s->vkctx, &s->exec, &s->qf));
-
     s->initialized = 1;
 
 fail:
+    if (spv)
+        spv->uninit(&spv);
+
     return err;
 }
 
 static av_cold void gblur_vulkan_uninit(AVFilterContext *avctx)
 {
     GBlurVulkanContext *s = avctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
 
-    av_frame_free(&s->tmpframe);
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_pipeline_free(vkctx, &s->pl_hor);
+    ff_vk_pipeline_free(vkctx, &s->pl_ver);
+    ff_vk_shader_free(vkctx, &s->shd_hor);
+    ff_vk_shader_free(vkctx, &s->shd_ver);
+    ff_vk_free_buf(vkctx, &s->params_hor);
+    ff_vk_free_buf(vkctx, &s->params_ver);
+
+    if (s->sampler)
+        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+                           vkctx->hwctx->alloc);
 
-    ff_vk_free_buf(&s->vkctx, &s->params_buf_hor);
-    ff_vk_free_buf(&s->vkctx, &s->params_buf_ver);
     ff_vk_uninit(&s->vkctx);
 
     s->initialized = 0;
 }
 
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
-    int err;
-    VkCommandBuffer cmd_buf;
-    GBlurVulkanContext *s = avctx->priv;
-    FFVulkanFunctions *vk = &s->vkctx.vkfn;
-
-    const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
-    AVVkFrame *in  = (AVVkFrame *)inframe->data[0];
-    AVVkFrame *out = (AVVkFrame *)outframe->data[0];
-    AVVkFrame *tmp = (AVVkFrame *)s->tmpframe->data[0];
-
-    const VkFormat *input_formats  = av_vkfmt_from_pixfmt(s->vkctx.input_format);
-    const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
-    ff_vk_start_exec_recording(&s->vkctx, s->exec);
-    cmd_buf = ff_vk_get_exec_buf(s->exec);
-
-    for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->input_images[i].imageView,
-                                   in->img[i],
-                                   input_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->tmp_images[i].imageView,
-                                   tmp->img[i],
-                                   output_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->output_images[i].imageView,
-                                   out->img[i],
-                                   output_formats[i],
-                                   ff_comp_identity_map));
-
-        s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-    }
-
-    ff_vk_update_descriptor_set(&s->vkctx, s->pl_hor, 0);
-    ff_vk_update_descriptor_set(&s->vkctx, s->pl_ver, 0);
-
-    for (int i = 0; i < planes; i++) {
-        VkImageMemoryBarrier barriers[] = {
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout                   = in->layout[i],
-                .newLayout                   = s->input_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = in->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout                   = tmp->layout[i],
-                .newLayout                   = s->tmp_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = tmp->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_WRITE_BIT,
-                .oldLayout                   = out->layout[i],
-                .newLayout                   = s->output_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = out->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-        };
-
-        vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
-                               0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
-        in->layout[i]  = barriers[0].newLayout;
-        in->access[i]  = barriers[0].dstAccessMask;
-
-        tmp->layout[i] = barriers[1].newLayout;
-        tmp->access[i] = barriers[1].dstAccessMask;
-
-        out->layout[i] = barriers[2].newLayout;
-        out->access[i] = barriers[2].dstAccessMask;
-    }
-
-    ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_hor);
-
-    vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
-                    s->vkctx.output_height, 1);
-
-    ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_ver);
-
-    vk->CmdDispatch(cmd_buf,s->vkctx.output_width,
-                    FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
-
-    ff_vk_add_exec_dep(&s->vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(&s->vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
-    err = ff_vk_submit_exec_queue(&s->vkctx, s->exec);
-    if (err)
-        return err;
-
-    ff_vk_qf_rotate(&s->qf);
-
-    return 0;
-
-fail:
-    ff_vk_discard_exec_deps(s->exec);
-    return err;
-}
-
 static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
 {
     int err;
-    AVFrame *out = NULL;
+    AVFrame *tmp = NULL, *out = NULL;
     AVFilterContext *ctx = link->dst;
     GBlurVulkanContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
@@ -437,28 +309,32 @@ static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
         goto fail;
     }
 
-    if (!s->initialized) {
-        RET(init_filter(ctx, in));
-        s->tmpframe = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-        if (!s->tmpframe) {
-            err = AVERROR(ENOMEM);
-            goto fail;
-        }
+    tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!tmp) {
+        err = AVERROR(ENOMEM);
+        goto fail;
     }
 
-    RET(process_frames(ctx, out, in));
+    if (!s->initialized)
+        RET(init_filter(ctx, in));
 
-    RET(av_frame_copy_props(out, in));
+    RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e,
+                                   (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver },
+                                   out, tmp, in, s->sampler, NULL, 0));
+
+    err = av_frame_copy_props(out, in);
+    if (err < 0)
+        goto fail;
 
     av_frame_free(&in);
+    av_frame_free(&tmp);
 
     return ff_filter_frame(outlink, out);
 
 fail:
     av_frame_free(&in);
+    av_frame_free(&tmp);
     av_frame_free(&out);
-    av_frame_free(&s->tmpframe);
-
     return err;
 }
 
-- 
2.39.2


[-- Attachment #62: 0061-overlay_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18798 bytes --]

From 1a4987ea3171409cc15b7ea85c2d483cf155378e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:05 +0100
Subject: [PATCH 61/72] overlay_vulkan: port for the rewrite

---
 libavfilter/vf_overlay_vulkan.c | 397 ++++++++++----------------------
 1 file changed, 122 insertions(+), 275 deletions(-)

diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index bdf231f4ef..694cb666d8 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -19,26 +21,26 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
 #include "vulkan_filter.h"
+#include "vulkan_spirv.h"
 #include "internal.h"
 #include "framesync.h"
 
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
 typedef struct OverlayVulkanContext {
     FFVulkanContext vkctx;
+    FFFrameSync fs;
 
     int initialized;
+    FFVulkanPipeline pl;
+    FFVkExecPool e;
     FFVkQueueFamilyCtx qf;
-    FFVkExecContext *exec;
-    FFVulkanPipeline *pl;
-    FFFrameSync fs;
-    FFVkBuffer params_buf;
+    FFVkSPIRVShader shd;
+    VkSampler sampler;
 
-    /* Shader updators, must be in the main filter struct */
-    VkDescriptorImageInfo main_images[3];
-    VkDescriptorImageInfo overlay_images[3];
-    VkDescriptorImageInfo output_images[3];
-    VkDescriptorBufferInfo params_desc;
+    /* Push constants / options */
+    struct {
+        int32_t o_offset[2*3];
+        int32_t o_size[2*3];
+    } opts;
 
     int overlay_x;
     int overlay_y;
@@ -80,279 +82,113 @@ static const char overlay_alpha[] = {
 static av_cold int init_filter(AVFilterContext *ctx)
 {
     int err;
-    FFVkSampler *sampler;
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
     OverlayVulkanContext *s = ctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
     const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
-    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
-    sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_NEAREST);
-    if (!sampler)
+    const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format);
+    FFVkSPIRVShader *shd = &s->shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc;
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
         return AVERROR_EXTERNAL;
-
-    s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
-    if (!s->pl)
-        return AVERROR(ENOMEM);
-
-    { /* Create the shader */
-        const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
-
-        FFVulkanDescriptorSetBinding desc_i[3] = {
-            {
-                .name       = "main_img",
-                .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-                .dimensions = 2,
-                .elems      = planes,
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->main_images,
-                .sampler    = sampler,
-            },
-            {
-                .name       = "overlay_img",
-                .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-                .dimensions = 2,
-                .elems      = planes,
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->overlay_images,
-                .sampler    = sampler,
-            },
-            {
-                .name       = "output_img",
-                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
-                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
-                .mem_quali  = "writeonly",
-                .dimensions = 2,
-                .elems      = planes,
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->output_images,
-            },
-        };
-
-        FFVulkanDescriptorSetBinding desc_b = {
-            .name        = "params",
-            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .mem_quali   = "readonly",
-            .mem_layout  = "std430",
-            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
-            .updater     = &s->params_desc,
-            .buf_content = "ivec2 o_offset[3], o_size[3];",
-        };
-
-        FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "overlay_compute",
-                                                 VK_SHADER_STAGE_COMPUTE_BIT);
-        if (!shd)
-            return AVERROR(ENOMEM);
-
-        ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd,  desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
-
-        GLSLD(   overlay_noalpha                                              );
-        GLSLD(   overlay_alpha                                                );
-        GLSLC(0, void main()                                                  );
-        GLSLC(0, {                                                            );
-        GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);             );
-        GLSLF(1,     int planes = %i;                                  ,planes);
-        GLSLC(1,     for (int i = 0; i < planes; i++) {                       );
-        if (ialpha)
-            GLSLC(2,         overlay_alpha_opaque(i, pos);                    );
-        else
-            GLSLC(2,         overlay_noalpha(i, pos);                         );
-        GLSLC(1,     }                                                        );
-        GLSLC(0, }                                                            );
-
-        RET(ff_vk_compile_shader(vkctx, shd, "main"));
-    }
-
-    RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
-    RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
-
-    { /* Create and update buffer */
-        const AVPixFmtDescriptor *desc;
-
-        /* NOTE: std430 requires the same identical struct layout, padding and
-         * alignment as C, so we're allowed to do this, as this will map
-         * exactly to what the shader recieves */
-        struct {
-            int32_t o_offset[2*3];
-            int32_t o_size[2*3];
-        } *par;
-
-        err = ff_vk_create_buf(vkctx, &s->params_buf,
-                               sizeof(*par), NULL,
-                               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
-                               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
-        if (err)
-            return err;
-
-        err = ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0);
-        if (err)
-            return err;
-
-        desc = av_pix_fmt_desc_get(s->vkctx.output_format);
-
-        par->o_offset[0] = s->overlay_x;
-        par->o_offset[1] = s->overlay_y;
-        par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w;
-        par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h;
-        par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w;
-        par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h;
-
-        par->o_size[0] = s->overlay_w;
-        par->o_size[1] = s->overlay_h;
-        par->o_size[2] = par->o_size[0] >> desc->log2_chroma_w;
-        par->o_size[3] = par->o_size[1] >> desc->log2_chroma_h;
-        par->o_size[4] = par->o_size[0] >> desc->log2_chroma_w;
-        par->o_size[5] = par->o_size[1] >> desc->log2_chroma_h;
-
-        err = ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1);
-        if (err)
-            return err;
-
-        s->params_desc.buffer = s->params_buf.buf;
-        s->params_desc.range  = VK_WHOLE_SIZE;
-
-        ff_vk_update_descriptor_set(vkctx, s->pl, 1);
     }
 
-    /* Execution context */
-    RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
+    RET(ff_vk_shader_init(&s->pl, &s->shd, "overlay_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+    ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+    GLSLC(0, layout(push_constant, std430) uniform pushConstants {        );
+    GLSLC(1,    ivec2 o_offset[3];                                        );
+    GLSLC(1,    ivec2 o_size[3];                                          );
+    GLSLC(0, };                                                           );
+    GLSLC(0,                                                              );
+
+    ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+                            VK_SHADER_STAGE_COMPUTE_BIT);
+
+    desc = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name       = "main_img",
+            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .dimensions = 2,
+            .elems      = planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers   = DUP_SAMPLER(s->sampler),
+        },
+        {
+            .name       = "overlay_img",
+            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .dimensions = 2,
+            .elems      = planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers   = DUP_SAMPLER(s->sampler),
+        },
+        {
+            .name       = "output_img",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+            .mem_quali  = "writeonly",
+            .dimensions = 2,
+            .elems      = planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0));
+
+    GLSLD(   overlay_noalpha                                              );
+    GLSLD(   overlay_alpha                                                );
+    GLSLC(0, void main()                                                  );
+    GLSLC(0, {                                                            );
+    GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);             );
+    GLSLF(1,     int planes = %i;                                  ,planes);
+    GLSLC(1,     for (int i = 0; i < planes; i++) {                       );
+    if (ialpha)
+        GLSLC(2,         overlay_alpha_opaque(i, pos);                    );
+    else
+        GLSLC(2,         overlay_noalpha(i, pos);                         );
+    GLSLC(1,     }                                                        );
+    GLSLC(0, }                                                            );
+
+    RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
+
+    s->opts.o_offset[0] = s->overlay_x;
+    s->opts.o_offset[1] = s->overlay_y;
+    s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w;
+    s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h;
+    s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w;
+    s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h;
+
+    s->opts.o_size[0] = s->overlay_w;
+    s->opts.o_size[1] = s->overlay_h;
+    s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w;
+    s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h;
+    s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w;
+    s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h;
 
     s->initialized = 1;
 
-    return 0;
-
 fail:
-    return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
-                          AVFrame *main_f, AVFrame *overlay_f)
-{
-    int err;
-    VkCommandBuffer cmd_buf;
-    OverlayVulkanContext *s = avctx->priv;
-    FFVulkanContext *vkctx = &s->vkctx;
-    FFVulkanFunctions *vk = &vkctx->vkfn;
-    int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
-    AVVkFrame *out     = (AVVkFrame *)out_f->data[0];
-    AVVkFrame *main    = (AVVkFrame *)main_f->data[0];
-    AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0];
-
-    AVHWFramesContext *main_fc    = (AVHWFramesContext*)main_f->hw_frames_ctx->data;
-    AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data;
-
-    const VkFormat *output_formats     = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-    const VkFormat *main_sw_formats    = av_vkfmt_from_pixfmt(main_fc->sw_format);
-    const VkFormat *overlay_sw_formats = av_vkfmt_from_pixfmt(overlay_fc->sw_format);
-
-    /* Update descriptors and init the exec context */
-    ff_vk_start_exec_recording(vkctx, s->exec);
-    cmd_buf = ff_vk_get_exec_buf(s->exec);
-
-    for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->main_images[i].imageView, main->img[i],
-                                   main_sw_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->overlay_images[i].imageView, overlay->img[i],
-                                   overlay_sw_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->output_images[i].imageView, out->img[i],
-                                   output_formats[i],
-                                   ff_comp_identity_map));
-
-        s->main_images[i].imageLayout    = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->output_images[i].imageLayout  = VK_IMAGE_LAYOUT_GENERAL;
-    }
-
-    ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
-    for (int i = 0; i < planes; i++) {
-        VkImageMemoryBarrier bar[3] = {
-            {
-                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask = 0,
-                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout = main->layout[i],
-                .newLayout = s->main_images[i].imageLayout,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = main->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask = 0,
-                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout = overlay->layout[i],
-                .newLayout = s->overlay_images[i].imageLayout,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = overlay->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask = 0,
-                .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
-                .oldLayout = out->layout[i],
-                .newLayout = s->output_images[i].imageLayout,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = out->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-        };
-
-        vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
-                               0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
-        main->layout[i]    = bar[0].newLayout;
-        main->access[i]    = bar[0].dstAccessMask;
-
-        overlay->layout[i] = bar[1].newLayout;
-        overlay->access[i] = bar[1].dstAccessMask;
-
-        out->layout[i]     = bar[2].newLayout;
-        out->access[i]     = bar[2].dstAccessMask;
-    }
-
-    ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
-    vk->CmdDispatch(cmd_buf,
-                    FFALIGN(s->vkctx.output_width,  CGROUPS[0])/CGROUPS[0],
-                    FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
-
-    ff_vk_add_exec_dep(vkctx, s->exec, main_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(vkctx, s->exec, overlay_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
-    err = ff_vk_submit_exec_queue(vkctx, s->exec);
-    if (err)
-        return err;
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    if (spv)
+        spv->uninit(&spv);
 
-    ff_vk_qf_rotate(&s->qf);
-
-    return err;
-
-fail:
-    ff_vk_discard_exec_deps(s->exec);
     return err;
 }
 
@@ -394,7 +230,9 @@ static int overlay_vulkan_blend(FFFrameSync *fs)
         goto fail;
     }
 
-    RET(process_frames(ctx, out, input_main, input_overlay));
+    RET(ff_vk_filter_process_2in(&s->vkctx, &s->e, &s->pl,
+                                 out, input_main, input_overlay,
+                                 s->sampler, &s->opts, sizeof(s->opts)));
 
     err = av_frame_copy_props(out, input_main);
     if (err < 0)
@@ -443,8 +281,17 @@ static av_cold int overlay_vulkan_init(AVFilterContext *avctx)
 static void overlay_vulkan_uninit(AVFilterContext *avctx)
 {
     OverlayVulkanContext *s = avctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_pipeline_free(vkctx, &s->pl);
+    ff_vk_shader_free(vkctx, &s->shd);
+
+    if (s->sampler)
+        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+                           vkctx->hwctx->alloc);
 
-    ff_vk_free_buf(&s->vkctx, &s->params_buf);
     ff_vk_uninit(&s->vkctx);
     ff_framesync_uninit(&s->fs);
 
-- 
2.39.2


[-- Attachment #63: 0062-scale_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18951 bytes --]

From 4ec8834fa164e172420cd162d4a51735fbddd986 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:32 +0100
Subject: [PATCH 62/72] scale_vulkan: port for the rewrite

---
 libavfilter/vf_scale_vulkan.c | 365 ++++++++++++----------------------
 1 file changed, 124 insertions(+), 241 deletions(-)

diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 31dc35569b..84bd19c012 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -19,12 +21,11 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
 #include "vulkan_filter.h"
+#include "vulkan_spirv.h"
 #include "scale_eval.h"
 #include "internal.h"
 #include "colorspace.h"
 
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
 enum ScalerFunc {
     F_BILINEAR = 0,
     F_NEAREST,
@@ -35,15 +36,17 @@ enum ScalerFunc {
 typedef struct ScaleVulkanContext {
     FFVulkanContext vkctx;
 
+    int initialized;
+    FFVulkanPipeline pl;
+    FFVkExecPool e;
     FFVkQueueFamilyCtx qf;
-    FFVkExecContext *exec;
-    FFVulkanPipeline *pl;
-    FFVkBuffer params_buf;
+    FFVkSPIRVShader shd;
+    VkSampler sampler;
 
-    /* Shader updators, must be in the main filter struct */
-    VkDescriptorImageInfo input_images[3];
-    VkDescriptorImageInfo output_images[3];
-    VkDescriptorBufferInfo params_desc;
+    /* Push constants / options */
+    struct {
+        float yuv_matrix[4][4];
+    } opts;
 
     char *out_format_string;
     char *w_expr;
@@ -51,8 +54,6 @@ typedef struct ScaleVulkanContext {
 
     enum ScalerFunc scaler;
     enum AVColorRange out_range;
-
-    int initialized;
 } ScaleVulkanContext;
 
 static const char scale_bilinear[] = {
@@ -110,10 +111,15 @@ static const char write_444[] = {
 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 {
     int err;
-    FFVkSampler *sampler;
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
     VkFilter sampler_mode;
     ScaleVulkanContext *s = ctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
+    FFVkSPIRVShader *shd = &s->shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc;
 
     int crop_x = in->crop_left;
     int crop_y = in->crop_top;
@@ -121,8 +127,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
     int crop_h = in->height - (in->crop_top + in->crop_bottom);
     int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
 
-    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
     switch (s->scaler) {
     case F_NEAREST:
         sampler_mode = VK_FILTER_NEAREST;
@@ -132,264 +136,133 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
         break;
     };
 
-    /* Create a sampler */
-    sampler = ff_vk_init_sampler(vkctx, 0, sampler_mode);
-    if (!sampler)
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
         return AVERROR_EXTERNAL;
+    }
 
-    s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
-    if (!s->pl)
-        return AVERROR(ENOMEM);
-
-    { /* Create the shader */
-        FFVulkanDescriptorSetBinding desc_i[2] = {
-            {
-                .name       = "input_img",
-                .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-                .dimensions = 2,
-                .elems      = in_planes,
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->input_images,
-                .sampler    = sampler,
-            },
-            {
-                .name       = "output_img",
-                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
-                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
-                .mem_quali  = "writeonly",
-                .dimensions = 2,
-                .elems      = av_pix_fmt_count_planes(s->vkctx.output_format),
-                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-                .updater    = s->output_images,
-            },
-        };
-
-        FFVulkanDescriptorSetBinding desc_b = {
-            .name        = "params",
-            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-            .mem_quali   = "readonly",
-            .mem_layout  = "std430",
-            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
-            .updater     = &s->params_desc,
-            .buf_content = "mat4 yuv_matrix;",
-        };
-
-        FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "scale_compute",
-                                                 VK_SHADER_STAGE_COMPUTE_BIT);
-        if (!shd)
-            return AVERROR(ENOMEM);
-
-        ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd,  desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
-
-        GLSLD(   scale_bilinear                                                  );
-
-        if (s->vkctx.output_format != s->vkctx.input_format) {
-            GLSLD(   rgb2yuv                                                     );
-        }
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode));
+    RET(ff_vk_shader_init(&s->pl, &s->shd, "scale_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+    ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+    GLSLC(0, layout(push_constant, std430) uniform pushConstants {        );
+    GLSLC(1,    mat4 yuv_matrix;                                          );
+    GLSLC(0, };                                                           );
+    GLSLC(0,                                                              );
+
+    ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+                            VK_SHADER_STAGE_COMPUTE_BIT);
+
+    desc = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name       = "input_img",
+            .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .dimensions = 2,
+            .elems      = in_planes,
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers   = DUP_SAMPLER(s->sampler),
+        },
+        {
+            .name       = "output_img",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+            .mem_quali  = "writeonly",
+            .dimensions = 2,
+            .elems      = av_pix_fmt_count_planes(s->vkctx.output_format),
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
 
-        switch (s->vkctx.output_format) {
-        case AV_PIX_FMT_NV12:    GLSLD(write_nv12); break;
-        case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
-        case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
-        default: break;
-        }
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
 
-        GLSLC(0, void main()                                                     );
-        GLSLC(0, {                                                               );
-        GLSLC(1,     ivec2 size;                                                 );
-        GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);                );
-        GLSLF(1,     vec2 in_d = vec2(%i, %i);             ,in->width, in->height);
-        GLSLF(1,     vec2 c_r = vec2(%i, %i) / in_d;              ,crop_w, crop_h);
-        GLSLF(1,     vec2 c_o = vec2(%i, %i) / in_d;               ,crop_x,crop_y);
-        GLSLC(0,                                                                 );
-
-        if (s->vkctx.output_format == s->vkctx.input_format) {
-            for (int i = 0; i < desc_i[1].elems; i++) {
-                GLSLF(1,  size = imageSize(output_img[%i]);                    ,i);
-                GLSLC(1,  if (IS_WITHIN(pos, size)) {                            );
-                switch (s->scaler) {
-                case F_NEAREST:
-                case F_BILINEAR:
-                    GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o);     ,i);
-                    GLSLF(2, imageStore(output_img[%i], pos, res);             ,i);
-                    break;
-                };
-                GLSLC(1, }                                                       );
-            }
-        } else {
-            GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o);                );
-            GLSLF(1, res = rgb2yuv(res, %i);    ,s->out_range == AVCOL_RANGE_JPEG);
-            switch (s->vkctx.output_format) {
-            case AV_PIX_FMT_NV12:    GLSLC(1, write_nv12(res, pos); ); break;
-            case AV_PIX_FMT_YUV420P: GLSLC(1,  write_420(res, pos); ); break;
-            case AV_PIX_FMT_YUV444P: GLSLC(1,  write_444(res, pos); ); break;
-            default: return AVERROR(EINVAL);
-            }
-        }
+    GLSLD(   scale_bilinear                                                  );
+
+    if (s->vkctx.output_format != s->vkctx.input_format) {
+        GLSLD(   rgb2yuv                                                     );
+    }
 
-        GLSLC(0, }                                                               );
+    switch (s->vkctx.output_format) {
+    case AV_PIX_FMT_NV12:    GLSLD(write_nv12); break;
+    case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
+    case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
+    default: break;
+    }
 
-        RET(ff_vk_compile_shader(vkctx, shd, "main"));
+    GLSLC(0, void main()                                                     );
+    GLSLC(0, {                                                               );
+    GLSLC(1,     ivec2 size;                                                 );
+    GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);                );
+    GLSLF(1,     vec2 in_d = vec2(%i, %i);             ,in->width, in->height);
+    GLSLF(1,     vec2 c_r = vec2(%i, %i) / in_d;              ,crop_w, crop_h);
+    GLSLF(1,     vec2 c_o = vec2(%i, %i) / in_d;               ,crop_x,crop_y);
+    GLSLC(0,                                                                 );
+
+    if (s->vkctx.output_format == s->vkctx.input_format) {
+        for (int i = 0; i < desc[1].elems; i++) {
+            GLSLF(1,  size = imageSize(output_img[%i]);                    ,i);
+            GLSLC(1,  if (IS_WITHIN(pos, size)) {                            );
+            switch (s->scaler) {
+            case F_NEAREST:
+            case F_BILINEAR:
+                GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o);     ,i);
+                GLSLF(2, imageStore(output_img[%i], pos, res);             ,i);
+                break;
+            };
+            GLSLC(1, }                                                       );
+        }
+    } else {
+        GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o);                );
+        GLSLF(1, res = rgb2yuv(res, %i);    ,s->out_range == AVCOL_RANGE_JPEG);
+        switch (s->vkctx.output_format) {
+        case AV_PIX_FMT_NV12:    GLSLC(1, write_nv12(res, pos); ); break;
+        case AV_PIX_FMT_YUV420P: GLSLC(1,  write_420(res, pos); ); break;
+        case AV_PIX_FMT_YUV444P: GLSLC(1,  write_444(res, pos); ); break;
+        default: return AVERROR(EINVAL);
+        }
     }
 
-    RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
-    RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+    GLSLC(0, }                                                               );
 
     if (s->vkctx.output_format != s->vkctx.input_format) {
         const AVLumaCoefficients *lcoeffs;
         double tmp_mat[3][3];
 
-        struct {
-            float yuv_matrix[4][4];
-        } *par;
-
         lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace);
         if (!lcoeffs) {
             av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n");
             return AVERROR(EINVAL);
         }
 
-        RET(ff_vk_create_buf(vkctx, &s->params_buf,
-                             sizeof(*par), NULL,
-                             VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
-                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
-
-        RET(ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0));
-
         ff_fill_rgb2yuv_table(lcoeffs, tmp_mat);
 
-        memset(par, 0, sizeof(*par));
-
         for (int y = 0; y < 3; y++)
             for (int x = 0; x < 3; x++)
-                par->yuv_matrix[x][y] = tmp_mat[x][y];
-
-        par->yuv_matrix[3][3] = 1.0;
-
-        RET(ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1));
-
-        s->params_desc.buffer = s->params_buf.buf;
-        s->params_desc.range  = VK_WHOLE_SIZE;
-
-        ff_vk_update_descriptor_set(vkctx, s->pl, 1);
+                s->opts.yuv_matrix[x][y] = tmp_mat[x][y];
+        s->opts.yuv_matrix[3][3] = 1.0;
     }
 
-    /* Execution context */
-    RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+    RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
 
     s->initialized = 1;
 
     return 0;
 
 fail:
-    return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
-{
-    int err = 0;
-    VkCommandBuffer cmd_buf;
-    ScaleVulkanContext *s = avctx->priv;
-    FFVulkanContext *vkctx = &s->vkctx;
-    FFVulkanFunctions *vk = &vkctx->vkfn;
-    AVVkFrame *in = (AVVkFrame *)in_f->data[0];
-    AVVkFrame *out = (AVVkFrame *)out_f->data[0];
-    VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2];
-    int barrier_count = 0;
-    const int planes = av_pix_fmt_count_planes(s->vkctx.input_format);
-    const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
-    const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
-    /* Update descriptors and init the exec context */
-    ff_vk_start_exec_recording(vkctx, s->exec);
-    cmd_buf = ff_vk_get_exec_buf(s->exec);
-
-    for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->input_images[i].imageView, in->img[i],
-                                   input_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->output_images[i].imageView, out->img[i],
-                                   output_formats[i],
-                                   ff_comp_identity_map));
-
-        s->input_images[i].imageLayout  = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-    }
-
-    ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
-    for (int i = 0; i < planes; i++) {
-        VkImageMemoryBarrier bar = {
-            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-            .srcAccessMask = 0,
-            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
-            .oldLayout = in->layout[i],
-            .newLayout = s->input_images[i].imageLayout,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .image = in->img[i],
-            .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-            .subresourceRange.levelCount = 1,
-            .subresourceRange.layerCount = 1,
-        };
-
-        memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier));
-
-        in->layout[i]  = bar.newLayout;
-        in->access[i]  = bar.dstAccessMask;
-    }
-
-    for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) {
-        VkImageMemoryBarrier bar = {
-            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-            .srcAccessMask = 0,
-            .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
-            .oldLayout = out->layout[i],
-            .newLayout = s->output_images[i].imageLayout,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .image = out->img[i],
-            .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-            .subresourceRange.levelCount = 1,
-            .subresourceRange.layerCount = 1,
-        };
-
-        memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier));
-
-        out->layout[i] = bar.newLayout;
-        out->access[i] = bar.dstAccessMask;
-    }
-
-    vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
-                           0, NULL, 0, NULL, barrier_count, barriers);
-
-    ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
-    vk->CmdDispatch(cmd_buf,
-                    FFALIGN(vkctx->output_width,  CGROUPS[0])/CGROUPS[0],
-                    FFALIGN(vkctx->output_height, CGROUPS[1])/CGROUPS[1], 1);
-
-    ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
-    err = ff_vk_submit_exec_queue(vkctx, s->exec);
-    if (err)
-        return err;
-
-    ff_vk_qf_rotate(&s->qf);
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    if (spv)
+        spv->uninit(&spv);
 
     return err;
-
-fail:
-    ff_vk_discard_exec_deps(s->exec);
-    return err;
 }
 
 static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
@@ -408,7 +281,8 @@ static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
     if (!s->initialized)
         RET(init_filter(ctx, in));
 
-    RET(process_frames(ctx, out, in));
+    RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+                                    s->sampler, &s->opts, sizeof(s->opts)));
 
     err = av_frame_copy_props(out, in);
     if (err < 0)
@@ -475,8 +349,17 @@ static int scale_vulkan_config_output(AVFilterLink *outlink)
 static void scale_vulkan_uninit(AVFilterContext *avctx)
 {
     ScaleVulkanContext *s = avctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_pipeline_free(vkctx, &s->pl);
+    ff_vk_shader_free(vkctx, &s->shd);
+
+    if (s->sampler)
+        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+                           vkctx->hwctx->alloc);
 
-    ff_vk_free_buf(&s->vkctx, &s->params_buf);
     ff_vk_uninit(&s->vkctx);
 
     s->initialized = 0;
-- 
2.39.2


[-- Attachment #64: 0063-transpose_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 12391 bytes --]

From ec245a2b213f82a52b9a5120062ab4f620519100 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:43 +0100
Subject: [PATCH 63/72] transpose_vulkan: port for the rewrite

---
 libavfilter/vf_transpose_vulkan.c | 223 ++++++++++--------------------
 1 file changed, 75 insertions(+), 148 deletions(-)

diff --git a/libavfilter/vf_transpose_vulkan.c b/libavfilter/vf_transpose_vulkan.c
index 30d052e08c..36f286b219 100644
--- a/libavfilter/vf_transpose_vulkan.c
+++ b/libavfilter/vf_transpose_vulkan.c
@@ -1,5 +1,7 @@
 /*
  * copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -20,41 +22,59 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/opt.h"
 #include "vulkan_filter.h"
+#include "vulkan_spirv.h"
 #include "internal.h"
 #include "transpose.h"
 
-#define CGS 32
-
 typedef struct TransposeVulkanContext {
     FFVulkanContext vkctx;
-    FFVkQueueFamilyCtx qf;
-    FFVkExecContext *exec;
-    FFVulkanPipeline *pl;
 
-    VkDescriptorImageInfo input_images[3];
-    VkDescriptorImageInfo output_images[3];
+    int initialized;
+    FFVulkanPipeline pl;
+    FFVkExecPool e;
+    FFVkQueueFamilyCtx qf;
+    FFVkSPIRVShader shd;
+    VkSampler sampler;
 
     int dir;
     int passthrough;
-    int initialized;
 } TransposeVulkanContext;
 
 static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
 {
-    int err = 0;
-    FFVkSPIRVShader *shd;
+    int err;
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL; /* fail: tests this even if compile never ran */
     TransposeVulkanContext *s = ctx->priv;
     FFVulkanContext *vkctx = &s->vkctx;
+
     const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+    FFVkSPIRVShader *shd = &s->shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc;
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+    RET(ff_vk_shader_init(&s->pl, &s->shd, "transpose_compute", VK_SHADER_STAGE_COMPUTE_BIT));
 
-    FFVulkanDescriptorSetBinding image_descs[] = {
+    ff_vk_shader_set_compute_sizes(&s->shd, 32, 1, 1);
+
+    desc = (FFVulkanDescriptorSetBinding []) {
         {
             .name       = "input_images",
             .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-            .updater    = s->input_images,
+            .samplers   = DUP_SAMPLER(s->sampler),
         },
         {
             .name       = "output_images",
@@ -64,154 +84,49 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
             .dimensions = 2,
             .elems      = planes,
             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
-            .updater    = s->output_images,
         },
     };
 
-    image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
-    if (!image_descs[0].sampler)
-            return AVERROR_EXTERNAL;
-
-    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
-    {
-        s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
-        if (!s->pl)
-            return AVERROR(ENOMEM);
-
-        shd = ff_vk_init_shader(s->pl, "transpose_compute", image_descs[0].stages);
-        if (!shd)
-            return AVERROR(ENOMEM);
-
-        ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
-        RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
-        GLSLC(0, void main()                                               );
-        GLSLC(0, {                                                         );
-        GLSLC(1,     ivec2 size;                                           );
-        GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);          );
-        for (int i = 0; i < planes; i++) {
-            GLSLC(0,                                                       );
-            GLSLF(1, size = imageSize(output_images[%i]);                ,i);
-            GLSLC(1, if (IS_WITHIN(pos, size)) {                           );
-            if (s->dir == TRANSPOSE_CCLOCK)
-                GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
-            else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
-                GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx));      ,i);
-                if (s->dir == TRANSPOSE_CLOCK)
-                    GLSLC(2, pos = ivec2(pos.x, size.y - pos.y);           );
-            } else
-                GLSLF(2, vec4 res = texture(input_images[%i], pos.yx);   ,i);
-            GLSLF(2,     imageStore(output_images[%i], pos, res);        ,i);
-            GLSLC(1, }                                                     );
-        }
-        GLSLC(0, }                                                         );
-
-        RET(ff_vk_compile_shader(vkctx, shd, "main"));
-        RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
-        RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
-    }
-
-    RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
-    s->initialized = 1;
-
-fail:
-    return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
-    int err = 0;
-    VkCommandBuffer cmd_buf;
-    TransposeVulkanContext *s = avctx->priv;
-    FFVulkanContext *vkctx = &s->vkctx;
-    FFVulkanFunctions *vk = &s->vkctx.vkfn;
-    const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
-    AVVkFrame *in  = (AVVkFrame *)inframe->data[0];
-    AVVkFrame *out = (AVVkFrame *)outframe->data[0];
-
-    const VkFormat *input_formats  = av_vkfmt_from_pixfmt(s->vkctx.input_format);
-    const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
-    ff_vk_start_exec_recording(vkctx, s->exec);
-    cmd_buf = ff_vk_get_exec_buf(s->exec);
-
-    for (int i = 0; i < planes; i++) {
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->input_images[i].imageView, in->img[i],
-                                   input_formats[i],
-                                   ff_comp_identity_map));
-
-        RET(ff_vk_create_imageview(vkctx, s->exec,
-                                   &s->output_images[i].imageView, out->img[i],
-                                   output_formats[i],
-                                   ff_comp_identity_map));
-
-        s->input_images[i].imageLayout  = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
-        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
-    }
-
-    ff_vk_update_descriptor_set(vkctx, s->pl, 0);
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
 
+    GLSLC(0, void main()                                               );
+    GLSLC(0, {                                                         );
+    GLSLC(1,     ivec2 size;                                           );
+    GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);          );
     for (int i = 0; i < planes; i++) {
-        VkImageMemoryBarrier barriers[] = {
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_READ_BIT,
-                .oldLayout                   = in->layout[i],
-                .newLayout                   = s->input_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = in->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-            {
-                .sType                       = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .srcAccessMask               = 0,
-                .dstAccessMask               = VK_ACCESS_SHADER_WRITE_BIT,
-                .oldLayout                   = out->layout[i],
-                .newLayout                   = s->output_images[i].imageLayout,
-                .srcQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex         = VK_QUEUE_FAMILY_IGNORED,
-                .image                       = out->img[i],
-                .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                .subresourceRange.levelCount = 1,
-                .subresourceRange.layerCount = 1,
-            },
-        };
-
-        vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
-                               0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
-        in->layout[i]  = barriers[0].newLayout;
-        in->access[i]  = barriers[0].dstAccessMask;
-
-        out->layout[i] = barriers[1].newLayout;
-        out->access[i] = barriers[1].dstAccessMask;
+        GLSLC(0,                                                       );
+        GLSLF(1, size = imageSize(output_images[%i]);                ,i);
+        GLSLC(1, if (IS_WITHIN(pos, size)) {                           );
+        if (s->dir == TRANSPOSE_CCLOCK)
+            GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
+        else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
+            GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx));      ,i);
+            if (s->dir == TRANSPOSE_CLOCK)
+                GLSLC(2, pos = ivec2(pos.x, size.y - pos.y);           );
+        } else
+            GLSLF(2, vec4 res = texture(input_images[%i], pos.yx);   ,i);
+        GLSLF(2,     imageStore(output_images[%i], pos, res);        ,i);
+        GLSLC(1, }                                                     );
     }
+    GLSLC(0, }                                                         );
 
-    ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-    vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
-                    s->vkctx.output_height, 1);
+    RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
 
-    ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-    ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
 
-    err = ff_vk_submit_exec_queue(vkctx, s->exec);
-    if (err)
-        return err;
-
-    ff_vk_qf_rotate(&s->qf);
+    s->initialized = 1;
 
-    return 0;
-
+    /* Success falls through: the SPIR-V compiler state must be released
+     * on every path; err then holds the result of the last RET() call. */
 fail:
-    ff_vk_discard_exec_deps(s->exec);
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    if (spv)
+        spv->uninit(&spv);
+
     return err;
 }
 
@@ -235,7 +150,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     if (!s->initialized)
         RET(init_filter(ctx, in));
 
-    RET(process_frames(ctx, out, in));
+    RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+                                    s->sampler, NULL, 0));
 
     RET(av_frame_copy_props(out, in));
 
@@ -259,6 +175,17 @@ fail:
 static av_cold void transpose_vulkan_uninit(AVFilterContext *avctx)
 {
     TransposeVulkanContext *s = avctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_pipeline_free(vkctx, &s->pl);
+    ff_vk_shader_free(vkctx, &s->shd);
+
+    if (s->sampler)
+        vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+                           vkctx->hwctx->alloc);
+
     ff_vk_uninit(&s->vkctx);
 
     s->initialized = 0;
-- 
2.39.2


[-- Attachment #65: 0064-avcodec-add-AVHWAccel.free_frame_priv-callback.patch --]
[-- Type: text/x-diff, Size: 7769 bytes --]

From dbf81f602283527ea27d7ddac58e8ff648fc5557 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 18:03:05 +0100
Subject: [PATCH 64/72] avcodec: add AVHWAccel.free_frame_priv callback

---
 libavcodec/av1dec.c      |  4 ++--
 libavcodec/avcodec.h     |  8 ++++++++
 libavcodec/decode.c      | 19 +++++++++++++++++++
 libavcodec/decode.h      | 11 +++++++++++
 libavcodec/h264_slice.c  |  3 ++-
 libavcodec/hevc_refs.c   |  3 ++-
 libavcodec/mpegpicture.c |  4 +++-
 libavcodec/vp8.c         |  2 +-
 libavcodec/vp9.c         |  2 +-
 9 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
index d83c902f1f..d105835d51 100644
--- a/libavcodec/av1dec.c
+++ b/libavcodec/av1dec.c
@@ -24,6 +24,7 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
+#include "decode.h"
 #include "av1dec.h"
 #include "bytestream.h"
 #include "codec_internal.h"
@@ -836,8 +837,7 @@ static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f)
     if (avctx->hwaccel) {
         const AVHWAccel *hwaccel = avctx->hwaccel;
         if (hwaccel->frame_priv_data_size) {
-            f->hwaccel_priv_buf =
-                av_buffer_allocz(hwaccel->frame_priv_data_size);
+            f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, hwaccel);
             if (!f->hwaccel_priv_buf) {
                 ret = AVERROR(ENOMEM);
                 goto fail;
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 17416791a6..6babfc7132 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2206,6 +2206,14 @@ typedef struct AVHWAccel {
      * that avctx->hwaccel_priv_data is invalid.
      */
     int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
+
+    /**
+     * Callback to free the hwaccel-specific frame data.
+     *
+     * @param avctx the codec context
+     * @param data the per-frame hardware accelerator private data to be freed.
+     */
+    void (*free_frame_priv)(AVCodecContext *avctx, void *data);
 } AVHWAccel;
 
 /**
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 93ecd36c2b..b9a2ec84f6 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1675,3 +1675,22 @@ int ff_copy_palette(void *dst, const AVPacket *src, void *logctx)
     }
     return 0;
 }
+
+AVBufferRef *ff_alloc_hwaccel_frame_priv_data(AVCodecContext *avctx,
+                                              const AVHWAccel *hwaccel)
+{
+    AVBufferRef *ref;
+    uint8_t *data = av_mallocz(hwaccel->frame_priv_data_size);
+    if (!data)
+        return NULL;
+
+    ref = av_buffer_create(data, hwaccel->frame_priv_data_size,
+                           (void (*)(void *, uint8_t *))hwaccel->free_frame_priv,
+                           avctx, 0);
+    if (!ref) {
+        av_free(data);
+        return NULL;
+    }
+
+    return ref;
+}
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 8430ffbd66..aa40baafc0 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -150,4 +150,15 @@ int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame, int flags);
 int ff_side_data_update_matrix_encoding(AVFrame *frame,
                                         enum AVMatrixEncoding matrix_encoding);
 
+/**
+ * Allocate zeroed hwaccel frame private data and wrap it in an
+ * AVBufferRef that uses AVHWAccel.free_frame_priv as its free callback.
+ *
+ * @param     avctx   The codec context to attach as the buffer's opaque value
+ * @param     hwaccel The hwaccel for which to allocate
+ * @return            The allocated buffer, or NULL on allocation failure
+ */
+AVBufferRef *ff_alloc_hwaccel_frame_priv_data(AVCodecContext *avctx,
+                                              const AVHWAccel *hwaccel);
+
 #endif /* AVCODEC_DECODE_H */
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 8ac66b343c..c0aa31bcd9 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -33,6 +33,7 @@
 #include "libavutil/pixdesc.h"
 #include "libavutil/timecode.h"
 #include "internal.h"
+#include "decode.h"
 #include "cabac.h"
 #include "cabac_functions.h"
 #include "decode.h"
@@ -212,7 +213,7 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
         const AVHWAccel *hwaccel = h->avctx->hwaccel;
         av_assert0(!pic->hwaccel_picture_private);
         if (hwaccel->frame_priv_data_size) {
-            pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+            pic->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(h->avctx, hwaccel);
             if (!pic->hwaccel_priv_buf)
                 return AVERROR(ENOMEM);
             pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data;
diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
index 811e8feff8..30cbb8b37a 100644
--- a/libavcodec/hevc_refs.c
+++ b/libavcodec/hevc_refs.c
@@ -23,6 +23,7 @@
 
 #include "libavutil/avassert.h"
 
+#include "decode.h"
 #include "thread.h"
 #include "hevc.h"
 #include "hevcdec.h"
@@ -118,7 +119,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
             const AVHWAccel *hwaccel = s->avctx->hwaccel;
             av_assert0(!frame->hwaccel_picture_private);
             if (hwaccel->frame_priv_data_size) {
-                frame->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+                frame->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(s->avctx, hwaccel);
                 if (!frame->hwaccel_priv_buf)
                     goto fail;
                 frame->hwaccel_picture_private = frame->hwaccel_priv_buf->data;
diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c
index 977bc65191..a1d58f04b3 100644
--- a/libavcodec/mpegpicture.c
+++ b/libavcodec/mpegpicture.c
@@ -27,6 +27,8 @@
 
 #include "avcodec.h"
 #include "encode.h"
+#include "internal.h"
+#include "decode.h"
 #include "motion_est.h"
 #include "mpegpicture.h"
 #include "mpegutils.h"
@@ -172,7 +174,7 @@ static int alloc_frame_buffer(AVCodecContext *avctx,  Picture *pic,
     if (avctx->hwaccel) {
         assert(!pic->hwaccel_picture_private);
         if (avctx->hwaccel->frame_priv_data_size) {
-            pic->hwaccel_priv_buf = av_buffer_allocz(avctx->hwaccel->frame_priv_data_size);
+            pic->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, avctx->hwaccel);
             if (!pic->hwaccel_priv_buf) {
                 av_log(avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
                 return -1;
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index db2419deaf..4c23eb5672 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -109,7 +109,7 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
     if (s->avctx->hwaccel) {
         const AVHWAccel *hwaccel = s->avctx->hwaccel;
         if (hwaccel->frame_priv_data_size) {
-            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+            f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(s->avctx, hwaccel);
             if (!f->hwaccel_priv_buf)
                 goto fail;
             f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 7c0a246446..4f345f18db 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -136,7 +136,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
         const AVHWAccel *hwaccel = avctx->hwaccel;
         av_assert0(!f->hwaccel_picture_private);
         if (hwaccel->frame_priv_data_size) {
-            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+            f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, hwaccel);
             if (!f->hwaccel_priv_buf)
                 goto fail;
             f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
-- 
2.39.2


[-- Attachment #66: 0065-avcodec-add-AVHWAccel.flush-callback.patch --]
[-- Type: text/x-diff, Size: 3020 bytes --]

From 93223fa95389c60c015cfcee22784a1bf0fdb05b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 6 Jan 2023 03:32:56 +0100
Subject: [PATCH 65/72] avcodec: add AVHWAccel.flush callback

---
 libavcodec/av1dec.c  | 3 +++
 libavcodec/avcodec.h | 5 +++++
 libavcodec/h264dec.c | 3 +++
 libavcodec/hevcdec.c | 3 +++
 libavcodec/vp8.c     | 3 +++
 libavcodec/vp9.c     | 3 +++
 6 files changed, 20 insertions(+)

diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
index d105835d51..3cbb80bcb5 100644
--- a/libavcodec/av1dec.c
+++ b/libavcodec/av1dec.c
@@ -1228,6 +1228,9 @@ static void av1_decode_flush(AVCodecContext *avctx)
     s->raw_seq = NULL;
 
     ff_cbs_flush(s->cbc);
+
+    if (avctx->hwaccel && avctx->hwaccel->flush)
+        avctx->hwaccel->flush(avctx);
 }
 
 #define OFFSET(x) offsetof(AV1DecContext, x)
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 6babfc7132..531998a78c 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2214,6 +2214,11 @@ typedef struct AVHWAccel {
      * @param data the per-frame hardware accelerator private data to be freed.
      */
     void (*free_frame_priv)(AVCodecContext *avctx, void *data);
+
+    /**
+     * Callback to flush the hwaccel state.
+     */
+    void (*flush)(AVCodecContext *avctx);
 } AVHWAccel;
 
 /**
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 2d691731c5..995bf17a8f 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -480,6 +480,9 @@ static void h264_decode_flush(AVCodecContext *avctx)
 
     ff_h264_free_tables(h);
     h->context_initialized = 0;
+
+    if (avctx->hwaccel && avctx->hwaccel->flush)
+        avctx->hwaccel->flush(avctx);
 }
 
 static int get_last_needed_nal(H264Context *h)
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 43cd963175..7c9b46240c 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -3682,6 +3682,9 @@ static void hevc_decode_flush(AVCodecContext *avctx)
     av_buffer_unref(&s->rpu_buf);
     s->max_ra = INT_MAX;
     s->eos = 1;
+
+    if (avctx->hwaccel && avctx->hwaccel->flush)
+        avctx->hwaccel->flush(avctx);
 }
 
 #define OFFSET(x) offsetof(HEVCContext, x)
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 4c23eb5672..b591b82ad1 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -167,6 +167,9 @@ static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
 
     if (free_mem)
         free_buffers(s);
+
+    if (avctx->hwaccel && avctx->hwaccel->flush)
+        avctx->hwaccel->flush(avctx);
 }
 
 static void vp8_decode_flush(AVCodecContext *avctx)
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 4f345f18db..18c2b09f64 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -1791,6 +1791,9 @@ static void vp9_decode_flush(AVCodecContext *avctx)
         vp9_frame_unref(avctx, &s->s.frames[i]);
     for (i = 0; i < 8; i++)
         ff_thread_release_ext_buffer(avctx, &s->s.refs[i]);
+
+    if (avctx->hwaccel && avctx->hwaccel->flush)
+        avctx->hwaccel->flush(avctx);
 }
 
 static av_cold int vp9_decode_init(AVCodecContext *avctx)
-- 
2.39.2


[-- Attachment #67: 0066-hwconfig-add-a-new-HWACCEL_CAP_THREAD_SAFE-for-threa.patch --]
[-- Type: text/x-diff, Size: 1369 bytes --]

From 99ce9693bcb6218ffe82bb5780827c1dca614092 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 01:06:52 +0100
Subject: [PATCH 66/72] hwconfig: add a new HWACCEL_CAP_THREAD_SAFE for
 threadsafe hwaccels

Vulkan is fully threadsafe and stateless, so we can benefit from this.
---
 libavcodec/hwconfig.h      | 1 +
 libavcodec/pthread_frame.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
index 721424912c..e6b78f0160 100644
--- a/libavcodec/hwconfig.h
+++ b/libavcodec/hwconfig.h
@@ -24,6 +24,7 @@
 
 
 #define HWACCEL_CAP_ASYNC_SAFE      (1 << 0)
+#define HWACCEL_CAP_THREAD_SAFE     (1 << 1)
 
 
 typedef struct AVCodecHWConfigInternal {
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 71edd6b3ec..15e8d96a79 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -204,7 +204,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
 
         /* if the previous thread uses hwaccel then we take the lock to ensure
          * the threads don't run concurrently */
-        if (avctx->hwaccel) {
+        if (avctx->hwaccel && !(avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE)) {
             pthread_mutex_lock(&p->parent->hwaccel_mutex);
             p->hwaccel_serializing = 1;
         }
-- 
2.39.2


[-- Attachment #68: 0067-libavcodec-add-Vulkan-common-video-code.patch --]
[-- Type: text/x-diff, Size: 23311 bytes --]

From 2f30e4ddaf855b53cd3d8fd95a863b240bae0047 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 18 Dec 2022 08:31:03 +0100
Subject: [PATCH 67/72] libavcodec: add Vulkan common video code

---
 configure                 |   2 +-
 libavcodec/Makefile       |   2 +
 libavcodec/hwconfig.h     |   2 +
 libavcodec/vulkan.c       |  19 ++
 libavcodec/vulkan.h       |  24 +++
 libavcodec/vulkan_video.c | 417 ++++++++++++++++++++++++++++++++++++++
 libavcodec/vulkan_video.h |  98 +++++++++
 7 files changed, 563 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/vulkan.c
 create mode 100644 libavcodec/vulkan.h
 create mode 100644 libavcodec/vulkan_video.c
 create mode 100644 libavcodec/vulkan_video.h

diff --git a/configure b/configure
index f0f15b9e87..91f715351c 100755
--- a/configure
+++ b/configure
@@ -326,7 +326,6 @@ External library support:
   --disable-securetransport disable Secure Transport, needed for TLS support
                            on OSX if openssl and gnutls are not used [autodetect]
   --enable-vapoursynth     enable VapourSynth demuxer [no]
-  --disable-vulkan         disable Vulkan code [autodetect]
   --disable-xlib           disable xlib [autodetect]
   --disable-zlib           disable zlib [autodetect]
 
@@ -353,6 +352,7 @@ External library support:
   --disable-vaapi          disable Video Acceleration API (mainly Unix/Intel) code [autodetect]
   --disable-vdpau          disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
   --disable-videotoolbox   disable VideoToolbox code [autodetect]
+  --disable-vulkan         disable Vulkan code [autodetect]
 
 Toolchain options:
   --arch=ARCH              select architecture [$arch]
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 4971832ff4..a45c32e564 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -976,6 +976,7 @@ OBJS-$(CONFIG_NVDEC)                      += nvdec.o
 OBJS-$(CONFIG_VAAPI)                      += vaapi_decode.o
 OBJS-$(CONFIG_VIDEOTOOLBOX)               += videotoolbox.o
 OBJS-$(CONFIG_VDPAU)                      += vdpau.o
+OBJS-$(CONFIG_VULKAN)                     += vulkan.o vulkan_video.o
 
 OBJS-$(CONFIG_AV1_D3D11VA_HWACCEL)        += dxva2_av1.o
 OBJS-$(CONFIG_AV1_DXVA2_HWACCEL)          += dxva2_av1.o
@@ -1284,6 +1285,7 @@ SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
+SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h
 SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
 SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
 
diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
index e6b78f0160..220b8a1e95 100644
--- a/libavcodec/hwconfig.h
+++ b/libavcodec/hwconfig.h
@@ -77,6 +77,8 @@ typedef struct AVCodecHWConfigInternal {
     HW_CONFIG_HWACCEL(1, 1, 1, VDPAU,        VDPAU,        ff_ ## codec ## _vdpau_hwaccel)
 #define HWACCEL_VIDEOTOOLBOX(codec) \
     HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## _videotoolbox_hwaccel)
+#define HWACCEL_VULKAN(codec) \
+    HW_CONFIG_HWACCEL(1, 1, 1, VULKAN,       VULKAN,       ff_ ## codec ## _vulkan_hwaccel)
 #define HWACCEL_D3D11VA(codec) \
     HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD,  NONE,         ff_ ## codec ## _d3d11va_hwaccel)
 
diff --git a/libavcodec/vulkan.c b/libavcodec/vulkan.c
new file mode 100644
index 0000000000..fc8a1fa47b
--- /dev/null
+++ b/libavcodec/vulkan.c
@@ -0,0 +1,19 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/vulkan.c"
diff --git a/libavcodec/vulkan.h b/libavcodec/vulkan.h
new file mode 100644
index 0000000000..b15efd4add
--- /dev/null
+++ b/libavcodec/vulkan.h
@@ -0,0 +1,24 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_H
+#define AVCODEC_VULKAN_H
+
+#include "libavutil/vulkan.h"
+
+#endif /* AVCODEC_VULKAN_H */
diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c
new file mode 100644
index 0000000000..3e76109b26
--- /dev/null
+++ b/libavcodec/vulkan_video.c
@@ -0,0 +1,417 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "codec_id.h"
+
+#include "vulkan_video.h"
+
+/* Vulkan extensions and codec operations needed to encode/decode each codec,
+ * indexed by AVCodecID. Members that are not named (and codecs that are not
+ * listed) are zero-initialized. */
+const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO] = {
+    [AV_CODEC_ID_H264] = {
+#if CONFIG_VULKAN_ENCODE
+        .encode_extension = FF_VK_EXT_VIDEO_ENCODE_H264 | FF_VK_EXT_SYNC2,
+        .encode_op        = VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_EXT,
+#endif
+        .decode_extension = FF_VK_EXT_VIDEO_DECODE_H264 | FF_VK_EXT_SYNC2,
+        .decode_op        = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR,
+    },
+    [AV_CODEC_ID_HEVC] = {
+#if CONFIG_VULKAN_ENCODE
+        .encode_extension = FF_VK_EXT_VIDEO_ENCODE_H265 | FF_VK_EXT_SYNC2,
+        .encode_op        = VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_EXT,
+#endif
+        .decode_extension = FF_VK_EXT_VIDEO_DECODE_H265 | FF_VK_EXT_SYNC2,
+        .decode_op        = VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR,
+    },
+};
+
+/**
+ * Map a Vulkan format to an AVPixelFormat.
+ *
+ * @param vkf   Vulkan format to look up
+ * @param score written only when a format is matched: 1 for the plain 8-bit
+ *              and 16-bit formats, 2 for the 10X6/12X4 padded formats
+ * @return the matching pixel format, or AV_PIX_FMT_NONE (with *score left
+ *         untouched) when vkf is not handled
+ */
+enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf, int *score)
+{
+    switch (vkf) {
+    /* Mono */
+    case VK_FORMAT_R8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_GRAY8;
+    case VK_FORMAT_R10X6_UNORM_PACK16:
+    case VK_FORMAT_R12X4_UNORM_PACK16:
+        *score = 2;
+        return AV_PIX_FMT_GRAY16;
+    case VK_FORMAT_R16_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_GRAY16;
+
+    /* RGB */
+    case VK_FORMAT_B8G8R8A8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_BGRA;
+    case VK_FORMAT_R8G8B8A8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_RGBA;
+    case VK_FORMAT_R8G8B8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_RGB24;
+    case VK_FORMAT_B8G8R8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_BGR24;
+
+    /* 420 */
+    case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_NV12;
+    case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV420P;
+    case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_P010;
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
+        *score = 2;
+        /* 10-bit planar, in line with the 4:2:2 and 4:4:4 entries below */
+        return AV_PIX_FMT_YUV420P10;
+    /* No support for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16 */
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV420P12;
+    case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV420P16;
+
+    /* 422 */
+    case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_NV16;
+    case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV422P;
+    case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_NV20;
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV422P10;
+    /* No support for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16 */
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV422P12;
+    case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV422P16;
+
+    /* 444 */
+    case VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
+        *score = 1;
+        return AV_PIX_FMT_NV24;
+    case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV444P;
+    /* No support for VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT */
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV444P10;
+    /* No support for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT */
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV444P12;
+    case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV444P16;
+    default:
+        break;
+    }
+
+    return AV_PIX_FMT_NONE;
+}
+
+/* Return the image aspect bits that cover every plane of vkf:
+ * the colour aspect for single-plane formats, one plane bit per plane for
+ * multiplane formats, and VK_IMAGE_ASPECT_NONE for formats not listed. */
+VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf)
+{
+    VkImageAspectFlags planes = VK_IMAGE_ASPECT_NONE;
+
+    switch (vkf) {
+    /* Single-plane mono and RGB formats */
+    case VK_FORMAT_R8_UNORM:
+    case VK_FORMAT_R10X6_UNORM_PACK16:
+    case VK_FORMAT_R12X4_UNORM_PACK16:
+    case VK_FORMAT_R16_UNORM:
+    case VK_FORMAT_B8G8R8A8_UNORM:
+    case VK_FORMAT_R8G8B8A8_UNORM:
+    case VK_FORMAT_R8G8B8_UNORM:
+    case VK_FORMAT_B8G8R8_UNORM:
+        planes = VK_IMAGE_ASPECT_COLOR_BIT;
+        break;
+
+    /* Two-plane (semi-planar) 4:2:0, 4:2:2 and 4:4:4 formats */
+    case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+    case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+    case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
+    case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
+    case VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
+        planes = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT;
+        break;
+
+    /* Three-plane (fully planar) 4:2:0, 4:2:2 and 4:4:4 formats */
+    case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
+    case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
+    case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
+    case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
+    case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
+    case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
+        planes = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT;
+        break;
+
+    default:
+        break;
+    }
+
+    return planes;
+}
+
+/**
+ * Translate a pixel format descriptor's chroma layout into Vulkan's chroma
+ * subsampling flag.
+ *
+ * @param desc pixel format descriptor; must not be NULL
+ * @return the matching subsampling bit, or
+ *         VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR for layouts that are
+ *         neither monochrome, 4:4:4, 4:2:2 nor 4:2:0
+ */
+VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc)
+{
+    if (desc->nb_components == 1)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR;
+    else if (!desc->log2_chroma_w && !desc->log2_chroma_h)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR;
+    /* 4:2:2 halves the chroma width only; halving only the height would be
+     * 4:4:0, which has no Vulkan video equivalent. */
+    else if (desc->log2_chroma_w == 1 && !desc->log2_chroma_h)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR;
+    else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR;
+    return VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR;
+}
+
+/* Convert a component bit depth (8, 10 or 12) into the corresponding Vulkan
+ * video bit depth flag; any other depth yields the INVALID value. */
+VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth)
+{
+    if (depth == 8)
+        return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR;
+    if (depth == 10)
+        return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR;
+    if (depth == 12)
+        return VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR;
+
+    return VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR;
+}
+
+/* AVBuffer free callback for pool entries: unmaps and frees the Vulkan buffer
+ * embedded in the FFVkVideoBuffer, then releases the structure itself.
+ * opaque is the FFVulkanContext the pool was created with. */
+static void free_data_buf(void *opaque, uint8_t *data)
+{
+    FFVulkanContext *vkctx = opaque;
+    FFVkBuffer *vkbuf = &((FFVkVideoBuffer *)data)->buf;
+
+    ff_vk_unmap_buffers(vkctx, vkbuf, 1, 0);
+    ff_vk_free_buf(vkctx, vkbuf);
+    av_free(data);
+}
+
+/* Buffer pool allocator: returns a zeroed block of `size` bytes wrapped in an
+ * AVBufferRef whose free callback is free_data_buf(), or NULL on allocation
+ * failure. opaque is forwarded to the free callback. */
+static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
+{
+    AVBufferRef *ref;
+    uint8_t *buf = av_mallocz(size);
+    if (!buf)
+        return NULL;
+
+    ref = av_buffer_create(buf, size, free_data_buf, opaque, 0);
+    /* av_buffer_create() does not take ownership on failure */
+    if (!ref)
+        av_free(buf);
+
+    return ref;
+}
+
+/**
+ * Fetch a pooled, host-mapped buffer that can hold at least `size` bytes.
+ *
+ * The pool is created on first use. A pool entry whose buffer is already big
+ * enough is returned as is; otherwise its buffer is freed and recreated with
+ * a size of at least 1 MiB, aligned to minBitstreamBufferSizeAlignment and
+ * rounded up to a power of two.
+ *
+ * @param ctx          Vulkan context used to create and map the buffer
+ * @param s            video state holding the pool and the capabilities
+ * @param buf          receives the buffer reference; set to NULL on failure
+ * @param usage        usage flags for a newly created buffer
+ * @param create_pNext pNext chain passed on to buffer creation
+ * @param size         minimum required size in bytes
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
+                           AVBufferRef **buf, VkBufferUsageFlags usage,
+                           void *create_pNext, size_t size)
+{
+    int err;
+    AVBufferRef *ref;
+    FFVkVideoBuffer *data;
+
+    if (!s->buf_pool) {
+        s->buf_pool = av_buffer_pool_init2(sizeof(FFVkVideoBuffer), ctx,
+                                           alloc_data_buf, NULL);
+        if (!s->buf_pool)
+            return AVERROR(ENOMEM);
+    }
+
+    *buf = ref = av_buffer_pool_get(s->buf_pool);
+    if (!ref)
+        return AVERROR(ENOMEM);
+
+    data = (FFVkVideoBuffer *)ref->data;
+
+    if (data->buf.size >= size)
+        return 0;
+
+    /* No point in requesting anything smaller. */
+    size = FFMAX(size, 1024*1024);
+    size = FFALIGN(size, s->caps.minBitstreamBufferSizeAlignment);
+
+    /* Align buffer to nearest power of two. Makes fragmentation management
+     * easier, and gives us ample headroom. */
+    size--;
+    size |= size >>  1;
+    size |= size >>  2;
+    size |= size >>  4;
+    size |= size >>  8;
+    size |= size >> 16;
+    size++;
+
+    ff_vk_free_buf(ctx, &data->buf);
+    memset(data, 0, sizeof(FFVkVideoBuffer));
+
+    err = ff_vk_create_buf(ctx, &data->buf, size,
+                           create_pNext, NULL, usage,
+                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0)
+        goto fail;
+
+    /* Map the buffer */
+    err = ff_vk_map_buffers(ctx, &data->buf, &data->mem, 1, 0);
+    if (err < 0) {
+        /* Do not hand an unmapped buffer back to the pool: the size check
+         * above would accept it later and callers would then write through
+         * a NULL data->mem. */
+        ff_vk_free_buf(ctx, &data->buf);
+        memset(data, 0, sizeof(FFVkVideoBuffer));
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    av_buffer_unref(&ref);
+    /* ref was the only reference; do not leave the caller a dangling copy */
+    *buf = NULL;
+    return err;
+}
+
+/**
+ * Destroy the video session, free the device memory that was bound to it and
+ * tear down the bitstream buffer pool. Fields are reset as they are released,
+ * so the function may be called on a partially initialized or already
+ * released FFVkVideoCommon.
+ */
+av_cold void ff_vk_video_common_uninit(FFVulkanContext *s,
+                                        FFVkVideoCommon *common)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    if (common->session) {
+        vk->DestroyVideoSessionKHR(s->hwctx->act_dev, common->session,
+                                   s->hwctx->alloc);
+        /* Non-dispatchable handles are not pointers on every platform */
+        common->session = VK_NULL_HANDLE;
+    }
+
+    if (common->mem)
+        for (uint32_t i = 0; i < common->nb_mem; i++)
+            vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc);
+
+    av_freep(&common->mem);
+    common->nb_mem = 0;
+
+    av_buffer_pool_uninit(&common->buf_pool);
+}
+
+/**
+ * Create a video session, then allocate and bind the device memory it asks
+ * for.
+ *
+ * @param log            logging context handed to av_log()
+ * @param s              Vulkan context providing the device and functions
+ * @param common         receives the session and its memory allocations
+ * @param session_create creation parameters for the video session
+ * @return 0 on success, a negative AVERROR code on failure. Except when the
+ *         session itself could not be created, a failure releases everything
+ *         via ff_vk_video_common_uninit().
+ */
+av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+                                    FFVkVideoCommon *common,
+                                    VkVideoSessionCreateInfoKHR *session_create)
+{
+    int err;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkVideoSessionMemoryRequirementsKHR *mem = NULL;
+    VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL;
+
+    /* Create session */
+    ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create,
+                                    s->hwctx->alloc, &common->session);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    /* First call only queries how many memory bindings are required */
+    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
+                                                   common->session,
+                                                   &common->nb_mem,
+                                                   NULL);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Zeroed arrays; av_calloc() also guards the size multiplication */
+    common->mem = av_calloc(common->nb_mem, sizeof(*common->mem));
+    mem         = av_calloc(common->nb_mem, sizeof(*mem));
+    bind_mem    = av_calloc(common->nb_mem, sizeof(*bind_mem));
+    if (!common->mem || !mem || !bind_mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Set the needed fields to get the memory requirements */
+    for (uint32_t i = 0; i < common->nb_mem; i++)
+        mem[i] = (VkVideoSessionMemoryRequirementsKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_MEMORY_REQUIREMENTS_KHR,
+        };
+
+    /* Finally get the memory requirements */
+    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
+                                                   common->session, &common->nb_mem,
+                                                   mem);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Now allocate each requested memory.
+     * For ricing, could pool together memory that ends up in the same index. */
+    for (uint32_t i = 0; i < common->nb_mem; i++) {
+        err = ff_vk_alloc_mem(s, &mem[i].memoryRequirements,
+                              UINT32_MAX, NULL, NULL, &common->mem[i]);
+        if (err < 0)
+            goto fail;
+
+        bind_mem[i] = (VkBindVideoSessionMemoryInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_BIND_VIDEO_SESSION_MEMORY_INFO_KHR,
+            .memory = common->mem[i],
+            .memoryBindIndex = mem[i].memoryBindIndex,
+            .memoryOffset = 0,
+            .memorySize = mem[i].memoryRequirements.size,
+        };
+
+        /* memorySize is a 64-bit VkDeviceSize and the bind index a uint32_t;
+         * print both with matching, portable conversions. */
+        av_log(log, AV_LOG_VERBOSE, "Allocating %llu bytes in bind index %u for video session\n",
+               (unsigned long long)bind_mem[i].memorySize,
+               (unsigned)bind_mem[i].memoryBindIndex);
+    }
+
+    /* Bind the allocated memory */
+    ret = vk->BindVideoSessionMemoryKHR(s->hwctx->act_dev, common->session,
+                                        common->nb_mem, bind_mem);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    av_freep(&mem);
+    av_freep(&bind_mem);
+
+    return 0;
+
+fail:
+    av_freep(&mem);
+    av_freep(&bind_mem);
+
+    ff_vk_video_common_uninit(s, common);
+    return err;
+}
diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h
new file mode 100644
index 0000000000..5e2676a282
--- /dev/null
+++ b/libavcodec/vulkan_video.h
@@ -0,0 +1,98 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_VIDEO_H
+#define AVCODEC_VULKAN_VIDEO_H
+
+#include "codec_id.h"
+#include "vulkan.h"
+
+#include <vk_video/vulkan_video_codecs_common.h>
+
+/* Split a packed codec version: major in bits 22 and up, minor in the 10 bits
+ * starting at bit 12, patch in the low 12 bits. CODEC_VER() expands to the
+ * three components as a comma-separated argument list. */
+#define CODEC_VER_MAJ(ver) ((ver) >> 22)
+#define CODEC_VER_MIN(ver) (((ver) >> 12) & ((1 << 10) - 1))
+#define CODEC_VER_PAT(ver) ((ver) & ((1 << 12) - 1))
+#define CODEC_VER(ver) CODEC_VER_MAJ(ver), CODEC_VER_MIN(ver), CODEC_VER_PAT(ver)
+
+/**
+ * Vulkan requirements of one codec; element type of ff_vk_codec_map.
+ */
+typedef struct FFVkCodecMap {
+    FFVulkanExtensions               encode_extension; /* extension flags needed for encoding, 0 if none */
+    VkVideoCodecOperationFlagBitsKHR encode_op;        /* Vulkan encode operation, 0 if none */
+    FFVulkanExtensions               decode_extension; /* extension flags needed for decoding */
+    VkVideoCodecOperationFlagBitsKHR decode_op;        /* Vulkan decode operation */
+} FFVkCodecMap;
+
+/**
+ * State of a video session, set up by ff_vk_video_common_init() and released
+ * by ff_vk_video_common_uninit().
+ * Note that the struct tag (FFVkVideoSession) differs from the typedef name.
+ */
+typedef struct FFVkVideoSession {
+    VkVideoSessionKHR session;   /* the video session handle */
+    VkDeviceMemory *mem;         /* device memory allocations bound to the session */
+    uint32_t nb_mem;             /* number of entries in mem */
+    VkVideoCapabilitiesKHR caps; /* read by ff_vk_video_get_buffer() for the bitstream
+                                  * size alignment; not filled in by ff_vk_video_common_init() */
+
+    AVBufferPool *buf_pool;      /* pool of FFVkVideoBuffer, created on first
+                                  * ff_vk_video_get_buffer() call */
+} FFVkVideoCommon;
+
+/**
+ * Index is codec_id.
+ */
+extern const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO];
+
+/**
+ * Get pixfmt from a Vulkan format.
+ */
+enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf, int *score);
+
+/**
+ * Get aspect bits which include all planes from a VkFormat.
+ */
+VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf);
+
+/**
+ * Get Vulkan's chroma subsampling from a pixfmt descriptor.
+ */
+VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc);
+
+/**
+ * Get Vulkan's bit depth from an [8:12] integer.
+ */
+VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth);
+
+/**
+ * Payload of the buffers handed out by ff_vk_video_get_buffer().
+ */
+typedef struct FFVkVideoBuffer {
+    FFVkBuffer buf; /* the Vulkan buffer */
+    uint8_t *mem;   /* host pointer the buffer is mapped at */
+} FFVkVideoBuffer;
+
+/**
+ * Get a mapped FFVkVideoBuffer with a specific guaranteed minimum size
+ * from a pool.
+ */
+int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
+                           AVBufferRef **buf, VkBufferUsageFlags usage,
+                           void *create_pNext, size_t size);
+
+/**
+ * Initialize video session, allocating and binding necessary memory.
+ */
+int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+                            FFVkVideoCommon *common,
+                            VkVideoSessionCreateInfoKHR *session_create);
+
+/**
+ * Free video session and required resources.
+ */
+void ff_vk_video_common_uninit(FFVulkanContext *s, FFVkVideoCommon *common);
+
+#endif /* AVCODEC_VULKAN_VIDEO_H */
-- 
2.39.2


[-- Attachment #69: 0068-libavcodec-add-Vulkan-common-video-decoding-code.patch --]
[-- Type: text/x-diff, Size: 53050 bytes --]

From d3f2fa8e530dc94c9058149a2cee92196c7adb33 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 16 Jan 2023 07:23:27 +0100
Subject: [PATCH 68/72] libavcodec: add Vulkan common video decoding code

---
 libavcodec/Makefile        |    2 +-
 libavcodec/vulkan_decode.c | 1135 ++++++++++++++++++++++++++++++++++++
 libavcodec/vulkan_decode.h |  163 ++++++
 3 files changed, 1299 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/vulkan_decode.c
 create mode 100644 libavcodec/vulkan_decode.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index a45c32e564..eabf4eb43e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1285,7 +1285,7 @@ SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
-SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h
+SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_decode.h
 SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
 SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
 
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
new file mode 100644
index 0000000000..582968e1da
--- /dev/null
+++ b/libavcodec/vulkan_decode.c
@@ -0,0 +1,1135 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_video.h"
+#include "vulkan_decode.h"
+#include "config_components.h"
+
+#if CONFIG_H264_VULKAN_HWACCEL
+extern const VkExtensionProperties ff_vk_dec_h264_ext;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+extern const VkExtensionProperties ff_vk_dec_hevc_ext;
+#endif
+
+/* Codec-specific decode extension descriptors, indexed by AVCodecID. Only the
+ * hwaccels enabled in the build get an entry; slots below the highest listed
+ * ID that are not named are NULL, and the array ends at that highest ID. */
+static const VkExtensionProperties *dec_ext[] = {
+#if CONFIG_H264_VULKAN_HWACCEL
+    [AV_CODEC_ID_H264] = &ff_vk_dec_h264_ext,
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+    [AV_CODEC_ID_HEVC] = &ff_vk_dec_hevc_ext,
+#endif
+};
+
+/* Create a 2D image view over the first image of src, using the decoder's
+ * picture format and YCbCr sampler conversion. On success the view is stored
+ * in *dst_view and the per-plane aspect bits of the picture format in
+ * *aspect. Returns 0, or AVERROR_EXTERNAL if the view cannot be created. */
+static int vk_decode_create_view(FFVulkanDecodeContext *ctx, VkImageView *dst_view,
+                                 VkImageAspectFlags *aspect, AVVkFrame *src)
+{
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VkSamplerYcbcrConversionInfo ycbcr_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO,
+        .conversion = ctx->yuv_sampler,
+    };
+    VkImageViewCreateInfo view_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = &ycbcr_info,
+        .image = src->img[0],
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = ctx->pic_format,
+        .components = {
+            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+        },
+        .subresourceRange = {
+            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
+            .levelCount     = 1,
+            .baseArrayLayer = 0,
+            .layerCount     = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };
+
+    if (vk->CreateImageView(ctx->s.hwctx->act_dev, &view_info,
+                            ctx->s.hwctx->alloc, dst_view) != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    /* The view itself uses the colour aspect; callers get the plane bits */
+    *aspect = ff_vk_aspect_bits_from_vkfmt(ctx->pic_format);
+
+    return 0;
+}
+
+/* Allocate an AVFrame backed by a buffer from the DPB frames context pool.
+ * Returns the frame, with hw_frames_ctx, buf[0] and data[0] set, or NULL if
+ * any allocation fails. */
+static AVFrame *vk_get_dpb_pool(FFVulkanDecodeContext *ctx)
+{
+    AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+    AVFrame *avf = av_frame_alloc();
+    if (!avf)
+        return NULL;
+
+    avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref);
+    if (!avf->hw_frames_ctx)
+        goto fail;
+
+    avf->buf[0] = av_buffer_pool_get(dpb_frames->pool);
+    if (!avf->buf[0])
+        goto fail;
+
+    avf->data[0] = avf->buf[0]->data;
+
+    return avf;
+
+fail:
+    /* av_frame_free() sets avf to NULL, so it must not be touched afterwards */
+    av_frame_free(&avf);
+    return NULL;
+}
+
+/**
+ * Prepare the per-picture Vulkan state before a picture is decoded or used as
+ * a reference.
+ *
+ * Resets the slice counters, and unless the picture already has a reference
+ * view, sets up its image views (and, for the current picture, an initial
+ * slice buffer).
+ *
+ * @param ctx        decoder context
+ * @param pic        frame whose buf[0] holds the output AVVkFrame
+ * @param vkpic      per-picture state to fill in
+ * @param is_current nonzero if this is the picture being decoded
+ * @param alloc_dpb  nonzero if references use separate DPB images (the shared
+ *                   layered view, or a frame from the DPB pool); zero if the
+ *                   output image doubles as the reference
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic,
+                               FFVulkanDecodePicture *vkpic, int is_current,
+                               int alloc_dpb)
+{
+    int err;
+
+    vkpic->nb_slices = 0;
+    vkpic->slices_size = 0;
+
+    /* A reference view already exists, so the picture was prepared before */
+    if (vkpic->img_view_ref)
+        return 0;
+
+    /* Pre-allocate slice buffer with a reasonable default */
+    if (is_current) {
+        uint8_t *slices;
+        uint64_t min_alloc = 4096;
+        /* Disabled; looks paired with the disabled host-pointer import path
+         * in ff_vk_decode_frame() */
+        if (0)
+            min_alloc = 2*ctx->s.hprops.minImportedHostPointerAlignment;
+
+        /* av_fast_realloc() returns its pointer argument untouched when the
+         * recorded capacity already suffices, so pass the real pointer and
+         * never pair a NULL pointer with a stale capacity. */
+        if (!vkpic->slices)
+            vkpic->slices_size_max = 0;
+        slices = av_fast_realloc(vkpic->slices, &vkpic->slices_size_max, min_alloc);
+        if (!slices)
+            return AVERROR(ENOMEM);
+        vkpic->slices = slices;
+
+        if (0)
+            vkpic->slices_size += ctx->s.hprops.minImportedHostPointerAlignment;
+    }
+
+    vkpic->dpb_frame    = NULL;
+    vkpic->dpb_vkf      = NULL;
+    vkpic->img_view_ref = NULL;
+    vkpic->img_view_out = NULL;
+
+    if (ctx->layered_dpb && alloc_dpb) {
+        /* All references share the context-wide layered view */
+        vkpic->img_view_ref = ctx->layered_view;
+        vkpic->img_aspect_ref = ctx->layered_aspect;
+    } else if (alloc_dpb) {
+        /* Separate DPB image taken from the DPB pool */
+        vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+        if (!vkpic->dpb_frame)
+            return AVERROR(ENOMEM);
+
+        vkpic->dpb_vkf = (AVVkFrame *)vkpic->dpb_frame->data[0];
+
+        err = vk_decode_create_view(ctx, &vkpic->img_view_ref,
+                                    &vkpic->img_aspect_ref,
+                                    vkpic->dpb_vkf);
+        if (err < 0)
+            return err;
+    }
+
+    if (!alloc_dpb || is_current) {
+        err = vk_decode_create_view(ctx, &vkpic->img_view_out,
+                                    &vkpic->img_aspect,
+                                    (AVVkFrame *)pic->buf[0]->data);
+        if (err < 0)
+            return err;
+
+        /* Without a separate DPB the output image is also the reference */
+        if (!alloc_dpb) {
+            vkpic->img_view_ref = vkpic->img_view_out;
+            vkpic->img_aspect_ref = vkpic->img_aspect;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Append one slice to a picture's bitstream buffer and record its offset.
+ *
+ * @param vp            picture whose slice buffer and offset table are grown
+ * @param data          slice data
+ * @param size          size of the slice data in bytes
+ * @param add_startcode if nonzero, a 00 00 01 start code is written before
+ *                      the slice data
+ * @param nb_slices     in: number of slices added so far; out: incremented
+ * @param offsets       receives the (possibly reallocated) offset table
+ * @return 0 on success, AVERROR(ENOMEM) if either buffer cannot be grown
+ */
+int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp,
+                           const uint8_t *data, size_t size, int add_startcode,
+                           uint32_t *nb_slices, const uint32_t **offsets)
+{
+    static const uint8_t startcode_prefix[3] = { 0x0, 0x0, 0x1 };
+    const size_t startcode_len = add_startcode ? sizeof(startcode_prefix) : 0;
+    const int nb = *nb_slices;
+    uint8_t *slices;
+    uint32_t *slice_off;
+
+    /* Size by element, not by pointer: the table holds uint32_t offsets */
+    slice_off = av_fast_realloc(vp->slice_off, &vp->slice_off_max,
+                                (nb + 1)*sizeof(*slice_off));
+    if (!slice_off)
+        return AVERROR(ENOMEM);
+
+    *offsets = vp->slice_off = slice_off;
+    slice_off[nb] = vp->slices_size;
+
+    slices = av_fast_realloc(vp->slices, &vp->slices_size_max,
+                             vp->slices_size + size + startcode_len);
+    if (!slices)
+        return AVERROR(ENOMEM);
+
+    vp->slices = slices;
+
+    /* Startcode */
+    memcpy(slices + vp->slices_size, startcode_prefix, startcode_len);
+
+    /* Slice data */
+    memcpy(slices + vp->slices_size + startcode_len, data, size);
+
+    *nb_slices = nb + 1;
+    vp->nb_slices++;
+    vp->slices_size += startcode_len + size;
+
+    return 0;
+}
+
+/* Reset the video session: records a begin-coding / control(RESET) /
+ * end-coding sequence, using the empty session parameters, into an execution
+ * context and submits it. */
+void ff_vk_decode_flush(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &dec->s.vkfn;
+    FFVkExecContext *exec;
+    VkCommandBuffer cmd_buf;
+
+    VkVideoBeginCodingInfoKHR begin_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
+        .videoSession = dec->common.session,
+        .videoSessionParameters = dec->empty_session_params,
+    };
+    VkVideoCodingControlInfoKHR reset_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
+        .flags = VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR,
+    };
+    VkVideoEndCodingInfoKHR end_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
+    };
+
+    exec = ff_vk_exec_get(&dec->exec_pool);
+    ff_vk_exec_start(&dec->s, exec);
+    cmd_buf = exec->buf;
+
+    vk->CmdBeginVideoCodingKHR(cmd_buf, &begin_info);
+    vk->CmdControlVideoCodingKHR(cmd_buf, &reset_info);
+    vk->CmdEndVideoCodingKHR(cmd_buf, &end_info);
+
+    ff_vk_exec_submit(&dec->s, exec);
+}
+
+/* AVBuffer free callback for buffers created over imported host memory:
+ * frees the Vulkan buffer, then the FFVkVideoBuffer wrapper. Unlike the pool
+ * callback in vulkan_video.c, it does not unmap anything. */
+static void host_map_buf_free(void *opaque, uint8_t *data)
+{
+    FFVulkanContext *vkctx = opaque;
+    FFVkVideoBuffer *wrapper = (FFVkVideoBuffer *)data;
+
+    ff_vk_free_buf(vkctx, &wrapper->buf);
+    av_free(wrapper);
+}
+
+int ff_vk_decode_frame(AVCodecContext *avctx,
+                       AVFrame *pic,    FFVulkanDecodePicture *vp,
+                       AVFrame *rpic[], FFVulkanDecodePicture *rvkp[])
+{
+    int err;
+    VkResult ret;
+    VkCommandBuffer cmd_buf;
+    FFVkVideoBuffer *sd_buf;
+
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    /* Output */
+    AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data;
+
+    /* Quirks */
+    const int layered_dpb = ctx->layered_dpb;
+
+    VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)vp->session_params->data;
+    VkVideoBeginCodingInfoKHR decode_start = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
+        .videoSession = ctx->common.session,
+        .videoSessionParameters = *par,
+        .referenceSlotCount = vp->decode_info.referenceSlotCount,
+        .pReferenceSlots = vp->decode_info.pReferenceSlots,
+    };
+    VkVideoEndCodingInfoKHR decode_end = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
+    };
+
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+    AVBufferRef *sd_ref = NULL;
+    size_t data_size = FFALIGN(vp->slices_size, ctx->common.caps.minBitstreamBufferSizeAlignment);
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool);
+
+    if (ctx->exec_pool.nb_queries) {
+        int64_t prev_sub_res = 0;
+        ff_vk_exec_wait(&ctx->s, exec);
+        ret = ff_vk_exec_get_query(&ctx->s, exec, NULL, &prev_sub_res);
+        if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+
+        if (ret == VK_SUCCESS)
+            av_log(avctx, prev_sub_res < 0 ? AV_LOG_ERROR : AV_LOG_DEBUG,
+                   "Result of previous frame decoding: %li\n", prev_sub_res);
+    }
+
+    if (0) {
+        size_t req_size;
+        VkExternalMemoryBufferCreateInfo create_desc = {
+            .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
+            .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+            .pNext = &ctx->profile_list,
+        };
+
+        VkImportMemoryHostPointerInfoEXT import_desc = {
+            .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+        };
+
+        VkMemoryHostPointerPropertiesEXT p_props = {
+            .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
+        };
+
+        /* Align slices pointer */
+        import_desc.pHostPointer = (void *)FFALIGN((uintptr_t)vp->slices,
+                                                   ctx->s.hprops.minImportedHostPointerAlignment);
+
+        req_size = FFALIGN(data_size,
+                           ctx->s.hprops.minImportedHostPointerAlignment);
+
+        ret = vk->GetMemoryHostPointerPropertiesEXT(ctx->s.hwctx->act_dev,
+                                                    import_desc.handleType,
+                                                    import_desc.pHostPointer,
+                                                    &p_props);
+
+        if (ret == VK_SUCCESS) {
+            sd_buf = av_mallocz(sizeof(*sd_buf));
+            if (!sd_buf)
+                return AVERROR(ENOMEM);
+
+            err = ff_vk_create_buf(&ctx->s, &sd_buf->buf, req_size,
+                                   &create_desc, &import_desc,
+                                   VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
+                                   VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+            if (err < 0) {
+                av_free(sd_buf);
+                return err; /* This shouldn't error out, unless it's critical */
+            } else {
+                size_t neg_offs = (uint8_t *)import_desc.pHostPointer - vp->slices;
+
+                sd_ref = av_buffer_create((uint8_t *)sd_buf, sizeof(*sd_buf),
+                                          host_map_buf_free, &ctx->s, 0);
+                if (!sd_ref) {
+                    ff_vk_free_buf(&ctx->s, &sd_buf->buf);
+                    av_free(sd_buf);
+                    return AVERROR(ENOMEM);
+                }
+
+                for (int i = 0; i < vp->nb_slices; i++)
+                    vp->slice_off[i] -= neg_offs;
+
+                sd_buf->mem = vp->slices;
+            }
+        }
+    }
+
+    if (!sd_ref) {
+        err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &sd_ref,
+                                     VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
+                                     &ctx->profile_list, data_size);
+        if (err < 0)
+            return err;
+
+        sd_buf = (FFVkVideoBuffer *)sd_ref->data;
+
+        /* Copy the slices data to the buffer */
+        memcpy(sd_buf->mem, vp->slices, vp->slices_size);
+    }
+
+    /* Flush if needed */
+    if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        VkMappedMemoryRange flush_buf = {
+            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = sd_buf->buf.mem,
+            .offset = 0,
+            .size = FFALIGN(vp->slices_size,
+                            ctx->s.props.properties.limits.nonCoherentAtomSize),
+        };
+
+        ret = vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &flush_buf);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   ff_vk_ret2str(ret));
+            av_buffer_unref(&sd_ref);
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    vp->decode_info.srcBuffer       = sd_buf->buf.buf;
+    vp->decode_info.srcBufferOffset = 0;
+    vp->decode_info.srcBufferRange  = data_size;
+
+    /* Start command buffer recording */
+    ff_vk_exec_start(&ctx->s, exec);
+    cmd_buf = exec->buf;
+
+    /* Slices */
+    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &sd_ref, 1, 0);
+    if (err < 0)
+        return err;
+
+    /* Parameters */
+    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->session_params, 1, 0);
+    if (err < 0)
+        return err;
+
+    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic,
+                                   VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+    if (err < 0)
+        return err;
+
+    err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                      pic);
+    if (err < 0)
+        return err;
+
+    /* Output image - change layout, as it comes from a pool */
+    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+        .pNext = NULL,
+        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+        .srcAccessMask = vkf->access[0],
+        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
+        .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR,
+        .oldLayout = vkf->layout[0],
+        .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR,
+        .srcQueueFamilyIndex = vkf->queue_family[0],
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = vkf->img[0],
+        .subresourceRange = (VkImageSubresourceRange) {
+            .aspectMask = vp->img_aspect,
+            .layerCount = 1,
+            .levelCount = 1,
+        },
+    };
+    ff_vk_exec_update_frame(&ctx->s, exec, pic,
+                            &img_bar[nb_img_bar], &nb_img_bar);
+
+    /* Reference for the current image, if existing and not layered */
+    if (vp->dpb_frame) {
+        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
+                                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+        if (err < 0)
+            return err;
+    }
+
+    if (!layered_dpb) {
+        /* All references (apart from the current) for non-layered refs */
+
+        for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) {
+            AVFrame *ref_frame = rpic[i];
+            FFVulkanDecodePicture *rvp = rvkp[i];
+            AVFrame *ref = rvp->dpb_frame ? rvp->dpb_frame : ref_frame;
+
+            err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref,
+                                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+            if (err < 0)
+                return err;
+
+            if (err == 0) {
+                err = ff_vk_exec_mirror_sem_value(&ctx->s, exec,
+                                                  &rvp->sem, &rvp->sem_value,
+                                                  ref);
+                if (err < 0)
+                    return err;
+            }
+
+            if (!rvp->dpb_frame) {
+                AVVkFrame *rvkf = (AVVkFrame *)ref->data;
+
+                img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
+                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+                    .pNext = NULL,
+                    .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                    .srcAccessMask = rvkf->access[0],
+                    .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
+                    .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR |
+                                     VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR,
+                    .oldLayout = rvkf->layout[0],
+                    .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR,
+                    .srcQueueFamilyIndex = rvkf->queue_family[0],
+                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                    .image = rvkf->img[0],
+                    .subresourceRange = (VkImageSubresourceRange) {
+                        .aspectMask = rvp->img_aspect_ref,
+                        .layerCount = 1,
+                        .levelCount = 1,
+                    },
+                };
+                ff_vk_exec_update_frame(&ctx->s, exec, ref,
+                                        &img_bar[nb_img_bar], &nb_img_bar);
+            }
+        }
+    } else if (vp->decode_info.referenceSlotCount ||
+               vp->img_view_out != vp->img_view_ref) {
+        /* Single barrier for a single layered ref */
+        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->layered_frame,
+                                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+        if (err < 0)
+            return err;
+    }
+
+    /* Change image layout */
+    vk->CmdPipelineBarrier2KHR(cmd_buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    /* Start, use parameters, decode and end decoding */
+    vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start);
+
+    /* Start status query TODO: remove check when radv gets support */
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdBeginQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0, 0);
+
+    vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info);
+
+    /* End status query */
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdEndQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0);
+
+    vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end);
+
+    /* End recording and submit for execution */
+    return ff_vk_exec_submit(&ctx->s, exec);
+}
+
+/* Release the per-picture decode state held in vp.
+ * With a context, this first waits on the picture's semaphore and destroys
+ * the image views the picture owns; without one, only the host-side
+ * allocations and the DPB frame reference are dropped. */
+void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp)
+{
+    /* The frame is deliberately not locked for this wait: the exact current
+     * semaphore value is irrelevant, it only has to be later than the point
+     * at which the image was submitted for decoding. */
+    if (ctx && vp->sem) {
+        FFVulkanFunctions *fn = &ctx->s.vkfn;
+        VkSemaphoreWaitInfo wait_info = {
+            .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+            .semaphoreCount = 1,
+            .pSemaphores    = &vp->sem,
+            .pValues        = &vp->sem_value,
+        };
+
+        fn->WaitSemaphores(ctx->s.hwctx->act_dev, &wait_info, UINT64_MAX);
+    }
+
+    /* Free slices data
+     * TODO: use a pool in the decode context instead to avoid per-frame allocs. */
+    av_freep(&vp->slices);
+    av_freep(&vp->slice_off);
+
+    /* TODO: investigate why a NULL context can reach this function.
+     * Without a context there is no device to destroy the views with. */
+    if (ctx) {
+        FFVulkanFunctions *fn = &ctx->s.vkfn;
+
+        /* Output view: only when it exists and is distinct from the ref view */
+        if (vp->img_view_out && vp->img_view_out != vp->img_view_ref)
+            fn->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_out,
+                                 ctx->s.hwctx->alloc);
+
+        /* Reference view (unlayered): only when the picture has a DPB frame */
+        if (vp->dpb_vkf && vp->img_view_ref)
+            fn->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_ref,
+                                 ctx->s.hwctx->alloc);
+    }
+
+    av_frame_free(&vp->dpb_frame);
+}
+
+/* Probe the device for decode support of the current stream and choose the
+ * image format to decode into.
+ *
+ * Merely querying decoder capabilities requires a fair amount of setup, so
+ * when hwaccel_priv_data is available the results (function table, profile
+ * structures, capabilities, picture format, DPB mode) are stored there and
+ * reused; otherwise function-local scratch structures are used.
+ *
+ * avctx        - codec context; codec_id, profile, level, coded size,
+ *                sw_pix_fmt, field_order and hwaccel_flags are read
+ * frames_ref   - reference to the AVHWFramesContext to decode into
+ * width_align  - optional, receives the picture access granularity width
+ * height_align - optional, receives the picture access granularity height
+ * pix_fmt      - receives the chosen software pixel format. It is NOT written
+ *                when the cached early return is taken.
+ * dpb_dedicate - optional, receives whether a separate DPB is required
+ *
+ * Returns 0 on success, a negative AVERROR code otherwise. */
+static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_ref,
+                                    int *width_align, int *height_align,
+                                    enum AVPixelFormat *pix_fmt, int *dpb_dedicate)
+{
+    VkResult ret;
+    int err, max_level, score = INT32_MAX;
+    const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id];
+    AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
+    AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data;
+    AVVulkanDeviceContext *hwctx = device->hwctx;
+    enum AVPixelFormat context_format = frames->sw_format;
+    int context_format_was_found = 0;
+    int cur_profile = avctx->profile;
+    /* Defaults to the requested profile so that codecs without a known base
+     * profile never trigger the profile-mismatch retry below. */
+    int base_profile = cur_profile;
+
+    int dedicated_dpb;
+    int layered_dpb;
+
+    /* Every structure below lives in the decode context when there is one,
+     * and in a local otherwise. */
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanExtensions local_extensions = 0x0;
+    FFVulkanExtensions *extensions = ctx ? &ctx->s.extensions : &local_extensions;
+    FFVulkanFunctions local_vk = { 0 };
+    FFVulkanFunctions *vk = ctx ? &ctx->s.vkfn : &local_vk;
+    VkVideoCapabilitiesKHR local_caps = { 0 };
+    VkVideoCapabilitiesKHR *caps = ctx ? &ctx->common.caps : &local_caps;
+    VkVideoDecodeCapabilitiesKHR local_dec_caps = { 0 };
+    VkVideoDecodeCapabilitiesKHR *dec_caps = ctx ? &ctx->dec_caps : &local_dec_caps;
+    VkVideoDecodeUsageInfoKHR local_usage = { 0 };
+    VkVideoDecodeUsageInfoKHR *usage = ctx ? &ctx->usage : &local_usage;
+    VkVideoProfileInfoKHR local_profile = { 0 };
+    VkVideoProfileInfoKHR *profile = ctx ? &ctx->profile : &local_profile;
+    VkVideoProfileListInfoKHR local_profile_list = { 0 };
+    VkVideoProfileListInfoKHR *profile_list = ctx ? &ctx->profile_list : &local_profile_list;
+
+    VkVideoDecodeH264ProfileInfoKHR local_h264_profile = { 0 };
+    VkVideoDecodeH264ProfileInfoKHR *h264_profile = ctx ? &ctx->h264_profile : &local_h264_profile;
+
+    /* NOTE(review): declared with the H.264 profile struct type, the same way
+     * as the h265_profile field of the decode context. Only sType and
+     * stdProfileIdc are written here; this presumably should be
+     * VkVideoDecodeH265ProfileInfoKHR, changed together with the context
+     * field - confirm. */
+    VkVideoDecodeH264ProfileInfoKHR local_h265_profile = { 0 };
+    VkVideoDecodeH264ProfileInfoKHR *h265_profile = ctx ? &ctx->h265_profile : &local_h265_profile;
+
+    VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
+        .pNext = profile_list,
+    };
+    VkVideoDecodeH264CapabilitiesKHR h264_caps = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR,
+    };
+    VkVideoDecodeH265CapabilitiesKHR h265_caps = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR,
+    };
+    VkVideoFormatPropertiesKHR *ret_info;
+    uint32_t nb_out_fmts = 0;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    if (!desc)
+        return AVERROR(EINVAL);
+
+    if (ctx && ctx->init) {
+        /* Already probed. Report the values cached in the context so that
+         * callers never read uninitialized outputs; *pix_fmt is left as is. */
+        if (width_align)
+            *width_align = caps->pictureAccessGranularity.width;
+        if (height_align)
+            *height_align = caps->pictureAccessGranularity.height;
+        if (dpb_dedicate)
+            *dpb_dedicate = ctx->dedicated_dpb;
+        return 0;
+    }
+
+    if (!vk_codec->decode_op)
+        return AVERROR(EINVAL);
+
+    *extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions,
+                                           hwctx->nb_enabled_dev_extensions);
+
+    if (!(*extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
+        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
+               VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
+        return AVERROR(ENOSYS);
+    } else if (!vk_codec->decode_extension) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported codec for Vulkan decoding: %s!\n",
+               avcodec_get_name(avctx->codec_id));
+        return AVERROR(ENOSYS);
+    } else if (!(vk_codec->decode_extension & *extensions)) {
+        av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n",
+               avcodec_get_name(avctx->codec_id));
+        return AVERROR(ENOSYS);
+    }
+
+    err = ff_vk_load_functions(device, vk, *extensions, 1, 1);
+    if (err < 0)
+        return err;
+
+    /* Re-entered at most once, with cur_profile replaced by base_profile */
+repeat:
+    if (avctx->codec_id == AV_CODEC_ID_H264) {
+        base_profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;
+        dec_caps->pNext = &h264_caps;
+        usage->pNext = h264_profile;
+        h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR;
+        h264_profile->stdProfileIdc = cur_profile;
+        h264_profile->pictureLayout = avctx->field_order == AV_FIELD_PROGRESSIVE ?
+                                      VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR :
+                                      VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR;
+    } else if (avctx->codec_id == AV_CODEC_ID_H265) {
+        base_profile = FF_PROFILE_HEVC_MAIN;
+        dec_caps->pNext = &h265_caps;
+        usage->pNext = h265_profile;
+        h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR;
+        h265_profile->stdProfileIdc = cur_profile;
+    }
+
+    usage->sType           = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR;
+    usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR;
+
+    profile->sType               = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
+    /* NOTE: NVIDIA's implementation fails if the USAGE hint is inserted.
+     * Remove this once it's fixed. */
+    profile->pNext               = usage->pNext;
+    profile->videoCodecOperation = vk_codec->decode_op;
+    profile->chromaSubsampling   = ff_vk_subsampling_from_av_desc(desc);
+    profile->lumaBitDepth        = ff_vk_depth_from_av_depth(desc->comp[0].depth);
+    profile->chromaBitDepth      = profile->lumaBitDepth;
+
+    profile_list->sType        = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
+    profile_list->profileCount = 1;
+    profile_list->pProfiles    = profile;
+
+    /* Get the capabilities of the decoder for the given profile */
+    caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
+    caps->pNext = dec_caps;
+    dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR;
+    /* dec_caps->pNext already filled in */
+
+    ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile,
+                                                    caps);
+    /* AV_HWACCEL_FLAG_* bits live in hwaccel_flags, not in flags */
+    if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR &&
+        avctx->hwaccel_flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH &&
+        cur_profile != base_profile) {
+        cur_profile = base_profile;
+        av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting "
+               "again with profile %s\n",
+               avcodec_get_name(avctx->codec_id),
+               avcodec_profile_name(avctx->codec_id, avctx->profile),
+               avcodec_profile_name(avctx->codec_id, base_profile));
+        goto repeat;
+    } else if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) {
+        av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: "
+               "%s profile \"%s\" not supported!\n",
+               avcodec_get_name(avctx->codec_id),
+               avcodec_profile_name(avctx->codec_id, cur_profile));
+        return AVERROR(EINVAL);
+    } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) {
+        av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: "
+               "format (%s) not supported!\n",
+               av_get_pix_fmt_name(avctx->sw_pix_fmt));
+        return AVERROR(EINVAL);
+    } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT ||
+               ret == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+        return AVERROR(EINVAL);
+    } else if (ret != VK_SUCCESS) {
+        return AVERROR_EXTERNAL;
+    }
+
+    max_level = avctx->codec_id == AV_CODEC_ID_H264 ? h264_caps.maxLevelIdc :
+                avctx->codec_id == AV_CODEC_ID_H265 ? h265_caps.maxLevelIdc :
+                0;
+
+    if (ctx) {
+        /* The capabilities were queried for cur_profile, which differs from
+         * avctx->profile after a fallback to the base profile. */
+        av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile \"%s\":\n",
+               avcodec_get_name(avctx->codec_id),
+               avcodec_profile_name(avctx->codec_id, cur_profile));
+        av_log(avctx, AV_LOG_VERBOSE, "    Maximum level: %i\n",
+               max_level);
+        av_log(avctx, AV_LOG_VERBOSE, "    Width: from %i to %i\n",
+               caps->minCodedExtent.width, caps->maxCodedExtent.width);
+        av_log(avctx, AV_LOG_VERBOSE, "    Height: from %i to %i\n",
+               caps->minCodedExtent.height, caps->maxCodedExtent.height);
+        av_log(avctx, AV_LOG_VERBOSE, "    Width alignment: %i\n",
+               caps->pictureAccessGranularity.width);
+        av_log(avctx, AV_LOG_VERBOSE, "    Height alignment: %i\n",
+               caps->pictureAccessGranularity.height);
+        av_log(avctx, AV_LOG_VERBOSE, "    Bitstream offset alignment: %"PRIu64"\n",
+               caps->minBitstreamBufferOffsetAlignment);
+        av_log(avctx, AV_LOG_VERBOSE, "    Bitstream size alignment: %"PRIu64"\n",
+               caps->minBitstreamBufferSizeAlignment);
+        av_log(avctx, AV_LOG_VERBOSE, "    Maximum references: %u\n",
+               caps->maxDpbSlots);
+        av_log(avctx, AV_LOG_VERBOSE, "    Maximum active references: %u\n",
+               caps->maxActiveReferencePictures);
+        av_log(avctx, AV_LOG_VERBOSE, "    Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n",
+               CODEC_VER(caps->stdHeaderVersion.specVersion),
+               CODEC_VER(dec_ext[avctx->codec_id]->specVersion));
+        av_log(avctx, AV_LOG_VERBOSE, "    Decode modes:%s%s%s\n",
+               dec_caps->flags ? "" :
+                   " invalid",
+               dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR ?
+                   " reuse_dst_dpb" : "",
+               dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR ?
+                   " dedicated_dpb" : "");
+        av_log(avctx, AV_LOG_VERBOSE, "    Capability flags:%s%s%s\n",
+               caps->flags ? "" :
+                   " none",
+               caps->flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ?
+                   " protected" : "",
+               caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ?
+                   " separate_references" : "");
+    }
+
+    /* Check if decoding is possible with the given parameters */
+    if (avctx->coded_width  < caps->minCodedExtent.width   ||
+        avctx->coded_height < caps->minCodedExtent.height  ||
+        avctx->coded_width  > caps->maxCodedExtent.width   ||
+        avctx->coded_height > caps->maxCodedExtent.height)
+        return AVERROR(EINVAL);
+
+    if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) &&
+        avctx->level > max_level)
+        return AVERROR(EINVAL);
+
+    /* Some basic sanity checking */
+    if (!(dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR |
+                             VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR))) {
+        av_log(avctx, AV_LOG_ERROR, "Buggy driver signals invalid decoding mode: neither "
+               "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR nor "
+               "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR are set!\n");
+        return AVERROR_EXTERNAL;
+    } else if ((dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR |
+                                   VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) ==
+                                   VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) &&
+               !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR)) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: "
+               "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set "
+               "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* TODO: make dedicated_dpb tunable */
+    dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR);
+    layered_dpb   = !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
+
+    if (dedicated_dpb) {
+        fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+    } else {
+        fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
+                              VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
+                              VK_IMAGE_USAGE_TRANSFER_SRC_BIT         |
+                              VK_IMAGE_USAGE_SAMPLED_BIT;
+    }
+
+    /* Get the format of the images necessary: first call fetches the count */
+    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev,
+                                                        &fmt_info,
+                                                        &nb_out_fmts, NULL);
+    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+        (!nb_out_fmts && ret == VK_SUCCESS)) {
+        return AVERROR(EINVAL);
+    } else if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts);
+    if (!ret_info)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < nb_out_fmts; i++)
+        ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR;
+
+    /* Second call fills in the format list */
+    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev,
+                                                        &fmt_info,
+                                                        &nb_out_fmts, ret_info);
+    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+        (!nb_out_fmts && ret == VK_SUCCESS)) {
+        av_free(ret_info);
+        return AVERROR(EINVAL);
+    } else if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+               ff_vk_ret2str(ret));
+        av_free(ret_info);
+        return AVERROR_EXTERNAL;
+    }
+
+    *pix_fmt = AV_PIX_FMT_NONE;
+
+    av_log(avctx, AV_LOG_DEBUG, "Pixel format list for decoding:\n");
+    for (int i = 0; i < nb_out_fmts; i++) {
+        int tmp_score;
+        enum AVPixelFormat tmp = ff_vk_pix_fmt_from_vkfmt(ret_info[i].format,
+                                                          &tmp_score);
+        const AVPixFmtDescriptor *tmp_desc = av_pix_fmt_desc_get(tmp);
+        if (tmp == AV_PIX_FMT_NONE || !tmp_desc)
+            continue;
+
+        av_log(avctx, AV_LOG_DEBUG, "    %i - %s (%i), score %i\n", i,
+               av_get_pix_fmt_name(tmp), ret_info[i].format, tmp_score);
+
+        /* The format already set on the frames context always wins and ends
+         * the search; otherwise keep the candidate with the lowest score. */
+        if (context_format == tmp || tmp_score < score) {
+            if (ctx)
+                ctx->pic_format = ret_info[i].format;
+            *pix_fmt = tmp;
+            /* Track the best score, so a later, worse format cannot
+             * replace the current pick. */
+            score = tmp_score;
+            context_format_was_found |= context_format == tmp;
+            if (context_format_was_found)
+                break;
+        }
+    }
+
+    /* The format list is not needed past this point on any path */
+    av_free(ret_info);
+
+    if (*pix_fmt == AV_PIX_FMT_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "No valid pixel format for decoding!\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (width_align)
+        *width_align = caps->pictureAccessGranularity.width;
+    if (height_align)
+        *height_align = caps->pictureAccessGranularity.height;
+    if (dpb_dedicate)
+        *dpb_dedicate = dedicated_dpb;
+
+    av_log(avctx, AV_LOG_VERBOSE, "Chosen frames format: %s\n",
+           av_get_pix_fmt_name(*pix_fmt));
+
+    if (context_format != AV_PIX_FMT_NONE && !context_format_was_found) {
+        av_log(avctx, AV_LOG_ERROR, "Frames context had a pixel format set which "
+               "was not available for decoding into!\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Mark the context as initialized only once nothing can fail anymore,
+     * so a failed probe is never mistaken for a cached successful one. */
+    if (ctx) {
+        ctx->dedicated_dpb = dedicated_dpb;
+        ctx->layered_dpb = layered_dpb;
+        ctx->init = 1;
+    }
+
+    return 0;
+}
+
+/* Fill in the parameters of a frames context so it can be decoded into.
+ * Size is the coded size rounded up to the alignment reported by
+ * vulkan_decode_check_init(), which also writes sw_format directly.
+ * Returns the (non-negative) result of the check, or a negative AVERROR. */
+int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
+{
+    AVHWFramesContext *fc = (AVHWFramesContext*)hw_frames_ctx->data;
+    AVVulkanFramesContext *vk_fc = fc->hwctx;
+    int w_align, h_align, separate_dpb;
+    int ret;
+
+    ret = vulkan_decode_check_init(avctx, hw_frames_ctx, &w_align, &h_align,
+                                   &fc->sw_format, &separate_dpb);
+    if (ret < 0)
+        return ret;
+
+    fc->format = AV_PIX_FMT_VULKAN;
+    fc->width  = FFALIGN(avctx->coded_width, w_align);
+    fc->height = FFALIGN(avctx->coded_height, h_align);
+
+    vk_fc->tiling = VK_IMAGE_TILING_OPTIMAL;
+    vk_fc->usage  = VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
+                    VK_IMAGE_USAGE_SAMPLED_BIT              |
+                    VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+
+    /* Without a dedicated DPB the output images double as references */
+    if (!separate_dpb)
+        vk_fc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+
+    return ret;
+}
+
+/* Destroy the video session parameters object stored in data and free the
+ * storage holding its handle.
+ * opaque is the FFVulkanDecodeContext the parameters were created with.
+ * NOTE(review): the signature matches an AVBuffer free callback -
+ * presumably used as one; confirm against the av_buffer_create() call. */
+void ff_vk_decode_free_params(void *opaque, uint8_t *data)
+{
+    FFVulkanDecodeContext *dec = opaque;
+    VkVideoSessionParametersKHR *params = (VkVideoSessionParametersKHR *)data;
+
+    dec->s.vkfn.DestroyVideoSessionParametersKHR(dec->s.hwctx->act_dev,
+                                                 *params,
+                                                 dec->s.hwctx->alloc);
+    av_free(params);
+}
+
+/* Tear down everything owned by the decode context in hwaccel_priv_data.
+ * Also used as the failure path of ff_vk_decode_init(), so every object is
+ * only destroyed if it was actually created. Always returns 0. */
+int ff_vk_decode_uninit(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    /* Wait on and free execution pool */
+    ff_vk_exec_pool_free(s, &ctx->exec_pool);
+
+    /* Destroy layered view */
+    if (ctx->layered_view)
+        vk->DestroyImageView(s->hwctx->act_dev, ctx->layered_view, s->hwctx->alloc);
+
+    /* This also frees all references from this pool */
+    av_frame_free(&ctx->layered_frame);
+    av_buffer_unref(&ctx->dpb_hwfc_ref);
+
+    /* Destroy parameters */
+    if (ctx->empty_session_params)
+        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
+                                             ctx->empty_session_params,
+                                             s->hwctx->alloc);
+
+    ff_vk_video_common_uninit(s, &ctx->common);
+
+    /* Guarded like the other handles: when init failed early there is no
+     * sampler, and the function table may not have been loaded yet. */
+    if (ctx->yuv_sampler)
+        vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler,
+                                          s->hwctx->alloc);
+
+    av_buffer_pool_uninit(&ctx->tmp_pool);
+
+    ff_vk_uninit(s);
+
+    return 0;
+}
+
+/* Initialize the Vulkan decoder stored in hwaccel_priv_data: obtain the
+ * frames context, probe capabilities, create the execution pool, the video
+ * session, the YCbCr sampler conversion, the DPB pool (when decoding
+ * out-of-place) and an empty set of session parameters.
+ * Returns 0 on success. On failure a negative AVERROR code is returned and
+ * everything created so far is released through ff_vk_decode_uninit(). */
+int ff_vk_decode_init(AVCodecContext *avctx)
+{
+    int err, qf, cxpos = 0, cypos = 0, nb_q = 0;
+    VkResult ret;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
+    };
+    VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params :
+                 avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params :
+                 NULL,
+    };
+    VkVideoSessionCreateInfoKHR session_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR,
+    };
+    VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
+        .components = ff_comp_identity_map,
+        .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY,
+        .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */
+    };
+
+    err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN);
+    if (err < 0)
+        return err;
+
+    /* The two reference failures below return directly: the common failure
+     * path dereferences s->hwctx, which is not set up yet at this point. */
+    s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
+    if (!s->frames_ref)
+        return AVERROR(ENOMEM);
+    s->frames = (AVHWFramesContext *)s->frames_ref->data;
+    s->hwfc = s->frames->hwctx;
+
+    s->device_ref = av_buffer_ref(s->frames->device_ref);
+    if (!s->device_ref) {
+        av_buffer_unref(&s->frames_ref);
+        return AVERROR(ENOMEM);
+    }
+    s->device = (AVHWDeviceContext *)s->device_ref->data;
+    s->hwctx = s->device->hwctx;
+
+    /* Get parameters, capabilities and final pixel/vulkan format */
+    err = vulkan_decode_check_init(avctx, s->frames_ref, NULL, NULL,
+                                   &ctx->sw_format, NULL);
+    if (err < 0)
+        goto fail;
+
+    /* Load all properties */
+    err = ff_vk_load_props(s);
+    if (err < 0)
+        goto fail;
+
+    /* Create queue context */
+    qf = ff_vk_qf_init(s, &ctx->qf_dec, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+
+    /* Check for support */
+    if (!(s->video_props[qf].videoCodecOperations &
+          ff_vk_codec_map[avctx->codec_id].decode_op)) {
+        av_log(avctx, AV_LOG_ERROR, "Decoding %s not supported on the given "
+               "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf);
+        /* Go through the common cleanup instead of leaking the references */
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* TODO: enable when stable and tested. */
+    if (s->query_props[qf].queryResultStatusSupport)
+        nb_q = 1;
+
+    /* Create decode exec context.
+     * 4 async contexts per thread seems like a good number. */
+    err = ff_vk_exec_pool_init(s, &ctx->qf_dec, &ctx->exec_pool, 4*avctx->thread_count,
+                               nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0,
+                               &ctx->profile);
+    if (err < 0)
+        goto fail;
+
+    session_create.pVideoProfile = &ctx->profile;
+    session_create.flags = 0x0;
+    session_create.queueFamilyIndex = s->hwctx->queue_family_decode_index;
+    session_create.maxCodedExtent = ctx->common.caps.maxCodedExtent;
+    session_create.maxDpbSlots = ctx->common.caps.maxDpbSlots;
+    session_create.maxActiveReferencePictures = ctx->common.caps.maxActiveReferencePictures;
+    session_create.pictureFormat = ctx->pic_format;
+    session_create.referencePictureFormat = session_create.pictureFormat;
+    session_create.pStdHeaderVersion = dec_ext[avctx->codec_id];
+
+    err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
+    if (err < 0)
+        goto fail;
+
+    /* Get sampler */
+    av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location);
+    yuv_sampler_info.xChromaOffset = cxpos >> 7;
+    yuv_sampler_info.yChromaOffset = cypos >> 7;
+    yuv_sampler_info.format = ctx->pic_format;
+    ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info,
+                                           s->hwctx->alloc, &ctx->yuv_sampler);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* If doing an out-of-place decoding, create a DPB pool */
+    if (ctx->dedicated_dpb) {
+        AVHWFramesContext *dpb_frames;
+        AVVulkanFramesContext *dpb_hwfc;
+
+        ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->device_ref);
+        if (!ctx->dpb_hwfc_ref) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        /* The DPB pool mirrors the format and size of the output frames */
+        dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+        dpb_frames->format    = s->frames->format;
+        dpb_frames->sw_format = s->frames->sw_format;
+        dpb_frames->width     = s->frames->width;
+        dpb_frames->height    = s->frames->height;
+
+        dpb_hwfc = dpb_frames->hwctx;
+        dpb_hwfc->create_pnext = &ctx->profile_list;
+        dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL;
+        dpb_hwfc->usage  = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
+                           VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */
+
+        if (ctx->layered_dpb)
+            dpb_hwfc->nb_layers = ctx->common.caps.maxDpbSlots;
+
+        err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref);
+        if (err < 0)
+            goto fail;
+
+        if (ctx->layered_dpb) {
+            ctx->layered_frame = vk_get_dpb_pool(ctx);
+            if (!ctx->layered_frame) {
+                err = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            err = vk_decode_create_view(ctx, &ctx->layered_view, &ctx->layered_aspect,
+                                        (AVVkFrame *)ctx->layered_frame->data);
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    session_params_create.videoSession = ctx->common.session;
+    ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
+                                              s->hwctx->alloc, &ctx->empty_session_params);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        /* Release the session, sampler, pools and references created above */
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    ff_vk_decode_flush(avctx);
+
+    av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization successful\n");
+
+    return 0;
+
+fail:
+    ff_vk_decode_uninit(avctx);
+
+    return err;
+}
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
new file mode 100644
index 0000000000..9f9676bbfa
--- /dev/null
+++ b/libavcodec/vulkan_decode.h
@@ -0,0 +1,163 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_DECODE_H
+#define AVCODEC_VULKAN_DECODE_H
+
+#include "decode.h"
+#include "hwconfig.h"
+#include "internal.h"
+
+#include "vulkan_video.h"
+
+typedef struct FFVulkanDecodeContext { /* Read from avctx->internal->hwaccel_priv_data by the hwaccels */
+    FFVulkanContext s;
+    FFVkVideoCommon common;
+
+    int dedicated_dpb; /* Oddity  #1 - separate DPB images */
+    int layered_dpb;   /* Madness #1 - layered  DPB images */
+
+    AVBufferRef *dpb_hwfc_ref;  /* Only used for dedicated_dpb */
+
+    AVFrame *layered_frame;     /* Only used for layered_dpb   */
+    VkImageView layered_view;          /* View created on layered_frame   */
+    VkImageAspectFlags layered_aspect; /* Aspect returned with that view  */
+
+    VkVideoDecodeH264ProfileInfoKHR h264_profile;
+    VkVideoDecodeH265ProfileInfoKHR h265_profile; /* H.265 has its own profile info struct */
+    VkVideoSessionParametersKHR empty_session_params; /* Empty parameters object created at init */
+
+    VkSamplerYcbcrConversion yuv_sampler;
+    VkVideoDecodeUsageInfoKHR usage;
+    VkVideoProfileInfoKHR profile;
+    VkVideoDecodeCapabilitiesKHR dec_caps;
+    VkVideoProfileListInfoKHR profile_list; /* Passed as create_pnext of the DPB frames context */
+    VkFormat pic_format;
+    enum AVPixelFormat sw_format;
+    int init;
+
+    AVBufferRef *session_params;
+
+    FFVkQueueFamilyCtx qf_dec;
+    FFVkExecPool exec_pool;
+
+    AVBufferPool *tmp_pool; /* Pool for temporary data, if needed (HEVC) */
+    size_t tmp_pool_ele_size;
+
+    uint16_t last_ref_frames_in_use;
+} FFVulkanDecodeContext;
+
+typedef struct FFVulkanDecodePicture { /* Per-picture state; embedded as 'vp' in H264VulkanDecodePicture */
+    AVFrame                        *dpb_frame;      /* Only used for out-of-place decoding. */
+    AVVkFrame                      *dpb_vkf;        /* Only used for out-of-place decoding. */
+
+    VkImageView                     img_view_ref;   /* Image representation view (reference) */
+    VkImageView                     img_view_out;   /* Image representation view (output-only) */
+    VkImageAspectFlags              img_aspect;     /* Image plane mask bits */
+    VkImageAspectFlags              img_aspect_ref; /* Only used for out-of-place decoding */
+
+    VkSemaphore                     sem;
+    uint64_t                        sem_value;
+
+    /* State */
+    int                             update_params;
+    AVBufferRef                    *session_params; /* Reference taken by the codec's start_frame */
+
+    /* Current picture: ref_slot is used as decode_info.pSetupReferenceSlot */
+    VkVideoPictureResourceInfoKHR   ref;
+    VkVideoReferenceSlotInfoKHR     ref_slot;
+
+    /* Picture refs. H264 has the maximum number of refs (36) of any supported codec. */
+    VkVideoPictureResourceInfoKHR   refs     [36];
+    VkVideoReferenceSlotInfoKHR     ref_slots[36];  /* Used as decode_info.pReferenceSlots */
+
+    /* Main decoding struct */
+    AVBufferRef                    *params_buf;
+    VkVideoDecodeInfoKHR            decode_info;    /* Filled by the codec's start_frame */
+
+    /* Slice data */
+    uint8_t                        *slices;
+    size_t                          slices_size;
+    unsigned int                    slices_size_max;
+    uint32_t                       *slice_off;
+    unsigned int                    slice_off_max;
+    uint32_t                        nb_slices;
+} FFVulkanDecodePicture;
+
+/**
+ * Initialize decoder. Returns 0 on success, a negative error code on failure.
+ */
+int ff_vk_decode_init(AVCodecContext *avctx);
+
+/**
+ * Initialize hw_frames_ctx with the parameters needed to decode the stream
+ * using the parameters from avctx.
+ *
+ * NOTE: if avctx->internal->hwaccel_priv_data exists, will partially initialize
+ * the context.
+ */
+int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
+
+/**
+ * Prepare a frame: create the image view and set up the DPB fields of vkpic.
+ */
+int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic,
+                               FFVulkanDecodePicture *vkpic, int is_current,
+                               int alloc_dpb);
+
+/**
+ * Add slice data to frame; callers pass their slice count and offset table via nb_slices/offsets.
+ */
+int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp,
+                           const uint8_t *data, size_t size, int add_startcode,
+                           uint32_t *nb_slices, const uint32_t **offsets);
+
+/**
+ * Decode a frame.
+ */
+int ff_vk_decode_frame(AVCodecContext *avctx,
+                       AVFrame *pic,    FFVulkanDecodePicture *vp,
+                       AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]);
+
+/**
+ * Free a frame and its state.
+ */
+void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp);
+
+/**
+ * Get an FFVkBuffer suitable for decoding from.
+ */
+int ff_vk_get_decode_buffer(FFVulkanDecodeContext *ctx, AVBufferRef **buf,
+                            void *create_pNext, size_t size);
+
+/**
+ * Free VkVideoSessionParametersKHR. Usable as an av_buffer_create() free callback (opaque = decode context).
+ */
+void ff_vk_decode_free_params(void *opaque, uint8_t *data);
+
+/**
+ * Flush decoder.
+ */
+void ff_vk_decode_flush(AVCodecContext *avctx);
+
+/**
+ * Free decoder. Also used to clean up after a failed ff_vk_decode_init().
+ */
+int ff_vk_decode_uninit(AVCodecContext *avctx);
+
+#endif /* AVCODEC_VULKAN_DECODE_H */
-- 
2.39.2


[-- Attachment #70: 0069-h264dec-add-hwaccel_params_buf.patch --]
[-- Type: text/x-diff, Size: 2737 bytes --]

From e26c514b35f5c87321a8fa6c6eb70b54220a92ed Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 17 Jan 2023 05:01:45 +0100
Subject: [PATCH 69/72] h264dec: add hwaccel_params_buf

---
 libavcodec/h264_slice.c | 4 ++++
 libavcodec/h264dec.c    | 4 ++++
 libavcodec/h264dec.h    | 2 ++
 3 files changed, 10 insertions(+)

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index c0aa31bcd9..0c7f80c018 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -347,6 +347,10 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
             return ret;
     }
 
+    ret = av_buffer_replace(&h->hwaccel_params_buf, h1->hwaccel_params_buf);
+    if (ret < 0)
+        return ret;
+
     ret = av_buffer_replace(&h->ps.pps_ref, h1->ps.pps_ref);
     if (ret < 0)
         return ret;
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 995bf17a8f..f6059da950 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -341,6 +341,7 @@ static av_cold int h264_decode_end(AVCodecContext *avctx)
     H264Context *h = avctx->priv_data;
     int i;
 
+    av_buffer_unref(&h->hwaccel_params_buf);
     ff_h264_remove_all_refs(h);
     ff_h264_free_tables(h);
 
@@ -470,6 +471,7 @@ static void h264_decode_flush(AVCodecContext *avctx)
 
     ff_h264_flush_change(h);
     ff_h264_sei_uninit(&h->sei);
+    av_buffer_unref(&h->hwaccel_params_buf);
 
     for (i = 0; i < H264_MAX_PICTURE_COUNT; i++)
         ff_h264_unref_picture(h, &h->DPB[i]);
@@ -669,6 +671,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
             avpriv_request_sample(avctx, "data partitioning");
             break;
         case H264_NAL_SEI:
+            av_buffer_unref(&h->hwaccel_params_buf);
             if (h->setup_finished) {
                 avpriv_request_sample(avctx, "Late SEI");
                 break;
@@ -682,6 +685,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
             break;
         case H264_NAL_SPS: {
             GetBitContext tmp_gb = nal->gb;
+            av_buffer_unref(&h->hwaccel_params_buf);
             if (avctx->hwaccel && avctx->hwaccel->decode_params) {
                 ret = avctx->hwaccel->decode_params(avctx,
                                                     nal->type,
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 1b18aba71f..5b1620c3f1 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -342,6 +342,8 @@ typedef struct H264Context {
     H264Picture cur_pic;
     H264Picture last_pic_for_ec;
 
+    AVBufferRef *hwaccel_params_buf;
+
     H264SliceContext *slice_ctx;
     int            nb_slice_ctx;
     int            nb_slice_ctx_queued;
-- 
2.39.2


[-- Attachment #71: 0070-h264dec-add-Vulkan-hwaccel.patch --]
[-- Type: text/x-diff, Size: 27544 bytes --]

From b5ff58808482bedf12b981ee1c03dd95099a9332 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 01:13:01 +0100
Subject: [PATCH 70/72] h264dec: add Vulkan hwaccel

Thanks to Dave Airlie for figuring out a lot of the parameters.
---
 configure                |   2 +
 libavcodec/Makefile      |   1 +
 libavcodec/h264_slice.c  |  12 +-
 libavcodec/h264dec.c     |   3 +
 libavcodec/hwaccels.h    |   1 +
 libavcodec/vulkan_h264.c | 521 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 539 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/vulkan_h264.c

diff --git a/configure b/configure
index 91f715351c..60973c38b3 100755
--- a/configure
+++ b/configure
@@ -3034,6 +3034,8 @@ h264_vdpau_hwaccel_deps="vdpau"
 h264_vdpau_hwaccel_select="h264_decoder"
 h264_videotoolbox_hwaccel_deps="videotoolbox"
 h264_videotoolbox_hwaccel_select="h264_decoder"
+h264_vulkan_hwaccel_deps="vulkan"
+h264_vulkan_hwaccel_select="h264_decoder"
 hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
 hevc_d3d11va_hwaccel_select="hevc_decoder"
 hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index eabf4eb43e..4c9db167a5 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -992,6 +992,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL)           += qsvdec.o
 OBJS-$(CONFIG_H264_VAAPI_HWACCEL)         += vaapi_h264.o
 OBJS-$(CONFIG_H264_VDPAU_HWACCEL)         += vdpau_h264.o
 OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
+OBJS-$(CONFIG_H264_VULKAN_HWACCEL)        += vulkan_decode.o vulkan_h264.o
 OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)       += dxva2_hevc.o
 OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
 OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL)         += nvdec_hevc.o
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 0c7f80c018..50d138e2a9 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -782,7 +782,8 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
                      CONFIG_H264_NVDEC_HWACCEL + \
                      CONFIG_H264_VAAPI_HWACCEL + \
                      CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \
-                     CONFIG_H264_VDPAU_HWACCEL)
+                     CONFIG_H264_VDPAU_HWACCEL + \
+                     CONFIG_H264_VULKAN_HWACCEL)
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
     const enum AVPixelFormat *choices = pix_fmts;
     int i;
@@ -803,6 +804,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
 #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL
         if (h->avctx->colorspace != AVCOL_SPC_RGB)
             *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_H264_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
 #endif
         if (CHROMA444(h)) {
             if (h->avctx->colorspace == AVCOL_SPC_RGB) {
@@ -815,6 +819,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
             *fmt++ = AV_PIX_FMT_YUV420P10;
         break;
     case 12:
+#if CONFIG_H264_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
         if (CHROMA444(h)) {
             if (h->avctx->colorspace == AVCOL_SPC_RGB) {
                 *fmt++ = AV_PIX_FMT_GBRP12;
@@ -840,6 +847,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
 #if CONFIG_H264_VDPAU_HWACCEL
         *fmt++ = AV_PIX_FMT_VDPAU;
 #endif
+#if CONFIG_H264_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
 #if CONFIG_H264_NVDEC_HWACCEL
         *fmt++ = AV_PIX_FMT_CUDA;
 #endif
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index f6059da950..15a6e74829 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -1100,6 +1100,9 @@ const FFCodec ff_h264_decoder = {
 #endif
 #if CONFIG_H264_VIDEOTOOLBOX_HWACCEL
                                HWACCEL_VIDEOTOOLBOX(h264),
+#endif
+#if CONFIG_H264_VULKAN_HWACCEL
+                               HWACCEL_VULKAN(h264),
 #endif
                                NULL
                            },
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index aca55831f3..23d0843c76 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -36,6 +36,7 @@ extern const AVHWAccel ff_h264_nvdec_hwaccel;
 extern const AVHWAccel ff_h264_vaapi_hwaccel;
 extern const AVHWAccel ff_h264_vdpau_hwaccel;
 extern const AVHWAccel ff_h264_videotoolbox_hwaccel;
+extern const AVHWAccel ff_h264_vulkan_hwaccel;
 extern const AVHWAccel ff_hevc_d3d11va_hwaccel;
 extern const AVHWAccel ff_hevc_d3d11va2_hwaccel;
 extern const AVHWAccel ff_hevc_dxva2_hwaccel;
diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c
new file mode 100644
index 0000000000..241a7d8f5b
--- /dev/null
+++ b/libavcodec/vulkan_h264.c
@@ -0,0 +1,521 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264dec.h"
+#include "h264_ps.h"
+
+#include "vulkan_decode.h"
+
+const VkExtensionProperties ff_vk_dec_h264_ext = { /* Name/version of the H.264 decode std header in use */
+    .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME,
+    .specVersion   = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION,
+};
+
+typedef struct H264VulkanDecodePicture { /* Stored in H264Picture.hwaccel_picture_private */
+    FFVulkanDecodeContext          *ctx;            /* Set at the end of vk_h264_start_frame() */
+    FFVulkanDecodePicture           vp;
+
+    /* Current picture */
+    StdVideoDecodeH264ReferenceInfo h264_ref;
+    VkVideoDecodeH264DpbSlotInfoKHR vkh264_ref;
+
+    /* Picture refs: short-term entries first, long-term ones after them */
+    H264Picture                    *ref_src    [H264_MAX_PICTURE_COUNT];
+    StdVideoDecodeH264ReferenceInfo h264_refs  [H264_MAX_PICTURE_COUNT];
+    VkVideoDecodeH264DpbSlotInfoKHR vkh264_refs[H264_MAX_PICTURE_COUNT];
+
+    /* Current picture (contd.) */
+    StdVideoDecodeH264PictureInfo   h264pic;
+    VkVideoDecodeH264PictureInfoKHR h264_pic_info;  /* Chained into vp.decode_info.pNext */
+} H264VulkanDecodePicture;
+/* Prepare pic's frame and fill the chained reference-slot structs describing it; returns 0 or a negative error. */
+static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
+                             VkVideoReferenceSlotInfoKHR *ref_slot,       /* Main structure */
+                             VkVideoPictureResourceInfoKHR *ref,          /* Goes in ^ */
+                             VkVideoDecodeH264DpbSlotInfoKHR *vkh264_ref, /* Goes in ^ */
+                             StdVideoDecodeH264ReferenceInfo *h264_ref,   /* Goes in ^ */
+                             H264Picture *pic, int is_current, int picture_structure,
+                             int dpb_slot_index)
+{
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vkpic = &hp->vp;
+
+    int err = ff_vk_decode_prepare_frame(ctx, pic->f, vkpic, is_current,
+                                         ctx->dedicated_dpb);
+    if (err < 0)
+        return err;
+
+    *h264_ref = (StdVideoDecodeH264ReferenceInfo) {
+        .FrameNum = pic->long_ref ? pic->pic_id : pic->frame_num, /* TODO: kinda sure */
+        .PicOrderCnt = { pic->field_poc[0], pic->field_poc[1] },
+        .flags = (StdVideoDecodeH264ReferenceInfoFlags) {
+            .top_field_flag = !!(picture_structure & PICT_TOP_FIELD),
+            .bottom_field_flag = !!(picture_structure & PICT_BOTTOM_FIELD),
+            .used_for_long_term_reference = pic->reference && pic->long_ref,
+            .is_non_existing = 0,
+        },
+    };
+
+    *vkh264_ref = (VkVideoDecodeH264DpbSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR,
+        .pStdReferenceInfo = h264_ref,
+    };
+
+    *ref = (VkVideoPictureResourceInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+        .codedOffset = (VkOffset2D){ 0, 0 },
+        .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
+        .baseArrayLayer = ctx->layered_dpb ? dpb_slot_index : 0, /* Layer is only selected for a layered DPB */
+        .imageViewBinding = vkpic->img_view_ref,
+    };
+
+    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+        .pNext = vkh264_ref,
+        .slotIndex = dpb_slot_index, /* TODO: kinda sure */
+        .pPictureResource = ref,
+    };
+
+    if (ref_src) /* Optional output: the caller passes NULL for the current picture */
+        *ref_src = pic;
+
+    return 0;
+}
+/* Convert an FFmpeg SPS to StdVideoH264* form. The outputs reference each other by pointer, so keep all four alive together. */
+static void set_sps(const SPS *sps,
+                    StdVideoH264ScalingLists *vksps_scaling,
+                    StdVideoH264HrdParameters *vksps_vui_header,
+                    StdVideoH264SequenceParameterSetVui *vksps_vui,
+                    StdVideoH264SequenceParameterSet *vksps)
+{
+    *vksps_scaling = (StdVideoH264ScalingLists) {
+        .scaling_list_present_mask = sps->scaling_matrix_present_mask,
+        .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */
+    };
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_matrix4[i],
+               STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**sps->scaling_matrix4));
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_matrix8[i],
+               STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**sps->scaling_matrix8));
+
+    *vksps_vui_header = (StdVideoH264HrdParameters) {
+        .cpb_cnt_minus1 = sps->cpb_cnt - 1,
+        .bit_rate_scale = sps->bit_rate_scale,
+        .initial_cpb_removal_delay_length_minus1 = sps->initial_cpb_removal_delay_length - 1,
+        .cpb_removal_delay_length_minus1 = sps->cpb_removal_delay_length - 1,
+        .dpb_output_delay_length_minus1 = sps->dpb_output_delay_length - 1,
+        .time_offset_length = sps->time_offset_length,
+    };
+
+    for (int i = 0; i < sps->cpb_cnt; i++) {
+        vksps_vui_header->bit_rate_value_minus1[i] = sps->bit_rate_value[i] - 1;
+        vksps_vui_header->cpb_size_value_minus1[i] = sps->cpb_size_value[i] - 1;
+        vksps_vui_header->cbr_flag[i] = sps->cpr_flag[i];
+    }
+
+    *vksps_vui = (StdVideoH264SequenceParameterSetVui) {
+        .aspect_ratio_idc = sps->vui.aspect_ratio_idc,
+        .sar_width = sps->vui.sar.num,
+        .sar_height = sps->vui.sar.den,
+        .video_format = sps->vui.video_format,
+        .colour_primaries = sps->vui.colour_primaries,
+        .transfer_characteristics = sps->vui.transfer_characteristics,
+        .matrix_coefficients = sps->vui.matrix_coeffs,
+        .num_units_in_tick = sps->num_units_in_tick,
+        .time_scale = sps->time_scale,
+        .pHrdParameters = vksps_vui_header,
+        .max_num_reorder_frames = sps->num_reorder_frames,
+        .max_dec_frame_buffering = sps->max_dec_frame_buffering,
+        .flags = (StdVideoH264SpsVuiFlags) {
+            .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag,
+            .overscan_info_present_flag = sps->vui.overscan_info_present_flag,
+            .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag,
+            .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag,
+            .video_full_range_flag = sps->vui.video_full_range_flag,
+            .color_description_present_flag = sps->vui.colour_description_present_flag,
+            .chroma_loc_info_present_flag = sps->vui.chroma_location, /* NOTE(review): location value fed to a "present" flag - confirm */
+            .timing_info_present_flag = sps->timing_info_present_flag,
+            .fixed_frame_rate_flag = sps->fixed_frame_rate_flag,
+            .bitstream_restriction_flag = sps->bitstream_restriction_flag,
+            .nal_hrd_parameters_present_flag = sps->nal_hrd_parameters_present_flag,
+            .vcl_hrd_parameters_present_flag = sps->vcl_hrd_parameters_present_flag,
+        },
+    };
+
+    *vksps = (StdVideoH264SequenceParameterSet) {
+        .profile_idc = sps->profile_idc,
+        .level_idc = sps->level_idc,
+        .seq_parameter_set_id = sps->sps_id,
+        .chroma_format_idc = sps->chroma_format_idc,
+        .bit_depth_luma_minus8 = sps->bit_depth_luma - 8,
+        .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8,
+        .log2_max_frame_num_minus4 = sps->log2_max_frame_num - 4,
+        .pic_order_cnt_type = sps->poc_type,
+        .log2_max_pic_order_cnt_lsb_minus4 = sps->poc_type ? 0 : sps->log2_max_poc_lsb - 4,
+        .offset_for_non_ref_pic = sps->offset_for_non_ref_pic,
+        .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field,
+        .num_ref_frames_in_pic_order_cnt_cycle = sps->poc_cycle_length,
+        .max_num_ref_frames = sps->ref_frame_count,
+        .pic_width_in_mbs_minus1 = sps->mb_width - 1,
+        .pic_height_in_map_units_minus1 = (sps->mb_height/(2 - sps->frame_mbs_only_flag)) - 1,
+        .frame_crop_left_offset = sps->crop_left,
+        .frame_crop_right_offset = sps->crop_right,
+        .frame_crop_top_offset = sps->crop_top,
+        .frame_crop_bottom_offset = sps->crop_bottom,
+        .flags = (StdVideoH264SpsFlags) {
+            .constraint_set0_flag = (sps->constraint_set_flags >> 0) & 0x1,
+            .constraint_set1_flag = (sps->constraint_set_flags >> 1) & 0x1,
+            .constraint_set2_flag = (sps->constraint_set_flags >> 2) & 0x1,
+            .constraint_set3_flag = (sps->constraint_set_flags >> 3) & 0x1,
+            .constraint_set4_flag = (sps->constraint_set_flags >> 4) & 0x1,
+            .constraint_set5_flag = (sps->constraint_set_flags >> 5) & 0x1,
+            .direct_8x8_inference_flag = sps->direct_8x8_inference_flag,
+            .mb_adaptive_frame_field_flag = sps->mb_aff,
+            .frame_mbs_only_flag = sps->frame_mbs_only_flag,
+            .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag,
+            .separate_colour_plane_flag = sps->residual_color_transform_flag,
+            .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag,
+            .qpprime_y_zero_transform_bypass_flag = sps->transform_bypass,
+            .frame_cropping_flag = sps->crop,
+            .seq_scaling_matrix_present_flag = sps->scaling_matrix_present,
+            .vui_parameters_present_flag = sps->vui_parameters_present_flag,
+        },
+        .pOffsetForRefFrame = sps->offset_for_ref_frame, /* Borrowed from sps, not copied */
+        .pScalingLists = vksps_scaling,
+        .pSequenceParameterSetVui = vksps_vui,
+    };
+}
+/* Convert an FFmpeg PPS to StdVideoH264PictureParameterSet; vkpps points at vkpps_scaling. The sps argument is unused. */
+static void set_pps(const PPS *pps, const SPS *sps,
+                    StdVideoH264ScalingLists *vkpps_scaling,
+                    StdVideoH264PictureParameterSet *vkpps)
+{
+    *vkpps_scaling = (StdVideoH264ScalingLists) {
+        .scaling_list_present_mask = pps->pic_scaling_matrix_present_mask,
+        .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */
+    };
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_matrix4[i],
+               STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**pps->scaling_matrix4));
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_matrix8[i],
+               STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**pps->scaling_matrix8));
+
+    *vkpps = (StdVideoH264PictureParameterSet) {
+        .seq_parameter_set_id = pps->sps_id,
+        .pic_parameter_set_id = pps->pps_id,
+        .num_ref_idx_l0_default_active_minus1 = pps->ref_count[0] - 1,
+        .num_ref_idx_l1_default_active_minus1 = pps->ref_count[1] - 1,
+        .weighted_bipred_idc = pps->weighted_bipred_idc,
+        .pic_init_qp_minus26 = pps->init_qp - 26,
+        .pic_init_qs_minus26 = pps->init_qs - 26,
+        .chroma_qp_index_offset = pps->chroma_qp_index_offset[0],
+        .second_chroma_qp_index_offset = pps->chroma_qp_index_offset[1],
+        .flags = (StdVideoH264PpsFlags) {
+            .transform_8x8_mode_flag = pps->transform_8x8_mode,
+            .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present,
+            .constrained_intra_pred_flag = pps->constrained_intra_pred,
+            .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present,
+            .weighted_pred_flag = pps->weighted_pred,
+            .bottom_field_pic_order_in_frame_present_flag = pps->pic_order_present,
+            .entropy_coding_mode_flag = pps->cabac,
+            .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag,
+        },
+        .pScalingLists = vkpps_scaling,
+    };
+}
+/* Create session parameters from every stored SPS/PPS; on success *buf owns them. Returns 0 or a negative AVERROR. */
+static int vk_h264_create_params(AVCodecContext *avctx, AVBufferRef **buf)
+{
+    VkResult ret;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    const H264Context *h = avctx->priv_data;
+
+    /* SPS */
+    StdVideoH264ScalingLists vksps_scaling[MAX_SPS_COUNT];
+    StdVideoH264HrdParameters vksps_vui_header[MAX_SPS_COUNT];
+    StdVideoH264SequenceParameterSetVui vksps_vui[MAX_SPS_COUNT];
+    StdVideoH264SequenceParameterSet vksps[MAX_SPS_COUNT];
+
+    /* PPS */
+    StdVideoH264ScalingLists vkpps_scaling[MAX_PPS_COUNT];
+    StdVideoH264PictureParameterSet vkpps[MAX_PPS_COUNT];
+
+    VkVideoDecodeH264SessionParametersAddInfoKHR h264_params_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR,
+        .pStdSPSs = vksps, /* stdSPSCount/stdPPSCount start at 0 and are counted below */
+        .pStdPPSs = vkpps,
+    };
+    VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pParametersAddInfo = &h264_params_info,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = &h264_params,
+        .videoSession = ctx->common.session, /* no template: remaining members stay zero */
+    };
+
+    AVBufferRef *tmp;
+    VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par));
+    if (!par)
+        return AVERROR(ENOMEM);
+
+    /* SPS list: scan every slot (a NULL slot is not the end) and pack the used ones at index n */
+    for (int i = 0, n = 0; i < MAX_SPS_COUNT; i++) {
+        if (h->ps.sps_list[i]) {
+            const SPS *sps_l = (const SPS *)h->ps.sps_list[i]->data;
+            set_sps(sps_l, &vksps_scaling[n], &vksps_vui_header[n], &vksps_vui[n], &vksps[n]);
+            h264_params_info.stdSPSCount = ++n;
+        }
+    }
+
+    /* PPS list: same scheme, bounded by the size of the local arrays */
+    for (int i = 0, n = 0; i < MAX_PPS_COUNT; i++) {
+        if (h->ps.pps_list[i]) {
+            const PPS *pps_l = (const PPS *)h->ps.pps_list[i]->data;
+            set_pps(pps_l, pps_l->sps, &vkpps_scaling[n], &vkpps[n]);
+            h264_params_info.stdPPSCount = ++n;
+        }
+    }
+
+    h264_params.maxStdSPSCount = h264_params_info.stdSPSCount;
+    h264_params.maxStdPPSCount = h264_params_info.stdPPSCount;
+
+    /* Create session parameters */
+    ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create,
+                                              ctx->s.hwctx->alloc, par);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        av_free(par); /* no Vulkan object was created, only the holder needs freeing */
+        return AVERROR_EXTERNAL;
+    }
+
+    tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params, ctx, 0);
+    if (!tmp) {
+        ff_vk_decode_free_params(ctx, (uint8_t *)par);
+        return AVERROR(ENOMEM);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS\n",
+           h264_params_info.stdSPSCount, h264_params_info.stdPPSCount);
+
+    *buf = tmp;
+    return 0;
+}
+
+static int vk_h264_start_frame(AVCodecContext          *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t       size)
+{
+    int err;
+    int dpb_slot_index = 0;
+    H264Context *h = avctx->priv_data;
+    H264Picture *pic = h->cur_pic_ptr;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+
+    if (!h->hwaccel_params_buf) {
+        err = vk_h264_create_params(avctx, &h->hwaccel_params_buf);
+        if (err < 0)
+            return err;
+    }
+
+    vp->session_params = av_buffer_ref(h->hwaccel_params_buf);
+    if (!vp->session_params)
+        return AVERROR(ENOMEM);
+
+    /* Fill in main slot */
+    dpb_slot_index = 0;
+    for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) {
+        if (pic == &h->DPB[slot]) {
+            dpb_slot_index = slot;
+            break;
+        }
+    }
+
+    err = vk_h264_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref,
+                            &hp->vkh264_ref, &hp->h264_ref, pic, 1,
+                            h->picture_structure, dpb_slot_index);
+    if (err < 0)
+        return err;
+
+    /* Fill in short-term references */
+    for (int i = 0; i < h->short_ref_count; i++) {
+        dpb_slot_index = 0;
+        for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) {
+            if (h->short_ref[i] == &h->DPB[slot]) {
+                dpb_slot_index = slot;
+                break;
+            }
+        }
+        err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i],
+                                &vp->refs[i], &hp->vkh264_refs[i],
+                                &hp->h264_refs[i], h->short_ref[i], 0,
+                                h->DPB[dpb_slot_index].picture_structure,
+                                dpb_slot_index);
+        if (err < 0)
+            return err;
+    }
+
+    /* Fill in long-term refs */
+    for (int r = 0, i = h->short_ref_count; i < h->short_ref_count + h->long_ref_count; i++, r++) {
+        dpb_slot_index = 0;
+        for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) {
+            if (h->long_ref[i] == &h->DPB[slot]) {
+                dpb_slot_index = slot;
+                break;
+            }
+        }
+        err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i],
+                                &vp->refs[i], &hp->vkh264_refs[i],
+                                &hp->h264_refs[i], h->long_ref[r], 0,
+                                h->DPB[dpb_slot_index].picture_structure,
+                                dpb_slot_index);
+        if (err < 0)
+            return err;
+    }
+
+    hp->h264pic = (StdVideoDecodeH264PictureInfo) {
+        .seq_parameter_set_id = pic->pps->sps_id,
+        .pic_parameter_set_id = pic->pps->pps_id,
+        .frame_num = h->poc.frame_num,
+        .idr_pic_id = h->poc.idr_pic_id,
+        .PicOrderCnt[0] = pic->field_poc[0],
+        .PicOrderCnt[1] = pic->field_poc[1],
+        .flags = (StdVideoDecodeH264PictureInfoFlags) {
+            .field_pic_flag = FIELD_PICTURE(h),
+            .is_intra = 1,
+            .IdrPicFlag = h->picture_idr,
+            .bottom_field_flag = !!(h->picture_structure & PICT_BOTTOM_FIELD),
+            .is_reference = h->nal_ref_idc != 0,
+
+            // TODO: Not sure about this
+            .complementary_field_pair = h->first_field && FIELD_PICTURE(h),
+        },
+    };
+
+    hp->h264_pic_info = (VkVideoDecodeH264PictureInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR,
+        .pStdPictureInfo = &hp->h264pic,
+        .sliceCount = 0,
+    };
+
+    vp->decode_info = (VkVideoDecodeInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR,
+        .pNext = &hp->h264_pic_info,
+        .flags = 0x0,
+        .pSetupReferenceSlot = &vp->ref_slot,
+        .referenceSlotCount = h->short_ref_count + h->long_ref_count,
+        .pReferenceSlots = vp->ref_slots,
+        .dstPictureResource = (VkVideoPictureResourceInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+            .codedOffset = (VkOffset2D){ 0, 0 },
+            .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
+            .baseArrayLayer = 0,
+            .imageViewBinding = vp->img_view_out,
+        },
+    };
+
+    hp->ctx = ctx;
+
+    return 0;
+}
+
+/* Queue one slice of the current picture for decoding and keep the picture's
+ * is_intra flag in sync with the slice type.
+ * Returns 0 on success, or the negative error code reported by
+ * ff_vk_decode_add_slice(). */
+static int vk_h264_decode_slice(AVCodecContext *avctx,
+                                const uint8_t  *data,
+                                uint32_t        size)
+{
+    const H264Context *h = avctx->priv_data;
+    H264VulkanDecodePicture *hp = h->cur_pic_ptr->hwaccel_picture_private;
+    const H264SliceContext *first_sl = h->slice_ctx;
+    int err;
+
+    err = ff_vk_decode_add_slice(&hp->vp, data, size, 1,
+                                 &hp->h264_pic_info.sliceCount,
+                                 &hp->h264_pic_info.pSliceOffsets);
+    if (err < 0)
+        return err;
+
+    /* The frame is only intra if all slices are marked as intra, so any
+     * slice that is neither I nor SI clears the flag. */
+    if (!(first_sl->slice_type == AV_PICTURE_TYPE_I ||
+          first_sl->slice_type == AV_PICTURE_TYPE_SI))
+        hp->h264pic.flags.is_intra = 0;
+
+    return 0;
+}
+
+/* Collect the frames and Vulkan decode state of every reference slot used by
+ * the current picture, log the amount of queued data and hand everything to
+ * ff_vk_decode_frame(), whose return value is passed through. */
+static int vk_h264_end_frame(AVCodecContext *avctx)
+{
+    const H264Context *h = avctx->priv_data;
+    H264Picture *cur = h->cur_pic_ptr;
+    H264VulkanDecodePicture *hp = cur->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+    FFVulkanDecodePicture *ref_vps[H264_MAX_PICTURE_COUNT] = { 0 };
+    AVFrame *ref_frames[H264_MAX_PICTURE_COUNT] = { 0 };
+    int idx = 0;
+
+    /* ref_src[] was recorded while the reference slots were being filled in */
+    while (idx < vp->decode_info.referenceSlotCount) {
+        H264Picture *src = hp->ref_src[idx];
+        H264VulkanDecodePicture *src_priv = src->hwaccel_picture_private;
+
+        ref_frames[idx] = src->f;
+        ref_vps[idx]    = &src_priv->vp;
+        idx++;
+    }
+
+    /* NOTE(review): %lu assumes slices_size is an unsigned long - confirm
+     * against the FFVulkanDecodePicture declaration. */
+    av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n",
+           vp->slices_size, hp->h264_pic_info.sliceCount);
+
+    return ff_vk_decode_frame(avctx, cur->f, vp, ref_frames, ref_vps);
+}
+
+/* Release the per-picture hwaccel data: first the Vulkan resources tracked
+ * in the embedded FFVulkanDecodePicture, then the allocation itself. */
+static void vk_h264_free_frame_priv(AVCodecContext *avctx, void *data)
+{
+    H264VulkanDecodePicture *pic_priv = data;
+    FFVulkanDecodePicture *vkpic = &pic_priv->vp;
+
+    /* Free frame resources, this also destroys the session parameters. */
+    ff_vk_decode_free_frame(pic_priv->ctx, vkpic);
+
+    /* Free frame context */
+    av_free(pic_priv);
+}
+
+/* Hwaccel descriptor for H.264 decoding to AV_PIX_FMT_VULKAN frames.
+ * The per-frame callbacks are the vk_h264_* functions of this file, while
+ * init/flush/uninit/frame_params use the shared ff_vk_* decode helpers. */
+const AVHWAccel ff_h264_vulkan_hwaccel = {
+    .name                 = "h264_vulkan",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_H264,
+    .pix_fmt              = AV_PIX_FMT_VULKAN,
+    .start_frame          = &vk_h264_start_frame,
+    .decode_slice         = &vk_h264_decode_slice,
+    .end_frame            = &vk_h264_end_frame,
+    .free_frame_priv      = &vk_h264_free_frame_priv,
+    .frame_priv_data_size = sizeof(H264VulkanDecodePicture),
+    .init                 = &ff_vk_decode_init,
+    .flush                = &ff_vk_decode_flush,
+    .uninit               = &ff_vk_decode_uninit,
+    .frame_params         = &ff_vk_frame_params,
+    .priv_data_size       = sizeof(FFVulkanDecodeContext),
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
-- 
2.39.2


[-- Attachment #72: 0071-hevcdec-add-hwaccel_params_buf.patch --]
[-- Type: text/x-diff, Size: 2828 bytes --]

From 756f3a7daf18f402ec56a7f52ea8742d905edf18 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 17 Jan 2023 05:02:02 +0100
Subject: [PATCH 71/72] hevcdec: add hwaccel_params_buf

---
 libavcodec/hevcdec.c | 9 +++++++++
 libavcodec/hevcdec.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 7c9b46240c..5df831688c 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2969,6 +2969,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
 
     switch (s->nal_unit_type) {
     case HEVC_NAL_VPS:
+        av_buffer_unref(&s->hwaccel_params_buf);
         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
             ret = s->avctx->hwaccel->decode_params(s->avctx,
                                                    nal->type,
@@ -2982,6 +2983,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
             goto fail;
         break;
     case HEVC_NAL_SPS:
+        av_buffer_unref(&s->hwaccel_params_buf);
         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
             ret = s->avctx->hwaccel->decode_params(s->avctx,
                                                    nal->type,
@@ -2996,6 +2998,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
             goto fail;
         break;
     case HEVC_NAL_PPS:
+        av_buffer_unref(&s->hwaccel_params_buf);
         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
             ret = s->avctx->hwaccel->decode_params(s->avctx,
                                                    nal->type,
@@ -3455,6 +3458,7 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
 
     ff_dovi_ctx_unref(&s->dovi_ctx);
     av_buffer_unref(&s->rpu_buf);
+    av_buffer_unref(&s->hwaccel_params_buf);
 
     av_freep(&s->md5_ctx);
 
@@ -3606,6 +3610,10 @@ static int hevc_update_thread_context(AVCodecContext *dst,
     if (ret < 0)
         return ret;
 
+    ret = av_buffer_replace(&s->hwaccel_params_buf, s0->hwaccel_params_buf);
+    if (ret < 0)
+        return ret;
+
     ret = av_buffer_replace(&s->rpu_buf, s0->rpu_buf);
     if (ret < 0)
         return ret;
@@ -3683,6 +3691,7 @@ static void hevc_decode_flush(AVCodecContext *avctx)
     s->max_ra = INT_MAX;
     s->eos = 1;
 
+    av_buffer_unref(&s->hwaccel_params_buf);
     if (avctx->hwaccel->flush)
         avctx->hwaccel->flush(avctx);
 }
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 15c4113bdd..774cd95947 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -509,6 +509,8 @@ typedef struct HEVCContext {
     uint8_t *sao_pixel_buffer_h[3];
     uint8_t *sao_pixel_buffer_v[3];
 
+    AVBufferRef *hwaccel_params_buf;
+
     HEVCParamSets ps;
     HEVCSEI sei;
     struct AVMD5 *md5_ctx;
-- 
2.39.2


[-- Attachment #73: 0072-hevcdec-add-Vulkan-hwaccel.patch --]
[-- Type: text/x-diff, Size: 50457 bytes --]

From d47cb5940bc4808fea572b530eb1b9bf11159540 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 08:27:18 +0100
Subject: [PATCH 72/72] hevcdec: add Vulkan hwaccel

Thanks to Dave Airlie for figuring out a lot of the parameters.
---
 configure                |   2 +
 libavcodec/Makefile      |   1 +
 libavcodec/hevcdec.c     |  27 +-
 libavcodec/hwaccels.h    |   1 +
 libavcodec/vulkan_hevc.c | 904 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 934 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/vulkan_hevc.c

diff --git a/configure b/configure
index 60973c38b3..8f7b918565 100755
--- a/configure
+++ b/configure
@@ -3050,6 +3050,8 @@ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
 hevc_vdpau_hwaccel_select="hevc_decoder"
 hevc_videotoolbox_hwaccel_deps="videotoolbox"
 hevc_videotoolbox_hwaccel_select="hevc_decoder"
+hevc_vulkan_hwaccel_deps="vulkan"
+hevc_vulkan_hwaccel_select="hevc_decoder"
 mjpeg_nvdec_hwaccel_deps="nvdec"
 mjpeg_nvdec_hwaccel_select="mjpeg_decoder"
 mjpeg_vaapi_hwaccel_deps="vaapi"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 4c9db167a5..6aa304071a 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -999,6 +999,7 @@ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL)         += nvdec_hevc.o
 OBJS-$(CONFIG_HEVC_QSV_HWACCEL)           += qsvdec.o
 OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o h265_profile_level.o
 OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o h265_profile_level.o
+OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL)        += vulkan_decode.o vulkan_hevc.o
 OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)        += nvdec_mjpeg.o
 OBJS-$(CONFIG_MJPEG_VAAPI_HWACCEL)        += vaapi_mjpeg.o
 OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL)        += nvdec_mpeg12.o
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 5df831688c..0ad6418f8d 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -405,7 +405,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
                      CONFIG_HEVC_NVDEC_HWACCEL + \
                      CONFIG_HEVC_VAAPI_HWACCEL + \
                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
-                     CONFIG_HEVC_VDPAU_HWACCEL)
+                     CONFIG_HEVC_VDPAU_HWACCEL + \
+                     CONFIG_HEVC_VULKAN_HWACCEL)
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 
     switch (sps->pix_fmt) {
@@ -429,6 +430,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
 #endif
         break;
     case AV_PIX_FMT_YUV420P10:
@@ -445,6 +449,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 #endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
 #if CONFIG_HEVC_VDPAU_HWACCEL
         *fmt++ = AV_PIX_FMT_VDPAU;
 #endif
@@ -464,6 +471,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
 #endif
         break;
     case AV_PIX_FMT_YUV422P:
@@ -473,11 +483,17 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #endif
 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
 #endif
         break;
     case AV_PIX_FMT_YUV444P10:
 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
 #endif
     case AV_PIX_FMT_YUV420P12:
     case AV_PIX_FMT_YUV444P12:
@@ -487,6 +503,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 #if CONFIG_HEVC_VDPAU_HWACCEL
         *fmt++ = AV_PIX_FMT_VDPAU;
 #endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
 #if CONFIG_HEVC_NVDEC_HWACCEL
         *fmt++ = AV_PIX_FMT_CUDA;
 #endif
@@ -494,6 +513,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
     case AV_PIX_FMT_YUV422P12:
 #if CONFIG_HEVC_VAAPI_HWACCEL
        *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+        *fmt++ = AV_PIX_FMT_VULKAN;
 #endif
         break;
     }
@@ -3752,6 +3774,9 @@ const FFCodec ff_hevc_decoder = {
 #endif
 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
                                HWACCEL_VIDEOTOOLBOX(hevc),
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+                               HWACCEL_VULKAN(hevc),
 #endif
                                NULL
                            },
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 23d0843c76..a7c74d07cb 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -44,6 +44,7 @@ extern const AVHWAccel ff_hevc_nvdec_hwaccel;
 extern const AVHWAccel ff_hevc_vaapi_hwaccel;
 extern const AVHWAccel ff_hevc_vdpau_hwaccel;
 extern const AVHWAccel ff_hevc_videotoolbox_hwaccel;
+extern const AVHWAccel ff_hevc_vulkan_hwaccel;
 extern const AVHWAccel ff_mjpeg_nvdec_hwaccel;
 extern const AVHWAccel ff_mjpeg_vaapi_hwaccel;
 extern const AVHWAccel ff_mpeg1_nvdec_hwaccel;
diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
new file mode 100644
index 0000000000..f4991d8f82
--- /dev/null
+++ b/libavcodec/vulkan_hevc.c
@@ -0,0 +1,904 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "hevcdec.h"
+#include "hevc_ps.h"
+
+#include "vulkan_decode.h"
+
+/* Name and spec version of the Vulkan Video H.265 decode std header this
+ * file is written against. */
+const VkExtensionProperties ff_vk_dec_hevc_ext = {
+    .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME,
+    .specVersion   = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION,
+};
+
+/* Side storage for the structures a StdVideoH265SequenceParameterSet refers
+ * to. The member types mirror the auxiliary pointer parameters of set_sps();
+ * presumably these members are what gets passed to it (call site not shown
+ * here - confirm). */
+typedef struct HEVCHeaderSPS {
+    StdVideoH265ScalingLists scaling;
+    StdVideoH265HrdParameters vui_header;
+    StdVideoH265SequenceParameterSetVui vui;
+    StdVideoH265ProfileTierLevel ptl;
+    StdVideoH265DecPicBufMgr dpbm;
+    StdVideoH265PredictorPaletteEntries pal;
+    StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS];
+    StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS];
+    StdVideoH265ShortTermRefPicSet str[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
+    StdVideoH265LongTermRefPicsSps ltr[HEVC_MAX_LONG_TERM_REF_PICS];
+} HEVCHeaderSPS;
+
+/* Side storage kept per PPS: scaling lists and predictor palette entries. */
+typedef struct HEVCHeaderPPS {
+    StdVideoH265ScalingLists scaling;
+    StdVideoH265PredictorPaletteEntries pal;
+} HEVCHeaderPPS;
+
+/* NAL and VCL sub-layer HRD parameter arrays, HEVC_MAX_SUB_LAYERS entries
+ * each. get_data_set_buf() budgets one of these per vps_num_hrd_parameters
+ * of every VPS. */
+typedef struct HEVCHeaderVPSSet {
+    StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS];
+    StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS];
+} HEVCHeaderVPSSet;
+
+/* Side storage kept per VPS: profile/tier/level, DPB management and HRD
+ * parameters, followed by a variable number of sub-layer HRD sets. */
+typedef struct HEVCHeaderVPS {
+    StdVideoH265ProfileTierLevel ptl;
+    StdVideoH265DecPicBufMgr dpbm;
+    StdVideoH265HrdParameters hdr[HEVC_MAX_LAYER_SETS];
+    /* Flexible array member: the element count is not stored here */
+    HEVCHeaderVPSSet sls[];
+} HEVCHeaderVPS;
+
+/* Layout of the parameter set storage whose size get_data_set_buf() computes:
+ * the Std* parameter sets for every SPS/PPS/VPS slot, each followed by the
+ * side storage those parameter sets refer to. */
+typedef struct HEVCHeaderSet {
+    StdVideoH265SequenceParameterSet sps[HEVC_MAX_SPS_COUNT];
+    HEVCHeaderSPS hsps[HEVC_MAX_SPS_COUNT];
+
+    StdVideoH265PictureParameterSet pps[HEVC_MAX_PPS_COUNT];
+    HEVCHeaderPPS hpps[HEVC_MAX_PPS_COUNT];
+
+    /* Must be sized by the VPS count: get_data_set_buf() only reserves
+     * HEVC_MAX_VPS_COUNT entries, so a larger array here would push hvps[]
+     * beyond the space accounted for in the allocation. */
+    StdVideoH265VideoParameterSet vps[HEVC_MAX_VPS_COUNT];
+    /* NOTE(review): HEVCHeaderVPS itself ends in a flexible array member, so
+     * an array of it relies on a compiler extension and elements cannot be
+     * reached by plain indexing - confirm how this member is addressed. */
+    HEVCHeaderVPS hvps[];
+} HEVCHeaderSet;
+
+/* Fetch a buffer from s->tmp_pool that is large enough to hold the parameter
+ * set structures for every SPS/PPS/VPS slot plus the per-VPS storage of the
+ * nb_vps entries in vps_list. The pool is recreated whenever the required
+ * size exceeds the current element size.
+ * On success *data_buf holds the buffer and 0 is returned; allocation
+ * failures return AVERROR(ENOMEM). */
+static int get_data_set_buf(FFVulkanDecodeContext *s, AVBufferRef **data_buf,
+                            int nb_vps, AVBufferRef * const vps_list[HEVC_MAX_VPS_COUNT])
+{
+    /* Storage needed by each VPS regardless of its HRD parameter count */
+    const size_t vps_fixed = sizeof(StdVideoH265ProfileTierLevel) +
+                             sizeof(StdVideoH265DecPicBufMgr) +
+                             sizeof(StdVideoH265HrdParameters)*HEVC_MAX_LAYER_SETS;
+    size_t needed;
+    int idx;
+
+    /* One entry for every possible parameter set slot */
+    needed  = sizeof(HEVCHeaderSPS)*HEVC_MAX_SPS_COUNT;
+    needed += sizeof(HEVCHeaderPPS)*HEVC_MAX_PPS_COUNT;
+    needed += sizeof(StdVideoH265SequenceParameterSet)*HEVC_MAX_SPS_COUNT;
+    needed += sizeof(StdVideoH265PictureParameterSet)*HEVC_MAX_PPS_COUNT;
+    needed += sizeof(StdVideoH265VideoParameterSet)*HEVC_MAX_VPS_COUNT;
+
+    needed += vps_fixed*nb_vps;
+
+    /* Variable part: one sub-layer HRD set per HRD parameter of each VPS */
+    idx = 0;
+    while (idx < nb_vps) {
+        const HEVCVPS *cur_vps = (const HEVCVPS *)vps_list[idx]->data;
+        needed += sizeof(HEVCHeaderVPSSet)*cur_vps->vps_num_hrd_parameters;
+        idx++;
+    }
+
+    if (needed > s->tmp_pool_ele_size) {
+        /* Existing pool elements are too small, start over with a new pool.
+         * The recorded size stays 0 if creating the new pool fails. */
+        av_buffer_pool_uninit(&s->tmp_pool);
+        s->tmp_pool_ele_size = 0;
+        s->tmp_pool = av_buffer_pool_init(needed, NULL);
+        if (!s->tmp_pool)
+            return AVERROR(ENOMEM);
+        s->tmp_pool_ele_size = needed;
+    }
+
+    *data_buf = av_buffer_pool_get(s->tmp_pool);
+
+    return *data_buf ? 0 : AVERROR(ENOMEM);
+}
+
+/* Per-picture hwaccel data for HEVC: the shared Vulkan decode picture state
+ * plus the H.265-specific structures describing the picture and its
+ * references. */
+typedef struct HEVCVulkanDecodePicture {
+    FFVulkanDecodeContext          *ctx;
+    FFVulkanDecodePicture           vp;
+
+    /* Current picture */
+    StdVideoDecodeH265ReferenceInfo h265_ref;
+    VkVideoDecodeH265DpbSlotInfoKHR vkh265_ref;
+
+    /* Picture refs */
+    HEVCFrame                      *ref_src    [HEVC_MAX_REFS];
+    StdVideoDecodeH265ReferenceInfo h265_refs  [HEVC_MAX_REFS];
+    VkVideoDecodeH265DpbSlotInfoKHR vkh265_refs[HEVC_MAX_REFS];
+
+    /* Current picture (contd.) */
+    StdVideoDecodeH265PictureInfo   h265pic;
+    VkVideoDecodeH265PictureInfoKHR h265_pic_info;
+} HEVCVulkanDecodePicture;
+
+/**
+ * Prepare a picture through ff_vk_decode_prepare_frame() and fill in the
+ * chain of structures describing its DPB slot:
+ * ref_slot -> { vkh265_ref -> h265_ref, ref }.
+ *
+ * @param avctx      codec context; its hwaccel private data is the
+ *                   FFVulkanDecodeContext
+ * @param ref_src    if non-NULL, receives pic so the source frame of the
+ *                   slot can be looked up later
+ * @param ref_slot   reference slot info to fill in
+ * @param ref        picture resource info to fill in
+ * @param vkh265_ref H.265 DPB slot info to fill in
+ * @param h265_ref   H.265 std reference info to fill in
+ * @param pic        frame being described
+ * @param is_current forwarded to ff_vk_decode_prepare_frame()
+ * @param pic_id     slot index; also the array layer when the DPB is layered
+ * @return 0 on success, or the negative error code from
+ *         ff_vk_decode_prepare_frame()
+ */
+static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
+                             VkVideoReferenceSlotInfoKHR *ref_slot,       /* Main structure */
+                             VkVideoPictureResourceInfoKHR *ref,          /* Goes in ^ */
+                             VkVideoDecodeH265DpbSlotInfoKHR *vkh265_ref, /* Goes in ^ */
+                             StdVideoDecodeH265ReferenceInfo *h265_ref,   /* Goes in ^ */
+                             HEVCFrame *pic, int is_current, int pic_id)
+{
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vkpic = &hp->vp;
+
+    int err = ff_vk_decode_prepare_frame(ctx, pic->frame, vkpic, is_current,
+                                         ctx->dedicated_dpb);
+    if (err < 0)
+        return err;
+
+    *h265_ref = (StdVideoDecodeH265ReferenceInfo) {
+        .flags = (StdVideoDecodeH265ReferenceInfoFlags) {
+            /* The std flags are single-bit fields, so the masked value must
+             * be reduced to 0/1 first; storing a mask bit above bit 0
+             * directly would truncate to 0. */
+            .used_for_long_term_reference = !!(pic->flags & HEVC_FRAME_FLAG_LONG_REF),
+            .unused_for_reference = 0,
+        },
+        .PicOrderCntVal = pic->poc,
+    };
+
+    *vkh265_ref = (VkVideoDecodeH265DpbSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR,
+        .pStdReferenceInfo = h265_ref,
+    };
+
+    *ref = (VkVideoPictureResourceInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+        .codedOffset = (VkOffset2D){ 0, 0 },
+        .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height },
+        /* With a layered DPB each slot uses its own array layer */
+        .baseArrayLayer = ctx->layered_dpb ? pic_id : 0,
+        .imageViewBinding = vkpic->img_view_ref,
+    };
+
+    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+        .pNext = vkh265_ref,
+        .slotIndex = pic_id,
+        .pPictureResource = ref,
+    };
+
+    if (ref_src)
+        *ref_src = pic;
+
+    return 0;
+}
+
+static void set_sps(const HEVCSPS *sps, int sps_idx,
+                    StdVideoH265ScalingLists *vksps_scaling,
+                    StdVideoH265HrdParameters *vksps_vui_header,
+                    StdVideoH265SequenceParameterSetVui *vksps_vui,
+                    StdVideoH265SequenceParameterSet *vksps,
+                    StdVideoH265SubLayerHrdParameters *slhdrnal,
+                    StdVideoH265SubLayerHrdParameters *slhdrvcl,
+                    StdVideoH265ProfileTierLevel *ptl,
+                    StdVideoH265DecPicBufMgr *dpbm,
+                    StdVideoH265PredictorPaletteEntries *pal,
+                    StdVideoH265ShortTermRefPicSet *str,
+                    StdVideoH265LongTermRefPicsSps *ltr)
+{
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_list.sl[0][i],
+               STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList4x4));
+
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_list.sl[1][i],
+               STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList8x8));
+
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList16x16[i], sps->scaling_list.sl[2][i],
+               STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList16x16));
+
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList32x32[i], sps->scaling_list.sl[3][i],
+               STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList32x32));
+
+    memcpy(vksps_scaling->ScalingListDCCoef16x16, sps->scaling_list.sl_dc[0],
+           STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef16x16));
+
+    memcpy(vksps_scaling->ScalingListDCCoef32x32, sps->scaling_list.sl_dc[1],
+           STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef32x32));
+
+    *vksps_vui_header = (StdVideoH265HrdParameters) {
+        .flags = (StdVideoH265HrdFlags) {
+            .nal_hrd_parameters_present_flag = sps->hdr.flags.nal_hrd_parameters_present_flag,
+            .vcl_hrd_parameters_present_flag = sps->hdr.flags.vcl_hrd_parameters_present_flag,
+            .sub_pic_hrd_params_present_flag = sps->hdr.flags.sub_pic_hrd_params_present_flag,
+            .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->hdr.flags.sub_pic_cpb_params_in_pic_timing_sei_flag,
+            .fixed_pic_rate_general_flag = sps->hdr.flags.fixed_pic_rate_general_flag,
+            .fixed_pic_rate_within_cvs_flag = sps->hdr.flags.fixed_pic_rate_within_cvs_flag,
+            .low_delay_hrd_flag = sps->hdr.flags.low_delay_hrd_flag,
+        },
+        .tick_divisor_minus2 = sps->hdr.tick_divisor_minus2,
+        .du_cpb_removal_delay_increment_length_minus1 = sps->hdr.du_cpb_removal_delay_increment_length_minus1,
+        .dpb_output_delay_du_length_minus1 = sps->hdr.dpb_output_delay_du_length_minus1,
+        .bit_rate_scale = sps->hdr.bit_rate_scale,
+        .cpb_size_scale = sps->hdr.cpb_size_scale,
+        .cpb_size_du_scale = sps->hdr.cpb_size_du_scale,
+        .initial_cpb_removal_delay_length_minus1 = sps->hdr.initial_cpb_removal_delay_length_minus1,
+        .au_cpb_removal_delay_length_minus1 = sps->hdr.au_cpb_removal_delay_length_minus1,
+        .dpb_output_delay_length_minus1 = sps->hdr.dpb_output_delay_length_minus1,
+        /* Reserved - 3*16 bits */
+        .pSubLayerHrdParametersNal = slhdrnal,
+        .pSubLayerHrdParametersNal = slhdrvcl,
+    };
+
+    memcpy(vksps_vui_header->cpb_cnt_minus1, sps->hdr.cpb_cnt_minus1,
+           STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->cpb_cnt_minus1));
+    memcpy(vksps_vui_header->elemental_duration_in_tc_minus1, sps->hdr.elemental_duration_in_tc_minus1,
+           STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->elemental_duration_in_tc_minus1));
+
+    memcpy(slhdrnal, sps->hdr.nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrnal));
+    memcpy(slhdrvcl, sps->hdr.vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrvcl));
+
+    *vksps_vui = (StdVideoH265SequenceParameterSetVui) {
+        .flags = (StdVideoH265SpsVuiFlags) {
+            .aspect_ratio_info_present_flag = sps->vui.common.aspect_ratio_info_present_flag,
+            .overscan_info_present_flag = sps->vui.common.overscan_info_present_flag,
+            .overscan_appropriate_flag = sps->vui.common.overscan_appropriate_flag,
+            .video_signal_type_present_flag = sps->vui.common.video_signal_type_present_flag,
+            .video_full_range_flag = sps->vui.common.video_full_range_flag,
+            .colour_description_present_flag = sps->vui.common.colour_description_present_flag,
+            .chroma_loc_info_present_flag = sps->vui.common.chroma_loc_info_present_flag,
+            .neutral_chroma_indication_flag = sps->vui.neutra_chroma_indication_flag,
+            .field_seq_flag = sps->vui.field_seq_flag,
+            .frame_field_info_present_flag = sps->vui.frame_field_info_present_flag,
+            .default_display_window_flag = sps->vui.default_display_window_flag,
+            .vui_timing_info_present_flag = sps->vui.vui_timing_info_present_flag,
+            .vui_poc_proportional_to_timing_flag = sps->vui.vui_poc_proportional_to_timing_flag,
+            .vui_hrd_parameters_present_flag = sps->vui.vui_hrd_parameters_present_flag,
+            .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag,
+            .tiles_fixed_structure_flag = sps->vui.tiles_fixed_structure_flag,
+            .motion_vectors_over_pic_boundaries_flag = sps->vui.motion_vectors_over_pic_boundaries_flag,
+            .restricted_ref_pic_lists_flag = sps->vui.restricted_ref_pic_lists_flag,
+        },
+        .aspect_ratio_idc = sps->vui.common.aspect_ratio_idc,
+        .sar_width = sps->vui.common.sar.num,
+        .sar_height = sps->vui.common.sar.den,
+        .video_format = sps->vui.common.video_format,
+        .colour_primaries = sps->vui.common.colour_primaries,
+        .transfer_characteristics = sps->vui.common.transfer_characteristics,
+        .matrix_coeffs = sps->vui.common.matrix_coeffs,
+        .chroma_sample_loc_type_top_field = sps->vui.common.chroma_sample_loc_type_top_field,
+        .chroma_sample_loc_type_bottom_field = sps->vui.common.chroma_sample_loc_type_bottom_field,
+        /* Reserved */
+        /* Reserved */
+        .def_disp_win_left_offset = sps->vui.def_disp_win.left_offset,
+        .def_disp_win_right_offset = sps->vui.def_disp_win.right_offset,
+        .def_disp_win_top_offset = sps->vui.def_disp_win.top_offset,
+        .def_disp_win_bottom_offset = sps->vui.def_disp_win.bottom_offset,
+        .vui_num_units_in_tick = sps->vui.vui_num_units_in_tick,
+        .vui_time_scale = sps->vui.vui_time_scale,
+        .vui_num_ticks_poc_diff_one_minus1 = sps->vui.vui_num_ticks_poc_diff_one_minus1,
+        .min_spatial_segmentation_idc = sps->vui.min_spatial_segmentation_idc,
+        .max_bytes_per_pic_denom = sps->vui.max_bytes_per_pic_denom,
+        .max_bits_per_min_cu_denom = sps->vui.max_bits_per_min_cu_denom,
+        .log2_max_mv_length_horizontal = sps->vui.log2_max_mv_length_horizontal,
+        .log2_max_mv_length_vertical = sps->vui.log2_max_mv_length_vertical,
+        .pHrdParameters = vksps_vui_header,
+    };
+
+    *ptl = (StdVideoH265ProfileTierLevel) {
+        .flags = (StdVideoH265ProfileTierLevelFlags) {
+            .general_tier_flag = sps->ptl.general_ptl.tier_flag,
+            .general_progressive_source_flag = sps->ptl.general_ptl.progressive_source_flag,
+            .general_interlaced_source_flag = sps->ptl.general_ptl.interlaced_source_flag,
+            .general_non_packed_constraint_flag = sps->ptl.general_ptl.non_packed_constraint_flag,
+            .general_frame_only_constraint_flag = sps->ptl.general_ptl.frame_only_constraint_flag,
+        },
+        .general_profile_idc = sps->ptl.general_ptl.profile_idc,
+        .general_level_idc = sps->ptl.general_ptl.level_idc,
+    };
+
+    for (int i = 0; i < sps->max_sub_layers; i++) {
+        dpbm->max_latency_increase_plus1[i] = sps->temporal_layer[i].max_latency_increase + 1;
+        dpbm->max_dec_pic_buffering_minus1[i] = sps->temporal_layer[i].max_dec_pic_buffering - 1;
+        dpbm->max_num_reorder_pics[i] = sps->temporal_layer[i].num_reorder_pics;
+    }
+
+    for (int i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++)
+        for (int j = 0; j <= sps->sps_num_palette_predictor_initializer_minus1; j++)
+            pal->PredictorPaletteEntries[i][j] = sps->palette_predictor_initializers[i][j];
+
+    for (int i = 0; i < sps->nb_st_rps; i++) {
+        str[i] = (StdVideoH265ShortTermRefPicSet) {
+            .flags = (StdVideoH265ShortTermRefPicSetFlags) {
+                .inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict,
+                .delta_rps_sign = sps->st_rps[i].delta_rps_sign,
+            },
+            .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1,
+            .use_delta_flag = sps->st_rps[i].use_delta_flag,
+            .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1,
+            /* Spec fucked this up
+            .used_by_curr_pic_flag =
+            .used_by_curr_pic_s0_flag =
+            .used_by_curr_pic_s1_flag =
+            */
+            /* Reserved */
+            /* Reserved */
+            /* Reserved */
+            .num_negative_pics = sps->st_rps[i].num_negative_pics,
+            .num_positive_pics = sps->st_rps[i].num_delta_pocs - sps->st_rps[i].num_negative_pics,
+        };
+
+        for (int j = 0; j < str[i].num_negative_pics; j++)
+            str[i].delta_poc_s0_minus1[j] = sps->st_rps[i].delta_poc_s0[j] - 1;
+
+        for (int j = 0; j < str[i].num_positive_pics; j++)
+            str[i].delta_poc_s1_minus1[j] = sps->st_rps[i].delta_poc_s1[j] - 1;
+    }
+
+    for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) {
+        ltr[i] = (StdVideoH265LongTermRefPicsSps) {
+            .used_by_curr_pic_lt_sps_flag = sps->used_by_curr_pic_lt_sps_flag[i],
+            /* Spec fucked this up too*/
+            .lt_ref_pic_poc_lsb_sps[0] = sps->lt_ref_pic_poc_lsb_sps[i],
+        };
+    }
+
+    *vksps = (StdVideoH265SequenceParameterSet) {
+        .flags = (StdVideoH265SpsFlags) {
+            .sps_temporal_id_nesting_flag = sps->temporal_id_nesting_flag,
+            .separate_colour_plane_flag = sps->separate_colour_plane_flag,
+            .conformance_window_flag = sps->conformance_window_flag,
+            .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info_flag,
+            .scaling_list_enabled_flag = sps->scaling_list_enable_flag,
+            .sps_scaling_list_data_present_flag = sps->scaling_list_data_present_flag,
+            .amp_enabled_flag = sps->amp_enabled_flag,
+            .sample_adaptive_offset_enabled_flag = sps->sao_enabled,
+            .pcm_enabled_flag = sps->pcm_enabled_flag,
+            .pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag,
+            .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag,
+            .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag,
+            .strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag,
+            .vui_parameters_present_flag = sps->vui_present,
+            .sps_extension_present_flag = sps->sps_extension_present_flag,
+            .sps_range_extension_flag = sps->sps_range_extension_flag,
+            .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag,
+            .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag,
+            .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag,
+            .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag,
+            .extended_precision_processing_flag = sps->extended_precision_processing_flag,
+            .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag,
+            .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag,
+            .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag,
+            .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag,
+            .sps_scc_extension_flag = sps->sps_scc_extension_flag,
+            .sps_curr_pic_ref_enabled_flag = sps->sps_curr_pic_ref_enabled_flag,
+            .palette_mode_enabled_flag = sps->palette_mode_enabled_flag,
+            .sps_palette_predictor_initializers_present_flag = sps->sps_palette_predictor_initializer_present_flag,
+            .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disable_flag,
+        },
+        .chroma_format_idc = sps->chroma_format_idc,
+        .pic_width_in_luma_samples = sps->width,
+        .pic_height_in_luma_samples = sps->height,
+        .sps_video_parameter_set_id = sps->vps_id,
+        .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1,
+        .sps_seq_parameter_set_id = sps_idx,
+        .bit_depth_luma_minus8 = sps->bit_depth - 8,
+        .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8,
+        .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4,
+        .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3,
+        .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size,
+        .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2,
+        .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_transform_block_size,
+        .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter,
+        .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra,
+        .num_short_term_ref_pic_sets = sps->nb_st_rps,
+        .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps,
+        .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1,
+        .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1,
+        .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3,
+        .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size,
+        /* Reserved */
+        /* Reserved */
+        .palette_max_size = sps->palette_max_size,
+        .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size,
+        .motion_vector_resolution_control_idc = sps->motion_vector_resolution_control_idc,
+        .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializer_minus1,
+        .conf_win_left_offset = sps->pic_conf_win.left_offset,
+        .conf_win_right_offset = sps->pic_conf_win.right_offset,
+        .conf_win_top_offset = sps->pic_conf_win.top_offset,
+        .conf_win_bottom_offset = sps->pic_conf_win.bottom_offset,
+        .pProfileTierLevel = ptl,
+        .pDecPicBufMgr = dpbm,
+        .pScalingLists = vksps_scaling,
+        .pShortTermRefPicSet = str,
+        .pLongTermRefPicsSps = ltr,
+        .pSequenceParameterSetVui = vksps_vui,
+        .pPredictorPaletteEntries = pal,
+    };
+}
+
+/**
+ * Convert a parsed HEVC PPS into the Vulkan video std representation.
+ *
+ * @param pps           source picture parameter set
+ * @param sps           SPS the PPS refers to; only its vps_id is read here
+ * @param vkpps_scaling output scaling lists, referenced by vkpps->pScalingLists
+ * @param vkpps         output PPS, fully overwritten
+ * @param pal           output palette predictor entries, referenced by
+ *                      vkpps->pPredictorPaletteEntries
+ */
+static void set_pps(const HEVCPPS *pps, const HEVCSPS *sps,
+                    StdVideoH265ScalingLists *vkpps_scaling,
+                    StdVideoH265PictureParameterSet *vkpps,
+                    StdVideoH265PredictorPaletteEntries *pal)
+{
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_list.sl[0][i],
+               STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList4x4));
+
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_list.sl[1][i],
+               STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList8x8));
+
+    /* Each 16x16 list has its own element count; using the 4x4 count here
+     * would copy only part of every list. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList16x16[i], pps->scaling_list.sl[2][i],
+               STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList16x16));
+
+    /* H.265 scaling_list_data() advances matrixId by 3 for sizeId 3, so the
+     * two 32x32 lists are expected at source indices 0 and 3.
+     * NOTE(review): confirm against scaling_list_data() in hevc_ps.c. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList32x32[i], pps->scaling_list.sl[3][i * 3],
+               STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList32x32));
+
+    /* The DC arrays hold one coefficient per list, so they are sized by the
+     * number of lists, not by the number of elements in a list. */
+    memcpy(vkpps_scaling->ScalingListDCCoef16x16, pps->scaling_list.sl_dc[0],
+           STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * sizeof(*vkpps_scaling->ScalingListDCCoef16x16));
+
+    /* Same source index mapping (0 and 3) as for the 32x32 lists above. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++)
+        vkpps_scaling->ScalingListDCCoef32x32[i] = pps->scaling_list.sl_dc[1][i * 3];
+
+    *vkpps = (StdVideoH265PictureParameterSet) {
+        .flags = (StdVideoH265PpsFlags) {
+            .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag,
+            .output_flag_present_flag = pps->output_flag_present_flag,
+            .sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag,
+            .cabac_init_present_flag = pps->cabac_init_present_flag,
+            .constrained_intra_pred_flag = pps->constrained_intra_pred_flag,
+            .transform_skip_enabled_flag = pps->transform_skip_enabled_flag,
+            .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag,
+            .pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag,
+            .weighted_pred_flag = pps->weighted_pred_flag,
+            .weighted_bipred_flag = pps->weighted_bipred_flag,
+            .transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag,
+            .tiles_enabled_flag = pps->tiles_enabled_flag,
+            .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag,
+            .uniform_spacing_flag = pps->uniform_spacing_flag,
+            .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag,
+            .pps_loop_filter_across_slices_enabled_flag = pps->seq_loop_filter_across_slices_enabled_flag,
+            .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag,
+            .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag,
+            .pps_deblocking_filter_disabled_flag = pps->disable_dbf,
+            .pps_scaling_list_data_present_flag = pps->scaling_list_data_present_flag,
+            .lists_modification_present_flag = pps->lists_modification_present_flag,
+            .slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag,
+            .pps_extension_present_flag = pps->pps_extension_present_flag,
+            .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag,
+            .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag,
+            .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag,
+            .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag,
+            .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag,
+            .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializer_present_flag,
+            .monochrome_palette_flag = pps->monochrome_palette_flag,
+            .pps_range_extension_flag = pps->pps_range_extensions_flag,
+        },
+        .pps_pic_parameter_set_id = pps->pps_id,
+        .pps_seq_parameter_set_id = pps->sps_id,
+        .sps_video_parameter_set_id = sps->vps_id,
+        .num_extra_slice_header_bits = pps->num_extra_slice_header_bits,
+        .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1,
+        .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1,
+        .init_qp_minus26 = pps->pic_init_qp_minus26,
+        .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth,
+        .pps_cb_qp_offset = pps->cb_qp_offset,
+        .pps_cr_qp_offset = pps->cr_qp_offset,
+        .pps_beta_offset_div2 = pps->beta_offset >> 1,
+        .pps_tc_offset_div2 = pps->tc_offset >> 1,
+        .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2,
+        .log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size - 2,
+        .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth,
+        .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1,
+        .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma,
+        .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma,
+        .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset_plus5,
+        .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset_plus5,
+        .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset_plus3,
+        .pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializer,
+        .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry_minus8,
+        .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry_minus8,
+        .num_tile_columns_minus1 = pps->num_tile_columns - 1,
+        .num_tile_rows_minus1 = pps->num_tile_rows - 1,
+        .pScalingLists = vkpps_scaling,
+        .pPredictorPaletteEntries = pal,
+    };
+
+    /* Monochrome palettes carry a single component, otherwise three. */
+    for (int i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) {
+        for (int j = 0; j < pps->pps_num_palette_predictor_initializer; j++)
+            pal->PredictorPaletteEntries[i][j] = pps->palette_predictor_initializers[i][j];
+    }
+
+    /* The arrays below live inside *vkpps, so they can only be filled after
+     * the compound-literal assignment above has zeroed them. */
+    for (int i = 0; i < pps->num_tile_columns - 1; i++)
+        vkpps->column_width_minus1[i] = pps->column_width[i] - 1;
+
+    for (int i = 0; i < pps->num_tile_rows - 1; i++)
+        vkpps->row_height_minus1[i] = pps->row_height[i] - 1;
+
+    for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) {
+        vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i];
+        vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i];
+    }
+}
+
+/**
+ * Convert a parsed HEVC VPS into the Vulkan video std representation.
+ *
+ * @param vps     source video parameter set
+ * @param vkvps   output VPS, fully overwritten
+ * @param ptl     output profile/tier/level, filled from the general PTL and
+ *                referenced by vkvps->pProfileTierLevel
+ * @param dpbm    output DPB management values, one entry per sub-layer,
+ *                referenced by vkvps->pDecPicBufMgr
+ * @param sls_hdr output HRD parameters, one entry per vps_num_hrd_parameters,
+ *                referenced by vkvps->pHrdParameters
+ * @param sls     per-HRD storage for the NAL and VCL sub-layer parameters
+ *                that each sls_hdr entry points to
+ */
+static void set_vps(const HEVCVPS *vps,
+                    StdVideoH265VideoParameterSet *vkvps,
+                    StdVideoH265ProfileTierLevel *ptl,
+                    StdVideoH265DecPicBufMgr *dpbm,
+                    StdVideoH265HrdParameters *sls_hdr,
+                    HEVCHeaderVPSSet sls[])
+{
+    for (int i = 0; i < vps->vps_num_hrd_parameters; i++) {
+        const HEVCHdrParams *src = &vps->hdr[i];
+
+        sls_hdr[i] = (StdVideoH265HrdParameters) {
+            .flags = (StdVideoH265HrdFlags) {
+                .nal_hrd_parameters_present_flag = src->flags.nal_hrd_parameters_present_flag,
+                .vcl_hrd_parameters_present_flag = src->flags.vcl_hrd_parameters_present_flag,
+                .sub_pic_hrd_params_present_flag = src->flags.sub_pic_hrd_params_present_flag,
+                .sub_pic_cpb_params_in_pic_timing_sei_flag = src->flags.sub_pic_cpb_params_in_pic_timing_sei_flag,
+                .fixed_pic_rate_general_flag = src->flags.fixed_pic_rate_general_flag,
+                .fixed_pic_rate_within_cvs_flag = src->flags.fixed_pic_rate_within_cvs_flag,
+                .low_delay_hrd_flag = src->flags.low_delay_hrd_flag,
+            },
+            .tick_divisor_minus2 = src->tick_divisor_minus2,
+            .du_cpb_removal_delay_increment_length_minus1 = src->du_cpb_removal_delay_increment_length_minus1,
+            .dpb_output_delay_du_length_minus1 = src->dpb_output_delay_du_length_minus1,
+            .bit_rate_scale = src->bit_rate_scale,
+            .cpb_size_scale = src->cpb_size_scale,
+            .cpb_size_du_scale = src->cpb_size_du_scale,
+            .initial_cpb_removal_delay_length_minus1 = src->initial_cpb_removal_delay_length_minus1,
+            .au_cpb_removal_delay_length_minus1 = src->au_cpb_removal_delay_length_minus1,
+            .dpb_output_delay_length_minus1 = src->dpb_output_delay_length_minus1,
+            /* Reserved - 3*16 bits */
+            /* NAL and VCL sub-layer parameters each get their own pointer. */
+            .pSubLayerHrdParametersNal = sls[i].nal_hdr,
+            .pSubLayerHrdParametersVcl = sls[i].vcl_hdr,
+        };
+
+        /* Array members cannot be set through the initializer above from a
+         * source array, so they are copied afterwards. */
+        memcpy(sls_hdr[i].cpb_cnt_minus1, src->cpb_cnt_minus1,
+               STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].cpb_cnt_minus1));
+        memcpy(sls_hdr[i].elemental_duration_in_tc_minus1, src->elemental_duration_in_tc_minus1,
+               STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].elemental_duration_in_tc_minus1));
+
+        /* Fill the storage the two sub-layer pointers refer to. */
+        memcpy(sls[i].nal_hdr, src->nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].nal_hdr));
+        memcpy(sls[i].vcl_hdr, src->vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].vcl_hdr));
+    }
+
+    *ptl = (StdVideoH265ProfileTierLevel) {
+        .flags = (StdVideoH265ProfileTierLevelFlags) {
+            .general_tier_flag = vps->ptl.general_ptl.tier_flag,
+            .general_progressive_source_flag = vps->ptl.general_ptl.progressive_source_flag,
+            .general_interlaced_source_flag = vps->ptl.general_ptl.interlaced_source_flag,
+            .general_non_packed_constraint_flag = vps->ptl.general_ptl.non_packed_constraint_flag,
+            .general_frame_only_constraint_flag = vps->ptl.general_ptl.frame_only_constraint_flag,
+        },
+        .general_profile_idc = vps->ptl.general_ptl.profile_idc,
+        .general_level_idc = vps->ptl.general_ptl.level_idc,
+    };
+
+    for (int i = 0; i < vps->vps_max_sub_layers; i++) {
+        dpbm->max_latency_increase_plus1[i] = vps->vps_max_latency_increase[i] + 1;
+        dpbm->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering[i] - 1;
+        dpbm->max_num_reorder_pics[i] = vps->vps_num_reorder_pics[i];
+    }
+
+    *vkvps = (StdVideoH265VideoParameterSet) {
+        .flags = (StdVideoH265VpsFlags) {
+            .vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag,
+            .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag,
+            .vps_timing_info_present_flag = vps->vps_timing_info_present_flag,
+            .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag,
+        },
+        .vps_video_parameter_set_id = vps->vps_id,
+        .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers - 1,
+        /* Reserved */
+        /* Reserved */
+        .vps_num_units_in_tick = vps->vps_num_units_in_tick,
+        .vps_time_scale = vps->vps_time_scale,
+        .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one - 1,
+        /* Reserved */
+        .pDecPicBufMgr = dpbm,
+        .pHrdParameters = sls_hdr,
+        .pProfileTierLevel = ptl,
+    };
+}
+
+/**
+ * Build a Vulkan video session parameters object from every VPS, SPS and PPS
+ * currently held by the HEVC decoder.
+ *
+ * @param avctx codec context; priv_data is the HEVCContext and
+ *              internal->hwaccel_priv_data the FFVulkanDecodeContext
+ * @param buf   on success receives a new buffer wrapping the created
+ *              VkVideoSessionParametersKHR handle; untouched on failure
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int vk_hevc_create_params(AVCodecContext *avctx, AVBufferRef **buf)
+{
+    int err;
+    VkResult ret;
+    const HEVCContext *h = avctx->priv_data;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VkVideoDecodeH265SessionParametersAddInfoKHR h265_params_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR,
+        .stdSPSCount = 0,
+        .stdPPSCount = 0,
+        .stdVPSCount = 0,
+    };
+    VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pParametersAddInfo = &h265_params_info,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = &h265_params,
+        .videoSession = ctx->common.session,
+        .videoSessionParametersTemplate = NULL,
+    };
+
+    int nb_vps = 0;
+    AVBufferRef *data_set;
+    HEVCHeaderSet *hdr;
+
+    AVBufferRef *tmp;
+    VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par));
+    if (!par)
+        return AVERROR(ENOMEM);
+
+    /* The lists are walked up to the first empty slot; the explicit bound
+     * keeps the walk inside the array when every slot is populated. */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.vps_list) && h->ps.vps_list[i]; i++)
+        nb_vps++;
+
+    err = get_data_set_buf(ctx, &data_set, nb_vps, h->ps.vps_list);
+    if (err < 0) {
+        /* No Vulkan object exists yet, so only the handle storage is freed. */
+        av_free(par);
+        return err;
+    }
+
+    hdr = (HEVCHeaderSet *)data_set->data;
+
+    h265_params_info.pStdSPSs = hdr->sps;
+    h265_params_info.pStdPPSs = hdr->pps;
+    h265_params_info.pStdVPSs = hdr->vps;
+
+    /* SPS list */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.sps_list) && h->ps.sps_list[i]; i++) {
+        const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[i]->data;
+        set_sps(sps_l, i, &hdr->hsps[i].scaling, &hdr->hsps[i].vui_header,
+                &hdr->hsps[i].vui, &hdr->sps[i], hdr->hsps[i].nal_hdr,
+                hdr->hsps[i].vcl_hdr, &hdr->hsps[i].ptl, &hdr->hsps[i].dpbm,
+                &hdr->hsps[i].pal, hdr->hsps[i].str, hdr->hsps[i].ltr);
+        h265_params_info.stdSPSCount++;
+    }
+
+    /* PPS list */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.pps_list) && h->ps.pps_list[i]; i++) {
+        const HEVCPPS *pps_l = (const HEVCPPS *)h->ps.pps_list[i]->data;
+        /* NOTE(review): assumes the SPS referenced by this PPS is present. */
+        const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[pps_l->sps_id]->data;
+        set_pps(pps_l, sps_l, &hdr->hpps[i].scaling, &hdr->pps[i], &hdr->hpps[i].pal);
+        h265_params_info.stdPPSCount++;
+    }
+
+    /* VPS list */
+    for (int i = 0; i < nb_vps; i++) {
+        const HEVCVPS *vps_l = (const HEVCVPS *)h->ps.vps_list[i]->data;
+        set_vps(vps_l, &hdr->vps[i], &hdr->hvps[i].ptl, &hdr->hvps[i].dpbm,
+                hdr->hvps[i].hdr, hdr->hvps[i].sls);
+        h265_params_info.stdVPSCount++;
+    }
+
+    h265_params.maxStdSPSCount = h265_params_info.stdSPSCount;
+    h265_params.maxStdPPSCount = h265_params_info.stdPPSCount;
+    h265_params.maxStdVPSCount = h265_params_info.stdVPSCount;
+
+    /* Create session parameters */
+    ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create,
+                                              ctx->s.hwctx->alloc, par);
+    /* The staging buffer is released right after the create call, whether
+     * it succeeded or not. */
+    av_buffer_unref(&data_set);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        /* Creation failed, so there is no handle to destroy. */
+        av_free(par);
+        return AVERROR_EXTERNAL;
+    }
+
+    tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params,
+                           ctx, 0);
+    if (!tmp) {
+        ff_vk_decode_free_params(ctx, (uint8_t *)par);
+        return AVERROR(ENOMEM);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS %i VPS\n",
+           h265_params_info.stdSPSCount, h265_params_info.stdPPSCount,
+           h265_params_info.stdVPSCount);
+
+    *buf = tmp;
+
+    return 0;
+}
+
+/**
+ * Fill one RefPicSet index array of the picture info with DPB indices.
+ *
+ * Every entry is first set to 0xff; entry i is then overwritten with the
+ * index in h->DPB of the i-th frame of reference picture set `set`, if that
+ * frame is found there. At most dst_len entries are written.
+ *
+ * @param h       decoder context providing rps[] and DPB[]
+ * @param set     index into h->rps (ST_CURR_BEF, ST_CURR_AFT or LT_CURR)
+ * @param dst     destination index array
+ * @param dst_len number of entries in dst
+ */
+static void vk_hevc_fill_rps_indices(const HEVCContext *h, int set,
+                                     uint8_t *dst, int dst_len)
+{
+    memset(dst, 0xff, dst_len * sizeof(*dst));
+
+    for (int i = 0; i < h->rps[set].nb_refs && i < dst_len; i++) {
+        const HEVCFrame *frame = h->rps[set].ref[i];
+        for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) {
+            if (&h->DPB[j] == frame) {
+                dst[i] = j;
+                break;
+            }
+        }
+    }
+}
+
+/**
+ * Prepare the Vulkan decode state for the current picture.
+ *
+ * Creates the session parameters on first use, takes a reference to them
+ * for this picture, fills the std picture info, collects the reference
+ * slots from the DPB and sets up the decode info structure.
+ *
+ * @param avctx  codec context
+ * @param buffer unused
+ * @param size   unused
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int vk_hevc_start_frame(AVCodecContext          *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t       size)
+{
+    int err;
+    HEVCContext *h = avctx->priv_data;
+    HEVCFrame *pic = h->ref;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+    const HEVCSPS *sps = h->ps.sps;
+    const HEVCPPS *pps = h->ps.pps;
+    int nb_refs = 0;
+
+    if (!h->hwaccel_params_buf) {
+        err = vk_hevc_create_params(avctx, &h->hwaccel_params_buf);
+        if (err < 0)
+            return err;
+    }
+
+    vp->session_params = av_buffer_ref(h->hwaccel_params_buf);
+    if (!vp->session_params)
+        return AVERROR(ENOMEM);
+
+    hp->h265pic = (StdVideoDecodeH265PictureInfo) {
+        .flags = (StdVideoDecodeH265PictureInfoFlags) {
+            .IrapPicFlag = IS_IRAP(h),
+            .IdrPicFlag = IS_IDR(h),
+            .IsReference = h->nal_unit_type < 16 ? h->nal_unit_type & 1 : 1,
+            .short_term_ref_pic_set_sps_flag = h->sh.short_term_ref_pic_set_sps_flag,
+        },
+        .sps_video_parameter_set_id = sps->vps_id,
+        .pps_seq_parameter_set_id = pps->sps_id,
+        .pps_pic_parameter_set_id = pps->pps_id,
+        .NumDeltaPocsOfRefRpsIdx = h->sh.short_term_rps ? h->sh.short_term_rps->rps_idx_num_delta_pocs : 0,
+        .PicOrderCntVal = h->poc,
+        .NumBitsForSTRefPicSetInSlice = !h->sh.short_term_ref_pic_set_sps_flag ?
+                                         h->sh.bits_used_for_short_term_rps : 0,
+    };
+
+    /* Fill in references */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) {
+        const HEVCFrame *ref = &h->DPB[i];
+        int idx = nb_refs;
+
+        if (!(ref->flags & (HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF)))
+            continue;
+
+        /* The current picture goes into the setup slot and does not count
+         * towards the reference slots. */
+        if (ref == pic) {
+            err = vk_hevc_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref,
+                                    &hp->vkh265_ref, &hp->h265_ref, pic, 1, i);
+            if (err < 0)
+                return err;
+
+            continue;
+        }
+
+        /* vk_hevc_end_frame() gathers the references into arrays of
+         * HEVC_MAX_REFS entries, so never hand it more than that. */
+        if (nb_refs >= HEVC_MAX_REFS) {
+            av_log(avctx, AV_LOG_ERROR, "Too many reference frames (more than %i)\n",
+                   HEVC_MAX_REFS);
+            return AVERROR(EINVAL);
+        }
+
+        err = vk_hevc_fill_pict(avctx, &hp->ref_src[idx], &vp->ref_slots[idx],
+                                &vp->refs[idx], &hp->vkh265_refs[idx],
+                                &hp->h265_refs[idx], (HEVCFrame *)ref, 0, i);
+        if (err < 0)
+            return err;
+
+        nb_refs++;
+    }
+
+    /* Must run after hp->h265pic was assigned above, which zeroed these. */
+    vk_hevc_fill_rps_indices(h, ST_CURR_BEF, hp->h265pic.RefPicSetStCurrBefore,
+                             FF_ARRAY_ELEMS(hp->h265pic.RefPicSetStCurrBefore));
+    vk_hevc_fill_rps_indices(h, ST_CURR_AFT, hp->h265pic.RefPicSetStCurrAfter,
+                             FF_ARRAY_ELEMS(hp->h265pic.RefPicSetStCurrAfter));
+    vk_hevc_fill_rps_indices(h, LT_CURR, hp->h265pic.RefPicSetLtCurr,
+                             FF_ARRAY_ELEMS(hp->h265pic.RefPicSetLtCurr));
+
+    /* The slice count starts at zero; vk_hevc_decode_slice() passes the
+     * count and offsets fields to ff_vk_decode_add_slice() for every slice. */
+    hp->h265_pic_info = (VkVideoDecodeH265PictureInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR,
+        .pStdPictureInfo = &hp->h265pic,
+        .sliceSegmentCount = 0,
+        .pSliceSegmentOffsets = vp->slice_off,
+    };
+
+    vp->decode_info = (VkVideoDecodeInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR,
+        .pNext = &hp->h265_pic_info,
+        .flags = 0x0,
+        .pSetupReferenceSlot = &vp->ref_slot,
+        .referenceSlotCount = nb_refs,
+        .pReferenceSlots = vp->ref_slots,
+        .dstPictureResource = (VkVideoPictureResourceInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+            .codedOffset = (VkOffset2D){ 0, 0 },
+            .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height },
+            .baseArrayLayer = 0,
+            .imageViewBinding = vp->img_view_out,
+        },
+    };
+
+    /* Kept so vk_hevc_free_frame_priv() can release the frame resources. */
+    hp->ctx = ctx;
+
+    return 0;
+}
+
+/**
+ * Hand one slice of the current picture to ff_vk_decode_add_slice(), which
+ * receives the picture's slice segment count and offsets fields by pointer.
+ *
+ * @param avctx codec context
+ * @param data  slice data
+ * @param size  size of the slice data in bytes
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int vk_hevc_decode_slice(AVCodecContext *avctx,
+                                const uint8_t  *data,
+                                uint32_t        size)
+{
+    const HEVCContext *s = avctx->priv_data;
+    HEVCVulkanDecodePicture *pic_priv = s->ref->hwaccel_picture_private;
+    int ret;
+
+    ret = ff_vk_decode_add_slice(&pic_priv->vp, data, size, 1,
+                                 &pic_priv->h265_pic_info.sliceSegmentCount,
+                                 &pic_priv->h265_pic_info.pSliceSegmentOffsets);
+
+    /* Positive return values are collapsed to plain success. */
+    return ret < 0 ? ret : 0;
+}
+
+/**
+ * Submit the current picture for decoding.
+ *
+ * Gathers the frame and Vulkan picture state of every reference recorded
+ * for this picture and passes them to ff_vk_decode_frame().
+ *
+ * @param avctx codec context
+ * @return the return value of ff_vk_decode_frame()
+ */
+static int vk_hevc_end_frame(AVCodecContext *avctx)
+{
+    const HEVCContext *s = avctx->priv_data;
+    HEVCFrame *cur = s->ref;
+    HEVCVulkanDecodePicture *cur_priv = cur->hwaccel_picture_private;
+    FFVulkanDecodePicture *cur_vp = &cur_priv->vp;
+    AVFrame *ref_frames[HEVC_MAX_REFS] = { 0 };
+    FFVulkanDecodePicture *ref_vps[HEVC_MAX_REFS] = { 0 };
+
+    /* One entry per reference slot set up in the decode info. */
+    for (int i = 0; i < cur_vp->decode_info.referenceSlotCount; i++) {
+        const HEVCFrame *src = cur_priv->ref_src[i];
+        HEVCVulkanDecodePicture *src_priv = src->hwaccel_picture_private;
+
+        ref_frames[i] = src->frame;
+        ref_vps[i]    = &src_priv->vp;
+    }
+
+    /* NOTE(review): %lu assumes slices_size is unsigned long - confirm. */
+    av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n",
+           cur_vp->slices_size, cur_priv->h265_pic_info.sliceSegmentCount);
+
+    return ff_vk_decode_frame(avctx, cur->frame, cur_vp, ref_frames, ref_vps);
+}
+
+/**
+ * Release the per-frame hwaccel private data.
+ *
+ * @param avctx codec context (not used here)
+ * @param data  the HEVCVulkanDecodePicture to release
+ */
+static void vk_hevc_free_frame_priv(AVCodecContext *avctx, void *data)
+{
+    HEVCVulkanDecodePicture *pic_priv = data;
+
+    /* Frame resources first, using the context stored in the picture,
+     * then the picture structure itself. */
+    ff_vk_decode_free_frame(pic_priv->ctx, &pic_priv->vp);
+    av_free(pic_priv);
+}
+
+/* HEVC hwaccel descriptor: per-frame callbacks are the vk_hevc_* functions
+ * of this file, everything else comes from the shared ff_vk_* decode code. */
+const AVHWAccel ff_hevc_vulkan_hwaccel = {
+    .name                 = "hevc_vulkan",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_HEVC,
+    .pix_fmt              = AV_PIX_FMT_VULKAN,
+    .start_frame          = vk_hevc_start_frame,
+    .decode_slice         = vk_hevc_decode_slice,
+    .end_frame            = vk_hevc_end_frame,
+    .free_frame_priv      = vk_hevc_free_frame_priv,
+    .frame_priv_data_size = sizeof(HEVCVulkanDecodePicture),
+    .init                 = ff_vk_decode_init,
+    .flush                = ff_vk_decode_flush,
+    .uninit               = ff_vk_decode_uninit,
+    .frame_params         = ff_vk_frame_params,
+    .priv_data_size       = sizeof(FFVulkanDecodeContext),
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
-- 
2.39.2


[-- Attachment #74: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 34+ messages in thread