From: Lynne <dev@lynne.ee>
To: Ffmpeg Devel <ffmpeg-devel@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
Date: Fri, 17 Feb 2023 04:43:50 +0100 (CET)
Message-ID: <NOST85t--3-9@lynne.ee> (raw)
[-- Attachment #1: Type: text/plain, Size: 338 bytes --]
This small patchset mostly rewrites FFmpeg's Vulkan code to enable using
multiplane images, and implements video decode support. Also, numerous bugs
and issues were fixed, and there are quite a lot of performance improvements.
The patchset can be viewed here as well:
https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
Patches attached.
[-- Attachment #2: 0001-h2645_vui-expose-aspect_ratio_idc.patch --]
[-- Type: text/x-diff, Size: 1857 bytes --]
From a03d8aa0e2aa961183440e85de3f4922b14f8075 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:02:11 +0100
Subject: [PATCH 01/72] h2645_vui: expose aspect_ratio_idc
---
libavcodec/h2645_vui.c | 10 +++++-----
libavcodec/h2645_vui.h | 1 +
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c
index 0633fcbddd..93e83a9e1f 100644
--- a/libavcodec/h2645_vui.c
+++ b/libavcodec/h2645_vui.c
@@ -42,15 +42,15 @@ void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *l
aspect_ratio_info_present_flag = get_bits1(gb);
if (aspect_ratio_info_present_flag) {
- uint8_t aspect_ratio_idc = get_bits(gb, 8);
- if (aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect))
- vui->sar = ff_h2645_pixel_aspect[aspect_ratio_idc];
- else if (aspect_ratio_idc == EXTENDED_SAR) {
+ vui->aspect_ratio_idc = get_bits(gb, 8);
+ if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect))
+ vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc];
+ else if (vui->aspect_ratio_idc == EXTENDED_SAR) {
vui->sar.num = get_bits(gb, 16);
vui->sar.den = get_bits(gb, 16);
} else
av_log(logctx, AV_LOG_WARNING,
- "Unknown SAR index: %u.\n", aspect_ratio_idc);
+ "Unknown SAR index: %u.\n", vui->aspect_ratio_idc);
} else
vui->sar = (AVRational){ 0, 1 };
diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h
index 638da7c366..f1aeab7758 100644
--- a/libavcodec/h2645_vui.h
+++ b/libavcodec/h2645_vui.h
@@ -26,6 +26,7 @@
typedef struct H2645VUI {
AVRational sar;
+ int aspect_ratio_idc;
int overscan_info_present_flag;
int overscan_appropriate_flag;
--
2.39.2
[-- Attachment #3: 0002-h2645_vui-expose-aspect_ratio_info_present_flag.patch --]
[-- Type: text/x-diff, Size: 1469 bytes --]
From 42ff928100caea41ffa55ea2c8a8181de39306b7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:03:44 +0100
Subject: [PATCH 02/72] h2645_vui: expose aspect_ratio_info_present_flag
---
libavcodec/h2645_vui.c | 6 ++----
libavcodec/h2645_vui.h | 1 +
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c
index 93e83a9e1f..e5c7bf46f9 100644
--- a/libavcodec/h2645_vui.c
+++ b/libavcodec/h2645_vui.c
@@ -36,12 +36,10 @@
void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *logctx)
{
- int aspect_ratio_info_present_flag;
-
av_log(logctx, AV_LOG_DEBUG, "Decoding VUI\n");
- aspect_ratio_info_present_flag = get_bits1(gb);
- if (aspect_ratio_info_present_flag) {
+ vui->aspect_ratio_info_present_flag = get_bits1(gb);
+ if (vui->aspect_ratio_info_present_flag) {
vui->aspect_ratio_idc = get_bits(gb, 8);
if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect))
vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc];
diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h
index f1aeab7758..2c839f4b01 100644
--- a/libavcodec/h2645_vui.h
+++ b/libavcodec/h2645_vui.h
@@ -27,6 +27,7 @@
typedef struct H2645VUI {
AVRational sar;
int aspect_ratio_idc;
+ int aspect_ratio_info_present_flag;
int overscan_info_present_flag;
int overscan_appropriate_flag;
--
2.39.2
[-- Attachment #4: 0003-h264_ps-expose-pps_id.patch --]
[-- Type: text/x-diff, Size: 1226 bytes --]
From 5e115cd41e2221cc8048932dfed362be6f80b74b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 18 Mar 2022 15:11:02 +0100
Subject: [PATCH 03/72] h264_ps: expose pps_id
---
libavcodec/h264_ps.c | 1 +
libavcodec/h264_ps.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index d0d1e65903..4ec5bd4e80 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -731,6 +731,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct
if (!(bit_length & 7) && pps->data_size < sizeof(pps->data))
pps->data[pps->data_size++] = 0x80;
+ pps->pps_id = pps_id;
pps->sps_id = get_ue_golomb_31(gb);
if ((unsigned)pps->sps_id >= MAX_SPS_COUNT ||
!ps->sps_list[pps->sps_id]) {
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 5c35761fbc..c3f0888f24 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -103,6 +103,7 @@ typedef struct SPS {
* Picture parameter set
*/
typedef struct PPS {
+ unsigned int pps_id;
unsigned int sps_id;
int cabac; ///< entropy_coding_mode_flag
int pic_order_present; ///< pic_order_present_flag
--
2.39.2
[-- Attachment #5: 0004-h264_ps-set-pic_scaling_matrix_present_flag.patch --]
[-- Type: text/x-diff, Size: 3223 bytes --]
From 2720b9ff2a3d95c5d5887c2e06161de1691fc085 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 18 Mar 2022 16:17:33 +0100
Subject: [PATCH 04/72] h264_ps: set pic_scaling_matrix_present_flag
---
libavcodec/h264_ps.c | 7 +++++--
libavcodec/h264_ps.h | 1 +
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index 4ec5bd4e80..a94f5350c4 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -226,6 +226,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size,
/* returns non zero if the provided SPS scaling matrix has been filled */
static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
const PPS *pps, int is_sps,
+ int present_flag,
uint8_t(*scaling_matrix4)[16],
uint8_t(*scaling_matrix8)[64])
{
@@ -237,7 +238,7 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1]
};
int ret = 0;
- if (get_bits1(gb)) {
+ if (present_flag) {
ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]); // Intra, Y
ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr
ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb
@@ -368,7 +369,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx,
goto fail;
}
sps->transform_bypass = get_bits1(gb);
- ret = decode_scaling_matrices(gb, sps, NULL, 1,
+ ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb),
sps->scaling_matrix4, sps->scaling_matrix8);
if (ret < 0)
goto fail;
@@ -803,7 +804,9 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct
bits_left = bit_length - get_bits_count(gb);
if (bits_left > 0 && more_rbsp_data_in_pps(sps, avctx)) {
pps->transform_8x8_mode = get_bits1(gb);
+ pps->pic_scaling_matrix_present_flag = get_bits1(gb);
ret = decode_scaling_matrices(gb, sps, pps, 0,
+ pps->pic_scaling_matrix_present_flag,
pps->scaling_matrix4, pps->scaling_matrix8);
if (ret < 0)
goto fail;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index c3f0888f24..d2413ae0f8 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -119,6 +119,7 @@ typedef struct PPS {
int constrained_intra_pred; ///< constrained_intra_pred_flag
int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
int transform_8x8_mode; ///< transform_8x8_mode_flag
+ int pic_scaling_matrix_present_flag;
uint8_t scaling_matrix4[6][16];
uint8_t scaling_matrix8[6][64];
uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
--
2.39.2
[-- Attachment #6: 0005-h264_parser-expose-idr_pic_id.patch --]
[-- Type: text/x-diff, Size: 1437 bytes --]
From a9ae85816dfaa8791f974348825fc8ba9209423d Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 18:08:53 +0100
Subject: [PATCH 05/72] h264_parser: expose idr_pic_id
Vulkan needs it.
---
libavcodec/h264_parse.h | 1 +
libavcodec/h264_parser.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/h264_parse.h b/libavcodec/h264_parse.h
index 4ee863df66..4ba0add4f2 100644
--- a/libavcodec/h264_parse.h
+++ b/libavcodec/h264_parse.h
@@ -85,6 +85,7 @@ typedef struct H264POCContext {
int delta_poc_bottom;
int delta_poc[2];
int frame_num;
+ int idr_pic_id;
int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
int frame_num_offset; ///< for POC type 2
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index 46134a1c48..1c330484c1 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -432,7 +432,7 @@ static inline int parse_nal_units(AVCodecParserContext *s,
}
if (nal.type == H264_NAL_IDR_SLICE)
- get_ue_golomb_long(&nal.gb); /* idr_pic_id */
+ p->poc.idr_pic_id = get_ue_golomb_long(&nal.gb); /* idr_pic_id */
if (sps->poc_type == 0) {
p->poc.poc_lsb = get_bits(&nal.gb, sps->log2_max_poc_lsb);
--
2.39.2
[-- Attachment #7: 0006-h264_ps-comment-pic_order_present-better.patch --]
[-- Type: text/x-diff, Size: 997 bytes --]
From e42521563191a899d21fbf24e461bc6cb89661e9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:59:23 +0100
Subject: [PATCH 06/72] h264_ps: comment pic_order_present better
The official name which CBS uses is bottom_field_pic_order_in_frame_present_flag.
---
libavcodec/h264_ps.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index d2413ae0f8..de4529b353 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -106,7 +106,7 @@ typedef struct PPS {
unsigned int pps_id;
unsigned int sps_id;
int cabac; ///< entropy_coding_mode_flag
- int pic_order_present; ///< pic_order_present_flag
+ int pic_order_present; ///< bottom_field_pic_order_in_frame_present_flag
int slice_group_count; ///< num_slice_groups_minus1 + 1
int mb_slice_group_map_type;
unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
--
2.39.2
[-- Attachment #8: 0007-h264_ps-expose-max_dec_frame_buffering.patch --]
[-- Type: text/x-diff, Size: 1396 bytes --]
From e222eaa26f4d8fd36dd04525d754dbf4800c502a Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:06:04 +0100
Subject: [PATCH 07/72] h264_ps: expose max_dec_frame_buffering
---
libavcodec/h264_ps.c | 2 +-
libavcodec/h264_ps.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index a94f5350c4..d9df570718 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -176,7 +176,7 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx,
get_ue_golomb_31(gb); /* log2_max_mv_length_horizontal */
get_ue_golomb_31(gb); /* log2_max_mv_length_vertical */
sps->num_reorder_frames = get_ue_golomb_31(gb);
- get_ue_golomb_31(gb); /*max_dec_frame_buffering*/
+ sps->max_dec_frame_buffering = get_ue_golomb_31(gb);
if (get_bits_left(gb) < 0) {
sps->num_reorder_frames = 0;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index de4529b353..906bab7214 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -80,6 +80,7 @@ typedef struct SPS {
int32_t offset_for_ref_frame[256];
int bitstream_restriction_flag;
int num_reorder_frames;
+ int max_dec_frame_buffering;
int scaling_matrix_present;
uint8_t scaling_matrix4[6][16];
uint8_t scaling_matrix8[6][64];
--
2.39.2
[-- Attachment #9: 0008-h264_ps-expose-bit-rate-and-CPB-size-fields.patch --]
[-- Type: text/x-diff, Size: 2114 bytes --]
From 1279c6011c610fdb054cd9eea7a6f07c94f69f29 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:09:08 +0100
Subject: [PATCH 08/72] h264_ps: expose bit rate and CPB size fields
---
libavcodec/h264_ps.c | 8 ++++----
libavcodec/h264_ps.h | 4 ++++
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index d9df570718..fc8715876a 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -113,12 +113,12 @@ static inline int decode_hrd_parameters(GetBitContext *gb, void *logctx,
return AVERROR_INVALIDDATA;
}
- get_bits(gb, 4); /* bit_rate_scale */
+ sps->bit_rate_scale = get_bits(gb, 4);
get_bits(gb, 4); /* cpb_size_scale */
for (i = 0; i < cpb_count; i++) {
- get_ue_golomb_long(gb); /* bit_rate_value_minus1 */
- get_ue_golomb_long(gb); /* cpb_size_value_minus1 */
- get_bits1(gb); /* cbr_flag */
+ sps->bit_rate_value[i] = get_ue_golomb_long(gb) + 1; /* bit_rate_value_minus1 + 1 */
+ sps->cpb_size_value[i] = get_ue_golomb_long(gb) + 1; /* cpb_size_value_minus1 + 1 */
+ sps->cpr_flag[i] = get_bits1(gb);
}
sps->initial_cpb_removal_delay_length = get_bits(gb, 5) + 1;
sps->cpb_removal_delay_length = get_bits(gb, 5) + 1;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 906bab7214..03bd0227d6 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -89,6 +89,10 @@ typedef struct SPS {
int pic_struct_present_flag;
int time_offset_length;
int cpb_cnt; ///< See H.264 E.1.2
+ int bit_rate_scale;
+ uint32_t bit_rate_value[32]; ///< bit_rate_value_minus1 + 1
+ uint32_t cpb_size_value[32]; ///< cpb_size_value_minus1 + 1
+ uint8_t cpr_flag[32];
int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 + 1
int cpb_removal_delay_length; ///< cpb_removal_delay_length_minus1 + 1
int dpb_output_delay_length; ///< dpb_output_delay_length_minus1 + 1
--
2.39.2
[-- Attachment #10: 0009-h264_ps-expose-scaling_matrix_present_mask.patch --]
[-- Type: text/x-diff, Size: 7404 bytes --]
From 3ef9965fe2fa33942eb5b5def748f3f6bf9e0afb Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 17:05:35 +0100
Subject: [PATCH 09/72] h264_ps: expose scaling_matrix_present_mask
Vulkan requires it.
It technically also requires use_default_scaling_matrix_mask,
but we can just be explicit and give it the matrix we fill in as
non-default.
---
libavcodec/h264_ps.c | 37 +++++++++++++++++++++----------------
libavcodec/h264_ps.h | 2 ++
2 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index fc8715876a..9f26514167 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -197,12 +197,14 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx,
}
static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size,
- const uint8_t *jvt_list,
- const uint8_t *fallback_list)
+ const uint8_t *jvt_list, const uint8_t *fallback_list,
+ uint16_t *mask, int pos)
{
int i, last = 8, next = 8;
const uint8_t *scan = size == 16 ? ff_zigzag_scan : ff_zigzag_direct;
- if (!get_bits1(gb)) /* matrix not written, we use the predicted one */
+ uint16_t seq_scaling_list_present_flag = get_bits1(gb);
+ *mask |= (seq_scaling_list_present_flag << pos);
+ if (!seq_scaling_list_present_flag) /* matrix not written, we use the predicted one */
memcpy(factors, fallback_list, size * sizeof(uint8_t));
else
for (i = 0; i < size; i++) {
@@ -226,7 +228,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size,
/* returns non zero if the provided SPS scaling matrix has been filled */
static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
const PPS *pps, int is_sps,
- int present_flag,
+ int present_flag, uint16_t *mask,
uint8_t(*scaling_matrix4)[16],
uint8_t(*scaling_matrix8)[64])
{
@@ -238,21 +240,22 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1]
};
int ret = 0;
+ *mask = 0x0;
if (present_flag) {
- ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]); // Intra, Y
- ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr
- ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb
- ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1]); // Inter, Y
- ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3]); // Inter, Cr
- ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4]); // Inter, Cb
+ ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0], mask, 0); // Intra, Y
+ ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0], mask, 1); // Intra, Cr
+ ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1], mask, 2); // Intra, Cb
+ ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1], mask, 3); // Inter, Y
+ ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3], mask, 4); // Inter, Cr
+ ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4], mask, 5); // Inter, Cb
if (is_sps || pps->transform_8x8_mode) {
- ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2]); // Intra, Y
- ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3]); // Inter, Y
+ ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2], mask, 6); // Intra, Y
+ ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3], mask, 7); // Inter, Y
if (sps->chroma_format_idc == 3) {
- ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0]); // Intra, Cr
- ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3]); // Inter, Cr
- ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1]); // Intra, Cb
- ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4]); // Inter, Cb
+ ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0], mask, 8); // Intra, Cr
+ ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3], mask, 9); // Inter, Cr
+ ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1], mask, 10); // Intra, Cb
+ ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4], mask, 11); // Inter, Cb
}
}
if (!ret)
@@ -370,6 +373,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx,
}
sps->transform_bypass = get_bits1(gb);
ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb),
+ &sps->scaling_matrix_present_mask,
sps->scaling_matrix4, sps->scaling_matrix8);
if (ret < 0)
goto fail;
@@ -807,6 +811,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct
pps->pic_scaling_matrix_present_flag = get_bits1(gb);
ret = decode_scaling_matrices(gb, sps, pps, 0,
pps->pic_scaling_matrix_present_flag,
+ &pps->pic_scaling_matrix_present_mask,
pps->scaling_matrix4, pps->scaling_matrix8);
if (ret < 0)
goto fail;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 03bd0227d6..60ca9b3cd7 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -82,6 +82,7 @@ typedef struct SPS {
int num_reorder_frames;
int max_dec_frame_buffering;
int scaling_matrix_present;
+ uint16_t scaling_matrix_present_mask;
uint8_t scaling_matrix4[6][16];
uint8_t scaling_matrix8[6][64];
int nal_hrd_parameters_present_flag;
@@ -125,6 +126,7 @@ typedef struct PPS {
int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
int transform_8x8_mode; ///< transform_8x8_mode_flag
int pic_scaling_matrix_present_flag;
+ uint16_t pic_scaling_matrix_present_mask;
uint8_t scaling_matrix4[6][16];
uint8_t scaling_matrix8[6][64];
uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
--
2.39.2
[-- Attachment #11: 0010-h264dec-track-picture_structure-in-H264Picture.patch --]
[-- Type: text/x-diff, Size: 2132 bytes --]
From 52ab3cd8d165a838be92189c87c54915efc1c7e5 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 05:20:32 +0100
Subject: [PATCH 10/72] h264dec: track picture_structure in H264Picture
---
libavcodec/h264_picture.c | 1 +
libavcodec/h264_slice.c | 1 +
libavcodec/h264dec.h | 1 +
3 files changed, 3 insertions(+)
diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index 2661ff4698..0348166c43 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -80,6 +80,7 @@ static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src)
dst->mbaff = src->mbaff;
dst->field_picture = src->field_picture;
dst->reference = src->reference;
+ dst->picture_structure = src->picture_structure;
dst->recovered = src->recovered;
dst->invalid_gap = src->invalid_gap;
dst->sei_recovery_frame_cnt = src->sei_recovery_frame_cnt;
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 6188c74632..8ac66b343c 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -491,6 +491,7 @@ static int h264_frame_start(H264Context *h)
pic->reference = h->droppable ? 0 : h->picture_structure;
pic->f->coded_picture_number = h->coded_picture_number++;
pic->field_picture = h->picture_structure != PICT_FRAME;
+ pic->picture_structure = h->picture_structure;
pic->frame_num = h->poc.frame_num;
/*
* Zero key_frame here; IDR markings per slice in frame or fields are ORed
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 9a1ec1bace..1b18aba71f 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -137,6 +137,7 @@ typedef struct H264Picture {
int ref_count[2][2]; ///< number of entries in ref_poc (FIXME need per slice)
int mbaff; ///< 1 -> MBAFF frame 0-> not MBAFF
int field_picture; ///< whether or not picture was encoded in separate fields
+ int picture_structure; ///< picture structure
/**
* H264Picture.reference has this flag set,
--
2.39.2
[-- Attachment #12: 0011-hevc_ps-expose-SPS-and-VPS-headers.patch --]
[-- Type: text/x-diff, Size: 9068 bytes --]
From d80272e0759b686942f51b1c0c7615edb6a81bc6 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 01:29:57 +0100
Subject: [PATCH 11/72] hevc_ps: expose SPS and VPS headers
---
libavcodec/hevc_ps.c | 100 ++++++++++++++++++++++---------------------
libavcodec/hevc_ps.h | 41 ++++++++++++++++++
2 files changed, 93 insertions(+), 48 deletions(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 5fe62ec35b..bd1f278b06 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -355,81 +355,84 @@ static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx,
}
static void decode_sublayer_hrd(GetBitContext *gb, unsigned int nb_cpb,
- int subpic_params_present)
+ HEVCSublayerHdrParams *par, int subpic_params_present)
{
int i;
for (i = 0; i < nb_cpb; i++) {
- get_ue_golomb_long(gb); // bit_rate_value_minus1
- get_ue_golomb_long(gb); // cpb_size_value_minus1
+ par->bit_rate_value_minus1[i] = get_ue_golomb_long(gb);
+ par->cpb_size_value_minus1[i] = get_ue_golomb_long(gb);
if (subpic_params_present) {
- get_ue_golomb_long(gb); // cpb_size_du_value_minus1
- get_ue_golomb_long(gb); // bit_rate_du_value_minus1
+ par->cpb_size_du_value_minus1[i] = get_ue_golomb_long(gb);
+ par->bit_rate_du_value_minus1[i] = get_ue_golomb_long(gb);
}
- skip_bits1(gb); // cbr_flag
+
+ par->cbr_flag = get_bits1(gb);
}
}
static int decode_hrd(GetBitContext *gb, int common_inf_present,
- int max_sublayers)
+ HEVCHdrParams *hdr, int max_sublayers)
{
- int nal_params_present = 0, vcl_params_present = 0;
- int subpic_params_present = 0;
- int i;
-
if (common_inf_present) {
- nal_params_present = get_bits1(gb);
- vcl_params_present = get_bits1(gb);
-
- if (nal_params_present || vcl_params_present) {
- subpic_params_present = get_bits1(gb);
-
- if (subpic_params_present) {
- skip_bits(gb, 8); // tick_divisor_minus2
- skip_bits(gb, 5); // du_cpb_removal_delay_increment_length_minus1
- skip_bits(gb, 1); // sub_pic_cpb_params_in_pic_timing_sei_flag
- skip_bits(gb, 5); // dpb_output_delay_du_length_minus1
+ hdr->flags.nal_hrd_parameters_present_flag = get_bits1(gb);
+ hdr->flags.vcl_hrd_parameters_present_flag = get_bits1(gb);
+
+ if (hdr->flags.nal_hrd_parameters_present_flag ||
+ hdr->flags.vcl_hrd_parameters_present_flag) {
+ hdr->flags.sub_pic_hrd_params_present_flag = get_bits1(gb);
+
+ if (hdr->flags.sub_pic_hrd_params_present_flag) {
+ hdr->tick_divisor_minus2 = get_bits(gb, 8);
+ hdr->du_cpb_removal_delay_increment_length_minus1 = get_bits(gb, 5);
+ hdr->flags.sub_pic_cpb_params_in_pic_timing_sei_flag = get_bits1(gb);
+ hdr->dpb_output_delay_du_length_minus1 = get_bits(gb, 5);
}
- skip_bits(gb, 4); // bit_rate_scale
- skip_bits(gb, 4); // cpb_size_scale
+ hdr->bit_rate_scale = get_bits(gb, 4);
+ hdr->cpb_size_scale = get_bits(gb, 4);
- if (subpic_params_present)
- skip_bits(gb, 4); // cpb_size_du_scale
+ if (hdr->flags.sub_pic_hrd_params_present_flag)
+ hdr->cpb_size_du_scale = get_bits(gb, 4);
- skip_bits(gb, 5); // initial_cpb_removal_delay_length_minus1
- skip_bits(gb, 5); // au_cpb_removal_delay_length_minus1
- skip_bits(gb, 5); // dpb_output_delay_length_minus1
+ hdr->initial_cpb_removal_delay_length_minus1 = get_bits(gb, 5);
+ hdr->au_cpb_removal_delay_length_minus1 = get_bits(gb, 5);
+ hdr->dpb_output_delay_length_minus1 = get_bits(gb, 5);
}
}
- for (i = 0; i < max_sublayers; i++) {
- int low_delay = 0;
- unsigned int nb_cpb = 1;
- int fixed_rate = get_bits1(gb);
+ for (int i = 0; i < max_sublayers; i++) {
+ hdr->flags.fixed_pic_rate_general_flag = get_bits1(gb);
+
+ hdr->cpb_cnt_minus1[i] = 1;
- if (!fixed_rate)
- fixed_rate = get_bits1(gb);
+ if (!hdr->flags.fixed_pic_rate_general_flag)
+ hdr->flags.fixed_pic_rate_within_cvs_flag = get_bits1(gb);
- if (fixed_rate)
- get_ue_golomb_long(gb); // elemental_duration_in_tc_minus1
+ if (hdr->flags.fixed_pic_rate_within_cvs_flag)
+ hdr->elemental_duration_in_tc_minus1[i] = get_ue_golomb_long(gb);
else
- low_delay = get_bits1(gb);
+ hdr->flags.low_delay_hrd_flag = get_bits1(gb);
- if (!low_delay) {
- nb_cpb = get_ue_golomb_long(gb) + 1;
- if (nb_cpb < 1 || nb_cpb > 32) {
- av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", nb_cpb);
+ if (!hdr->flags.low_delay_hrd_flag) {
+ hdr->cpb_cnt_minus1[i] = get_ue_golomb_long(gb);
+ if (hdr->cpb_cnt_minus1[i] > 31) {
+ av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n",
+ hdr->cpb_cnt_minus1[i]);
return AVERROR_INVALIDDATA;
}
}
- if (nal_params_present)
- decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
- if (vcl_params_present)
- decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
+ if (hdr->flags.nal_hrd_parameters_present_flag)
+ decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->nal_params[i],
+ hdr->flags.sub_pic_hrd_params_present_flag);
+
+ if (hdr->flags.vcl_hrd_parameters_present_flag)
+ decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->vcl_params[i],
+ hdr->flags.sub_pic_hrd_params_present_flag);
}
+
return 0;
}
@@ -536,7 +539,8 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
get_ue_golomb_long(gb); // hrd_layer_set_idx
if (i)
common_inf_present = get_bits1(gb);
- decode_hrd(gb, common_inf_present, vps->vps_max_sub_layers);
+ decode_hrd(gb, common_inf_present, &vps->hdr[i],
+ vps->vps_max_sub_layers);
}
}
get_bits1(gb); /* vps_extension_flag */
@@ -655,7 +659,7 @@ timing_info:
vui->vui_num_ticks_poc_diff_one_minus1 = get_ue_golomb_long(gb);
vui->vui_hrd_parameters_present_flag = get_bits1(gb);
if (vui->vui_hrd_parameters_present_flag)
- decode_hrd(gb, 1, sps->max_sub_layers);
+ decode_hrd(gb, 1, &sps->hdr, sps->max_sub_layers);
}
vui->bitstream_restriction_flag = get_bits1(gb);
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 18894cfed1..b61d3b32b3 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -32,6 +32,43 @@
#include "h2645_vui.h"
#include "hevc.h"
+typedef struct HEVCSublayerHdrParams {
+ uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT];
+ uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT];
+ uint32_t cpb_size_du_value_minus1[HEVC_MAX_CPB_CNT];
+ uint32_t bit_rate_du_value_minus1[HEVC_MAX_CPB_CNT];
+ uint32_t cbr_flag;
+} HEVCSublayerHdrParams;
+
+typedef struct HEVCHdrFlagParams {
+ uint32_t nal_hrd_parameters_present_flag;
+ uint32_t vcl_hrd_parameters_present_flag;
+ uint32_t sub_pic_hrd_params_present_flag;
+ uint32_t sub_pic_cpb_params_in_pic_timing_sei_flag;
+ uint32_t fixed_pic_rate_general_flag;
+ uint32_t fixed_pic_rate_within_cvs_flag;
+ uint32_t low_delay_hrd_flag;
+} HEVCHdrFlagParams;
+
+typedef struct HEVCHdrParams {
+ HEVCHdrFlagParams flags;
+
+ uint8_t tick_divisor_minus2;
+ uint8_t du_cpb_removal_delay_increment_length_minus1;
+ uint8_t dpb_output_delay_du_length_minus1;
+ uint8_t bit_rate_scale;
+ uint8_t cpb_size_scale;
+ uint8_t cpb_size_du_scale;
+ uint8_t initial_cpb_removal_delay_length_minus1;
+ uint8_t au_cpb_removal_delay_length_minus1;
+ uint8_t dpb_output_delay_length_minus1;
+ uint8_t cpb_cnt_minus1[HEVC_MAX_SUB_LAYERS];
+ uint16_t elemental_duration_in_tc_minus1[HEVC_MAX_SUB_LAYERS];
+
+ HEVCSublayerHdrParams nal_params[HEVC_MAX_SUB_LAYERS];
+ HEVCSublayerHdrParams vcl_params[HEVC_MAX_SUB_LAYERS];
+} HEVCHdrParams;
+
typedef struct ShortTermRPS {
unsigned int num_negative_pics;
int num_delta_pocs;
@@ -108,6 +145,8 @@ typedef struct PTL {
} PTL;
typedef struct HEVCVPS {
+ HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS];
+
uint8_t vps_temporal_id_nesting_flag;
int vps_max_layers;
int vps_max_sub_layers; ///< vps_max_temporal_layers_minus1 + 1
@@ -146,6 +185,8 @@ typedef struct HEVCSPS {
HEVCWindow pic_conf_win;
+ HEVCHdrParams hdr;
+
int bit_depth;
int bit_depth_chroma;
int pixel_shift;
--
2.39.2
[-- Attachment #13: 0012-hevc_ps-expose-pps_id.patch --]
[-- Type: text/x-diff, Size: 1213 bytes --]
From d6e2ac33861642ac5dfa651963874c0f65d9b49b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 05:33:29 +0100
Subject: [PATCH 12/72] hevc_ps: expose pps_id
---
libavcodec/hevc_ps.c | 2 +-
libavcodec/hevc_ps.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index bd1f278b06..3242904473 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -1486,7 +1486,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
pps->log2_max_transform_skip_block_size = 2;
// Coded parameters
- pps_id = get_ue_golomb_long(gb);
+ pps_id = pps->pps_id = get_ue_golomb_long(gb);
if (pps_id >= HEVC_MAX_PPS_COUNT) {
av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
ret = AVERROR_INVALIDDATA;
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index b61d3b32b3..4cfcbcf9ae 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -275,6 +275,7 @@ typedef struct HEVCSPS {
} HEVCSPS;
typedef struct HEVCPPS {
+ unsigned int pps_id;
unsigned int sps_id; ///< seq_parameter_set_id
uint8_t sign_data_hiding_flag;
--
2.39.2
[-- Attachment #14: 0013-hevc_ps-expose-vps_id.patch --]
[-- Type: text/x-diff, Size: 1162 bytes --]
From a09e6d7611f6e89ea3107c4581b27715a7ca480d Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 06:42:44 +0100
Subject: [PATCH 13/72] hevc_ps: expose vps_id
---
libavcodec/hevc_ps.c | 2 +-
libavcodec/hevc_ps.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 3242904473..a26f2940fc 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -462,7 +462,7 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
}
memcpy(vps->data, gb->buffer, vps->data_size);
- vps_id = get_bits(gb, 4);
+ vps_id = vps->vps_id = get_bits(gb, 4);
if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits
av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n");
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 4cfcbcf9ae..571657d7fd 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -145,6 +145,7 @@ typedef struct PTL {
} PTL;
typedef struct HEVCVPS {
+ unsigned int vps_id;
HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS];
uint8_t vps_temporal_id_nesting_flag;
--
2.39.2
[-- Attachment #15: 0014-hevc_ps-expose-pps_extension_present_flag.patch --]
[-- Type: text/x-diff, Size: 1512 bytes --]
From 73a6b7e49ba8f01aefe2b7c152b2e2d04edaa3ee Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 12:49:45 +0100
Subject: [PATCH 14/72] hevc_ps: expose pps_extension_present_flag
---
libavcodec/hevc_ps.c | 3 ++-
libavcodec/hevc_ps.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index a26f2940fc..b1247bad67 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -1659,7 +1659,8 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
pps->slice_header_extension_present_flag = get_bits1(gb);
- if (get_bits1(gb)) { // pps_extension_present_flag
+ pps->pps_extension_present_flag = get_bits1(gb);
+ if (pps->pps_extension_present_flag) {
pps->pps_range_extensions_flag = get_bits1(gb);
skip_bits(gb, 7); // pps_extension_7bits
if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) {
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 571657d7fd..f221640531 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -326,6 +326,7 @@ typedef struct HEVCPPS {
int num_extra_slice_header_bits;
uint8_t slice_header_extension_present_flag;
uint8_t log2_max_transform_skip_block_size;
+ uint8_t pps_extension_present_flag;
uint8_t pps_range_extensions_flag;
uint8_t cross_component_prediction_enabled_flag;
uint8_t chroma_qp_offset_list_enabled_flag;
--
2.39.2
[-- Attachment #16: 0015-hevcdec-expose-bits_used_for_short_term_rps.patch --]
[-- Type: text/x-diff, Size: 1228 bytes --]
From 68e33940f494112e359f6a0a769083c1dd82a1c4 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 17:11:36 +0100
Subject: [PATCH 15/72] hevcdec: expose bits_used_for_short_term_rps
---
libavcodec/hevcdec.c | 1 +
libavcodec/hevcdec.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 567e8d81d4..43cd963175 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -702,6 +702,7 @@ static int hls_slice_header(HEVCContext *s)
if (ret < 0)
return ret;
+ sh->bits_used_for_short_term_rps = pos - get_bits_left(gb);
sh->short_term_rps = &sh->slice_rps;
} else {
int numbits, rps_idx;
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 9d3f4adbb3..15c4113bdd 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -268,6 +268,7 @@ typedef struct SliceHeader {
///< RPS coded in the slice header itself is stored here
int short_term_ref_pic_set_sps_flag;
+ int bits_used_for_short_term_rps;
int short_term_ref_pic_set_size;
ShortTermRPS slice_rps;
const ShortTermRPS *short_term_rps;
--
2.39.2
[-- Attachment #17: 0016-hevc_ps-expose-vui_present-sublayer_ordering_info-co.patch --]
[-- Type: text/x-diff, Size: 4332 bytes --]
From 46f18bf6af9e8ed0aaa82085a06b31dc8565e0df Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:17:51 +0100
Subject: [PATCH 16/72] hevc_ps: expose vui_present, sublayer_ordering_info,
conformance_window_flag, scaling_list_data_present_flag
---
libavcodec/hevc_ps.c | 18 ++++++++++--------
libavcodec/hevc_ps.h | 4 ++++
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index b1247bad67..a740da9f82 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -855,7 +855,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
HEVCWindow *ow;
int ret = 0;
int log2_diff_max_min_transform_block_size;
- int bit_depth_chroma, start, vui_present, sublayer_ordering_info;
+ int bit_depth_chroma, start;
int i;
// Coded parameters
@@ -904,7 +904,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
sps->height, 0, avctx)) < 0)
return ret;
- if (get_bits1(gb)) { // pic_conformance_flag
+ sps->conformance_window_flag = get_bits1(gb);
+ if (sps->conformance_window_flag) { // pic_conformance_flag
int vert_mult = hevc_sub_height_c[sps->chroma_format_idc];
int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc];
sps->pic_conf_win.left_offset = get_ue_golomb_long(gb) * horiz_mult;
@@ -951,8 +952,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
return AVERROR_INVALIDDATA;
}
- sublayer_ordering_info = get_bits1(gb);
- start = sublayer_ordering_info ? 0 : sps->max_sub_layers - 1;
+ sps->sublayer_ordering_info_flag = get_bits1(gb);
+ start = sps->sublayer_ordering_info_flag ? 0 : sps->max_sub_layers - 1;
for (i = start; i < sps->max_sub_layers; i++) {
sps->temporal_layer[i].max_dec_pic_buffering = get_ue_golomb_long(gb) + 1;
sps->temporal_layer[i].num_reorder_pics = get_ue_golomb_long(gb);
@@ -973,7 +974,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
}
}
- if (!sublayer_ordering_info) {
+ if (!sps->sublayer_ordering_info_flag) {
for (i = 0; i < start; i++) {
sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[start].max_dec_pic_buffering;
sps->temporal_layer[i].num_reorder_pics = sps->temporal_layer[start].num_reorder_pics;
@@ -1015,7 +1016,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
if (sps->scaling_list_enable_flag) {
set_default_scaling_list_data(&sps->scaling_list);
- if (get_bits1(gb)) {
+ sps->scaling_list_data_present_flag = get_bits1(gb);
+ if (sps->scaling_list_data_present_flag) {
ret = scaling_list_data(gb, avctx, &sps->scaling_list, sps);
if (ret < 0)
return ret;
@@ -1071,8 +1073,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
sps->sps_temporal_mvp_enabled_flag = get_bits1(gb);
sps->sps_strong_intra_smoothing_enable_flag = get_bits1(gb);
sps->vui.common.sar = (AVRational){0, 1};
- vui_present = get_bits1(gb);
- if (vui_present)
+ sps->vui_present = get_bits1(gb);
+ if (sps->vui_present)
decode_vui(gb, avctx, apply_defdispwin, sps);
if (get_bits1(gb)) { // sps_extension_flag
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index f221640531..549e0bdf57 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -184,6 +184,7 @@ typedef struct HEVCSPS {
HEVCWindow output_window;
+ int conformance_window_flag;
HEVCWindow pic_conf_win;
HEVCHdrParams hdr;
@@ -196,6 +197,7 @@ typedef struct HEVCSPS {
unsigned int log2_max_poc_lsb;
int pcm_enabled_flag;
+ int sublayer_ordering_info_flag;
int max_sub_layers;
struct {
int max_dec_pic_buffering;
@@ -204,10 +206,12 @@ typedef struct HEVCSPS {
} temporal_layer[HEVC_MAX_SUB_LAYERS];
uint8_t temporal_id_nesting_flag;
+ int vui_present;
VUI vui;
PTL ptl;
uint8_t scaling_list_enable_flag;
+ int scaling_list_data_present_flag;
ScalingList scaling_list;
unsigned int nb_st_rps;
--
2.39.2
[-- Attachment #18: 0017-hevc_ps-expose-and-parse-scc-range-extension-fields.patch --]
[-- Type: text/x-diff, Size: 7752 bytes --]
From 4645f1fb3249f8249fdebaf9b3edffc848b9af3c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:18:42 +0100
Subject: [PATCH 17/72] hevc_ps: expose and parse SCC extension fields
---
libavcodec/hevc.h | 2 ++
libavcodec/hevc_ps.c | 63 ++++++++++++++++++++++++++++++++++++++++----
libavcodec/hevc_ps.h | 26 ++++++++++++++++++
3 files changed, 86 insertions(+), 5 deletions(-)
diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index 1804755327..913c7d4e2e 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@@ -154,6 +154,8 @@ enum {
// get near that, though, so set a lower limit here with the maximum
// possible value for 4K video (at most 135 16x16 Ctb rows).
HEVC_MAX_ENTRY_POINT_OFFSETS = HEVC_MAX_TILE_COLUMNS * 135,
+
+ HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE = 128,
};
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index a740da9f82..b03f59efef 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -856,7 +856,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
int ret = 0;
int log2_diff_max_min_transform_block_size;
int bit_depth_chroma, start;
- int i;
+ int i, j;
// Coded parameters
@@ -1077,9 +1077,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
if (sps->vui_present)
decode_vui(gb, avctx, apply_defdispwin, sps);
- if (get_bits1(gb)) { // sps_extension_flag
+ sps->sps_extension_present_flag = get_bits1(gb);
+ if (sps->sps_extension_present_flag) { // sps_extension_flag
sps->sps_range_extension_flag = get_bits1(gb);
- skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7);
+ skip_bits(gb, 2);
+ sps->sps_scc_extension_flag = get_bits1(gb);
+ skip_bits(gb, 4);
if (sps->sps_range_extension_flag) {
sps->transform_skip_rotation_enabled_flag = get_bits1(gb);
sps->transform_skip_context_enabled_flag = get_bits1(gb);
@@ -1105,6 +1108,26 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
av_log(avctx, AV_LOG_WARNING,
"cabac_bypass_alignment_enabled_flag not yet implemented\n");
}
+ if (sps->sps_scc_extension_flag) {
+ sps->sps_curr_pic_ref_enabled_flag = get_bits1(gb);
+ sps->palette_mode_enabled_flag = get_bits1(gb);
+ if (sps->palette_mode_enabled_flag) {
+ sps->palette_max_size = get_ue_golomb_long(gb);
+ sps->delta_palette_max_predictor_size = get_ue_golomb_long(gb);
+
+ sps->sps_palette_predictor_initializer_present_flag = get_bits1(gb);
+ if (sps->sps_palette_predictor_initializer_present_flag) {
+ sps->sps_num_palette_predictor_initializer_minus1 = get_ue_golomb_long(gb);
+ for (i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++) {
+ for (j = 0; j <= sps->sps_num_palette_predictor_initializer_minus1; j++)
+ sps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb);
+ }
+ }
+ }
+
+ sps->motion_vector_resolution_control_idc = get_bits(gb, 2);
+ sps->intra_boundary_filtering_disable_flag = get_bits1(gb);
+ }
}
if (apply_defdispwin) {
sps->output_window.left_offset += sps->vui.def_disp_win.left_offset;
@@ -1446,7 +1469,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
HEVCParamSets *ps)
{
HEVCSPS *sps = NULL;
- int i, ret = 0;
+ int i, j, ret = 0;
unsigned int pps_id = 0;
ptrdiff_t nal_size;
unsigned log2_parallel_merge_level_minus2;
@@ -1664,11 +1687,41 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
pps->pps_extension_present_flag = get_bits1(gb);
if (pps->pps_extension_present_flag) {
pps->pps_range_extensions_flag = get_bits1(gb);
- skip_bits(gb, 7); // pps_extension_7bits
+ skip_bits(gb, 2);
+ pps->pps_scc_extension_flag = get_bits1(gb);
+ skip_bits(gb, 4);
if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) {
if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0)
goto err;
}
+ if (pps->pps_scc_extension_flag) {
+ pps->pps_curr_pic_ref_enabled_flag = get_bits1(gb);
+ pps->residual_adaptive_colour_transform_enabled_flag = get_bits1(gb);
+
+ if (pps->residual_adaptive_colour_transform_enabled_flag) {
+ pps->pps_slice_act_qp_offsets_present_flag = get_bits1(gb);
+ pps->pps_act_y_qp_offset_plus5 = get_se_golomb(gb);
+ pps->pps_act_cb_qp_offset_plus5 = get_se_golomb(gb);
+ pps->pps_act_cr_qp_offset_plus3 = get_se_golomb(gb);
+ }
+
+ pps->pps_palette_predictor_initializer_present_flag = get_bits1(gb);
+ if (pps->pps_palette_predictor_initializer_present_flag) {
+ pps->pps_num_palette_predictor_initializer = get_ue_golomb_long(gb);
+ if (pps->pps_num_palette_predictor_initializer) {
+ pps->monochrome_palette_flag = get_bits1(gb);
+ pps->luma_bit_depth_entry_minus8 = get_ue_golomb_long(gb);
+
+ if (!pps->monochrome_palette_flag)
+ pps->chroma_bit_depth_entry_minus8 = get_ue_golomb_long(gb);
+
+ for (i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) {
+ for (j = 0; j < pps->pps_num_palette_predictor_initializer; j++)
+ pps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb);
+ }
+ }
+ }
+ }
}
ret = setup_pps(avctx, gb, pps, sps);
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 549e0bdf57..8dddf7ef8d 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -210,6 +210,18 @@ typedef struct HEVCSPS {
VUI vui;
PTL ptl;
+ int sps_extension_present_flag;
+ int sps_scc_extension_flag;
+ int sps_curr_pic_ref_enabled_flag;
+ int palette_mode_enabled_flag;
+ uint8_t palette_max_size;
+ uint8_t delta_palette_max_predictor_size;
+ uint8_t motion_vector_resolution_control_idc;
+ uint8_t sps_num_palette_predictor_initializer_minus1;
+ int sps_palette_predictor_initializer_present_flag;
+ int intra_boundary_filtering_disable_flag;
+ uint16_t palette_predictor_initializers[3][HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE];
+
uint8_t scaling_list_enable_flag;
int scaling_list_data_present_flag;
ScalingList scaling_list;
@@ -341,6 +353,20 @@ typedef struct HEVCPPS {
uint8_t log2_sao_offset_scale_luma;
uint8_t log2_sao_offset_scale_chroma;
+ int pps_scc_extension_flag;
+ int pps_curr_pic_ref_enabled_flag;
+ int residual_adaptive_colour_transform_enabled_flag;
+ int pps_slice_act_qp_offsets_present_flag;
+ int pps_palette_predictor_initializer_present_flag;
+ int pps_num_palette_predictor_initializer;
+ int monochrome_palette_flag;
+ int luma_bit_depth_entry_minus8;
+ int chroma_bit_depth_entry_minus8;
+ int pps_act_y_qp_offset_plus5;
+ int pps_act_cb_qp_offset_plus5;
+ int pps_act_cr_qp_offset_plus3;
+ uint16_t palette_predictor_initializers[3][HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE];
+
// Inferred parameters
unsigned int *column_width; ///< ColumnWidth
unsigned int *row_height; ///< RowHeight
--
2.39.2
[-- Attachment #19: 0018-hevc_ps-expose-log2_diff_max_min_transform_block_siz.patch --]
[-- Type: text/x-diff, Size: 3078 bytes --]
From 141df2aaa6e9e256cf5260b919fb9151982dabe0 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 04:30:46 +0100
Subject: [PATCH 18/72] hevc_ps: expose log2_diff_max_min_transform_block_size
---
libavcodec/hevc_ps.c | 18 +++++++++---------
libavcodec/hevc_ps.h | 1 +
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index b03f59efef..2f0aff5a97 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -854,7 +854,6 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
{
HEVCWindow *ow;
int ret = 0;
- int log2_diff_max_min_transform_block_size;
int bit_depth_chroma, start;
int i, j;
@@ -982,12 +981,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
}
}
- sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3;
- sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb);
- sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2;
- log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb);
- sps->log2_max_trafo_size = log2_diff_max_min_transform_block_size +
- sps->log2_min_tb_size;
+ sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3;
+ sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb);
+ sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2;
+ sps->log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb);
+ sps->log2_max_trafo_size = sps->log2_diff_max_min_transform_block_size +
+ sps->log2_min_tb_size;
if (sps->log2_min_cb_size < 3 || sps->log2_min_cb_size > 30) {
av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size", sps->log2_min_cb_size);
@@ -1004,8 +1003,9 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
return AVERROR_INVALIDDATA;
}
- if (log2_diff_max_min_transform_block_size < 0 || log2_diff_max_min_transform_block_size > 30) {
- av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", log2_diff_max_min_transform_block_size);
+ if (sps->log2_diff_max_min_transform_block_size > 30) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size",
+ sps->log2_diff_max_min_transform_block_size);
return AVERROR_INVALIDDATA;
}
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 8dddf7ef8d..88e73e97c8 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -253,6 +253,7 @@ typedef struct HEVCSPS {
unsigned int log2_max_trafo_size;
unsigned int log2_ctb_size;
unsigned int log2_min_pu_size;
+ unsigned int log2_diff_max_min_transform_block_size;
int max_transform_hierarchy_depth_inter;
int max_transform_hierarchy_depth_intra;
--
2.39.2
[-- Attachment #20: 0019-hevc_ps-expose-rps-fields.patch --]
[-- Type: text/x-diff, Size: 4900 bytes --]
From b0e8756c78c95ff93b908612b76d2013f79d5c2b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:25:48 +0100
Subject: [PATCH 19/72] hevc_ps: expose rps fields
---
libavcodec/hevc_ps.c | 37 ++++++++++++++++++-------------------
libavcodec/hevc_ps.h | 7 +++++++
2 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 2f0aff5a97..745a4f270e 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -100,51 +100,50 @@ static void remove_vps(HEVCParamSets *s, int id)
int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header)
{
- uint8_t rps_predict = 0;
int delta_poc;
int k0 = 0;
int k = 0;
int i;
+ rps->rps_predict = 0;
+
if (rps != sps->st_rps && sps->nb_st_rps)
- rps_predict = get_bits1(gb);
+ rps->rps_predict = get_bits1(gb);
- if (rps_predict) {
+ if (rps->rps_predict) {
const ShortTermRPS *rps_ridx;
int delta_rps;
- unsigned abs_delta_rps;
- uint8_t use_delta_flag = 0;
- uint8_t delta_rps_sign;
if (is_slice_header) {
- unsigned int delta_idx = get_ue_golomb_long(gb) + 1;
- if (delta_idx > sps->nb_st_rps) {
+ rps->delta_idx = get_ue_golomb_long(gb) + 1;
+ if (rps->delta_idx > sps->nb_st_rps) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of delta_idx in slice header RPS: %d > %d.\n",
- delta_idx, sps->nb_st_rps);
+ rps->delta_idx, sps->nb_st_rps);
return AVERROR_INVALIDDATA;
}
- rps_ridx = &sps->st_rps[sps->nb_st_rps - delta_idx];
+ rps_ridx = &sps->st_rps[sps->nb_st_rps - rps->delta_idx];
rps->rps_idx_num_delta_pocs = rps_ridx->num_delta_pocs;
} else
rps_ridx = &sps->st_rps[rps - sps->st_rps - 1];
- delta_rps_sign = get_bits1(gb);
- abs_delta_rps = get_ue_golomb_long(gb) + 1;
- if (abs_delta_rps < 1 || abs_delta_rps > 32768) {
+ rps->delta_rps_sign = get_bits1(gb);
+ rps->abs_delta_rps = get_ue_golomb_long(gb) + 1;
+ if (rps->abs_delta_rps > 32768) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of abs_delta_rps: %d\n",
- abs_delta_rps);
+ rps->abs_delta_rps);
return AVERROR_INVALIDDATA;
}
- delta_rps = (1 - (delta_rps_sign << 1)) * abs_delta_rps;
+ delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps;
for (i = 0; i <= rps_ridx->num_delta_pocs; i++) {
int used = rps->used[k] = get_bits1(gb);
+ rps->use_delta_flag = 0;
if (!used)
- use_delta_flag = get_bits1(gb);
+ rps->use_delta_flag = get_bits1(gb);
- if (used || use_delta_flag) {
+ if (used || rps->use_delta_flag) {
if (i < rps_ridx->num_delta_pocs)
delta_poc = delta_rps + rps_ridx->delta_poc[i];
else
@@ -210,7 +209,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
if (rps->num_delta_pocs) {
prev = 0;
for (i = 0; i < rps->num_negative_pics; i++) {
- delta_poc = get_ue_golomb_long(gb) + 1;
+ delta_poc = rps->delta_poc_s0[i] = get_ue_golomb_long(gb) + 1;
if (delta_poc < 1 || delta_poc > 32768) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of delta_poc: %d\n",
@@ -223,7 +222,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
}
prev = 0;
for (i = 0; i < nb_positive_pics; i++) {
- delta_poc = get_ue_golomb_long(gb) + 1;
+ delta_poc = rps->delta_poc_s1[i] = get_ue_golomb_long(gb) + 1;
if (delta_poc < 1 || delta_poc > 32768) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of delta_poc: %d\n",
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 88e73e97c8..3cdbf6abec 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -70,9 +70,16 @@ typedef struct HEVCHdrParams {
} HEVCHdrParams;
typedef struct ShortTermRPS {
+ uint8_t rps_predict;
+ unsigned int delta_idx;
+ uint8_t use_delta_flag;
+ uint8_t delta_rps_sign;
+ unsigned int abs_delta_rps;
unsigned int num_negative_pics;
int num_delta_pocs;
int rps_idx_num_delta_pocs;
+ int32_t delta_poc_s0[32];
+ int32_t delta_poc_s1[32];
int32_t delta_poc[32];
uint8_t used[32];
} ShortTermRPS;
--
2.39.2
[-- Attachment #21: 0020-hwcontext_vulkan-initialize-and-require-instance-ver.patch --]
[-- Type: text/x-diff, Size: 2363 bytes --]
From a35cd953f9af8f34836d53006d10e3890a30ebf1 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:15:04 +0100
Subject: [PATCH 20/72] hwcontext_vulkan: initialize and require instance
version 1.3
---
configure | 4 ++--
libavutil/hwcontext_vulkan.c | 2 +-
libavutil/hwcontext_vulkan.h | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/configure b/configure
index d38613309d..f0f15b9e87 100755
--- a/configure
+++ b/configure
@@ -7006,8 +7006,8 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.
"in maintaining it."
if enabled vulkan; then
- check_pkg_config_header_only vulkan "vulkan >= 1.2.189" "vulkan/vulkan.h" "defined VK_VERSION_1_2" ||
- check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_3) || (defined(VK_VERSION_1_2) && VK_HEADER_VERSION >= 189)"
+ check_pkg_config_header_only vulkan "vulkan >= 1.3.238" "vulkan/vulkan.h" "defined VK_VERSION_1_3" ||
+ check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) || (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 238)"
fi
if enabled x86; then
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 2a9b5f4aac..c87f39d072 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -673,7 +673,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
VkApplicationInfo application_info = {
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
.pEngineName = "libavutil",
- .apiVersion = VK_API_VERSION_1_2,
+ .apiVersion = VK_API_VERSION_1_3,
.engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
LIBAVUTIL_VERSION_MINOR,
LIBAVUTIL_VERSION_MICRO),
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index df86c85b3c..70c8379dc3 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -53,7 +53,7 @@ typedef struct AVVulkanDeviceContext {
PFN_vkGetInstanceProcAddr get_proc_addr;
/**
- * Vulkan instance. Must be at least version 1.2.
+ * Vulkan instance. Must be at least version 1.3.
*/
VkInstance inst;
--
2.39.2
[-- Attachment #22: 0021-hwcontext_vulkan-enable-support-for-YCbCr-samplers.patch --]
[-- Type: text/x-diff, Size: 1833 bytes --]
From f365b7902693a367d77032e13c2e099306308f44 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 02:37:14 +0100
Subject: [PATCH 21/72] hwcontext_vulkan: enable support for YCbCr samplers
---
libavutil/hwcontext_vulkan.c | 1 +
libavutil/vulkan_functions.h | 2 ++
2 files changed, 3 insertions(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c87f39d072..72850c03cf 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1378,6 +1378,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
goto end;
}
p->device_features_1_2.timelineSemaphore = 1;
+ p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
/* Setup queue family */
if ((err = setup_queue_families(ctx, &dev_info)))
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index d15a5d9a42..deb77495a2 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -155,6 +155,8 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipeline) \
\
/* Sampler */ \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSamplerYcbcrConversion) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \
\
--
2.39.2
[-- Attachment #23: 0022-hwcontext_vulkan-enable-VK_KHR_synchronization2-if-s.patch --]
[-- Type: text/x-diff, Size: 5364 bytes --]
From b6db2ca65db72b346ba08480df4a201f7e1caea9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 13 Mar 2022 09:06:06 +0100
Subject: [PATCH 22/72] hwcontext_vulkan: enable VK_KHR_synchronization2 if
supported
---
libavutil/hwcontext_vulkan.c | 17 +++++++++++++----
libavutil/vulkan_functions.h | 6 +++++-
2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 72850c03cf..1d0261c8fe 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -89,6 +89,7 @@ typedef struct VulkanDevicePriv {
/* Features */
VkPhysicalDeviceVulkan11Features device_features_1_1;
VkPhysicalDeviceVulkan12Features device_features_1_2;
+ VkPhysicalDeviceVulkan13Features device_features_1_3;
/* Queues */
uint32_t qfs[5];
@@ -346,7 +347,7 @@ static const VulkanOptExtension optional_device_exts[] = {
/* Misc or required by other extensions */
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
- { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
+ { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_SYNC2 },
/* Imports/exports */
{ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -1326,9 +1327,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
+ VkPhysicalDeviceVulkan13Features dev_features_1_3 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
+ .pNext = &timeline_features,
+ };
VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
- .pNext = &timeline_features,
+ .pNext = &dev_features_1_3,
};
VkPhysicalDeviceVulkan11Features dev_features_1_1 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
@@ -1340,8 +1345,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
};
VkDeviceCreateInfo dev_info = {
- .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
- .pNext = &hwctx->device_features,
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
};
hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -1349,6 +1353,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
p->device_features_1_1.pNext = &p->device_features_1_2;
p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
+ p->device_features_1_2.pNext = &p->device_features_1_3;
+ p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
ctx->free = vulkan_device_free;
/* Create an instance if not given one */
@@ -1379,6 +1385,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
}
p->device_features_1_2.timelineSemaphore = 1;
p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
+ p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
+
+ dev_info.pNext = &hwctx->device_features;
/* Setup queue family */
if ((err = setup_queue_families(ctx, &dev_info)))
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index deb77495a2..103bff3013 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -37,6 +37,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_EXTERNAL_WIN32_MEMORY = 1ULL << 6, /* VK_KHR_external_memory_win32 */
FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */
#endif
+ FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -145,7 +146,10 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \
- \
+ \
+ /* sync2 */ \
+ MACRO(1, 1, FF_VK_EXT_SYNC2, CmdPipelineBarrier2KHR) \
+ \
/* Pipeline */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \
--
2.39.2
[-- Attachment #24: 0023-hwcontext_vulkan-support-threadsafe-queue-and-frame-.patch --]
[-- Type: text/x-diff, Size: 19170 bytes --]
From 05e94e06667f305afe181c3b318d08b4e528ce09 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 15 Mar 2022 23:00:32 +0100
Subject: [PATCH 23/72] hwcontext_vulkan: support threadsafe queue and frame
operations
---
libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
libavutil/hwcontext_vulkan.h | 40 +++++++-
2 files changed, 167 insertions(+), 49 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 1d0261c8fe..5a06a6872d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -27,6 +27,7 @@
#include <dlfcn.h>
#endif
+#include <pthread.h>
#include <unistd.h>
#include "config.h"
@@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceVulkan13Features device_features_1_3;
/* Queues */
- uint32_t qfs[5];
- int num_qfs;
+ pthread_mutex_t **qf_mutex;
+ int nb_tot_qfs;
+ uint32_t img_qfs[5];
+ int nb_img_qfs;
/* Debug callback */
VkDebugUtilsMessengerEXT debug_ctx;
@@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
} VulkanFramesPriv;
typedef struct AVVkFrameInternal {
+ pthread_mutex_t update_mutex;
+
#if CONFIG_CUDA
/* Importing external memory into cuda is really expensive so we keep the
* memory imported all the time */
@@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
if (p->libvulkan)
dlclose(p->libvulkan);
+ for (int i = 0; i < p->nb_tot_qfs; i++)
+ av_freep(&p->qf_mutex[i]);
+ av_freep(&p->qf_mutex);
+
RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}
@@ -1436,13 +1445,26 @@ end:
return err;
}
+static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
+}
+
+static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
+}
+
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
int err;
- uint32_t queue_num;
+ uint32_t qf_num;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ VkQueueFamilyProperties *qf;
int graph_index, comp_index, tx_index, enc_index, dec_index;
/* Set device extension flags */
@@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
- if (!queue_num) {
+ vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
+ if (!qf_num) {
av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
return AVERROR_EXTERNAL;
}
+ qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
+ if (!qf)
+ return AVERROR(ENOMEM);
+
+ vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
+
+ p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
+ if (!p->qf_mutex)
+ return AVERROR(ENOMEM);
+ p->nb_tot_qfs = qf_num;
+
+ for (int i = 0; i < qf_num; i++) {
+ p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
+ if (!p->qf_mutex[i])
+ return AVERROR(ENOMEM);
+ for (int j = 0; j < qf[i].queueCount; j++)
+ pthread_mutex_init(&p->qf_mutex[i][j], NULL);
+ }
+
graph_index = hwctx->queue_family_index;
comp_index = hwctx->queue_family_comp_index;
tx_index = hwctx->queue_family_tx_index;
@@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
return AVERROR(EINVAL); \
} else if (fidx < 0 || ctx_qf < 0) { \
break; \
- } else if (ctx_qf >= queue_num) { \
+ } else if (ctx_qf >= qf_num) { \
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
- type, ctx_qf, queue_num); \
+ type, ctx_qf, qf_num); \
return AVERROR(EINVAL); \
} \
\
@@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
- p->qfs[p->num_qfs++] = ctx_qf; \
+ p->img_qfs[p->nb_img_qfs++] = ctx_qf; \
} while (0)
CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
@@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
#undef CHECK_QUEUE
+ if (!hwctx->lock_queue)
+ hwctx->lock_queue = lock_queue;
+ if (!hwctx->unlock_queue)
+ hwctx->unlock_queue = unlock_queue;
+
/* Get device capabilities */
vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
@@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
{
AVVkFrameInternal *internal = f->internal;
- if (!internal)
- return;
-
#if CONFIG_CUDA
if (internal->cuda_fc_ref) {
AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
@@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
uint32_t src_qf, dst_qf;
VkImageLayout new_layout;
VkAccessFlags new_access;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@@ -1944,6 +1989,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
};
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
+
+ if ((err = wait_start_exec_ctx(hwfc, ectx)))
+ return err;
+
+ vkfc->lock_frame(hwfc, frame);
+
for (int i = 0; i < planes; i++) {
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
sem_sig_val[i] = frame->sem_value[i] + 1;
@@ -1980,9 +2031,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
break;
}
- if ((err = wait_start_exec_ctx(hwfc, ectx)))
- return err;
-
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
@@ -2008,7 +2056,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 0, NULL, 0, NULL, planes, img_bar);
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ vkfc->unlock_frame(hwfc, frame);
+
+ return err;
}
static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
@@ -2090,10 +2141,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = usage,
.samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
@@ -2117,6 +2168,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
return AVERROR_EXTERNAL;
}
+ f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@@ -2161,10 +2213,10 @@ static void try_export_flags(AVHWFramesContext *hwfc,
VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
.pNext = NULL,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
VkPhysicalDeviceExternalImageFormatInfo enext = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
@@ -2259,6 +2311,16 @@ fail:
return NULL;
}
+static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+ pthread_mutex_lock(&vkf->internal->update_mutex);
+}
+
+static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+ pthread_mutex_unlock(&vkf->internal->update_mutex);
+}
+
static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
VulkanFramesPriv *fp = hwfc->internal->priv;
@@ -2421,6 +2483,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return AVERROR(ENOMEM);
}
+ if (!hwctx->lock_frame)
+ hwctx->lock_frame = lock_frame;
+ if (!hwctx->unlock_frame)
+ hwctx->unlock_frame = unlock_frame;
+
return 0;
}
@@ -2727,10 +2794,10 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
.samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
/* Image format verification */
@@ -2809,6 +2876,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
* offer us anything we could import and sync with, so instead
* just signal the semaphore we created. */
+ f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@@ -3017,20 +3085,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
CU_AD_FORMAT_UNSIGNED_INT8;
dst_f = (AVVkFrame *)frame->data[0];
-
dst_int = dst_f->internal;
- if (!dst_int || !dst_int->cuda_fc_ref) {
- if (!dst_f->internal)
- dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
-
- if (!dst_int)
- return AVERROR(ENOMEM);
+ if (!dst_int->cuda_fc_ref) {
dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
- if (!dst_int->cuda_fc_ref) {
- av_freep(&dst_f->internal);
+ if (!dst_int->cuda_fc_ref)
return AVERROR(ENOMEM);
- }
for (int i = 0; i < planes; i++) {
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
@@ -3704,13 +3764,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
return err;
}
-static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
+static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
AVBufferRef **bufs, size_t *buf_offsets,
const int *buf_stride, int w,
int h, enum AVPixelFormat pix_fmt, int to_buf)
{
int err;
AVVkFrame *frame = (AVVkFrame *)f->data[0];
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
@@ -3745,11 +3806,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
.waitSemaphoreCount = planes,
};
- for (int i = 0; i < planes; i++)
- sem_signal_values[i] = frame->sem_value[i] + 1;
+ vkfc->lock_frame(hwfc, frame);
if ((err = wait_start_exec_ctx(hwfc, ectx)))
- return err;
+ goto end;
+
+ for (int i = 0; i < planes; i++)
+ sem_signal_values[i] = frame->sem_value[i] + 1;
/* Change the image layout to something more optimal for transfers */
for (int i = 0; i < planes; i++) {
@@ -3824,14 +3887,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
if (!f->buf[ref])
break;
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
- return err;
+ goto end;
}
if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
- return err;
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
+ goto end;
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
} else {
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
}
+
+end:
+ vkfc->unlock_frame(hwfc, frame);
+ return err;
}
static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
@@ -3960,8 +4027,9 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
}
/* Copy buffers into/from image */
- err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
- swf->width, swf->height, swf->format, from);
+ err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets,
+ tmp.linesize, swf->width, swf->height, swf->format,
+ from);
if (from) {
/* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
@@ -4142,7 +4210,19 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
AVVkFrame *av_vk_frame_alloc(void)
{
- return av_mallocz(sizeof(AVVkFrame));
+ AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
+ if (!f)
+ return NULL;
+
+ f->internal = av_mallocz(sizeof(*f->internal));
+ if (!f->internal) {
+ av_free(f);
+ return NULL;
+ }
+
+ pthread_mutex_init(&f->internal->update_mutex, NULL);
+
+ return f;
}
const HWContextType ff_hwcontext_type_vulkan = {
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 70c8379dc3..406d8709c3 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -27,6 +27,8 @@
#include "pixfmt.h"
#include "frame.h"
+typedef struct AVVkFrame AVVkFrame;
+
/**
* @file
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext {
*/
int queue_family_decode_index;
int nb_decode_queues;
+
+ /**
+ * Locks a queue, preventing other threads from submitting any command
+ * buffers to this queue.
+ * If set to NULL, will be set to lavu-internal functions that utilize a
+ * mutex.
+ */
+ void (*lock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index);
+
+ /**
+ * Similar to lock_queue(), unlocks a queue. Must only be called after locking.
+ */
+ void (*unlock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index);
} AVVulkanDeviceContext;
/**
@@ -195,6 +210,23 @@ typedef struct AVVulkanFramesContext {
* av_hwframe_ctx_init().
*/
AVVkFrameFlags flags;
+
+ /**
+ * Locks a frame, preventing other threads from changing frame properties.
+ * Users SHOULD only ever lock just before command submission in order
+ * to get accurate frame properties, and unlock immediately after command
+ * submission without waiting for it to finish.
+ *
+ * If set to NULL, will be set to lavu-internal functions that utilize a
+ * mutex. A locked frame must be released again via unlock_frame(), which
+ * is defaulted the same way.
+ */
+ void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
+
+ /**
+ * Similar to lock_frame(), unlocks a frame. Must only be called after locking.
+ */
+ void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
} AVVulkanFramesContext;
/*
@@ -210,7 +242,7 @@ typedef struct AVVulkanFramesContext {
* @note the size of this structure is not part of the ABI, to allocate
* you must use @av_vk_frame_alloc().
*/
-typedef struct AVVkFrame {
+struct AVVkFrame {
/**
* Vulkan images to which the memory is bound to.
*/
@@ -264,6 +296,12 @@ typedef struct AVVkFrame {
* Describes the binding offset of each plane to the VkDeviceMemory.
*/
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
+
+ /**
+ * Queue family of each image. Must be VK_QUEUE_FAMILY_IGNORED if
+ * the image was created with VK_SHARING_MODE_CONCURRENT.
+ */
+ uint32_t queue_family[AV_NUM_DATA_POINTERS];
} AVVkFrame;
/**
--
2.39.2
[-- Attachment #25: 0024-hwcontext_vulkan-remove-contiguous-memory-hack.patch --]
[-- Type: text/x-diff, Size: 2600 bytes --]
From 197e5cfa63a2356a64ac6ae20024fa98fda26f43 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 20:32:49 +0100
Subject: [PATCH 24/72] hwcontext_vulkan: remove contiguous memory hack
---
libavutil/hwcontext_vulkan.c | 12 ------------
libavutil/hwcontext_vulkan.h | 4 +---
2 files changed, 1 insertion(+), 15 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 5a06a6872d..ab5b24f10c 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -112,9 +112,6 @@ typedef struct VulkanDevicePriv {
/* Nvidia */
int dev_is_nvidia;
-
- /* Intel */
- int dev_is_intel;
} VulkanDevicePriv;
typedef struct VulkanFramesPriv {
@@ -1501,7 +1498,6 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->hprops.minImportedHostPointerAlignment);
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
- p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
if (!qf_num) {
@@ -1620,8 +1616,6 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
return AVERROR_EXTERNAL;
}
- if (strstr(vendor, "Intel"))
- dev_select.vendor_id = 0x8086;
if (strstr(vendor, "AMD"))
dev_select.vendor_id = 0x1002;
@@ -2356,12 +2350,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
if (!hwctx->usage)
hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS;
- if (!(hwctx->flags & AV_VK_FRAME_FLAG_NONE)) {
- if (p->contiguous_planes == 1 ||
- ((p->contiguous_planes == -1) && p->dev_is_intel))
- hwctx->flags |= AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY;
- }
-
modifier_info = vk_find_struct(hwctx->create_pnext,
VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 406d8709c3..e89fa52927 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -160,9 +160,7 @@ typedef enum AVVkFrameFlags {
* device and tiling during av_hwframe_ctx_init(). */
AV_VK_FRAME_FLAG_NONE = (1ULL << 0),
- /* Image planes will be allocated in a single VkDeviceMemory, rather
- * than as per-plane VkDeviceMemory allocations. Required for exporting
- * to VAAPI on Intel devices. */
+ /* DEPRECATED: does nothing. */
AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1),
} AVVkFrameFlags;
--
2.39.2
[-- Attachment #26: 0025-hwcontext_vulkan-rename-vk_pixfmt_map-to-vk_pixfmt_p.patch --]
[-- Type: text/x-diff, Size: 1383 bytes --]
From 28903a643a7db85e6eef289a853a03b33b67be41 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 20:35:51 +0100
Subject: [PATCH 25/72] hwcontext_vulkan: rename vk_pixfmt_map to
vk_pixfmt_planar_map
---
libavutil/hwcontext_vulkan.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index ab5b24f10c..de5575c031 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal {
static const struct {
enum AVPixelFormat pixfmt;
- const VkFormat vkfmts[4];
-} vk_pixfmt_map[] = {
+ const VkFormat vkfmts[5];
+} vk_pixfmt_planar_map[] = {
{ AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
{ AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
{ AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
@@ -244,9 +244,9 @@ static const struct {
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
- for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
- if (vk_pixfmt_map[i].pixfmt == p)
- return vk_pixfmt_map[i].vkfmts;
+ for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++)
+ if (vk_pixfmt_planar_map[i].pixfmt == p)
+ return vk_pixfmt_planar_map[i].vkfmts;
return NULL;
}
--
2.39.2
[-- Attachment #27: 0026-hwcontext_vulkan-fix-minor-type-issue-in-VulkanQueue.patch --]
[-- Type: text/x-diff, Size: 772 bytes --]
From a62f75557a8b2d64fe88670b823d1e8500504bd2 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:52:15 +0100
Subject: [PATCH 26/72] hwcontext_vulkan: fix minor type issue in
VulkanQueueCtx.buf_deps_alloc_size
---
libavutil/hwcontext_vulkan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index de5575c031..8141e8c310 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -66,7 +66,7 @@ typedef struct VulkanQueueCtx {
/* Buffer dependencies */
AVBufferRef **buf_deps;
int nb_buf_deps;
- int buf_deps_alloc_size;
+ unsigned int buf_deps_alloc_size;
} VulkanQueueCtx;
typedef struct VulkanExecCtx {
--
2.39.2
[-- Attachment #28: 0027-hwcontext_vulkan-report-nonCoherentAtomSize.patch --]
[-- Type: text/x-diff, Size: 1140 bytes --]
From 0dec881653e9c9434a1b06ea212735a4c7b9caf8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 28 Dec 2022 05:55:17 +0100
Subject: [PATCH 27/72] hwcontext_vulkan: report nonCoherentAtomSize
---
libavutil/hwcontext_vulkan.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 8141e8c310..7e63c2350c 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1493,6 +1493,8 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n",
p->props.properties.limits.minMemoryMapAlignment);
+ av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n",
+ p->props.properties.limits.nonCoherentAtomSize);
if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n",
p->hprops.minImportedHostPointerAlignment);
--
2.39.2
[-- Attachment #29: 0028-hwcontext_vulkan-add-support-for-descriptor-buffers.patch --]
[-- Type: text/x-diff, Size: 6084 bytes --]
From a028bdcd05284bfb306558212646a309e2da4c24 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:15:02 +0100
Subject: [PATCH 28/72] hwcontext_vulkan: add support for descriptor buffers
---
libavutil/hwcontext_vulkan.c | 13 ++++++++++++-
libavutil/vulkan_functions.h | 9 +++++++++
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 7e63c2350c..60ff11ad3d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -91,6 +91,7 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceVulkan11Features device_features_1_1;
VkPhysicalDeviceVulkan12Features device_features_1_2;
VkPhysicalDeviceVulkan13Features device_features_1_3;
+ VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features;
/* Queues */
pthread_mutex_t **qf_mutex;
@@ -350,6 +351,7 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_SYNC2 },
+ { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, },
/* Imports/exports */
{ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -1333,9 +1335,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
+ VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT,
+ .pNext = &timeline_features,
+ };
VkPhysicalDeviceVulkan13Features dev_features_1_3 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
- .pNext = &timeline_features,
+ .pNext = &desc_buf_features,
};
VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
@@ -1361,6 +1367,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
p->device_features_1_2.pNext = &p->device_features_1_3;
p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
+ p->device_features_1_3.pNext = &p->desc_buf_features;
+ p->desc_buf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT;
ctx->free = vulkan_device_free;
/* Create an instance if not given one */
@@ -1390,8 +1398,11 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
goto end;
}
p->device_features_1_2.timelineSemaphore = 1;
+ p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress;
p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
+ p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer;
+ p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors;
dev_info.pNext = &hwctx->device_features;
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 103bff3013..f8739da8e5 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -38,6 +38,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */
#endif
FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
+ FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 9, /* VK_EXT_descriptor_buffer */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -121,6 +122,7 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateBuffer) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindBufferMemory) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferDeviceAddress) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyBuffer) \
\
/* Image */ \
@@ -142,6 +144,13 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorPool) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorSetLayout) \
\
+ /* Descriptor buffers */ \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutSizeEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutBindingOffsetEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdBindDescriptorBuffersEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdSetDescriptorBufferOffsetsEXT) \
+ \
/* DescriptorUpdateTemplate */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \
--
2.39.2
[-- Attachment #30: 0029-hwcontext_vulkan-add-functions-for-video-decoding.patch --]
[-- Type: text/x-diff, Size: 6637 bytes --]
From cc5ef22f90cc48ee604f6a27d28bb05237b9f2b7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:34:36 +0100
Subject: [PATCH 29/72] hwcontext_vulkan: add functions for video decoding
---
libavutil/hwcontext_vulkan.c | 6 ++++++
libavutil/vulkan.c | 8 +++++---
libavutil/vulkan_functions.h | 20 ++++++++++++++++++++
libavutil/vulkan_loader.h | 4 ++++
4 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 60ff11ad3d..c0e35d8d78 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -363,6 +363,12 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif
+
+ /* Video encoding/decoding */
+ { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE },
+ { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE },
+ { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 },
+ { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
};
/* Converts return values to strings */
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 403f0b1f27..6bf2c214b7 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -85,9 +85,11 @@ const char *ff_vk_ret2str(VkResult res)
CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
CASE(VK_ERROR_VALIDATION_FAILED_EXT);
CASE(VK_ERROR_INVALID_SHADER_NV);
- CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
- CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
- CASE(VK_ERROR_NOT_PERMITTED_EXT);
+ CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
default: return "Unknown error";
}
#undef CASE
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index f8739da8e5..65ab560d21 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -39,6 +39,10 @@ typedef enum FFVulkanExtensions {
#endif
FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 9, /* VK_EXT_descriptor_buffer */
+ FF_VK_EXT_VIDEO_QUEUE = 1ULL << 10, /* VK_KHR_video_queue */
+ FF_VK_EXT_VIDEO_DECODE_QUEUE = 1ULL << 11, /* VK_KHR_video_decode_queue */
+ FF_VK_EXT_VIDEO_DECODE_H264 = 1ULL << 12, /* VK_KHR_video_decode_h264 */
+ FF_VK_EXT_VIDEO_DECODE_H265 = 1ULL << 13, /* VK_KHR_video_decode_h265 */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -60,6 +64,8 @@ typedef enum FFVulkanExtensions {
MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDevice) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFeatures2) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceProperties) \
+ MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoCapabilitiesKHR) \
+ MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoFormatPropertiesKHR) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, DeviceWaitIdle) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDevice) \
\
@@ -159,6 +165,20 @@ typedef enum FFVulkanExtensions {
/* sync2 */ \
MACRO(1, 1, FF_VK_EXT_SYNC2, CmdPipelineBarrier2KHR) \
\
+ /* Video queue */ \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionParametersKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, GetVideoSessionMemoryRequirementsKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, BindVideoSessionMemoryKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdBeginVideoCodingKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdControlVideoCodingKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdEndVideoCodingKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionParametersKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionKHR) \
+ \
+ /* Video decoding */ \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \
+ \
/* Pipeline */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 3f1ee6aa46..5385e398bf 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -48,6 +48,10 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif
+ { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE },
+ { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE },
+ { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 },
+ { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
};
FFVulkanExtensions mask = 0x0;
--
2.39.2
[-- Attachment #31: 0030-hwcontext_vulkan-support-PREP_MODE_DECODING-in-prepa.patch --]
[-- Type: text/x-diff, Size: 5554 bytes --]
From 506c7daa8423efd56296868cce017642235b6186 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:18:21 +0100
Subject: [PATCH 30/72] hwcontext_vulkan: support PREP_MODE_DECODING in
prepare_frame()
---
libavutil/hwcontext_vulkan.c | 70 ++++++++++++++++++++++++++----------
1 file changed, 51 insertions(+), 19 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c0e35d8d78..e7c14fad74 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1969,7 +1969,9 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
enum PrepMode {
PREP_MODE_WRITE,
PREP_MODE_EXTERNAL_EXPORT,
- PREP_MODE_EXTERNAL_IMPORT
+ PREP_MODE_EXTERNAL_IMPORT,
+ PREP_MODE_DECODING_DST,
+ PREP_MODE_DECODING_DPB,
};
static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
@@ -1978,7 +1980,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
int err;
uint32_t src_qf, dst_qf;
VkImageLayout new_layout;
- VkAccessFlags new_access;
+ VkAccessFlags2 new_access;
AVVulkanFramesContext *vkfc = hwfc->hwctx;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
@@ -1986,7 +1988,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
- VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+ VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+ VkDependencyInfo dep_info;
VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -2042,32 +2045,55 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
s_info.pWaitDstStageMask = wait_st;
s_info.waitSemaphoreCount = planes;
break;
+ case PREP_MODE_DECODING_DST:
+ new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
+ new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+ src_qf = VK_QUEUE_FAMILY_IGNORED;
+ dst_qf = VK_QUEUE_FAMILY_IGNORED;
+ break;
+ case PREP_MODE_DECODING_DPB:
+ new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
+ new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
+ src_qf = VK_QUEUE_FAMILY_IGNORED;
+ dst_qf = VK_QUEUE_FAMILY_IGNORED;
+ break;
}
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
for (int i = 0; i < planes; i++) {
- img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
- img_bar[i].srcAccessMask = 0x0;
- img_bar[i].dstAccessMask = new_access;
- img_bar[i].oldLayout = frame->layout[i];
- img_bar[i].newLayout = new_layout;
- img_bar[i].srcQueueFamilyIndex = src_qf;
- img_bar[i].dstQueueFamilyIndex = dst_qf;
- img_bar[i].image = frame->img[i];
- img_bar[i].subresourceRange.levelCount = 1;
- img_bar[i].subresourceRange.layerCount = 1;
- img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ img_bar[i] = (VkImageMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+ .pNext = NULL,
+ .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
+ .srcAccessMask = 0x0,
+ .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+ .dstAccessMask = new_access,
+ .oldLayout = frame->layout[i],
+ .newLayout = new_layout,
+ .srcQueueFamilyIndex = src_qf,
+ .dstQueueFamilyIndex = dst_qf,
+ .image = frame->img[i],
+ .subresourceRange = (VkImageSubresourceRange) {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .levelCount = 1,
+ .layerCount = 1,
+ },
+ };
frame->layout[i] = img_bar[i].newLayout;
frame->access[i] = img_bar[i].dstAccessMask;
}
- vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, 0, NULL, 0, NULL, planes, img_bar);
+ dep_info = (VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = planes,
+ };
+
+ vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
vkfc->unlock_frame(hwfc, frame);
@@ -2308,7 +2334,13 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
if (err)
goto fail;
- err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
+ if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
+ !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
+ err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DPB);
+ else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
+ err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DST);
+ else
+ err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
if (err)
goto fail;
--
2.39.2
[-- Attachment #32: 0031-vulkan-lock-queues-before-submitting-operations.patch --]
[-- Type: text/x-diff, Size: 1087 bytes --]
From 6da405c60b7b04895a4395f5e226e8cc60e6552e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:04:28 +0100
Subject: [PATCH 31/72] vulkan: lock queues before submitting operations
---
libavutil/vulkan.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 6bf2c214b7..ad13b8f3cb 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
return AVERROR_EXTERNAL;
}
+ s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
+ e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
+
ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
+
+ s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
+ e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
+
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
ff_vk_ret2str(ret));
--
2.39.2
[-- Attachment #33: 0032-vulkan-define-VK_NO_PROTOTYPES.patch --]
[-- Type: text/x-diff, Size: 573 bytes --]
From 69c6d3dff6040feb9192be9364b064cce340ef3a Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:04:48 +0100
Subject: [PATCH 32/72] vulkan: define VK_NO_PROTOTYPES
---
libavutil/vulkan.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index d1ea1e24fb..7927b04454 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -19,6 +19,8 @@
#ifndef AVUTIL_VULKAN_H
#define AVUTIL_VULKAN_H
+#define VK_NO_PROTOTYPES
+
#include "pixdesc.h"
#include "bprint.h"
#include "hwcontext.h"
--
2.39.2
[-- Attachment #34: 0033-vulkan-add-additional-error-codes.patch --]
[-- Type: text/x-diff, Size: 1553 bytes --]
From 3049e9213948926ec2a3f42808f065c336eb0126 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:54:35 +0100
Subject: [PATCH 33/72] vulkan: add additional error codes
---
libavutil/vulkan.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ad13b8f3cb..f2846e628a 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -78,6 +78,12 @@ const char *ff_vk_ret2str(VkResult res)
CASE(VK_ERROR_TOO_MANY_OBJECTS);
CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
CASE(VK_ERROR_FRAGMENTED_POOL);
+ CASE(VK_ERROR_UNKNOWN);
+ CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+ CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ CASE(VK_ERROR_FRAGMENTATION);
+ CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
+ CASE(VK_PIPELINE_COMPILE_REQUIRED);
CASE(VK_ERROR_SURFACE_LOST_KHR);
CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
CASE(VK_SUBOPTIMAL_KHR);
@@ -90,6 +96,13 @@ const char *ff_vk_ret2str(VkResult res)
CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
+ CASE(VK_ERROR_NOT_PERMITTED_KHR);
+ CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
+ CASE(VK_THREAD_IDLE_KHR);
+ CASE(VK_THREAD_DONE_KHR);
+ CASE(VK_OPERATION_DEFERRED_KHR);
+ CASE(VK_OPERATION_NOT_DEFERRED_KHR);
default: return "Unknown error";
}
#undef CASE
--
2.39.2
[-- Attachment #35: 0034-vulkan-fix-comment-statement-about-exec_queue-blocki.patch --]
[-- Type: text/x-diff, Size: 919 bytes --]
From 630be2276afccbac78976d7c8a0f3662b72de248 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 21:41:59 +0100
Subject: [PATCH 34/72] vulkan: fix comment statement about exec_queue blocking
---
libavutil/vulkan.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 7927b04454..a8aa9d8a8b 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -386,9 +386,7 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
VkPipelineStageFlagBits in_wait_dst_flag);
/**
- * Submits a command buffer to the queue for execution.
- * Will block until execution has finished in order to simplify resource
- * management.
+ * Submits a command buffer to the queue for execution. Will not block.
*/
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
--
2.39.2
[-- Attachment #36: 0035-vulkan-add-pNext-argument-to-ff_vk_create_buf.patch --]
[-- Type: text/x-diff, Size: 3809 bytes --]
From d9c9bfa670126ea72a95a1808beb6bd0883cbb98 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 17 Mar 2022 12:23:56 +0100
Subject: [PATCH 35/72] vulkan: add pNext argument to ff_vk_create_buf()
---
libavfilter/vf_gblur_vulkan.c | 2 +-
libavfilter/vf_overlay_vulkan.c | 2 +-
libavfilter/vf_scale_vulkan.c | 2 +-
libavutil/vulkan.c | 4 ++--
libavutil/vulkan.h | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c
index d61f3c778c..c6360799a7 100644
--- a/libavfilter/vf_gblur_vulkan.c
+++ b/libavfilter/vf_gblur_vulkan.c
@@ -174,7 +174,7 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
RET(ff_vk_init_pipeline_layout(&s->vkctx, pl));
RET(ff_vk_init_compute_pipeline(&s->vkctx, pl));
- RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize,
+ RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0));
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index e87ee83000..bdf231f4ef 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -181,7 +181,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
} *par;
err = ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par),
+ sizeof(*par), NULL,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
if (err)
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index c140420896..31dc35569b 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -253,7 +253,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
}
RET(ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par),
+ sizeof(*par), NULL,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index f2846e628a..ae6adc5104 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -205,7 +205,7 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
return 0;
}
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
int err;
@@ -215,7 +215,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
VkBufferCreateInfo buf_spawn = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = NULL,
+ .pNext = pNext,
.usage = usage,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.size = size, /* Gets FFALIGNED during alloc if host visible
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index a8aa9d8a8b..2311928a8c 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -393,7 +393,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
/**
* Create a VkBuffer with the specified parameters.
*/
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
--
2.39.2
[-- Attachment #37: 0036-vulkan-add-ff_vk_qf_fill.patch --]
[-- Type: text/x-diff, Size: 2777 bytes --]
From da581e95cea93e9b628263aa28de945828f71967 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:03:58 +0100
Subject: [PATCH 36/72] vulkan: add ff_vk_qf_fill()
---
libavutil/vulkan.c | 25 +++++++++++++++++++++++++
libavutil/vulkan.h | 9 +++++++++
2 files changed, 34 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ae6adc5104..eceef295a8 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,6 +108,31 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
+void ff_vk_qf_fill(FFVulkanContext *s)
+{
+ s->nb_qfs = 0;
+
+ /* Simply fills in all unique queue family indices into s->qfs */
+ if (s->hwctx->queue_family_index >= 0)
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
+ if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
+ if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
+ s->qfs[1] != s->hwctx->queue_family_comp_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
+ if (s->hwctx->queue_family_decode_index >= 0 &&
+ (s->qfs[0] != s->hwctx->queue_family_decode_index &&
+ s->qfs[1] != s->hwctx->queue_family_decode_index &&
+ s->qfs[2] != s->hwctx->queue_family_decode_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
+ if (s->hwctx->queue_family_encode_index >= 0 &&
+ (s->qfs[0] != s->hwctx->queue_family_encode_index &&
+ s->qfs[1] != s->hwctx->queue_family_encode_index &&
+ s->qfs[2] != s->hwctx->queue_family_encode_index &&
+ s->qfs[3] != s->hwctx->queue_family_encode_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
+}
+
void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
VkQueueFlagBits dev_family, int nb_queues)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 2311928a8c..7254c21cf7 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -203,6 +203,9 @@ typedef struct FFVulkanContext {
AVHWFramesContext *frames;
AVVulkanFramesContext *hwfc;
+ uint32_t qfs[5];
+ int nb_qfs;
+
FFVkSPIRVCompiler *spirv_compiler;
/* Properties */
@@ -245,6 +248,12 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
*/
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
+/**
+ * Setup the queue families from the hardware device context.
+ * Necessary for image creation to work.
+ */
+void ff_vk_qf_fill(FFVulkanContext *s);
+
/**
* Initialize a queue family with a specific number of queues.
* If nb_queues == 0, use however many queues the queue family has.
--
2.39.2
[-- Attachment #38: 0037-vulkan-add-ff_vk_image_create.patch --]
[-- Type: text/x-diff, Size: 4892 bytes --]
From 9da56b3fc3169588f97f590abeecb7ead3c18202 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:05:59 +0100
Subject: [PATCH 37/72] vulkan: add ff_vk_image_create()
---
libavutil/vulkan.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++
libavutil/vulkan.h | 11 ++++++
2 files changed, 100 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index eceef295a8..212f134466 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -401,6 +401,95 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}
+int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx,
+ int width, int height, VkFormat fmt, VkImageTiling tiling,
+ VkImageUsageFlagBits usage, VkImageCreateFlags flags,
+ void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext)
+{
+ int err;
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ AVVulkanDeviceContext *hwctx = s->hwctx;
+
+ VkExportSemaphoreCreateInfo ext_sem_info = {
+ .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
+#ifdef _WIN32
+ .handleTypes = IsWindows8OrGreater()
+ ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
+ : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
+#else
+ .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+#endif
+ };
+
+ VkSemaphoreTypeCreateInfo sem_type_info = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
+#ifdef _WIN32
+ .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
+#else
+ .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
+#endif
+ .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
+ .initialValue = 0,
+ };
+
+ VkSemaphoreCreateInfo sem_spawn = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = &sem_type_info,
+ };
+
+ /* Create the image */
+ VkImageCreateInfo create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = create_pnext,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = fmt,
+ .extent.depth = 1,
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .flags = flags,
+ .tiling = tiling,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .usage = usage,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .pQueueFamilyIndices = s->qfs,
+ .queueFamilyIndexCount = s->nb_qfs,
+ .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
+ };
+
+ ret = vk->CreateImage(hwctx->act_dev, &create_info,
+ hwctx->alloc, &f->img[0]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ /* Create semaphore */
+ ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
+ hwctx->alloc, &f->sem[0]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ f->queue_family[0] = s->nb_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : s->qfs[0];
+ f->layout[0] = create_info.initialLayout;
+ f->access[0] = 0x0;
+ f->sem_value[0] = 0;
+
+ f->flags = 0x0;
+ f->tiling = tiling;
+
+ return 0;
+
+fail:
+ return err;
+}
+
int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
VkShaderStageFlagBits stage)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 7254c21cf7..69c099fa8f 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -423,6 +423,17 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
*/
void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
+/**
+ * Creates an image, allocates and binds memory in the given
+ * idx value of the dst frame. If mem is non-NULL, then no memory will be
+ * allocated, but instead the given memory will be bound to the image.
+ */
+int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx,
+ int width, int height, VkFormat fmt, VkImageTiling tiling,
+ VkImageUsageFlagBits usage, VkImageCreateFlags flags,
+ void *create_pnext,
+ VkDeviceMemory *mem, void *alloc_pnext);
+
/**
* Frees the main Vulkan context.
*/
--
2.39.2
[-- Attachment #39: 0038-vulkan-expose-ff_vk_alloc_mem.patch --]
[-- Type: text/x-diff, Size: 2666 bytes --]
From 661af851afe7dcb3c2982fab953aff2941b4e5b9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:03:34 +0100
Subject: [PATCH 38/72] vulkan: expose ff_vk_alloc_mem()
---
libavutil/vulkan.c | 15 ++++++++-------
libavutil/vulkan.h | 7 +++++++
2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 212f134466..7870de351d 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -174,9 +174,9 @@ void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
}
-static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
- VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
- VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
VkResult ret;
int index = -1;
@@ -225,7 +225,8 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
return AVERROR(ENOMEM);
}
- *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
+ if (mem_flags)
+ *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
return 0;
}
@@ -279,9 +280,9 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
if (use_ded_mem)
ded_alloc.buffer = buf->buf;
- err = vk_alloc_mem(s, &req.memoryRequirements, flags,
- use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
- &buf->flags, &buf->mem);
+ err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
+ use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ &buf->flags, &buf->mem);
if (err)
return err;
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 69c099fa8f..afc8bce999 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -254,6 +254,13 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
*/
void ff_vk_qf_fill(FFVulkanContext *s);
+/**
+ * Allocate device memory.
+ */
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+
/**
* Initialize a queue family with a specific number of queues.
* If nb_queues == 0, use however many queues the queue family has.
--
2.39.2
[-- Attachment #40: 0039-vulkan-support-ignoring-memory-properties-when-alloc.patch --]
[-- Type: text/x-diff, Size: 1648 bytes --]
From e2a8084132631c8fad25aa5a2850deb904e42847 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 29 Nov 2022 00:43:19 +0000
Subject: [PATCH 39/72] vulkan: support ignoring memory properties when
allocating
---
libavutil/vulkan.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7870de351d..b1553c6537 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -188,7 +188,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
};
/* Align if we need to */
- if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
alloc_info.allocationSize = req->size;
@@ -201,7 +201,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
continue;
/* The memory type flags must include our properties */
- if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+ if ((req_flags != UINT32_MAX) &&
+ ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
continue;
/* Found a suitable memory type */
@@ -210,7 +211,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
}
if (index < 0) {
- av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+ av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
req_flags);
return AVERROR(EINVAL);
}
--
2.39.2
[-- Attachment #41: 0040-vulkan-allow-alloc-pNext-in-ff_vk_create_buf.patch --]
[-- Type: text/x-diff, Size: 1878 bytes --]
From 6ac7455f51f0ea1d68b4be2c8cf3ef6f5ca9abde Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 17:43:27 +0100
Subject: [PATCH 40/72] vulkan: allow alloc pNext in ff_vk_create_buf
---
libavutil/vulkan.c | 5 +++--
libavutil/vulkan.h | 3 ++-
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index b1553c6537..0bb5b1eebf 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
return 0;
}
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
int err;
@@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
};
VkMemoryDedicatedAllocateInfo ded_alloc = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = NULL,
+ .pNext = alloc_pNext,
};
VkMemoryDedicatedRequirements ded_req = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index afc8bce999..65f24ca138 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -409,7 +409,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
/**
* Create a VkBuffer with the specified parameters.
*/
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
--
2.39.2
[-- Attachment #42: 0041-vulkan-do-not-wait-for-device-idle-when-destroying-b.patch --]
[-- Type: text/x-diff, Size: 786 bytes --]
From 8ce981bb551f37d27f9a11a36c4af7eb007011cb Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 16 Dec 2022 00:37:53 +0100
Subject: [PATCH 41/72] vulkan: do not wait for device idle when destroying
buffers
This should be done explicitly.
---
libavutil/vulkan.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 0bb5b1eebf..0250f5aa39 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -396,8 +396,6 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
if (!buf || !s->hwctx)
return;
- vk->DeviceWaitIdle(s->hwctx->act_dev);
-
if (buf->buf != VK_NULL_HANDLE)
vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
if (buf->mem != VK_NULL_HANDLE)
--
2.39.2
[-- Attachment #43: 0042-vulkan-add-size-tracking-to-buffer-structs.patch --]
[-- Type: text/x-diff, Size: 964 bytes --]
From 0f532a85d9d3fd09d8f35f61911edc8827ed26c0 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 16 Dec 2022 01:47:42 +0100
Subject: [PATCH 42/72] vulkan: add size tracking to buffer structs
---
libavutil/vulkan.c | 2 ++
libavutil/vulkan.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 0250f5aa39..faf5cd5508 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -295,6 +295,8 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
return AVERROR_EXTERNAL;
}
+ buf->size = size;
+
return 0;
}
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 65f24ca138..c993263324 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -94,6 +94,7 @@ typedef struct FFVkBuffer {
VkBuffer buf;
VkDeviceMemory mem;
VkMemoryPropertyFlagBits flags;
+ size_t size;
} FFVkBuffer;
typedef struct FFVkQueueFamilyCtx {
--
2.39.2
[-- Attachment #44: 0043-vulkan-use-device-properties-2-and-add-a-convenience.patch --]
[-- Type: text/x-diff, Size: 2388 bytes --]
From 054c1925dd67a5918fd42b894bb4ca966e60aec8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 19 Dec 2022 07:57:22 +0100
Subject: [PATCH 43/72] vulkan: use device properties 2 and add a convenience
loader function
---
libavutil/vulkan.c | 18 +++++++++++++++++-
libavutil/vulkan.h | 8 +++++++-
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index faf5cd5508..8a583248d1 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,6 +108,22 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
+void ff_vk_load_props(FFVulkanContext *s)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ s->driver_props = (VkPhysicalDeviceDriverProperties) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+ };
+ s->props = (VkPhysicalDeviceProperties2) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
+ .pNext = &s->driver_props,
+ };
+
+ vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
+ vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+}
+
void ff_vk_qf_fill(FFVulkanContext *s)
{
s->nb_qfs = 0;
@@ -189,7 +205,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
/* Align if we need to */
if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
- req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
+ req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
alloc_info.allocationSize = req->size;
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index c993263324..0f6efd023e 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -193,7 +193,8 @@ typedef struct FFVulkanContext {
FFVulkanFunctions vkfn;
FFVulkanExtensions extensions;
- VkPhysicalDeviceProperties props;
+ VkPhysicalDeviceProperties2 props;
+ VkPhysicalDeviceDriverProperties driver_props;
VkPhysicalDeviceMemoryProperties mprops;
AVBufferRef *device_ref;
@@ -239,6 +240,11 @@ extern const VkComponentMapping ff_comp_identity_map;
*/
const char *ff_vk_ret2str(VkResult res);
+/**
+ * Loads props/mprops/driver_props
+ */
+void ff_vk_load_props(FFVulkanContext *s);
+
/**
* Returns 1 if the image is any sort of supported RGB
*/
--
2.39.2
[-- Attachment #45: 0044-vulkan-minor-indent-fix-add-support-for-synchronous-.patch --]
[-- Type: text/x-diff, Size: 2945 bytes --]
From 834645640497d6e371fa50c40ee9ef9700494851 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 05:02:50 +0100
Subject: [PATCH 44/72] vulkan: minor indent fix, add support for synchronous
submission/waiting
---
libavutil/vulkan.c | 20 ++++++++++++++++++--
libavutil/vulkan.h | 9 +++++++++
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 8a583248d1..b5e08ecc46 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -564,7 +564,7 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
/* Create command pool */
ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
- s->hwctx->alloc, &e->pool);
+ s->hwctx->alloc, &e->pool);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
ff_vk_ret2str(ret));
@@ -631,11 +631,13 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
- } else {
+ } else if (!q->synchronous) {
vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
}
+ q->synchronous = 0;
+
/* Discard queue dependencies */
ff_vk_discard_exec_deps(e);
@@ -788,9 +790,23 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
for (int i = 0; i < e->sem_sig_cnt; i++)
*e->sem_sig_val_dst[i] += 1;
+ q->submitted = 1;
+
return 0;
}
+void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+ if (!q->submitted)
+ return;
+
+ vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+ vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
+ q->synchronous = 1;
+}
+
int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
AVBufferRef **deps, int nb_deps)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 0f6efd023e..9ee9469305 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -143,6 +143,9 @@ typedef struct FFVkQueueCtx {
VkFence fence;
VkQueue queue;
+ int synchronous;
+ int submitted;
+
/* Buffer dependencies */
AVBufferRef **buf_deps;
int nb_buf_deps;
@@ -413,6 +416,12 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
*/
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
+/**
+ * Wait on a command buffer's execution. Mainly useful for debugging and
+ * development.
+ */
+void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e);
+
/**
* Create a VkBuffer with the specified parameters.
*/
--
2.39.2
[-- Attachment #46: 0045-vulkan-add-support-for-queries.patch --]
[-- Type: text/x-diff, Size: 7363 bytes --]
From f97d922f523914c0d1e9748876aa3002e0f5811c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 05:03:32 +0100
Subject: [PATCH 45/72] vulkan: add support for queries
---
libavutil/vulkan.c | 118 +++++++++++++++++++++++++++++++++++++++++++++
libavutil/vulkan.h | 30 ++++++++++++
2 files changed, 148 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index b5e08ecc46..de0c300c0e 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -592,6 +592,114 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
return 0;
}
+int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
+ int nb_queries, VkQueryType type,
+ int elem_64bits, void *create_pnext)
+{
+ VkResult ret;
+ size_t qd_size;
+ int nb_results = nb_queries;
+ int nb_statuses = 0 /* Once RADV has support, = nb_queries */;
+ int status_stride = 2;
+ int result_elem_size = elem_64bits ? 8 : 4;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkQueryPoolCreateInfo query_pool_info = {
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .pNext = create_pnext,
+ .queryType = type,
+ .queryCount = nb_queries*e->qf->nb_queues,
+ };
+
+ if (e->query.pool)
+ return AVERROR(EINVAL);
+
+ /* Video encode quieries produce two results per query */
+ if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
+ status_stride = 3; /* skip,skip,result,skip,skip,result */
+ nb_results *= 2;
+ } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
+ status_stride = 1;
+ nb_results *= 0;
+ }
+
+ qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size;
+
+ e->query.data = av_mallocz(e->qf->nb_queues*qd_size);
+ if (!e->query.data)
+ return AVERROR(ENOMEM);
+
+ ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
+ s->hwctx->alloc, &e->query.pool);
+ if (ret != VK_SUCCESS)
+ return AVERROR_EXTERNAL;
+
+ e->query.data_per_queue = qd_size;
+ e->query.nb_queries = nb_queries;
+ e->query.nb_results = nb_results;
+ e->query.nb_statuses = nb_statuses;
+ e->query.elem_64bits = elem_64bits;
+ e->query.status_stride = status_stride;
+
+ return 0;
+}
+
+int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
+ int query_idx, void **data, int64_t *status)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ uint8_t *qd;
+ int32_t *res32;
+ int64_t *res64;
+ int64_t res = 0;
+ VkQueryResultFlags qf = 0;
+ FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+
+ if (!q->submitted) {
+ *data = NULL;
+ return 0;
+ }
+
+ qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue;
+ qf |= e->query.nb_results && e->query.nb_statuses ?
+ VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
+ qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0;
+ res32 = (int32_t *)(qd + e->query.nb_results*4);
+ res64 = (int64_t *)(qd + e->query.nb_results*8);
+
+ ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool,
+ query_idx,
+ e->query.nb_queries,
+ e->query.data_per_queue, qd,
+ e->query.elem_64bits ? 8 : 4, qf);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ if (e->query.nb_statuses && e->query.elem_64bits) {
+ for (int i = 0; i < e->query.nb_queries; i++) {
+ res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
+ res64[i] : res;
+ res64 += e->query.status_stride;
+ }
+ } else if (e->query.nb_statuses) {
+ for (int i = 0; i < e->query.nb_queries; i++) {
+ res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
+ res32[i] : res;
+ res32 += e->query.status_stride;
+ }
+ }
+
+ if (data)
+ *data = qd;
+ if (status)
+ *status = res;
+
+ return 0;
+}
+
void ff_vk_discard_exec_deps(FFVkExecContext *e)
{
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
@@ -648,6 +756,12 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
return AVERROR_EXTERNAL;
}
+ if (e->query.pool) {
+ e->query.idx = e->qf->cur_queue*e->query.nb_queries;
+ vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool,
+ e->query.idx, e->query.nb_queries);
+ }
+
return 0;
}
@@ -790,6 +904,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
for (int i = 0; i < e->sem_sig_cnt; i++)
*e->sem_sig_val_dst[i] += 1;
+ e->query.idx = e->qf->cur_queue*e->query.nb_queries;
q->submitted = 1;
return 0;
@@ -1483,7 +1598,10 @@ static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
if (e->pool)
vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+ if (e->query.pool)
+ vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc);
+ av_freep(&e->query.data);
av_freep(&e->bufs);
av_freep(&e->queues);
av_freep(&e->sem_sig);
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 9ee9469305..e222f67b5a 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -164,6 +164,19 @@ typedef struct FFVkExecContext {
VkCommandBuffer *bufs;
FFVkQueueCtx *queues;
+ struct {
+ int idx;
+ VkQueryPool pool;
+ uint8_t *data;
+
+ int nb_queries;
+ int nb_results;
+ int nb_statuses;
+ int elem_64bits;
+ size_t data_per_queue;
+ int status_stride;
+ } query;
+
AVBufferRef ***deps;
int *nb_deps;
int *dep_alloc_size;
@@ -367,6 +380,23 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
FFVkQueueFamilyCtx *qf);
+/**
+ * Create a query pool for a command context.
+ * elem_64bits exists to troll driver devs for compliance. All results
+ * and statuses returned should be 32 bits, unless this is set, then it's 64bits.
+ */
+int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
+ int nb_queries, VkQueryType type,
+ int elem_64bits, void *create_pnext);
+
+/**
+ * Get results for query.
+ * Returns the status of the query.
+ * Sets *res to the status of the queries.
+ */
+int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
+ int query_idx, void **data, int64_t *status);
+
/**
* Begin recording to the command buffer. Previous execution must have been
* completed, which ff_vk_submit_exec_queue() will ensure.
--
2.39.2
[-- Attachment #47: 0046-vulkan-add-support-for-retrieving-queue-query-and-vi.patch --]
[-- Type: text/x-diff, Size: 7602 bytes --]
From 5422a554ad592c3b4a68c34490db201577f295ee Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 17:37:51 +0100
Subject: [PATCH 46/72] vulkan: add support for retrieving queue, query and
video properties
---
libavutil/vulkan.c | 87 ++++++++++++++++++++++++++++++------
libavutil/vulkan.h | 14 ++++--
libavutil/vulkan_functions.h | 1 +
3 files changed, 85 insertions(+), 17 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index de0c300c0e..d045ff83c1 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
-void ff_vk_load_props(FFVulkanContext *s)
+int ff_vk_load_props(FFVulkanContext *s)
{
+ uint32_t qc = 0;
FFVulkanFunctions *vk = &s->vkfn;
s->driver_props = (VkPhysicalDeviceDriverProperties) {
@@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s)
.pNext = &s->driver_props,
};
+
vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+ vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
+
+ if (s->qf_props)
+ return 0;
+
+ s->qf_props = av_mallocz(sizeof(*s->qf_props)*qc);
+ if (!s->qf_props)
+ return AVERROR(ENOMEM);
+
+ s->query_props = av_mallocz(sizeof(*s->query_props)*qc);
+ if (!s->query_props) { /* test the array allocated just above, not qf_props */
+ av_freep(&s->qf_props);
+ return AVERROR(ENOMEM);
+ }
+
+ s->video_props = av_mallocz(sizeof(*s->video_props)*qc);
+ if (!s->video_props) {
+ av_freep(&s->qf_props);
+ av_freep(&s->query_props);
+ return AVERROR(ENOMEM);
+ }
+
+ for (uint32_t i = 0; i < qc; i++) {
+ s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR,
+ };
+ s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+ .pNext = &s->query_props[i],
+ };
+ s->qf_props[i] = (VkQueueFamilyProperties2) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+ .pNext = &s->video_props[i],
+ };
+ }
+
+ vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
+
+ return 0;
}
void ff_vk_qf_fill(FFVulkanContext *s)
@@ -149,40 +190,54 @@ void ff_vk_qf_fill(FFVulkanContext *s)
s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
}
-void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues)
+int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
+ int ret, num;
+
switch (dev_family) {
case VK_QUEUE_GRAPHICS_BIT:
- qf->queue_family = s->hwctx->queue_family_index;
- qf->actual_queues = s->hwctx->nb_graphics_queues;
+ ret = s->hwctx->queue_family_index;
+ num = s->hwctx->nb_graphics_queues;
break;
case VK_QUEUE_COMPUTE_BIT:
- qf->queue_family = s->hwctx->queue_family_comp_index;
- qf->actual_queues = s->hwctx->nb_comp_queues;
+ ret = s->hwctx->queue_family_comp_index;
+ num = s->hwctx->nb_comp_queues;
break;
case VK_QUEUE_TRANSFER_BIT:
- qf->queue_family = s->hwctx->queue_family_tx_index;
- qf->actual_queues = s->hwctx->nb_tx_queues;
+ ret = s->hwctx->queue_family_tx_index;
+ num = s->hwctx->nb_tx_queues;
break;
case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
- qf->queue_family = s->hwctx->queue_family_encode_index;
- qf->actual_queues = s->hwctx->nb_encode_queues;
+ ret = s->hwctx->queue_family_encode_index;
+ num = s->hwctx->nb_encode_queues;
break;
case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
- qf->queue_family = s->hwctx->queue_family_decode_index;
- qf->actual_queues = s->hwctx->nb_decode_queues;
+ ret = s->hwctx->queue_family_decode_index;
+ num = s->hwctx->nb_decode_queues;
break;
default:
av_assert0(0); /* Should never happen */
}
+ if (nb)
+ *nb = num;
+
+ return ret;
+}
+
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ VkQueueFlagBits dev_family, int nb_queues)
+{
+ int ret;
+
+ ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues);
+
if (!nb_queues)
qf->nb_queues = qf->actual_queues;
else
qf->nb_queues = nb_queues;
- return;
+ return ret;
}
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
@@ -1669,6 +1724,10 @@ void ff_vk_uninit(FFVulkanContext *s)
{
FFVulkanFunctions *vk = &s->vkfn;
+ av_freep(&s->query_props);
+ av_freep(&s->qf_props);
+ av_freep(&s->video_props);
+
if (s->spirv_compiler)
s->spirv_compiler->uninit(&s->spirv_compiler);
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index e222f67b5a..ccfa88f44f 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -212,6 +212,9 @@ typedef struct FFVulkanContext {
VkPhysicalDeviceProperties2 props;
VkPhysicalDeviceDriverProperties driver_props;
VkPhysicalDeviceMemoryProperties mprops;
+ VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
+ VkQueueFamilyVideoPropertiesKHR *video_props;
+ VkQueueFamilyProperties2 *qf_props;
AVBufferRef *device_ref;
AVHWDeviceContext *device;
@@ -259,7 +262,7 @@ const char *ff_vk_ret2str(VkResult res);
/**
* Loads props/mprops/driver_props
*/
-void ff_vk_load_props(FFVulkanContext *s);
+int ff_vk_load_props(FFVulkanContext *s);
/**
* Returns 1 if the image is any sort of supported RGB
@@ -284,12 +287,17 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+/**
+ * Get a queue family index and the number of queues. nb is optional.
+ */
+int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb);
+
/**
* Initialize a queue family with a specific number of queues.
* If nb_queues == 0, use however many queues the queue family has.
*/
-void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues);
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ VkQueueFlagBits dev_family, int nb_queues);
/**
* Rotate through the queues in a queue family.
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 65ab560d21..fa1650e895 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -77,6 +77,7 @@ typedef enum FFVulkanExtensions {
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFormatProperties2) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceImageFormatProperties2) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties) \
+ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties2) \
\
/* Command pool */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateCommandPool) \
--
2.39.2
[-- Attachment #48: 0047-vulkan-return-current-queue-index-from-ff_vk_qf_rota.patch --]
[-- Type: text/x-diff, Size: 1290 bytes --]
From 4632426c65f136ef70c4ab854a1076e1d1c868ff Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 28 Dec 2022 05:55:53 +0100
Subject: [PATCH 47/72] vulkan: return current queue index from
ff_vk_qf_rotate()
---
libavutil/vulkan.c | 3 ++-
libavutil/vulkan.h | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index d045ff83c1..cb8e08e02f 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -240,9 +240,10 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
return ret;
}
-void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
+int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
{
qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
+ return qf->cur_queue;
}
int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index ccfa88f44f..dd1bc9c440 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -302,7 +302,7 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
/**
* Rotate through the queues in a queue family.
*/
-void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
+int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
/**
* Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
--
2.39.2
[-- Attachment #49: 0048-vulkan-rewrite-dependency-handling-code.patch --]
[-- Type: text/x-diff, Size: 82373 bytes --]
From c1e607011ac764b46875add61c533ab2e49ab00e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 29 Dec 2022 21:16:21 +0100
Subject: [PATCH 48/72] vulkan: rewrite dependency handling code
---
libavutil/vulkan.c | 1350 ++++++++++++++++++++------------------------
libavutil/vulkan.h | 382 +++++--------
2 files changed, 749 insertions(+), 983 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index cb8e08e02f..17a5bd6f3f 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -27,27 +27,6 @@
#include "vulkan_shaderc.c"
#endif
-/* Generic macro for creating contexts which need to keep their addresses
- * if another context is created. */
-#define FN_CREATING(ctx, type, shortname, array, num) \
-static av_always_inline type *create_ ##shortname(ctx *dctx) \
-{ \
- type **array, *sctx = av_mallocz(sizeof(*sctx)); \
- if (!sctx) \
- return NULL; \
- \
- array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
- if (!array) { \
- av_free(sctx); \
- return NULL; \
- } \
- \
- dctx->array = array; \
- dctx->array[dctx->num++] = sctx; \
- \
- return sctx; \
-}
-
const VkComponentMapping ff_comp_identity_map = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -165,32 +144,7 @@ int ff_vk_load_props(FFVulkanContext *s)
return 0;
}
-void ff_vk_qf_fill(FFVulkanContext *s)
-{
- s->nb_qfs = 0;
-
- /* Simply fills in all unique queues into s->qfs */
- if (s->hwctx->queue_family_index >= 0)
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
- if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
- if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
- s->qfs[1] != s->hwctx->queue_family_comp_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
- if (s->hwctx->queue_family_decode_index >= 0 &&
- (s->qfs[0] != s->hwctx->queue_family_decode_index &&
- s->qfs[1] != s->hwctx->queue_family_decode_index &&
- s->qfs[2] != s->hwctx->queue_family_decode_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
- if (s->hwctx->queue_family_encode_index >= 0 &&
- (s->qfs[0] != s->hwctx->queue_family_encode_index &&
- s->qfs[1] != s->hwctx->queue_family_encode_index &&
- s->qfs[2] != s->hwctx->queue_family_encode_index &&
- s->qfs[3] != s->hwctx->queue_family_encode_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
-}
-
-int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
+static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
int ret, num;
@@ -226,790 +180,760 @@ int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
}
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues)
+ VkQueueFlagBits dev_family)
{
- int ret;
-
- ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues);
-
- if (!nb_queues)
- qf->nb_queues = qf->actual_queues;
- else
- qf->nb_queues = nb_queues;
-
- return ret;
-}
+ /* Fill in queue families from context if not done yet */
+ if (!s->nb_qfs) {
+ s->nb_qfs = 0;
+
+ /* Simply fills in all unique queues into s->qfs */
+ if (s->hwctx->queue_family_index >= 0)
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
+ if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
+ if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
+ s->qfs[1] != s->hwctx->queue_family_comp_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
+ if (s->hwctx->queue_family_decode_index >= 0 &&
+ (s->qfs[0] != s->hwctx->queue_family_decode_index &&
+ s->qfs[1] != s->hwctx->queue_family_decode_index &&
+ s->qfs[2] != s->hwctx->queue_family_decode_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
+ if (s->hwctx->queue_family_encode_index >= 0 &&
+ (s->qfs[0] != s->hwctx->queue_family_encode_index &&
+ s->qfs[1] != s->hwctx->queue_family_encode_index &&
+ s->qfs[2] != s->hwctx->queue_family_encode_index &&
+ s->qfs[3] != s->hwctx->queue_family_encode_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
+ }
-int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
-{
- qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
- return qf->cur_queue;
+ return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues));
}
-int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
- VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
- VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
{
- VkResult ret;
- int index = -1;
FFVulkanFunctions *vk = &s->vkfn;
- VkMemoryAllocateInfo alloc_info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = alloc_extension,
- };
-
- /* Align if we need to */
- if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
- req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
-
- alloc_info.allocationSize = req->size;
-
- /* The vulkan spec requires memory types to be sorted in the "optimal"
- * order, so the first matching type we find will be the best/fastest one */
- for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
- /* The memory type must be supported by the requirements (bitfield) */
- if (!(req->memoryTypeBits & (1 << i)))
- continue;
-
- /* The memory type flags must include our properties */
- if ((req_flags != UINT32_MAX) &&
- ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
- continue;
+ for (int i = 0; i < pool->pool_size; i++) {
+ FFVkExecContext *e = &pool->contexts[i];
- /* Found a suitable memory type */
- index = i;
- break;
- }
+ if (e->fence) {
+ vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+ vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
+ }
- if (index < 0) {
- av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
- req_flags);
- return AVERROR(EINVAL);
+ ff_vk_exec_discard_deps(s, e);
+
+ av_free(e->frame_deps);
+ av_free(e->buf_deps);
+ av_free(e->queue_family_dst);
+ av_free(e->layout_dst);
+ av_free(e->access_dst);
+ av_free(e->frame_update);
+ av_free(e->frame_locked);
+ av_free(e->sem_sig);
+ av_free(e->sem_sig_val);
+ av_free(e->sem_sig_val_dst);
+ av_free(e->sem_wait);
+ av_free(e->sem_wait_dst);
+ av_free(e->sem_wait_val);
}
- alloc_info.memoryTypeIndex = index;
-
- ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
- s->hwctx->alloc, mem);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR(ENOMEM);
- }
-
- if (mem_flags)
- *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
-
- return 0;
+ if (pool->cmd_bufs)
+ vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool,
+ pool->pool_size, pool->cmd_bufs);
+ if (pool->cmd_buf_pool)
+ vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc);
+ if (pool->query_pool)
+ vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc);
+
+ av_free(pool->query_data);
+ av_free(pool->cmd_bufs);
+ av_free(pool->contexts);
}
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
- void *pNext, void *alloc_pNext,
- VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ FFVkExecPool *pool, int nb_contexts,
+ int nb_queries, VkQueryType query_type, int query_64bit,
+ void *query_create_pnext)
{
int err;
VkResult ret;
- int use_ded_mem;
FFVulkanFunctions *vk = &s->vkfn;
- VkBufferCreateInfo buf_spawn = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = pNext,
- .usage = usage,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .size = size, /* Gets FFALIGNED during alloc if host visible
- but should be ok */
- };
+ VkCommandPoolCreateInfo cqueue_create;
+ VkCommandBufferAllocateInfo cbuf_create;
- VkBufferMemoryRequirementsInfo2 req_desc = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
- };
- VkMemoryDedicatedAllocateInfo ded_alloc = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = alloc_pNext,
- };
- VkMemoryDedicatedRequirements ded_req = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
- };
- VkMemoryRequirements2 req = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
- .pNext = &ded_req,
- };
+ atomic_init(&pool->idx, 0);
- ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
+ /* Create command pool */
+ cqueue_create = (VkCommandPoolCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+ VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = qf->queue_family,
+ };
+ ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+ s->hwctx->alloc, &pool->cmd_buf_pool);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+ av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ err = AVERROR_EXTERNAL;
+ goto fail;
}
- req_desc.buffer = buf->buf;
-
- vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
-
- /* In case the implementation prefers/requires dedicated allocation */
- use_ded_mem = ded_req.prefersDedicatedAllocation |
- ded_req.requiresDedicatedAllocation;
- if (use_ded_mem)
- ded_alloc.buffer = buf->buf;
-
- err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
- use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
- &buf->flags, &buf->mem);
- if (err)
- return err;
+ /* Allocate space for command buffers */
+ pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs));
+ if (!pool->cmd_bufs) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
- ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
+ /* Allocate command buffer */
+ cbuf_create = (VkCommandBufferAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandPool = pool->cmd_buf_pool,
+ .commandBufferCount = nb_contexts,
+ };
+ ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create,
+ pool->cmd_bufs);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+ av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ err = AVERROR_EXTERNAL;
+ goto fail;
}
- buf->size = size;
+ /* Query pool */
+ if (nb_queries) {
+ VkQueryPoolCreateInfo query_pool_info = {
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .pNext = query_create_pnext,
+ .queryType = query_type,
+ .queryCount = nb_queries*nb_contexts,
+ };
+ ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
+ s->hwctx->alloc, &pool->query_pool);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
- return 0;
-}
+ pool->nb_queries = nb_queries;
+ pool->query_64bit = query_64bit; /* ff_vk_exec_get_query() reads this for its flags and element size */
+ pool->query_status_stride = 2;
+ pool->query_results = nb_queries;
+ pool->query_statuses = 0; /* if radv supports it, nb_queries; */
+ /* Video encode queries produce two results per query */
+ if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
+ pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */
+ pool->query_results *= 2;
+ } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
+ pool->query_status_stride = 1;
+ pool->query_results = 0;
+ pool->query_statuses = nb_queries;
+ }
-int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
- int nb_buffers, int invalidate)
-{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
- VkMappedMemoryRange *inval_list = NULL;
- int inval_count = 0;
+ pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4);
- for (int i = 0; i < nb_buffers; i++) {
- ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
- VK_WHOLE_SIZE, 0, (void **)&mem[i]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ /* Allocate space for the query data */
+ pool->query_data = av_mallocz(nb_contexts*pool->qd_size);
+ if (!pool->query_data) {
+ err = AVERROR(ENOMEM);
+ goto fail;
}
}
- if (!invalidate)
- return 0;
-
- for (int i = 0; i < nb_buffers; i++) {
- const VkMappedMemoryRange ival_buf = {
- .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = buf[i].mem,
- .size = VK_WHOLE_SIZE,
- };
- if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
- continue;
- inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
- (++inval_count)*sizeof(*inval_list));
- if (!inval_list)
- return AVERROR(ENOMEM);
- inval_list[inval_count - 1] = ival_buf;
+ /* Allocate space for the contexts */
+ pool->contexts = av_mallocz(nb_contexts*sizeof(*pool->contexts));
+ if (!pool->contexts) {
+ err = AVERROR(ENOMEM);
+ goto fail;
}
- if (inval_count) {
- ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
- inval_list);
+ pool->pool_size = nb_contexts;
+
+ /* Init contexts */
+ for (int i = 0; i < pool->pool_size; i++) {
+ FFVkExecContext *e = &pool->contexts[i];
+
+ /* Fence */
+ VkFenceCreateInfo fence_create = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+ };
+ ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc,
+ &e->fence);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n",
ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ err = AVERROR_EXTERNAL; /* unwind through fail: like the other error paths */
+ goto fail;
}
+ e->parent = pool;
+
+ /* Query data */
+ e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i;
+ e->query_idx = nb_queries*i;
+
+ /* Command buffer */
+ e->buf = pool->cmd_bufs[i];
+
+ /* Queue index distribution */
+ e->qi = i % qf->nb_queues;
+ e->qf = qf->queue_family;
+ vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
+ e->qi, &e->queue);
}
return 0;
+
+fail:
+ ff_vk_exec_pool_free(s, pool);
+ return err;
}
-int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
- int flush)
+VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
+ void **data, int64_t *status)
{
- int err = 0;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- VkMappedMemoryRange *flush_list = NULL;
- int flush_count = 0;
+ const FFVkExecPool *pool = e->parent;
- if (flush) {
- for (int i = 0; i < nb_buffers; i++) {
- const VkMappedMemoryRange flush_buf = {
- .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = buf[i].mem,
- .size = VK_WHOLE_SIZE,
- };
- if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
- continue;
- flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
- (++flush_count)*sizeof(*flush_list));
- if (!flush_list)
- return AVERROR(ENOMEM);
- flush_list[flush_count - 1] = flush_buf;
- }
- }
+ int32_t *res32 = (int32_t *)e->query_data; /* 32-bit view of the buffer GetQueryPoolResults fills */
+ int64_t *res64 = (int64_t *)e->query_data; /* 64-bit view of the same buffer */
+ int64_t res = 0;
+ VkQueryResultFlags qf = 0;
- if (flush_count) {
- ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
- flush_list);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
- ff_vk_ret2str(ret));
- err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+ qf |= pool->query_64bit ?
+ VK_QUERY_RESULT_64_BIT : 0x0;
+ qf |= pool->query_statuses ?
+ VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
+
+ ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool,
+ e->query_idx,
+ pool->nb_queries,
+ pool->qd_size, e->query_data,
+ pool->query_64bit ? 8 : 4, qf);
+ if (ret != VK_SUCCESS)
+ return ret;
+
+ if (pool->query_statuses && pool->query_64bit) {
+ for (int i = 0; i < pool->query_statuses; i++) {
+ /* NOTE(review): assumes the status is the last element of each stride-sized group */
+ const int64_t st = res64[(i + 1)*pool->query_status_stride - 1];
+ res = (st < res) || (res >= 0 && st > res) ? st : res;
+ }
+ } else if (pool->query_statuses) {
+ for (int i = 0; i < pool->query_statuses; i++) {
+ /* Read by index only, so consecutive queries map to consecutive groups */
+ const int64_t st = res32[(i + 1)*pool->query_status_stride - 1];
+ res = (st < res) || (res >= 0 && st > res) ? st : res;
+ }
}
- for (int i = 0; i < nb_buffers; i++)
- vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
+ if (data)
+ *data = e->query_data;
+ if (status)
+ *status = res;
- return err;
+ return VK_SUCCESS;
}
-void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool)
{
- FFVulkanFunctions *vk = &s->vkfn;
-
- if (!buf || !s->hwctx)
- return;
+ unsigned int idx = atomic_fetch_add_explicit(&pool->idx, 1, memory_order_relaxed);
+ idx %= pool->pool_size; /* unsigned keeps the slot non-negative once the counter passes INT_MAX */
+ return &pool->contexts[idx];
+}
- if (buf->buf != VK_NULL_HANDLE)
- vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
- if (buf->mem != VK_NULL_HANDLE)
- vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
}
-int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx,
- int width, int height, VkFormat fmt, VkImageTiling tiling,
- VkImageUsageFlagBits usage, VkImageCreateFlags flags,
- void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext)
+/**
+ * Begin recording commands into the command buffer of an execution context.
+ *
+ * Waits for and resets the context's fence, drops all dependencies still
+ * attached to the context, starts the command buffer with the one-time-submit
+ * flag and, if the parent pool has queries, records a reset of this context's
+ * query range.
+ *
+ * @return 0 on success, AVERROR_EXTERNAL if the command buffer could not be
+ *         started
+ */
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
{
- int err;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- AVVulkanDeviceContext *hwctx = s->hwctx;
-
- VkExportSemaphoreCreateInfo ext_sem_info = {
- .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
-#ifdef _WIN32
- .handleTypes = IsWindows8OrGreater()
- ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
- : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
-#else
- .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
-#endif
- };
+ const FFVkExecPool *pool = e->parent;
- VkSemaphoreTypeCreateInfo sem_type_info = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
-#ifdef _WIN32
- .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
-#else
- .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
-#endif
- .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
- .initialValue = 0,
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
- VkSemaphoreCreateInfo sem_spawn = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- .pNext = &sem_type_info,
- };
+ /* Wait for whatever last used this context's fence to finish, then reset
+ * the fence so it can be passed to the next submission.
+ * NOTE(review): this blocks forever unless the fence was created signalled
+ * or has been submitted before - confirm against the pool init code. */
+ vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+ vk->ResetFences(s->hwctx->act_dev, 1, &e->fence);
- /* Create the image */
- VkImageCreateInfo create_info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .pNext = create_pnext,
- .imageType = VK_IMAGE_TYPE_2D,
- .format = fmt,
- .extent.depth = 1,
- .mipLevels = 1,
- .arrayLayers = 1,
- .flags = flags,
- .tiling = tiling,
- .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- .usage = usage,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = s->qfs,
- .queueFamilyIndexCount = s->nb_qfs,
- .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
- };
+ /* Drop buffer/frame dependencies and semaphores left over from the
+ * previous use of this context */
+ ff_vk_exec_discard_deps(s, e);
- ret = vk->CreateImage(hwctx->act_dev, &create_info,
- hwctx->alloc, &f->img[0]);
+ ret = vk->BeginCommandBuffer(e->buf, &cmd_start);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n",
+ av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
ff_vk_ret2str(ret));
- err = AVERROR(EINVAL);
- goto fail;
+ return AVERROR_EXTERNAL;
}
- /* Create semaphore */
- ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
- hwctx->alloc, &f->sem[0]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ /* Record a reset of the pool->nb_queries queries starting at this
+ * context's first query index */
+ if (pool->nb_queries)
+ vk->CmdResetQueryPool(e->buf, pool->query_pool,
+ e->query_idx, pool->nb_queries);
+
+ return 0;
+}
+
+/**
+ * Drop every dependency currently attached to an execution context.
+ *
+ * Unreferences all buffer dependencies, unlocks frames still marked as locked
+ * and unreferences them, and resets the wait/signal semaphore counts to zero.
+ * The backing arrays are kept allocated.
+ */
+void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
+{
+ for (int j = 0; j < e->nb_buf_deps; j++)
+ av_buffer_unref(&e->buf_deps[j]);
+ e->nb_buf_deps = 0;
+
+ for (int j = 0; j < e->nb_frame_deps; j++) {
+ /* Only frames this context still holds locked are unlocked; the update
+ * flag is cleared together with the lock flag */
+ if (e->frame_locked[j]) {
+ AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
+ s->hwfc->unlock_frame(s->frames, f);
+ e->frame_locked[j] = 0;
+ e->frame_update[j] = 0;
+ }
+ av_buffer_unref(&e->frame_deps[j]);
}
+ e->nb_frame_deps = 0;
- f->queue_family[0] = s->nb_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : s->qfs[0];
- f->layout[0] = create_info.initialLayout;
- f->access[0] = 0x0;
- f->sem_value[0] = 0;
+ e->sem_wait_cnt = 0;
+ e->sem_sig_cnt = 0;
+}
- f->flags = 0x0;
- f->tiling = tiling;
+/**
+ * Attach buffer references to an execution context as dependencies; they are
+ * unreferenced by ff_vk_exec_discard_deps().
+ *
+ * @param deps    array of nb_deps buffer references
+ * @param nb_deps number of entries in deps; the call is a no-op returning 0
+ *                when this is <= 0 or deps is NULL
+ * @param ref     non-zero: a new reference to each entry is created and
+ *                stored; zero: the given pointers themselves are stored
+ * @return 0 on success, AVERROR(ENOMEM) on failure, in which case all
+ *         dependencies of e have been discarded
+ */
+int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
+                           AVBufferRef **deps, int nb_deps, int ref)
+{
+    AVBufferRef **dst;
+
+    /* Nothing to add. Without this guard an empty list would make
+     * av_fast_realloc() hand back the still-NULL array, which is
+     * indistinguishable from an allocation failure. */
+    if (!deps || nb_deps <= 0)
+        return 0;
+
+    dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size,
+                          (e->nb_buf_deps + nb_deps) * sizeof(*dst));
+    if (!dst) {
+        ff_vk_exec_discard_deps(s, e);
+        return AVERROR(ENOMEM);
+    }
- return 0;
+    e->buf_deps = dst;
-fail:
- return err;
+    for (int i = 0; i < nb_deps; i++) {
+        e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i];
+        /* A NULL entry (failed av_buffer_ref or NULL input) aborts the call */
+        if (!e->buf_deps[e->nb_buf_deps]) {
+            ff_vk_exec_discard_deps(s, e);
+            return AVERROR(ENOMEM);
+        }
+        e->nb_buf_deps++;
+    }
+
+    return 0;
}
-int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
- VkShaderStageFlagBits stage)
+/**
+ * Attach a Vulkan frame to an execution context as a dependency.
+ *
+ * For every image of the frame, the frame's semaphore is added to the wait
+ * list (at its current value, with in_wait_dst_flag as the wait stage) and to
+ * the signal list (at current value + 1). A new reference to vkfb is stored,
+ * and the frame is locked and marked as locked by this context.
+ *
+ * @param vkfb             buffer whose data points to an AVVkFrame
+ * @param in_wait_dst_flag pipeline stage stored for each semaphore wait
+ * @return 0 on success, AVERROR(ENOMEM) on failure, in which case all
+ *         dependencies of e have been discarded
+ */
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
+ VkPipelineStageFlagBits in_wait_dst_flag)
{
- VkPushConstantRange *pc;
+ uint8_t *frame_locked;
+ uint8_t *frame_update;
+ AVBufferRef **frame_deps;
+ VkImageLayout *layout_dst;
+ uint32_t *queue_family_dst;
+ VkAccessFlagBits *access_dst;
+
+ AVVkFrame *f = (AVVkFrame *)vkfb->data;
+ int nb_images = ff_vk_count_images(f);
+
+/* Grow str->arr so that it can hold cnt + 1 elements. The result goes through
+ * the local variable named like the member; on failure all dependencies are
+ * discarded and the enclosing function returns AVERROR(ENOMEM). */
+#define ARR_REALLOC(str, arr, alloc_s, cnt) \
+ do { \
+ arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \
+ if (!arr) { \
+ ff_vk_exec_discard_deps(s, e); \
+ return AVERROR(ENOMEM); \
+ } \
+ str->arr = arr; \
+ } while (0)
+
+ for (int i = 0; i < nb_images; i++) {
+ VkSemaphore *sem_wait;
+ uint64_t *sem_wait_val;
+ VkPipelineStageFlagBits *sem_wait_dst;
+ VkSemaphore *sem_sig;
+ uint64_t *sem_sig_val;
+ uint64_t **sem_sig_val_dst;
+
+ ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
+ ARR_REALLOC(e, sem_wait_dst, &e->sem_wait_dst_alloc, e->sem_wait_cnt);
+ ARR_REALLOC(e, sem_wait_val, &e->sem_wait_val_alloc, e->sem_wait_cnt);
+ ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
+ ARR_REALLOC(e, sem_sig_val, &e->sem_sig_val_alloc, e->sem_sig_cnt);
+ ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_cnt);
- pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
- pl->push_consts_num + 1);
- if (!pl->push_consts)
+ /* Wait for the image's semaphore to reach its current value... */
+ e->sem_wait[e->sem_wait_cnt] = f->sem[i];
+ e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
+ e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
+ e->sem_wait_cnt++;
+
+ /* ...and signal the next value; the address of the frame's counter is
+ * kept so that ff_vk_exec_submit() can increment it after submitting */
+ e->sem_sig[e->sem_sig_cnt] = f->sem[i];
+ e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
+ e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
+ e->sem_sig_cnt++;
+ }
+
+ /* One slot per frame in each of the per-frame bookkeeping arrays */
+ ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps);
+ ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps);
+ ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps);
+
+ ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps);
+ ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
+ ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps);
+
+ e->frame_deps[e->nb_frame_deps] = av_buffer_ref(vkfb);
+ if (!e->frame_deps[e->nb_frame_deps]) {
+ ff_vk_exec_discard_deps(s, e);
return AVERROR(ENOMEM);
+ }
- pc = &pl->push_consts[pl->push_consts_num++];
- memset(pc, 0, sizeof(*pc));
+ /* The frame stays locked until ff_vk_exec_submit() or
+ * ff_vk_exec_discard_deps() unlocks it */
+ s->hwfc->lock_frame(s->frames, f);
+ e->frame_locked[e->nb_frame_deps] = 1;
+ e->frame_update[e->nb_frame_deps] = 0;
- pc->stageFlags = stage;
- pc->offset = offset;
- pc->size = size;
+ e->nb_frame_deps++;
return 0;
}
-FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
-int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
- FFVkQueueFamilyCtx *qf)
+/**
+ * Remember the state a frame dependency will be in once the commands of this
+ * execution context have run.
+ *
+ * The frame is looked up among the context's frame dependencies by comparing
+ * buffer data pointers; it must already have been added (asserted). The
+ * destination queue family, access mask and new layout are taken from bar and
+ * the dependency is flagged for update, which ff_vk_exec_submit() applies.
+ */
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e,
+                             AVBufferRef *vkfb,
+                             VkImageMemoryBarrier2 *bar)
+{
+    int dep = 0;
+
+    /* Linear search for the dependency that wraps the same frame */
+    while (dep < e->nb_frame_deps && e->frame_deps[dep]->data != vkfb->data)
+        dep++;
+    av_assert0(dep < e->nb_frame_deps);
+
+    e->queue_family_dst[dep] = bar->dstQueueFamilyIndex;
+    e->access_dst[dep]       = bar->dstAccessMask;
+    e->layout_dst[dep]       = bar->newLayout;
+    e->frame_update[dep]     = 1;
+}
+
+/**
+ * Finish recording and submit the command buffer of an execution context.
+ *
+ * Ends the command buffer and submits it to the context's queue together with
+ * the wait/signal semaphore lists collected so far; the context's fence is
+ * passed to the submission. After a successful submit the semaphore counters
+ * referenced by sem_sig_val_dst are incremented and every frame still locked
+ * by this context is unlocked, after copying the pending layout, access and
+ * queue family into each of its images if the frame was flagged for update.
+ *
+ * @return 0 on success, AVERROR_EXTERNAL if ending or submitting the command
+ *         buffer fails, in which case all dependencies of e have been
+ *         discarded
+ */
+int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
{
VkResult ret;
- FFVkExecContext *e;
FFVulkanFunctions *vk = &s->vkfn;
- VkCommandPoolCreateInfo cqueue_create = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
- .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
- .queueFamilyIndex = qf->queue_family,
- };
- VkCommandBufferAllocateInfo cbuf_create = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
- .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
- .commandBufferCount = qf->nb_queues,
+ /* Values to wait for / signal, parallel to the semaphore arrays below */
+ VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+ .pWaitSemaphoreValues = e->sem_wait_val,
+ .pSignalSemaphoreValues = e->sem_sig_val,
+ .waitSemaphoreValueCount = e->sem_wait_cnt,
+ .signalSemaphoreValueCount = e->sem_sig_cnt,
};
- e = create_exec_ctx(s);
- if (!e)
- return AVERROR(ENOMEM);
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = &s_timeline_sem_info,
- e->qf = qf;
+ .commandBufferCount = 1,
+ .pCommandBuffers = &e->buf,
- e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
- if (!e->queues)
- return AVERROR(ENOMEM);
+ .pWaitSemaphores = e->sem_wait,
+ .pWaitDstStageMask = e->sem_wait_dst,
+ .waitSemaphoreCount = e->sem_wait_cnt,
- e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
- if (!e->bufs)
- return AVERROR(ENOMEM);
+ .pSignalSemaphores = e->sem_sig,
+ .signalSemaphoreCount = e->sem_sig_cnt,
+ };
- /* Create command pool */
- ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
- s->hwctx->alloc, &e->pool);
+ ret = vk->EndCommandBuffer(e->buf);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+ av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
ff_vk_ret2str(ret));
+ ff_vk_exec_discard_deps(s, e);
return AVERROR_EXTERNAL;
}
- cbuf_create.commandPool = e->pool;
+ /* The queue lock is held only around the submit call itself */
+ s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, e->qf, e->qi);
+ ret = vk->QueueSubmit(e->queue, 1, &s_info, e->fence);
+ s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, e->qf, e->qi);
- /* Allocate command buffer */
- ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+ av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
ff_vk_ret2str(ret));
+ ff_vk_exec_discard_deps(s, e);
return AVERROR_EXTERNAL;
}
- for (int i = 0; i < qf->nb_queues; i++) {
- FFVkQueueCtx *q = &e->queues[i];
- vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
- i % qf->actual_queues, &q->queue);
- }
+ /* Advance the stored counter of every semaphore that will be signalled,
+ * matching the "current value + 1" recorded when the frame was added */
+ for (int i = 0; i < e->sem_sig_cnt; i++)
+ *e->sem_sig_val_dst[i] += 1;
- *ctx = e;
+ /* Unlock all frames, first applying any state recorded by
+ * ff_vk_exec_update_frame() while the frame is still locked */
+ for (int j = 0; j < e->nb_frame_deps; j++) {
+ if (e->frame_locked[j]) {
+ AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
+ if (e->frame_update[j]) {
+ int nb_images = ff_vk_count_images(f);
+ for (int i = 0; i < nb_images; i++) {
+ f->layout[i] = e->layout_dst[j];
+ f->access[i] = e->access_dst[j];
+ f->queue_family[i] = e->queue_family_dst[j];
+ }
+ }
+ s->hwfc->unlock_frame(s->frames, f);
+ e->frame_locked[j] = 0;
+ }
+ }
return 0;
}
-int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
- int nb_queries, VkQueryType type,
- int elem_64bits, void *create_pnext)
+/**
+ * Allocate device memory matching a set of memory requirements.
+ *
+ * The first memory type that is allowed by req->memoryTypeBits and whose
+ * property flags include all of req_flags is used.
+ *
+ * @param req             requirements to satisfy; req->size is rounded up to
+ *                        the minimum map alignment when host-visible memory
+ *                        is requested
+ * @param req_flags       required property flags; UINT32_MAX disables the
+ *                        property check
+ * @param alloc_extension chained into the allocation info as pNext
+ * @param mem_flags       if non-NULL, the property flags of the chosen type
+ *                        are OR-ed into *mem_flags
+ * @param mem             receives the allocated memory
+ * @return 0 on success, AVERROR(EINVAL) if no memory type matches,
+ *         AVERROR(ENOMEM) if the allocation fails
+ */
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
VkResult ret;
- size_t qd_size;
- int nb_results = nb_queries;
- int nb_statuses = 0 /* Once RADV has support, = nb_queries */;
- int status_stride = 2;
- int result_elem_size = elem_64bits ? 8 : 4;
+ int index = -1;
FFVulkanFunctions *vk = &s->vkfn;
- VkQueryPoolCreateInfo query_pool_info = {
- .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
- .pNext = create_pnext,
- .queryType = type,
- .queryCount = nb_queries*e->qf->nb_queues,
- };
- if (e->query.pool)
- return AVERROR(EINVAL);
-
- /* Video encode quieries produce two results per query */
- if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
- status_stride = 3; /* skip,skip,result,skip,skip,result */
- nb_results *= 2;
- } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
- status_stride = 1;
- nb_results *= 0;
- }
-
- qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size;
-
- e->query.data = av_mallocz(e->qf->nb_queues*qd_size);
- if (!e->query.data)
- return AVERROR(ENOMEM);
-
- ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
- s->hwctx->alloc, &e->query.pool);
- if (ret != VK_SUCCESS)
- return AVERROR_EXTERNAL;
+ VkMemoryAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = alloc_extension,
+ };
- e->query.data_per_queue = qd_size;
- e->query.nb_queries = nb_queries;
- e->query.nb_results = nb_results;
- e->query.nb_statuses = nb_statuses;
- e->query.elem_64bits = elem_64bits;
- e->query.status_stride = status_stride;
+ /* Align if we need to */
+ if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
- return 0;
-}
+ alloc_info.allocationSize = req->size;
-int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
- int query_idx, void **data, int64_t *status)
-{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
- uint8_t *qd;
- int32_t *res32;
- int64_t *res64;
- int64_t res = 0;
- VkQueryResultFlags qf = 0;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+ /* The vulkan spec requires memory types to be sorted in the "optimal"
+ * order, so the first matching type we find will be the best/fastest one */
+ for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
+ /* The memory type must be supported by the requirements (bitfield).
+ * The mask is built unsigned: up to 32 memory types exist and shifting
+ * a signed 1 into bit 31 is undefined. */
+ if (!(req->memoryTypeBits & (1U << i)))
+ continue;
- if (!q->submitted) {
- *data = NULL;
- return 0;
- }
+ /* The memory type flags must include our properties */
+ if ((req_flags != UINT32_MAX) &&
+ ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
+ continue;
- qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue;
- qf |= e->query.nb_results && e->query.nb_statuses ?
- VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
- qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0;
- res32 = (int32_t *)(qd + e->query.nb_results*4);
- res64 = (int64_t *)(qd + e->query.nb_results*8);
-
- ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool,
- query_idx,
- e->query.nb_queries,
- e->query.data_per_queue, qd,
- e->query.elem_64bits ? 8 : 4, qf);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ /* Found a suitable memory type */
+ index = i;
+ break;
}
- if (e->query.nb_statuses && e->query.elem_64bits) {
- for (int i = 0; i < e->query.nb_queries; i++) {
- res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
- res64[i] : res;
- res64 += e->query.status_stride;
- }
- } else if (e->query.nb_statuses) {
- for (int i = 0; i < e->query.nb_queries; i++) {
- res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
- res32[i] : res;
- res32 += e->query.status_stride;
- }
+ if (index < 0) {
+ av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+ req_flags);
+ return AVERROR(EINVAL);
}
- if (data)
- *data = qd;
- if (status)
- *status = res;
-
- return 0;
-}
-
-void ff_vk_discard_exec_deps(FFVkExecContext *e)
-{
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+ alloc_info.memoryTypeIndex = index;
- for (int j = 0; j < q->nb_buf_deps; j++)
- av_buffer_unref(&q->buf_deps[j]);
- q->nb_buf_deps = 0;
+ ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
+ s->hwctx->alloc, mem);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR(ENOMEM);
+ }
- for (int j = 0; j < q->nb_frame_deps; j++)
- av_frame_free(&q->frame_deps[j]);
- q->nb_frame_deps = 0;
+ /* Flags are OR-ed in, not assigned: the caller's initial value is kept */
+ if (mem_flags)
+ *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
- e->sem_wait_cnt = 0;
- e->sem_sig_cnt = 0;
+ return 0;
}
-int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
+/**
+ * Create a buffer and allocate and bind memory for it.
+ *
+ * A dedicated allocation is used when the implementation reports that it
+ * prefers or requires one for this buffer.
+ *
+ * @param buf         receives the buffer handle, memory, memory flags and size
+ * @param size        requested buffer size in bytes; stored as buf->size
+ * @param pNext       chained into the buffer create info
+ * @param alloc_pNext chained into the memory allocation
+ * @param usage       buffer usage flags
+ * @param flags       required memory property flags
+ * @return 0 on success, AVERROR_EXTERNAL if buffer creation or memory binding
+ *         fails, or the error returned by ff_vk_alloc_mem(). Whatever was
+ *         created before a failure is left in buf and is not released here.
+ */
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
+ int err;
VkResult ret;
+ int use_ded_mem;
FFVulkanFunctions *vk = &s->vkfn;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
- VkCommandBufferBeginInfo cmd_start = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ VkBufferCreateInfo buf_spawn = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = pNext,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .size = size, /* Gets FFALIGNED during alloc if host visible
+ but should be ok */
};
- /* Create the fence and don't wait for it initially */
- if (!q->fence) {
- VkFenceCreateInfo fence_spawn = {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- };
- ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
- &q->fence);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- } else if (!q->synchronous) {
- vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
+ VkBufferMemoryRequirementsInfo2 req_desc = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
+ };
+ VkMemoryDedicatedAllocateInfo ded_alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = alloc_pNext,
+ };
+ VkMemoryDedicatedRequirements ded_req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ };
+ VkMemoryRequirements2 req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+ .pNext = &ded_req,
+ };
+
+ /* Create with the same allocator ff_vk_free_buf() destroys with; Vulkan
+ * requires the allocators used for creation and destruction to match */
+ ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
}
- q->synchronous = 0;
+ req_desc.buffer = buf->buf;
- /* Discard queue dependencies */
- ff_vk_discard_exec_deps(e);
+ vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
+
+ /* In case the implementation prefers/requires dedicated allocation */
+ use_ded_mem = ded_req.prefersDedicatedAllocation |
+ ded_req.requiresDedicatedAllocation;
+ if (use_ded_mem)
+ ded_alloc.buffer = buf->buf;
+
+ /* Without a dedicated allocation, the caller's chain is passed directly */
+ err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
+ use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ &buf->flags, &buf->mem);
+ if (err)
+ return err;
- ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
+ ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
+ av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
- if (e->query.pool) {
- e->query.idx = e->qf->cur_queue*e->query.nb_queries;
- vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool,
- e->query.idx, e->query.nb_queries);
- }
+ buf->size = size;
return 0;
}
-VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
-{
- return e->bufs[e->qf->cur_queue];
-}
-
-int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
- VkPipelineStageFlagBits in_wait_dst_flag)
+/**
+ * Map the memory of several buffers into host address space.
+ *
+ * @param buf        array of nb_buffers buffers
+ * @param mem        receives one mapped pointer per buffer
+ * @param nb_buffers number of buffers; any count is accepted
+ * @param invalidate if non-zero, the mapped ranges of all buffers whose memory
+ *                   is not host-coherent are invalidated after mapping
+ * @return 0 on success, AVERROR_EXTERNAL if mapping or invalidation fails;
+ *         buffers mapped before a failure are left mapped
+ */
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate)
{
- AVFrame **dst;
- AVVkFrame *f = (AVVkFrame *)frame->data[0];
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
- AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
- int planes = av_pix_fmt_count_planes(fc->sw_format);
-
- for (int i = 0; i < planes; i++) {
- e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
- (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
- if (!e->sem_wait) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
- (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
- if (!e->sem_wait_dst) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc,
- (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val));
- if (!e->sem_wait_val) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
- (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
- if (!e->sem_sig) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc,
- (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val));
- if (!e->sem_sig_val) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkMappedMemoryRange inval_list[64];
+ int inval_count = 0;
- e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
- (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
- if (!e->sem_sig_val_dst) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
+ for (int i = 0; i < nb_buffers; i++) {
+ ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
+ VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
}
-
- e->sem_wait[e->sem_wait_cnt] = f->sem[i];
- e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
- e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
- e->sem_wait_cnt++;
-
- e->sem_sig[e->sem_sig_cnt] = f->sem[i];
- e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
- e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
- e->sem_sig_cnt++;
}
- dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
- (q->nb_frame_deps + 1) * sizeof(*dst));
- if (!dst) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
+ if (!invalidate)
+ return 0;
+
+ for (int i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange ival_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ /* Host-coherent memory needs no explicit invalidation */
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ inval_list[inval_count++] = ival_buf;
+ /* The fixed-size list is full: invalidate this batch right away so
+ * that more buffers than list entries cannot overrun the array */
+ if (inval_count == (int)(sizeof(inval_list) / sizeof(inval_list[0]))) {
+ ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+ inval_list);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ inval_count = 0;
+ }
}
- q->frame_deps = dst;
- q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
- if (!q->frame_deps[q->nb_frame_deps]) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
+ /* Invalidate whatever is left in the last, partially filled batch */
+ if (inval_count) {
+ ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+ inval_list);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
}
- q->nb_frame_deps++;
return 0;
}
-int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
+/**
+ * Unmap the memory of several buffers, optionally flushing host writes first.
+ *
+ * @param buf        array of nb_buffers buffers
+ * @param nb_buffers number of buffers; any count is accepted
+ * @param flush      if non-zero, the mapped ranges of all buffers whose memory
+ *                   is not host-coherent are flushed before unmapping
+ * @return 0 on success, AVERROR_EXTERNAL if a flush failed; all buffers are
+ *         unmapped in either case
+ */
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
+ int flush)
{
+ int err = 0;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
- VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
- .pWaitSemaphoreValues = e->sem_wait_val,
- .pSignalSemaphoreValues = e->sem_sig_val,
- .waitSemaphoreValueCount = e->sem_wait_cnt,
- .signalSemaphoreValueCount = e->sem_sig_cnt,
- };
-
- VkSubmitInfo s_info = {
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &s_timeline_sem_info,
-
- .commandBufferCount = 1,
- .pCommandBuffers = &e->bufs[e->qf->cur_queue],
-
- .pWaitSemaphores = e->sem_wait,
- .pWaitDstStageMask = e->sem_wait_dst,
- .waitSemaphoreCount = e->sem_wait_cnt,
-
- .pSignalSemaphores = e->sem_sig,
- .signalSemaphoreCount = e->sem_sig_cnt,
- };
+ VkMappedMemoryRange flush_list[64];
+ int flush_count = 0;
- ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ if (flush) {
+ for (int i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange flush_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ /* Host-coherent memory needs no explicit flush */
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ flush_list[flush_count++] = flush_buf;
+ /* The fixed-size list is full: flush this batch right away so that
+ * more buffers than list entries cannot overrun the array */
+ if (flush_count == (int)(sizeof(flush_list) / sizeof(flush_list[0]))) {
+ ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+ flush_list);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+ }
+ flush_count = 0;
+ }
+ }
}
- s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
- e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
-
- ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
-
- s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
- e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
-
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ /* Flush whatever is left in the last, partially filled batch */
+ if (flush_count) {
+ ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+ flush_list);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+ }
}
- for (int i = 0; i < e->sem_sig_cnt; i++)
- *e->sem_sig_val_dst[i] += 1;
-
- e->query.idx = e->qf->cur_queue*e->query.nb_queries;
- q->submitted = 1;
+ for (int i = 0; i < nb_buffers; i++)
+ vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
- return 0;
+ return err;
}
-void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
+/**
+ * Destroy a buffer and free its memory.
+ *
+ * Does nothing if buf is NULL or the context has no device context. Handles
+ * equal to VK_NULL_HANDLE are skipped. The handles stored in buf are not
+ * reset afterwards.
+ */
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
{
FFVulkanFunctions *vk = &s->vkfn;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
- if (!q->submitted)
+
+ if (!buf || !s->hwctx)
return;
- vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
- q->synchronous = 1;
+ if (buf->buf != VK_NULL_HANDLE)
+ vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
+ if (buf->mem != VK_NULL_HANDLE)
+ vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}
-int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef **deps, int nb_deps)
+/**
+ * Append a push constant range to a pipeline.
+ *
+ * @param offset offset of the range
+ * @param size   size of the range
+ * @param stage  shader stage(s) the range is visible to
+ * @return 0 on success, AVERROR(ENOMEM) if the range list cannot be grown, in
+ *         which case the existing list is left untouched
+ */
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+                            VkShaderStageFlagBits stage)
{
- AVBufferRef **dst;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
- if (!deps || !nb_deps)
- return 0;
+    VkPushConstantRange *pc;
- dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
- (q->nb_buf_deps + nb_deps) * sizeof(*dst));
- if (!dst)
- goto err;
+    /* Grow through a temporary: assigning the result straight to
+     * pl->push_consts would lose (and leak) the old array on failure while
+     * pl->push_consts_num still counts its entries */
+    pc = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
+                          pl->push_consts_num + 1);
+    if (!pc)
+        return AVERROR(ENOMEM);
+    pl->push_consts = pc;
- q->buf_deps = dst;
+    pc = &pl->push_consts[pl->push_consts_num++];
+    memset(pc, 0, sizeof(*pc));
- for (int i = 0; i < nb_deps; i++) {
- q->buf_deps[q->nb_buf_deps] = deps[i];
- if (!q->buf_deps[q->nb_buf_deps])
- goto err;
- q->nb_buf_deps++;
- }
+    pc->stageFlags = stage;
+    pc->offset = offset;
+    pc->size = size;
return 0;
-
-err:
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
}
-FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
+FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
int unnorm_coords, VkFilter filt)
{
VkResult ret;
@@ -1030,10 +954,6 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
.unnormalizedCoordinates = unnorm_coords,
};
- FFVkSampler *sctx = create_sampler(s);
- if (!sctx)
- return NULL;
-
ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
s->hwctx->alloc, &sctx->sampler[0]);
if (ret != VK_SUCCESS) {
@@ -1048,6 +968,13 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
return sctx;
}
+/* Destroy the Vulkan sampler held in sctx->sampler[0]. sctx itself is not
+ * freed. */
+void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx)
+{
+    s->vkfn.DestroySampler(s->hwctx->act_dev,
+                           sctx->sampler[0],
+                           s->hwctx->alloc);
+}
+
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
{
if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA ||
@@ -1122,7 +1049,7 @@ int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
}
/* Add to queue dependencies */
- err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
+ err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
if (err) {
av_buffer_unref(&buf);
return err;
@@ -1133,14 +1060,9 @@ int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
return 0;
}
-FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)
-FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
- VkShaderStageFlags stage)
+int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
+ VkShaderStageFlags stage)
{
- FFVkSPIRVShader *shd = create_shader(pl);
- if (!shd)
- return NULL;
-
av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
@@ -1152,10 +1074,10 @@ FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
GLSLC(0, );
- return shd;
+ return 0;
}
-void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3])
{
shd->local_size[0] = local_size[0];
shd->local_size[1] = local_size[1];
@@ -1166,7 +1088,7 @@ void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}
-void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
+void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio)
{
int line = 0;
const char *p = shd->src.str;
@@ -1188,7 +1110,13 @@ void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
av_bprint_finalize(&buf, NULL);
}
-int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
+/* Destroy the Vulkan shader module referenced by shd->shader.module. No other
+ * member of shd is released here. */
+void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
+{
+    s->vkfn.DestroyShaderModule(s->hwctx->act_dev,
+                                shd->shader.module,
+                                s->hwctx->alloc);
+}
+
+int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
const char *entrypoint)
{
int err;
@@ -1437,7 +1365,7 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
return;
}
- set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
+// set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
pl->desc_set[set_id],
@@ -1446,12 +1374,12 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
}
+/**
+ * Record a push constant update into the command buffer of an execution
+ * context.
+ *
+ * @param pl     pipeline whose layout is used for the update
+ * @param stage  shader stage(s) to update
+ * @param offset offset of the update within the push constant range
+ * @param size   number of bytes taken from src
+ * @param src    new push constant data
+ */
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
- VkShaderStageFlagBits stage, int offset,
- size_t size, void *src)
+ FFVulkanPipeline *pl,
+ VkShaderStageFlagBits stage,
+ int offset, size_t size, void *src)
{
FFVulkanFunctions *vk = &s->vkfn;
-
- vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
+ vk->CmdPushConstants(e->buf, pl->pipeline_layout,
stage, offset, size, src);
}
@@ -1558,17 +1486,8 @@ int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
return 0;
}
-FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
-{
- FFVulkanPipeline *pl = create_pipeline(s);
- if (pl)
- pl->qf = qf;
-
- return pl;
-}
-
-int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkQueueFamilyCtx *qf)
{
int i;
VkResult ret;
@@ -1579,6 +1498,8 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
.layout = pl->pipeline_layout,
};
+ pl->qf = qf;
+
for (i = 0; i < pl->shaders_num; i++) {
if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
pipe.stage = pl->shaders[i]->shader;
@@ -1603,73 +1524,24 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
return 0;
}
-void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
+/**
+ * Record binding of a pipeline and its descriptor sets into the command
+ * buffer of an execution context.
+ *
+ * The first pl->descriptor_sets_num entries of pl->desc_staging are bound,
+ * starting at set 0 and without dynamic offsets.
+ */
+void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
- vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);
+ vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
- for (int i = 0; i < pl->descriptor_sets_num; i++)
- pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
+ /* NOTE(review): the per-queue selection of descriptor sets is disabled
+ * below, so pl->desc_staging is no longer filled in by this function yet it
+ * is still what gets bound - confirm it is populated elsewhere. */
+// for (int i = 0; i < pl->descriptor_sets_num; i++)
+ // pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
- vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
+ vk->CmdBindDescriptorSets(e->buf, pl->bind_point,
pl->pipeline_layout, 0,
pl->descriptor_sets_num,
(VkDescriptorSet *)pl->desc_staging,
0, NULL);
-
- e->bound_pl = pl;
-}
-
-static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
-{
- FFVulkanFunctions *vk = &s->vkfn;
-
- /* Make sure all queues have finished executing */
- for (int i = 0; i < e->qf->nb_queues; i++) {
- FFVkQueueCtx *q = &e->queues[i];
-
- if (q->fence) {
- vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
- }
-
- /* Free the fence */
- if (q->fence)
- vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
-
- /* Free buffer dependencies */
- for (int j = 0; j < q->nb_buf_deps; j++)
- av_buffer_unref(&q->buf_deps[j]);
- av_free(q->buf_deps);
-
- /* Free frame dependencies */
- for (int j = 0; j < q->nb_frame_deps; j++)
- av_frame_free(&q->frame_deps[j]);
- av_free(q->frame_deps);
- }
-
- if (e->bufs)
- vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
- if (e->pool)
- vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
- if (e->query.pool)
- vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc);
-
- av_freep(&e->query.data);
- av_freep(&e->bufs);
- av_freep(&e->queues);
- av_freep(&e->sem_sig);
- av_freep(&e->sem_sig_val);
- av_freep(&e->sem_sig_val_dst);
- av_freep(&e->sem_wait);
- av_freep(&e->sem_wait_dst);
- av_freep(&e->sem_wait_val);
- av_free(e);
}
-static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
@@ -1723,8 +1595,6 @@ static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
void ff_vk_uninit(FFVulkanContext *s)
{
- FFVulkanFunctions *vk = &s->vkfn;
-
av_freep(&s->query_props);
av_freep(&s->qf_props);
av_freep(&s->video_props);
@@ -1732,24 +1602,6 @@ void ff_vk_uninit(FFVulkanContext *s)
if (s->spirv_compiler)
s->spirv_compiler->uninit(&s->spirv_compiler);
- for (int i = 0; i < s->exec_ctx_num; i++)
- free_exec_ctx(s, s->exec_ctx[i]);
- av_freep(&s->exec_ctx);
-
- for (int i = 0; i < s->samplers_num; i++) {
- vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
- s->hwctx->alloc);
- av_free(s->samplers[i]);
- }
- av_freep(&s->samplers);
-
- for (int i = 0; i < s->pipelines_num; i++)
- free_pipeline(s, s->pipelines[i]);
- av_freep(&s->pipelines);
-
- av_freep(&s->scratch);
- s->scratch_size = 0;
-
av_buffer_unref(&s->device_ref);
av_buffer_unref(&s->frames_ref);
}
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index dd1bc9c440..a8f3c458fc 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -21,6 +21,8 @@
#define VK_NO_PROTOTYPES
+#include <stdatomic.h>
+
#include "pixdesc.h"
#include "bprint.h"
#include "hwcontext.h"
@@ -100,8 +102,6 @@ typedef struct FFVkBuffer {
typedef struct FFVkQueueFamilyCtx {
int queue_family;
int nb_queues;
- int cur_queue;
- int actual_queues;
} FFVkQueueFamilyCtx;
typedef struct FFVulkanPipeline {
@@ -139,71 +139,88 @@ typedef struct FFVulkanPipeline {
VkDescriptorPoolSize *pool_size_desc;
} FFVulkanPipeline;
-typedef struct FFVkQueueCtx {
- VkFence fence;
+typedef struct FFVkExecContext {
+ const struct FFVkExecPool *parent;
+
+ /* Queue for the execution context */
VkQueue queue;
+ int qf;
+ int qi;
- int synchronous;
- int submitted;
+ /* Command buffer for the context */
+ VkCommandBuffer buf;
+
+ /* Fence for the command buffer */
+ VkFence fence;
+
+ void *query_data;
+ int query_idx;
/* Buffer dependencies */
AVBufferRef **buf_deps;
int nb_buf_deps;
- int buf_deps_alloc_size;
+ unsigned int buf_deps_alloc_size;
/* Frame dependencies */
- AVFrame **frame_deps;
+ AVBufferRef **frame_deps;
+ unsigned int frame_deps_alloc_size;
int nb_frame_deps;
- int frame_deps_alloc_size;
-} FFVkQueueCtx;
-
-typedef struct FFVkExecContext {
- FFVkQueueFamilyCtx *qf;
-
- VkCommandPool pool;
- VkCommandBuffer *bufs;
- FFVkQueueCtx *queues;
-
- struct {
- int idx;
- VkQueryPool pool;
- uint8_t *data;
-
- int nb_queries;
- int nb_results;
- int nb_statuses;
- int elem_64bits;
- size_t data_per_queue;
- int status_stride;
- } query;
-
- AVBufferRef ***deps;
- int *nb_deps;
- int *dep_alloc_size;
-
- FFVulkanPipeline *bound_pl;
VkSemaphore *sem_wait;
- int sem_wait_alloc; /* Allocated sem_wait */
+ unsigned int sem_wait_alloc; /* Allocated sem_wait */
int sem_wait_cnt;
uint64_t *sem_wait_val;
- int sem_wait_val_alloc;
+ unsigned int sem_wait_val_alloc;
VkPipelineStageFlagBits *sem_wait_dst;
- int sem_wait_dst_alloc; /* Allocated sem_wait_dst */
+ unsigned int sem_wait_dst_alloc; /* Allocated sem_wait_dst */
VkSemaphore *sem_sig;
- int sem_sig_alloc; /* Allocated sem_sig */
+ unsigned int sem_sig_alloc; /* Allocated sem_sig */
int sem_sig_cnt;
uint64_t *sem_sig_val;
- int sem_sig_val_alloc;
+ unsigned int sem_sig_val_alloc;
uint64_t **sem_sig_val_dst;
- int sem_sig_val_dst_alloc;
+ unsigned int sem_sig_val_dst_alloc;
+
+ uint8_t *frame_locked;
+ unsigned int frame_locked_alloc_size;
+
+ VkAccessFlagBits *access_dst;
+ unsigned int access_dst_alloc;
+
+ VkImageLayout *layout_dst;
+ unsigned int layout_dst_alloc;
+
+ uint32_t *queue_family_dst;
+ unsigned int queue_family_dst_alloc;
+
+ uint8_t *frame_update;
+ unsigned int frame_update_alloc_size;
} FFVkExecContext;
+typedef struct FFVkExecPool {
+ FFVkQueueFamilyCtx *qf;
+ FFVkExecContext *contexts;
+ atomic_int_least64_t idx;
+
+ VkCommandPool cmd_buf_pool;
+ VkCommandBuffer *cmd_bufs;
+ int pool_size;
+
+ VkQueryPool query_pool;
+ void *query_data;
+ int query_results;
+ int query_statuses;
+ int query_64bit;
+ int query_status_stride;
+ int nb_queries;
+ size_t qd_size;
+} FFVkExecPool;
+
typedef struct FFVulkanContext {
const AVClass *class; /* Filters and encoders use this */
@@ -234,21 +251,6 @@ typedef struct FFVulkanContext {
int output_height;
enum AVPixelFormat output_format;
enum AVPixelFormat input_format;
-
- /* Samplers */
- FFVkSampler **samplers;
- int samplers_num;
-
- /* Exec contexts */
- FFVkExecContext **exec_ctx;
- int exec_ctx_num;
-
- /* Pipelines (each can have 1 shader of each type) */
- FFVulkanPipeline **pipelines;
- int pipelines_num;
-
- void *scratch; /* Scratch memory used only in functions */
- unsigned int scratch_size;
} FFVulkanContext;
/* Identity mapping - r = r, b = b, g = g, a = a */
@@ -260,244 +262,156 @@ extern const VkComponentMapping ff_comp_identity_map;
const char *ff_vk_ret2str(VkResult res);
/**
- * Loads props/mprops/driver_props
+ * Returns 1 if pixfmt is a usable RGB format.
*/
-int ff_vk_load_props(FFVulkanContext *s);
+int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
/**
- * Returns 1 if the image is any sort of supported RGB
+ * Returns the format to use for images in shaders.
*/
-int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
/**
- * Gets the glsl format string for a pixel format
+ * Loads props/mprops/driver_props
*/
-const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
+int ff_vk_load_props(FFVulkanContext *s);
/**
- * Setup the queue families from the hardware device context.
- * Necessary for image creation to work.
+ * Loads queue families into the main context.
+ * Chooses a QF and loads it into a context.
*/
void ff_vk_qf_fill(FFVulkanContext *s);
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ VkQueueFlagBits dev_family);
/**
- * Allocate device memory.
+ * Allocates/frees an execution pool.
*/
-int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
- VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
- VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ FFVkExecPool *pool, int nb_contexts,
+ int nb_queries, VkQueryType query_type, int query_64bit,
+ void *query_create_pnext);
+void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool);
/**
- * Get a queue family index and the number of queues. nb is optional.
+ * Retrieve an execution context from a pool. Threadsafe.
*/
-int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb);
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool);
/**
- * Initialize a queue family with a specific number of queues.
- * If nb_queues == 0, use however many queues the queue family has.
+ * Explicitly wait on an execution to be finished.
+ * Starting via ff_vk_exec_start() also waits on it.
*/
-int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues);
/**
- * Rotate through the queues in a queue family.
+ * Performs nb_queries queries and returns their results and statuses.
+ * Execution must have been waited on to produce valid results.
+ */
+VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
+ void **data, int64_t *status);
+
+/**
+ * Start/submit/wait an execution.
+ * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait()
+ * is not necessary (unless using it is just better).
*/
-int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e);
+int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e);
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
/**
- * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
+ * Execution dependency management.
+ * Can attach buffers to executions that will only be unref'd once the
+ * execution's command buffer has finished executing.
+ * Adding a frame dep will *lock the frame*, until either the dependencies
+ * are discarded, the execution is submitted, or a failure happens.
+ * update_frame will update the frame's properties before it is unlocked,
+ * only if submission was successful.
*/
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, int unnorm_coords,
- VkFilter filt);
+int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
+ AVBufferRef **deps, int nb_deps, int ref);
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e,
+ AVBufferRef *vkfb, VkPipelineStageFlagBits in_wait_dst_flag);
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
+ VkImageMemoryBarrier2 *bar);
+void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e);
/**
- * Create an imageview.
- * Guaranteed to remain alive until the queue submission has finished executing,
- * and will be destroyed after that.
+ * Create an imageview and add it as a dependency to an execution.
*/
int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
VkImageView *v, VkImage img, VkFormat fmt,
const VkComponentMapping map);
/**
- * Define a push constant for a given stage into a pipeline.
- * Must be called before the pipeline layout has been initialized.
+ * Memory/buffer/image allocation helpers.
*/
-int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
- VkShaderStageFlagBits stage);
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
- * Inits a pipeline. Everything in it will be auto-freed when calling
- * ff_vk_filter_uninit().
+ * Buffer management code.
*/
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf);
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate);
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
+ int flush);
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
/**
- * Inits a shader for a specific pipeline. Will be auto-freed on uninit.
+ * Sampler management.
*/
-FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
- VkShaderStageFlags stage);
+FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
+ int unnorm_coords, VkFilter filt);
+void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx);
/**
- * Writes the workgroup size for a shader.
+ * Shader management.
*/
-void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
+ VkShaderStageFlags stage);
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio);
+int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
+ const char *entrypoint);
+void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd);
/**
- * Adds a descriptor set to the shader and registers them in the pipeline.
+ * Register a descriptor set.
+ * Update a descriptor set for execution.
*/
int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
int num, int only_print_to_shader);
-
-/**
- * Compiles the shader, entrypoint must be set to "main".
- */
-int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
- const char *entrypoint);
-
-/**
- * Pretty print shader, mainly used by shader compilers.
- */
-void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio);
-
-/**
- * Initializes the pipeline layout after all shaders and descriptor sets have
- * been finished.
- */
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
-
-/**
- * Initializes a compute pipeline. Will pick the first shader with the
- * COMPUTE flag set.
- */
-int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl);
-
-/**
- * Updates a descriptor set via the updaters defined.
- * Can be called immediately after pipeline creation, but must be called
- * at least once before queue submission.
- */
void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
int set_id);
/**
- * Init an execution context for command recording and queue submission.
- * WIll be auto-freed on uninit.
- */
-int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
- FFVkQueueFamilyCtx *qf);
-
-/**
- * Create a query pool for a command context.
- * elem_64bits exists to troll driver devs for compliance. All results
- * and statuses returned should be 32 bits, unless this is set, then it's 64bits.
- */
-int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
- int nb_queries, VkQueryType type,
- int elem_64bits, void *create_pnext);
-
-/**
- * Get results for query.
- * Returns the status of the query.
- * Sets *res to the status of the queries.
- */
-int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
- int query_idx, void **data, int64_t *status);
-
-/**
- * Begin recording to the command buffer. Previous execution must have been
- * completed, which ff_vk_submit_exec_queue() will ensure.
- */
-int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Add a command to bind the completed pipeline and its descriptor sets.
- * Must be called after ff_vk_start_exec_recording() and before submission.
- */
-void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
- FFVulkanPipeline *pl);
-
-/**
- * Updates push constants.
- * Must be called after binding a pipeline if any push constants were defined.
+ * Add/update push constants for execution.
*/
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+ VkShaderStageFlagBits stage);
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
- VkShaderStageFlagBits stage, int offset,
- size_t size, void *src);
+ FFVulkanPipeline *pl,
+ VkShaderStageFlagBits stage,
+ int offset, size_t size, void *src);
/**
- * Gets the command buffer to use for this submission from the exe context.
+ * Pipeline management.
*/
-VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e);
-
-/**
- * Adds a generic AVBufferRef as a queue depenency.
- */
-int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef **deps, int nb_deps);
-
-/**
- * Discards all queue dependencies
- */
-void ff_vk_discard_exec_deps(FFVkExecContext *e);
-
-/**
- * Adds a frame as a queue dependency. This also manages semaphore signalling.
- * Must be called before submission.
- */
-int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
- VkPipelineStageFlagBits in_wait_dst_flag);
-
-/**
- * Submits a command buffer to the queue for execution. Will not block.
- */
-int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Wait on a command buffer's execution. Mainly useful for debugging and
- * development.
- */
-void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Create a VkBuffer with the specified parameters.
- */
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
- void *pNext, void *alloc_pNext,
- VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
-
-/**
- * Maps the buffer to userspace. Set invalidate to 1 if reading the contents
- * is necessary.
- */
-int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
- int nb_buffers, int invalidate);
-
-/**
- * Unmaps the buffer from userspace. Set flush to 1 to write and sync.
- */
-int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
- int flush);
-
-/**
- * Frees a buffer.
- */
-void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
-
-/**
- * Creates an image, allocates and binds memory in the given
- * idx value of the dst frame. If mem is non-NULL, then no memory will be
- * allocated, but instead the given memory will be bound to the image.
- */
-int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx,
- int width, int height, VkFormat fmt, VkImageTiling tiling,
- VkImageUsageFlagBits usage, VkImageCreateFlags flags,
- void *create_pnext,
- VkDeviceMemory *mem, void *alloc_pnext);
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkQueueFamilyCtx *qf);
+int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
+void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanPipeline *pl);
+void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl);
/**
- * Frees the main Vulkan context.
+ * Frees main context.
*/
void ff_vk_uninit(FFVulkanContext *s);
--
2.39.2
[-- Attachment #50: 0049-vulkan-add-ff_vk_count_images.patch --]
[-- Type: text/x-diff, Size: 779 bytes --]
From 2aad41bb35392d7f2e300857a1b0f73b873ec601 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:37:18 +0100
Subject: [PATCH 49/72] vulkan: add ff_vk_count_images()
---
libavutil/vulkan.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index a8f3c458fc..e66ca59ef7 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -253,6 +253,15 @@ typedef struct FFVulkanContext {
enum AVPixelFormat input_format;
} FFVulkanContext;
+/* Number of leading non-NULL images in f, never reading past the end of f->img[]. */
+static inline int ff_vk_count_images(AVVkFrame *f)
+{
+    int cnt = 0;
+    while (cnt < AV_NUM_DATA_POINTERS && f->img[cnt])
+        cnt++;
+    return cnt;
+}
+
/* Identity mapping - r = r, b = b, g = g, a = a */
extern const VkComponentMapping ff_comp_identity_map;
--
2.39.2
[-- Attachment #51: 0050-vulkan-rewrite-image-handling-code.patch --]
[-- Type: text/x-diff, Size: 64405 bytes --]
From dca500204539da2a17746db4125c476a29851305 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:38:10 +0100
Subject: [PATCH 50/72] vulkan: rewrite image handling code
---
libavutil/vulkan.c | 919 +++++++++++++++++++++++++--------------------
libavutil/vulkan.h | 166 ++++----
2 files changed, 612 insertions(+), 473 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 17a5bd6f3f..20ad269b0a 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -21,12 +23,6 @@
#include "vulkan.h"
#include "vulkan_loader.h"
-#if CONFIG_LIBGLSLANG
-#include "vulkan_glslang.c"
-#elif CONFIG_LIBSHADERC
-#include "vulkan_shaderc.c"
-#endif
-
const VkComponentMapping ff_comp_identity_map = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -92,15 +88,22 @@ int ff_vk_load_props(FFVulkanContext *s)
uint32_t qc = 0;
FFVulkanFunctions *vk = &s->vkfn;
+ s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT,
+ };
+ s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT,
+ .pNext = &s->hprops,
+ };
s->driver_props = (VkPhysicalDeviceDriverProperties) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+ .pNext = &s->desc_buf_props,
};
s->props = (VkPhysicalDeviceProperties2) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = &s->driver_props,
};
-
vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
@@ -373,6 +376,7 @@ int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
return AVERROR_EXTERNAL;
}
+ e->idx = i;
e->parent = pool;
/* Query data */
@@ -496,17 +500,21 @@ void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
for (int j = 0; j < e->nb_frame_deps; j++) {
if (e->frame_locked[j]) {
- AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
- s->hwfc->unlock_frame(s->frames, f);
+ AVFrame *f = e->frame_deps[j];
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ vkfc->unlock_frame(hwfc, vkf);
e->frame_locked[j] = 0;
e->frame_update[j] = 0;
}
- av_buffer_unref(&e->frame_deps[j]);
+ av_frame_free(&e->frame_deps[j]);
}
e->nb_frame_deps = 0;
e->sem_wait_cnt = 0;
e->sem_sig_cnt = 0;
+ e->sem_sig_val_dst_cnt = 0;
}
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
@@ -533,18 +541,25 @@ int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
return 0;
}
-int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
VkPipelineStageFlagBits in_wait_dst_flag)
{
uint8_t *frame_locked;
uint8_t *frame_update;
- AVBufferRef **frame_deps;
+ AVFrame **frame_deps;
VkImageLayout *layout_dst;
uint32_t *queue_family_dst;
VkAccessFlagBits *access_dst;
- AVVkFrame *f = (AVVkFrame *)vkfb->data;
- int nb_images = ff_vk_count_images(f);
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ int nb_images = ff_vk_count_images(vkf);
+
+ /* Don't add duplicates */
+ for (int i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == f->data[0])
+ return 1;
#define ARR_REALLOC(str, arr, alloc_s, cnt) \
do { \
@@ -569,17 +584,18 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef
ARR_REALLOC(e, sem_wait_val, &e->sem_wait_val_alloc, e->sem_wait_cnt);
ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
ARR_REALLOC(e, sem_sig_val, &e->sem_sig_val_alloc, e->sem_sig_cnt);
- ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_cnt);
+ ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);
- e->sem_wait[e->sem_wait_cnt] = f->sem[i];
+ e->sem_wait[e->sem_wait_cnt] = vkf->sem[i];
e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
- e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
+ e->sem_wait_val[e->sem_wait_cnt] = vkf->sem_value[i];
e->sem_wait_cnt++;
- e->sem_sig[e->sem_sig_cnt] = f->sem[i];
- e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
- e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
+ e->sem_sig[e->sem_sig_cnt] = vkf->sem[i];
+ e->sem_sig_val[e->sem_sig_cnt] = vkf->sem_value[i] + 1;
+ e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i];
e->sem_sig_cnt++;
+ e->sem_sig_val_dst_cnt++;
}
ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps);
@@ -590,13 +606,13 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef
ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps);
- e->frame_deps[e->nb_frame_deps] = av_buffer_ref(vkfb);
+ e->frame_deps[e->nb_frame_deps] = av_frame_clone(f);
if (!e->frame_deps[e->nb_frame_deps]) {
ff_vk_exec_discard_deps(s, e);
return AVERROR(ENOMEM);
}
- s->hwfc->lock_frame(s->frames, f);
+ vkfc->lock_frame(hwfc, vkf);
e->frame_locked[e->nb_frame_deps] = 1;
e->frame_update[e->nb_frame_deps] = 0;
@@ -605,22 +621,51 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef
return 0;
}
-void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef *vkfb,
- VkImageMemoryBarrier2 *bar)
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+ VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar)
{
int i;
for (i = 0; i < e->nb_frame_deps; i++)
- if (e->frame_deps[i]->data == vkfb->data)
+ if (e->frame_deps[i]->data[0] == f->data[0])
break;
av_assert0(i < e->nb_frame_deps);
+ /* Don't update duplicates */
+ if (nb_img_bar && !e->frame_update[i])
+ (*nb_img_bar)++;
+
e->queue_family_dst[i] = bar->dstQueueFamilyIndex;
e->access_dst[i] = bar->dstAccessMask;
e->layout_dst[i] = bar->newLayout;
e->frame_update[i] = 1;
}
+/* Hand f's first semaphore and its current value to the caller via dst/dst_val. */
+/* dst_val is stored, not copied: ff_vk_exec_submit() increments it on success. */
+int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
+ VkSemaphore *dst, uint64_t *dst_val,
+ AVFrame *f)
+{
+ uint64_t **sem_sig_val_dst;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+
+ /* Reject unknown frames */
+ int i;
+ for (i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == f->data[0])
+ break;
+ if (i == e->nb_frame_deps)
+ return AVERROR(EINVAL);
+
+ ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);
+ *dst = vkf->sem[0];
+ *dst_val = vkf->sem_value[0];
+ /* Append dst_val to the list of values bumped after a successful submission */
+ e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val;
+ e->sem_sig_val_dst_cnt++;
+ return 0;
+}
+
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
{
VkResult ret;
@@ -668,22 +713,26 @@ int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
return AVERROR_EXTERNAL;
}
- for (int i = 0; i < e->sem_sig_cnt; i++)
+ for (int i = 0; i < e->sem_sig_val_dst_cnt; i++)
*e->sem_sig_val_dst[i] += 1;
/* Unlock all frames */
for (int j = 0; j < e->nb_frame_deps; j++) {
if (e->frame_locked[j]) {
- AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
+ AVFrame *f = e->frame_deps[j];
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+
if (e->frame_update[j]) {
- int nb_images = ff_vk_count_images(f);
+ int nb_images = ff_vk_count_images(vkf);
for (int i = 0; i < nb_images; i++) {
- f->layout[i] = e->layout_dst[j];
- f->access[i] = e->access_dst[j];
- f->queue_family[i] = e->queue_family_dst[j];
+ vkf->layout[i] = e->layout_dst[j];
+ vkf->access[i] = e->access_dst[j];
+ vkf->queue_family[i] = e->queue_family_dst[j];
}
}
- s->hwfc->unlock_frame(s->frames, f);
+ vkfc->unlock_frame(hwfc, vkf);
e->frame_locked[j] = 0;
}
}
@@ -767,6 +816,10 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
but should be ok */
};
+ VkMemoryAllocateFlagsInfo alloc_flags = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
+ .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
+ };
VkBufferMemoryRequirementsInfo2 req_desc = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
};
@@ -796,11 +849,18 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
/* In case the implementation prefers/requires dedicated allocation */
use_ded_mem = ded_req.prefersDedicatedAllocation |
ded_req.requiresDedicatedAllocation;
- if (use_ded_mem)
+ if (use_ded_mem) {
ded_alloc.buffer = buf->buf;
+ ded_alloc.pNext = alloc_pNext;
+ alloc_pNext = &ded_alloc;
+ }
- err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
- use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+ alloc_flags.pNext = alloc_pNext;
+ alloc_pNext = &alloc_flags;
+ }
+
+ err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext,
&buf->flags, &buf->mem);
if (err)
return err;
@@ -812,6 +872,14 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
return AVERROR_EXTERNAL;
}
+ if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+ VkBufferDeviceAddressInfo address_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+ .buffer = buf->buf,
+ };
+ buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
+ }
+
buf->size = size;
return 0;
@@ -933,8 +1001,8 @@ int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
return 0;
}
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
- int unnorm_coords, VkFilter filt)
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+ int unnorm_coords, VkFilter filt)
{
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
@@ -955,24 +1023,14 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
};
ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
- s->hwctx->alloc, &sctx->sampler[0]);
+ s->hwctx->alloc, sampler);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
ff_vk_ret2str(ret));
- return NULL;
+ return AVERROR_EXTERNAL;
}
- for (int i = 1; i < 4; i++)
- sctx->sampler[i] = sctx->sampler[0];
-
- return sctx;
-}
-
-void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx)
-{
- FFVulkanFunctions *vk = &s->vkfn;
- vk->DestroySampler(s->hwctx->act_dev, sctx->sampler[0],
- s->hwctx->alloc);
+ return 0;
}
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
@@ -995,69 +1053,131 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
}
typedef struct ImageViewCtx {
- VkImageView view;
+ VkImageView views[AV_NUM_DATA_POINTERS];
+ int nb_views;
} ImageViewCtx;
-static void destroy_imageview(void *opaque, uint8_t *data)
+static void destroy_imageviews(void *opaque, uint8_t *data)
{
FFVulkanContext *s = opaque;
FFVulkanFunctions *vk = &s->vkfn;
ImageViewCtx *iv = (ImageViewCtx *)data;
- vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
+ for (int i = 0; i < iv->nb_views; i++)
+ vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
+
av_free(iv);
}
-int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
- VkImageView *v, VkImage img, VkFormat fmt,
- const VkComponentMapping map)
+int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
+ VkImageView views[AV_NUM_DATA_POINTERS],
+ AVFrame *f)
{
int err;
+ VkResult ret;
AVBufferRef *buf;
FFVulkanFunctions *vk = &s->vkfn;
-
- VkImageViewCreateInfo imgview_spawn = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .pNext = NULL,
- .image = img,
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = fmt,
- .components = map,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- };
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ const int nb_images = ff_vk_count_images(vkf);
+ const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
ImageViewCtx *iv = av_mallocz(sizeof(*iv));
- VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
- s->hwctx->alloc, &iv->view);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ for (int i = 0; i < nb_planes; i++) {
+ VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_ASPECT_PLANE_0_BIT,
+ VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
+ VkImageViewCreateInfo view_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = NULL,
+ .image = vkf->img[FFMIN(i, nb_images - 1)],
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = rep_fmts[i],
+ .components = ff_comp_identity_map,
+ .subresourceRange = {
+ .aspectMask = plane_aspect[(nb_planes != 1) + i*(nb_planes != 1)],
+ .levelCount = 1,
+ .layerCount = 1,
+ },
+ };
+
+ ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
+ s->hwctx->alloc, &iv->views[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
+ iv->nb_views++;
}
- buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
+ buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageviews, s, 0);
if (!buf) {
- destroy_imageview(s, (uint8_t *)iv);
- return AVERROR(ENOMEM);
+ err = AVERROR(ENOMEM);
+ goto fail;
}
/* Add to queue dependencies */
err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
- if (err) {
+ if (err < 0)
av_buffer_unref(&buf);
- return err;
- }
- *v = iv->view;
+ memcpy(views, iv->views, nb_planes*sizeof(*views));
- return 0;
+ return err;
+
+fail:
+ for (int i = 0; i < iv->nb_views; i++)
+ vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
+ av_free(iv);
+ return err;
+}
+
+/* Append one barrier per image of pic to bar[], bumping *nb_bar, then record the new state in e. */
+/* Source state is e's pending update for pic if there is one, otherwise the frame's current state. */
+void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
+ AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
+ VkPipelineStageFlags src_stage,
+ VkPipelineStageFlags dst_stage,
+ VkAccessFlagBits new_access,
+ VkImageLayout new_layout,
+ uint32_t new_qf)
+{
+ int i, found;
+ AVVkFrame *vkf = (AVVkFrame *)pic->data[0];
+ const int nb_images = ff_vk_count_images(vkf);
+ for (i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == pic->data[0])
+ break;
+ found = (i < e->nb_frame_deps) && (e->frame_update[i]) ? i : -1;
+ for (int i = 0; i < nb_images; i++) {
+ bar[*nb_bar] = (VkImageMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+ .pNext = NULL,
+ .srcStageMask = src_stage,
+ .dstStageMask = dst_stage,
+ .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i],
+ .dstAccessMask = new_access,
+ .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0],
+ .newLayout = new_layout,
+ .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0],
+ .dstQueueFamilyIndex = new_qf,
+ .image = vkf->img[i],
+ .subresourceRange = (VkImageSubresourceRange) {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .layerCount = 1,
+ .levelCount = 1,
+ },
+ };
+ *nb_bar += 1;
+ }
+ ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL);
}
int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
@@ -1077,11 +1197,11 @@ int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *na
return 0;
}
-void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3])
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z)
{
- shd->local_size[0] = local_size[0];
- shd->local_size[1] = local_size[1];
- shd->local_size[2] = local_size[2];
+ shd->local_size[0] = x;
+ shd->local_size[1] = y;
+ shd->local_size[2] = z;
av_bprintf(&shd->src, "layout (local_size_x = %i, "
"local_size_y = %i, local_size_z = %i) in;\n\n",
@@ -1113,39 +1233,21 @@ void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio)
+/**
+ * Releases what a shader owns: finalizes its source buffer and destroys its
+ * Vulkan shader module if one is set.
+ */
void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
{
FFVulkanFunctions *vk = &s->vkfn;
- vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
+ av_bprint_finalize(&shd->src, NULL);
+
+ /* Skip the destroy call when no module handle is set */
+ if (shd->shader.module)
+ vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
}
-int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
- const char *entrypoint)
+int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
+ uint8_t *spirv, size_t spirv_size, const char *entrypoint)
{
- int err;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
VkShaderModuleCreateInfo shader_create;
- uint8_t *spirv;
- size_t spirv_size;
- void *priv;
shd->shader.pName = entrypoint;
- if (!s->spirv_compiler) {
-#if CONFIG_LIBGLSLANG
- s->spirv_compiler = ff_vk_glslang_init();
-#elif CONFIG_LIBSHADERC
- s->spirv_compiler = ff_vk_shaderc_init();
-#else
- return AVERROR(ENOSYS);
-#endif
- if (!s->spirv_compiler)
- return AVERROR(ENOMEM);
- }
-
- err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv,
- &spirv_size, entrypoint, &priv);
- if (err < 0)
- return err;
-
av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
shd->name, spirv_size);
@@ -1157,11 +1259,8 @@ int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
&shd->shader.module);
-
- s->spirv_compiler->free_shader(s->spirv_compiler, &priv);
-
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
+ av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n",
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
@@ -1190,132 +1289,88 @@ static const struct descriptor_props {
[VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, },
};
-int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
- int num, int only_print_to_shader)
+int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd,
+ FFVulkanDescriptorSetBinding *desc, int nb,
+ int read_only, int print_to_shader_only)
{
VkResult ret;
- VkDescriptorSetLayout *layout;
+ int has_sampler = 0;
FFVulkanFunctions *vk = &s->vkfn;
+ FFVulkanDescriptorSet *set;
+ VkDescriptorSetLayoutCreateInfo desc_create_layout;
- if (only_print_to_shader)
+ if (print_to_shader_only)
goto print;
- pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
- pl->desc_layout_num + pl->qf->nb_queues);
- if (!pl->desc_layout)
+ /* Actual layout allocated for the pipeline */
+ set = av_realloc_array(pl->desc_set, sizeof(*pl->desc_set),
+ pl->nb_descriptor_sets + 1);
+ if (!set)
return AVERROR(ENOMEM);
+ pl->desc_set = set;
+ set = &set[pl->nb_descriptor_sets];
+ memset(set, 0, sizeof(*set));
- pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized,
- sizeof(*pl->desc_set_initialized),
- pl->descriptor_sets_num + 1);
- if (!pl->desc_set_initialized)
+ set->binding = av_mallocz(nb*sizeof(*set->binding));
+ if (!set->binding)
return AVERROR(ENOMEM);
- pl->desc_set_initialized[pl->descriptor_sets_num] = 0;
- layout = &pl->desc_layout[pl->desc_layout_num];
-
- { /* Create descriptor set layout descriptions */
- VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
- VkDescriptorSetLayoutBinding *desc_binding;
-
- desc_binding = av_mallocz(sizeof(*desc_binding)*num);
- if (!desc_binding)
- return AVERROR(ENOMEM);
-
- for (int i = 0; i < num; i++) {
- desc_binding[i].binding = i;
- desc_binding[i].descriptorType = desc[i].type;
- desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
- desc_binding[i].stageFlags = desc[i].stages;
- desc_binding[i].pImmutableSamplers = desc[i].sampler ?
- desc[i].sampler->sampler :
- NULL;
- }
-
- desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- desc_create_layout.pBindings = desc_binding;
- desc_create_layout.bindingCount = num;
-
- for (int i = 0; i < pl->qf->nb_queues; i++) {
- ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
- s->hwctx->alloc, &layout[i]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
- "layout: %s\n", ff_vk_ret2str(ret));
- av_free(desc_binding);
- return AVERROR_EXTERNAL;
- }
- }
-
- av_free(desc_binding);
+ set->binding_offset = av_mallocz(nb*sizeof(*set->binding_offset));
+ if (!set->binding_offset) {
+ av_freep(&set->binding);
+ return AVERROR(ENOMEM);
}
- { /* Pool each descriptor by type and update pool counts */
- for (int i = 0; i < num; i++) {
- int j;
- for (j = 0; j < pl->pool_size_desc_num; j++)
- if (pl->pool_size_desc[j].type == desc[i].type)
- break;
- if (j >= pl->pool_size_desc_num) {
- pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
- sizeof(*pl->pool_size_desc),
- ++pl->pool_size_desc_num);
- if (!pl->pool_size_desc)
- return AVERROR(ENOMEM);
- memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
- }
- pl->pool_size_desc[j].type = desc[i].type;
- pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
- }
- }
+ desc_create_layout = (VkDescriptorSetLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = nb,
+ .pBindings = set->binding,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
+ };
- { /* Create template creation struct */
- VkDescriptorUpdateTemplateCreateInfo *dt;
- VkDescriptorUpdateTemplateEntry *des_entries;
+ for (int i = 0; i < nb; i++) {
+ set->binding[i].binding = i;
+ set->binding[i].descriptorType = desc[i].type;
+ set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
+ set->binding[i].stageFlags = desc[i].stages;
+ set->binding[i].pImmutableSamplers = desc[i].samplers;
- /* Freed after descriptor set initialization */
- des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
- if (!des_entries)
- return AVERROR(ENOMEM);
+ if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ has_sampler |= 1;
+ }
- for (int i = 0; i < num; i++) {
- des_entries[i].dstBinding = i;
- des_entries[i].descriptorType = desc[i].type;
- des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
- des_entries[i].dstArrayElement = 0;
- des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
- des_entries[i].stride = descriptor_props[desc[i].type].struct_size;
- }
+ set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+ if (has_sampler)
+ set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;
- pl->desc_template_info = av_realloc_array(pl->desc_template_info,
- sizeof(*pl->desc_template_info),
- pl->total_descriptor_sets + pl->qf->nb_queues);
- if (!pl->desc_template_info)
- return AVERROR(ENOMEM);
+ ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+ s->hwctx->alloc, &set->layout);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
- dt = &pl->desc_template_info[pl->total_descriptor_sets];
- memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);
+ vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size);
- for (int i = 0; i < pl->qf->nb_queues; i++) {
- dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
- dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
- dt[i].descriptorSetLayout = layout[i];
- dt[i].pDescriptorUpdateEntries = des_entries;
- dt[i].descriptorUpdateEntryCount = num;
- }
- }
+ set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment);
- pl->descriptor_sets_num++;
+ for (int i = 0; i < nb; i++)
+ vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout,
+ i, &set->binding_offset[i]);
- pl->desc_layout_num += pl->qf->nb_queues;
- pl->total_descriptor_sets += pl->qf->nb_queues;
+ set->read_only = read_only;
+ set->nb_bindings = nb;
+ pl->nb_descriptor_sets++;
print:
/* Write shader info */
- for (int i = 0; i < num; i++) {
+ for (int i = 0; i < nb; i++) {
const struct descriptor_props *prop = &descriptor_props[desc[i].type];
- GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
+ GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i);
if (desc[i].mem_layout)
GLSLA(", %s", desc[i].mem_layout);
@@ -1347,171 +1402,260 @@ print:
return 0;
}
-void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- int set_id)
+/**
+ * Allocates the descriptor buffers of a pipeline for use with an execution
+ * pool.
+ *
+ * For every descriptor set of the pipeline a host-visible, host-coherent
+ * buffer is created and mapped. Its size is one aligned set for read-only
+ * sets, or pool->pool_size aligned sets otherwise. The matching entries of
+ * pl->desc_bind and pl->bound_buffer_indices are filled in as well.
+ *
+ * Returns 0 on success or a negative error code. Anything allocated before a
+ * failure is not released here.
+ */
+int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
+ FFVulkanPipeline *pl)
{
- FFVulkanFunctions *vk = &s->vkfn;
+ int err;
- /* If a set has never been updated, update all queues' sets. */
- if (!pl->desc_set_initialized[set_id]) {
- for (int i = 0; i < pl->qf->nb_queues; i++) {
- int idx = set_id*pl->qf->nb_queues + i;
- vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
- pl->desc_set[idx],
- pl->desc_template[idx],
- s);
- }
- pl->desc_set_initialized[set_id] = 1;
- return;
- }
+ /* One binding info and one buffer index per descriptor set */
+ pl->desc_bind = av_mallocz(pl->nb_descriptor_sets*sizeof(*pl->desc_bind));
+ if (!pl->desc_bind)
+ return AVERROR(ENOMEM);
+
+ pl->bound_buffer_indices = av_mallocz(pl->nb_descriptor_sets*
+ sizeof(*pl->bound_buffer_indices));
+ if (!pl->bound_buffer_indices)
+ return AVERROR(ENOMEM);
-// set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
+ for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+ FFVulkanDescriptorSet *set = &pl->desc_set[i];
+ /* Read-only sets keep a single copy; others get pool_size copies */
+ int nb = set->read_only ? 1 : pool->pool_size;
+
+ err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb,
+ NULL, NULL, set->usage,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+ if (err < 0)
+ return err;
+
+ /* Keep the buffer mapped; descriptors are written through desc_mem */
+ err = ff_vk_map_buffers(s, &set->buf, &set->desc_mem, 1, 0);
+ if (err < 0)
+ return err;
+
+ pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
+ .usage = set->usage,
+ .address = set->buf.address,
+ };
+
+ /* Set i is backed by descriptor buffer i */
+ pl->bound_buffer_indices[i] = i;
+ }
- vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
- pl->desc_set[set_id],
- pl->desc_template[set_id],
- s);
+ return 0;
}
-void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
- FFVulkanPipeline *pl,
- VkShaderStageFlagBits stage,
- int offset, size_t size, void *src)
+/**
+ * Writes a single descriptor of desc_size bytes into the mapped descriptor
+ * memory of a set, using GetDescriptorEXT.
+ *
+ * The destination is the set's base mapping, plus the per-execution offset
+ * (zero for read-only sets, aligned_size * e->idx otherwise), plus the offset
+ * of binding bind_idx, plus array_idx elements of desc_size bytes each.
+ */
+static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanDescriptorSet *set,
+ int bind_idx, int array_idx,
+ VkDescriptorGetInfoEXT *desc_get_info,
+ size_t desc_size)
{
FFVulkanFunctions *vk = &s->vkfn;
- vk->CmdPushConstants(e->buf, pl->pipeline_layout,
- stage, offset, size, src);
+ const size_t exec_offset = set->read_only ? 0 : set->aligned_size*e->idx;
+ void *desc = set->desc_mem + /* Base */
+ exec_offset + /* Execution context */
+ set->binding_offset[bind_idx] + /* Descriptor binding */
+ array_idx*desc_size; /* Array position */
+
+ vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc);
}
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
+/**
+ * Writes a sampler descriptor into binding bind, array element offs, of
+ * descriptor set number set of the pipeline.
+ *
+ * Only bindings declared as VK_DESCRIPTOR_TYPE_SAMPLER are accepted; any other
+ * binding type is logged and rejected with AVERROR(EINVAL). Returns 0 on
+ * success. The set and bind indices are not range-checked here.
+ */
+int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkSampler *sampler)
{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
+ FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+ /* The descriptor type is the one declared for this binding */
+ VkDescriptorGetInfoEXT desc_get_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+ .type = desc_set->binding[bind].descriptorType,
+ };
- pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
- if (!pl->desc_staging)
- return AVERROR(ENOMEM);
+ switch (desc_get_info.type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ desc_get_info.data.pSampler = sampler;
+ break;
+ default:
+ av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+ set, bind, desc_get_info.type);
+ return AVERROR(EINVAL);
+ break;
+ };
- { /* Init descriptor set pool */
- VkDescriptorPoolCreateInfo pool_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
- .poolSizeCount = pl->pool_size_desc_num,
- .pPoolSizes = pl->pool_size_desc,
- .maxSets = pl->total_descriptor_sets,
- };
+ update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info,
+ s->desc_buf_props.samplerDescriptorSize);
- ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
- s->hwctx->alloc, &pl->desc_pool);
- av_freep(&pl->pool_size_desc);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
- "pool: %s\n", ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+ return 0;
+}
- { /* Allocate descriptor sets */
- VkDescriptorSetAllocateInfo alloc_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
- .descriptorPool = pl->desc_pool,
- .descriptorSetCount = pl->total_descriptor_sets,
- .pSetLayouts = pl->desc_layout,
- };
+/**
+ * Writes an image descriptor into binding bind, array element offs, of
+ * descriptor set number set of the pipeline.
+ *
+ * The binding must be declared as a sampled image, storage image, input
+ * attachment or combined image sampler; the descriptor size is picked from
+ * s->desc_buf_props accordingly. Any other binding type is logged and
+ * rejected with AVERROR(EINVAL). Returns 0 on success. The set and bind
+ * indices are not range-checked here.
+ */
+int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkImageView view, VkImageLayout layout, VkSampler sampler)
+{
+ FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+ /* The descriptor type is the one declared for this binding */
+ VkDescriptorGetInfoEXT desc_get_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+ .type = desc_set->binding[bind].descriptorType,
+ };
+ VkDescriptorImageInfo desc_img_info = {
+ .imageView = view,
+ .sampler = sampler,
+ .imageLayout = layout,
+ };
+ size_t desc_size;
- pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
- if (!pl->desc_set)
- return AVERROR(ENOMEM);
+ /* Every accepted type points at the same image info; only the union
+ * member and the descriptor size differ */
+ switch (desc_get_info.type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ desc_get_info.data.pSampledImage = &desc_img_info;
+ desc_size = s->desc_buf_props.sampledImageDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ desc_get_info.data.pStorageImage = &desc_img_info;
+ desc_size = s->desc_buf_props.storageImageDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ desc_get_info.data.pInputAttachmentImage = &desc_img_info;
+ desc_size = s->desc_buf_props.inputAttachmentDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ desc_get_info.data.pCombinedImageSampler = &desc_img_info;
+ desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize;
+ break;
+ default:
+ av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+ set, bind, desc_get_info.type);
+ return AVERROR(EINVAL);
+ break;
+ };
- ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
- pl->desc_set);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+ update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);
- { /* Finally create the pipeline layout */
- VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging,
- .pushConstantRangeCount = pl->push_consts_num,
- .pPushConstantRanges = pl->push_consts,
- };
+ return 0;
+}
- for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
- pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];
+/**
+ * Writes a buffer descriptor into binding bind, array element offs, of
+ * descriptor set number set of the pipeline.
+ *
+ * The buffer is described by its device address, length and (for texel
+ * buffers) format. The binding must be declared as a uniform buffer, storage
+ * buffer, uniform texel buffer or storage texel buffer; the descriptor size
+ * is picked from s->desc_buf_props accordingly. Any other binding type is
+ * logged and rejected with AVERROR(EINVAL). Returns 0 on success. The set and
+ * bind indices are not range-checked here.
+ */
+int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt)
+{
+ FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+ /* The descriptor type is the one declared for this binding */
+ VkDescriptorGetInfoEXT desc_get_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+ .type = desc_set->binding[bind].descriptorType,
+ };
+ /* VkDescriptorAddressInfoEXT carries an sType member which has to be
+ * set; leaving it zero-initialized is not a valid structure type */
+ VkDescriptorAddressInfoEXT desc_buf_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT,
+ .address = addr,
+ .range = len,
+ .format = fmt,
+ };
+ size_t desc_size;
- ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
- s->hwctx->alloc, &pl->pipeline_layout);
- av_freep(&pl->push_consts);
- pl->push_consts_num = 0;
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+ /* Every accepted type points at the same address info; only the union
+ * member and the descriptor size differ */
+ switch (desc_get_info.type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ desc_get_info.data.pUniformBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.uniformBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ desc_get_info.data.pStorageBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.storageBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ desc_get_info.data.pUniformTexelBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ desc_get_info.data.pStorageTexelBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize;
+ break;
+ default:
+ av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+ set, bind, desc_get_info.type);
+ return AVERROR(EINVAL);
+ break;
+ };
- { /* Descriptor template (for tightly packed descriptors) */
- VkDescriptorUpdateTemplateCreateInfo *dt;
+ update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);
- pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
- if (!pl->desc_template)
- return AVERROR(ENOMEM);
+ return 0;
+}
- /* Create update templates for the descriptor sets */
- for (int i = 0; i < pl->total_descriptor_sets; i++) {
- dt = &pl->desc_template_info[i];
- dt->pipelineLayout = pl->pipeline_layout;
- ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
- dt, s->hwctx->alloc,
- &pl->desc_template[i]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
- "template: %s\n", ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+/**
+ * Sets one image descriptor per plane of a frame in a single binding.
+ *
+ * The plane count comes from the software pixel format of the frame's
+ * hardware frames context; views[i] is written to array element i of the
+ * given set/binding with the same layout and sampler for every plane.
+ *
+ * NOTE(review): the return value of ff_vk_set_descriptor_image() is ignored,
+ * so a binding of the wrong type is only reported through the log.
+ */
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, AVFrame *f,
+ VkImageView *views, int set, int binding,
+ VkImageLayout layout, VkSampler sampler)
+{
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
- /* Free the duplicated memory used for the template entries */
- for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
- dt = &pl->desc_template_info[i];
- av_free((void *)dt->pDescriptorUpdateEntries);
- }
+ for (int i = 0; i < nb_planes; i++)
+ ff_vk_set_descriptor_image(s, pl, e, set, binding, i,
+ views[i], layout, sampler);
+}
+
+/**
+ * Records a push constant update on the execution context's command buffer.
+ *
+ * size bytes from src are pushed at the given offset for the given shader
+ * stage(s), using the pipeline's layout.
+ */
+void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanPipeline *pl,
+ VkShaderStageFlagBits stage,
+ int offset, size_t size, void *src)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ vk->CmdPushConstants(e->buf, pl->pipeline_layout,
+ stage, offset, size, src);
+}
+
+/**
+ * Builds pl->pipeline_layout from the layout of every descriptor set added to
+ * the pipeline and from its push constant ranges.
+ *
+ * Returns 0 on success, AVERROR(ENOMEM) if the temporary layout array cannot
+ * be allocated, or AVERROR_EXTERNAL if the pipeline layout cannot be created.
+ */
+static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkPipelineLayoutCreateInfo pipeline_layout_info;
+
+ /* Size the array per element, not per pointer: a VkDescriptorSetLayout
+ * handle is 64 bits wide even where pointers are 32 bits, so sizing by
+ * the pointer would under-allocate there */
+ VkDescriptorSetLayout *desc_layouts = av_malloc(pl->nb_descriptor_sets*
+ sizeof(*desc_layouts));
+ if (!desc_layouts)
+ return AVERROR(ENOMEM);
+
+ /* Gather the per-set layouts into the contiguous array Vulkan expects */
+ for (int i = 0; i < pl->nb_descriptor_sets; i++)
+ desc_layouts[i] = pl->desc_set[i].layout;
+
+ /* Finally create the pipeline layout */
+ pipeline_layout_info = (VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pSetLayouts = desc_layouts,
+ .setLayoutCount = pl->nb_descriptor_sets,
+ .pushConstantRangeCount = pl->push_consts_num,
+ .pPushConstantRanges = pl->push_consts,
+ };
- av_freep(&pl->desc_template_info);
+ ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info,
+ s->hwctx->alloc, &pl->pipeline_layout);
+ /* The array is only needed for the duration of the create call */
+ av_free(desc_layouts);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
return 0;
}
int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkQueueFamilyCtx *qf)
+ FFVkSPIRVShader *shd)
{
- int i;
+ int err;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- VkComputePipelineCreateInfo pipe = {
+ VkComputePipelineCreateInfo pipeline_create_info;
+
+ err = init_pipeline_layout(s, pl);
+ if (err < 0)
+ return err;
+
+ pipeline_create_info = (VkComputePipelineCreateInfo) {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
.layout = pl->pipeline_layout,
+ .stage = shd->shader,
};
- pl->qf = qf;
-
- for (i = 0; i < pl->shaders_num; i++) {
- if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
- pipe.stage = pl->shaders[i]->shader;
- break;
- }
- }
- if (i == pl->shaders_num) {
- av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
- return AVERROR(EINVAL);
- }
-
- ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+ ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
+ &pipeline_create_info,
s->hwctx->alloc, &pl->pipeline);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
@@ -1520,77 +1664,57 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
}
pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
+ pl->wg_size[0] = shd->local_size[0];
+ pl->wg_size[1] = shd->local_size[1];
+ pl->wg_size[2] = shd->local_size[2];
return 0;
}
-void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
+/**
+ * Binds a pipeline and its descriptor buffers on the execution context's
+ * command buffer.
+ *
+ * For each descriptor set the offset into its descriptor buffer is zero for
+ * read-only sets and aligned_size * e->idx otherwise.
+ *
+ * NOTE(review): offsets[] is a fixed 1024-entry stack array and
+ * pl->nb_descriptor_sets is not checked against it here.
+ */
+void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
+ VkDeviceSize offsets[1024];
- vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
-
-// for (int i = 0; i < pl->descriptor_sets_num; i++)
- // pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
+ /* Select this execution context's copy of every non-read-only set */
+ for (int i = 0; i < pl->nb_descriptor_sets; i++)
+ offsets[i] = pl->desc_set[i].read_only ? 0 : pl->desc_set[i].aligned_size*e->idx;
- vk->CmdBindDescriptorSets(e->buf, pl->bind_point,
- pl->pipeline_layout, 0,
- pl->descriptor_sets_num,
- (VkDescriptorSet *)pl->desc_staging,
- 0, NULL);
+ /* Bind pipeline */
+ vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
+ /* Bind descriptor buffers */
+ vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind);
+ /* Binding offsets */
+ vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout,
+ 0, pl->nb_descriptor_sets,
+ pl->bound_buffer_indices, offsets);
}
+/**
+ * Releases everything a pipeline owns: the Vulkan pipeline and its layout,
+ * every descriptor set's buffer, layout and binding arrays, the descriptor
+ * buffer binding arrays and the push constant ranges.
+ *
+ * The FFVulkanPipeline structure itself is not freed.
+ */
void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
- for (int i = 0; i < pl->shaders_num; i++) {
- FFVkSPIRVShader *shd = pl->shaders[i];
- av_bprint_finalize(&shd->src, NULL);
- vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
- s->hwctx->alloc);
- av_free(shd);
- }
-
- vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
- vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
- s->hwctx->alloc);
+ if (pl->pipeline)
+ vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
+ if (pl->pipeline_layout)
+ vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
+ s->hwctx->alloc);
- for (int i = 0; i < pl->desc_layout_num; i++) {
- if (pl->desc_template && pl->desc_template[i])
- vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
- s->hwctx->alloc);
- if (pl->desc_layout && pl->desc_layout[i])
- vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
+ /* Per-set resources: descriptor buffer, layout and binding arrays */
+ for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+ FFVulkanDescriptorSet *set = &pl->desc_set[i];
+ ff_vk_unmap_buffers(s, &set->buf, 1, 0);
+ ff_vk_free_buf(s, &set->buf);
+ if (set->layout)
+ vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout,
s->hwctx->alloc);
+ av_free(set->binding);
+ av_free(set->binding_offset);
}
- /* Also frees the descriptor sets */
- if (pl->desc_pool)
- vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
- s->hwctx->alloc);
-
- av_freep(&pl->desc_staging);
av_freep(&pl->desc_set);
- av_freep(&pl->shaders);
- av_freep(&pl->desc_layout);
- av_freep(&pl->desc_template);
- av_freep(&pl->desc_set_initialized);
+ av_freep(&pl->desc_bind);
+ /* Allocated together with desc_bind, so it has to be released with it */
+ av_freep(&pl->bound_buffer_indices);
+ /* desc_set is gone; keep the count in step so it is never indexed again */
+ pl->nb_descriptor_sets = 0;
av_freep(&pl->push_consts);
pl->push_consts_num = 0;
-
- /* Only freed in case of failure */
- av_freep(&pl->pool_size_desc);
- if (pl->desc_template_info) {
- for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
- VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
- av_free((void *)dt->pDescriptorUpdateEntries);
- }
- av_freep(&pl->desc_template_info);
- }
-
- av_free(pl);
}
void ff_vk_uninit(FFVulkanContext *s)
@@ -1599,9 +1723,6 @@ void ff_vk_uninit(FFVulkanContext *s)
av_freep(&s->qf_props);
av_freep(&s->video_props);
- if (s->spirv_compiler)
- s->spirv_compiler->uninit(&s->spirv_compiler);
-
av_buffer_unref(&s->device_ref);
av_buffer_unref(&s->frames_ref);
}
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index e66ca59ef7..1321fb8ba8 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -30,11 +30,6 @@
#include "hwcontext_vulkan.h"
#include "vulkan_loader.h"
-#define FF_VK_DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
- VK_IMAGE_USAGE_STORAGE_BIT | \
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
- VK_IMAGE_USAGE_TRANSFER_DST_BIT)
-
/* GLSL management macros */
#define INDENT(N) INDENT_##N
#define INDENT_0
@@ -59,6 +54,8 @@
goto fail; \
} while (0)
+#define DUP_SAMPLER(x) { x, x, x, x }
+
typedef struct FFVkSPIRVShader {
const char *name; /* Name for id/debugging purposes */
AVBPrint src;
@@ -66,19 +63,6 @@ typedef struct FFVkSPIRVShader {
VkPipelineShaderStageCreateInfo shader;
} FFVkSPIRVShader;
-typedef struct FFVkSPIRVCompiler {
- void *priv;
- int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx,
- struct FFVkSPIRVShader *shd, uint8_t **data,
- size_t *size, const char *entrypoint, void **opaque);
- void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque);
- void (*uninit)(struct FFVkSPIRVCompiler **ctx);
-} FFVkSPIRVCompiler;
-
-typedef struct FFVkSampler {
- VkSampler sampler[4];
-} FFVkSampler;
-
typedef struct FFVulkanDescriptorSetBinding {
const char *name;
VkDescriptorType type;
@@ -88,8 +72,7 @@ typedef struct FFVulkanDescriptorSetBinding {
uint32_t dimensions; /* Needed for e.g. sampler%iD */
uint32_t elems; /* 0 - scalar, 1 or more - vector */
VkShaderStageFlags stages;
- FFVkSampler *sampler; /* Sampler to use for all elems */
- void *updater; /* Pointer to VkDescriptor*Info */
+ VkSampler samplers[4]; /* Sampler to use for all elems */
} FFVulkanDescriptorSetBinding;
typedef struct FFVkBuffer {
@@ -97,6 +80,7 @@ typedef struct FFVkBuffer {
VkDeviceMemory mem;
VkMemoryPropertyFlagBits flags;
size_t size;
+ VkDeviceAddress address;
} FFVkBuffer;
typedef struct FFVkQueueFamilyCtx {
@@ -104,42 +88,45 @@ typedef struct FFVkQueueFamilyCtx {
int nb_queues;
} FFVkQueueFamilyCtx;
-typedef struct FFVulkanPipeline {
- FFVkQueueFamilyCtx *qf;
+/* State of one descriptor set of a pipeline, backed by a descriptor buffer */
+typedef struct FFVulkanDescriptorSet {
+ VkDescriptorSetLayout layout; /* Layout created from the bindings below */
+ FFVkBuffer buf; /* Buffer holding the descriptor data */
+ uint8_t *desc_mem; /* Host mapping of buf */
+ VkDeviceSize layout_size; /* Size reported for the layout */
+ VkDeviceSize aligned_size; /* layout_size aligned to descriptorBufferOffsetAlignment */
+ VkDeviceSize total_size; /* Once registered to an exec context */
+ VkBufferUsageFlags usage; /* Usage flags buf is created with */
+ VkDescriptorSetLayoutBinding *binding; /* nb_bindings entries */
+ VkDeviceSize *binding_offset; /* Offset of each binding within the layout */
+ int nb_bindings;
+
+ int read_only; /* If set, one copy is shared instead of one per exec context */
+} FFVulkanDescriptorSet;
+
+/* A pipeline together with its layout, push constants and descriptor sets */
+typedef struct FFVulkanPipeline {
VkPipelineBindPoint bind_point;
/* Contexts */
VkPipelineLayout pipeline_layout;
VkPipeline pipeline;
- /* Shaders */
- FFVkSPIRVShader **shaders;
- int shaders_num;
-
/* Push consts */
VkPushConstantRange *push_consts;
int push_consts_num;
+ /* Workgroup */
+ int wg_size[3]; /* Copied from the shader's local_size */
+
/* Descriptors */
- VkDescriptorSetLayout *desc_layout;
- VkDescriptorPool desc_pool;
- VkDescriptorSet *desc_set;
- void **desc_staging;
- VkDescriptorSetLayoutBinding **desc_binding;
- VkDescriptorUpdateTemplate *desc_template;
- int *desc_set_initialized;
- int desc_layout_num;
- int descriptor_sets_num;
- int total_descriptor_sets;
- int pool_size_desc_num;
-
- /* Temporary, used to store data in between initialization stages */
- VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
- VkDescriptorPoolSize *pool_size_desc;
+ FFVulkanDescriptorSet *desc_set; /* nb_descriptor_sets entries */
+ VkDescriptorBufferBindingInfoEXT *desc_bind; /* One binding info per set */
+ uint32_t *bound_buffer_indices; /* Descriptor buffer index used by each set */
+ int nb_descriptor_sets;
} FFVulkanPipeline;
typedef struct FFVkExecContext {
+ int idx;
const struct FFVkExecPool *parent;
/* Queue for the execution context */
@@ -162,7 +149,7 @@ typedef struct FFVkExecContext {
unsigned int buf_deps_alloc_size;
/* Frame dependencies */
- AVBufferRef **frame_deps;
+ AVFrame **frame_deps;
unsigned int frame_deps_alloc_size;
int nb_frame_deps;
@@ -185,6 +172,7 @@ typedef struct FFVkExecContext {
uint64_t **sem_sig_val_dst;
unsigned int sem_sig_val_dst_alloc;
+ int sem_sig_val_dst_cnt;
uint8_t *frame_locked;
unsigned int frame_locked_alloc_size;
@@ -229,6 +217,8 @@ typedef struct FFVulkanContext {
VkPhysicalDeviceProperties2 props;
VkPhysicalDeviceDriverProperties driver_props;
VkPhysicalDeviceMemoryProperties mprops;
+ VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;
+ VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props;
VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
VkQueueFamilyVideoPropertiesKHR *video_props;
VkQueueFamilyProperties2 *qf_props;
@@ -244,8 +234,6 @@ typedef struct FFVulkanContext {
uint32_t qfs[5];
int nb_qfs;
- FFVkSPIRVCompiler *spirv_compiler;
-
/* Properties */
int output_width;
int output_height;
@@ -286,15 +274,15 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
int ff_vk_load_props(FFVulkanContext *s);
/**
- * Loads queue families into the main context.
* Chooses a QF and loads it into a context.
*/
-void ff_vk_qf_fill(FFVulkanContext *s);
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
VkQueueFlagBits dev_family);
/**
* Allocates/frees an execution pool.
+ * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add()
+ * has been called.
*/
int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
FFVkExecPool *pool, int nb_contexts,
@@ -340,17 +328,28 @@ void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
AVBufferRef **deps, int nb_deps, int ref);
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef *vkfb, VkPipelineStageFlagBits in_wait_dst_flag);
-void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
- VkImageMemoryBarrier2 *bar);
+ AVFrame *f, VkPipelineStageFlagBits in_wait_dst_flag);
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+ VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar);
+int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
+ VkSemaphore *dst, uint64_t *dst_val,
+ AVFrame *f);
void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e);
/**
* Create an imageview and add it as a dependency to an execution.
*/
-int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
- VkImageView *v, VkImage img, VkFormat fmt,
- const VkComponentMapping map);
+int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
+ VkImageView views[AV_NUM_DATA_POINTERS],
+ AVFrame *f);
+
+void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
+ AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
+ VkPipelineStageFlags src_stage,
+ VkPipelineStageFlags dst_stage,
+ VkAccessFlagBits new_access,
+ VkImageLayout new_layout,
+ uint32_t new_qf);
/**
* Memory/buffer/image allocation helpers.
@@ -372,33 +371,22 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
/**
- * Sampler management.
+ * Create a sampler.
*/
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
- int unnorm_coords, VkFilter filt);
-void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx);
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+ int unnorm_coords, VkFilter filt);
/**
* Shader management.
*/
int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
VkShaderStageFlags stage);
-void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z);
void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio);
-int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
- const char *entrypoint);
+int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
+ uint8_t *spirv, size_t spirv_size, const char *entrypoint);
void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd);
-/**
- * Register a descriptor set.
- * Update a descriptor set for execution.
- */
-int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
- int num, int only_print_to_shader);
-void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- int set_id);
-
/**
* Add/update push constants for execution.
*/
@@ -410,15 +398,45 @@ void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
int offset, size_t size, void *src);
/**
- * Pipeline management.
+ * Add descriptor to a pipeline. Must be called before pipeline init.
*/
+int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd,
+ FFVulkanDescriptorSetBinding *desc, int nb,
+ int read_only, int print_to_shader_only);
+
+/* Initialize/free a pipeline. */
int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkQueueFamilyCtx *qf);
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
-void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
- FFVulkanPipeline *pl);
+ FFVkSPIRVShader *shd);
void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl);
+/**
+ * Register a pipeline with an exec pool.
+ * Pool may be NULL if all descriptor sets are read-only.
+ */
+int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
+ FFVulkanPipeline *pl);
+
+/* Bind pipeline */
+void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanPipeline *pl);
+
+/* Update sampler/image/buffer descriptors. e may be NULL for read-only descriptors. */
+int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkSampler *sampler);
+int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkImageView view, VkImageLayout layout, VkSampler sampler);
+int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt);
+
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, AVFrame *f,
+ VkImageView *views, int set, int binding,
+ VkImageLayout layout, VkSampler sampler);
+
/**
* Frees main context.
*/
--
2.39.2
[-- Attachment #52: 0051-hwcontext_vulkan-rewrite-to-support-multiplane-surfa.patch --]
[-- Type: text/x-diff, Size: 68673 bytes --]
From f36680714e0636288dacf687e766a9222fe04867 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:37:35 +0100
Subject: [PATCH 51/72] hwcontext_vulkan: rewrite to support multiplane
surfaces
---
libavutil/hwcontext_vulkan.c | 744 +++++++++++++++++------------------
libavutil/hwcontext_vulkan.h | 69 ++--
2 files changed, 411 insertions(+), 402 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index e7c14fad74..027ecc76b1 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -62,6 +64,8 @@ typedef struct VulkanQueueCtx {
VkFence fence;
VkQueue queue;
int was_synchronous;
+ int qf;
+ int qidx;
/* Buffer dependencies */
AVBufferRef **buf_deps;
@@ -116,6 +120,11 @@ typedef struct VulkanDevicePriv {
} VulkanDevicePriv;
typedef struct VulkanFramesPriv {
+ const VkFormat *fmts;
+ int nb_images;
+ VkImageAspectFlags aspect;
+ const struct FFVkFormatEntry *fmt;
+
/* Image conversions */
VulkanExecCtx conv_ctx;
@@ -145,112 +154,201 @@ typedef struct AVVkFrameInternal {
#endif
} AVVkFrameInternal;
-#define ADD_VAL_TO_LIST(list, count, val) \
- do { \
- list = av_realloc_array(list, sizeof(*list), ++count); \
- if (!list) { \
- err = AVERROR(ENOMEM); \
- goto fail; \
- } \
- list[count - 1] = av_strdup(val); \
- if (!list[count - 1]) { \
- err = AVERROR(ENOMEM); \
- goto fail; \
- } \
- } while(0)
-
-#define RELEASE_PROPS(props, count) \
- if (props) { \
- for (int i = 0; i < count; i++) \
- av_free((void *)((props)[i])); \
- av_free((void *)props); \
- }
+#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) /* aspect mask covering planes 0 and 1 */
+#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) /* aspect mask covering planes 0, 1 and 2 */
-static const struct {
+static const struct FFVkFormatEntry {
+ VkFormat vkf;
enum AVPixelFormat pixfmt;
- const VkFormat vkfmts[5];
-} vk_pixfmt_planar_map[] = {
- { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
-
- { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
- { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
- { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
- { AV_PIX_FMT_P012, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
- { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
-
- { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-
- { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
- { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-
- { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- /* There is no AV_PIX_FMT_YUVA420P12 */
- { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_VUYX, { VK_FORMAT_R8G8B8A8_UNORM } },
- { AV_PIX_FMT_XV36, { VK_FORMAT_R16G16B16A16_UNORM } },
-
- { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } },
- { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } },
- { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } },
- { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } },
- { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } },
- { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
- { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
- { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
- { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
- { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } },
- { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } },
-
- /* Lower priority as there's an endianess-dependent overlap between these
- * and rgba/bgr0, and PACK32 formats are more limited */
- { AV_PIX_FMT_BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
- { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
-
- { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
-
- { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
- { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+ VkImageAspectFlags aspect;
+ int vk_planes;
+ int nb_images;
+ int nb_images_fallback;
+ const VkFormat fallback[5];
+} vk_formats_list[] = {
+ /* Gray formats */
+ { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } },
+
+ /* RGB formats */
+ { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+ { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
+ { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } },
+ { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } },
+ { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } },
+ { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+ { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
+ { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
+ { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
+ { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
+
+ /* Planar RGB */
+ { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+ { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+
+ /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
+ { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+ /* Two-plane 422 YUV at 8, 10 and 16 bits */
+ { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+ /* Two-plane 444 YUV at 8, 10 and 16 bits */
+ { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+ /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */
+ { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+ /* Single plane 422 at 8, 10 and 12 bits */
+ { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+ { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
};
+static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list);
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
- for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++)
- if (vk_pixfmt_planar_map[i].pixfmt == p)
- return vk_pixfmt_planar_map[i].vkfmts;
+ for (int i = 0; i < nb_vk_formats_list; i++)
+ if (vk_formats_list[i].pixfmt == p)
+ return vk_formats_list[i].fallback;
+ return NULL;
+}
+
+static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p)
+{
+ for (int i = 0; i < nb_vk_formats_list; i++)
+ if (vk_formats_list[i].pixfmt == p)
+ return &vk_formats_list[i];
return NULL;
}
+/* Malitia pura, Khronos: helpers translating video bits between format features and image usage */
+#define FN_MAP_TO(dst_t, dst, src_t, src)                                     \
+    static dst_t map_ ##src## _to_ ##dst(src_t mask2) /* mask2: input bits */ \
+    {                                                                         \
+        dst_t mask1 = 0x0; /* mask1: translated result */                     \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \
+                             VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR);        \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR,    \
+                             VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR);        \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR,    \
+                             VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR);        \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR,  \
+                             VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR);        \
+        return mask1;                                                         \
+    }
+/* MAP_TO(result, input, feature_bit, usage_bit): direction-specific, redefined for each instantiation below. */
+#define MAP_TO(mask1, mask2, flag1, flag2) if ((mask2) & (flag2)) (mask1) |= (flag1);
+FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage) /* defines map_usage_to_feats() */
+#undef MAP_TO
+#define MAP_TO(mask1, mask2, flag1, flag2) if ((mask2) & (flag1)) (mask1) |= (flag2);
+FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats) /* defines map_feats_to_usage() */
+#undef MAP_TO
+#undef FN_MAP_TO
+
+/* Find the Vulkan representation of pixel format p supported by this device.
+ * On success returns 0 and fills whichever of fmts, nb_images, aspect and
+ * supported_usage are non-NULL. Returns AVERROR(ENOTSUP) if the format (or the
+ * bits mapped from additional_usage) is unsupported, AVERROR(EINVAL) if p is unknown. */
+static int av_vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
+                                 VkImageUsageFlags additional_usage, const VkFormat **fmts,
+                                 int *nb_images, VkImageAspectFlags *aspect,
+                                 VkImageUsageFlags *supported_usage)
+{
+    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+    VulkanDevicePriv *priv = dev_ctx->internal->priv;
+    FFVulkanFunctions *vk = &priv->vkfn;
+
+    VkFormatProperties2 prop = {
+        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+    };
+    const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
+                                                 VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
+                                                 VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
+    const VkFormatFeatureFlagBits2 full_flags = VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT |
+                                                basic_flags;
+
+    const VkFormatFeatureFlagBits2 additional_flags = map_usage_to_feats(additional_usage);
+
+    for (int i = 0; i < nb_vk_formats_list; i++) {
+        if (vk_formats_list[i].pixfmt == p) {
+            const VkFormatFeatureFlags *feat = &prop.formatProperties.optimalTilingFeatures;
+            VkFormatFeatureFlagBits2 feats_vk1, feats_vk2;
+            int basics, full, additional;
+
+            vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, vk_formats_list[i].vkf,
+                                                   &prop);
+
+            /* We want at least the basics supported; a mask only counts as
+             * supported when every bit in it is present, not just one of them. */
+            feats_vk1 = *feat;
+            basics = (feats_vk1 & basic_flags) == basic_flags;
+            additional = (feats_vk1 & additional_flags) == additional_flags;
+
+            /* If basics are not supported, OR we have multiplane images,
+             * check the fallback/single-plane rep for support. */
+            if (!basics || vk_formats_list[i].vk_planes > 1)
+                vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
+                                                       vk_formats_list[i].fallback[0],
+                                                       &prop);
+
+            feats_vk2 = *feat;
+            full = (feats_vk2 & full_flags) == full_flags;
+
+            if (additional_flags && !additional) {
+                return AVERROR(ENOTSUP);
+            } else if (full && basics) {
+                if (fmts)
+                    *fmts = &vk_formats_list[i].vkf;
+                if (nb_images)
+                    *nb_images = 1; /* NOTE(review): the entry's nb_images field is not used here - confirm for planar RGB */
+                if (aspect)
+                    *aspect = vk_formats_list[i].aspect;
+                if (supported_usage)
+                    *supported_usage = map_feats_to_usage(feats_vk1);
+                return 0;
+            } else if (full && (vk_formats_list[i].vk_planes > 1)) {
+                if (fmts)
+                    *fmts = vk_formats_list[i].fallback;
+                if (nb_images)
+                    *nb_images = vk_formats_list[i].nb_images_fallback;
+                if (aspect)
+                    *aspect = vk_formats_list[i].aspect;
+                if (supported_usage)
+                    *supported_usage = map_feats_to_usage(feats_vk2);
+                return 0;
+            } else {
+                return AVERROR(ENOTSUP);
+            }
+        }
+    }
+
+    return AVERROR(EINVAL);
+}
+
static const void *vk_find_struct(const void *chain, VkStructureType stype)
{
const VkBaseInStructure *in = chain;
@@ -276,33 +374,6 @@ static void vk_link_struct(void *chain, void *in)
out->pNext = in;
}
-static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
- int linear)
-{
- AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
- VulkanDevicePriv *priv = dev_ctx->internal->priv;
- FFVulkanFunctions *vk = &priv->vkfn;
- const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
- int planes = av_pix_fmt_count_planes(p);
-
- if (!fmt)
- return 0;
-
- for (int i = 0; i < planes; i++) {
- VkFormatFeatureFlags flags;
- VkFormatProperties2 prop = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
- };
- vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
- flags = linear ? prop.formatProperties.linearTilingFeatures :
- prop.formatProperties.optimalTilingFeatures;
- if (!(flags & FF_VK_DEFAULT_USAGE_FLAGS))
- return 0;
- }
-
- return 1;
-}
-
static int load_libvulkan(AVHWDeviceContext *ctx)
{
AVVulkanDeviceContext *hwctx = ctx->hwctx;
@@ -435,6 +506,27 @@ static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
return 0;
}
+#define ADD_VAL_TO_LIST(list, count, val) /* grow list by one slot and store av_strdup(val) in it */ \
+ do { /* uses an `err` variable and a `fail` label from the enclosing function */ \
+ list = av_realloc_array(list, sizeof(*list), ++count); /* count is incremented before the result is checked */ \
+ if (!list) { /* NOTE(review): list was overwritten above, so the previous array pointer is gone here - confirm no leak */ \
+ err = AVERROR(ENOMEM); \
+ goto fail; \
+ } \
+ list[count - 1] = av_strdup(val); /* the list stores its own copy of val */ \
+ if (!list[count - 1]) { \
+ err = AVERROR(ENOMEM); \
+ goto fail; \
+ } \
+ } while(0)
+
+#define RELEASE_PROPS(props, count) /* free the first count entries of props, then props itself */ \
+ if (props) { /* NOTE(review): bare if-block, not wrapped in do { } while (0) */ \
+ for (int i = 0; i < count; i++) \
+ av_free((void *)((props)[i])); /* cast to void * before freeing each entry */ \
+ av_free((void *)props); \
+ }
+
static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
const char * const **dst, uint32_t *num, int debug)
{
@@ -683,6 +775,10 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VkApplicationInfo application_info = {
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pApplicationName = "ffmpeg",
+ .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
+ LIBAVUTIL_VERSION_MINOR,
+ LIBAVUTIL_VERSION_MICRO),
.pEngineName = "libavutil",
.apiVersion = VK_API_VERSION_1_3,
.engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
@@ -1121,6 +1217,8 @@ static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
VulkanQueueCtx *q = &cmd->queues[i];
vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
q->was_synchronous = 1;
+ q->qf = queue_family_index;
+ q->qidx = i;
}
return 0;
@@ -1256,6 +1354,7 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
VkResult ret;
VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
FFVulkanFunctions *vk = &p->vkfn;
ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
@@ -1269,7 +1368,9 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
s_info->commandBufferCount = 1;
+ hwctx->lock_queue(hwfc->device_ctx, q->qf, q->qidx);
ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence);
+ hwctx->unlock_queue(hwfc->device_ctx, q->qf, q->qidx);
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n",
vk_ret2str(ret));
@@ -1284,7 +1385,6 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
q->was_synchronous = synchronous;
if (synchronous) {
- AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
vk->ResetFences(hwctx->act_dev, 1, &q->fence);
unref_exec_ctx_deps(hwfc, cmd);
@@ -1446,12 +1546,6 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
if (opt_d)
p->use_linear_images = strtol(opt_d->value, NULL, 10);
- opt_d = av_dict_get(opts, "contiguous_planes", NULL, 0);
- if (opt_d)
- p->contiguous_planes = strtol(opt_d->value, NULL, 10);
- else
- p->contiguous_planes = -1;
-
hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;
@@ -1690,8 +1784,10 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
int count = 0;
VulkanDevicePriv *p = ctx->internal->priv;
- for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
- count += pixfmt_is_supported(ctx, i, p->use_linear_images);
+ for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
+ count += av_vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
+ 0, NULL, NULL, NULL, NULL) >= 0;
+ }
#if CONFIG_CUDA
if (p->dev_is_nvidia)
@@ -1704,9 +1800,12 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
return AVERROR(ENOMEM);
count = 0;
- for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
- if (pixfmt_is_supported(ctx, i, p->use_linear_images))
- constraints->valid_sw_formats[count++] = i;
+ for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
+ if (av_vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
+ 0, NULL, NULL, NULL, NULL) >= 0) {
+ constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt;
+ }
+ }
#if CONFIG_CUDA
if (p->dev_is_nvidia)
@@ -1714,8 +1813,8 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
#endif
constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
- constraints->min_width = 0;
- constraints->min_height = 0;
+ constraints->min_width = 1;
+ constraints->min_height = 1;
constraints->max_width = p->props.properties.limits.maxImageDimension2D;
constraints->max_height = p->props.properties.limits.maxImageDimension2D;
@@ -1789,7 +1888,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
static void vulkan_free_internal(AVVkFrame *f)
{
- AVVkFrameInternal *internal = f->internal;
+ av_unused AVVkFrameInternal *internal = f->internal;
#if CONFIG_CUDA
if (internal->cuda_fc_ref) {
@@ -1829,17 +1928,22 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
- int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+ int nb_images = ff_vk_count_images(f);
+
+ VkSemaphoreWaitInfo sem_wait = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+ .pSemaphores = f->sem,
+ .pValues = f->sem_value,
+ .semaphoreCount = nb_images,
+ };
- /* We could use vkWaitSemaphores, but the validation layer seems to have
- * issues tracking command buffer execution state on uninit. */
- vk->DeviceWaitIdle(hwctx->act_dev);
+ vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
vulkan_free_internal(f);
- for (int i = 0; i < planes; i++) {
- vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
- vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+ for (int i = 0; i < nb_images; i++) {
+ vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
+ vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
}
@@ -1849,30 +1953,25 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
void *alloc_pnext, size_t alloc_pnext_stride)
{
- int err;
+ int img_cnt = 0, err;
VkResult ret;
AVHWDeviceContext *ctx = hwfc->device_ctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
AVVulkanFramesContext *hwfctx = hwfc->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
- VkMemoryRequirements cont_memory_requirements = { 0 };
- int cont_mem_size_list[AV_NUM_DATA_POINTERS] = { 0 };
- int cont_mem_size = 0;
-
AVVulkanDeviceContext *hwctx = ctx->hwctx;
- for (int i = 0; i < planes; i++) {
+ while (f->img[img_cnt]) {
int use_ded_mem;
VkImageMemoryRequirementsInfo2 req_desc = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
- .image = f->img[i],
+ .image = f->img[img_cnt],
};
VkMemoryDedicatedAllocateInfo ded_alloc = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
+ .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride),
};
VkMemoryDedicatedRequirements ded_req = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
@@ -1884,79 +1983,35 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
- if (f->tiling == VK_IMAGE_TILING_LINEAR)
+ if (hwfctx->tiling == VK_IMAGE_TILING_LINEAR)
req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
p->props.properties.limits.minMemoryMapAlignment);
- if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
- if (ded_req.requiresDedicatedAllocation) {
- av_log(hwfc, AV_LOG_ERROR, "Cannot allocate all planes in a single allocation, "
- "device requires dedicated image allocation!\n");
- return AVERROR(EINVAL);
- } else if (!i) {
- cont_memory_requirements = req.memoryRequirements;
- } else if (cont_memory_requirements.memoryTypeBits !=
- req.memoryRequirements.memoryTypeBits) {
- av_log(hwfc, AV_LOG_ERROR, "The memory requirements differ between plane 0 "
- "and %i, cannot allocate in a single region!\n",
- i);
- return AVERROR(EINVAL);
- }
-
- cont_mem_size_list[i] = FFALIGN(req.memoryRequirements.size,
- req.memoryRequirements.alignment);
- cont_mem_size += cont_mem_size_list[i];
- continue;
- }
-
/* In case the implementation prefers/requires dedicated allocation */
use_ded_mem = ded_req.prefersDedicatedAllocation |
ded_req.requiresDedicatedAllocation;
if (use_ded_mem)
- ded_alloc.image = f->img[i];
+ ded_alloc.image = f->img[img_cnt];
/* Allocate memory */
if ((err = alloc_mem(ctx, &req.memoryRequirements,
- f->tiling == VK_IMAGE_TILING_LINEAR ?
+ hwfctx->tiling == VK_IMAGE_TILING_LINEAR ?
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
- &f->flags, &f->mem[i])))
- return err;
-
- f->size[i] = req.memoryRequirements.size;
- bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
- bind_info[i].image = f->img[i];
- bind_info[i].memory = f->mem[i];
- }
-
- if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
- cont_memory_requirements.size = cont_mem_size;
-
- /* Allocate memory */
- if ((err = alloc_mem(ctx, &cont_memory_requirements,
- f->tiling == VK_IMAGE_TILING_LINEAR ?
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- (void *)(((uint8_t *)alloc_pnext)),
- &f->flags, &f->mem[0])))
+ &f->flags, &f->mem[img_cnt])))
return err;
- f->size[0] = cont_memory_requirements.size;
-
- for (int i = 0, offset = 0; i < planes; i++) {
- bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
- bind_info[i].image = f->img[i];
- bind_info[i].memory = f->mem[0];
- bind_info[i].memoryOffset = offset;
+ f->size[img_cnt] = req.memoryRequirements.size;
+ bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
+ bind_info[img_cnt].image = f->img[img_cnt];
+ bind_info[img_cnt].memory = f->mem[img_cnt];
- f->offset[i] = bind_info[i].memoryOffset;
- offset += cont_mem_size_list[i];
- }
+ img_cnt++;
}
/* Bind the allocated memory to the images */
- ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info);
+ ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
vk_ret2str(ret));
@@ -1982,11 +2037,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VkImageLayout new_layout;
VkAccessFlags2 new_access;
AVVulkanFramesContext *vkfc = hwfc->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
- AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
+ int nb_images = ff_vk_count_images(frame);
VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
VkDependencyInfo dep_info;
@@ -1994,14 +2048,14 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pSignalSemaphoreValues = sem_sig_val,
- .signalSemaphoreValueCount = planes,
+ .signalSemaphoreValueCount = nb_images,
};
VkSubmitInfo s_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &s_timeline_sem_info,
.pSignalSemaphores = frame->sem,
- .signalSemaphoreCount = planes,
+ .signalSemaphoreCount = nb_images,
};
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
@@ -2011,7 +2065,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
vkfc->lock_frame(hwfc, frame);
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_images; i++) {
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
sem_sig_val[i] = frame->sem_value[i] + 1;
}
@@ -2029,10 +2083,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
dst_qf = VK_QUEUE_FAMILY_IGNORED;
s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
- s_timeline_sem_info.waitSemaphoreValueCount = planes;
+ s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
s_info.pWaitSemaphores = frame->sem;
s_info.pWaitDstStageMask = wait_st;
- s_info.waitSemaphoreCount = planes;
+ s_info.waitSemaphoreCount = nb_images;
break;
case PREP_MODE_EXTERNAL_EXPORT:
new_layout = VK_IMAGE_LAYOUT_GENERAL;
@@ -2040,10 +2094,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
src_qf = VK_QUEUE_FAMILY_IGNORED;
dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
- s_timeline_sem_info.waitSemaphoreValueCount = planes;
+ s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
s_info.pWaitSemaphores = frame->sem;
s_info.pWaitDstStageMask = wait_st;
- s_info.waitSemaphoreCount = planes;
+ s_info.waitSemaphoreCount = nb_images;
break;
case PREP_MODE_DECODING_DST:
new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
@@ -2062,7 +2116,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_images; i++) {
img_bar[i] = (VkImageMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.pNext = NULL,
@@ -2077,8 +2131,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
.image = frame->img[i],
.subresourceRange = (VkImageSubresourceRange) {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
.levelCount = 1,
- .layerCount = 1,
},
};
@@ -2090,7 +2144,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
.pImageMemoryBarriers = img_bar,
- .imageMemoryBarrierCount = planes,
+ .imageMemoryBarrierCount = nb_images,
};
vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
@@ -2101,7 +2155,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
return err;
}
-static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
+static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format,
int frame_w, int frame_h, int plane)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
@@ -2120,17 +2174,17 @@ static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
VkImageTiling tiling, VkImageUsageFlagBits usage,
+ VkImageCreateFlags flags, int nb_layers,
void *create_pnext)
{
int err;
VkResult ret;
AVHWDeviceContext *ctx = hwfc->device_ctx;
VulkanDevicePriv *p = ctx->internal->priv;
+ VulkanFramesPriv *fp = hwfc->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
- enum AVPixelFormat format = hwfc->sw_format;
- const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
- const int planes = av_pix_fmt_count_planes(format);
+ AVVulkanFramesContext *frames = hwfc->hwctx;
VkExportSemaphoreCreateInfo ext_sem_info = {
.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
@@ -2165,17 +2219,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
return AVERROR(ENOMEM);
}
+ // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled)
+
/* Create the images */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < fp->nb_images; i++) {
VkImageCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = create_pnext,
.imageType = VK_IMAGE_TYPE_2D,
- .format = img_fmts[i],
+ .format = fp->fmts[i],
.extent.depth = 1,
.mipLevels = 1,
- .arrayLayers = 1,
- .flags = VK_IMAGE_CREATE_ALIAS_BIT,
+ .arrayLayers = nb_layers,
+ .flags = flags,
.tiling = tiling,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = usage,
@@ -2187,7 +2243,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
};
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
- format, hwfc->width, hwfc->height, i);
+ hwfc->sw_format, hwfc->width, hwfc->height, i);
ret = vk->CreateImage(hwctx->act_dev, &create_info,
hwctx->alloc, &f->img[i]);
@@ -2214,7 +2270,9 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
}
f->flags = 0x0;
+FF_DISABLE_DEPRECATION_WARNINGS
f->tiling = tiling;
+FF_ENABLE_DEPRECATION_WARNINGS
*frame = f;
return 0;
@@ -2296,41 +2354,23 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
AVVulkanFramesContext *hwctx = hwfc->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
VulkanFramesPriv *fp = hwfc->internal->priv;
- VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
VkExternalMemoryHandleTypeFlags e = 0x0;
- VkExternalMemoryImageCreateInfo eiinfo = {
- .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
- .pNext = hwctx->create_pnext,
- };
-
#ifdef _WIN32
if (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater()
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
#else
- if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
- try_export_flags(hwfc, &eiinfo.handleTypes, &e,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
- if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
- try_export_flags(hwfc, &eiinfo.handleTypes, &e,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
#endif
- for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
- eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
- eminfo[i].pNext = hwctx->alloc_pnext[i];
- eminfo[i].handleTypes = e;
- }
-
- err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
- eiinfo.handleTypes ? &eiinfo : NULL);
+ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
+ hwctx->nb_layers, hwctx->create_pnext);
if (err)
return NULL;
- err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
+ err = alloc_bind_mem(hwfc, f, NULL, 0);
if (err)
goto fail;
@@ -2389,103 +2429,44 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
VulkanFramesPriv *fp = hwfc->internal->priv;
AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
- const int has_modifiers = !!(p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS);
-
- /* Default tiling flags */
- hwctx->tiling = hwctx->tiling ? hwctx->tiling :
- has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
- p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
- VK_IMAGE_TILING_OPTIMAL;
-
- if (!hwctx->usage)
- hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS;
-
- modifier_info = vk_find_struct(hwctx->create_pnext,
- VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
-
- /* Get the supported modifiers if the user has not given any. */
- if (has_modifiers && !modifier_info) {
- const VkFormat *fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
- VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
- FFVulkanFunctions *vk = &p->vkfn;
- VkDrmFormatModifierPropertiesEXT *mod_props;
- uint64_t *modifiers;
- int modifier_count = 0;
-
- VkDrmFormatModifierPropertiesListEXT mod_props_list = {
- .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
- .pNext = NULL,
- .drmFormatModifierCount = 0,
- .pDrmFormatModifierProperties = NULL,
- };
- VkFormatProperties2 prop = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
- .pNext = &mod_props_list,
- };
-
- /* Get all supported modifiers */
- vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);
+ VkImageUsageFlagBits supported_usage;
- if (!mod_props_list.drmFormatModifierCount) {
- av_log(hwfc, AV_LOG_ERROR, "There are no supported modifiers for the given sw_format\n");
- return AVERROR(EINVAL);
- }
-
- /* Createa structure to hold the modifier list info */
- modifier_info = av_mallocz(sizeof(*modifier_info));
- if (!modifier_info)
- return AVERROR(ENOMEM);
-
- modifier_info->pNext = NULL;
- modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
+ /* Defaults */
+ if (!hwctx->nb_layers)
+ hwctx->nb_layers = 1;
- /* Add structure to the image creation pNext chain */
- if (!hwctx->create_pnext)
- hwctx->create_pnext = modifier_info;
- else
- vk_link_struct(hwctx->create_pnext, (void *)modifier_info);
+ /* VK_IMAGE_TILING_OPTIMAL == 0, so no need to check */
- /* Backup the allocated struct to be freed later */
- fp->modifier_info = modifier_info;
-
- /* Allocate list of modifiers */
- modifiers = av_mallocz(mod_props_list.drmFormatModifierCount *
- sizeof(*modifiers));
- if (!modifiers)
- return AVERROR(ENOMEM);
-
- modifier_info->pDrmFormatModifiers = modifiers;
+ if (!hwctx->usage)
+ hwctx->usage = VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT;
- /* Allocate a temporary list to hold all modifiers supported */
- mod_props = av_mallocz(mod_props_list.drmFormatModifierCount *
- sizeof(*mod_props));
- if (!mod_props)
- return AVERROR(ENOMEM);
+ err = av_vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, 0, /* drivers must fix feats. */
+ &fp->fmts, &fp->nb_images, &fp->aspect, &supported_usage);
+ if (err < 0)
+ return err;
- mod_props_list.pDrmFormatModifierProperties = mod_props;
+ fp->fmt = vk_find_format_entry(hwfc->sw_format);
- /* Finally get all modifiers from the device */
- vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);
+ /* Remove comments once drivers properly signal features for formats */
+ if (fp->fmt->vk_planes > 1) // || supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)
+ hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
- /* Reject any modifiers that don't match our requirements */
- for (int i = 0; i < mod_props_list.drmFormatModifierCount; i++) {
- if (!(mod_props[i].drmFormatModifierTilingFeatures & hwctx->usage))
- continue;
+// fp->fmt = vk_find_format_entry(hwfc->sw_format);
- modifiers[modifier_count++] = mod_props[i].drmFormatModifier;
- }
+ if (!hwctx->img_flags) {
+ hwctx->img_flags = VK_IMAGE_CREATE_ALIAS_BIT;
+ if ((fp->fmt->vk_planes > 1 && fp->nb_images == 1) ||
+ (fp->fmt->vkf != fp->fmt->fallback[0]))
+ hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ }
- if (!modifier_count) {
- av_log(hwfc, AV_LOG_ERROR, "None of the given modifiers supports"
- " the usage flags!\n");
- av_freep(&mod_props);
- return AVERROR(EINVAL);
- }
+ if (!hwctx->lock_frame)
+ hwctx->lock_frame = lock_frame;
- modifier_info->drmFormatModifierCount = modifier_count;
- av_freep(&mod_props);
- }
+ if (!hwctx->unlock_frame)
+ hwctx->unlock_frame = unlock_frame;
err = create_exec_ctx(hwfc, &fp->conv_ctx,
dev_hwctx->queue_family_comp_index,
@@ -2505,8 +2486,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return err;
/* Test to see if allocation will fail */
- err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
- hwctx->create_pnext);
+ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
+ hwctx->nb_layers, hwctx->create_pnext);
if (err)
return err;
@@ -2522,11 +2503,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return AVERROR(ENOMEM);
}
- if (!hwctx->lock_frame)
- hwctx->lock_frame = lock_frame;
- if (!hwctx->unlock_frame)
- hwctx->unlock_frame = unlock_frame;
-
return 0;
}
@@ -2602,7 +2578,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
const AVFrame *src, int flags)
{
VkResult ret;
- int err, mapped_mem_count = 0, mem_planes = 0;
+ int err, nb_mem = 0, mapped_mem_count = 0, mem_planes = 0;
AVVkFrame *f = (AVVkFrame *)src->data[0];
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
AVVulkanFramesContext *hwfctx = hwfc->hwctx;
@@ -2622,7 +2598,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
}
if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
- !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
+ !(hwfctx->tiling == VK_IMAGE_TILING_LINEAR)) {
av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
"and linear!\n");
err = AVERROR(EINVAL);
@@ -2632,35 +2608,35 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
dst->width = src->width;
dst->height = src->height;
- mem_planes = hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY ? 1 : planes;
- for (int i = 0; i < mem_planes; i++) {
+ for (int i = 0; i < AV_NUM_DATA_POINTERS; i++)
+ nb_mem += !!f->mem[i];
+
+ for (int i = 0; i < nb_mem; i++) {
ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0,
VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
- vk_ret2str(ret));
+ av_log(hwfc, AV_LOG_ERROR, "Failed to map %ith frame memory: %s\n",
+ i, vk_ret2str(ret));
err = AVERROR_EXTERNAL;
goto fail;
}
mapped_mem_count++;
}
- if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
- for (int i = 0; i < planes; i++)
- dst->data[i] = dst->data[0] + f->offset[i];
- }
+ for (int i = 0; i < planes; i++)
+ dst->data[i] = dst->data[i] + f->offset[i];
/* Check if the memory contents matter */
if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
!(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_mem; i++) {
map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
map_mem_ranges[i].size = VK_WHOLE_SIZE;
map_mem_ranges[i].memory = f->mem[i];
}
- ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes,
+ ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, nb_mem,
map_mem_ranges);
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
@@ -2702,25 +2678,25 @@ static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwma
{
AVVkFrame *f = hwmap->priv;
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ const int nb_images = ff_vk_count_images(f);
VkSemaphoreWaitInfo wait_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
.flags = 0x0,
.pSemaphores = f->sem,
.pValues = f->sem_value,
- .semaphoreCount = planes,
+ .semaphoreCount = nb_images,
};
vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);
vulkan_free_internal(f);
- for (int i = 0; i < planes; i++) {
- vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
- vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+ for (int i = 0; i < nb_images; i++) {
+ vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
+ vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
}
@@ -2790,7 +2766,9 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
goto fail;
}
+FF_DISABLE_DEPRECATION_WARNINGS
f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
+FF_ENABLE_DEPRECATION_WARNINGS
for (int i = 0; i < desc->nb_layers; i++) {
const int planes = desc->layers[i].nb_planes;
@@ -2828,7 +2806,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
.mipLevels = 1,
.arrayLayers = 1,
.flags = 0x0, /* ALIAS flag is implicit for imported images */
- .tiling = f->tiling,
+ .tiling = hwfctx->tiling,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
@@ -3498,7 +3476,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
- if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
+ if (hwfctx ->tiling == VK_IMAGE_TILING_OPTIMAL)
continue;
vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
@@ -3818,7 +3796,10 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
int bar_num = 0;
VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
- const int planes = av_pix_fmt_count_planes(pix_fmt);
+ const int img_planes = fp->fmt->vk_planes;
+ const int nb_images = ff_vk_count_images(frame);
+ int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt);
+
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@@ -3831,8 +3812,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pWaitSemaphoreValues = frame->sem_value,
.pSignalSemaphoreValues = sem_signal_values,
- .waitSemaphoreValueCount = planes,
- .signalSemaphoreValueCount = planes,
+ .waitSemaphoreValueCount = nb_images,
+ .signalSemaphoreValueCount = nb_images,
};
VkSubmitInfo s_info = {
@@ -3841,8 +3822,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
.pSignalSemaphores = frame->sem,
.pWaitSemaphores = frame->sem,
.pWaitDstStageMask = sem_wait_dst,
- .signalSemaphoreCount = planes,
- .waitSemaphoreCount = planes,
+ .signalSemaphoreCount = nb_images,
+ .waitSemaphoreCount = nb_images,
};
vkfc->lock_frame(hwfc, frame);
@@ -3850,11 +3831,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
if ((err = wait_start_exec_ctx(hwfc, ectx)))
goto end;
- for (int i = 0; i < planes; i++)
+ for (int i = 0; i < nb_images; i++)
sem_signal_values[i] = frame->sem_value[i] + 1;
/* Change the image layout to something more optimal for transfers */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_images; i++) {
VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
@@ -3890,13 +3871,19 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
0, NULL, 0, NULL, bar_num, img_bar);
/* Schedule a copy for each plane */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < pixfmt_planes; i++) {
+ int idx = FFMIN(i, nb_images - 1);
+ VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_ASPECT_PLANE_0_BIT,
+ VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
VkBufferImageCopy buf_reg = {
.bufferOffset = buf_offsets[i],
.bufferRowLength = buf_stride[i] / desc->comp[i].step,
.imageSubresource.layerCount = 1,
- .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .imageSubresource.aspectMask = plane_aspect[(img_planes != 1) + i*(img_planes != 1)],
.imageOffset = { 0, 0, 0, },
};
@@ -3907,11 +3894,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
if (to_buf)
- vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
+ vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], frame->layout[idx],
vkbuf->buf, 1, &buf_reg);
else
- vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
- frame->layout[i], 1, &buf_reg);
+ vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx],
+ frame->layout[idx], 1, &buf_reg);
}
/* When uploading, do this asynchronously if the source is refcounted by
@@ -3928,7 +3915,7 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
goto end;
}
- if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
+ if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, pixfmt_planes)))
goto end;
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
} else {
@@ -3948,6 +3935,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
AVVkFrame *f = (AVVkFrame *)vkf->data[0];
AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+ AVVulkanFramesContext *fc = hwfc->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
@@ -3970,7 +3958,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
return AVERROR(EINVAL);
/* For linear, host visiable images */
- if (f->tiling == VK_IMAGE_TILING_LINEAR &&
+ if (fc->tiling == VK_IMAGE_TILING_LINEAR &&
f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
AVFrame *map = av_frame_alloc();
if (!map)
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index e89fa52927..13a40fa563 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -169,26 +169,31 @@ typedef enum AVVkFrameFlags {
*/
typedef struct AVVulkanFramesContext {
/**
- * Controls the tiling of allocated frames. If left as optimal tiling,
- * then during av_hwframe_ctx_init() will decide based on whether the device
- * supports DRM modifiers, or if the linear_images flag is set, otherwise
- * will allocate optimally-tiled images.
+ * Controls the tiling of allocated frames.
+ * If left as VK_IMAGE_TILING_OPTIMAL (0), will use optimal tiling.
+ * Can be set to VK_IMAGE_TILING_LINEAR to force linear images,
+ * or VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT to force DMABUF-backed
+ * images.
*/
VkImageTiling tiling;
/**
- * Defines extra usage of output frames. If left as 0, the following bits
- * are set: TRANSFER_SRC, TRANSFER_DST. SAMPLED and STORAGE.
+ * Defines extra usage of output frames. If non-zero, all flags MUST be
+ * supported by the VkFormat. Otherwise, will use supported flags amongst:
+ * - VK_IMAGE_USAGE_SAMPLED_BIT
+ * - VK_IMAGE_USAGE_STORAGE_BIT
+ * - VK_IMAGE_USAGE_TRANSFER_SRC_BIT
+ * - VK_IMAGE_USAGE_TRANSFER_DST_BIT
*/
VkImageUsageFlagBits usage;
/**
* Extension data for image creation.
- * If VkImageDrmFormatModifierListCreateInfoEXT is present in the chain,
- * and the device supports DRM modifiers, then images will be allocated
- * with the specific requested DRM modifiers.
+ * If DRM tiling is used, a VkImageDrmFormatModifierListCreateInfoEXT structure
+ * can be added to specify the exact modifier to use.
+ *
* Additional structures may be added at av_hwframe_ctx_init() time,
- * which will be freed automatically on uninit(), so users need only free
+ * which will be freed automatically on uninit(), so users must only free
* any structures they've allocated themselves.
*/
void *create_pnext;
@@ -209,6 +214,25 @@ typedef struct AVVulkanFramesContext {
*/
AVVkFrameFlags flags;
+ /**
+ * Flags to set during image creation. If unset, defaults to either
+ * VK_IMAGE_CREATE_ALIAS_BIT or VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, per format.
+ */
+ VkImageCreateFlags img_flags;
+
+ /**
+ * Vulkan format for each image. MUST be compatible with the pixel format.
+ * If unset, will be automatically set.
+ * There are at most two compatible formats for a frame - a multiplane
+ * format, and a single-plane multi-image format.
+ */
+ VkFormat format[AV_NUM_DATA_POINTERS];
+
+ /**
+ * Number of layers each image will have.
+ */
+ int nb_layers;
+
/**
* Locks a frame, preventing other threads from changing frame properties.
* If set to NULL, will be set to lavu-internal functions that utilize a
@@ -228,14 +252,7 @@ typedef struct AVVulkanFramesContext {
} AVVulkanFramesContext;
/*
- * Frame structure, the VkFormat of the image will always match
- * the pool's sw_format.
- * All frames, imported or allocated, will be created with the
- * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed.
- *
- * If all queue family indices in the device context are the same,
- * images will be created with the EXCLUSIVE sharing mode. Otherwise, all images
- * will be created using the CONCURRENT sharing mode.
+ * Frame structure.
*
* @note the size of this structure is not part of the ABI, to allocate
* you must use @av_vk_frame_alloc().
@@ -248,8 +265,9 @@ struct AVVkFrame {
/**
* The same tiling must be used for all images in the frame.
+ * DEPRECATED: use AVVulkanFramesContext.tiling instead.
*/
- VkImageTiling tiling;
+ attribute_deprecated VkImageTiling tiling;
/**
* Memory backing the images. Could be less than the amount of planes,
@@ -265,13 +283,13 @@ struct AVVkFrame {
VkMemoryPropertyFlagBits flags;
/**
- * Updated after every barrier
+ * Updated after every barrier. One per VkImage.
*/
VkAccessFlagBits access[AV_NUM_DATA_POINTERS];
VkImageLayout layout[AV_NUM_DATA_POINTERS];
/**
- * Synchronization timeline semaphores, one for each sw_format plane.
+ * Synchronization timeline semaphores, one for each VkImage.
* Must not be freed manually. Must be waited on at every submission using
* the value in sem_value, and must be signalled at every submission,
* using an incremented value.
@@ -280,6 +298,7 @@ struct AVVkFrame {
/**
* Up to date semaphore value at which each image becomes accessible.
+ * One per VkImage.
* Clients must wait on this value when submitting a command queue,
* and increment it when signalling.
*/
@@ -291,16 +310,18 @@ struct AVVkFrame {
struct AVVkFrameInternal *internal;
/**
- * Describes the binding offset of each plane to the VkDeviceMemory.
+ * Describes the binding offset of each image to the VkDeviceMemory.
+ * One per VkImage.
*/
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
/**
* Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
* the image was allocated with the CONCURRENT concurrency option.
+ * One per VkImage.
*/
uint32_t queue_family[AV_NUM_DATA_POINTERS];
-} AVVkFrame;
+};
/**
* Allocates a single AVVkFrame and initializes everything as 0.
@@ -309,7 +330,7 @@ struct AVVkFrame {
AVVkFrame *av_vk_frame_alloc(void);
/**
- * Returns the format of each image up to the number of planes for a given sw_format.
+ * Returns the optimal format for a given sw_format, one for each plane.
* Returns NULL on unsupported formats.
*/
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p);
--
2.39.2
[-- Attachment #53: 0052-hwcontext_vulkan-don-t-change-properties-if-prepare_.patch --]
[-- Type: text/x-diff, Size: 2638 bytes --]
From a9ac0aa322a3369ccb5167ae1a8a984faf2e24d1 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:14:08 +0100
Subject: [PATCH 52/72] hwcontext_vulkan: don't change properties if
prepare_frame fails
---
libavutil/hwcontext_vulkan.c | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 027ecc76b1..75004037da 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2113,16 +2113,13 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
break;
}
- /* Change the image layout to something more optimal for writes.
- * This also signals the newly created semaphore, making it usable
- * for synchronization */
for (int i = 0; i < nb_images; i++) {
img_bar[i] = (VkImageMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.pNext = NULL,
.srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
- .srcAccessMask = 0x0,
.dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+ .srcAccessMask = frame->access[i],
.dstAccessMask = new_access,
.oldLayout = frame->layout[i],
.newLayout = new_layout,
@@ -2135,21 +2132,23 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
.levelCount = 1,
},
};
-
- frame->layout[i] = img_bar[i].newLayout;
- frame->access[i] = img_bar[i].dstAccessMask;
}
- dep_info = (VkDependencyInfo) {
- .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
- .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
- .pImageMemoryBarriers = img_bar,
- .imageMemoryBarrierCount = nb_images,
- };
-
- vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
+ vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = nb_images,
+ });
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ if (err >= 0) {
+ for (int i = 0; i < nb_images; i++) {
+ frame->layout[i] = img_bar[i].newLayout;
+ frame->access[i] = img_bar[i].dstAccessMask;
+ frame->queue_family[i] = img_bar[i].dstQueueFamilyIndex;
+ }
+ }
vkfc->unlock_frame(hwfc, frame);
return err;
--
2.39.2
[-- Attachment #54: 0053-hwcontext_vulkan-disable-host-mapping-frames-for-tra.patch --]
[-- Type: text/x-diff, Size: 1033 bytes --]
From 51c352d34c0ab2ae5eea1df1753d2a8d615c33d8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:14:24 +0100
Subject: [PATCH 53/72] hwcontext_vulkan: disable host-mapping frames for
transfers
Currently broken for multiplane surfaces.
---
libavutil/hwcontext_vulkan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 75004037da..647a072bdd 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -3946,7 +3946,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
const int planes = av_pix_fmt_count_planes(swf->format);
int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
- const int map_host = !!(p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY);
+ const int map_host = 0;
if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
--
2.39.2
[-- Attachment #55: 0054-hwcontext_vulkan-disable-all-mapping-code.patch --]
[-- Type: text/x-diff, Size: 5612 bytes --]
From a871a7d4ffe3f94488cd5091794e683c720bc5df Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:30:00 +0100
Subject: [PATCH 54/72] hwcontext_vulkan: disable all mapping code
Multiplane formats are currently not easy to map.
---
libavutil/hwcontext_vulkan.c | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 647a072bdd..761a63ddd7 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -44,7 +44,7 @@
#include "vulkan.h"
#include "vulkan_loader.h"
-#if CONFIG_LIBDRM
+#if 0
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
@@ -54,7 +54,7 @@
#endif
#endif
-#if CONFIG_CUDA
+#if 0
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
@@ -139,7 +139,7 @@ typedef struct VulkanFramesPriv {
typedef struct AVVkFrameInternal {
pthread_mutex_t update_mutex;
-#if CONFIG_CUDA
+#if 0
/* Importing external memory into cuda is really expensive so we keep the
* memory imported all the time */
AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
@@ -1718,7 +1718,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
* by the following checks (e.g. non-PCIe ARM GPU), having an empty
* dev_select will mean it'll get picked. */
switch(src_ctx->type) {
-#if CONFIG_LIBDRM
+#if 0
#if CONFIG_VAAPI
case AV_HWDEVICE_TYPE_VAAPI: {
AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
@@ -1753,7 +1753,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}
#endif
-#if CONFIG_CUDA
+#if 0
case AV_HWDEVICE_TYPE_CUDA: {
AVHWDeviceContext *cuda_cu = src_ctx;
AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
@@ -1789,7 +1789,7 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
0, NULL, NULL, NULL, NULL) >= 0;
}
-#if CONFIG_CUDA
+#if 0
if (p->dev_is_nvidia)
count++;
#endif
@@ -1807,7 +1807,7 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
}
}
-#if CONFIG_CUDA
+#if 0
if (p->dev_is_nvidia)
constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
#endif
@@ -1890,7 +1890,7 @@ static void vulkan_free_internal(AVVkFrame *f)
{
av_unused AVVkFrameInternal *internal = f->internal;
-#if CONFIG_CUDA
+#if 0
if (internal->cuda_fc_ref) {
AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
@@ -2672,7 +2672,7 @@ fail:
return err;
}
-#if CONFIG_LIBDRM
+#if 0
static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
AVVkFrame *f = hwmap->priv;
@@ -2746,6 +2746,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ AVVulkanFramesContext *hwfctx = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
@@ -3076,7 +3077,7 @@ fail:
#endif
#endif
-#if CONFIG_CUDA
+#if 0
static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
AVBufferRef *cuda_hwfc,
const AVFrame *frame)
@@ -3346,7 +3347,7 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
switch (src->format) {
-#if CONFIG_LIBDRM
+#if 0
#if CONFIG_VAAPI
case AV_PIX_FMT_VAAPI:
if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
@@ -3365,7 +3366,7 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
}
}
-#if CONFIG_LIBDRM
+#if 0
typedef struct VulkanDRMMapping {
AVDRMFrameDescriptor drm_desc;
AVVkFrame *source;
@@ -3533,7 +3534,7 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
switch (dst->format) {
-#if CONFIG_LIBDRM
+#if 0
case AV_PIX_FMT_DRM_PRIME:
if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
return vulkan_map_to_drm(hwfc, dst, src, flags);
@@ -4091,7 +4092,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
switch (src->format) {
-#if CONFIG_CUDA
+#if 0
case AV_PIX_FMT_CUDA:
#ifdef _WIN32
if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
@@ -4110,7 +4111,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
}
}
-#if CONFIG_CUDA
+#if 0
static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
const AVFrame *src)
{
@@ -4209,7 +4210,7 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
switch (dst->format) {
-#if CONFIG_CUDA
+#if 0
case AV_PIX_FMT_CUDA:
#ifdef _WIN32
if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
--
2.39.2
[-- Attachment #56: 0055-lavfi-add-lavfi-only-Vulkan-infrastructure.patch --]
[-- Type: text/x-diff, Size: 21753 bytes --]
From 6bd109733484568c98c2d08935d9c7f05ad7803c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:10:58 +0100
Subject: [PATCH 55/72] lavfi: add lavfi-only Vulkan infrastructure
---
libavfilter/Makefile | 6 +
libavfilter/vulkan_filter.c | 241 +++++++++++++++++++-
libavfilter/vulkan_filter.h | 25 ++
{libavutil => libavfilter}/vulkan_glslang.c | 19 +-
{libavutil => libavfilter}/vulkan_shaderc.c | 8 +-
libavfilter/vulkan_spirv.h | 45 ++++
6 files changed, 330 insertions(+), 14 deletions(-)
rename {libavutil => libavfilter}/vulkan_glslang.c (95%)
rename {libavutil => libavfilter}/vulkan_shaderc.c (96%)
create mode 100644 libavfilter/vulkan_spirv.h
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 0173b11870..f02e787d61 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -615,6 +615,10 @@ OBJS-$(CONFIG_AVSYNCTEST_FILTER) += src_avsynctest.o
OBJS-$(CONFIG_AMOVIE_FILTER) += src_movie.o
OBJS-$(CONFIG_MOVIE_FILTER) += src_movie.o
+# vulkan libs
+OBJS-$(CONFIG_LIBGLSLANG) += vulkan_glslang.o
+OBJS-$(CONFIG_LIBSHADERC) += vulkan_shaderc.o
+
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o
@@ -628,6 +632,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h
SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h
SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_filter.h
+SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h
+SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h
TOOLS = graph2dot
TESTPROGS = drawutils filtfmts formats integral
diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c
index e22541bd23..ad88931c4b 100644
--- a/libavfilter/vulkan_filter.c
+++ b/libavfilter/vulkan_filter.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -54,7 +56,6 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
int err;
AVFilterContext *avctx = inlink->dst;
FFVulkanContext *s = avctx->priv;
- FFVulkanFunctions *vk = &s->vkfn;
AVHWFramesContext *input_frames;
if (!inlink->hw_frames_ctx) {
@@ -85,8 +86,7 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
if (err < 0)
return err;
- vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props);
- vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+ ff_vk_load_props(s);
/* Default output parameters match input parameters. */
s->input_format = input_frames->sw_format;
@@ -189,3 +189,238 @@ int ff_vk_filter_init(AVFilterContext *avctx)
return 0;
}
+
+int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
+                                FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
+                                VkSampler sampler, void *push_src, size_t push_size)
+{
+    int err = 0;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    VkImageView in_views[AV_NUM_DATA_POINTERS];
+    VkImageView out_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Update descriptors and init the exec context */
+    FFVkExecContext *exec = ff_vk_exec_get(e);
+    ff_vk_exec_start(vkctx, exec);
+
+    ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+
+    if (push_src)
+        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                               0, push_size, push_src);
+
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+
+    RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f));
+    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f));
+
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      sampler);
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, 1,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      NULL);
+
+    ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+    /* X/Y group counts: output size divided by wg_size, rounded up; Z uses wg_size[2], not the Y size */
+    vk->CmdDispatch(exec->buf,
+                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
+                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+                    pl->wg_size[2]);
+
+    return ff_vk_exec_submit(vkctx, exec);
+fail:
+    ff_vk_exec_discard_deps(vkctx, exec);
+    return err;
+}
+
+int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
+                               FFVulkanPipeline *pls[2],
+                               AVFrame *out, AVFrame *tmp, AVFrame *in,
+                               VkSampler sampler, void *push_src, size_t push_size)
+{
+    int err = 0;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    VkImageView in_views[AV_NUM_DATA_POINTERS];
+    VkImageView tmp_views[AV_NUM_DATA_POINTERS];
+    VkImageView out_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Update descriptors and init the exec context */
+    FFVkExecContext *exec = ff_vk_exec_get(e);
+    ff_vk_exec_start(vkctx, exec);
+
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+
+    RET(ff_vk_create_imageviews(vkctx, exec, in_views, in));
+    RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp));
+    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
+
+    ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    for (int i = 0; i < 2; i++) {
+        FFVulkanPipeline *pl = pls[i];
+        AVFrame *src_f = !i ? in : tmp;
+        AVFrame *dst_f = !i ? tmp : out;
+        VkImageView *src_views = !i ? in_views : tmp_views;
+        VkImageView *dst_views = !i ? tmp_views : out_views;
+
+        ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+
+        if (push_src)
+            ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, push_size, push_src);
+
+        ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0,
+                                          !i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
+                                               VK_IMAGE_LAYOUT_GENERAL,
+                                          sampler);
+        ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1,
+                                          VK_IMAGE_LAYOUT_GENERAL,
+                                          NULL);
+        /* X/Y group counts: output size divided by wg_size, rounded up; Z uses wg_size[2], not the Y size */
+        vk->CmdDispatch(exec->buf,
+                        FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
+                        FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+                        pl->wg_size[2]);
+    }
+
+    return ff_vk_exec_submit(vkctx, exec);
+fail:
+    ff_vk_exec_discard_deps(vkctx, exec);
+    return err;
+}
+
+int ff_vk_filter_process_2in(FFVulkanContext *vkctx, FFVkExecPool *e,
+                             FFVulkanPipeline *pl,
+                             AVFrame *out, AVFrame *in1, AVFrame *in2,
+                             VkSampler sampler, void *push_src, size_t push_size)
+{
+    int err = 0;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    VkImageView in1_views[AV_NUM_DATA_POINTERS];
+    VkImageView in2_views[AV_NUM_DATA_POINTERS];
+    VkImageView out_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Update descriptors and init the exec context */
+    FFVkExecContext *exec = ff_vk_exec_get(e);
+    ff_vk_exec_start(vkctx, exec);
+
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in1,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in2,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+
+    RET(ff_vk_create_imageviews(vkctx, exec, in1_views, in1));
+    RET(ff_vk_create_imageviews(vkctx, exec, in2_views, in2));
+    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
+
+    ff_vk_frame_barrier(vkctx, exec, in1, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, in2, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+
+    if (push_src)
+        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                               0, push_size, push_src);
+
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, in1, in1_views, 0, 0,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      sampler);
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, in2, in2_views, 0, 1,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      sampler);
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, 2,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      NULL);
+    /* X/Y group counts: output size divided by wg_size, rounded up; Z uses wg_size[2], not the Y size */
+    vk->CmdDispatch(exec->buf,
+                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
+                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+                    pl->wg_size[2]);
+
+    return ff_vk_exec_submit(vkctx, exec);
+fail:
+    ff_vk_exec_discard_deps(vkctx, exec);
+    return err;
+}
diff --git a/libavfilter/vulkan_filter.h b/libavfilter/vulkan_filter.h
index bfdb9b2d7d..2a2a0e6e97 100644
--- a/libavfilter/vulkan_filter.h
+++ b/libavfilter/vulkan_filter.h
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -31,4 +33,27 @@ int ff_vk_filter_config_input (AVFilterLink *inlink);
int ff_vk_filter_config_output (AVFilterLink *outlink);
int ff_vk_filter_config_output_inplace(AVFilterLink *outlink);
+/**
+ * Submit a compute shader with a single in and single out for execution.
+ */
+int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
+ VkSampler sampler, void *push_src, size_t push_size);
+
+/**
+ * Submit a two-pass compute job: pls[0] reads in and writes tmp, then pls[1] reads tmp and writes out.
+ */
+int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pls[2],
+ AVFrame *out, AVFrame *tmp, AVFrame *in,
+ VkSampler sampler, void *push_src, size_t push_size);
+
+/**
+ * Submit a compute shader with two inputs and a single output.
+ */
+int ff_vk_filter_process_2in(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pl,
+ AVFrame *out, AVFrame *in1, AVFrame *in2,
+ VkSampler sampler, void *push_src, size_t push_size);
+
#endif /* AVFILTER_VULKAN_FILTER_H */
diff --git a/libavutil/vulkan_glslang.c b/libavfilter/vulkan_glslang.c
similarity index 95%
rename from libavutil/vulkan_glslang.c
rename to libavfilter/vulkan_glslang.c
index e7785f6d40..845a530ee0 100644
--- a/libavutil/vulkan_glslang.c
+++ b/libavfilter/vulkan_glslang.c
@@ -21,8 +21,9 @@
#include <glslang/build_info.h>
#include <glslang/Include/glslang_c_interface.h>
-#include "mem.h"
-#include "avassert.h"
+#include "vulkan_spirv.h"
+#include "libavutil/mem.h"
+#include "libavutil/avassert.h"
static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int glslc_refcount = 0;
@@ -176,11 +177,13 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
av_assert0(glslc_refcount);
+ *opaque = NULL;
+
if (!(glslc_shader = glslang_shader_create(&glslc_input)))
return AVERROR(ENOMEM);
if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n",
glslang_shader_get_info_log(glslc_shader),
glslang_shader_get_info_debug_log(glslc_shader));
@@ -189,7 +192,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
}
if (!glslang_shader_parse(glslc_shader, &glslc_input)) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n",
glslang_shader_get_info_log(glslc_shader),
glslang_shader_get_info_debug_log(glslc_shader));
@@ -206,7 +209,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT |
GLSLANG_MSG_VULKAN_RULES_BIT)) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n",
glslang_program_get_info_log(glslc_program),
glslang_program_get_info_debug_log(glslc_program));
@@ -219,10 +222,10 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
messages = glslang_program_SPIRV_get_messages(glslc_program);
if (messages) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_WARNING, "%s\n", messages);
} else {
- ff_vk_print_shader(avctx, shd, AV_LOG_VERBOSE);
+ ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE);
}
glslang_shader_delete(glslc_shader);
@@ -257,7 +260,7 @@ static void glslc_uninit(FFVkSPIRVCompiler **ctx)
av_freep(ctx);
}
-static FFVkSPIRVCompiler *ff_vk_glslang_init(void)
+FFVkSPIRVCompiler *ff_vk_glslang_init(void)
{
FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret));
if (!ret)
diff --git a/libavutil/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c
similarity index 96%
rename from libavutil/vulkan_shaderc.c
rename to libavfilter/vulkan_shaderc.c
index bd40edf187..38be1030ad 100644
--- a/libavutil/vulkan_shaderc.c
+++ b/libavfilter/vulkan_shaderc.c
@@ -18,7 +18,8 @@
#include <shaderc/shaderc.h>
-#include "mem.h"
+#include "libavutil/mem.h"
+#include "vulkan_spirv.h"
static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
FFVkSPIRVShader *shd, uint8_t **data,
@@ -43,6 +44,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
};
shaderc_compile_options_t opts = shaderc_compile_options_initialize();
+ *opaque = NULL;
if (!opts)
return AVERROR(ENOMEM);
@@ -65,7 +67,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE;
- ff_vk_print_shader(avctx, shd, loglevel);
+ ff_vk_shader_print(avctx, shd, loglevel);
if (message && (err || warn))
av_log(avctx, loglevel, "%s\n", message);
status = ret < FF_ARRAY_ELEMS(shdc_result) ? shdc_result[ret] : "unknown";
@@ -104,7 +106,7 @@ static void shdc_uninit(FFVkSPIRVCompiler **ctx)
av_freep(ctx);
}
-static FFVkSPIRVCompiler *ff_vk_shaderc_init(void)
+FFVkSPIRVCompiler *ff_vk_shaderc_init(void)
{
FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret));
if (!ret)
diff --git a/libavfilter/vulkan_spirv.h b/libavfilter/vulkan_spirv.h
new file mode 100644
index 0000000000..5638cd9696
--- /dev/null
+++ b/libavfilter/vulkan_spirv.h
@@ -0,0 +1,45 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VULKAN_SPIRV_H
+#define AVFILTER_VULKAN_SPIRV_H
+
+#include "libavutil/vulkan.h"
+
+#include "vulkan.h"
+#include "config.h"
+
+typedef struct FFVkSPIRVCompiler { /* callback table returned by ff_vk_glslang_init()/ff_vk_shaderc_init() */
+ void *priv; /* presumably backend-private state -- TODO confirm against the backends */
+ int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx,
+ struct FFVkSPIRVShader *shd, uint8_t **data,
+ size_t *size, const char *entrypoint, void **opaque); /* glslang and shaderc backends set *opaque = NULL on entry */
+ void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); /* NOTE(review): presumably releases what compile_shader left in *opaque -- confirm */
+ void (*uninit)(struct FFVkSPIRVCompiler **ctx); /* glslang and shaderc implementations finish with av_freep(ctx) */
+} FFVkSPIRVCompiler;
+
+#if CONFIG_LIBGLSLANG
+FFVkSPIRVCompiler *ff_vk_glslang_init(void);
+#define ff_vk_spirv_init ff_vk_glslang_init
+#endif
+#if CONFIG_LIBSHADERC
+FFVkSPIRVCompiler *ff_vk_shaderc_init(void);
+#define ff_vk_spirv_init ff_vk_shaderc_init
+#endif
+
+#endif /* AVFILTER_VULKAN_SPIRV_H */
--
2.39.2
[-- Attachment #57: 0056-avgblur_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18269 bytes --]
From b14473b21aa057181ec85e0ea3bac3e5fa053875 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:19 +0100
Subject: [PATCH 56/72] avgblur_vulkan: port for the rewrite
---
libavfilter/vf_avgblur_vulkan.c | 339 ++++++++++----------------------
1 file changed, 108 insertions(+), 231 deletions(-)
diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
index d118ce802c..17b2167951 100644
--- a/libavfilter/vf_avgblur_vulkan.c
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,23 +21,20 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
-#define CGS 32
-
typedef struct AvgBlurVulkanContext {
FFVulkanContext vkctx;
int initialized;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl_hor;
- FFVulkanPipeline *pl_ver;
-
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo tmp_images[3];
- VkDescriptorImageInfo output_images[3];
+ VkSampler sampler;
+ FFVulkanPipeline pl_hor;
+ FFVkSPIRVShader shd_hor;
+ FFVulkanPipeline pl_ver;
+ FFVkSPIRVShader shd_ver;
int size_x;
int size_y;
@@ -71,18 +70,41 @@ static const char blur_kernel[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
- FFVkSPIRVShader *shd;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque;
AvgBlurVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
- FFVulkanDescriptorSetBinding desc_i[2] = {
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "avgblur_hor_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT));
+ RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "avgblur_ver_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT));
+ shd = &s->shd_hor;
+
+ ff_vk_shader_set_compute_sizes(shd, 32, 1, 1);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_img",
@@ -95,238 +117,79 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
},
};
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0));
- desc_i[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!desc_i[0].sampler)
- return AVERROR_EXTERNAL;
-
- { /* Create shader for the horizontal pass */
- desc_i[0].updater = s->input_images;
- desc_i[1].updater = s->tmp_images;
-
- s->pl_hor = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl_hor)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl_hor, "avgblur_compute_hor",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl_hor, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0));
-
- GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x - 1);
- GLSLC(0, #define INC(x) (ivec2(x, 0)) );
- GLSLC(0, #define DIR(var) (var.x) );
- GLSLD( blur_kernel );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->planes & (1 << i)) {
- GLSLF(2, distort(pos, %i); ,i);
- } else {
- GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- }
- GLSLC(1, } );
+ GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x - 1);
+ GLSLC(0, #define INC(x) (ivec2(x, 0)) );
+ GLSLC(0, #define DIR(var) (var.x) );
+ GLSLD( blur_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ for (int i = 0; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ if (s->planes & (1 << i)) {
+ GLSLF(2, distort(pos, %i); ,i);
+ } else {
+ GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
+ GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
}
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
-
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl_hor));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl_hor));
+ GLSLC(1, } );
}
-
- { /* Create shader for the vertical pass */
- desc_i[0].updater = s->tmp_images;
- desc_i[1].updater = s->output_images;
-
- s->pl_ver = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl_ver)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl_ver, "avgblur_compute_ver",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 });
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl_ver, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0));
-
- GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y - 1);
- GLSLC(0, #define INC(x) (ivec2(0, x)) );
- GLSLC(0, #define DIR(var) (var.y) );
- GLSLD( blur_kernel );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->planes & (1 << i)) {
- GLSLF(2, distort(pos, %i); ,i);
- } else {
- GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- }
- GLSLC(1, } );
+ GLSLC(0, } );
+
+ shd = &s->shd_ver;
+ ff_vk_shader_set_compute_sizes(shd, 1, 32, 1);
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0));
+
+ GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y - 1);
+ GLSLC(0, #define INC(x) (ivec2(0, x)) );
+ GLSLC(0, #define DIR(var) (var.y) );
+ GLSLD( blur_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ for (int i = 0; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ if (s->planes & (1 << i)) {
+ GLSLF(2, distort(pos, %i); ,i);
+ } else {
+ GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
+ GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
}
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
-
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl_ver));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl_ver));
+ GLSLC(1, } );
}
+ GLSLC(0, } );
+
+ RET(spv->compile_shader(spv, ctx, &s->shd_hor, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, &s->shd_hor, spv_data, spv_len, "main"));
+ RET(spv->compile_shader(spv, ctx, &s->shd_ver, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, &s->shd_ver, spv_data, spv_len, "main"));
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl_hor, &s->shd_hor));
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl_ver, &s->shd_ver));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl_hor));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl_ver));
s->initialized = 1;
- return 0;
+ err = 0; /* success: fall through so the SPIR-V result and the compiler are released as well */
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f, AVFrame *in_f)
-{
- int err;
- VkCommandBuffer cmd_buf;
- AvgBlurVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- AVVkFrame *in = (AVVkFrame *)in_f->data[0];
- AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0];
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
-
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->tmp_images[i].imageView, tmp->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl_hor, 0);
- ff_vk_update_descriptor_set(vkctx, s->pl_ver, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = tmp->layout[i],
- .newLayout = s->tmp_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = tmp->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
- in->layout[i] = bar[0].newLayout;
- in->access[i] = bar[0].dstAccessMask;
-
- tmp->layout[i] = bar[1].newLayout;
- tmp->access[i] = bar[1].dstAccessMask;
-
- out->layout[i] = bar[2].newLayout;
- out->access[i] = bar[2].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_hor);
-
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_ver);
-
- vk->CmdDispatch(cmd_buf, s->vkctx.output_width,
- FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx,s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
}
static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
@@ -352,7 +215,9 @@ static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, tmp, in));
+ RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e,
+ (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver },
+ out, tmp, in, s->sampler, NULL, 0));
err = av_frame_copy_props(out, in);
if (err < 0)
@@ -373,6 +238,18 @@ fail:
static void avgblur_vulkan_uninit(AVFilterContext *avctx)
{
AvgBlurVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl_hor);
+ ff_vk_pipeline_free(vkctx, &s->pl_ver);
+ ff_vk_shader_free(vkctx, &s->shd_hor);
+ ff_vk_shader_free(vkctx, &s->shd_ver);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
ff_vk_uninit(&s->vkctx);
--
2.39.2
[-- Attachment #58: 0057-blend_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 16613 bytes --]
From 83edf3b91ffaed33b2103a6ba743487850f5325c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:43 +0100
Subject: [PATCH 57/72] blend_vulkan: port for the rewrite
---
libavfilter/vf_blend_vulkan.c | 315 +++++++++++-----------------------
1 file changed, 102 insertions(+), 213 deletions(-)
diff --git a/libavfilter/vf_blend_vulkan.c b/libavfilter/vf_blend_vulkan.c
index fcc21cbc8d..7ffdc9f3bd 100644
--- a/libavfilter/vf_blend_vulkan.c
+++ b/libavfilter/vf_blend_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* The blend modes are based on the blend.c.
*
* This file is part of FFmpeg.
@@ -22,12 +24,11 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#include "framesync.h"
#include "blend.h"
-#define CGS 32
-
#define IN_TOP 0
#define IN_BOTTOM 1
@@ -40,20 +41,18 @@ typedef struct FilterParamsVulkan {
typedef struct BlendVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
FFFrameSync fs;
- VkDescriptorImageInfo top_images[3];
- VkDescriptorImageInfo bottom_images[3];
- VkDescriptorImageInfo output_images[3];
+ int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
FilterParamsVulkan params[4];
double all_opacity;
enum BlendMode all_mode;
-
- int initialized;
} BlendVulkanContext;
#define DEFINE_BLEND_MODE(MODE, EXPR) \
@@ -125,223 +124,102 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
static av_cold int init_filter(AVFilterContext *avctx)
{
int err = 0;
- FFVkSampler *sampler;
- FFVkSPIRVShader *shd;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* must start NULL: the fail path tests it, and may run before compile_shader() sets it */
BlendVulkanContext *s = avctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!sampler)
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
-
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- {
- FFVulkanDescriptorSetBinding image_descs[] = {
- {
- .name = "top_images",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->top_images,
- .sampler = sampler,
- },
- {
- .name = "bottom_images",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->bottom_images,
- .sampler = sampler,
- },
- {
- .name = "output_images",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- shd = ff_vk_init_shader(s->pl, "blend_compute", image_descs[0].stages);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, CGS, 1 });
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
- for (int i = 0, j = 0; i < planes; i++) {
- for (j = 0; j < i; j++)
- if (s->params[i].blend_func == s->params[j].blend_func)
- break;
- /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */
- if (j == i) {
- GLSLD(s->params[i].blend_func);
- }
- }
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_images[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i);
- GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i);
- GLSLF(2, const float opacity = %f; ,s->params[i].opacity);
- GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend);
- GLSLC(0, );
- GLSLF(2, imageStore(output_images[%i], pos, dst); ,i);
- GLSLC(1, } );
- }
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
}
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
-
- s->initialized = 1;
-
-fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_frame, AVFrame *top_frame, AVFrame *bottom_frame)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- BlendVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *out = (AVVkFrame *)out_frame->data[0];
- AVVkFrame *top = (AVVkFrame *)top_frame->data[0];
- AVVkFrame *bottom = (AVVkFrame *)bottom_frame->data[0];
-
- AVHWFramesContext *top_fc = (AVHWFramesContext*)top_frame->hw_frames_ctx->data;
- AVHWFramesContext *bottom_fc = (AVHWFramesContext*)bottom_frame->hw_frames_ctx->data;
-
- const VkFormat *top_formats = av_vkfmt_from_pixfmt(top_fc->sw_format);
- const VkFormat *bottom_formats = av_vkfmt_from_pixfmt(bottom_fc->sw_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->top_images[i].imageView, top->img[i],
- top_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->bottom_images[i].imageView, bottom->img[i],
- bottom_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->top_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->bottom_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "blend_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "top_images",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "bottom_images",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_images",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0));
+
+ for (int i = 0, j = 0; i < planes; i++) {
+ for (j = 0; j < i; j++)
+ if (s->params[i].blend_func == s->params[j].blend_func)
+ break;
+ /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */
+ if (j == i) {
+ GLSLD(s->params[i].blend_func);
+ }
}
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = top->layout[i],
- .newLayout = s->top_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = top->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = bottom->layout[i],
- .newLayout = s->bottom_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = bottom->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- top->layout[i] = barriers[0].newLayout;
- top->access[i] = barriers[0].dstAccessMask;
-
- bottom->layout[i] = barriers[1].newLayout;
- bottom->access[i] = barriers[1].dstAccessMask;
-
- out->layout[i] = barriers[2].newLayout;
- out->access[i] = barriers[2].dstAccessMask;
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_images[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i);
+ GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i);
+ GLSLF(2, const float opacity = %f; ,s->params[i].opacity);
+ GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend);
+ GLSLC(0, );
+ GLSLF(2, imageStore(output_images[%i], pos, dst); ,i);
+ GLSLC(1, } );
}
+ GLSLC(0, } );
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS) / CGS,
- FFALIGN(s->vkctx.output_height, CGS) / CGS, 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, top_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, bottom_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+ RET(spv->compile_shader(spv, avctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
- ff_vk_qf_rotate(&s->qf);
-
- return 0;
+ s->initialized = 1;
fail:
- ff_vk_discard_exec_deps(s->exec);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
@@ -375,7 +253,9 @@ static int blend_frame(FFFrameSync *fs)
RET(init_filter(avctx));
}
- RET(process_frames(avctx, out, top, bottom));
+ RET(ff_vk_filter_process_2in(&s->vkctx, &s->e, &s->pl,
+ out, top, bottom,
+ s->sampler, NULL, 0));
return ff_filter_frame(outlink, out);
@@ -396,10 +276,19 @@ static av_cold int init(AVFilterContext *avctx)
static av_cold void uninit(AVFilterContext *avctx)
{
BlendVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
- ff_framesync_uninit(&s->fs);
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
ff_vk_uninit(&s->vkctx);
+ ff_framesync_uninit(&s->fs);
s->initialized = 0;
}
--
2.39.2
[-- Attachment #59: 0058-chromaber_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 14904 bytes --]
From 3328104c3ec2aa1412b5c8ea33ef8a96249acdd9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:53 +0100
Subject: [PATCH 58/72] chromaber_vulkan: port for the rewrite
---
libavfilter/vf_chromaber_vulkan.c | 288 ++++++++++--------------------
1 file changed, 99 insertions(+), 189 deletions(-)
diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c
index b9423e417e..24649f7b25 100644
--- a/libavfilter/vf_chromaber_vulkan.c
+++ b/libavfilter/vf_chromaber_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,21 +21,18 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
typedef struct ChromaticAberrationVulkanContext {
FFVulkanContext vkctx;
int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
-
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
/* Push constants / options */
struct {
@@ -68,205 +67,105 @@ static const char distort_chroma_kernel[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
- FFVkSampler *sampler;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* must start NULL: the fail path tests it, and may run before compile_shader() sets it */
ChromaticAberrationVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- /* Create a sampler */
- sampler = ff_vk_init_sampler(vkctx, 0, VK_FILTER_LINEAR);
- if (!sampler)
- return AVERROR_EXTERNAL;
-
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
/* Normalize options */
s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f;
s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f;
- { /* Create the shader */
- FFVulkanDescriptorSetBinding desc_i[2] = {
- {
- .name = "input_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
- .sampler = sampler,
- },
- {
- .name = "output_img",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "chromaber_compute",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
- GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
- GLSLC(1, vec2 dist; );
- GLSLC(0, }; );
- GLSLC(0, );
-
- ff_vk_add_push_constant(s->pl, 0, sizeof(s->opts),
- VK_SHADER_STAGE_COMPUTE_BIT);
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
-
- GLSLD( distort_chroma_kernel );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- if (planes == 1) {
- GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); );
- } else {
- GLSLC(1, ivec2 size = imageSize(output_img[0]); );
- GLSLC(1, vec2 npos = vec2(pos)/vec2(size); );
- GLSLC(1, vec4 res = texture(input_img[0], npos); );
- GLSLC(1, imageStore(output_img[0], pos, res); );
- for (int i = 1; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- GLSLF(2, distort_chroma(%i, size, pos); ,i);
- GLSLC(1, } else { );
- GLSLC(2, npos = vec2(pos)/vec2(size); );
- GLSLF(2, res = texture(input_img[%i], npos); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- GLSLC(1, } );
- }
- }
- GLSLC(0, } );
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "chromaber_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, vec2 dist; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
+
+ GLSLD( distort_chroma_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ if (planes == 1) {
+ GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); );
+ } else {
+ GLSLC(1, ivec2 size = imageSize(output_img[0]); );
+ GLSLC(1, vec2 npos = vec2(pos)/vec2(size); );
+ GLSLC(1, vec4 res = texture(input_img[0], npos); );
+ GLSLC(1, imageStore(output_img[0], pos, res); );
+ for (int i = 1; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ GLSLF(2, distort_chroma(%i, size, pos); ,i);
+ GLSLC(1, } else { );
+ GLSLC(2, npos = vec2(pos)/vec2(size); );
+ GLSLF(2, res = texture(input_img[%i], npos); ,i);
+ GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
+ GLSLC(1, } );
+ }
}
+ GLSLC(0, } );
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
s->initialized = 1;
- return 0;
+ err = 0; /* success: fall through so the SPIR-V result and the compiler are released as well */
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- ChromaticAberrationVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- AVVkFrame *in = (AVVkFrame *)in_f->data[0];
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *ouput_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- ouput_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar[2] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
- in->layout[i] = bar[0].newLayout;
- in->access[i] = bar[0].dstAccessMask;
-
- out->layout[i] = bar[1].newLayout;
- out->access[i] = bar[1].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
- ff_vk_update_push_exec(vkctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT,
- 0, sizeof(s->opts), &s->opts);
-
- vk->CmdDispatch(cmd_buf,
- FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0],
- FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
- ff_vk_qf_rotate(&s->qf);
-
- return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
return err;
}
@@ -286,7 +185,8 @@ static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, in);
if (err < 0)
@@ -305,6 +205,16 @@ fail:
static void chromaber_vulkan_uninit(AVFilterContext *avctx)
{
ChromaticAberrationVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
ff_vk_uninit(&s->vkctx);
--
2.39.2
[-- Attachment #60: 0059-flip_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 13075 bytes --]
From f69abda00b625c1f9d69421e7c6bef6713a43f76 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:12:42 +0100
Subject: [PATCH 59/72] flip_vulkan: port for the rewrite
---
libavfilter/vf_flip_vulkan.c | 229 ++++++++++++-----------------------
1 file changed, 78 insertions(+), 151 deletions(-)
diff --git a/libavfilter/vf_flip_vulkan.c b/libavfilter/vf_flip_vulkan.c
index 0223786ef1..0330dce257 100644
--- a/libavfilter/vf_flip_vulkan.c
+++ b/libavfilter/vf_flip_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -20,10 +22,9 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
-#define CGS 32
-
enum FlipType {
FLIP_VERTICAL,
FLIP_HORIZONTAL,
@@ -32,32 +33,49 @@ enum FlipType {
typedef struct FlipVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
-
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
} FlipVulkanContext;
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType type)
{
int err = 0;
- FFVkSPIRVShader *shd;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */
FlipVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "flip_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
- FFVulkanDescriptorSetBinding image_descs[] = {
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_image",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_image",
@@ -67,167 +85,75 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
},
};
- image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- {
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl, "flip_compute", image_descs[0].stages);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_image[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- switch (type)
- {
- case FLIP_HORIZONTAL:
- GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i);
- break;
- case FLIP_VERTICAL:
- GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i);
- break;
- case FLIP_BOTH:
- GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i);
- break;
- default:
- GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i);
- break;
- }
- GLSLF(2, imageStore(output_image[%i], pos, res); ,i);
- GLSLC(1, } );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ for (int i = 0; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_image[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ switch (type)
+ {
+ case FLIP_HORIZONTAL:
+ GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i);
+ break;
+ case FLIP_VERTICAL:
+ GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i);
+ break;
+ case FLIP_BOTH:
+ GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i);
+ break;
+ default:
+ GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i);
+ break;
}
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+ GLSLF(2, imageStore(output_image[%i], pos, res); ,i);
+ GLSLC(1, } );
}
+ GLSLC(0, } );
+
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
s->initialized = 1;
fail:
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
static av_cold void flip_vulkan_uninit(AVFilterContext *avctx)
{
FlipVulkanContext *s = avctx->priv;
- ff_vk_uninit(&s->vkctx);
- s->initialized = 0;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- FlipVulkanContext *s = avctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *out = (AVVkFrame *)outframe->data[0];
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
+ FFVulkanFunctions *vk = &vkctx->vkfn;
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- in->layout[i] = barriers[0].newLayout;
- in->access[i] = barriers[0].dstAccessMask;
-
- out->layout[i] = barriers[1].newLayout;
- out->access[i] = barriers[1].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_qf_rotate(&s->qf);
+ ff_vk_uninit(&s->vkctx);
- return 0;
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
+ s->initialized = 0;
}
static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type)
@@ -247,7 +173,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type)
if (!s->initialized)
RET(init_filter(ctx, in, type));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, NULL, 0));
RET(av_frame_copy_props(out, in));
--
2.39.2
[-- Attachment #61: 0060-gblur_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 17658 bytes --]
From 369e41818f25c68097764dd417cd03b6984e3ce6 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:12:55 +0100
Subject: [PATCH 60/72] gblur_vulkan: port for the rewrite
---
libavfilter/vf_gblur_vulkan.c | 314 ++++++++++------------------------
1 file changed, 95 insertions(+), 219 deletions(-)
diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c
index c6360799a7..72308ffe83 100644
--- a/libavfilter/vf_gblur_vulkan.c
+++ b/libavfilter/vf_gblur_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -20,6 +22,7 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#define CGS 32
@@ -27,26 +30,23 @@
typedef struct GBlurVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl_hor;
- FFVulkanPipeline *pl_ver;
- FFVkBuffer params_buf_hor;
- FFVkBuffer params_buf_ver;
-
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo tmp_images[3];
- VkDescriptorImageInfo output_images[3];
- VkDescriptorBufferInfo params_desc_hor;
- VkDescriptorBufferInfo params_desc_ver;
int initialized;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ VkSampler sampler;
+ FFVulkanPipeline pl_hor;
+ FFVkSPIRVShader shd_hor;
+ FFVkBuffer params_hor;
+ FFVulkanPipeline pl_ver;
+ FFVkSPIRVShader shd_ver;
+ FFVkBuffer params_ver;
+
int size;
int sizeV;
int planes;
float sigma;
float sigmaV;
- AVFrame *tmpframe;
} GBlurVulkanContext;
static const char gblur_func[] = {
@@ -118,16 +118,17 @@ static av_cold void init_gaussian_params(GBlurVulkanContext *s)
s->sizeV = s->size;
else
init_kernel_size(s, &s->sizeV);
-
- s->tmpframe = NULL;
}
-static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd,
- FFVkBuffer *params_buf, VkDescriptorBufferInfo *params_desc,
- int ksize, float sigma)
+static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd, FFVkBuffer *params_buf,
+ int ksize, float sigma, FFVkSPIRVCompiler *spv)
{
int err = 0;
uint8_t *kernel_mapped;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
@@ -137,7 +138,6 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
.mem_quali = "readonly",
.mem_layout = "std430",
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = NULL,
.buf_content = NULL,
};
@@ -145,10 +145,9 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
if (!kernel_def)
return AVERROR(ENOMEM);
- buf_desc.updater = params_desc;
buf_desc.buf_content = kernel_def;
- RET(ff_vk_add_descriptor_set(&s->vkctx, pl, shd, &buf_desc, 1, 0));
+ RET(ff_vk_pipeline_descriptor_set_add(&s->vkctx, pl, shd, &buf_desc, 1, 1, 0));
GLSLD( gblur_func );
GLSLC(0, void main() );
@@ -169,26 +168,31 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
}
GLSLC(0, } );
- RET(ff_vk_compile_shader(&s->vkctx, shd, "main"));
+ RET(spv->compile_shader(spv, s, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(&s->vkctx, shd, spv_data, spv_len, "main"));
- RET(ff_vk_init_pipeline_layout(&s->vkctx, pl));
- RET(ff_vk_init_compute_pipeline(&s->vkctx, pl));
+ RET(ff_vk_init_compute_pipeline(&s->vkctx, pl, shd));
+ RET(ff_vk_exec_pipeline_register(&s->vkctx, &s->e, pl));
- RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+ RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL,
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0));
init_gaussian_kernel((float *)kernel_mapped, sigma, ksize);
RET(ff_vk_unmap_buffers(&s->vkctx, params_buf, 1, 1));
- params_desc->buffer = params_buf->buf;
- params_desc->range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(&s->vkctx, pl, 1);
+ RET(ff_vk_set_descriptor_buffer(&s->vkctx, pl, NULL, 1, 0, 0,
+ params_buf->address, params_buf->size,
+ VK_FORMAT_UNDEFINED));
fail:
av_free(kernel_def);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
return err;
}
@@ -196,16 +200,35 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err = 0;
GBlurVulkanContext *s = ctx->priv;
- FFVkSPIRVShader *shd;
+ FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
- FFVulkanDescriptorSetBinding image_descs[] = {
+ FFVkSPIRVShader *shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "gblur_hor_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT));
+ RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "gblur_ver_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT));
+
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_images",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_images",
@@ -218,215 +241,64 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
},
};
- image_descs[0].sampler = ff_vk_init_sampler(&s->vkctx, 1, VK_FILTER_LINEAR);
- if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
-
init_gaussian_params(s);
- ff_vk_qf_init(&s->vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
{
- /* Create shader for the horizontal pass */
- image_descs[0].updater = s->input_images;
- image_descs[1].updater = s->tmp_images;
-
- s->pl_hor = ff_vk_create_pipeline(&s->vkctx, &s->qf);
- if (!s->pl_hor) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
-
- shd = ff_vk_init_shader(s->pl_hor, "gblur_compute_hor", image_descs[0].stages);
- if (!shd) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ shd = &s->shd_hor;
+ ff_vk_shader_set_compute_sizes(shd, 32, 1, 1);
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
- RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_hor, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0));
GLSLC(0, #define OFFSET (vec2(i, 0.0)));
- RET(init_gblur_pipeline(s, s->pl_hor, shd, &s->params_buf_hor, &s->params_desc_hor,
- s->size, s->sigma));
+ RET(init_gblur_pipeline(s, &s->pl_hor, shd, &s->params_hor, s->size, s->sigma, spv));
}
{
- /* Create shader for the vertical pass */
- image_descs[0].updater = s->tmp_images;
- image_descs[1].updater = s->output_images;
-
- s->pl_ver = ff_vk_create_pipeline(&s->vkctx, &s->qf);
- if (!s->pl_ver) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ shd = &s->shd_ver;
+ ff_vk_shader_set_compute_sizes(shd, 1, 32, 1);
- shd = ff_vk_init_shader(s->pl_ver, "gblur_compute_ver", image_descs[0].stages);
- if (!shd) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 });
- RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_ver, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0));
GLSLC(0, #define OFFSET (vec2(0.0, i)));
- RET(init_gblur_pipeline(s, s->pl_ver, shd, &s->params_buf_ver, &s->params_desc_ver,
- s->sizeV, s->sigmaV));
+ RET(init_gblur_pipeline(s, &s->pl_ver, shd, &s->params_ver, s->sizeV, s->sigmaV, spv));
}
- RET(ff_vk_create_exec_ctx(&s->vkctx, &s->exec, &s->qf));
-
s->initialized = 1;
fail:
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
static av_cold void gblur_vulkan_uninit(AVFilterContext *avctx)
{
GBlurVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
- av_frame_free(&s->tmpframe);
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl_hor);
+ ff_vk_pipeline_free(vkctx, &s->pl_ver);
+ ff_vk_shader_free(vkctx, &s->shd_hor);
+ ff_vk_shader_free(vkctx, &s->shd_ver);
+ ff_vk_free_buf(vkctx, &s->params_hor);
+ ff_vk_free_buf(vkctx, &s->params_ver);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_free_buf(&s->vkctx, &s->params_buf_hor);
- ff_vk_free_buf(&s->vkctx, &s->params_buf_ver);
ff_vk_uninit(&s->vkctx);
s->initialized = 0;
}
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
- int err;
- VkCommandBuffer cmd_buf;
- GBlurVulkanContext *s = avctx->priv;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
-
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *out = (AVVkFrame *)outframe->data[0];
- AVVkFrame *tmp = (AVVkFrame *)s->tmpframe->data[0];
-
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(&s->vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->input_images[i].imageView,
- in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->tmp_images[i].imageView,
- tmp->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->output_images[i].imageView,
- out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(&s->vkctx, s->pl_hor, 0);
- ff_vk_update_descriptor_set(&s->vkctx, s->pl_ver, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = tmp->layout[i],
- .newLayout = s->tmp_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = tmp->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- in->layout[i] = barriers[0].newLayout;
- in->access[i] = barriers[0].dstAccessMask;
-
- tmp->layout[i] = barriers[1].newLayout;
- tmp->access[i] = barriers[1].dstAccessMask;
-
- out->layout[i] = barriers[2].newLayout;
- out->access[i] = barriers[2].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_hor);
-
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
-
- ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_ver);
-
- vk->CmdDispatch(cmd_buf,s->vkctx.output_width,
- FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
-
- ff_vk_add_exec_dep(&s->vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(&s->vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(&s->vkctx, s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
-
- return 0;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
-}
-
static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
{
int err;
- AVFrame *out = NULL;
+ AVFrame *tmp = NULL, *out = NULL;
AVFilterContext *ctx = link->dst;
GBlurVulkanContext *s = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
@@ -437,28 +309,32 @@ static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
goto fail;
}
- if (!s->initialized) {
- RET(init_filter(ctx, in));
- s->tmpframe = ff_get_video_buffer(outlink, outlink->w, outlink->h);
- if (!s->tmpframe) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!tmp) {
+ err = AVERROR(ENOMEM);
+ goto fail;
}
- RET(process_frames(ctx, out, in));
+ if (!s->initialized)
+ RET(init_filter(ctx, in));
- RET(av_frame_copy_props(out, in));
+ RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e,
+ (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver },
+ out, tmp, in, s->sampler, NULL, 0));
+
+ err = av_frame_copy_props(out, in);
+ if (err < 0)
+ goto fail;
av_frame_free(&in);
+ av_frame_free(&tmp);
return ff_filter_frame(outlink, out);
fail:
av_frame_free(&in);
+ av_frame_free(&tmp);
av_frame_free(&out);
- av_frame_free(&s->tmpframe);
-
return err;
}
--
2.39.2
[-- Attachment #62: 0061-overlay_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18798 bytes --]
From 1a4987ea3171409cc15b7ea85c2d483cf155378e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:05 +0100
Subject: [PATCH 61/72] overlay_vulkan: port for the rewrite
---
libavfilter/vf_overlay_vulkan.c | 397 ++++++++++----------------------
1 file changed, 122 insertions(+), 275 deletions(-)
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index bdf231f4ef..694cb666d8 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,26 +21,26 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#include "framesync.h"
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
typedef struct OverlayVulkanContext {
FFVulkanContext vkctx;
+ FFFrameSync fs;
int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
- FFFrameSync fs;
- FFVkBuffer params_buf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo main_images[3];
- VkDescriptorImageInfo overlay_images[3];
- VkDescriptorImageInfo output_images[3];
- VkDescriptorBufferInfo params_desc;
+ /* Push constants / options */
+ struct {
+ int32_t o_offset[2*3];
+ int32_t o_size[2*3];
+ } opts;
int overlay_x;
int overlay_y;
@@ -80,279 +82,113 @@ static const char overlay_alpha[] = {
static av_cold int init_filter(AVFilterContext *ctx)
{
int err;
- FFVkSampler *sampler;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque;
OverlayVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_NEAREST);
- if (!sampler)
+ const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
+ const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
-
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- { /* Create the shader */
- const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
-
- FFVulkanDescriptorSetBinding desc_i[3] = {
- {
- .name = "main_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->main_images,
- .sampler = sampler,
- },
- {
- .name = "overlay_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->overlay_images,
- .sampler = sampler,
- },
- {
- .name = "output_img",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- FFVulkanDescriptorSetBinding desc_b = {
- .name = "params",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .mem_quali = "readonly",
- .mem_layout = "std430",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = &s->params_desc,
- .buf_content = "ivec2 o_offset[3], o_size[3];",
- };
-
- FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "overlay_compute",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
-
- GLSLD( overlay_noalpha );
- GLSLD( overlay_alpha );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- GLSLF(1, int planes = %i; ,planes);
- GLSLC(1, for (int i = 0; i < planes; i++) { );
- if (ialpha)
- GLSLC(2, overlay_alpha_opaque(i, pos); );
- else
- GLSLC(2, overlay_noalpha(i, pos); );
- GLSLC(1, } );
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- }
-
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
-
- { /* Create and update buffer */
- const AVPixFmtDescriptor *desc;
-
- /* NOTE: std430 requires the same identical struct layout, padding and
- * alignment as C, so we're allowed to do this, as this will map
- * exactly to what the shader recieves */
- struct {
- int32_t o_offset[2*3];
- int32_t o_size[2*3];
- } *par;
-
- err = ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par), NULL,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
- if (err)
- return err;
-
- err = ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0);
- if (err)
- return err;
-
- desc = av_pix_fmt_desc_get(s->vkctx.output_format);
-
- par->o_offset[0] = s->overlay_x;
- par->o_offset[1] = s->overlay_y;
- par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w;
- par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h;
- par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w;
- par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h;
-
- par->o_size[0] = s->overlay_w;
- par->o_size[1] = s->overlay_h;
- par->o_size[2] = par->o_size[0] >> desc->log2_chroma_w;
- par->o_size[3] = par->o_size[1] >> desc->log2_chroma_h;
- par->o_size[4] = par->o_size[0] >> desc->log2_chroma_w;
- par->o_size[5] = par->o_size[1] >> desc->log2_chroma_h;
-
- err = ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1);
- if (err)
- return err;
-
- s->params_desc.buffer = s->params_buf.buf;
- s->params_desc.range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 1);
}
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "overlay_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, ivec2 o_offset[3]; );
+ GLSLC(1, ivec2 o_size[3]; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "main_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "overlay_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0));
+
+ GLSLD( overlay_noalpha );
+ GLSLD( overlay_alpha );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, int planes = %i; ,planes);
+ GLSLC(1, for (int i = 0; i < planes; i++) { );
+ if (ialpha)
+ GLSLC(2, overlay_alpha_opaque(i, pos); );
+ else
+ GLSLC(2, overlay_noalpha(i, pos); );
+ GLSLC(1, } );
+ GLSLC(0, } );
+
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
+
+ s->opts.o_offset[0] = s->overlay_x;
+ s->opts.o_offset[1] = s->overlay_y;
+ s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h;
+ s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h;
+
+ s->opts.o_size[0] = s->overlay_w;
+ s->opts.o_size[1] = s->overlay_h;
+ s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h;
+ s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h;
s->initialized = 1;
- return 0;
-
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
- AVFrame *main_f, AVFrame *overlay_f)
-{
- int err;
- VkCommandBuffer cmd_buf;
- OverlayVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
- AVVkFrame *main = (AVVkFrame *)main_f->data[0];
- AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0];
-
- AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data;
- AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data;
-
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
- const VkFormat *main_sw_formats = av_vkfmt_from_pixfmt(main_fc->sw_format);
- const VkFormat *overlay_sw_formats = av_vkfmt_from_pixfmt(overlay_fc->sw_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->main_images[i].imageView, main->img[i],
- main_sw_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->overlay_images[i].imageView, overlay->img[i],
- overlay_sw_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->main_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar[3] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = main->layout[i],
- .newLayout = s->main_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = main->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = overlay->layout[i],
- .newLayout = s->overlay_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = overlay->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
- main->layout[i] = bar[0].newLayout;
- main->access[i] = bar[0].dstAccessMask;
-
- overlay->layout[i] = bar[1].newLayout;
- overlay->access[i] = bar[1].dstAccessMask;
-
- out->layout[i] = bar[2].newLayout;
- out->access[i] = bar[2].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
- vk->CmdDispatch(cmd_buf,
- FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0],
- FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, main_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, overlay_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
- ff_vk_qf_rotate(&s->qf);
-
- return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
return err;
}
@@ -394,7 +230,9 @@ static int overlay_vulkan_blend(FFFrameSync *fs)
goto fail;
}
- RET(process_frames(ctx, out, input_main, input_overlay));
+ RET(ff_vk_filter_process_2in(&s->vkctx, &s->e, &s->pl,
+ out, input_main, input_overlay,
+ s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, input_main);
if (err < 0)
@@ -443,8 +281,17 @@ static av_cold int overlay_vulkan_init(AVFilterContext *avctx)
static void overlay_vulkan_uninit(AVFilterContext *avctx)
{
OverlayVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_free_buf(&s->vkctx, &s->params_buf);
ff_vk_uninit(&s->vkctx);
ff_framesync_uninit(&s->fs);
--
2.39.2
[-- Attachment #63: 0062-scale_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18951 bytes --]
From 4ec8834fa164e172420cd162d4a51735fbddd986 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:32 +0100
Subject: [PATCH 62/72] scale_vulkan: port for the rewrite
---
libavfilter/vf_scale_vulkan.c | 365 ++++++++++++----------------------
1 file changed, 124 insertions(+), 241 deletions(-)
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 31dc35569b..84bd19c012 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,12 +21,11 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "scale_eval.h"
#include "internal.h"
#include "colorspace.h"
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
enum ScalerFunc {
F_BILINEAR = 0,
F_NEAREST,
@@ -35,15 +36,17 @@ enum ScalerFunc {
typedef struct ScaleVulkanContext {
FFVulkanContext vkctx;
+ int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
- FFVkBuffer params_buf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
- VkDescriptorBufferInfo params_desc;
+ /* Push constants / options */
+ struct {
+ float yuv_matrix[4][4];
+ } opts;
char *out_format_string;
char *w_expr;
@@ -51,8 +54,6 @@ typedef struct ScaleVulkanContext {
enum ScalerFunc scaler;
enum AVColorRange out_range;
-
- int initialized;
} ScaleVulkanContext;
static const char scale_bilinear[] = {
@@ -110,10 +111,15 @@ static const char write_444[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
- FFVkSampler *sampler;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */
VkFilter sampler_mode;
ScaleVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
int crop_x = in->crop_left;
int crop_y = in->crop_top;
@@ -121,8 +127,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
int crop_h = in->height - (in->crop_top + in->crop_bottom);
int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
switch (s->scaler) {
case F_NEAREST:
sampler_mode = VK_FILTER_NEAREST;
@@ -132,264 +136,133 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
break;
};
- /* Create a sampler */
- sampler = ff_vk_init_sampler(vkctx, 0, sampler_mode);
- if (!sampler)
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
+ }
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- { /* Create the shader */
- FFVulkanDescriptorSetBinding desc_i[2] = {
- {
- .name = "input_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = in_planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
- .sampler = sampler,
- },
- {
- .name = "output_img",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = av_pix_fmt_count_planes(s->vkctx.output_format),
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- FFVulkanDescriptorSetBinding desc_b = {
- .name = "params",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .mem_quali = "readonly",
- .mem_layout = "std430",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = &s->params_desc,
- .buf_content = "mat4 yuv_matrix;",
- };
-
- FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "scale_compute",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
-
- GLSLD( scale_bilinear );
-
- if (s->vkctx.output_format != s->vkctx.input_format) {
- GLSLD( rgb2yuv );
- }
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "scale_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, mat4 yuv_matrix; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = in_planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = av_pix_fmt_count_planes(s->vkctx.output_format),
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
- switch (s->vkctx.output_format) {
- case AV_PIX_FMT_NV12: GLSLD(write_nv12); break;
- case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
- case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
- default: break;
- }
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height);
- GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h);
- GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y);
- GLSLC(0, );
-
- if (s->vkctx.output_format == s->vkctx.input_format) {
- for (int i = 0; i < desc_i[1].elems; i++) {
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- switch (s->scaler) {
- case F_NEAREST:
- case F_BILINEAR:
- GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- break;
- };
- GLSLC(1, } );
- }
- } else {
- GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); );
- GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG);
- switch (s->vkctx.output_format) {
- case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break;
- case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break;
- case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break;
- default: return AVERROR(EINVAL);
- }
- }
+ GLSLD( scale_bilinear );
+
+ if (s->vkctx.output_format != s->vkctx.input_format) {
+ GLSLD( rgb2yuv );
+ }
- GLSLC(0, } );
+ switch (s->vkctx.output_format) {
+ case AV_PIX_FMT_NV12: GLSLD(write_nv12); break;
+ case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
+ case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
+ default: break;
+ }
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height);
+ GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h);
+ GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y);
+ GLSLC(0, );
+
+ if (s->vkctx.output_format == s->vkctx.input_format) {
+ for (int i = 0; i < desc[1].elems; i++) { /* desc[1] is output_img: one pass per output plane */
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ switch (s->scaler) {
+ case F_NEAREST:
+ case F_BILINEAR:
+ GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i);
+ GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
+ break;
+ };
+ GLSLC(1, } );
+ }
+ } else {
+ GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); );
+ GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG);
+ switch (s->vkctx.output_format) {
+ case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break;
+ case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break;
+ case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break;
+ default: return AVERROR(EINVAL);
+ }
}
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+ GLSLC(0, } );
if (s->vkctx.output_format != s->vkctx.input_format) {
const AVLumaCoefficients *lcoeffs;
double tmp_mat[3][3];
- struct {
- float yuv_matrix[4][4];
- } *par;
-
lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace);
if (!lcoeffs) {
av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n");
return AVERROR(EINVAL);
}
- RET(ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par), NULL,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
-
- RET(ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0));
-
ff_fill_rgb2yuv_table(lcoeffs, tmp_mat);
- memset(par, 0, sizeof(*par));
-
for (int y = 0; y < 3; y++)
for (int x = 0; x < 3; x++)
- par->yuv_matrix[x][y] = tmp_mat[x][y];
-
- par->yuv_matrix[3][3] = 1.0;
-
- RET(ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1));
-
- s->params_desc.buffer = s->params_buf.buf;
- s->params_desc.range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 1);
+ s->opts.yuv_matrix[x][y] = tmp_mat[x][y];
+ s->opts.yuv_matrix[3][3] = 1.0;
}
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
s->initialized = 1;
return 0;
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- ScaleVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- AVVkFrame *in = (AVVkFrame *)in_f->data[0];
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
- VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2];
- int barrier_count = 0;
- const int planes = av_pix_fmt_count_planes(s->vkctx.input_format);
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- };
-
- memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier));
-
- in->layout[i] = bar.newLayout;
- in->access[i] = bar.dstAccessMask;
- }
-
- for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) {
- VkImageMemoryBarrier bar = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- };
-
- memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier));
-
- out->layout[i] = bar.newLayout;
- out->access[i] = bar.dstAccessMask;
- }
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, barrier_count, barriers);
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
- vk->CmdDispatch(cmd_buf,
- FFALIGN(vkctx->output_width, CGROUPS[0])/CGROUPS[0],
- FFALIGN(vkctx->output_height, CGROUPS[1])/CGROUPS[1], 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
}
static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
@@ -408,7 +281,8 @@ static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, in);
if (err < 0)
@@ -475,8 +349,17 @@ static int scale_vulkan_config_output(AVFilterLink *outlink)
static void scale_vulkan_uninit(AVFilterContext *avctx)
{
ScaleVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_free_buf(&s->vkctx, &s->params_buf);
ff_vk_uninit(&s->vkctx);
s->initialized = 0;
--
2.39.2
[-- Attachment #64: 0063-transpose_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 12391 bytes --]
From ec245a2b213f82a52b9a5120062ab4f620519100 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:43 +0100
Subject: [PATCH 63/72] transpose_vulkan: port for the rewrite
---
libavfilter/vf_transpose_vulkan.c | 223 ++++++++++--------------------
1 file changed, 75 insertions(+), 148 deletions(-)
diff --git a/libavfilter/vf_transpose_vulkan.c b/libavfilter/vf_transpose_vulkan.c
index 30d052e08c..36f286b219 100644
--- a/libavfilter/vf_transpose_vulkan.c
+++ b/libavfilter/vf_transpose_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -20,41 +22,59 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#include "transpose.h"
-#define CGS 32
-
typedef struct TransposeVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
+ int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
int dir;
int passthrough;
- int initialized;
} TransposeVulkanContext;
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
- int err = 0;
- FFVkSPIRVShader *shd;
+ int err;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque;
TransposeVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
+
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "transpose_compute", VK_SHADER_STAGE_COMPUTE_BIT));
- FFVulkanDescriptorSetBinding image_descs[] = {
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 1, 1);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_images",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_images",
@@ -64,154 +84,49 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
},
};
- image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
-
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- {
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl, "transpose_compute", image_descs[0].stages);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_images[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->dir == TRANSPOSE_CCLOCK)
- GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
- else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
- GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i);
- if (s->dir == TRANSPOSE_CLOCK)
- GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); );
- } else
- GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i);
- GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
- GLSLC(1, } );
- }
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
- }
-
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
- s->initialized = 1;
-
-fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- TransposeVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *out = (AVVkFrame *)outframe->data[0];
-
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- in->layout[i] = barriers[0].newLayout;
- in->access[i] = barriers[0].dstAccessMask;
-
- out->layout[i] = barriers[1].newLayout;
- out->access[i] = barriers[1].dstAccessMask;
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_images[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ if (s->dir == TRANSPOSE_CCLOCK)
+ GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
+ else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
+ GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i);
+ if (s->dir == TRANSPOSE_CLOCK)
+ GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); );
+ } else
+ GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i);
+ GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
+ GLSLC(1, } );
}
+ GLSLC(0, } );
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
- ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
+ s->initialized = 1;
return 0;
fail:
- ff_vk_discard_exec_deps(s->exec);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
@@ -235,7 +150,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, NULL, 0));
RET(av_frame_copy_props(out, in));
@@ -259,6 +175,17 @@ fail:
static av_cold void transpose_vulkan_uninit(AVFilterContext *avctx)
{
TransposeVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
+
ff_vk_uninit(&s->vkctx);
s->initialized = 0;
--
2.39.2
[-- Attachment #65: 0064-avcodec-add-AVHWAccel.free_frame_priv-callback.patch --]
[-- Type: text/x-diff, Size: 7769 bytes --]
From dbf81f602283527ea27d7ddac58e8ff648fc5557 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 18:03:05 +0100
Subject: [PATCH 64/72] avcodec: add AVHWAccel.free_frame_priv callback
---
libavcodec/av1dec.c | 4 ++--
libavcodec/avcodec.h | 8 ++++++++
libavcodec/decode.c | 19 +++++++++++++++++++
libavcodec/decode.h | 11 +++++++++++
libavcodec/h264_slice.c | 3 ++-
libavcodec/hevc_refs.c | 3 ++-
libavcodec/mpegpicture.c | 4 +++-
libavcodec/vp8.c | 2 +-
libavcodec/vp9.c | 2 +-
9 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
index d83c902f1f..d105835d51 100644
--- a/libavcodec/av1dec.c
+++ b/libavcodec/av1dec.c
@@ -24,6 +24,7 @@
#include "libavutil/pixdesc.h"
#include "libavutil/opt.h"
#include "avcodec.h"
+#include "decode.h"
#include "av1dec.h"
#include "bytestream.h"
#include "codec_internal.h"
@@ -836,8 +837,7 @@ static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f)
if (avctx->hwaccel) {
const AVHWAccel *hwaccel = avctx->hwaccel;
if (hwaccel->frame_priv_data_size) {
- f->hwaccel_priv_buf =
- av_buffer_allocz(hwaccel->frame_priv_data_size);
+ f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, hwaccel);
if (!f->hwaccel_priv_buf) {
ret = AVERROR(ENOMEM);
goto fail;
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 17416791a6..6babfc7132 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2206,6 +2206,14 @@ typedef struct AVHWAccel {
* that avctx->hwaccel_priv_data is invalid.
*/
int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
+
+ /**
+ * Callback to free the hwaccel-specific frame data.
+ *
+ * @param avctx the codec context
+ * @param data the per-frame hardware accelerator private data to be freed.
+ */
+ void (*free_frame_priv)(AVCodecContext *avctx, void *data);
} AVHWAccel;
/**
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 93ecd36c2b..b9a2ec84f6 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1675,3 +1675,22 @@ int ff_copy_palette(void *dst, const AVPacket *src, void *logctx)
}
return 0;
}
+
+AVBufferRef *ff_alloc_hwaccel_frame_priv_data(AVCodecContext *avctx,
+                                              const AVHWAccel *hwaccel)
+{
+    /* Zeroed private storage; avctx is handed to the free callback as opaque. */
+    const size_t priv_size = hwaccel->frame_priv_data_size;
+    uint8_t *priv = av_mallocz(priv_size);
+    AVBufferRef *buf;
+
+    if (!priv)
+        return NULL;
+
+    buf = av_buffer_create(priv, priv_size,
+                           (void (*)(void *, uint8_t *))hwaccel->free_frame_priv,
+                           avctx, 0);
+    if (!buf)
+        av_free(priv); /* wrapping failed: the raw allocation is still ours */
+    return buf;
+}
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 8430ffbd66..aa40baafc0 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -150,4 +150,15 @@ int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame, int flags);
int ff_side_data_update_matrix_encoding(AVFrame *frame,
enum AVMatrixEncoding matrix_encoding);
+/**
+ * Allocate zero-initialized hwaccel frame private data and wrap it in an
+ * AVBufferRef.
+ *
+ * @param avctx The codec context to attach to the buffer as its opaque value
+ * @param hwaccel The hwaccel whose frame_priv_data_size bytes are allocated
+ * @return The new buffer reference, or NULL if allocation fails
+ */
+AVBufferRef *ff_alloc_hwaccel_frame_priv_data(AVCodecContext *avctx,
+ const AVHWAccel *hwaccel);
+
#endif /* AVCODEC_DECODE_H */
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 8ac66b343c..c0aa31bcd9 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -33,6 +33,7 @@
#include "libavutil/pixdesc.h"
#include "libavutil/timecode.h"
#include "internal.h"
+#include "decode.h"
#include "cabac.h"
#include "cabac_functions.h"
#include "decode.h"
@@ -212,7 +213,7 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
const AVHWAccel *hwaccel = h->avctx->hwaccel;
av_assert0(!pic->hwaccel_picture_private);
if (hwaccel->frame_priv_data_size) {
- pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+ pic->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(h->avctx, hwaccel);
if (!pic->hwaccel_priv_buf)
return AVERROR(ENOMEM);
pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data;
diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
index 811e8feff8..30cbb8b37a 100644
--- a/libavcodec/hevc_refs.c
+++ b/libavcodec/hevc_refs.c
@@ -23,6 +23,7 @@
#include "libavutil/avassert.h"
+#include "decode.h"
#include "thread.h"
#include "hevc.h"
#include "hevcdec.h"
@@ -118,7 +119,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
const AVHWAccel *hwaccel = s->avctx->hwaccel;
av_assert0(!frame->hwaccel_picture_private);
if (hwaccel->frame_priv_data_size) {
- frame->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+ frame->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(s->avctx, hwaccel);
if (!frame->hwaccel_priv_buf)
goto fail;
frame->hwaccel_picture_private = frame->hwaccel_priv_buf->data;
diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c
index 977bc65191..a1d58f04b3 100644
--- a/libavcodec/mpegpicture.c
+++ b/libavcodec/mpegpicture.c
@@ -27,6 +27,8 @@
#include "avcodec.h"
#include "encode.h"
+#include "internal.h"
+#include "decode.h"
#include "motion_est.h"
#include "mpegpicture.h"
#include "mpegutils.h"
@@ -172,7 +174,7 @@ static int alloc_frame_buffer(AVCodecContext *avctx, Picture *pic,
if (avctx->hwaccel) {
assert(!pic->hwaccel_picture_private);
if (avctx->hwaccel->frame_priv_data_size) {
- pic->hwaccel_priv_buf = av_buffer_allocz(avctx->hwaccel->frame_priv_data_size);
+ pic->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, avctx->hwaccel);
if (!pic->hwaccel_priv_buf) {
av_log(avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
return -1;
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index db2419deaf..4c23eb5672 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -109,7 +109,7 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
if (s->avctx->hwaccel) {
const AVHWAccel *hwaccel = s->avctx->hwaccel;
if (hwaccel->frame_priv_data_size) {
- f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+ f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(s->avctx, hwaccel);
if (!f->hwaccel_priv_buf)
goto fail;
f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 7c0a246446..4f345f18db 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -136,7 +136,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
const AVHWAccel *hwaccel = avctx->hwaccel;
av_assert0(!f->hwaccel_picture_private);
if (hwaccel->frame_priv_data_size) {
- f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+ f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, hwaccel);
if (!f->hwaccel_priv_buf)
goto fail;
f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
--
2.39.2
[-- Attachment #66: 0065-avcodec-add-AVHWAccel.flush-callback.patch --]
[-- Type: text/x-diff, Size: 3020 bytes --]
From 93223fa95389c60c015cfcee22784a1bf0fdb05b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 6 Jan 2023 03:32:56 +0100
Subject: [PATCH 65/72] avcodec: add AVHWAccel.flush callback
---
libavcodec/av1dec.c | 3 +++
libavcodec/avcodec.h | 5 +++++
libavcodec/h264dec.c | 3 +++
libavcodec/hevcdec.c | 3 +++
libavcodec/vp8.c | 3 +++
libavcodec/vp9.c | 3 +++
6 files changed, 20 insertions(+)
diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
index d105835d51..3cbb80bcb5 100644
--- a/libavcodec/av1dec.c
+++ b/libavcodec/av1dec.c
@@ -1228,6 +1228,9 @@ static void av1_decode_flush(AVCodecContext *avctx)
s->raw_seq = NULL;
ff_cbs_flush(s->cbc);
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
#define OFFSET(x) offsetof(AV1DecContext, x)
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 6babfc7132..531998a78c 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2214,6 +2214,11 @@ typedef struct AVHWAccel {
* @param data the per-frame hardware accelerator private data to be freed.
*/
void (*free_frame_priv)(AVCodecContext *avctx, void *data);
+
+ /**
+ * Callback to flush the hwaccel state.
+ */
+ void (*flush)(AVCodecContext *avctx);
} AVHWAccel;
/**
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 2d691731c5..995bf17a8f 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -480,6 +480,9 @@ static void h264_decode_flush(AVCodecContext *avctx)
ff_h264_free_tables(h);
h->context_initialized = 0;
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
static int get_last_needed_nal(H264Context *h)
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 43cd963175..7c9b46240c 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -3682,6 +3682,9 @@ static void hevc_decode_flush(AVCodecContext *avctx)
av_buffer_unref(&s->rpu_buf);
s->max_ra = INT_MAX;
s->eos = 1;
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
#define OFFSET(x) offsetof(HEVCContext, x)
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 4c23eb5672..b591b82ad1 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -167,6 +167,9 @@ static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
if (free_mem)
free_buffers(s);
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
static void vp8_decode_flush(AVCodecContext *avctx)
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 4f345f18db..18c2b09f64 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -1791,6 +1791,9 @@ static void vp9_decode_flush(AVCodecContext *avctx)
vp9_frame_unref(avctx, &s->s.frames[i]);
for (i = 0; i < 8; i++)
ff_thread_release_ext_buffer(avctx, &s->s.refs[i]);
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
static av_cold int vp9_decode_init(AVCodecContext *avctx)
--
2.39.2
[-- Attachment #67: 0066-hwconfig-add-a-new-HWACCEL_CAP_THREAD_SAFE-for-threa.patch --]
[-- Type: text/x-diff, Size: 1369 bytes --]
From 99ce9693bcb6218ffe82bb5780827c1dca614092 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 01:06:52 +0100
Subject: [PATCH 66/72] hwconfig: add a new HWACCEL_CAP_THREAD_SAFE for
threadsafe hwaccels
Vulkan is fully threadsafe and stateless, so we can benefit from this.
---
libavcodec/hwconfig.h | 1 +
libavcodec/pthread_frame.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
index 721424912c..e6b78f0160 100644
--- a/libavcodec/hwconfig.h
+++ b/libavcodec/hwconfig.h
@@ -24,6 +24,7 @@
#define HWACCEL_CAP_ASYNC_SAFE (1 << 0)
+#define HWACCEL_CAP_THREAD_SAFE (1 << 1)
typedef struct AVCodecHWConfigInternal {
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 71edd6b3ec..15e8d96a79 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -204,7 +204,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
/* if the previous thread uses hwaccel then we take the lock to ensure
* the threads don't run concurrently */
- if (avctx->hwaccel) {
+ if (avctx->hwaccel && !(avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE)) {
pthread_mutex_lock(&p->parent->hwaccel_mutex);
p->hwaccel_serializing = 1;
}
--
2.39.2
[-- Attachment #68: 0067-libavcodec-add-Vulkan-common-video-code.patch --]
[-- Type: text/x-diff, Size: 23311 bytes --]
From 2f30e4ddaf855b53cd3d8fd95a863b240bae0047 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 18 Dec 2022 08:31:03 +0100
Subject: [PATCH 67/72] libavcodec: add Vulkan common video code
---
configure | 2 +-
libavcodec/Makefile | 2 +
libavcodec/hwconfig.h | 2 +
libavcodec/vulkan.c | 19 ++
libavcodec/vulkan.h | 24 +++
libavcodec/vulkan_video.c | 417 ++++++++++++++++++++++++++++++++++++++
libavcodec/vulkan_video.h | 98 +++++++++
7 files changed, 563 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan.c
create mode 100644 libavcodec/vulkan.h
create mode 100644 libavcodec/vulkan_video.c
create mode 100644 libavcodec/vulkan_video.h
diff --git a/configure b/configure
index f0f15b9e87..91f715351c 100755
--- a/configure
+++ b/configure
@@ -326,7 +326,6 @@ External library support:
--disable-securetransport disable Secure Transport, needed for TLS support
on OSX if openssl and gnutls are not used [autodetect]
--enable-vapoursynth enable VapourSynth demuxer [no]
- --disable-vulkan disable Vulkan code [autodetect]
--disable-xlib disable xlib [autodetect]
--disable-zlib disable zlib [autodetect]
@@ -353,6 +352,7 @@ External library support:
--disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect]
--disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
--disable-videotoolbox disable VideoToolbox code [autodetect]
+ --disable-vulkan disable Vulkan code [autodetect]
Toolchain options:
--arch=ARCH select architecture [$arch]
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 4971832ff4..a45c32e564 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -976,6 +976,7 @@ OBJS-$(CONFIG_NVDEC) += nvdec.o
OBJS-$(CONFIG_VAAPI) += vaapi_decode.o
OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o
OBJS-$(CONFIG_VDPAU) += vdpau.o
+OBJS-$(CONFIG_VULKAN) += vulkan.o vulkan_video.o
OBJS-$(CONFIG_AV1_D3D11VA_HWACCEL) += dxva2_av1.o
OBJS-$(CONFIG_AV1_DXVA2_HWACCEL) += dxva2_av1.o
@@ -1284,6 +1285,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
+SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h
diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
index e6b78f0160..220b8a1e95 100644
--- a/libavcodec/hwconfig.h
+++ b/libavcodec/hwconfig.h
@@ -77,6 +77,8 @@ typedef struct AVCodecHWConfigInternal {
HW_CONFIG_HWACCEL(1, 1, 1, VDPAU, VDPAU, ff_ ## codec ## _vdpau_hwaccel)
#define HWACCEL_VIDEOTOOLBOX(codec) \
HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## _videotoolbox_hwaccel)
+#define HWACCEL_VULKAN(codec) \
+ HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## _vulkan_hwaccel)
#define HWACCEL_D3D11VA(codec) \
HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel)
diff --git a/libavcodec/vulkan.c b/libavcodec/vulkan.c
new file mode 100644
index 0000000000..fc8a1fa47b
--- /dev/null
+++ b/libavcodec/vulkan.c
@@ -0,0 +1,19 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/vulkan.c"
diff --git a/libavcodec/vulkan.h b/libavcodec/vulkan.h
new file mode 100644
index 0000000000..b15efd4add
--- /dev/null
+++ b/libavcodec/vulkan.h
@@ -0,0 +1,24 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_H
+#define AVCODEC_VULKAN_H
+
+#include "libavutil/vulkan.h"
+
+#endif /* AVCODEC_VULKAN_H */
diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c
new file mode 100644
index 0000000000..3e76109b26
--- /dev/null
+++ b/libavcodec/vulkan_video.c
@@ -0,0 +1,417 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "codec_id.h"
+
+#include "vulkan_video.h"
+
+const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO] = { /* indexed by codec ID; unlisted entries are all-zero */
+ [AV_CODEC_ID_H264] = {
+#if CONFIG_VULKAN_ENCODE
+ FF_VK_EXT_VIDEO_ENCODE_H264 | FF_VK_EXT_SYNC2, /* encode_extension */
+ VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_EXT, /* encode_op */
+#else
+ 0, /* encode_extension: encoding not built */
+ 0, /* encode_op */
+#endif
+ FF_VK_EXT_VIDEO_DECODE_H264 | FF_VK_EXT_SYNC2, /* decode_extension */
+ VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR, /* decode_op */
+ },
+ [AV_CODEC_ID_HEVC] = {
+#if CONFIG_VULKAN_ENCODE
+ FF_VK_EXT_VIDEO_ENCODE_H265 | FF_VK_EXT_SYNC2, /* encode_extension */
+ VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_EXT, /* encode_op */
+#else
+ 0, /* encode_extension: encoding not built */
+ 0, /* encode_op */
+#endif
+ FF_VK_EXT_VIDEO_DECODE_H265 | FF_VK_EXT_SYNC2, /* decode_extension */
+ VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR /* decode_op */
+ },
+};
+
+enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf, int *score)
+{
+    switch (vkf) {
+    /* Mono. Note: *score is only written when the format is recognised. */
+    case VK_FORMAT_R8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_GRAY8;
+    case VK_FORMAT_R10X6_UNORM_PACK16:
+    case VK_FORMAT_R12X4_UNORM_PACK16:
+        *score = 2;
+        return AV_PIX_FMT_GRAY16;
+    case VK_FORMAT_R16_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_GRAY16;
+
+    /* RGB */
+    case VK_FORMAT_B8G8R8A8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_BGRA;
+    case VK_FORMAT_R8G8B8A8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_RGBA;
+    case VK_FORMAT_R8G8B8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_RGB24;
+    case VK_FORMAT_B8G8R8_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_BGR24;
+
+    /* 420 */
+    case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_NV12;
+    case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV420P;
+    case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_P010;
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV420P10; /* 10-bit, same as the 422/444 10X6 cases below */
+    /* No support for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16 */
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV420P12;
+    case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV420P16;
+
+    /* 422 */
+    case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_NV16;
+    case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV422P;
+    case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_NV20;
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV422P10;
+    /* No support for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16 */
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV422P12;
+    case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV422P16;
+
+    /* 444 */
+    case VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
+        *score = 1;
+        return AV_PIX_FMT_NV24;
+    case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV444P;
+    /* No support for VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT */
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV444P10;
+    /* No support for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT */
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
+        *score = 2;
+        return AV_PIX_FMT_YUV444P12;
+    case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
+        *score = 1;
+        return AV_PIX_FMT_YUV444P16;
+    default:
+        break;
+    }
+
+    return AV_PIX_FMT_NONE; /* unrecognised format: *score is left untouched */
+}
+
+VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf)
+{
+ switch (vkf) {
+ case VK_FORMAT_R8_UNORM: /* single-plane formats: one colour aspect */
+ case VK_FORMAT_R10X6_UNORM_PACK16:
+ case VK_FORMAT_R12X4_UNORM_PACK16:
+ case VK_FORMAT_R16_UNORM:
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R8G8B8_UNORM:
+ case VK_FORMAT_B8G8R8_UNORM:
+ return VK_IMAGE_ASPECT_COLOR_BIT;
+
+ /* Two-plane formats (420, 422 and 444 alike) */
+ case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+ case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+ case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
+ case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
+ case VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
+ return VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT;
+
+ case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: /* three-plane formats */
+ case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
+ case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
+ case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
+ case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
+ case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
+ case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
+ case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
+ case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
+ case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
+ case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
+ case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
+ return VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT;
+
+ default: /* any format not listed above */
+ break;
+ }
+
+ return VK_IMAGE_ASPECT_NONE;
+}
+
+VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc)
+{
+    if (desc->nb_components == 1) /* a single component: no chroma at all */
+        return VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR;
+    else if (!desc->log2_chroma_w && !desc->log2_chroma_h)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR;
+    else if (desc->log2_chroma_w == 1 && !desc->log2_chroma_h) /* 4:2:2 halves chroma width only */
+        return VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR;
+    else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR;
+    return VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR; /* e.g. 4:4:0 or 4:1:1 layouts */
+}
+
+VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth)
+{
+    /* 8, 10 and 12 bits have flags here; every other depth is invalid. */
+    if (depth == 8)
+        return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR;
+    if (depth == 10)
+        return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR;
+    return depth == 12 ? VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR
+                       : VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR;
+}
+
+static void free_data_buf(void *opaque, uint8_t *data)
+{
+    FFVkVideoBuffer *vkbuf = (FFVkVideoBuffer *)data;
+    FFVulkanContext *vkctx = opaque; /* the opaque given to av_buffer_create() */
+    ff_vk_unmap_buffers(vkctx, &vkbuf->buf, 1, 0);
+    ff_vk_free_buf(vkctx, &vkbuf->buf);
+    av_free(data); /* finally the FFVkVideoBuffer storage itself */
+}
+
+static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
+{
+    uint8_t *buf = av_mallocz(size); /* zeroed storage, released by free_data_buf() */
+    AVBufferRef *ref = buf ? av_buffer_create(buf, size, free_data_buf, opaque, 0) : NULL;
+    if (!ref)
+        av_free(buf); /* no-op for NULL; otherwise buf would leak when wrapping fails */
+    return ref;
+}
+
+int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
+                           AVBufferRef **buf, VkBufferUsageFlags usage,
+                           void *create_pNext, size_t size)
+{
+    int err;
+    AVBufferRef *ref;
+    FFVkVideoBuffer *data;
+
+    if (!s->buf_pool) { /* the pool is created lazily on first use */
+        s->buf_pool = av_buffer_pool_init2(sizeof(FFVkVideoBuffer), ctx,
+                                           alloc_data_buf, NULL);
+        if (!s->buf_pool)
+            return AVERROR(ENOMEM);
+    }
+
+    *buf = ref = av_buffer_pool_get(s->buf_pool);
+    if (!ref)
+        return AVERROR(ENOMEM);
+
+    data = (FFVkVideoBuffer *)ref->data;
+
+    if (data->buf.size >= size) /* pool entry is already large enough */
+        return 0;
+
+    /* No point in requesting anything smaller. */
+    size = FFMAX(size, 1024*1024);
+    size = FFALIGN(size, s->caps.minBitstreamBufferSizeAlignment);
+
+    /* Round up to the next power of two. Makes fragmentation management
+     * easier, and gives us ample headroom. */
+    size--;
+    size |= size >> 1;
+    size |= size >> 2;
+    size |= size >> 4;
+    size |= size >> 8;
+    size |= size >> 16;
+    size++;
+
+    ff_vk_free_buf(ctx, &data->buf);
+    memset(data, 0, sizeof(FFVkVideoBuffer));
+
+    err = ff_vk_create_buf(ctx, &data->buf, size,
+                           create_pNext, NULL, usage,
+                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0) {
+        av_buffer_unref(buf); /* also resets *buf, so the caller keeps no stale pointer */
+        return err;
+    }
+
+    /* Map the buffer */
+    err = ff_vk_map_buffers(ctx, &data->buf, &data->mem, 1, 0);
+    if (err < 0) {
+        av_buffer_unref(buf); /* also resets *buf, so the caller keeps no stale pointer */
+        return err;
+    }
+
+    return 0;
+}
+
+av_cold void ff_vk_video_common_uninit(FFVulkanContext *s,
+                                       FFVkVideoCommon *common)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    if (common->session) {
+        vk->DestroyVideoSessionKHR(s->hwctx->act_dev, common->session,
+                                   s->hwctx->alloc);
+        common->session = VK_NULL_HANDLE; /* the handle is an integer, not a pointer, on 32-bit targets */
+    }
+
+    if (common->nb_mem && common->mem)
+        for (int i = 0; i < common->nb_mem; i++)
+            vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc);
+
+    av_freep(&common->mem);
+    common->nb_mem = 0; /* keep the count consistent with the freed array */
+    av_buffer_pool_uninit(&common->buf_pool);
+}
+
+av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+                                    FFVkVideoCommon *common,
+                                    VkVideoSessionCreateInfoKHR *session_create)
+{
+    /* Creates common->session from session_create, then allocates and binds
+     * every device memory block the session reports needing (stored in
+     * common->mem). log is only used as the av_log() context. Returns 0 on
+     * success or a negative AVERROR; once the session exists, any failure
+     * goes through ff_vk_video_common_uninit() before returning. */
+    int err;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkVideoSessionMemoryRequirementsKHR *mem = NULL;
+    VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL;
+
+    /* Create session */
+    ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create,
+                                    s->hwctx->alloc, &common->session);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL; /* nothing has been created yet */
+
+    /* Get the number of memory requirements (NULL array: count only) */
+    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
+                                                   common->session,
+                                                   &common->nb_mem,
+                                                   NULL);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Allocate all memory needed to actually allocate memory: common->mem
+     * is kept in common, mem and bind_mem are scratch freed before return */
+    common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem);
+    if (!common->mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    mem = av_mallocz(sizeof(*mem)*common->nb_mem);
+    if (!mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    bind_mem = av_mallocz(sizeof(*bind_mem)*common->nb_mem);
+    if (!bind_mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Set the needed fields to get the memory requirements. Only sType is
+     * an input here; the remaining members are zeroed by the compound
+     * literal and read back after the query below. */
+    for (int i = 0; i < common->nb_mem; i++) {
+        mem[i] = (VkVideoSessionMemoryRequirementsKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_MEMORY_REQUIREMENTS_KHR,
+        };
+    }
+
+    /* Finally get the memory requirements */
+    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
+                                                   common->session, &common->nb_mem,
+                                                   mem);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Now allocate each requested memory.
+     * For ricing, could pool together memory that ends up in the same index. */
+    for (int i = 0; i < common->nb_mem; i++) {
+        err = ff_vk_alloc_mem(s, &mem[i].memoryRequirements,
+                              UINT32_MAX, NULL, NULL, &common->mem[i]);
+        if (err < 0)
+            goto fail;
+
+        bind_mem[i] = (VkBindVideoSessionMemoryInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_BIND_VIDEO_SESSION_MEMORY_INFO_KHR,
+            .memory = common->mem[i],
+            .memoryBindIndex = mem[i].memoryBindIndex,
+            .memoryOffset = 0,
+            .memorySize = mem[i].memoryRequirements.size,
+        };
+
+        /* Cast: VkDeviceSize is 64 bits wide, unsigned long may be only 32 */
+        av_log(log, AV_LOG_VERBOSE, "Allocating %llu bytes in bind index %u for video session\n",
+               (unsigned long long)bind_mem[i].memorySize, bind_mem[i].memoryBindIndex);
+    }
+
+    /* Bind the allocated memory */
+    ret = vk->BindVideoSessionMemoryKHR(s->hwctx->act_dev, common->session,
+                                        common->nb_mem, bind_mem);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* common->mem stays owned by common; only the scratch arrays go */
+    av_freep(&mem);
+    av_freep(&bind_mem);
+
+    return 0;
+
+fail:
+    /* Both scratch pointers start out NULL, so this is safe from any goto */
+    av_freep(&mem);
+    av_freep(&bind_mem);
+
+    /* Destroys the session and frees whatever session memory was allocated */
+    ff_vk_video_common_uninit(s, common);
+    return err;
+}
diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h
new file mode 100644
index 0000000000..5e2676a282
--- /dev/null
+++ b/libavcodec/vulkan_video.h
@@ -0,0 +1,98 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_VIDEO_H
+#define AVCODEC_VULKAN_VIDEO_H
+
+#include "codec_id.h"
+#include "vulkan.h"
+
+#include <vk_video/vulkan_video_codecs_common.h>
+
+/* Unpack a packed codec header version: the major number lives in bits 22
+ * and up, the minor number in bits 12..21 and the patch number in bits
+ * 0..11. Every use of the argument is parenthesized so that any expression
+ * (e.g. one containing '|' or '+') may be passed safely. */
+#define CODEC_VER_MAJ(ver) ((ver) >> 22)
+#define CODEC_VER_MIN(ver) (((ver) >> 12) & ((1 << 10) - 1))
+#define CODEC_VER_PAT(ver) ((ver) & ((1 << 12) - 1))
+/* Expands to three comma-separated values (major, minor, patch), intended
+ * to be spliced into a printf-style argument list. */
+#define CODEC_VER(ver) CODEC_VER_MAJ(ver), CODEC_VER_MIN(ver), CODEC_VER_PAT(ver)
+
+/**
+ * Per-codec description of how a codec maps onto Vulkan video: which
+ * extension flag and which video codec operation are used for encoding and
+ * for decoding. The decoder init code in vulkan_decode.c treats a zero
+ * decode_op or decode_extension as "codec not supported".
+ */
+typedef struct FFVkCodecMap {
+ /* Extension flag and operation used for encoding */
+ FFVulkanExtensions encode_extension;
+ VkVideoCodecOperationFlagBitsKHR encode_op;
+ /* Extension flag and operation used for decoding */
+ FFVulkanExtensions decode_extension;
+ VkVideoCodecOperationFlagBitsKHR decode_op;
+} FFVkCodecMap;
+
+/**
+ * State shared by users of a Vulkan video session.
+ * NOTE(review): the struct tag (FFVkVideoSession) differs from the typedef
+ * name (FFVkVideoCommon); only the typedef name is used by the prototypes
+ * below.
+ */
+typedef struct FFVkVideoSession {
+ /* The video session handle */
+ VkVideoSessionKHR session;
+ /* Array of nb_mem device memory allocations bound to the session */
+ VkDeviceMemory *mem;
+ uint32_t nb_mem;
+ /* Video capabilities; the decoder reads its bitstream size alignment
+ * from here */
+ VkVideoCapabilitiesKHR caps;
+
+ /* presumably the pool backing ff_vk_video_get_buffer() - TODO confirm */
+ AVBufferPool *buf_pool;
+} FFVkVideoCommon;
+
+/**
+ * Table of per-codec Vulkan video mappings.
+ * Index is codec_id; the table has AV_CODEC_ID_FIRST_AUDIO entries, so only
+ * codec IDs below that value may be used as an index.
+ */
+extern const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO];
+
+/**
+ * Get pixfmt from a Vulkan format.
+ *
+ * @param vkf   Vulkan format to translate
+ * @param score output location for a score associated with the match
+ *              NOTE(review): the decoder compares it with '<', so lower
+ *              presumably means a better match - confirm against the
+ *              implementation
+ * @return the corresponding pixel format; callers check the result against
+ *         AV_PIX_FMT_NONE
+ */
+enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf, int *score);
+
+/**
+ * Get the image aspect bits which together cover all planes of a VkFormat.
+ *
+ * @param vkf Vulkan format to query
+ */
+VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf);
+
+/**
+ * Get Vulkan's chroma subsampling flag from a pixfmt descriptor.
+ *
+ * @param desc pixel format descriptor to translate
+ */
+VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc);
+
+/**
+ * Get Vulkan's component bit depth flag from an integer bit depth.
+ *
+ * @param depth bit depth, expected to be in the [8:12] range
+ */
+VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth);
+
+/**
+ * Buffer handed out by ff_vk_video_get_buffer(); it is the payload
+ * (AVBufferRef.data) of the returned reference.
+ */
+typedef struct FFVkVideoBuffer {
+ /* The underlying Vulkan buffer */
+ FFVkBuffer buf;
+ /* Host pointer the decoder memcpy()s bitstream data into; presumably the
+ * mapped address of buf - TODO confirm */
+ uint8_t *mem;
+} FFVkVideoBuffer;
+
+/**
+ * Get a mapped FFVkVideoBuffer with a specific guaranteed minimum size
+ * from a pool.
+ *
+ * @param ctx          Vulkan context
+ * @param s            video common state
+ * @param buf          on success, receives a reference whose data is an
+ *                     FFVkVideoBuffer
+ * @param usage        Vulkan buffer usage flags
+ * @param create_pNext pointer forwarded for buffer creation (the decoder
+ *                     passes its video profile list here)
+ * @param size         minimum size in bytes
+ * @return a negative value on error (callers test for < 0)
+ */
+int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
+ AVBufferRef **buf, VkBufferUsageFlags usage,
+ void *create_pNext, size_t size);
+
+/**
+ * Initialize video session, allocating and binding necessary memory.
+ *
+ * @param log            logging context
+ * @param s              Vulkan context
+ * @param common         video common state to initialize
+ * @param session_create session creation parameters
+ * @return 0 on success, a negative AVERROR code on failure; on failure
+ *         ff_vk_video_common_uninit() has already been called on common
+ */
+int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+ FFVkVideoCommon *common,
+ VkVideoSessionCreateInfoKHR *session_create);
+
+/**
+ * Free video session and required resources.
+ * Also invoked by ff_vk_video_common_init() itself on its failure path.
+ *
+ * @param s      Vulkan context
+ * @param common video common state to tear down
+ */
+void ff_vk_video_common_uninit(FFVulkanContext *s, FFVkVideoCommon *common);
+
+#endif /* AVCODEC_VULKAN_VIDEO_H */
--
2.39.2
[-- Attachment #69: 0068-libavcodec-add-Vulkan-common-video-decoding-code.patch --]
[-- Type: text/x-diff, Size: 53050 bytes --]
From d3f2fa8e530dc94c9058149a2cee92196c7adb33 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 16 Jan 2023 07:23:27 +0100
Subject: [PATCH 68/72] libavcodec: add Vulkan common video decoding code
---
libavcodec/Makefile | 2 +-
libavcodec/vulkan_decode.c | 1135 ++++++++++++++++++++++++++++++++++++
libavcodec/vulkan_decode.h | 163 ++++++
3 files changed, 1299 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan_decode.c
create mode 100644 libavcodec/vulkan_decode.h
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index a45c32e564..eabf4eb43e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1285,7 +1285,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
-SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h
+SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_decode.h
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
new file mode 100644
index 0000000000..582968e1da
--- /dev/null
+++ b/libavcodec/vulkan_decode.c
@@ -0,0 +1,1135 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_video.h"
+#include "vulkan_decode.h"
+#include "config_components.h"
+
+/* Per-codec extension descriptors. Each one is only declared when the
+ * matching hwaccel is enabled in the build configuration. */
+#if CONFIG_H264_VULKAN_HWACCEL
+extern const VkExtensionProperties ff_vk_dec_h264_ext;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+extern const VkExtensionProperties ff_vk_dec_hevc_ext;
+#endif
+
+/* Extension descriptors indexed by codec ID. The array is only as long as
+ * the highest enabled codec ID requires, and entries for codecs which are
+ * not listed are NULL, so it must only be indexed with a codec whose
+ * hwaccel is enabled. */
+static const VkExtensionProperties *dec_ext[] = {
+#if CONFIG_H264_VULKAN_HWACCEL
+ [AV_CODEC_ID_H264] = &ff_vk_dec_h264_ext,
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ [AV_CODEC_ID_HEVC] = &ff_vk_dec_hevc_ext,
+#endif
+};
+
+/**
+ * Create a 2D image view over the first image of a Vulkan frame, using the
+ * decoder's picture format and its YCbCr sampler conversion.
+ *
+ * @param ctx      decode context (supplies format, sampler and device)
+ * @param dst_view receives the created view
+ * @param aspect   on success, receives the aspect bits of all planes of the
+ *                 picture format (left untouched on failure)
+ * @param src      frame whose img[0] the view is created for
+ * @return 0 on success, AVERROR_EXTERNAL if view creation fails
+ */
+static int vk_decode_create_view(FFVulkanDecodeContext *ctx, VkImageView *dst_view,
+                                 VkImageAspectFlags *aspect, AVVkFrame *src)
+{
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    const VkImageAspectFlags plane_bits = ff_vk_aspect_bits_from_vkfmt(ctx->pic_format);
+
+    VkSamplerYcbcrConversionInfo ycbcr_info = {
+        .sType      = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO,
+        .conversion = ctx->yuv_sampler,
+    };
+    VkImageViewCreateInfo view_info = {
+        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext      = &ycbcr_info,
+        .image      = src->img[0],
+        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
+        .format     = ctx->pic_format,
+        .components = {
+            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+        },
+        /* The view itself always uses the color aspect; the per-plane
+         * aspect bits are only reported back through *aspect. */
+        .subresourceRange = {
+            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
+            .levelCount     = 1,
+            .baseArrayLayer = 0,
+            .layerCount     = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };
+
+    if (vk->CreateImageView(ctx->s.hwctx->act_dev, &view_info,
+                            ctx->s.hwctx->alloc, dst_view) != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    *aspect = plane_bits;
+
+    return 0;
+}
+
+/**
+ * Allocate a frame backed by a buffer from the DPB frames pool.
+ *
+ * The returned frame holds a new reference to the DPB hardware frames
+ * context, has buf[0] taken from that context's pool and data[0] pointing
+ * at the buffer's data.
+ *
+ * @return the new frame, or NULL if any allocation failed (nothing is
+ *         leaked in that case)
+ */
+static AVFrame *vk_get_dpb_pool(FFVulkanDecodeContext *ctx)
+{
+    AVFrame *avf = av_frame_alloc();
+    AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+    if (!avf)
+        return NULL;
+
+    avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref);
+    if (!avf->hw_frames_ctx)
+        goto fail;
+
+    avf->buf[0] = av_buffer_pool_get(dpb_frames->pool);
+    if (!avf->buf[0])
+        goto fail;
+
+    avf->data[0] = avf->buf[0]->data;
+
+    return avf;
+
+fail:
+    /* Must return right after freeing: av_frame_free() resets avf to NULL,
+     * so falling through would dereference a NULL pointer. */
+    av_frame_free(&avf);
+    return NULL;
+}
+
+/**
+ * Prepare the per-picture Vulkan state (slice buffer and image views).
+ *
+ * @param ctx        decode context
+ * @param pic        frame whose buf[0] data is the output AVVkFrame
+ * @param vkpic      per-picture state to fill in
+ * @param is_current non-zero if this is the picture being decoded; a slice
+ *                   buffer and an output view are then created
+ * @param alloc_dpb  non-zero if the reference view must come from the DPB
+ *                   (layered or from the DPB pool) rather than from pic
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic,
+                               FFVulkanDecodePicture *vkpic, int is_current,
+                               int alloc_dpb)
+{
+    int ret;
+
+    /* The slice list is restarted on every call */
+    vkpic->nb_slices   = 0;
+    vkpic->slices_size = 0;
+
+    /* A reference view already exists, so the picture was set up by an
+     * earlier call and there is nothing more to do */
+    if (vkpic->img_view_ref)
+        return 0;
+
+    /* Pre-allocate slice buffer with a reasonable default */
+    if (is_current) {
+        uint64_t min_alloc = 4096;
+        if (0)
+            min_alloc = 2*ctx->s.hprops.minImportedHostPointerAlignment;
+
+        vkpic->slices = av_fast_realloc(NULL, &vkpic->slices_size_max, min_alloc);
+        if (!vkpic->slices)
+            return AVERROR(ENOMEM);
+
+        if (0)
+            vkpic->slices_size += ctx->s.hprops.minImportedHostPointerAlignment;
+    }
+
+    vkpic->dpb_frame    = NULL;
+    vkpic->dpb_vkf      = NULL;
+    vkpic->img_view_ref = NULL;
+    vkpic->img_view_out = NULL;
+
+    if (alloc_dpb) {
+        if (ctx->layered_dpb) {
+            /* Layered DPB: every picture shares the context's view */
+            vkpic->img_view_ref   = ctx->layered_view;
+            vkpic->img_aspect_ref = ctx->layered_aspect;
+        } else {
+            /* Separate DPB images: take one from the pool and view it */
+            vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+            if (!vkpic->dpb_frame)
+                return AVERROR(ENOMEM);
+
+            vkpic->dpb_vkf = (AVVkFrame *)vkpic->dpb_frame->data[0];
+
+            ret = vk_decode_create_view(ctx, &vkpic->img_view_ref,
+                                        &vkpic->img_aspect_ref,
+                                        vkpic->dpb_vkf);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    if (is_current || !alloc_dpb) {
+        ret = vk_decode_create_view(ctx, &vkpic->img_view_out,
+                                    &vkpic->img_aspect,
+                                    (AVVkFrame *)pic->buf[0]->data);
+        if (ret < 0)
+            return ret;
+
+        /* Without a separate DPB the output image doubles as reference */
+        if (!alloc_dpb) {
+            vkpic->img_view_ref   = vkpic->img_view_out;
+            vkpic->img_aspect_ref = vkpic->img_aspect;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Append one slice to a picture's slice buffer and record its offset.
+ *
+ * @param vp            per-picture state owning the slice and offset buffers
+ * @param data          slice payload
+ * @param size          payload size in bytes
+ * @param add_startcode if non-zero, a 00 00 01 start code is written in
+ *                      front of the payload
+ * @param nb_slices     in: current number of slices, out: incremented by one
+ * @param offsets       receives the (possibly reallocated) offsets array
+ * @return 0 on success, AVERROR(ENOMEM) if a buffer could not be grown
+ */
+int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp,
+                           const uint8_t *data, size_t size, int add_startcode,
+                           uint32_t *nb_slices, const uint32_t **offsets)
+{
+    static const uint8_t startcode_prefix[3] = { 0x0, 0x0, 0x1 };
+    const size_t startcode_len = add_startcode ? sizeof(startcode_prefix) : 0;
+    const int nb = *nb_slices;
+    uint8_t *slices;
+    uint32_t *slice_off;
+
+    /* Size the array by its element type (uint32_t), not by the size of
+     * the pointer to it. */
+    slice_off = av_fast_realloc(vp->slice_off, &vp->slice_off_max,
+                                (nb + 1)*sizeof(*slice_off));
+    if (!slice_off)
+        return AVERROR(ENOMEM);
+
+    *offsets = vp->slice_off = slice_off;
+    slice_off[nb] = vp->slices_size;
+
+    slices = av_fast_realloc(vp->slices, &vp->slices_size_max,
+                             vp->slices_size + size + startcode_len);
+    if (!slices)
+        return AVERROR(ENOMEM);
+
+    vp->slices = slices;
+
+    /* Startcode */
+    memcpy(slices + vp->slices_size, startcode_prefix, startcode_len);
+
+    /* Slice data */
+    memcpy(slices + vp->slices_size + startcode_len, data, size);
+
+    *nb_slices = nb + 1;
+    vp->nb_slices++;
+    vp->slices_size += startcode_len + size;
+
+    return 0;
+}
+
+/**
+ * Reset the video session: records a begin/control(reset)/end coding
+ * sequence into a command buffer from the exec pool and submits it.
+ *
+ * @param avctx codec context whose hwaccel private data is the Vulkan
+ *              decode context
+ */
+void ff_vk_decode_flush(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    FFVkExecContext *exec;
+    VkCommandBuffer cmd_buf;
+
+    /* The reset is issued with the context's empty session parameters */
+    VkVideoBeginCodingInfoKHR begin_info = {
+        .sType                  = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
+        .videoSession           = ctx->common.session,
+        .videoSessionParameters = ctx->empty_session_params,
+    };
+    VkVideoCodingControlInfoKHR reset_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
+        .flags = VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR,
+    };
+    VkVideoEndCodingInfoKHR end_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
+    };
+
+    exec = ff_vk_exec_get(&ctx->exec_pool);
+    ff_vk_exec_start(&ctx->s, exec);
+    cmd_buf = exec->buf;
+
+    vk->CmdBeginVideoCodingKHR(cmd_buf, &begin_info);
+    vk->CmdControlVideoCodingKHR(cmd_buf, &reset_info);
+    vk->CmdEndVideoCodingKHR(cmd_buf, &end_info);
+
+    ff_vk_exec_submit(&ctx->s, exec);
+}
+
+/**
+ * AVBuffer free callback for host-mapped slice buffers.
+ *
+ * @param opaque the FFVulkanContext the buffer was created with
+ * @param data   the FFVkVideoBuffer to release; both its Vulkan buffer and
+ *               the structure itself are freed
+ */
+static void host_map_buf_free(void *opaque, uint8_t *data)
+{
+    FFVkVideoBuffer *vbuf = (FFVkVideoBuffer *)data;
+
+    ff_vk_free_buf((FFVulkanContext *)opaque, &vbuf->buf);
+    av_free(vbuf);
+}
+
+/**
+ * Upload the accumulated slice data, record the decode commands for one
+ * picture and submit them for execution.
+ *
+ * @param avctx codec context whose hwaccel private data is the Vulkan
+ *              decode context
+ * @param pic   output frame; its buf[0] data is the destination AVVkFrame
+ * @param vp    per-picture state (slices, session parameters, decode info)
+ * @param rpic  reference frames, one per reference slot in vp->decode_info;
+ *              only read when the DPB is not layered
+ * @param rvkp  per-picture state of each reference, parallel to rpic
+ * @return the result of the submission on success, a negative AVERROR code
+ *         on failure
+ */
+int ff_vk_decode_frame(AVCodecContext *avctx,
+                       AVFrame *pic, FFVulkanDecodePicture *vp,
+                       AVFrame *rpic[], FFVulkanDecodePicture *rvkp[])
+{
+    int err;
+    VkResult ret;
+    VkCommandBuffer cmd_buf;
+    FFVkVideoBuffer *sd_buf;
+
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    /* Output */
+    AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data;
+
+    /* Quirks */
+    const int layered_dpb = ctx->layered_dpb;
+
+    VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)vp->session_params->data;
+    VkVideoBeginCodingInfoKHR decode_start = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
+        .videoSession = ctx->common.session,
+        .videoSessionParameters = *par,
+        .referenceSlotCount = vp->decode_info.referenceSlotCount,
+        .pReferenceSlots = vp->decode_info.pReferenceSlots,
+    };
+    VkVideoEndCodingInfoKHR decode_end = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
+    };
+
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+    AVBufferRef *sd_ref = NULL;
+    size_t data_size = FFALIGN(vp->slices_size, ctx->common.caps.minBitstreamBufferSizeAlignment);
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool);
+
+    /* Report the status of whatever was previously submitted on this
+     * execution context, if status queries are available */
+    if (ctx->exec_pool.nb_queries) {
+        int64_t prev_sub_res = 0;
+        ff_vk_exec_wait(&ctx->s, exec);
+        ret = ff_vk_exec_get_query(&ctx->s, exec, NULL, &prev_sub_res);
+        if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+
+        /* prev_sub_res is an int64_t, so it needs the matching PRId64
+         * conversion rather than %li */
+        if (ret == VK_SUCCESS)
+            av_log(avctx, prev_sub_res < 0 ? AV_LOG_ERROR : AV_LOG_DEBUG,
+                   "Result of previous frame decoding: %"PRId64"\n", prev_sub_res);
+    }
+
+    /* Host-pointer import path, currently compiled out */
+    if (0) {
+        size_t req_size;
+        VkExternalMemoryBufferCreateInfo create_desc = {
+            .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
+            .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+            .pNext = &ctx->profile_list,
+        };
+
+        VkImportMemoryHostPointerInfoEXT import_desc = {
+            .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+        };
+
+        VkMemoryHostPointerPropertiesEXT p_props = {
+            .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
+        };
+
+        /* Align slices pointer */
+        import_desc.pHostPointer = (void *)FFALIGN((uintptr_t)vp->slices,
+                                                   ctx->s.hprops.minImportedHostPointerAlignment);
+
+        req_size = FFALIGN(data_size,
+                           ctx->s.hprops.minImportedHostPointerAlignment);
+
+        ret = vk->GetMemoryHostPointerPropertiesEXT(ctx->s.hwctx->act_dev,
+                                                    import_desc.handleType,
+                                                    import_desc.pHostPointer,
+                                                    &p_props);
+
+        if (ret == VK_SUCCESS) {
+            sd_buf = av_mallocz(sizeof(*sd_buf));
+            if (!sd_buf)
+                return AVERROR(ENOMEM);
+
+            err = ff_vk_create_buf(&ctx->s, &sd_buf->buf, req_size,
+                                   &create_desc, &import_desc,
+                                   VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
+                                   VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+            if (err < 0) {
+                av_free(sd_buf);
+                return err; /* This shouldn't error out, unless it's critical */
+            } else {
+                size_t neg_offs = (uint8_t *)import_desc.pHostPointer - vp->slices;
+
+                sd_ref = av_buffer_create((uint8_t *)sd_buf, sizeof(*sd_buf),
+                                          host_map_buf_free, &ctx->s, 0);
+                if (!sd_ref) {
+                    ff_vk_free_buf(&ctx->s, &sd_buf->buf);
+                    av_free(sd_buf);
+                    return AVERROR(ENOMEM);
+                }
+
+                for (int i = 0; i < vp->nb_slices; i++)
+                    vp->slice_off[i] -= neg_offs;
+
+                sd_buf->mem = vp->slices;
+            }
+        }
+    }
+
+    /* Regular path: get a pooled buffer and copy the bitstream into it */
+    if (!sd_ref) {
+        err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &sd_ref,
+                                     VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
+                                     &ctx->profile_list, data_size);
+        if (err < 0)
+            return err;
+
+        sd_buf = (FFVkVideoBuffer *)sd_ref->data;
+
+        /* Copy the slices data to the buffer */
+        memcpy(sd_buf->mem, vp->slices, vp->slices_size);
+    }
+
+    /* Flush if needed */
+    if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        VkMappedMemoryRange flush_buf = {
+            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = sd_buf->buf.mem,
+            .offset = 0,
+            .size = FFALIGN(vp->slices_size,
+                            ctx->s.props.properties.limits.nonCoherentAtomSize),
+        };
+
+        ret = vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &flush_buf);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   ff_vk_ret2str(ret));
+            av_buffer_unref(&sd_ref);
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    vp->decode_info.srcBuffer = sd_buf->buf.buf;
+    vp->decode_info.srcBufferOffset = 0;
+    vp->decode_info.srcBufferRange = data_size;
+
+    /* Start command buffer recording */
+    ff_vk_exec_start(&ctx->s, exec);
+    cmd_buf = exec->buf;
+
+    /* Slices */
+    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &sd_ref, 1, 0);
+    if (err < 0)
+        return err;
+
+    /* Parameters */
+    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->session_params, 1, 0);
+    if (err < 0)
+        return err;
+
+    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic,
+                                   VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+    if (err < 0)
+        return err;
+
+    err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                      pic);
+    if (err < 0)
+        return err;
+
+    /* Output image - change layout, as it comes from a pool */
+    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+        .pNext = NULL,
+        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+        .srcAccessMask = vkf->access[0],
+        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
+        .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR,
+        .oldLayout = vkf->layout[0],
+        .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR,
+        .srcQueueFamilyIndex = vkf->queue_family[0],
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = vkf->img[0],
+        .subresourceRange = (VkImageSubresourceRange) {
+            .aspectMask = vp->img_aspect,
+            .layerCount = 1,
+            .levelCount = 1,
+        },
+    };
+    ff_vk_exec_update_frame(&ctx->s, exec, pic,
+                            &img_bar[nb_img_bar], &nb_img_bar);
+
+    /* Reference for the current image, if existing and not layered */
+    if (vp->dpb_frame) {
+        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
+                                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+        if (err < 0)
+            return err;
+    }
+
+    if (!layered_dpb) {
+        /* All references (apart from the current) for non-layered refs */
+
+        for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) {
+            AVFrame *ref_frame = rpic[i];
+            FFVulkanDecodePicture *rvp = rvkp[i];
+            AVFrame *ref = rvp->dpb_frame ? rvp->dpb_frame : ref_frame;
+
+            err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref,
+                                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+            if (err < 0)
+                return err;
+
+            if (err == 0) {
+                err = ff_vk_exec_mirror_sem_value(&ctx->s, exec,
+                                                  &rvp->sem, &rvp->sem_value,
+                                                  ref);
+                if (err < 0)
+                    return err;
+            }
+
+            if (!rvp->dpb_frame) {
+                /* The AVVkFrame is stored in data[0]; casting the data
+                 * array itself would alias the AVFrame structure instead */
+                AVVkFrame *rvkf = (AVVkFrame *)ref->data[0];
+
+                img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
+                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+                    .pNext = NULL,
+                    .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                    .srcAccessMask = rvkf->access[0],
+                    .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
+                    .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR |
+                                     VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR,
+                    .oldLayout = rvkf->layout[0],
+                    .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR,
+                    .srcQueueFamilyIndex = rvkf->queue_family[0],
+                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                    .image = rvkf->img[0],
+                    .subresourceRange = (VkImageSubresourceRange) {
+                        .aspectMask = rvp->img_aspect_ref,
+                        .layerCount = 1,
+                        .levelCount = 1,
+                    },
+                };
+                ff_vk_exec_update_frame(&ctx->s, exec, ref,
+                                        &img_bar[nb_img_bar], &nb_img_bar);
+            }
+        }
+    } else if (vp->decode_info.referenceSlotCount ||
+               vp->img_view_out != vp->img_view_ref) {
+        /* Single barrier for a single layered ref */
+        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->layered_frame,
+                                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+        if (err < 0)
+            return err;
+    }
+
+    /* Change image layout */
+    vk->CmdPipelineBarrier2KHR(cmd_buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    /* Start, use parameters, decode and end decoding */
+    vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start);
+
+    /* Start status query TODO: remove check when radv gets support */
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdBeginQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0, 0);
+
+    vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info);
+
+    /* End status query */
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdEndQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0);
+
+    vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end);
+
+    /* End recording and submit for execution */
+    return ff_vk_exec_submit(&ctx->s, exec);
+}
+
+/**
+ * Release everything ff_vk_decode_prepare_frame() and
+ * ff_vk_decode_add_slice() attached to a picture.
+ *
+ * Waits for the picture's semaphore (if any) to reach the value recorded
+ * at submission, then frees the slice buffers, the image views owned by
+ * the picture and the DPB frame.
+ *
+ * @param ctx decode context; may be NULL, in which case only the host
+ *            allocations and the DPB frame can be released
+ * @param vp  per-picture state to release
+ */
+void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp)
+{
+    FFVulkanFunctions *vk;
+    VkSemaphoreWaitInfo sem_wait;
+
+    // TODO: investigate why this happens
+    if (!ctx) {
+        av_freep(&vp->slices);
+        av_freep(&vp->slice_off);
+        /* Keep the capacities in sync with the freed pointers:
+         * av_fast_realloc() returns its (now NULL) input untouched when
+         * the recorded capacity is already large enough. */
+        vp->slices_size_max = 0;
+        vp->slice_off_max   = 0;
+        av_frame_free(&vp->dpb_frame);
+        return;
+    }
+
+    vk = &ctx->s.vkfn;
+
+    /* We do not have to lock the frame here because we're not interested
+     * in the actual current semaphore value, but only that it's later than
+     * the time we submitted the image for decoding. */
+    sem_wait = (VkSemaphoreWaitInfo) {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+        .pSemaphores = &vp->sem,
+        .pValues = &vp->sem_value,
+        .semaphoreCount = 1,
+    };
+
+    if (vp->sem)
+        vk->WaitSemaphores(ctx->s.hwctx->act_dev, &sem_wait, UINT64_MAX);
+
+    /* Free slices data
+     * TODO: use a pool in the decode context instead to avoid per-frame allocs. */
+    av_freep(&vp->slices);
+    av_freep(&vp->slice_off);
+    vp->slices_size_max = 0;
+    vp->slice_off_max   = 0;
+
+    /* Destroy image view (out). The output view is always created by the
+     * picture itself. When no DPB image was allocated it is also used as
+     * the reference view, and the reference branch below does not cover
+     * that case, so it must be destroyed here even if out == ref. */
+    if (vp->img_view_out)
+        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_out, ctx->s.hwctx->alloc);
+
+    /* Destroy image view (ref, unlayered). Only a view created over the
+     * picture's own DPB image belongs to the picture; a layered view is
+     * shared through the context, and an aliased output view was handled
+     * above. */
+    if (vp->dpb_vkf && vp->img_view_ref)
+        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_ref, ctx->s.hwctx->alloc);
+
+    av_frame_free(&vp->dpb_frame);
+}
+
+/* Since to even get decoder capabilities, we have to initialize quite a lot,
+ * this function does initialization and saves it to hwaccel_priv_data if
+ * available. */
+static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_ref,
+ int *width_align, int *height_align,
+ enum AVPixelFormat *pix_fmt, int *dpb_dedicate)
+{
+ VkResult ret;
+ int err, max_level, score = INT32_MAX;
+ const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id];
+ AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
+ AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data;
+ AVVulkanDeviceContext *hwctx = device->hwctx;
+ enum AVPixelFormat context_format = frames->sw_format;
+ int context_format_was_found = 0;
+ int base_profile, cur_profile = avctx->profile;
+
+ int dedicated_dpb;
+ int layered_dpb;
+
+ FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+ FFVulkanExtensions local_extensions = 0x0;
+ FFVulkanExtensions *extensions = ctx ? &ctx->s.extensions : &local_extensions;
+ FFVulkanFunctions local_vk = { 0 };
+ FFVulkanFunctions *vk = ctx ? &ctx->s.vkfn : &local_vk;
+ VkVideoCapabilitiesKHR local_caps = { 0 };
+ VkVideoCapabilitiesKHR *caps = ctx ? &ctx->common.caps : &local_caps;
+ VkVideoDecodeCapabilitiesKHR local_dec_caps = { 0 };
+ VkVideoDecodeCapabilitiesKHR *dec_caps = ctx ? &ctx->dec_caps : &local_dec_caps;
+ VkVideoDecodeUsageInfoKHR local_usage = { 0 };
+ VkVideoDecodeUsageInfoKHR *usage = ctx ? &ctx->usage : &local_usage;
+ VkVideoProfileInfoKHR local_profile = { 0 };
+ VkVideoProfileInfoKHR *profile = ctx ? &ctx->profile : &local_profile;
+ VkVideoProfileListInfoKHR local_profile_list = { 0 };
+ VkVideoProfileListInfoKHR *profile_list = ctx ? &ctx->profile_list : &local_profile_list;
+
+ VkVideoDecodeH264ProfileInfoKHR local_h264_profile = { 0 };
+ VkVideoDecodeH264ProfileInfoKHR *h264_profile = ctx ? &ctx->h264_profile : &local_h264_profile;
+
+ VkVideoDecodeH264ProfileInfoKHR local_h265_profile = { 0 };
+ VkVideoDecodeH264ProfileInfoKHR *h265_profile = ctx ? &ctx->h265_profile : &local_h265_profile;
+
+ VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
+ .pNext = profile_list,
+ };
+ VkVideoDecodeH264CapabilitiesKHR h264_caps = {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR,
+ };
+ VkVideoDecodeH265CapabilitiesKHR h265_caps = {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR,
+ };
+ VkVideoFormatPropertiesKHR *ret_info;
+ uint32_t nb_out_fmts = 0;
+
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+ if (!desc)
+ return AVERROR(EINVAL);
+
+ if (ctx && ctx->init)
+ return 0;
+
+ if (!vk_codec->decode_op)
+ return AVERROR(EINVAL);
+
+ *extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions,
+ hwctx->nb_enabled_dev_extensions);
+
+ if (!(*extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
+ av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
+ VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
+ return AVERROR(ENOSYS);
+ } else if (!vk_codec->decode_extension) {
+ av_log(avctx, AV_LOG_ERROR, "Unsupported codec for Vulkan decoding: %s!\n",
+ avcodec_get_name(avctx->codec_id));
+ return AVERROR(ENOSYS);
+ } else if (!(vk_codec->decode_extension & *extensions)) {
+ av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n",
+ avcodec_get_name(avctx->codec_id));
+ return AVERROR(ENOSYS);
+ }
+
+ err = ff_vk_load_functions(device, vk, *extensions, 1, 1);
+ if (err < 0)
+ return err;
+
+repeat:
+ if (avctx->codec_id == AV_CODEC_ID_H264) {
+ base_profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;
+ dec_caps->pNext = &h264_caps;
+ usage->pNext = h264_profile;
+ h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR;
+ h264_profile->stdProfileIdc = cur_profile;
+ h264_profile->pictureLayout = avctx->field_order == AV_FIELD_PROGRESSIVE ?
+ VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR :
+ VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR;
+ } else if (avctx->codec_id == AV_CODEC_ID_H265) {
+ base_profile = FF_PROFILE_HEVC_MAIN;
+ dec_caps->pNext = &h265_caps;
+ usage->pNext = h265_profile;
+ h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR;
+ h265_profile->stdProfileIdc = cur_profile;
+ }
+
+ usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR;
+ usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR;
+
+ profile->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
+ /* NOTE: NVIDIA's implementation fails if the USAGE hint is inserted.
+ * Remove this once it's fixed. */
+ profile->pNext = usage->pNext;
+ profile->videoCodecOperation = vk_codec->decode_op;
+ profile->chromaSubsampling = ff_vk_subsampling_from_av_desc(desc);
+ profile->lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth);
+ profile->chromaBitDepth = profile->lumaBitDepth;
+
+ profile_list->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
+ profile_list->profileCount = 1;
+ profile_list->pProfiles = profile;
+
+ /* Get the capabilities of the decoder for the given profile */
+ caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
+ caps->pNext = dec_caps;
+ dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR;
+ /* dec_caps->pNext already filled in */
+
+ ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile,
+ caps);
+ if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR &&
+ avctx->flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH &&
+ cur_profile != base_profile) {
+ cur_profile = base_profile;
+ av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting "
+ "again with profile %s\n",
+ avcodec_get_name(avctx->codec_id),
+ avcodec_profile_name(avctx->codec_id, avctx->profile),
+ avcodec_profile_name(avctx->codec_id, base_profile));
+ goto repeat;
+ } else if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) {
+ av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: "
+ "%s profile \"%s\" not supported!\n",
+ avcodec_get_name(avctx->codec_id),
+ avcodec_profile_name(avctx->codec_id, cur_profile));
+ return AVERROR(EINVAL);
+ } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) {
+ av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: "
+ "format (%s) not supported!\n",
+ av_get_pix_fmt_name(avctx->sw_pix_fmt));
+ return AVERROR(EINVAL);
+ } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT ||
+ ret == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+ return AVERROR(EINVAL);
+ } else if (ret != VK_SUCCESS) {
+ return AVERROR_EXTERNAL;
+ }
+
+ max_level = avctx->codec_id == AV_CODEC_ID_H264 ? h264_caps.maxLevelIdc :
+ avctx->codec_id == AV_CODEC_ID_H265 ? h265_caps.maxLevelIdc :
+ 0;
+
+ if (ctx) {
+ av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile \"%s\":\n",
+ avcodec_get_name(avctx->codec_id),
+ avcodec_profile_name(avctx->codec_id, avctx->profile));
+ av_log(avctx, AV_LOG_VERBOSE, " Maximum level: %i\n",
+ max_level);
+ av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n",
+ caps->minCodedExtent.width, caps->maxCodedExtent.width);
+ av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n",
+ caps->minCodedExtent.height, caps->maxCodedExtent.height);
+ av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n",
+ caps->pictureAccessGranularity.width);
+ av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n",
+ caps->pictureAccessGranularity.height);
+ av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n",
+ caps->minBitstreamBufferOffsetAlignment);
+ av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n",
+ caps->minBitstreamBufferSizeAlignment);
+ av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n",
+ caps->maxDpbSlots);
+ av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n",
+ caps->maxActiveReferencePictures);
+ av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n",
+ CODEC_VER(caps->stdHeaderVersion.specVersion),
+ CODEC_VER(dec_ext[avctx->codec_id]->specVersion));
+ av_log(avctx, AV_LOG_VERBOSE, " Decode modes:%s%s%s\n",
+ dec_caps->flags ? "" :
+ " invalid",
+ dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR ?
+ " reuse_dst_dpb" : "",
+ dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR ?
+ " dedicated_dpb" : "");
+ av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n",
+ caps->flags ? "" :
+ " none",
+ caps->flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ?
+ " protected" : "",
+ caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ?
+ " separate_references" : "");
+ }
+
+ /* Check if decoding is possible with the given parameters */
+ if (avctx->coded_width < caps->minCodedExtent.width ||
+ avctx->coded_height < caps->minCodedExtent.height ||
+ avctx->coded_width > caps->maxCodedExtent.width ||
+ avctx->coded_height > caps->maxCodedExtent.height)
+ return AVERROR(EINVAL);
+
+ if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) &&
+ avctx->level > max_level)
+ return AVERROR(EINVAL);
+
+ /* Some basic sanity checking */
+ if (!(dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR |
+ VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR))) {
+ av_log(avctx, AV_LOG_ERROR, "Buggy driver signals invalid decoding mode: neither "
+ "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR nor "
+ "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR are set!\n");
+ return AVERROR_EXTERNAL;
+ } else if ((dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR |
+ VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) ==
+ VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) &&
+ !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR)) {
+ av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: "
+ "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set "
+ "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ /* TODO: make dedicated_dpb tunable */
+ dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR);
+ layered_dpb = !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
+
+ if (dedicated_dpb) {
+ fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+ } else {
+ fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
+ VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT;
+ }
+
+ /* Get the format of the images necessary */
+ ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev,
+ &fmt_info,
+ &nb_out_fmts, NULL);
+ if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+ (!nb_out_fmts && ret == VK_SUCCESS)) {
+ return AVERROR(EINVAL);
+ } else if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts);
+ if (!ret_info)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < nb_out_fmts; i++)
+ ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR;
+
+ ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev,
+ &fmt_info,
+ &nb_out_fmts, ret_info);
+ if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+ (!nb_out_fmts && ret == VK_SUCCESS)) {
+ av_free(ret_info);
+ return AVERROR(EINVAL);
+ } else if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+ ff_vk_ret2str(ret));
+ av_free(ret_info);
+ return AVERROR_EXTERNAL;
+ }
+
+ if (ctx) {
+ ctx->dedicated_dpb = dedicated_dpb;
+ ctx->layered_dpb = layered_dpb;
+ ctx->init = 1;
+ }
+
+ *pix_fmt = AV_PIX_FMT_NONE;
+
+ av_log(avctx, AV_LOG_DEBUG, "Pixel format list for decoding:\n");
+ for (int i = 0; i < nb_out_fmts; i++) {
+ int tmp_score;
+ enum AVPixelFormat tmp = ff_vk_pix_fmt_from_vkfmt(ret_info[i].format,
+ &tmp_score);
+ const AVPixFmtDescriptor *tmp_desc = av_pix_fmt_desc_get(tmp);
+ if (tmp == AV_PIX_FMT_NONE || !tmp_desc)
+ continue;
+
+ av_log(avctx, AV_LOG_DEBUG, " %i - %s (%i), score %i\n", i,
+ av_get_pix_fmt_name(tmp), ret_info[i].format, tmp_score);
+
+ if (context_format == tmp || tmp_score < score) {
+ if (ctx)
+ ctx->pic_format = ret_info[i].format;
+ *pix_fmt = tmp;
+ context_format_was_found |= context_format == tmp;
+ if (context_format_was_found)
+ break;
+ }
+ }
+
+ if (*pix_fmt == AV_PIX_FMT_NONE) {
+ av_log(avctx, AV_LOG_ERROR, "No valid pixel format for decoding!\n");
+ return AVERROR(EINVAL);
+ }
+
+ if (width_align)
+ *width_align = caps->pictureAccessGranularity.width;
+ if (height_align)
+ *height_align = caps->pictureAccessGranularity.height;
+ if (dpb_dedicate)
+ *dpb_dedicate = dedicated_dpb;
+
+ av_free(ret_info);
+
+ av_log(avctx, AV_LOG_VERBOSE, "Chosen frames format: %s\n",
+ av_get_pix_fmt_name(*pix_fmt));
+
+ if (context_format != AV_PIX_FMT_NONE && !context_format_was_found) {
+ av_log(avctx, AV_LOG_ERROR, "Frames context had a pixel format set which "
+ "was not available for decoding into!\n");
+ return AVERROR(EINVAL);
+ }
+
+ return *pix_fmt == AV_PIX_FMT_NONE ? AVERROR(EINVAL) : 0;
+}
+
+/**
+ * Fill in a hardware frames context for Vulkan decoding of the stream
+ * described by avctx.
+ *
+ * The software format, alignment and DPB mode are obtained from
+ * vulkan_decode_check_init(); the coded dimensions are then rounded up to the
+ * reported alignment and the image usage flags are chosen accordingly.
+ *
+ * @param avctx         codec context supplying the coded dimensions
+ * @param hw_frames_ctx reference to the AVHWFramesContext to fill in
+ * @return the result of vulkan_decode_check_init(); negative on failure
+ */
+int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
+{
+    AVHWFramesContext *hwframes = (AVHWFramesContext *)hw_frames_ctx->data;
+    AVVulkanFramesContext *vkframes = hwframes->hwctx;
+    int w_align, h_align, separate_dpb;
+    int ret;
+
+    ret = vulkan_decode_check_init(avctx, hw_frames_ctx, &w_align, &h_align,
+                                   &hwframes->sw_format, &separate_dpb);
+    if (ret < 0)
+        return ret;
+
+    hwframes->format = AV_PIX_FMT_VULKAN;
+    hwframes->width  = FFALIGN(avctx->coded_width,  w_align);
+    hwframes->height = FFALIGN(avctx->coded_height, h_align);
+
+    vkframes->tiling = VK_IMAGE_TILING_OPTIMAL;
+    vkframes->usage  = VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
+                       VK_IMAGE_USAGE_SAMPLED_BIT              |
+                       VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+
+    /* Without a dedicated DPB the output images double as references. */
+    if (!separate_dpb)
+        vkframes->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+
+    return ret;
+}
+
+/**
+ * Destroy a VkVideoSessionParametersKHR object and release its storage.
+ *
+ * NOTE(review): the (opaque, data) signature looks like an AVBuffer free
+ * callback - confirm against the av_buffer_create() call site.
+ *
+ * @param opaque the FFVulkanDecodeContext whose device owns the object
+ * @param data   pointer to the VkVideoSessionParametersKHR handle; the handle
+ *               is destroyed and the pointed-to memory freed with av_free()
+ */
+void ff_vk_decode_free_params(void *opaque, uint8_t *data)
+{
+    FFVulkanDecodeContext *dec = opaque;
+    FFVulkanContext *s = &dec->s;
+    VkVideoSessionParametersKHR *handle = (VkVideoSessionParametersKHR *)data;
+
+    s->vkfn.DestroyVideoSessionParametersKHR(s->hwctx->act_dev, *handle,
+                                             s->hwctx->alloc);
+    av_free(handle);
+}
+
+/**
+ * Tear down the Vulkan decoder state held in
+ * avctx->internal->hwaccel_priv_data.
+ *
+ * Also used as the error path of ff_vk_decode_init(), so every handle that
+ * may not have been created yet is checked before it is destroyed.
+ *
+ * @return always 0
+ */
+int ff_vk_decode_uninit(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    /* Wait on and free execution pool */
+    ff_vk_exec_pool_free(s, &ctx->exec_pool);
+
+    /* Destroy layered view */
+    if (ctx->layered_view)
+        vk->DestroyImageView(s->hwctx->act_dev, ctx->layered_view, s->hwctx->alloc);
+
+    /* This also frees all references from this pool */
+    av_frame_free(&ctx->layered_frame);
+    av_buffer_unref(&ctx->dpb_hwfc_ref);
+
+    /* Destroy parameters */
+    if (ctx->empty_session_params)
+        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
+                                             ctx->empty_session_params,
+                                             s->hwctx->alloc);
+
+    ff_vk_video_common_uninit(s, &ctx->common);
+
+    /* The sampler is created late in init; skip the call when an earlier
+     * failure means it never existed, as for the other handles above. */
+    if (ctx->yuv_sampler)
+        vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler,
+                                          s->hwctx->alloc);
+
+    av_buffer_pool_uninit(&ctx->tmp_pool);
+
+    ff_vk_uninit(s);
+
+    return 0;
+}
+
+/**
+ * Initialize the Vulkan decoder state held in
+ * avctx->internal->hwaccel_priv_data.
+ *
+ * Takes references on the frames and device contexts, validates the decode
+ * capabilities, creates the execution pool, the video session, the YCbCr
+ * sampler conversion, the optional dedicated/layered DPB and an empty set of
+ * session parameters, then flushes the decoder.
+ *
+ * Once the context references are held, every failure goes through the
+ * fail label so that ff_vk_decode_uninit() releases what was created.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vk_decode_init(AVCodecContext *avctx)
+{
+    int err, qf, cxpos = 0, cypos = 0, nb_q = 0;
+    VkResult ret;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
+    };
+    VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params :
+                 avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params :
+                 NULL,
+    };
+    VkVideoSessionCreateInfoKHR session_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR,
+    };
+    VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
+        .components = ff_comp_identity_map,
+        .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY,
+        .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */
+    };
+
+    err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN);
+    if (err < 0)
+        return err;
+
+    /* No device is attached to the context yet, so a failed reference is
+     * undone by hand here rather than by running the full teardown. */
+    s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
+    if (!s->frames_ref)
+        return AVERROR(ENOMEM);
+    s->frames = (AVHWFramesContext *)s->frames_ref->data;
+    s->hwfc = s->frames->hwctx;
+
+    s->device_ref = av_buffer_ref(s->frames->device_ref);
+    if (!s->device_ref) {
+        s->frames = NULL;
+        s->hwfc = NULL;
+        av_buffer_unref(&s->frames_ref);
+        return AVERROR(ENOMEM);
+    }
+    s->device = (AVHWDeviceContext *)s->device_ref->data;
+    s->hwctx = s->device->hwctx;
+
+    /* Get parameters, capabilities and final pixel/vulkan format */
+    err = vulkan_decode_check_init(avctx, s->frames_ref, NULL, NULL,
+                                   &ctx->sw_format, NULL);
+    if (err < 0)
+        goto fail;
+
+    /* Load all properties */
+    err = ff_vk_load_props(s);
+    if (err < 0)
+        goto fail;
+
+    /* Create queue context */
+    qf = ff_vk_qf_init(s, &ctx->qf_dec, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+
+    /* Check for support */
+    if (!(s->video_props[qf].videoCodecOperations &
+          ff_vk_codec_map[avctx->codec_id].decode_op)) {
+        av_log(avctx, AV_LOG_ERROR, "Decoding %s not supported on the given "
+               "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf);
+        /* References are already held: release them through the fail path. */
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* TODO: enable when stable and tested. */
+    if (s->query_props[qf].queryResultStatusSupport)
+        nb_q = 1;
+
+    /* Create decode exec context.
+     * 4 async contexts per thread seems like a good number. */
+    err = ff_vk_exec_pool_init(s, &ctx->qf_dec, &ctx->exec_pool, 4*avctx->thread_count,
+                               nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0,
+                               &ctx->profile);
+    if (err < 0)
+        goto fail;
+
+    session_create.pVideoProfile = &ctx->profile;
+    session_create.flags = 0x0;
+    session_create.queueFamilyIndex = s->hwctx->queue_family_decode_index;
+    session_create.maxCodedExtent = ctx->common.caps.maxCodedExtent;
+    session_create.maxDpbSlots = ctx->common.caps.maxDpbSlots;
+    session_create.maxActiveReferencePictures = ctx->common.caps.maxActiveReferencePictures;
+    session_create.pictureFormat = ctx->pic_format;
+    session_create.referencePictureFormat = session_create.pictureFormat;
+    session_create.pStdHeaderVersion = dec_ext[avctx->codec_id];
+
+    err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
+    if (err < 0)
+        goto fail;
+
+    /* Get sampler */
+    av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location);
+    yuv_sampler_info.xChromaOffset = cxpos >> 7;
+    yuv_sampler_info.yChromaOffset = cypos >> 7;
+    yuv_sampler_info.format = ctx->pic_format;
+    ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info,
+                                           s->hwctx->alloc, &ctx->yuv_sampler);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* If doing an out-of-place decoding, create a DPB pool */
+    if (ctx->dedicated_dpb) {
+        AVHWFramesContext *dpb_frames;
+        AVVulkanFramesContext *dpb_hwfc;
+
+        ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->device_ref);
+        if (!ctx->dpb_hwfc_ref) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+        dpb_frames->format = s->frames->format;
+        dpb_frames->sw_format = s->frames->sw_format;
+        dpb_frames->width = s->frames->width;
+        dpb_frames->height = s->frames->height;
+
+        dpb_hwfc = dpb_frames->hwctx;
+        dpb_hwfc->create_pnext = &ctx->profile_list;
+        dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL;
+        dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
+                          VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */
+
+        if (ctx->layered_dpb)
+            dpb_hwfc->nb_layers = ctx->common.caps.maxDpbSlots;
+
+        err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref);
+        if (err < 0)
+            goto fail;
+
+        if (ctx->layered_dpb) {
+            ctx->layered_frame = vk_get_dpb_pool(ctx);
+            if (!ctx->layered_frame) {
+                err = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            err = vk_decode_create_view(ctx, &ctx->layered_view, &ctx->layered_aspect,
+                                        (AVVkFrame *)ctx->layered_frame->data);
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    session_params_create.videoSession = ctx->common.session;
+    ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
+                                              s->hwctx->alloc, &ctx->empty_session_params);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        /* Session, sampler and DPB already exist: tear them down. */
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    ff_vk_decode_flush(avctx);
+
+    av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization sucessful\n");
+
+    return 0;
+
+fail:
+    ff_vk_decode_uninit(avctx);
+
+    return err;
+}
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
new file mode 100644
index 0000000000..9f9676bbfa
--- /dev/null
+++ b/libavcodec/vulkan_decode.h
@@ -0,0 +1,163 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_DECODE_H
+#define AVCODEC_VULKAN_DECODE_H
+
+#include "decode.h"
+#include "hwconfig.h"
+#include "internal.h"
+
+#include "vulkan_video.h"
+
+/**
+ * Decoder-wide Vulkan state, stored in avctx->internal->hwaccel_priv_data.
+ * Set up by ff_vk_decode_init() and released by ff_vk_decode_uninit().
+ */
+typedef struct FFVulkanDecodeContext {
+ FFVulkanContext s;
+ FFVkVideoCommon common;
+
+ /* Set when the driver does not report DPB_AND_OUTPUT_COINCIDE. */
+ int dedicated_dpb; /* Oddity #1 - separate DPB images */
+ /* Set when the driver does not report SEPARATE_REFERENCE_IMAGES. */
+ int layered_dpb; /* Madness #1 - layered DPB images */
+
+ AVBufferRef *dpb_hwfc_ref; /* Only used for dedicated_dpb */
+
+ AVFrame *layered_frame; /* Only used for layered_dpb */
+ VkImageView layered_view;
+ VkImageAspectFlags layered_aspect;
+
+ VkVideoDecodeH264ProfileInfoKHR h264_profile;
+ /* NOTE(review): declared with the H264 profile-info type;
+ * VkVideoDecodeH265ProfileInfoKHR looks intended - confirm against the
+ * code that fills this field in before changing it. */
+ VkVideoDecodeH264ProfileInfoKHR h265_profile;
+ /* Parameters object created with no SPS/PPS attached at init time. */
+ VkVideoSessionParametersKHR empty_session_params;
+
+ VkSamplerYcbcrConversion yuv_sampler;
+ VkVideoDecodeUsageInfoKHR usage;
+ /* Profile handed to the exec pool and to video session creation. */
+ VkVideoProfileInfoKHR profile;
+ VkVideoDecodeCapabilitiesKHR dec_caps;
+ /* Chained as create_pnext when the dedicated DPB frames are created. */
+ VkVideoProfileListInfoKHR profile_list;
+ /* Vulkan format selected for decoded pictures (and for references). */
+ VkFormat pic_format;
+ /* Software pixel format selected by the capability check. */
+ enum AVPixelFormat sw_format;
+ /* Set to 1 once the capability check has filled in the DPB mode. */
+ int init;
+
+ AVBufferRef *session_params;
+
+ FFVkQueueFamilyCtx qf_dec;
+ FFVkExecPool exec_pool;
+
+ AVBufferPool *tmp_pool; /* Pool for temporary data, if needed (HEVC) */
+ size_t tmp_pool_ele_size;
+
+ uint16_t last_ref_frames_in_use;
+} FFVulkanDecodeContext;
+
+/**
+ * Per-picture Vulkan decode state. Codec-specific picture structures embed
+ * one of these and pass it to ff_vk_decode_prepare_frame().
+ */
+typedef struct FFVulkanDecodePicture {
+ AVFrame *dpb_frame; /* Only used for out-of-place decoding. */
+ AVVkFrame *dpb_vkf; /* Only used for out-of-place decoding. */
+
+ /* img_view_ref is what gets bound as the picture resource of a
+ * reference slot. */
+ VkImageView img_view_ref; /* Image representation view (reference) */
+ VkImageView img_view_out; /* Image representation view (output-only) */
+ VkImageAspectFlags img_aspect; /* Image plane mask bits */
+ VkImageAspectFlags img_aspect_ref; /* Only used for out-of-place decoding */
+
+ VkSemaphore sem;
+ uint64_t sem_value;
+
+ /* State */
+ int update_params;
+ AVBufferRef *session_params;
+
+ /* Current picture */
+ VkVideoPictureResourceInfoKHR ref;
+ VkVideoReferenceSlotInfoKHR ref_slot;
+
+ /* Picture refs. H264 has the maximum number of refs (36) of any supported codec. */
+ VkVideoPictureResourceInfoKHR refs [36];
+ VkVideoReferenceSlotInfoKHR ref_slots[36];
+
+ /* Main decoding struct */
+ AVBufferRef *params_buf;
+ VkVideoDecodeInfoKHR decode_info;
+
+ /* Slice data */
+ uint8_t *slices;
+ size_t slices_size;
+ unsigned int slices_size_max;
+ uint32_t *slice_off;
+ unsigned int slice_off_max;
+ uint32_t nb_slices;
+} FFVulkanDecodePicture;
+
+/**
+ * Initialize decoder.
+ *
+ * Operates on the FFVulkanDecodeContext stored in
+ * avctx->internal->hwaccel_priv_data and finishes by flushing the decoder.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vk_decode_init(AVCodecContext *avctx);
+
+/**
+ * Initialize hw_frames_ctx with the parameters needed to decode the stream
+ * using the parameters from avctx.
+ *
+ * NOTE: if avctx->internal->hwaccel_priv_data exists, will partially initialize
+ * the context.
+ */
+int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
+
+/**
+ * Prepare a frame, creates the image view, and sets up the dpb fields.
+ */
+int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic,
+ FFVulkanDecodePicture *vkpic, int is_current,
+ int alloc_dpb);
+
+/**
+ * Add slice data to frame.
+ */
+int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp,
+ const uint8_t *data, size_t size, int add_startcode,
+ uint32_t *nb_slices, const uint32_t **offsets);
+
+/**
+ * Decode a frame.
+ */
+int ff_vk_decode_frame(AVCodecContext *avctx,
+ AVFrame *pic, FFVulkanDecodePicture *vp,
+ AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]);
+
+/**
+ * Free a frame and its state.
+ */
+void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp);
+
+/**
+ * Get an FFVkBuffer suitable for decoding from.
+ */
+int ff_vk_get_decode_buffer(FFVulkanDecodeContext *ctx, AVBufferRef **buf,
+ void *create_pNext, size_t size);
+
+/**
+ * Free VkVideoSessionParametersKHR.
+ *
+ * @param opaque the FFVulkanDecodeContext owning the Vulkan device
+ * @param data pointer to the VkVideoSessionParametersKHR handle; the handle
+ * is destroyed and the memory it lives in is released with av_free()
+ */
+void ff_vk_decode_free_params(void *opaque, uint8_t *data);
+
+/**
+ * Flush decoder.
+ */
+void ff_vk_decode_flush(AVCodecContext *avctx);
+
+/**
+ * Free decoder.
+ *
+ * Frees the execution pool, the layered view and frame, the DPB frames
+ * context, the empty session parameters, the common video state, the YCbCr
+ * sampler conversion and the temporary buffer pool. Always returns 0.
+ */
+int ff_vk_decode_uninit(AVCodecContext *avctx);
+
+#endif /* AVCODEC_VULKAN_DECODE_H */
--
2.39.2
[-- Attachment #70: 0069-h264dec-add-hwaccel_params_buf.patch --]
[-- Type: text/x-diff, Size: 2737 bytes --]
From e26c514b35f5c87321a8fa6c6eb70b54220a92ed Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 17 Jan 2023 05:01:45 +0100
Subject: [PATCH 69/72] h264dec: add hwaccel_params_buf
---
libavcodec/h264_slice.c | 4 ++++
libavcodec/h264dec.c | 4 ++++
libavcodec/h264dec.h | 2 ++
3 files changed, 10 insertions(+)
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index c0aa31bcd9..0c7f80c018 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -347,6 +347,10 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
return ret;
}
+ ret = av_buffer_replace(&h->hwaccel_params_buf, h1->hwaccel_params_buf);
+ if (ret < 0)
+ return ret;
+
ret = av_buffer_replace(&h->ps.pps_ref, h1->ps.pps_ref);
if (ret < 0)
return ret;
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 995bf17a8f..f6059da950 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -341,6 +341,7 @@ static av_cold int h264_decode_end(AVCodecContext *avctx)
H264Context *h = avctx->priv_data;
int i;
+ av_buffer_unref(&h->hwaccel_params_buf);
ff_h264_remove_all_refs(h);
ff_h264_free_tables(h);
@@ -470,6 +471,7 @@ static void h264_decode_flush(AVCodecContext *avctx)
ff_h264_flush_change(h);
ff_h264_sei_uninit(&h->sei);
+ av_buffer_unref(&h->hwaccel_params_buf);
for (i = 0; i < H264_MAX_PICTURE_COUNT; i++)
ff_h264_unref_picture(h, &h->DPB[i]);
@@ -669,6 +671,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
avpriv_request_sample(avctx, "data partitioning");
break;
case H264_NAL_SEI:
+ av_buffer_unref(&h->hwaccel_params_buf);
if (h->setup_finished) {
avpriv_request_sample(avctx, "Late SEI");
break;
@@ -682,6 +685,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
break;
case H264_NAL_SPS: {
GetBitContext tmp_gb = nal->gb;
+ av_buffer_unref(&h->hwaccel_params_buf);
if (avctx->hwaccel && avctx->hwaccel->decode_params) {
ret = avctx->hwaccel->decode_params(avctx,
nal->type,
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 1b18aba71f..5b1620c3f1 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -342,6 +342,8 @@ typedef struct H264Context {
H264Picture cur_pic;
H264Picture last_pic_for_ec;
+ AVBufferRef *hwaccel_params_buf;
+
H264SliceContext *slice_ctx;
int nb_slice_ctx;
int nb_slice_ctx_queued;
--
2.39.2
[-- Attachment #71: 0070-h264dec-add-Vulkan-hwaccel.patch --]
[-- Type: text/x-diff, Size: 27544 bytes --]
From b5ff58808482bedf12b981ee1c03dd95099a9332 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 01:13:01 +0100
Subject: [PATCH 70/72] h264dec: add Vulkan hwaccel
Thanks to Dave Airlie for figuring out a lot of the parameters.
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/h264_slice.c | 12 +-
libavcodec/h264dec.c | 3 +
libavcodec/hwaccels.h | 1 +
libavcodec/vulkan_h264.c | 521 +++++++++++++++++++++++++++++++++++++++
6 files changed, 539 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan_h264.c
diff --git a/configure b/configure
index 91f715351c..60973c38b3 100755
--- a/configure
+++ b/configure
@@ -3034,6 +3034,8 @@ h264_vdpau_hwaccel_deps="vdpau"
h264_vdpau_hwaccel_select="h264_decoder"
h264_videotoolbox_hwaccel_deps="videotoolbox"
h264_videotoolbox_hwaccel_select="h264_decoder"
+h264_vulkan_hwaccel_deps="vulkan"
+h264_vulkan_hwaccel_select="h264_decoder"
hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
hevc_d3d11va_hwaccel_select="hevc_decoder"
hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index eabf4eb43e..4c9db167a5 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -992,6 +992,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o
OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o
OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o
OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
+OBJS-$(CONFIG_H264_VULKAN_HWACCEL) += vulkan_decode.o vulkan_h264.o
OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o
OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o
OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 0c7f80c018..50d138e2a9 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -782,7 +782,8 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
CONFIG_H264_NVDEC_HWACCEL + \
CONFIG_H264_VAAPI_HWACCEL + \
CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \
- CONFIG_H264_VDPAU_HWACCEL)
+ CONFIG_H264_VDPAU_HWACCEL + \
+ CONFIG_H264_VULKAN_HWACCEL)
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
const enum AVPixelFormat *choices = pix_fmts;
int i;
@@ -803,6 +804,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
#if CONFIG_H264_VIDEOTOOLBOX_HWACCEL
if (h->avctx->colorspace != AVCOL_SPC_RGB)
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_H264_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
if (CHROMA444(h)) {
if (h->avctx->colorspace == AVCOL_SPC_RGB) {
@@ -815,6 +819,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
*fmt++ = AV_PIX_FMT_YUV420P10;
break;
case 12:
+#if CONFIG_H264_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
if (CHROMA444(h)) {
if (h->avctx->colorspace == AVCOL_SPC_RGB) {
*fmt++ = AV_PIX_FMT_GBRP12;
@@ -840,6 +847,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
#if CONFIG_H264_VDPAU_HWACCEL
*fmt++ = AV_PIX_FMT_VDPAU;
#endif
+#if CONFIG_H264_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
#if CONFIG_H264_NVDEC_HWACCEL
*fmt++ = AV_PIX_FMT_CUDA;
#endif
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index f6059da950..15a6e74829 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -1100,6 +1100,9 @@ const FFCodec ff_h264_decoder = {
#endif
#if CONFIG_H264_VIDEOTOOLBOX_HWACCEL
HWACCEL_VIDEOTOOLBOX(h264),
+#endif
+#if CONFIG_H264_VULKAN_HWACCEL
+ HWACCEL_VULKAN(h264),
#endif
NULL
},
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index aca55831f3..23d0843c76 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -36,6 +36,7 @@ extern const AVHWAccel ff_h264_nvdec_hwaccel;
extern const AVHWAccel ff_h264_vaapi_hwaccel;
extern const AVHWAccel ff_h264_vdpau_hwaccel;
extern const AVHWAccel ff_h264_videotoolbox_hwaccel;
+extern const AVHWAccel ff_h264_vulkan_hwaccel;
extern const AVHWAccel ff_hevc_d3d11va_hwaccel;
extern const AVHWAccel ff_hevc_d3d11va2_hwaccel;
extern const AVHWAccel ff_hevc_dxva2_hwaccel;
diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c
new file mode 100644
index 0000000000..241a7d8f5b
--- /dev/null
+++ b/libavcodec/vulkan_h264.c
@@ -0,0 +1,521 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264dec.h"
+#include "h264_ps.h"
+
+#include "vulkan_decode.h"
+
+/* Name and spec version of the H.264 decode std video header this file is
+ * built against. */
+const VkExtensionProperties ff_vk_dec_h264_ext = {
+    .specVersion   = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION,
+    .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME,
+};
+
+/**
+ * H.264-specific per-picture state, reached through
+ * H264Picture.hwaccel_picture_private. Wraps the generic
+ * FFVulkanDecodePicture and provides storage for the H.264 std/Vulkan
+ * structures that describe the current picture and its references.
+ */
+typedef struct H264VulkanDecodePicture {
+ FFVulkanDecodeContext *ctx;
+ FFVulkanDecodePicture vp;
+
+ /* Current picture */
+ StdVideoDecodeH264ReferenceInfo h264_ref;
+ VkVideoDecodeH264DpbSlotInfoKHR vkh264_ref;
+
+ /* Picture refs */
+ H264Picture *ref_src [H264_MAX_PICTURE_COUNT];
+ StdVideoDecodeH264ReferenceInfo h264_refs [H264_MAX_PICTURE_COUNT];
+ VkVideoDecodeH264DpbSlotInfoKHR vkh264_refs[H264_MAX_PICTURE_COUNT];
+
+ /* Current picture (contd.) */
+ StdVideoDecodeH264PictureInfo h264pic;
+ VkVideoDecodeH264PictureInfoKHR h264_pic_info;
+} H264VulkanDecodePicture;
+
+/**
+ * Prepare a picture for Vulkan decoding and describe it as a DPB slot.
+ *
+ * The four output structures form a chain:
+ * ref_slot -> { vkh264_ref -> h264_ref, ref }.
+ *
+ * @param ref_src           if non-NULL, receives pic
+ * @param ref_slot          reference slot info (head of the chain)
+ * @param ref               picture resource referenced by ref_slot
+ * @param vkh264_ref        H.264 DPB slot info chained into ref_slot
+ * @param h264_ref          std reference info referenced by vkh264_ref
+ * @param pic               the picture to describe
+ * @param is_current        forwarded to ff_vk_decode_prepare_frame()
+ * @param picture_structure PICT_*_FIELD bits selecting the field flags
+ * @param dpb_slot_index    slot index (also the array layer for layered DPBs)
+ * @return 0 on success, a negative AVERROR code if frame preparation fails
+ */
+static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
+                             VkVideoReferenceSlotInfoKHR *ref_slot,       /* Main structure */
+                             VkVideoPictureResourceInfoKHR *ref,          /* Goes in ^ */
+                             VkVideoDecodeH264DpbSlotInfoKHR *vkh264_ref, /* Goes in ^ */
+                             StdVideoDecodeH264ReferenceInfo *h264_ref,   /* Goes in ^ */
+                             H264Picture *pic, int is_current, int picture_structure,
+                             int dpb_slot_index)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    H264VulkanDecodePicture *priv = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &priv->vp;
+    int ret;
+
+    ret = ff_vk_decode_prepare_frame(dec, pic->f, vp, is_current,
+                                     dec->dedicated_dpb);
+    if (ret < 0)
+        return ret;
+
+    /* Head of the chain: the slot, pointing at the two structures below. */
+    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+        .pNext = vkh264_ref,
+        .slotIndex = dpb_slot_index, /* TODO: kinda sure */
+        .pPictureResource = ref,
+    };
+
+    /* Image backing the slot; layered DPBs select the layer by slot index. */
+    *ref = (VkVideoPictureResourceInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+        .codedOffset = (VkOffset2D){ 0, 0 },
+        .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
+        .baseArrayLayer = dec->layered_dpb ? dpb_slot_index : 0,
+        .imageViewBinding = vp->img_view_ref,
+    };
+
+    *vkh264_ref = (VkVideoDecodeH264DpbSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR,
+        .pStdReferenceInfo = h264_ref,
+    };
+
+    /* Long-term references are identified by pic_id, short-term ones by
+     * frame_num. TODO: kinda sure */
+    *h264_ref = (StdVideoDecodeH264ReferenceInfo) {
+        .FrameNum = pic->long_ref ? pic->pic_id : pic->frame_num,
+        .PicOrderCnt = { pic->field_poc[0], pic->field_poc[1] },
+        .flags = (StdVideoDecodeH264ReferenceInfoFlags) {
+            .top_field_flag = (picture_structure & PICT_TOP_FIELD) != 0,
+            .bottom_field_flag = (picture_structure & PICT_BOTTOM_FIELD) != 0,
+            .used_for_long_term_reference = pic->reference && pic->long_ref,
+            .is_non_existing = 0,
+        },
+    };
+
+    if (ref_src)
+        *ref_src = pic;
+
+    return 0;
+}
+
+/**
+ * Convert a parsed H.264 SPS into the Vulkan std video representation.
+ *
+ * The four output structures are linked together: vksps points at
+ * vksps_scaling and vksps_vui, and vksps_vui points at vksps_vui_header, so
+ * all of them must stay alive for as long as vksps is in use. vksps also
+ * points into sps itself (offset_for_ref_frame).
+ *
+ * @param sps              parsed sequence parameter set
+ * @param vksps_scaling    receives the scaling lists
+ * @param vksps_vui_header receives the HRD parameters
+ * @param vksps_vui        receives the VUI parameters
+ * @param vksps            receives the sequence parameter set itself
+ */
+static void set_sps(const SPS *sps,
+                    StdVideoH264ScalingLists *vksps_scaling,
+                    StdVideoH264HrdParameters *vksps_vui_header,
+                    StdVideoH264SequenceParameterSetVui *vksps_vui,
+                    StdVideoH264SequenceParameterSet *vksps)
+{
+    *vksps_scaling = (StdVideoH264ScalingLists) {
+        .scaling_list_present_mask = sps->scaling_matrix_present_mask,
+        .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */
+    };
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_matrix4[i],
+               STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**sps->scaling_matrix4));
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_matrix8[i],
+               STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**sps->scaling_matrix8));
+
+    *vksps_vui_header = (StdVideoH264HrdParameters) {
+        .cpb_cnt_minus1 = sps->cpb_cnt - 1,
+        .bit_rate_scale = sps->bit_rate_scale,
+        .initial_cpb_removal_delay_length_minus1 = sps->initial_cpb_removal_delay_length - 1,
+        .cpb_removal_delay_length_minus1 = sps->cpb_removal_delay_length - 1,
+        .dpb_output_delay_length_minus1 = sps->dpb_output_delay_length - 1,
+        .time_offset_length = sps->time_offset_length,
+    };
+
+    for (int i = 0; i < sps->cpb_cnt; i++) {
+        vksps_vui_header->bit_rate_value_minus1[i] = sps->bit_rate_value[i] - 1;
+        vksps_vui_header->cpb_size_value_minus1[i] = sps->cpb_size_value[i] - 1;
+        vksps_vui_header->cbr_flag[i] = sps->cpr_flag[i];
+    }
+
+    *vksps_vui = (StdVideoH264SequenceParameterSetVui) {
+        .aspect_ratio_idc = sps->vui.aspect_ratio_idc,
+        .sar_width = sps->vui.sar.num,
+        .sar_height = sps->vui.sar.den,
+        .video_format = sps->vui.video_format,
+        .colour_primaries = sps->vui.colour_primaries,
+        .transfer_characteristics = sps->vui.transfer_characteristics,
+        .matrix_coefficients = sps->vui.matrix_coeffs,
+        .num_units_in_tick = sps->num_units_in_tick,
+        .time_scale = sps->time_scale,
+        .pHrdParameters = vksps_vui_header,
+        .max_num_reorder_frames = sps->num_reorder_frames,
+        .max_dec_frame_buffering = sps->max_dec_frame_buffering,
+        /* Raw syntax elements; only meaningful when the flag below is set. */
+        .chroma_sample_loc_type_top_field = sps->vui.chroma_sample_loc_type_top_field,
+        .chroma_sample_loc_type_bottom_field = sps->vui.chroma_sample_loc_type_bottom_field,
+        .flags = (StdVideoH264SpsVuiFlags) {
+            .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag,
+            .overscan_info_present_flag = sps->vui.overscan_info_present_flag,
+            .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag,
+            .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag,
+            .video_full_range_flag = sps->vui.video_full_range_flag,
+            .color_description_present_flag = sps->vui.colour_description_present_flag,
+            /* Use the parsed presence flag: chroma_location is an enum and
+             * would be truncated by this one-bit field. */
+            .chroma_loc_info_present_flag = sps->vui.chroma_loc_info_present_flag,
+            .timing_info_present_flag = sps->timing_info_present_flag,
+            .fixed_frame_rate_flag = sps->fixed_frame_rate_flag,
+            .bitstream_restriction_flag = sps->bitstream_restriction_flag,
+            .nal_hrd_parameters_present_flag = sps->nal_hrd_parameters_present_flag,
+            .vcl_hrd_parameters_present_flag = sps->vcl_hrd_parameters_present_flag,
+        },
+    };
+
+    *vksps = (StdVideoH264SequenceParameterSet) {
+        .profile_idc = sps->profile_idc,
+        .level_idc = sps->level_idc,
+        .seq_parameter_set_id = sps->sps_id,
+        .chroma_format_idc = sps->chroma_format_idc,
+        .bit_depth_luma_minus8 = sps->bit_depth_luma - 8,
+        .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8,
+        .log2_max_frame_num_minus4 = sps->log2_max_frame_num - 4,
+        .pic_order_cnt_type = sps->poc_type,
+        /* The POC LSB length is only written for poc_type 0. */
+        .log2_max_pic_order_cnt_lsb_minus4 = sps->poc_type ? 0 : sps->log2_max_poc_lsb - 4,
+        .offset_for_non_ref_pic = sps->offset_for_non_ref_pic,
+        .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field,
+        .num_ref_frames_in_pic_order_cnt_cycle = sps->poc_cycle_length,
+        .max_num_ref_frames = sps->ref_frame_count,
+        .pic_width_in_mbs_minus1 = sps->mb_width - 1,
+        /* mb_height is halved to map units when frame_mbs_only_flag is 0. */
+        .pic_height_in_map_units_minus1 = (sps->mb_height/(2 - sps->frame_mbs_only_flag)) - 1,
+        .frame_crop_left_offset = sps->crop_left,
+        .frame_crop_right_offset = sps->crop_right,
+        .frame_crop_top_offset = sps->crop_top,
+        .frame_crop_bottom_offset = sps->crop_bottom,
+        .flags = (StdVideoH264SpsFlags) {
+            .constraint_set0_flag = (sps->constraint_set_flags >> 0) & 0x1,
+            .constraint_set1_flag = (sps->constraint_set_flags >> 1) & 0x1,
+            .constraint_set2_flag = (sps->constraint_set_flags >> 2) & 0x1,
+            .constraint_set3_flag = (sps->constraint_set_flags >> 3) & 0x1,
+            .constraint_set4_flag = (sps->constraint_set_flags >> 4) & 0x1,
+            .constraint_set5_flag = (sps->constraint_set_flags >> 5) & 0x1,
+            .direct_8x8_inference_flag = sps->direct_8x8_inference_flag,
+            .mb_adaptive_frame_field_flag = sps->mb_aff,
+            .frame_mbs_only_flag = sps->frame_mbs_only_flag,
+            .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag,
+            .separate_colour_plane_flag = sps->residual_color_transform_flag,
+            .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag,
+            .qpprime_y_zero_transform_bypass_flag = sps->transform_bypass,
+            .frame_cropping_flag = sps->crop,
+            .seq_scaling_matrix_present_flag = sps->scaling_matrix_present,
+            .vui_parameters_present_flag = sps->vui_parameters_present_flag,
+        },
+        .pOffsetForRefFrame = sps->offset_for_ref_frame,
+        .pScalingLists = vksps_scaling,
+        .pSequenceParameterSetVui = vksps_vui,
+    };
+}
+
+/**
+ * Convert a parsed H.264 PPS into the Vulkan std video representation.
+ *
+ * vkpps keeps a pointer to vkpps_scaling, so both outputs must stay alive
+ * together. The sps argument is not read.
+ *
+ * @param pps           parsed picture parameter set
+ * @param sps           the SPS the PPS refers to (unused)
+ * @param vkpps_scaling receives the scaling lists
+ * @param vkpps         receives the picture parameter set itself
+ */
+static void set_pps(const PPS *pps, const SPS *sps,
+                    StdVideoH264ScalingLists *vkpps_scaling,
+                    StdVideoH264PictureParameterSet *vkpps)
+{
+    const size_t list4x4_bytes = STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS *
+                                 sizeof(pps->scaling_matrix4[0][0]);
+    const size_t list8x8_bytes = STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS *
+                                 sizeof(pps->scaling_matrix8[0][0]);
+
+    /* Scaling lists: header fields first, then the raw matrices. */
+    *vkpps_scaling = (StdVideoH264ScalingLists) {
+        .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */
+        .scaling_list_present_mask = pps->pic_scaling_matrix_present_mask,
+    };
+
+    for (int list = 0; list < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; list++)
+        memcpy(vkpps_scaling->ScalingList4x4[list], pps->scaling_matrix4[list],
+               list4x4_bytes);
+
+    for (int list = 0; list < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; list++)
+        memcpy(vkpps_scaling->ScalingList8x8[list], pps->scaling_matrix8[list],
+               list8x8_bytes);
+
+    /* The parameter set itself, linked to the scaling lists above. */
+    *vkpps = (StdVideoH264PictureParameterSet) {
+        .pScalingLists = vkpps_scaling,
+        .flags = (StdVideoH264PpsFlags) {
+            .entropy_coding_mode_flag = pps->cabac,
+            .bottom_field_pic_order_in_frame_present_flag = pps->pic_order_present,
+            .weighted_pred_flag = pps->weighted_pred,
+            .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present,
+            .constrained_intra_pred_flag = pps->constrained_intra_pred,
+            .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present,
+            .transform_8x8_mode_flag = pps->transform_8x8_mode,
+            .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag,
+        },
+        .pic_parameter_set_id = pps->pps_id,
+        .seq_parameter_set_id = pps->sps_id,
+        .num_ref_idx_l0_default_active_minus1 = pps->ref_count[0] - 1,
+        .num_ref_idx_l1_default_active_minus1 = pps->ref_count[1] - 1,
+        .weighted_bipred_idc = pps->weighted_bipred_idc,
+        .pic_init_qp_minus26 = pps->init_qp - 26,
+        .pic_init_qs_minus26 = pps->init_qs - 26,
+        .chroma_qp_index_offset = pps->chroma_qp_index_offset[0],
+        .second_chroma_qp_index_offset = pps->chroma_qp_index_offset[1],
+    };
+}
+
+/**
+ * Create a Vulkan video session parameters object holding every SPS and PPS
+ * currently stored in the H.264 decoder context.
+ *
+ * @param avctx  codec context; its hwaccel private data supplies the Vulkan
+ *               decode context and its priv_data the H264Context
+ * @param buf    on success, receives a new buffer wrapping the parameters
+ *               handle; the buffer is created with ff_vk_decode_free_params
+ *               as its free callback
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_h264_create_params(AVCodecContext *avctx, AVBufferRef **buf)
+{
+    VkResult ret;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    const H264Context *h = avctx->priv_data;
+
+    /* SPS */
+    StdVideoH264ScalingLists vksps_scaling[MAX_SPS_COUNT];
+    StdVideoH264HrdParameters vksps_vui_header[MAX_SPS_COUNT];
+    StdVideoH264SequenceParameterSetVui vksps_vui[MAX_SPS_COUNT];
+    StdVideoH264SequenceParameterSet vksps[MAX_SPS_COUNT];
+
+    /* PPS */
+    StdVideoH264ScalingLists vkpps_scaling[MAX_PPS_COUNT];
+    StdVideoH264PictureParameterSet vkpps[MAX_PPS_COUNT];
+
+    VkVideoDecodeH264SessionParametersAddInfoKHR h264_params_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR,
+        .pStdSPSs = vksps,
+        .stdSPSCount = 0,
+        .pStdPPSs = vkpps,
+        .stdPPSCount = 0,
+    };
+    VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pParametersAddInfo = &h264_params_info,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = &h264_params,
+        .videoSession = ctx->common.session,
+        .videoSessionParametersTemplate = NULL,
+    };
+
+    AVBufferRef *tmp;
+    VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par));
+    if (!par)
+        return AVERROR(ENOMEM);
+
+    /* SPS list: visit every slot instead of stopping at the first empty one,
+     * so that a stream whose parameter set ids are not contiguous from 0
+     * still gets all of its sets uploaded. Entries are packed in the output
+     * arrays; each Std structure carries its own id. */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.sps_list); i++) {
+        const SPS *sps_l;
+        int idx = h264_params_info.stdSPSCount;
+
+        if (!h->ps.sps_list[i])
+            continue;
+
+        sps_l = (const SPS *)h->ps.sps_list[i]->data;
+        set_sps(sps_l, &vksps_scaling[idx], &vksps_vui_header[idx],
+                &vksps_vui[idx], &vksps[idx]);
+        h264_params_info.stdSPSCount++;
+    }
+
+    /* PPS list: same bounded, gap-tolerant walk as for the SPS list. */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.pps_list); i++) {
+        const PPS *pps_l;
+        int idx = h264_params_info.stdPPSCount;
+
+        if (!h->ps.pps_list[i])
+            continue;
+
+        pps_l = (const PPS *)h->ps.pps_list[i]->data;
+        set_pps(pps_l, pps_l->sps, &vkpps_scaling[idx], &vkpps[idx]);
+        h264_params_info.stdPPSCount++;
+    }
+
+    h264_params.maxStdSPSCount = h264_params_info.stdSPSCount;
+    h264_params.maxStdPPSCount = h264_params_info.stdPPSCount;
+
+    /* Create session parameters */
+    ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create,
+                                              ctx->s.hwctx->alloc, par);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        /* No Vulkan object was created, so only the handle storage has to
+         * be released here. */
+        av_free(par);
+        return AVERROR_EXTERNAL;
+    }
+
+    tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params,
+                           ctx, 0);
+    if (!tmp) {
+        ff_vk_decode_free_params(ctx, (uint8_t *)par);
+        return AVERROR(ENOMEM);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS\n",
+           h264_params_info.stdSPSCount, h264_params_info.stdPPSCount);
+
+    *buf = tmp;
+
+    return 0;
+}
+
+/**
+ * Return the index of pic inside h->DPB, or 0 when pic is not one of the
+ * DPB entries (the same fallback the open-coded searches used).
+ */
+static int vk_h264_dpb_slot_index(const H264Context *h, const H264Picture *pic)
+{
+    for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++)
+        if (pic == &h->DPB[slot])
+            return slot;
+
+    return 0;
+}
+
+/**
+ * Prepare the current picture for Vulkan decoding.
+ *
+ * Creates the session parameters on first use, takes a reference to them for
+ * this picture, fills the setup slot for the current picture and one
+ * reference slot per short-term and long-term reference, and initialises the
+ * picture info and decode info structures. No slices are added here.
+ *
+ * @param avctx   codec context
+ * @param buffer  unused
+ * @param size    unused
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_h264_start_frame(AVCodecContext *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t size)
+{
+    int err;
+    int dpb_slot_index = 0;
+    H264Context *h = avctx->priv_data;
+    H264Picture *pic = h->cur_pic_ptr;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+
+    if (!h->hwaccel_params_buf) {
+        err = vk_h264_create_params(avctx, &h->hwaccel_params_buf);
+        if (err < 0)
+            return err;
+    }
+
+    vp->session_params = av_buffer_ref(h->hwaccel_params_buf);
+    if (!vp->session_params)
+        return AVERROR(ENOMEM);
+
+    /* Fill in main slot */
+    dpb_slot_index = vk_h264_dpb_slot_index(h, pic);
+    err = vk_h264_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref,
+                            &hp->vkh264_ref, &hp->h264_ref, pic, 1,
+                            h->picture_structure, dpb_slot_index);
+    if (err < 0)
+        return err;
+
+    /* Fill in short-term references */
+    for (int i = 0; i < h->short_ref_count; i++) {
+        dpb_slot_index = vk_h264_dpb_slot_index(h, h->short_ref[i]);
+        err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i],
+                                &vp->refs[i], &hp->vkh264_refs[i],
+                                &hp->h264_refs[i], h->short_ref[i], 0,
+                                h->DPB[dpb_slot_index].picture_structure,
+                                dpb_slot_index);
+        if (err < 0)
+            return err;
+    }
+
+    /* Fill in long-term refs.
+     * r walks h->long_ref[], i is the output slot that continues after the
+     * short-term references. The DPB search and the picture passed on must
+     * both use r; indexing long_ref[] with i picked the wrong entry whenever
+     * short-term references were present.
+     * NOTE(review): long_ref[] is indexed by long-term frame index in the
+     * H.264 decoder and can therefore contain empty entries, which are
+     * skipped here - confirm against h264_refs.c. */
+    for (int r = 0, i = h->short_ref_count;
+         r < FF_ARRAY_ELEMS(h->long_ref) &&
+         i < h->short_ref_count + h->long_ref_count; r++) {
+        if (!h->long_ref[r])
+            continue;
+
+        dpb_slot_index = vk_h264_dpb_slot_index(h, h->long_ref[r]);
+        err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i],
+                                &vp->refs[i], &hp->vkh264_refs[i],
+                                &hp->h264_refs[i], h->long_ref[r], 0,
+                                h->DPB[dpb_slot_index].picture_structure,
+                                dpb_slot_index);
+        if (err < 0)
+            return err;
+        i++;
+    }
+
+    hp->h264pic = (StdVideoDecodeH264PictureInfo) {
+        .seq_parameter_set_id = pic->pps->sps_id,
+        .pic_parameter_set_id = pic->pps->pps_id,
+        .frame_num = h->poc.frame_num,
+        .idr_pic_id = h->poc.idr_pic_id,
+        .PicOrderCnt[0] = pic->field_poc[0],
+        .PicOrderCnt[1] = pic->field_poc[1],
+        .flags = (StdVideoDecodeH264PictureInfoFlags) {
+            .field_pic_flag = FIELD_PICTURE(h),
+            /* Starts out set; vk_h264_decode_slice() clears it as soon as a
+             * non-intra slice is seen. */
+            .is_intra = 1,
+            .IdrPicFlag = h->picture_idr,
+            .bottom_field_flag = !!(h->picture_structure & PICT_BOTTOM_FIELD),
+            .is_reference = h->nal_ref_idc != 0,
+
+            // TODO: Not sure about this
+            .complementary_field_pair = h->first_field && FIELD_PICTURE(h),
+        },
+    };
+
+    hp->h264_pic_info = (VkVideoDecodeH264PictureInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR,
+        .pStdPictureInfo = &hp->h264pic,
+        .sliceCount = 0,
+    };
+
+    vp->decode_info = (VkVideoDecodeInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR,
+        .pNext = &hp->h264_pic_info,
+        .flags = 0x0,
+        .pSetupReferenceSlot = &vp->ref_slot,
+        .referenceSlotCount = h->short_ref_count + h->long_ref_count,
+        .pReferenceSlots = vp->ref_slots,
+        .dstPictureResource = (VkVideoPictureResourceInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+            .codedOffset = (VkOffset2D){ 0, 0 },
+            .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
+            .baseArrayLayer = 0,
+            .imageViewBinding = vp->img_view_out,
+        },
+    };
+
+    hp->ctx = ctx;
+
+    return 0;
+}
+
+/**
+ * Append one slice to the current picture's pending decode data.
+ *
+ * The slice bytes are handed to ff_vk_decode_add_slice(), which also updates
+ * the slice count and slice offset table of the picture info. The picture's
+ * is_intra flag is cleared when the slice is neither I nor SI.
+ *
+ * @return 0 on success, a negative error code from ff_vk_decode_add_slice()
+ */
+static int vk_h264_decode_slice(AVCodecContext *avctx,
+                                const uint8_t *data,
+                                uint32_t size)
+{
+    const H264Context *h = avctx->priv_data;
+    H264VulkanDecodePicture *hp = h->cur_pic_ptr->hwaccel_picture_private;
+    int slice_type;
+    int err;
+
+    err = ff_vk_decode_add_slice(&hp->vp, data, size, 1,
+                                 &hp->h264_pic_info.sliceCount,
+                                 &hp->h264_pic_info.pSliceOffsets);
+    if (err < 0)
+        return err;
+
+    /* The frame is only intra if all slices are marked as intra.
+     * The slice type is read from the first slice context. */
+    slice_type = h->slice_ctx[0].slice_type;
+    if (!(slice_type == AV_PICTURE_TYPE_I || slice_type == AV_PICTURE_TYPE_SI))
+        hp->h264pic.flags.is_intra = 0;
+
+    return 0;
+}
+
+/**
+ * Submit the current picture for decoding.
+ *
+ * Collects, for every reference slot recorded in the decode info, the source
+ * frame and its Vulkan decode picture, then passes everything to
+ * ff_vk_decode_frame().
+ *
+ * @return the return value of ff_vk_decode_frame()
+ */
+static int vk_h264_end_frame(AVCodecContext *avctx)
+{
+    const H264Context *h = avctx->priv_data;
+    H264Picture *cur = h->cur_pic_ptr;
+    H264VulkanDecodePicture *hp = cur->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+    FFVulkanDecodePicture *ref_vp[H264_MAX_PICTURE_COUNT] = { 0 };
+    AVFrame *ref_frames[H264_MAX_PICTURE_COUNT] = { 0 };
+
+    for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) {
+        H264Picture *src = hp->ref_src[i];
+        H264VulkanDecodePicture *src_priv = src->hwaccel_picture_private;
+
+        ref_frames[i] = src->f;
+        ref_vp[i]     = &src_priv->vp;
+    }
+
+    /* NOTE(review): %lu assumes slices_size is an unsigned long; its
+     * declaration is not visible here - confirm the specifier matches. */
+    av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n",
+           vp->slices_size, hp->h264_pic_info.sliceCount);
+
+    return ff_vk_decode_frame(avctx, cur->f, vp, ref_frames, ref_vp);
+}
+
+/**
+ * Release the per-picture private data of the H.264 Vulkan hwaccel.
+ *
+ * @param avctx  not referenced by this function
+ * @param data   the H264VulkanDecodePicture to free
+ */
+static void vk_h264_free_frame_priv(AVCodecContext *avctx, void *data)
+{
+    H264VulkanDecodePicture *pic_priv = data;
+    FFVulkanDecodeContext *dec = pic_priv->ctx;
+
+    /* Releases the frame's Vulkan resources; as noted by the author, this
+     * also destroys the session parameters. */
+    ff_vk_decode_free_frame(dec, &pic_priv->vp);
+
+    /* Finally drop the private structure itself. */
+    av_free(pic_priv);
+}
+
+/* Hardware accelerator descriptor for H.264 decoding into AV_PIX_FMT_VULKAN
+ * frames. The per-frame callbacks are the vk_h264_* functions of this file;
+ * init, flush, uninit and frame_params point at the ff_vk_* helpers.
+ * Per-picture private data is a H264VulkanDecodePicture, per-decoder private
+ * data is a FFVulkanDecodeContext. */
+const AVHWAccel ff_h264_vulkan_hwaccel = {
+ .name = "h264_vulkan",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .id = AV_CODEC_ID_H264,
+ .pix_fmt = AV_PIX_FMT_VULKAN,
+ .start_frame = &vk_h264_start_frame,
+ .decode_slice = &vk_h264_decode_slice,
+ .end_frame = &vk_h264_end_frame,
+ .free_frame_priv = &vk_h264_free_frame_priv,
+ .frame_priv_data_size = sizeof(H264VulkanDecodePicture),
+ .init = &ff_vk_decode_init,
+ .flush = &ff_vk_decode_flush,
+ .uninit = &ff_vk_decode_uninit,
+ .frame_params = &ff_vk_frame_params,
+ .priv_data_size = sizeof(FFVulkanDecodeContext),
+ .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
--
2.39.2
[-- Attachment #72: 0071-hevcdec-add-hwaccel_params_buf.patch --]
[-- Type: text/x-diff, Size: 2828 bytes --]
From 756f3a7daf18f402ec56a7f52ea8742d905edf18 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 17 Jan 2023 05:02:02 +0100
Subject: [PATCH 71/72] hevcdec: add hwaccel_params_buf
---
libavcodec/hevcdec.c | 9 +++++++++
libavcodec/hevcdec.h | 2 ++
2 files changed, 11 insertions(+)
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 7c9b46240c..5df831688c 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2969,6 +2969,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
switch (s->nal_unit_type) {
case HEVC_NAL_VPS:
+ av_buffer_unref(&s->hwaccel_params_buf);
if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
ret = s->avctx->hwaccel->decode_params(s->avctx,
nal->type,
@@ -2982,6 +2983,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
goto fail;
break;
case HEVC_NAL_SPS:
+ av_buffer_unref(&s->hwaccel_params_buf);
if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
ret = s->avctx->hwaccel->decode_params(s->avctx,
nal->type,
@@ -2996,6 +2998,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
goto fail;
break;
case HEVC_NAL_PPS:
+ av_buffer_unref(&s->hwaccel_params_buf);
if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
ret = s->avctx->hwaccel->decode_params(s->avctx,
nal->type,
@@ -3455,6 +3458,7 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
ff_dovi_ctx_unref(&s->dovi_ctx);
av_buffer_unref(&s->rpu_buf);
+ av_buffer_unref(&s->hwaccel_params_buf);
av_freep(&s->md5_ctx);
@@ -3606,6 +3610,10 @@ static int hevc_update_thread_context(AVCodecContext *dst,
if (ret < 0)
return ret;
+ ret = av_buffer_replace(&s->hwaccel_params_buf, s0->hwaccel_params_buf);
+ if (ret < 0)
+ return ret;
+
ret = av_buffer_replace(&s->rpu_buf, s0->rpu_buf);
if (ret < 0)
return ret;
@@ -3683,6 +3691,7 @@ static void hevc_decode_flush(AVCodecContext *avctx)
s->max_ra = INT_MAX;
s->eos = 1;
+ av_buffer_unref(&s->hwaccel_params_buf);
if (avctx->hwaccel->flush)
avctx->hwaccel->flush(avctx);
}
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 15c4113bdd..774cd95947 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -509,6 +509,8 @@ typedef struct HEVCContext {
uint8_t *sao_pixel_buffer_h[3];
uint8_t *sao_pixel_buffer_v[3];
+ AVBufferRef *hwaccel_params_buf;
+
HEVCParamSets ps;
HEVCSEI sei;
struct AVMD5 *md5_ctx;
--
2.39.2
[-- Attachment #73: 0072-hevcdec-add-Vulkan-hwaccel.patch --]
[-- Type: text/x-diff, Size: 50457 bytes --]
From d47cb5940bc4808fea572b530eb1b9bf11159540 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 08:27:18 +0100
Subject: [PATCH 72/72] hevcdec: add Vulkan hwaccel
Thanks to Dave Airlie for figuring out a lot of the parameters.
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/hevcdec.c | 27 +-
libavcodec/hwaccels.h | 1 +
libavcodec/vulkan_hevc.c | 904 +++++++++++++++++++++++++++++++++++++++
5 files changed, 934 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan_hevc.c
diff --git a/configure b/configure
index 60973c38b3..8f7b918565 100755
--- a/configure
+++ b/configure
@@ -3050,6 +3050,8 @@ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
hevc_vdpau_hwaccel_select="hevc_decoder"
hevc_videotoolbox_hwaccel_deps="videotoolbox"
hevc_videotoolbox_hwaccel_select="hevc_decoder"
+hevc_vulkan_hwaccel_deps="vulkan"
+hevc_vulkan_hwaccel_select="hevc_decoder"
mjpeg_nvdec_hwaccel_deps="nvdec"
mjpeg_nvdec_hwaccel_select="mjpeg_decoder"
mjpeg_vaapi_hwaccel_deps="vaapi"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 4c9db167a5..6aa304071a 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -999,6 +999,7 @@ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o
OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o
OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o
OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o
+OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL) += vulkan_decode.o vulkan_hevc.o
OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o
OBJS-$(CONFIG_MJPEG_VAAPI_HWACCEL) += vaapi_mjpeg.o
OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL) += nvdec_mpeg12.o
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 5df831688c..0ad6418f8d 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -405,7 +405,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
CONFIG_HEVC_NVDEC_HWACCEL + \
CONFIG_HEVC_VAAPI_HWACCEL + \
CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
- CONFIG_HEVC_VDPAU_HWACCEL)
+ CONFIG_HEVC_VDPAU_HWACCEL + \
+ CONFIG_HEVC_VULKAN_HWACCEL)
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
switch (sps->pix_fmt) {
@@ -429,6 +430,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
break;
case AV_PIX_FMT_YUV420P10:
@@ -445,6 +449,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
#if CONFIG_HEVC_VDPAU_HWACCEL
*fmt++ = AV_PIX_FMT_VDPAU;
#endif
@@ -464,6 +471,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
break;
case AV_PIX_FMT_YUV422P:
@@ -473,11 +483,17 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
break;
case AV_PIX_FMT_YUV444P10:
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
case AV_PIX_FMT_YUV420P12:
case AV_PIX_FMT_YUV444P12:
@@ -487,6 +503,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#if CONFIG_HEVC_VDPAU_HWACCEL
*fmt++ = AV_PIX_FMT_VDPAU;
#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
#if CONFIG_HEVC_NVDEC_HWACCEL
*fmt++ = AV_PIX_FMT_CUDA;
#endif
@@ -494,6 +513,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
case AV_PIX_FMT_YUV422P12:
#if CONFIG_HEVC_VAAPI_HWACCEL
*fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
break;
}
@@ -3752,6 +3774,9 @@ const FFCodec ff_hevc_decoder = {
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
HWACCEL_VIDEOTOOLBOX(hevc),
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ HWACCEL_VULKAN(hevc),
#endif
NULL
},
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 23d0843c76..a7c74d07cb 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -44,6 +44,7 @@ extern const AVHWAccel ff_hevc_nvdec_hwaccel;
extern const AVHWAccel ff_hevc_vaapi_hwaccel;
extern const AVHWAccel ff_hevc_vdpau_hwaccel;
extern const AVHWAccel ff_hevc_videotoolbox_hwaccel;
+extern const AVHWAccel ff_hevc_vulkan_hwaccel;
extern const AVHWAccel ff_mjpeg_nvdec_hwaccel;
extern const AVHWAccel ff_mjpeg_vaapi_hwaccel;
extern const AVHWAccel ff_mpeg1_nvdec_hwaccel;
diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
new file mode 100644
index 0000000000..f4991d8f82
--- /dev/null
+++ b/libavcodec/vulkan_hevc.c
@@ -0,0 +1,904 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "hevcdec.h"
+#include "hevc_ps.h"
+
+#include "vulkan_decode.h"
+
+/* Name and spec version of the H.265 decode Std video header this file is
+ * built against, packaged as a VkExtensionProperties. */
+const VkExtensionProperties ff_vk_dec_hevc_ext = {
+ .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME,
+ .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION,
+};
+
+/* Auxiliary Std structures that accompany one SPS. The member types match
+ * the output parameters of set_sps() (scaling lists, HRD header, VUI,
+ * profile/tier/level, DPB management, palette entries, sub-layer HRD
+ * parameters, short-term and long-term reference picture sets).
+ * NOTE(review): presumably one instance is handed to set_sps() per SPS -
+ * the caller is not visible here. */
+typedef struct HEVCHeaderSPS {
+ StdVideoH265ScalingLists scaling;
+ StdVideoH265HrdParameters vui_header;
+ StdVideoH265SequenceParameterSetVui vui;
+ StdVideoH265ProfileTierLevel ptl;
+ StdVideoH265DecPicBufMgr dpbm;
+ StdVideoH265PredictorPaletteEntries pal;
+ StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS];
+ StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS];
+ StdVideoH265ShortTermRefPicSet str[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
+ StdVideoH265LongTermRefPicsSps ltr[HEVC_MAX_LONG_TERM_REF_PICS];
+} HEVCHeaderSPS;
+
+/* Auxiliary Std structures that accompany one PPS: its scaling lists and
+ * its predictor palette entries. */
+typedef struct HEVCHeaderPPS {
+ StdVideoH265ScalingLists scaling;
+ StdVideoH265PredictorPaletteEntries pal;
+} HEVCHeaderPPS;
+
+/* NAL and VCL sub-layer HRD parameters, HEVC_MAX_SUB_LAYERS entries each.
+ * get_data_set_buf() reserves vps_num_hrd_parameters of these per VPS. */
+typedef struct HEVCHeaderVPSSet {
+ StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS];
+ StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS];
+} HEVCHeaderVPSSet;
+
+/* Auxiliary Std structures that accompany one VPS: profile/tier/level, DPB
+ * management, HEVC_MAX_LAYER_SETS HRD parameter headers, followed by a
+ * variable number of sub-layer HRD sets in the flexible array member sls. */
+typedef struct HEVCHeaderVPS {
+ StdVideoH265ProfileTierLevel ptl;
+ StdVideoH265DecPicBufMgr dpbm;
+ StdVideoH265HrdParameters hdr[HEVC_MAX_LAYER_SETS];
+ HEVCHeaderVPSSet sls[];
+} HEVCHeaderVPS;
+
+/* Storage for every Std parameter set (SPS, PPS, VPS) together with the
+ * auxiliary structures each of them points to.
+ * NOTE(review): presumably overlaid on the buffer returned by
+ * get_data_set_buf() - the code doing so is not visible here. */
+typedef struct HEVCHeaderSet {
+    StdVideoH265SequenceParameterSet sps[HEVC_MAX_SPS_COUNT];
+    HEVCHeaderSPS hsps[HEVC_MAX_SPS_COUNT];
+
+    StdVideoH265PictureParameterSet pps[HEVC_MAX_PPS_COUNT];
+    HEVCHeaderPPS hpps[HEVC_MAX_PPS_COUNT];
+
+    /* One entry per VPS, hence sized by the VPS limit rather than the PPS
+     * limit. */
+    StdVideoH265VideoParameterSet vps[HEVC_MAX_VPS_COUNT];
+    HEVCHeaderVPS hvps[];
+} HEVCHeaderSet;
+
+/**
+ * Get a scratch buffer large enough to hold all Std parameter set structures.
+ *
+ * The buffer comes from s->tmp_pool. The pool is torn down and recreated
+ * whenever the required size exceeds the size it was created with.
+ *
+ * @param s         decode context owning the buffer pool
+ * @param data_buf  receives the buffer on success
+ * @param nb_vps    number of leading entries of vps_list to account for;
+ *                  each of them is dereferenced, so they must be non-NULL
+ * @param vps_list  list of VPS buffers
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure
+ */
+static int get_data_set_buf(FFVulkanDecodeContext *s, AVBufferRef **data_buf,
+                            int nb_vps, AVBufferRef * const vps_list[HEVC_MAX_VPS_COUNT])
+{
+    /* Derive the size from the layout structures themselves rather than from
+     * a hand-written sum of their members: sizeof() includes alignment
+     * padding and always agrees with the array dimensions declared in
+     * HEVCHeaderSet and HEVCHeaderVPS. The result is never smaller than the
+     * member-by-member sum. */
+    size_t buf_size = sizeof(HEVCHeaderSet) + sizeof(HEVCHeaderVPS)*nb_vps;
+
+    /* Variable part: one sub-layer HRD set per signalled HRD parameter set. */
+    for (int i = 0; i < nb_vps; i++) {
+        const HEVCVPS *vps = (const HEVCVPS *)vps_list[i]->data;
+        buf_size += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters;
+    }
+
+    if (buf_size > s->tmp_pool_ele_size) {
+        av_buffer_pool_uninit(&s->tmp_pool);
+        /* Keep the recorded size at 0 until the new pool really exists, so a
+         * failed init is retried on the next call. */
+        s->tmp_pool_ele_size = 0;
+        s->tmp_pool = av_buffer_pool_init(buf_size, NULL);
+        if (!s->tmp_pool)
+            return AVERROR(ENOMEM);
+        s->tmp_pool_ele_size = buf_size;
+    }
+
+    *data_buf = av_buffer_pool_get(s->tmp_pool);
+    if (!(*data_buf))
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+/* Per-picture private data of the HEVC Vulkan hwaccel, reached through
+ * HEVCFrame::hwaccel_picture_private (see vk_hevc_fill_pict()). It embeds the
+ * generic FFVulkanDecodePicture and the H.265 specific structures describing
+ * the current picture and up to HEVC_MAX_REFS references. */
+typedef struct HEVCVulkanDecodePicture {
+ FFVulkanDecodeContext *ctx;
+ FFVulkanDecodePicture vp;
+
+ /* Current picture */
+ StdVideoDecodeH265ReferenceInfo h265_ref;
+ VkVideoDecodeH265DpbSlotInfoKHR vkh265_ref;
+
+ /* Picture refs */
+ HEVCFrame *ref_src [HEVC_MAX_REFS];
+ StdVideoDecodeH265ReferenceInfo h265_refs [HEVC_MAX_REFS];
+ VkVideoDecodeH265DpbSlotInfoKHR vkh265_refs[HEVC_MAX_REFS];
+
+ /* Current picture (contd.) */
+ StdVideoDecodeH265PictureInfo h265pic;
+ VkVideoDecodeH265PictureInfoKHR h265_pic_info;
+} HEVCVulkanDecodePicture;
+
+/**
+ * Prepare a frame for use by the Vulkan decoder and describe it as a
+ * reference/DPB slot.
+ *
+ * The four output structures are chained together: ref_slot points to
+ * vkh265_ref and ref, and vkh265_ref points to h265_ref, so all of them must
+ * stay valid for as long as ref_slot is used.
+ *
+ * @param avctx       codec context
+ * @param ref_src     if non-NULL, receives pic
+ * @param ref_slot    reference slot info to fill (main structure)
+ * @param ref         picture resource info to fill
+ * @param vkh265_ref  H.265 DPB slot info to fill
+ * @param h265_ref    Std reference info to fill
+ * @param pic         frame to describe
+ * @param is_current  forwarded to ff_vk_decode_prepare_frame()
+ * @param pic_id      slot index; also the array layer when the DPB is layered
+ * @return 0 on success, a negative error code from
+ *         ff_vk_decode_prepare_frame() on failure
+ */
+static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
+                             VkVideoReferenceSlotInfoKHR *ref_slot,       /* Main structure */
+                             VkVideoPictureResourceInfoKHR *ref,          /* Goes in ^ */
+                             VkVideoDecodeH265DpbSlotInfoKHR *vkh265_ref, /* Goes in ^ */
+                             StdVideoDecodeH265ReferenceInfo *h265_ref,   /* Goes in ^ */
+                             HEVCFrame *pic, int is_current, int pic_id)
+{
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vkpic = &hp->vp;
+
+    int err = ff_vk_decode_prepare_frame(ctx, pic->frame, vkpic, is_current,
+                                         ctx->dedicated_dpb);
+    if (err < 0)
+        return err;
+
+    *h265_ref = (StdVideoDecodeH265ReferenceInfo) {
+        .flags = (StdVideoDecodeH265ReferenceInfoFlags) {
+            /* Normalise the masked value to 0/1. The Std flag is presumably
+             * a single-bit field (confirm in the Vulkan video std headers),
+             * in which case storing the raw mask result would drop the flag
+             * unless HEVC_FRAME_FLAG_LONG_REF happened to be bit 0. */
+            .used_for_long_term_reference = !!(pic->flags & HEVC_FRAME_FLAG_LONG_REF),
+            .unused_for_reference = 0,
+        },
+        .PicOrderCntVal = pic->poc,
+    };
+
+    *vkh265_ref = (VkVideoDecodeH265DpbSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR,
+        .pStdReferenceInfo = h265_ref,
+    };
+
+    *ref = (VkVideoPictureResourceInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+        .codedOffset = (VkOffset2D){ 0, 0 },
+        .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height },
+        .baseArrayLayer = ctx->layered_dpb ? pic_id : 0,
+        .imageViewBinding = vkpic->img_view_ref,
+    };
+
+    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+        .pNext = vkh265_ref,
+        .slotIndex = pic_id,
+        .pPictureResource = ref,
+    };
+
+    if (ref_src)
+        *ref_src = pic;
+
+    return 0;
+}
+
+static void set_sps(const HEVCSPS *sps, int sps_idx,
+ StdVideoH265ScalingLists *vksps_scaling,
+ StdVideoH265HrdParameters *vksps_vui_header,
+ StdVideoH265SequenceParameterSetVui *vksps_vui,
+ StdVideoH265SequenceParameterSet *vksps,
+ StdVideoH265SubLayerHrdParameters *slhdrnal,
+ StdVideoH265SubLayerHrdParameters *slhdrvcl,
+ StdVideoH265ProfileTierLevel *ptl,
+ StdVideoH265DecPicBufMgr *dpbm,
+ StdVideoH265PredictorPaletteEntries *pal,
+ StdVideoH265ShortTermRefPicSet *str,
+ StdVideoH265LongTermRefPicsSps *ltr)
+{
+ for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++)
+ memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_list.sl[0][i],
+ STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList4x4));
+
+ for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++)
+ memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_list.sl[1][i],
+ STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList8x8));
+
+ for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++)
+ memcpy(vksps_scaling->ScalingList16x16[i], sps->scaling_list.sl[2][i],
+ STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList16x16));
+
+ for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++)
+ memcpy(vksps_scaling->ScalingList32x32[i], sps->scaling_list.sl[3][i],
+ STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList32x32));
+
+ memcpy(vksps_scaling->ScalingListDCCoef16x16, sps->scaling_list.sl_dc[0],
+ STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef16x16));
+
+ memcpy(vksps_scaling->ScalingListDCCoef32x32, sps->scaling_list.sl_dc[1],
+ STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef32x32));
+
+ *vksps_vui_header = (StdVideoH265HrdParameters) {
+ .flags = (StdVideoH265HrdFlags) {
+ .nal_hrd_parameters_present_flag = sps->hdr.flags.nal_hrd_parameters_present_flag,
+ .vcl_hrd_parameters_present_flag = sps->hdr.flags.vcl_hrd_parameters_present_flag,
+ .sub_pic_hrd_params_present_flag = sps->hdr.flags.sub_pic_hrd_params_present_flag,
+ .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->hdr.flags.sub_pic_cpb_params_in_pic_timing_sei_flag,
+ .fixed_pic_rate_general_flag = sps->hdr.flags.fixed_pic_rate_general_flag,
+ .fixed_pic_rate_within_cvs_flag = sps->hdr.flags.fixed_pic_rate_within_cvs_flag,
+ .low_delay_hrd_flag = sps->hdr.flags.low_delay_hrd_flag,
+ },
+ .tick_divisor_minus2 = sps->hdr.tick_divisor_minus2,
+ .du_cpb_removal_delay_increment_length_minus1 = sps->hdr.du_cpb_removal_delay_increment_length_minus1,
+ .dpb_output_delay_du_length_minus1 = sps->hdr.dpb_output_delay_du_length_minus1,
+ .bit_rate_scale = sps->hdr.bit_rate_scale,
+ .cpb_size_scale = sps->hdr.cpb_size_scale,
+ .cpb_size_du_scale = sps->hdr.cpb_size_du_scale,
+ .initial_cpb_removal_delay_length_minus1 = sps->hdr.initial_cpb_removal_delay_length_minus1,
+ .au_cpb_removal_delay_length_minus1 = sps->hdr.au_cpb_removal_delay_length_minus1,
+ .dpb_output_delay_length_minus1 = sps->hdr.dpb_output_delay_length_minus1,
+ /* Reserved - 3*16 bits */
+ .pSubLayerHrdParametersNal = slhdrnal,
+ .pSubLayerHrdParametersNal = slhdrvcl,
+ };
+
+ memcpy(vksps_vui_header->cpb_cnt_minus1, sps->hdr.cpb_cnt_minus1,
+ STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->cpb_cnt_minus1));
+ memcpy(vksps_vui_header->elemental_duration_in_tc_minus1, sps->hdr.elemental_duration_in_tc_minus1,
+ STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->elemental_duration_in_tc_minus1));
+
+ memcpy(slhdrnal, sps->hdr.nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrnal));
+ memcpy(slhdrvcl, sps->hdr.vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrvcl));
+
+ *vksps_vui = (StdVideoH265SequenceParameterSetVui) {
+ .flags = (StdVideoH265SpsVuiFlags) {
+ .aspect_ratio_info_present_flag = sps->vui.common.aspect_ratio_info_present_flag,
+ .overscan_info_present_flag = sps->vui.common.overscan_info_present_flag,
+ .overscan_appropriate_flag = sps->vui.common.overscan_appropriate_flag,
+ .video_signal_type_present_flag = sps->vui.common.video_signal_type_present_flag,
+ .video_full_range_flag = sps->vui.common.video_full_range_flag,
+ .colour_description_present_flag = sps->vui.common.colour_description_present_flag,
+ .chroma_loc_info_present_flag = sps->vui.common.chroma_loc_info_present_flag,
+ .neutral_chroma_indication_flag = sps->vui.neutra_chroma_indication_flag,
+ .field_seq_flag = sps->vui.field_seq_flag,
+ .frame_field_info_present_flag = sps->vui.frame_field_info_present_flag,
+ .default_display_window_flag = sps->vui.default_display_window_flag,
+ .vui_timing_info_present_flag = sps->vui.vui_timing_info_present_flag,
+ .vui_poc_proportional_to_timing_flag = sps->vui.vui_poc_proportional_to_timing_flag,
+ .vui_hrd_parameters_present_flag = sps->vui.vui_hrd_parameters_present_flag,
+ .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag,
+ .tiles_fixed_structure_flag = sps->vui.tiles_fixed_structure_flag,
+ .motion_vectors_over_pic_boundaries_flag = sps->vui.motion_vectors_over_pic_boundaries_flag,
+ .restricted_ref_pic_lists_flag = sps->vui.restricted_ref_pic_lists_flag,
+ },
+ .aspect_ratio_idc = sps->vui.common.aspect_ratio_idc,
+ .sar_width = sps->vui.common.sar.num,
+ .sar_height = sps->vui.common.sar.den,
+ .video_format = sps->vui.common.video_format,
+ .colour_primaries = sps->vui.common.colour_primaries,
+ .transfer_characteristics = sps->vui.common.transfer_characteristics,
+ .matrix_coeffs = sps->vui.common.matrix_coeffs,
+ .chroma_sample_loc_type_top_field = sps->vui.common.chroma_sample_loc_type_top_field,
+ .chroma_sample_loc_type_bottom_field = sps->vui.common.chroma_sample_loc_type_bottom_field,
+ /* Reserved */
+ /* Reserved */
+ .def_disp_win_left_offset = sps->vui.def_disp_win.left_offset,
+ .def_disp_win_right_offset = sps->vui.def_disp_win.right_offset,
+ .def_disp_win_top_offset = sps->vui.def_disp_win.top_offset,
+ .def_disp_win_bottom_offset = sps->vui.def_disp_win.bottom_offset,
+ .vui_num_units_in_tick = sps->vui.vui_num_units_in_tick,
+ .vui_time_scale = sps->vui.vui_time_scale,
+ .vui_num_ticks_poc_diff_one_minus1 = sps->vui.vui_num_ticks_poc_diff_one_minus1,
+ .min_spatial_segmentation_idc = sps->vui.min_spatial_segmentation_idc,
+ .max_bytes_per_pic_denom = sps->vui.max_bytes_per_pic_denom,
+ .max_bits_per_min_cu_denom = sps->vui.max_bits_per_min_cu_denom,
+ .log2_max_mv_length_horizontal = sps->vui.log2_max_mv_length_horizontal,
+ .log2_max_mv_length_vertical = sps->vui.log2_max_mv_length_vertical,
+ .pHrdParameters = vksps_vui_header,
+ };
+
+ *ptl = (StdVideoH265ProfileTierLevel) {
+ .flags = (StdVideoH265ProfileTierLevelFlags) {
+ .general_tier_flag = sps->ptl.general_ptl.tier_flag,
+ .general_progressive_source_flag = sps->ptl.general_ptl.progressive_source_flag,
+ .general_interlaced_source_flag = sps->ptl.general_ptl.interlaced_source_flag,
+ .general_non_packed_constraint_flag = sps->ptl.general_ptl.non_packed_constraint_flag,
+ .general_frame_only_constraint_flag = sps->ptl.general_ptl.frame_only_constraint_flag,
+ },
+ .general_profile_idc = sps->ptl.general_ptl.profile_idc,
+ .general_level_idc = sps->ptl.general_ptl.level_idc,
+ };
+
+ for (int i = 0; i < sps->max_sub_layers; i++) {
+ dpbm->max_latency_increase_plus1[i] = sps->temporal_layer[i].max_latency_increase + 1;
+ dpbm->max_dec_pic_buffering_minus1[i] = sps->temporal_layer[i].max_dec_pic_buffering - 1;
+ dpbm->max_num_reorder_pics[i] = sps->temporal_layer[i].num_reorder_pics;
+ }
+
+ for (int i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++)
+ for (int j = 0; j <= sps->sps_num_palette_predictor_initializer_minus1; j++)
+ pal->PredictorPaletteEntries[i][j] = sps->palette_predictor_initializers[i][j];
+
+ for (int i = 0; i < sps->nb_st_rps; i++) {
+ str[i] = (StdVideoH265ShortTermRefPicSet) {
+ .flags = (StdVideoH265ShortTermRefPicSetFlags) {
+ .inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict,
+ .delta_rps_sign = sps->st_rps[i].delta_rps_sign,
+ },
+ .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1,
+ .use_delta_flag = sps->st_rps[i].use_delta_flag,
+ .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1,
+ /* Spec fucked this up
+ .used_by_curr_pic_flag =
+ .used_by_curr_pic_s0_flag =
+ .used_by_curr_pic_s1_flag =
+ */
+ /* Reserved */
+ /* Reserved */
+ /* Reserved */
+ .num_negative_pics = sps->st_rps[i].num_negative_pics,
+ .num_positive_pics = sps->st_rps[i].num_delta_pocs - sps->st_rps[i].num_negative_pics,
+ };
+
+ for (int j = 0; j < str[i].num_negative_pics; j++)
+ str[i].delta_poc_s0_minus1[j] = sps->st_rps[i].delta_poc_s0[j] - 1;
+
+ for (int j = 0; j < str[i].num_positive_pics; j++)
+ str[i].delta_poc_s1_minus1[j] = sps->st_rps[i].delta_poc_s1[j] - 1;
+ }
+
+ for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) {
+ ltr[i] = (StdVideoH265LongTermRefPicsSps) {
+ .used_by_curr_pic_lt_sps_flag = sps->used_by_curr_pic_lt_sps_flag[i],
+ /* Spec fucked this up too*/
+ .lt_ref_pic_poc_lsb_sps[0] = sps->lt_ref_pic_poc_lsb_sps[i],
+ };
+ }
+
+ *vksps = (StdVideoH265SequenceParameterSet) {
+ .flags = (StdVideoH265SpsFlags) {
+ .sps_temporal_id_nesting_flag = sps->temporal_id_nesting_flag,
+ .separate_colour_plane_flag = sps->separate_colour_plane_flag,
+ .conformance_window_flag = sps->conformance_window_flag,
+ .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info_flag,
+ .scaling_list_enabled_flag = sps->scaling_list_enable_flag,
+ .sps_scaling_list_data_present_flag = sps->scaling_list_data_present_flag,
+ .amp_enabled_flag = sps->amp_enabled_flag,
+ .sample_adaptive_offset_enabled_flag = sps->sao_enabled,
+ .pcm_enabled_flag = sps->pcm_enabled_flag,
+ .pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag,
+ .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag,
+ .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag,
+ .strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag,
+ .vui_parameters_present_flag = sps->vui_present,
+ .sps_extension_present_flag = sps->sps_extension_present_flag,
+ .sps_range_extension_flag = sps->sps_range_extension_flag,
+ .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag,
+ .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag,
+ .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag,
+ .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag,
+ .extended_precision_processing_flag = sps->extended_precision_processing_flag,
+ .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag,
+ .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag,
+ .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag,
+ .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag,
+ .sps_scc_extension_flag = sps->sps_scc_extension_flag,
+ .sps_curr_pic_ref_enabled_flag = sps->sps_curr_pic_ref_enabled_flag,
+ .palette_mode_enabled_flag = sps->palette_mode_enabled_flag,
+ .sps_palette_predictor_initializers_present_flag = sps->sps_palette_predictor_initializer_present_flag,
+ .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disable_flag,
+ },
+ .chroma_format_idc = sps->chroma_format_idc,
+ .pic_width_in_luma_samples = sps->width,
+ .pic_height_in_luma_samples = sps->height,
+ .sps_video_parameter_set_id = sps->vps_id,
+ .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1,
+ .sps_seq_parameter_set_id = sps_idx,
+ .bit_depth_luma_minus8 = sps->bit_depth - 8,
+ .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8,
+ .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4,
+ .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3,
+ .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size,
+ .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2,
+ .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_transform_block_size,
+ .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter,
+ .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra,
+ .num_short_term_ref_pic_sets = sps->nb_st_rps,
+ .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps,
+ .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1,
+ .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1,
+ .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3,
+ .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size,
+ /* Reserved */
+ /* Reserved */
+ .palette_max_size = sps->palette_max_size,
+ .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size,
+ .motion_vector_resolution_control_idc = sps->motion_vector_resolution_control_idc,
+ .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializer_minus1,
+ .conf_win_left_offset = sps->pic_conf_win.left_offset,
+ .conf_win_right_offset = sps->pic_conf_win.right_offset,
+ .conf_win_top_offset = sps->pic_conf_win.top_offset,
+ .conf_win_bottom_offset = sps->pic_conf_win.bottom_offset,
+ .pProfileTierLevel = ptl,
+ .pDecPicBufMgr = dpbm,
+ .pScalingLists = vksps_scaling,
+ .pShortTermRefPicSet = str,
+ .pLongTermRefPicsSps = ltr,
+ .pSequenceParameterSetVui = vksps_vui,
+ .pPredictorPaletteEntries = pal,
+ };
+}
+
+/**
+ * Convert a parsed HEVC PPS into the Vulkan Video std PPS layout.
+ *
+ * @param pps            parsed picture parameter set to convert
+ * @param sps            SPS referenced by the PPS; only its vps_id is read here
+ * @param vkpps_scaling  storage for the converted scaling lists, referenced
+ *                       by vkpps->pScalingLists
+ * @param vkpps          destination Vulkan PPS, fully overwritten
+ * @param pal            storage for the palette predictor entries, referenced
+ *                       by vkpps->pPredictorPaletteEntries
+ */
+static void set_pps(const HEVCPPS *pps, const HEVCSPS *sps,
+                    StdVideoH265ScalingLists *vkpps_scaling,
+                    StdVideoH265PictureParameterSet *vkpps,
+                    StdVideoH265PredictorPaletteEntries *pal)
+{
+    /* Scaling matrices: one memcpy per list, sized by the matching
+     * per-matrix element count. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_list.sl[0][i],
+               STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList4x4));
+
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_list.sl[1][i],
+               STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList8x8));
+
+    /* The 16x16 lists must be sized with the 16x16 element count; using the
+     * 4x4 count here would leave most of every matrix uninitialized. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList16x16[i], pps->scaling_list.sl[2][i],
+               STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList16x16));
+
+    /* NOTE(review): this copies sl[3][0] and sl[3][1]; confirm these are the
+     * source indices that hold the 32x32 matrices in HEVCPPS. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList32x32[i], pps->scaling_list.sl[3][i],
+               STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList32x32));
+
+    /* The DC coefficient arrays hold one value per list, so they are sized
+     * by the number of lists, not by the number of elements in a matrix. */
+    memcpy(vkpps_scaling->ScalingListDCCoef16x16, pps->scaling_list.sl_dc[0],
+           STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * sizeof(*vkpps_scaling->ScalingListDCCoef16x16));
+
+    memcpy(vkpps_scaling->ScalingListDCCoef32x32, pps->scaling_list.sl_dc[1],
+           STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * sizeof(*vkpps_scaling->ScalingListDCCoef32x32));
+
+    *vkpps = (StdVideoH265PictureParameterSet) {
+        .flags = (StdVideoH265PpsFlags) {
+            .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag,
+            .output_flag_present_flag = pps->output_flag_present_flag,
+            .sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag,
+            .cabac_init_present_flag = pps->cabac_init_present_flag,
+            .constrained_intra_pred_flag = pps->constrained_intra_pred_flag,
+            .transform_skip_enabled_flag = pps->transform_skip_enabled_flag,
+            .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag,
+            .pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag,
+            .weighted_pred_flag = pps->weighted_pred_flag,
+            .weighted_bipred_flag = pps->weighted_bipred_flag,
+            .transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag,
+            .tiles_enabled_flag = pps->tiles_enabled_flag,
+            .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag,
+            .uniform_spacing_flag = pps->uniform_spacing_flag,
+            .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag,
+            .pps_loop_filter_across_slices_enabled_flag = pps->seq_loop_filter_across_slices_enabled_flag,
+            .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag,
+            .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag,
+            .pps_deblocking_filter_disabled_flag = pps->disable_dbf,
+            .pps_scaling_list_data_present_flag = pps->scaling_list_data_present_flag,
+            .lists_modification_present_flag = pps->lists_modification_present_flag,
+            .slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag,
+            .pps_extension_present_flag = pps->pps_extension_present_flag,
+            .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag,
+            .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag,
+            .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag,
+            .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag,
+            .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag,
+            .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializer_present_flag,
+            .monochrome_palette_flag = pps->monochrome_palette_flag,
+            .pps_range_extension_flag = pps->pps_range_extensions_flag,
+        },
+        .pps_pic_parameter_set_id = pps->pps_id,
+        .pps_seq_parameter_set_id = pps->sps_id,
+        .sps_video_parameter_set_id = sps->vps_id,
+        .num_extra_slice_header_bits = pps->num_extra_slice_header_bits,
+        .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1,
+        .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1,
+        .init_qp_minus26 = pps->pic_init_qp_minus26,
+        .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth,
+        .pps_cb_qp_offset = pps->cb_qp_offset,
+        .pps_cr_qp_offset = pps->cr_qp_offset,
+        .pps_beta_offset_div2 = pps->beta_offset >> 1,
+        .pps_tc_offset_div2 = pps->tc_offset >> 1,
+        .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2,
+        .log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size - 2,
+        .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth,
+        .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1,
+        .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma,
+        .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma,
+        .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset_plus5,
+        .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset_plus5,
+        .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset_plus3,
+        .pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializer,
+        .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry_minus8,
+        .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry_minus8,
+        .num_tile_columns_minus1 = pps->num_tile_columns - 1,
+        .num_tile_rows_minus1 = pps->num_tile_rows - 1,
+        .pScalingLists = vkpps_scaling,
+        .pPredictorPaletteEntries = pal,
+    };
+
+    /* The array members below are written after the compound-literal
+     * assignment, which resets every member it does not name. */
+    for (int i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) {
+        for (int j = 0; j < pps->pps_num_palette_predictor_initializer; j++)
+            pal->PredictorPaletteEntries[i][j] = pps->palette_predictor_initializers[i][j];
+    }
+
+    for (int i = 0; i < pps->num_tile_columns - 1; i++)
+        vkpps->column_width_minus1[i] = pps->column_width[i] - 1;
+
+    for (int i = 0; i < pps->num_tile_rows - 1; i++)
+        vkpps->row_height_minus1[i] = pps->row_height[i] - 1;
+
+    for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) {
+        vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i];
+        vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i];
+    }
+}
+
+/**
+ * Convert a parsed HEVC VPS into the Vulkan Video std VPS layout.
+ *
+ * @param vps      parsed video parameter set to convert
+ * @param vkvps    destination Vulkan VPS, fully overwritten
+ * @param ptl      storage for the profile/tier/level data, referenced by
+ *                 vkvps->pProfileTierLevel
+ * @param dpbm     storage for the DPB management data, referenced by
+ *                 vkvps->pDecPicBufMgr
+ * @param sls_hdr  array receiving one HRD parameter struct per
+ *                 vps->vps_num_hrd_parameters entry, referenced by
+ *                 vkvps->pHrdParameters
+ * @param sls      array of per-HRD sub-layer storage (nal_hdr / vcl_hdr)
+ *                 that the entries of sls_hdr point into
+ */
+static void set_vps(const HEVCVPS *vps,
+                    StdVideoH265VideoParameterSet *vkvps,
+                    StdVideoH265ProfileTierLevel *ptl,
+                    StdVideoH265DecPicBufMgr *dpbm,
+                    StdVideoH265HrdParameters *sls_hdr,
+                    HEVCHeaderVPSSet sls[])
+{
+    for (int i = 0; i < vps->vps_num_hrd_parameters; i++) {
+        const HEVCHdrParams *src = &vps->hdr[i];
+
+        sls_hdr[i] = (StdVideoH265HrdParameters) {
+            .flags = (StdVideoH265HrdFlags) {
+                .nal_hrd_parameters_present_flag = src->flags.nal_hrd_parameters_present_flag,
+                .vcl_hrd_parameters_present_flag = src->flags.vcl_hrd_parameters_present_flag,
+                .sub_pic_hrd_params_present_flag = src->flags.sub_pic_hrd_params_present_flag,
+                .sub_pic_cpb_params_in_pic_timing_sei_flag = src->flags.sub_pic_cpb_params_in_pic_timing_sei_flag,
+                .fixed_pic_rate_general_flag = src->flags.fixed_pic_rate_general_flag,
+                .fixed_pic_rate_within_cvs_flag = src->flags.fixed_pic_rate_within_cvs_flag,
+                .low_delay_hrd_flag = src->flags.low_delay_hrd_flag,
+            },
+            .tick_divisor_minus2 = src->tick_divisor_minus2,
+            .du_cpb_removal_delay_increment_length_minus1 = src->du_cpb_removal_delay_increment_length_minus1,
+            .dpb_output_delay_du_length_minus1 = src->dpb_output_delay_du_length_minus1,
+            .bit_rate_scale = src->bit_rate_scale,
+            .cpb_size_scale = src->cpb_size_scale,
+            .cpb_size_du_scale = src->cpb_size_du_scale,
+            .initial_cpb_removal_delay_length_minus1 = src->initial_cpb_removal_delay_length_minus1,
+            .au_cpb_removal_delay_length_minus1 = src->au_cpb_removal_delay_length_minus1,
+            .dpb_output_delay_length_minus1 = src->dpb_output_delay_length_minus1,
+            /* Reserved - 3*16 bits */
+            /* NAL and VCL sub-layer parameters each get their own pointer;
+             * naming the Nal member twice would leave the Vcl member NULL
+             * and point Nal at the VCL data. */
+            .pSubLayerHrdParametersNal = sls[i].nal_hdr,
+            .pSubLayerHrdParametersVcl = sls[i].vcl_hdr,
+        };
+
+        memcpy(sls_hdr[i].cpb_cnt_minus1, src->cpb_cnt_minus1,
+               STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].cpb_cnt_minus1));
+        memcpy(sls_hdr[i].elemental_duration_in_tc_minus1, src->elemental_duration_in_tc_minus1,
+               STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].elemental_duration_in_tc_minus1));
+
+        /* Fill the storage that the two pointers set above refer to. */
+        memcpy(sls[i].nal_hdr, src->nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].nal_hdr));
+        memcpy(sls[i].vcl_hdr, src->vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].vcl_hdr));
+    }
+
+    *ptl = (StdVideoH265ProfileTierLevel) {
+        .flags = (StdVideoH265ProfileTierLevelFlags) {
+            .general_tier_flag = vps->ptl.general_ptl.tier_flag,
+            .general_progressive_source_flag = vps->ptl.general_ptl.progressive_source_flag,
+            .general_interlaced_source_flag = vps->ptl.general_ptl.interlaced_source_flag,
+            .general_non_packed_constraint_flag = vps->ptl.general_ptl.non_packed_constraint_flag,
+            .general_frame_only_constraint_flag = vps->ptl.general_ptl.frame_only_constraint_flag,
+        },
+        .general_profile_idc = vps->ptl.general_ptl.profile_idc,
+        .general_level_idc = vps->ptl.general_ptl.level_idc,
+    };
+
+    for (int i = 0; i < vps->vps_max_sub_layers; i++) {
+        dpbm->max_latency_increase_plus1[i] = vps->vps_max_latency_increase[i] + 1;
+        dpbm->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering[i] - 1;
+        dpbm->max_num_reorder_pics[i] = vps->vps_num_reorder_pics[i];
+    }
+
+    *vkvps = (StdVideoH265VideoParameterSet) {
+        .flags = (StdVideoH265VpsFlags) {
+            .vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag,
+            .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag,
+            .vps_timing_info_present_flag = vps->vps_timing_info_present_flag,
+            .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag,
+        },
+        .vps_video_parameter_set_id = vps->vps_id,
+        .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers - 1,
+        /* Reserved */
+        /* Reserved */
+        .vps_num_units_in_tick = vps->vps_num_units_in_tick,
+        .vps_time_scale = vps->vps_time_scale,
+        .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one - 1,
+        /* Reserved */
+        .pDecPicBufMgr = dpbm,
+        .pHrdParameters = sls_hdr,
+        .pProfileTierLevel = ptl,
+    };
+}
+
+/**
+ * Build a Vulkan video session parameters object from every VPS, SPS and PPS
+ * currently held in the decoder's parameter set lists.
+ *
+ * @param avctx  codec context; priv_data is the HEVCContext and the hwaccel
+ *               private data is the FFVulkanDecodeContext
+ * @param buf    on success, receives a new AVBufferRef wrapping the created
+ *               VkVideoSessionParametersKHR handle
+ * @return 0 on success, a negative AVERROR code on failure; *buf is left
+ *         untouched on failure
+ */
+static int vk_hevc_create_params(AVCodecContext *avctx, AVBufferRef **buf)
+{
+    int err;
+    VkResult ret;
+    const HEVCContext *h = avctx->priv_data;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VkVideoDecodeH265SessionParametersAddInfoKHR h265_params_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR,
+        .stdSPSCount = 0,
+        .stdPPSCount = 0,
+        .stdVPSCount = 0,
+    };
+    VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pParametersAddInfo = &h265_params_info,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = &h265_params,
+        .videoSession = ctx->common.session,
+        .videoSessionParametersTemplate = NULL,
+    };
+
+    int nb_vps = 0;
+    AVBufferRef *data_set;
+    HEVCHeaderSet *hdr;
+
+    AVBufferRef *tmp;
+    VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par));
+    if (!par)
+        return AVERROR(ENOMEM);
+
+    /* NOTE(review): every list scan below stops at the first NULL entry and
+     * has no explicit upper bound; confirm the lists are always terminated. */
+    for (int i = 0; h->ps.vps_list[i]; i++)
+        nb_vps++;
+
+    err = get_data_set_buf(ctx, &data_set, nb_vps, h->ps.vps_list);
+    if (err < 0) {
+        /* Nothing owns the handle storage yet, so release it here. */
+        av_free(par);
+        return err;
+    }
+
+    hdr = (HEVCHeaderSet *)data_set->data;
+
+    h265_params_info.pStdSPSs = hdr->sps;
+    h265_params_info.pStdPPSs = hdr->pps;
+    h265_params_info.pStdVPSs = hdr->vps;
+
+    /* SPS list */
+    for (int i = 0; h->ps.sps_list[i]; i++) {
+        const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[i]->data;
+        set_sps(sps_l, i, &hdr->hsps[i].scaling, &hdr->hsps[i].vui_header,
+                &hdr->hsps[i].vui, &hdr->sps[i], hdr->hsps[i].nal_hdr,
+                hdr->hsps[i].vcl_hdr, &hdr->hsps[i].ptl, &hdr->hsps[i].dpbm,
+                &hdr->hsps[i].pal, hdr->hsps[i].str, hdr->hsps[i].ltr);
+        h265_params_info.stdSPSCount++;
+    }
+
+    /* PPS list */
+    for (int i = 0; h->ps.pps_list[i]; i++) {
+        const HEVCPPS *pps_l = (const HEVCPPS *)h->ps.pps_list[i]->data;
+        const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[pps_l->sps_id]->data;
+        set_pps(pps_l, sps_l, &hdr->hpps[i].scaling, &hdr->pps[i], &hdr->hpps[i].pal);
+        h265_params_info.stdPPSCount++;
+    }
+
+    /* VPS list */
+    for (int i = 0; i < nb_vps; i++) {
+        const HEVCVPS *vps_l = (const HEVCVPS *)h->ps.vps_list[i]->data;
+        set_vps(vps_l, &hdr->vps[i], &hdr->hvps[i].ptl, &hdr->hvps[i].dpbm,
+                hdr->hvps[i].hdr, hdr->hvps[i].sls);
+        h265_params_info.stdVPSCount++;
+    }
+
+    /* The object is created with exactly as much capacity as is filled. */
+    h265_params.maxStdSPSCount = h265_params_info.stdSPSCount;
+    h265_params.maxStdPPSCount = h265_params_info.stdPPSCount;
+    h265_params.maxStdVPSCount = h265_params_info.stdVPSCount;
+
+    /* Create session parameters */
+    ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create,
+                                              ctx->s.hwctx->alloc, par);
+    /* The staging data is only needed for the duration of the call above. */
+    av_buffer_unref(&data_set);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        /* No Vulkan object was created; only the handle storage exists. */
+        av_free(par);
+        return AVERROR_EXTERNAL;
+    }
+
+    tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params,
+                           ctx, 0);
+    if (!tmp) {
+        ff_vk_decode_free_params(ctx, (uint8_t *)par);
+        return AVERROR(ENOMEM);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS %i VPS\n",
+           h265_params_info.stdSPSCount, h265_params_info.stdPPSCount,
+           h265_params_info.stdVPSCount);
+
+    *buf = tmp;
+
+    return 0;
+}
+
+/**
+ * hwaccel start_frame callback: prepare the per-picture Vulkan decode state
+ * for the frame referenced by h->ref.
+ *
+ * Creates the session parameters on first use, takes a reference to them for
+ * this picture, then fills the std picture info, the reference slots and the
+ * three reference picture set index arrays.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_hevc_start_frame(AVCodecContext *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t size)
+{
+    int err;
+    HEVCContext *h = avctx->priv_data;
+    HEVCFrame *pic = h->ref;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+    const HEVCSPS *sps = h->ps.sps;
+    const HEVCPPS *pps = h->ps.pps;
+    int ref_count = 0;
+
+    /* Session parameters are created once and then shared between pictures. */
+    if (!h->hwaccel_params_buf) {
+        err = vk_hevc_create_params(avctx, &h->hwaccel_params_buf);
+        if (err < 0)
+            return err;
+    }
+
+    vp->session_params = av_buffer_ref(h->hwaccel_params_buf);
+    if (!vp->session_params)
+        return AVERROR(ENOMEM);
+
+    hp->h265pic = (StdVideoDecodeH265PictureInfo) {
+        .flags = (StdVideoDecodeH265PictureInfoFlags) {
+            .IrapPicFlag = IS_IRAP(h),
+            .IdrPicFlag = IS_IDR(h),
+            .IsReference = h->nal_unit_type < 16 ? h->nal_unit_type & 1 : 1,
+            .short_term_ref_pic_set_sps_flag = h->sh.short_term_ref_pic_set_sps_flag,
+        },
+        .sps_video_parameter_set_id = sps->vps_id,
+        .pps_seq_parameter_set_id = pps->sps_id,
+        .pps_pic_parameter_set_id = pps->pps_id,
+        .NumDeltaPocsOfRefRpsIdx = h->sh.short_term_rps ? h->sh.short_term_rps->rps_idx_num_delta_pocs : 0,
+        .PicOrderCntVal = h->poc,
+        .NumBitsForSTRefPicSetInSlice = !h->sh.short_term_ref_pic_set_sps_flag ?
+                                        h->sh.bits_used_for_short_term_rps : 0,
+    };
+
+    /* Walk the DPB: the current picture fills the setup slot, every other
+     * frame flagged as a short- or long-term reference gets the next free
+     * entry of the reference arrays. */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) {
+        const HEVCFrame *dpb_frame = &h->DPB[i];
+        const int is_current = dpb_frame == pic;
+
+        if (!(dpb_frame->flags & (HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF)))
+            continue;
+
+        if (is_current)
+            err = vk_hevc_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref,
+                                    &hp->vkh265_ref, &hp->h265_ref, pic, 1, i);
+        else
+            err = vk_hevc_fill_pict(avctx, &hp->ref_src[ref_count], &vp->ref_slots[ref_count],
+                                    &vp->refs[ref_count], &hp->vkh265_refs[ref_count],
+                                    &hp->h265_refs[ref_count], (HEVCFrame *)dpb_frame, 0, i);
+        if (err < 0)
+            return err;
+
+        if (!is_current)
+            ref_count++;
+    }
+
+    /* For each reference picture set, store the DPB index of every entry;
+     * entries without a match keep the 0xff fill value. */
+    memset(hp->h265pic.RefPicSetStCurrBefore, 0xff, 8);
+    for (int i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) {
+        const HEVCFrame *wanted = h->rps[ST_CURR_BEF].ref[i];
+        int slot = 0;
+
+        while (slot < FF_ARRAY_ELEMS(h->DPB) && &h->DPB[slot] != wanted)
+            slot++;
+        if (slot < FF_ARRAY_ELEMS(h->DPB))
+            hp->h265pic.RefPicSetStCurrBefore[i] = slot;
+    }
+
+    memset(hp->h265pic.RefPicSetStCurrAfter, 0xff, 8);
+    for (int i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) {
+        const HEVCFrame *wanted = h->rps[ST_CURR_AFT].ref[i];
+        int slot = 0;
+
+        while (slot < FF_ARRAY_ELEMS(h->DPB) && &h->DPB[slot] != wanted)
+            slot++;
+        if (slot < FF_ARRAY_ELEMS(h->DPB))
+            hp->h265pic.RefPicSetStCurrAfter[i] = slot;
+    }
+
+    memset(hp->h265pic.RefPicSetLtCurr, 0xff, 8);
+    for (int i = 0; i < h->rps[LT_CURR].nb_refs; i++) {
+        const HEVCFrame *wanted = h->rps[LT_CURR].ref[i];
+        int slot = 0;
+
+        while (slot < FF_ARRAY_ELEMS(h->DPB) && &h->DPB[slot] != wanted)
+            slot++;
+        if (slot < FF_ARRAY_ELEMS(h->DPB))
+            hp->h265pic.RefPicSetLtCurr[i] = slot;
+    }
+
+    /* The slice count starts at zero; vk_hevc_decode_slice() passes this
+     * field to ff_vk_decode_add_slice() for every slice. */
+    hp->h265_pic_info = (VkVideoDecodeH265PictureInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR,
+        .pStdPictureInfo = &hp->h265pic,
+        .sliceSegmentCount = 0,
+        .pSliceSegmentOffsets = vp->slice_off,
+    };
+
+    vp->decode_info = (VkVideoDecodeInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR,
+        .pNext = &hp->h265_pic_info,
+        .flags = 0x0,
+        .pSetupReferenceSlot = &vp->ref_slot,
+        .referenceSlotCount = ref_count,
+        .pReferenceSlots = vp->ref_slots,
+        .dstPictureResource = (VkVideoPictureResourceInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+            .codedOffset = (VkOffset2D){ 0, 0 },
+            .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height },
+            .baseArrayLayer = 0,
+            .imageViewBinding = vp->img_view_out,
+        },
+    };
+
+    /* Kept so vk_hevc_free_frame_priv() can reach the decode context. */
+    hp->ctx = ctx;
+
+    return 0;
+}
+
+/**
+ * hwaccel decode_slice callback: hand one slice of the current picture to
+ * ff_vk_decode_add_slice(), which also receives the picture's slice segment
+ * count and offset table.
+ *
+ * @return 0 on success, the negative error from ff_vk_decode_add_slice()
+ *         otherwise
+ */
+static int vk_hevc_decode_slice(AVCodecContext *avctx,
+                                const uint8_t *data,
+                                uint32_t size)
+{
+    const HEVCContext *h = avctx->priv_data;
+    HEVCVulkanDecodePicture *hp = h->ref->hwaccel_picture_private;
+    int ret;
+
+    ret = ff_vk_decode_add_slice(&hp->vp, data, size, 1,
+                                 &hp->h265_pic_info.sliceSegmentCount,
+                                 &hp->h265_pic_info.pSliceSegmentOffsets);
+
+    /* Any non-negative result is reported as plain success. */
+    return ret < 0 ? ret : 0;
+}
+
+/**
+ * hwaccel end_frame callback: collect the frames and Vulkan picture state of
+ * every reference recorded for the current picture and submit the decode via
+ * ff_vk_decode_frame().
+ *
+ * @return the result of ff_vk_decode_frame()
+ */
+static int vk_hevc_end_frame(AVCodecContext *avctx)
+{
+    const HEVCContext *h = avctx->priv_data;
+    HEVCFrame *cur = h->ref;
+    HEVCVulkanDecodePicture *hp = cur->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+    AVFrame *ref_frames[HEVC_MAX_REFS] = { 0 };
+    FFVulkanDecodePicture *ref_pics[HEVC_MAX_REFS] = { 0 };
+
+    /* ref_src[] was filled in the same order as the reference slots. */
+    for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) {
+        const HEVCFrame *src = hp->ref_src[i];
+        HEVCVulkanDecodePicture *src_priv = src->hwaccel_picture_private;
+
+        ref_frames[i] = src->frame;
+        ref_pics[i] = &src_priv->vp;
+    }
+
+    /* NOTE(review): %lu assumes slices_size is an unsigned long; confirm
+     * against the declaration of FFVulkanDecodePicture. */
+    av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n",
+           vp->slices_size, hp->h265_pic_info.sliceSegmentCount);
+
+    return ff_vk_decode_frame(avctx, cur->frame, vp, ref_frames, ref_pics);
+}
+
+/**
+ * hwaccel free_frame_priv callback: release the Vulkan resources attached to
+ * a picture's private data, then the private data itself.
+ *
+ * @param avctx  unused
+ * @param data   the HEVCVulkanDecodePicture to destroy
+ */
+static void vk_hevc_free_frame_priv(AVCodecContext *avctx, void *data)
+{
+    HEVCVulkanDecodePicture *pic_priv = data;
+
+    /* Per-frame decode resources first; they are reached through the struct. */
+    ff_vk_decode_free_frame(pic_priv->ctx, &pic_priv->vp);
+
+    /* Then the per-frame context holding them. */
+    av_free(pic_priv);
+}
+
+/**
+ * Vulkan hwaccel descriptor for HEVC: the per-frame callbacks are the
+ * vk_hevc_* functions of this file, the lifecycle callbacks are the shared
+ * ff_vk_* decode helpers.
+ */
+const AVHWAccel ff_hevc_vulkan_hwaccel = {
+    .name                 = "hevc_vulkan",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_HEVC,
+    .pix_fmt              = AV_PIX_FMT_VULKAN,
+
+    /* Per-frame entry points */
+    .start_frame          = vk_hevc_start_frame,
+    .decode_slice         = vk_hevc_decode_slice,
+    .end_frame            = vk_hevc_end_frame,
+    .free_frame_priv      = vk_hevc_free_frame_priv,
+    .frame_priv_data_size = sizeof(HEVCVulkanDecodePicture),
+
+    /* Shared Vulkan decode lifecycle */
+    .init                 = ff_vk_decode_init,
+    .flush                = ff_vk_decode_flush,
+    .uninit               = ff_vk_decode_uninit,
+    .frame_params         = ff_vk_frame_params,
+    .priv_data_size       = sizeof(FFVulkanDecodeContext),
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
--
2.39.2
[-- Attachment #74: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2023-02-17 3:44 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-17 3:43 Lynne [this message]
2023-02-17 9:08 ` Jean-Baptiste Kempf
2023-02-17 9:45 ` Hendrik Leppkes
2023-02-17 10:45 ` Lynne
2023-02-17 11:04 ` Kieran Kunhya
[not found] ` <CAK+ULv780c=z_dig_FAhPJ2poZ8u2_QOnnPUmV3SSiYoaQZ+tw@mail.gmail.com-NOU29aV----9>
2023-02-17 11:52 ` Lynne
2023-02-17 15:45 ` Michael Niedermayer
2023-02-17 16:35 ` Lynne
2023-02-18 19:02 ` Michael Niedermayer
2023-02-19 0:08 ` Lynne
2023-02-19 15:40 ` Michael Niedermayer
2023-02-19 15:44 ` Kieran Kunhya
2023-02-19 16:53 ` Lynne
2023-02-19 16:56 ` Jean-Baptiste Kempf
2023-02-19 16:58 ` Lynne
2023-02-19 17:02 ` Jean-Baptiste Kempf
2023-02-19 19:32 ` Niklas Haas
2023-02-19 18:50 ` Michael Niedermayer
2023-02-19 19:02 ` Lynne
2023-02-19 19:44 ` Michael Niedermayer
2023-02-19 20:00 ` Lynne
2023-02-19 20:14 ` Michael Niedermayer
[not found] ` <NOea74V--3-9@lynne.ee-NOeaB9K--R-9>
2023-02-19 16:57 ` Lynne
2023-02-19 17:36 ` Kieran Kunhya
2023-02-19 17:42 ` Kieran Kunhya
2023-02-19 18:46 ` Lynne
2023-02-19 21:59 ` Kieran Kunhya
2023-02-19 23:50 ` Neal Gompa
2023-02-20 5:13 ` Jean-Baptiste Kempf
2023-02-20 9:18 ` Hendrik Leppkes
2023-02-20 16:51 ` Anton Khirnov
2023-02-20 16:56 ` Anton Khirnov
2023-02-20 17:21 ` Anton Khirnov
2023-02-20 17:40 ` Anton Khirnov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=NOST85t--3-9@lynne.ee \
--to=dev@lynne.ee \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git