* [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
@ 2023-02-17 3:43 Lynne
2023-02-17 9:08 ` Jean-Baptiste Kempf
` (5 more replies)
0 siblings, 6 replies; 34+ messages in thread
From: Lynne @ 2023-02-17 3:43 UTC (permalink / raw)
To: Ffmpeg Devel
[-- Attachment #1: Type: text/plain, Size: 338 bytes --]
This small patchset mostly rewrites the Vulkan code to enable using multiplane images,
and implements video decode support. Numerous bugs and issues were also fixed,
and there are quite a lot of performance improvements.
The patchset can be viewed here as well:
https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
Patches attached.
[-- Attachment #2: 0001-h2645_vui-expose-aspect_ratio_idc.patch --]
[-- Type: text/x-diff, Size: 1857 bytes --]
From a03d8aa0e2aa961183440e85de3f4922b14f8075 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:02:11 +0100
Subject: [PATCH 01/72] h2645_vui: expose aspect_ratio_idc
---
libavcodec/h2645_vui.c | 10 +++++-----
libavcodec/h2645_vui.h | 1 +
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c
index 0633fcbddd..93e83a9e1f 100644
--- a/libavcodec/h2645_vui.c
+++ b/libavcodec/h2645_vui.c
@@ -42,15 +42,15 @@ void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *l
aspect_ratio_info_present_flag = get_bits1(gb);
if (aspect_ratio_info_present_flag) {
- uint8_t aspect_ratio_idc = get_bits(gb, 8);
- if (aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect))
- vui->sar = ff_h2645_pixel_aspect[aspect_ratio_idc];
- else if (aspect_ratio_idc == EXTENDED_SAR) {
+ vui->aspect_ratio_idc = get_bits(gb, 8);
+ if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect))
+ vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc];
+ else if (vui->aspect_ratio_idc == EXTENDED_SAR) {
vui->sar.num = get_bits(gb, 16);
vui->sar.den = get_bits(gb, 16);
} else
av_log(logctx, AV_LOG_WARNING,
- "Unknown SAR index: %u.\n", aspect_ratio_idc);
+ "Unknown SAR index: %u.\n", vui->aspect_ratio_idc);
} else
vui->sar = (AVRational){ 0, 1 };
diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h
index 638da7c366..f1aeab7758 100644
--- a/libavcodec/h2645_vui.h
+++ b/libavcodec/h2645_vui.h
@@ -26,6 +26,7 @@
typedef struct H2645VUI {
AVRational sar;
+ int aspect_ratio_idc;
int overscan_info_present_flag;
int overscan_appropriate_flag;
--
2.39.2
[-- Attachment #3: 0002-h2645_vui-expose-aspect_ratio_info_present_flag.patch --]
[-- Type: text/x-diff, Size: 1469 bytes --]
From 42ff928100caea41ffa55ea2c8a8181de39306b7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:03:44 +0100
Subject: [PATCH 02/72] h2645_vui: expose aspect_ratio_info_present_flag
---
libavcodec/h2645_vui.c | 6 ++----
libavcodec/h2645_vui.h | 1 +
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/libavcodec/h2645_vui.c b/libavcodec/h2645_vui.c
index 93e83a9e1f..e5c7bf46f9 100644
--- a/libavcodec/h2645_vui.c
+++ b/libavcodec/h2645_vui.c
@@ -36,12 +36,10 @@
void ff_h2645_decode_common_vui_params(GetBitContext *gb, H2645VUI *vui, void *logctx)
{
- int aspect_ratio_info_present_flag;
-
av_log(logctx, AV_LOG_DEBUG, "Decoding VUI\n");
- aspect_ratio_info_present_flag = get_bits1(gb);
- if (aspect_ratio_info_present_flag) {
+ vui->aspect_ratio_info_present_flag = get_bits1(gb);
+ if (vui->aspect_ratio_info_present_flag) {
vui->aspect_ratio_idc = get_bits(gb, 8);
if (vui->aspect_ratio_idc < FF_ARRAY_ELEMS(ff_h2645_pixel_aspect))
vui->sar = ff_h2645_pixel_aspect[vui->aspect_ratio_idc];
diff --git a/libavcodec/h2645_vui.h b/libavcodec/h2645_vui.h
index f1aeab7758..2c839f4b01 100644
--- a/libavcodec/h2645_vui.h
+++ b/libavcodec/h2645_vui.h
@@ -27,6 +27,7 @@
typedef struct H2645VUI {
AVRational sar;
int aspect_ratio_idc;
+ int aspect_ratio_info_present_flag;
int overscan_info_present_flag;
int overscan_appropriate_flag;
--
2.39.2
[-- Attachment #4: 0003-h264_ps-expose-pps_id.patch --]
[-- Type: text/x-diff, Size: 1226 bytes --]
From 5e115cd41e2221cc8048932dfed362be6f80b74b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 18 Mar 2022 15:11:02 +0100
Subject: [PATCH 03/72] h264_ps: expose pps_id
---
libavcodec/h264_ps.c | 1 +
libavcodec/h264_ps.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index d0d1e65903..4ec5bd4e80 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -731,6 +731,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct
if (!(bit_length & 7) && pps->data_size < sizeof(pps->data))
pps->data[pps->data_size++] = 0x80;
+ pps->pps_id = pps_id;
pps->sps_id = get_ue_golomb_31(gb);
if ((unsigned)pps->sps_id >= MAX_SPS_COUNT ||
!ps->sps_list[pps->sps_id]) {
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 5c35761fbc..c3f0888f24 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -103,6 +103,7 @@ typedef struct SPS {
* Picture parameter set
*/
typedef struct PPS {
+ unsigned int pps_id;
unsigned int sps_id;
int cabac; ///< entropy_coding_mode_flag
int pic_order_present; ///< pic_order_present_flag
--
2.39.2
[-- Attachment #5: 0004-h264_ps-set-pic_scaling_matrix_present_flag.patch --]
[-- Type: text/x-diff, Size: 3223 bytes --]
From 2720b9ff2a3d95c5d5887c2e06161de1691fc085 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 18 Mar 2022 16:17:33 +0100
Subject: [PATCH 04/72] h264_ps: set pic_scaling_matrix_present_flag
---
libavcodec/h264_ps.c | 7 +++++--
libavcodec/h264_ps.h | 1 +
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index 4ec5bd4e80..a94f5350c4 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -226,6 +226,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size,
/* returns non zero if the provided SPS scaling matrix has been filled */
static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
const PPS *pps, int is_sps,
+ int present_flag,
uint8_t(*scaling_matrix4)[16],
uint8_t(*scaling_matrix8)[64])
{
@@ -237,7 +238,7 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1]
};
int ret = 0;
- if (get_bits1(gb)) {
+ if (present_flag) {
ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]); // Intra, Y
ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr
ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb
@@ -368,7 +369,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx,
goto fail;
}
sps->transform_bypass = get_bits1(gb);
- ret = decode_scaling_matrices(gb, sps, NULL, 1,
+ ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb),
sps->scaling_matrix4, sps->scaling_matrix8);
if (ret < 0)
goto fail;
@@ -803,7 +804,9 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct
bits_left = bit_length - get_bits_count(gb);
if (bits_left > 0 && more_rbsp_data_in_pps(sps, avctx)) {
pps->transform_8x8_mode = get_bits1(gb);
+ pps->pic_scaling_matrix_present_flag = get_bits1(gb);
ret = decode_scaling_matrices(gb, sps, pps, 0,
+ pps->pic_scaling_matrix_present_flag,
pps->scaling_matrix4, pps->scaling_matrix8);
if (ret < 0)
goto fail;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index c3f0888f24..d2413ae0f8 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -119,6 +119,7 @@ typedef struct PPS {
int constrained_intra_pred; ///< constrained_intra_pred_flag
int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
int transform_8x8_mode; ///< transform_8x8_mode_flag
+ int pic_scaling_matrix_present_flag;
uint8_t scaling_matrix4[6][16];
uint8_t scaling_matrix8[6][64];
uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
--
2.39.2
[-- Attachment #6: 0005-h264_parser-expose-idr_pic_id.patch --]
[-- Type: text/x-diff, Size: 1437 bytes --]
From a9ae85816dfaa8791f974348825fc8ba9209423d Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 18:08:53 +0100
Subject: [PATCH 05/72] h264_parser: expose idr_pic_id
Vulkan needs it.
---
libavcodec/h264_parse.h | 1 +
libavcodec/h264_parser.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/h264_parse.h b/libavcodec/h264_parse.h
index 4ee863df66..4ba0add4f2 100644
--- a/libavcodec/h264_parse.h
+++ b/libavcodec/h264_parse.h
@@ -85,6 +85,7 @@ typedef struct H264POCContext {
int delta_poc_bottom;
int delta_poc[2];
int frame_num;
+ int idr_pic_id;
int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
int frame_num_offset; ///< for POC type 2
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index 46134a1c48..1c330484c1 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -432,7 +432,7 @@ static inline int parse_nal_units(AVCodecParserContext *s,
}
if (nal.type == H264_NAL_IDR_SLICE)
- get_ue_golomb_long(&nal.gb); /* idr_pic_id */
+ p->poc.idr_pic_id = get_ue_golomb_long(&nal.gb); /* idr_pic_id */
if (sps->poc_type == 0) {
p->poc.poc_lsb = get_bits(&nal.gb, sps->log2_max_poc_lsb);
--
2.39.2
[-- Attachment #7: 0006-h264_ps-comment-pic_order_present-better.patch --]
[-- Type: text/x-diff, Size: 997 bytes --]
From e42521563191a899d21fbf24e461bc6cb89661e9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:59:23 +0100
Subject: [PATCH 06/72] h264_ps: comment pic_order_present better
The official name which CBS uses is bottom_field_pic_order_in_frame_present_flag.
---
libavcodec/h264_ps.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index d2413ae0f8..de4529b353 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -106,7 +106,7 @@ typedef struct PPS {
unsigned int pps_id;
unsigned int sps_id;
int cabac; ///< entropy_coding_mode_flag
- int pic_order_present; ///< pic_order_present_flag
+ int pic_order_present; ///< bottom_field_pic_order_in_frame_present_flag
int slice_group_count; ///< num_slice_groups_minus1 + 1
int mb_slice_group_map_type;
unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
--
2.39.2
[-- Attachment #8: 0007-h264_ps-expose-max_dec_frame_buffering.patch --]
[-- Type: text/x-diff, Size: 1396 bytes --]
From e222eaa26f4d8fd36dd04525d754dbf4800c502a Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:06:04 +0100
Subject: [PATCH 07/72] h264_ps: expose max_dec_frame_buffering
---
libavcodec/h264_ps.c | 2 +-
libavcodec/h264_ps.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index a94f5350c4..d9df570718 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -176,7 +176,7 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx,
get_ue_golomb_31(gb); /* log2_max_mv_length_horizontal */
get_ue_golomb_31(gb); /* log2_max_mv_length_vertical */
sps->num_reorder_frames = get_ue_golomb_31(gb);
- get_ue_golomb_31(gb); /*max_dec_frame_buffering*/
+ sps->max_dec_frame_buffering = get_ue_golomb_31(gb);
if (get_bits_left(gb) < 0) {
sps->num_reorder_frames = 0;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index de4529b353..906bab7214 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -80,6 +80,7 @@ typedef struct SPS {
int32_t offset_for_ref_frame[256];
int bitstream_restriction_flag;
int num_reorder_frames;
+ int max_dec_frame_buffering;
int scaling_matrix_present;
uint8_t scaling_matrix4[6][16];
uint8_t scaling_matrix8[6][64];
--
2.39.2
[-- Attachment #9: 0008-h264_ps-expose-bit-rate-and-CPB-size-fields.patch --]
[-- Type: text/x-diff, Size: 2114 bytes --]
From 1279c6011c610fdb054cd9eea7a6f07c94f69f29 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:09:08 +0100
Subject: [PATCH 08/72] h264_ps: expose bit rate and CPB size fields
---
libavcodec/h264_ps.c | 8 ++++----
libavcodec/h264_ps.h | 4 ++++
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index d9df570718..fc8715876a 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -113,12 +113,12 @@ static inline int decode_hrd_parameters(GetBitContext *gb, void *logctx,
return AVERROR_INVALIDDATA;
}
- get_bits(gb, 4); /* bit_rate_scale */
+ sps->bit_rate_scale = get_bits(gb, 4);
get_bits(gb, 4); /* cpb_size_scale */
for (i = 0; i < cpb_count; i++) {
- get_ue_golomb_long(gb); /* bit_rate_value_minus1 */
- get_ue_golomb_long(gb); /* cpb_size_value_minus1 */
- get_bits1(gb); /* cbr_flag */
+ sps->bit_rate_value[i] = get_ue_golomb_long(gb) + 1; /* bit_rate_value_minus1 + 1 */
+ sps->cpb_size_value[i] = get_ue_golomb_long(gb) + 1; /* cpb_size_value_minus1 + 1 */
+ sps->cpr_flag[i] = get_bits1(gb);
}
sps->initial_cpb_removal_delay_length = get_bits(gb, 5) + 1;
sps->cpb_removal_delay_length = get_bits(gb, 5) + 1;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 906bab7214..03bd0227d6 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -89,6 +89,10 @@ typedef struct SPS {
int pic_struct_present_flag;
int time_offset_length;
int cpb_cnt; ///< See H.264 E.1.2
+ int bit_rate_scale;
+ uint32_t bit_rate_value[32]; ///< bit_rate_value_minus1 + 1
+ uint32_t cpb_size_value[32]; ///< cpb_size_value_minus1 + 1
+ uint8_t cpr_flag[32];
int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 + 1
int cpb_removal_delay_length; ///< cpb_removal_delay_length_minus1 + 1
int dpb_output_delay_length; ///< dpb_output_delay_length_minus1 + 1
--
2.39.2
[-- Attachment #10: 0009-h264_ps-expose-scaling_matrix_present_mask.patch --]
[-- Type: text/x-diff, Size: 7404 bytes --]
From 3ef9965fe2fa33942eb5b5def748f3f6bf9e0afb Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 17:05:35 +0100
Subject: [PATCH 09/72] h264_ps: expose scaling_matrix_present_mask
Vulkan requires it.
It technically also requires use_default_scaling_matrix_mask,
but we can just be explicit and give it the matrix we fill in as
non-default.
---
libavcodec/h264_ps.c | 37 +++++++++++++++++++++----------------
libavcodec/h264_ps.h | 2 ++
2 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index fc8715876a..9f26514167 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -197,12 +197,14 @@ static inline int decode_vui_parameters(GetBitContext *gb, void *logctx,
}
static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size,
- const uint8_t *jvt_list,
- const uint8_t *fallback_list)
+ const uint8_t *jvt_list, const uint8_t *fallback_list,
+ uint16_t *mask, int pos)
{
int i, last = 8, next = 8;
const uint8_t *scan = size == 16 ? ff_zigzag_scan : ff_zigzag_direct;
- if (!get_bits1(gb)) /* matrix not written, we use the predicted one */
+ uint16_t seq_scaling_list_present_flag = get_bits1(gb);
+ *mask |= (seq_scaling_list_present_flag << pos);
+ if (!seq_scaling_list_present_flag) /* matrix not written, we use the predicted one */
memcpy(factors, fallback_list, size * sizeof(uint8_t));
else
for (i = 0; i < size; i++) {
@@ -226,7 +228,7 @@ static int decode_scaling_list(GetBitContext *gb, uint8_t *factors, int size,
/* returns non zero if the provided SPS scaling matrix has been filled */
static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
const PPS *pps, int is_sps,
- int present_flag,
+ int present_flag, uint16_t *mask,
uint8_t(*scaling_matrix4)[16],
uint8_t(*scaling_matrix8)[64])
{
@@ -238,21 +240,22 @@ static int decode_scaling_matrices(GetBitContext *gb, const SPS *sps,
fallback_sps ? sps->scaling_matrix8[3] : default_scaling8[1]
};
int ret = 0;
+ *mask = 0x0;
if (present_flag) {
- ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0]); // Intra, Y
- ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0]); // Intra, Cr
- ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1]); // Intra, Cb
- ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1]); // Inter, Y
- ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3]); // Inter, Cr
- ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4]); // Inter, Cb
+ ret |= decode_scaling_list(gb, scaling_matrix4[0], 16, default_scaling4[0], fallback[0], mask, 0); // Intra, Y
+ ret |= decode_scaling_list(gb, scaling_matrix4[1], 16, default_scaling4[0], scaling_matrix4[0], mask, 1); // Intra, Cr
+ ret |= decode_scaling_list(gb, scaling_matrix4[2], 16, default_scaling4[0], scaling_matrix4[1], mask, 2); // Intra, Cb
+ ret |= decode_scaling_list(gb, scaling_matrix4[3], 16, default_scaling4[1], fallback[1], mask, 3); // Inter, Y
+ ret |= decode_scaling_list(gb, scaling_matrix4[4], 16, default_scaling4[1], scaling_matrix4[3], mask, 4); // Inter, Cr
+ ret |= decode_scaling_list(gb, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4], mask, 5); // Inter, Cb
if (is_sps || pps->transform_8x8_mode) {
- ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2]); // Intra, Y
- ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3]); // Inter, Y
+ ret |= decode_scaling_list(gb, scaling_matrix8[0], 64, default_scaling8[0], fallback[2], mask, 6); // Intra, Y
+ ret |= decode_scaling_list(gb, scaling_matrix8[3], 64, default_scaling8[1], fallback[3], mask, 7); // Inter, Y
if (sps->chroma_format_idc == 3) {
- ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0]); // Intra, Cr
- ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3]); // Inter, Cr
- ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1]); // Intra, Cb
- ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4]); // Inter, Cb
+ ret |= decode_scaling_list(gb, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0], mask, 8); // Intra, Cr
+ ret |= decode_scaling_list(gb, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3], mask, 9); // Inter, Cr
+ ret |= decode_scaling_list(gb, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1], mask, 10); // Intra, Cb
+ ret |= decode_scaling_list(gb, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4], mask, 11); // Inter, Cb
}
}
if (!ret)
@@ -370,6 +373,7 @@ int ff_h264_decode_seq_parameter_set(GetBitContext *gb, AVCodecContext *avctx,
}
sps->transform_bypass = get_bits1(gb);
ret = decode_scaling_matrices(gb, sps, NULL, 1, get_bits1(gb),
+ &sps->scaling_matrix_present_mask,
sps->scaling_matrix4, sps->scaling_matrix8);
if (ret < 0)
goto fail;
@@ -807,6 +811,7 @@ int ff_h264_decode_picture_parameter_set(GetBitContext *gb, AVCodecContext *avct
pps->pic_scaling_matrix_present_flag = get_bits1(gb);
ret = decode_scaling_matrices(gb, sps, pps, 0,
pps->pic_scaling_matrix_present_flag,
+ &pps->pic_scaling_matrix_present_mask,
pps->scaling_matrix4, pps->scaling_matrix8);
if (ret < 0)
goto fail;
diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
index 03bd0227d6..60ca9b3cd7 100644
--- a/libavcodec/h264_ps.h
+++ b/libavcodec/h264_ps.h
@@ -82,6 +82,7 @@ typedef struct SPS {
int num_reorder_frames;
int max_dec_frame_buffering;
int scaling_matrix_present;
+ uint16_t scaling_matrix_present_mask;
uint8_t scaling_matrix4[6][16];
uint8_t scaling_matrix8[6][64];
int nal_hrd_parameters_present_flag;
@@ -125,6 +126,7 @@ typedef struct PPS {
int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
int transform_8x8_mode; ///< transform_8x8_mode_flag
int pic_scaling_matrix_present_flag;
+ uint16_t pic_scaling_matrix_present_mask;
uint8_t scaling_matrix4[6][16];
uint8_t scaling_matrix8[6][64];
uint8_t chroma_qp_table[2][QP_MAX_NUM+1]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
--
2.39.2
[-- Attachment #11: 0010-h264dec-track-picture_structure-in-H264Picture.patch --]
[-- Type: text/x-diff, Size: 2132 bytes --]
From 52ab3cd8d165a838be92189c87c54915efc1c7e5 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 05:20:32 +0100
Subject: [PATCH 10/72] h264dec: track picture_structure in H264Picture
---
libavcodec/h264_picture.c | 1 +
libavcodec/h264_slice.c | 1 +
libavcodec/h264dec.h | 1 +
3 files changed, 3 insertions(+)
diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index 2661ff4698..0348166c43 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -80,6 +80,7 @@ static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src)
dst->mbaff = src->mbaff;
dst->field_picture = src->field_picture;
dst->reference = src->reference;
+ dst->picture_structure = src->picture_structure;
dst->recovered = src->recovered;
dst->invalid_gap = src->invalid_gap;
dst->sei_recovery_frame_cnt = src->sei_recovery_frame_cnt;
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 6188c74632..8ac66b343c 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -491,6 +491,7 @@ static int h264_frame_start(H264Context *h)
pic->reference = h->droppable ? 0 : h->picture_structure;
pic->f->coded_picture_number = h->coded_picture_number++;
pic->field_picture = h->picture_structure != PICT_FRAME;
+ pic->picture_structure = h->picture_structure;
pic->frame_num = h->poc.frame_num;
/*
* Zero key_frame here; IDR markings per slice in frame or fields are ORed
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 9a1ec1bace..1b18aba71f 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -137,6 +137,7 @@ typedef struct H264Picture {
int ref_count[2][2]; ///< number of entries in ref_poc (FIXME need per slice)
int mbaff; ///< 1 -> MBAFF frame 0-> not MBAFF
int field_picture; ///< whether or not picture was encoded in separate fields
+ int picture_structure; ///< picture structure
/**
* H264Picture.reference has this flag set,
--
2.39.2
[-- Attachment #12: 0011-hevc_ps-expose-SPS-and-VPS-headers.patch --]
[-- Type: text/x-diff, Size: 9068 bytes --]
From d80272e0759b686942f51b1c0c7615edb6a81bc6 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 01:29:57 +0100
Subject: [PATCH 11/72] hevc_ps: expose SPS and VPS headers
---
libavcodec/hevc_ps.c | 100 ++++++++++++++++++++++---------------------
libavcodec/hevc_ps.h | 41 ++++++++++++++++++
2 files changed, 93 insertions(+), 48 deletions(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 5fe62ec35b..bd1f278b06 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -355,81 +355,84 @@ static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx,
}
static void decode_sublayer_hrd(GetBitContext *gb, unsigned int nb_cpb,
- int subpic_params_present)
+ HEVCSublayerHdrParams *par, int subpic_params_present)
{
int i;
for (i = 0; i < nb_cpb; i++) {
- get_ue_golomb_long(gb); // bit_rate_value_minus1
- get_ue_golomb_long(gb); // cpb_size_value_minus1
+ par->bit_rate_value_minus1[i] = get_ue_golomb_long(gb);
+ par->cpb_size_value_minus1[i] = get_ue_golomb_long(gb);
if (subpic_params_present) {
- get_ue_golomb_long(gb); // cpb_size_du_value_minus1
- get_ue_golomb_long(gb); // bit_rate_du_value_minus1
+ par->cpb_size_du_value_minus1[i] = get_ue_golomb_long(gb);
+ par->bit_rate_du_value_minus1[i] = get_ue_golomb_long(gb);
}
- skip_bits1(gb); // cbr_flag
+
+ par->cbr_flag = get_bits1(gb);
}
}
static int decode_hrd(GetBitContext *gb, int common_inf_present,
- int max_sublayers)
+ HEVCHdrParams *hdr, int max_sublayers)
{
- int nal_params_present = 0, vcl_params_present = 0;
- int subpic_params_present = 0;
- int i;
-
if (common_inf_present) {
- nal_params_present = get_bits1(gb);
- vcl_params_present = get_bits1(gb);
-
- if (nal_params_present || vcl_params_present) {
- subpic_params_present = get_bits1(gb);
-
- if (subpic_params_present) {
- skip_bits(gb, 8); // tick_divisor_minus2
- skip_bits(gb, 5); // du_cpb_removal_delay_increment_length_minus1
- skip_bits(gb, 1); // sub_pic_cpb_params_in_pic_timing_sei_flag
- skip_bits(gb, 5); // dpb_output_delay_du_length_minus1
+ hdr->flags.nal_hrd_parameters_present_flag = get_bits1(gb);
+ hdr->flags.vcl_hrd_parameters_present_flag = get_bits1(gb);
+
+ if (hdr->flags.nal_hrd_parameters_present_flag ||
+ hdr->flags.vcl_hrd_parameters_present_flag) {
+ hdr->flags.sub_pic_hrd_params_present_flag = get_bits1(gb);
+
+ if (hdr->flags.sub_pic_hrd_params_present_flag) {
+ hdr->tick_divisor_minus2 = get_bits(gb, 8);
+ hdr->du_cpb_removal_delay_increment_length_minus1 = get_bits(gb, 5);
+ hdr->flags.sub_pic_cpb_params_in_pic_timing_sei_flag = get_bits1(gb);
+ hdr->dpb_output_delay_du_length_minus1 = get_bits(gb, 5);
}
- skip_bits(gb, 4); // bit_rate_scale
- skip_bits(gb, 4); // cpb_size_scale
+ hdr->bit_rate_scale = get_bits(gb, 4);
+ hdr->cpb_size_scale = get_bits(gb, 4);
- if (subpic_params_present)
- skip_bits(gb, 4); // cpb_size_du_scale
+ if (hdr->flags.sub_pic_hrd_params_present_flag)
+ hdr->cpb_size_du_scale = get_bits(gb, 4);
- skip_bits(gb, 5); // initial_cpb_removal_delay_length_minus1
- skip_bits(gb, 5); // au_cpb_removal_delay_length_minus1
- skip_bits(gb, 5); // dpb_output_delay_length_minus1
+ hdr->initial_cpb_removal_delay_length_minus1 = get_bits(gb, 5);
+ hdr->au_cpb_removal_delay_length_minus1 = get_bits(gb, 5);
+ hdr->dpb_output_delay_length_minus1 = get_bits(gb, 5);
}
}
- for (i = 0; i < max_sublayers; i++) {
- int low_delay = 0;
- unsigned int nb_cpb = 1;
- int fixed_rate = get_bits1(gb);
+ for (int i = 0; i < max_sublayers; i++) {
+ hdr->flags.fixed_pic_rate_general_flag = get_bits1(gb);
+
+ hdr->cpb_cnt_minus1[i] = 1;
- if (!fixed_rate)
- fixed_rate = get_bits1(gb);
+ if (!hdr->flags.fixed_pic_rate_general_flag)
+ hdr->flags.fixed_pic_rate_within_cvs_flag = get_bits1(gb);
- if (fixed_rate)
- get_ue_golomb_long(gb); // elemental_duration_in_tc_minus1
+ if (hdr->flags.fixed_pic_rate_within_cvs_flag)
+ hdr->elemental_duration_in_tc_minus1[i] = get_ue_golomb_long(gb);
else
- low_delay = get_bits1(gb);
+ hdr->flags.low_delay_hrd_flag = get_bits1(gb);
- if (!low_delay) {
- nb_cpb = get_ue_golomb_long(gb) + 1;
- if (nb_cpb < 1 || nb_cpb > 32) {
- av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", nb_cpb);
+ if (!hdr->flags.low_delay_hrd_flag) {
+ hdr->cpb_cnt_minus1[i] = get_ue_golomb_long(gb);
+ if (hdr->cpb_cnt_minus1[i] > 31) {
+ av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n",
+ hdr->cpb_cnt_minus1[i]);
return AVERROR_INVALIDDATA;
}
}
- if (nal_params_present)
- decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
- if (vcl_params_present)
- decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
+ if (hdr->flags.nal_hrd_parameters_present_flag)
+ decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->nal_params[i],
+ hdr->flags.sub_pic_hrd_params_present_flag);
+
+ if (hdr->flags.vcl_hrd_parameters_present_flag)
+ decode_sublayer_hrd(gb, hdr->cpb_cnt_minus1[i], &hdr->vcl_params[i],
+ hdr->flags.sub_pic_hrd_params_present_flag);
}
+
return 0;
}
@@ -536,7 +539,8 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
get_ue_golomb_long(gb); // hrd_layer_set_idx
if (i)
common_inf_present = get_bits1(gb);
- decode_hrd(gb, common_inf_present, vps->vps_max_sub_layers);
+ decode_hrd(gb, common_inf_present, &vps->hdr[i],
+ vps->vps_max_sub_layers);
}
}
get_bits1(gb); /* vps_extension_flag */
@@ -655,7 +659,7 @@ timing_info:
vui->vui_num_ticks_poc_diff_one_minus1 = get_ue_golomb_long(gb);
vui->vui_hrd_parameters_present_flag = get_bits1(gb);
if (vui->vui_hrd_parameters_present_flag)
- decode_hrd(gb, 1, sps->max_sub_layers);
+ decode_hrd(gb, 1, &sps->hdr, sps->max_sub_layers);
}
vui->bitstream_restriction_flag = get_bits1(gb);
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 18894cfed1..b61d3b32b3 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -32,6 +32,43 @@
#include "h2645_vui.h"
#include "hevc.h"
+typedef struct HEVCSublayerHdrParams {
+ uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT];
+ uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT];
+ uint32_t cpb_size_du_value_minus1[HEVC_MAX_CPB_CNT];
+ uint32_t bit_rate_du_value_minus1[HEVC_MAX_CPB_CNT];
+ uint32_t cbr_flag;
+} HEVCSublayerHdrParams;
+
+typedef struct HEVCHdrFlagParams {
+ uint32_t nal_hrd_parameters_present_flag;
+ uint32_t vcl_hrd_parameters_present_flag;
+ uint32_t sub_pic_hrd_params_present_flag;
+ uint32_t sub_pic_cpb_params_in_pic_timing_sei_flag;
+ uint32_t fixed_pic_rate_general_flag;
+ uint32_t fixed_pic_rate_within_cvs_flag;
+ uint32_t low_delay_hrd_flag;
+} HEVCHdrFlagParams;
+
+typedef struct HEVCHdrParams {
+ HEVCHdrFlagParams flags;
+
+ uint8_t tick_divisor_minus2;
+ uint8_t du_cpb_removal_delay_increment_length_minus1;
+ uint8_t dpb_output_delay_du_length_minus1;
+ uint8_t bit_rate_scale;
+ uint8_t cpb_size_scale;
+ uint8_t cpb_size_du_scale;
+ uint8_t initial_cpb_removal_delay_length_minus1;
+ uint8_t au_cpb_removal_delay_length_minus1;
+ uint8_t dpb_output_delay_length_minus1;
+ uint8_t cpb_cnt_minus1[HEVC_MAX_SUB_LAYERS];
+ uint16_t elemental_duration_in_tc_minus1[HEVC_MAX_SUB_LAYERS];
+
+ HEVCSublayerHdrParams nal_params[HEVC_MAX_SUB_LAYERS];
+ HEVCSublayerHdrParams vcl_params[HEVC_MAX_SUB_LAYERS];
+} HEVCHdrParams;
+
typedef struct ShortTermRPS {
unsigned int num_negative_pics;
int num_delta_pocs;
@@ -108,6 +145,8 @@ typedef struct PTL {
} PTL;
typedef struct HEVCVPS {
+ HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS];
+
uint8_t vps_temporal_id_nesting_flag;
int vps_max_layers;
int vps_max_sub_layers; ///< vps_max_temporal_layers_minus1 + 1
@@ -146,6 +185,8 @@ typedef struct HEVCSPS {
HEVCWindow pic_conf_win;
+ HEVCHdrParams hdr;
+
int bit_depth;
int bit_depth_chroma;
int pixel_shift;
--
2.39.2
[-- Attachment #13: 0012-hevc_ps-expose-pps_id.patch --]
[-- Type: text/x-diff, Size: 1213 bytes --]
From d6e2ac33861642ac5dfa651963874c0f65d9b49b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 05:33:29 +0100
Subject: [PATCH 12/72] hevc_ps: expose pps_id
---
libavcodec/hevc_ps.c | 2 +-
libavcodec/hevc_ps.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index bd1f278b06..3242904473 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -1486,7 +1486,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
pps->log2_max_transform_skip_block_size = 2;
// Coded parameters
- pps_id = get_ue_golomb_long(gb);
+ pps_id = pps->pps_id = get_ue_golomb_long(gb);
if (pps_id >= HEVC_MAX_PPS_COUNT) {
av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
ret = AVERROR_INVALIDDATA;
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index b61d3b32b3..4cfcbcf9ae 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -275,6 +275,7 @@ typedef struct HEVCSPS {
} HEVCSPS;
typedef struct HEVCPPS {
+ unsigned int pps_id;
unsigned int sps_id; ///< seq_parameter_set_id
uint8_t sign_data_hiding_flag;
--
2.39.2
[-- Attachment #14: 0013-hevc_ps-expose-vps_id.patch --]
[-- Type: text/x-diff, Size: 1162 bytes --]
From a09e6d7611f6e89ea3107c4581b27715a7ca480d Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 06:42:44 +0100
Subject: [PATCH 13/72] hevc_ps: expose vps_id
---
libavcodec/hevc_ps.c | 2 +-
libavcodec/hevc_ps.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 3242904473..a26f2940fc 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -462,7 +462,7 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
}
memcpy(vps->data, gb->buffer, vps->data_size);
- vps_id = get_bits(gb, 4);
+ vps_id = vps->vps_id = get_bits(gb, 4);
if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits
av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n");
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 4cfcbcf9ae..571657d7fd 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -145,6 +145,7 @@ typedef struct PTL {
} PTL;
typedef struct HEVCVPS {
+ unsigned int vps_id;
HEVCHdrParams hdr[HEVC_MAX_LAYER_SETS];
uint8_t vps_temporal_id_nesting_flag;
--
2.39.2
[-- Attachment #15: 0014-hevc_ps-expose-pps_extension_present_flag.patch --]
[-- Type: text/x-diff, Size: 1512 bytes --]
From 73a6b7e49ba8f01aefe2b7c152b2e2d04edaa3ee Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 12:49:45 +0100
Subject: [PATCH 14/72] hevc_ps: expose pps_extension_present_flag
---
libavcodec/hevc_ps.c | 3 ++-
libavcodec/hevc_ps.h | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index a26f2940fc..b1247bad67 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -1659,7 +1659,8 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
pps->slice_header_extension_present_flag = get_bits1(gb);
- if (get_bits1(gb)) { // pps_extension_present_flag
+ pps->pps_extension_present_flag = get_bits1(gb);
+ if (pps->pps_extension_present_flag) {
pps->pps_range_extensions_flag = get_bits1(gb);
skip_bits(gb, 7); // pps_extension_7bits
if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) {
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 571657d7fd..f221640531 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -326,6 +326,7 @@ typedef struct HEVCPPS {
int num_extra_slice_header_bits;
uint8_t slice_header_extension_present_flag;
uint8_t log2_max_transform_skip_block_size;
+ uint8_t pps_extension_present_flag;
uint8_t pps_range_extensions_flag;
uint8_t cross_component_prediction_enabled_flag;
uint8_t chroma_qp_offset_list_enabled_flag;
--
2.39.2
[-- Attachment #16: 0015-hevcdec-expose-bits_used_for_short_term_rps.patch --]
[-- Type: text/x-diff, Size: 1228 bytes --]
From 68e33940f494112e359f6a0a769083c1dd82a1c4 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 17:11:36 +0100
Subject: [PATCH 15/72] hevcdec: expose bits_used_for_short_term_rps
---
libavcodec/hevcdec.c | 1 +
libavcodec/hevcdec.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 567e8d81d4..43cd963175 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -702,6 +702,7 @@ static int hls_slice_header(HEVCContext *s)
if (ret < 0)
return ret;
+ sh->bits_used_for_short_term_rps = pos - get_bits_left(gb);
sh->short_term_rps = &sh->slice_rps;
} else {
int numbits, rps_idx;
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 9d3f4adbb3..15c4113bdd 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -268,6 +268,7 @@ typedef struct SliceHeader {
///< RPS coded in the slice header itself is stored here
int short_term_ref_pic_set_sps_flag;
+ int bits_used_for_short_term_rps;
int short_term_ref_pic_set_size;
ShortTermRPS slice_rps;
const ShortTermRPS *short_term_rps;
--
2.39.2
[-- Attachment #17: 0016-hevc_ps-expose-vui_present-sublayer_ordering_info-co.patch --]
[-- Type: text/x-diff, Size: 4332 bytes --]
From 46f18bf6af9e8ed0aaa82085a06b31dc8565e0df Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:17:51 +0100
Subject: [PATCH 16/72] hevc_ps: expose vui_present, sublayer_ordering_info,
conformance_window_flag
---
libavcodec/hevc_ps.c | 18 ++++++++++--------
libavcodec/hevc_ps.h | 4 ++++
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index b1247bad67..a740da9f82 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -855,7 +855,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
HEVCWindow *ow;
int ret = 0;
int log2_diff_max_min_transform_block_size;
- int bit_depth_chroma, start, vui_present, sublayer_ordering_info;
+ int bit_depth_chroma, start;
int i;
// Coded parameters
@@ -904,7 +904,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
sps->height, 0, avctx)) < 0)
return ret;
- if (get_bits1(gb)) { // pic_conformance_flag
+ sps->conformance_window_flag = get_bits1(gb);
+ if (sps->conformance_window_flag) { // pic_conformance_flag
int vert_mult = hevc_sub_height_c[sps->chroma_format_idc];
int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc];
sps->pic_conf_win.left_offset = get_ue_golomb_long(gb) * horiz_mult;
@@ -951,8 +952,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
return AVERROR_INVALIDDATA;
}
- sublayer_ordering_info = get_bits1(gb);
- start = sublayer_ordering_info ? 0 : sps->max_sub_layers - 1;
+ sps->sublayer_ordering_info_flag = get_bits1(gb);
+ start = sps->sublayer_ordering_info_flag ? 0 : sps->max_sub_layers - 1;
for (i = start; i < sps->max_sub_layers; i++) {
sps->temporal_layer[i].max_dec_pic_buffering = get_ue_golomb_long(gb) + 1;
sps->temporal_layer[i].num_reorder_pics = get_ue_golomb_long(gb);
@@ -973,7 +974,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
}
}
- if (!sublayer_ordering_info) {
+ if (!sps->sublayer_ordering_info_flag) {
for (i = 0; i < start; i++) {
sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[start].max_dec_pic_buffering;
sps->temporal_layer[i].num_reorder_pics = sps->temporal_layer[start].num_reorder_pics;
@@ -1015,7 +1016,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
if (sps->scaling_list_enable_flag) {
set_default_scaling_list_data(&sps->scaling_list);
- if (get_bits1(gb)) {
+ sps->scaling_list_data_present_flag = get_bits1(gb);
+ if (sps->scaling_list_data_present_flag) {
ret = scaling_list_data(gb, avctx, &sps->scaling_list, sps);
if (ret < 0)
return ret;
@@ -1071,8 +1073,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
sps->sps_temporal_mvp_enabled_flag = get_bits1(gb);
sps->sps_strong_intra_smoothing_enable_flag = get_bits1(gb);
sps->vui.common.sar = (AVRational){0, 1};
- vui_present = get_bits1(gb);
- if (vui_present)
+ sps->vui_present = get_bits1(gb);
+ if (sps->vui_present)
decode_vui(gb, avctx, apply_defdispwin, sps);
if (get_bits1(gb)) { // sps_extension_flag
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index f221640531..549e0bdf57 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -184,6 +184,7 @@ typedef struct HEVCSPS {
HEVCWindow output_window;
+ int conformance_window_flag;
HEVCWindow pic_conf_win;
HEVCHdrParams hdr;
@@ -196,6 +197,7 @@ typedef struct HEVCSPS {
unsigned int log2_max_poc_lsb;
int pcm_enabled_flag;
+ int sublayer_ordering_info_flag;
int max_sub_layers;
struct {
int max_dec_pic_buffering;
@@ -204,10 +206,12 @@ typedef struct HEVCSPS {
} temporal_layer[HEVC_MAX_SUB_LAYERS];
uint8_t temporal_id_nesting_flag;
+ int vui_present;
VUI vui;
PTL ptl;
uint8_t scaling_list_enable_flag;
+ int scaling_list_data_present_flag;
ScalingList scaling_list;
unsigned int nb_st_rps;
--
2.39.2
[-- Attachment #18: 0017-hevc_ps-expose-and-parse-scc-range-extension-fields.patch --]
[-- Type: text/x-diff, Size: 7752 bytes --]
From 4645f1fb3249f8249fdebaf9b3edffc848b9af3c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:18:42 +0100
Subject: [PATCH 17/72] hevc_ps: expose and parse scc range extension fields
---
libavcodec/hevc.h | 2 ++
libavcodec/hevc_ps.c | 63 ++++++++++++++++++++++++++++++++++++++++----
libavcodec/hevc_ps.h | 26 ++++++++++++++++++
3 files changed, 86 insertions(+), 5 deletions(-)
diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index 1804755327..913c7d4e2e 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@@ -154,6 +154,8 @@ enum {
// get near that, though, so set a lower limit here with the maximum
// possible value for 4K video (at most 135 16x16 Ctb rows).
HEVC_MAX_ENTRY_POINT_OFFSETS = HEVC_MAX_TILE_COLUMNS * 135,
+
+ HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE = 128,
};
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index a740da9f82..b03f59efef 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -856,7 +856,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
int ret = 0;
int log2_diff_max_min_transform_block_size;
int bit_depth_chroma, start;
- int i;
+ int i, j;
// Coded parameters
@@ -1077,9 +1077,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
if (sps->vui_present)
decode_vui(gb, avctx, apply_defdispwin, sps);
- if (get_bits1(gb)) { // sps_extension_flag
+ sps->sps_extension_present_flag = get_bits1(gb);
+ if (sps->sps_extension_present_flag) { // sps_extension_flag
sps->sps_range_extension_flag = get_bits1(gb);
- skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7);
+ skip_bits(gb, 2);
+ sps->sps_scc_extension_flag = get_bits1(gb);
+ skip_bits(gb, 4);
if (sps->sps_range_extension_flag) {
sps->transform_skip_rotation_enabled_flag = get_bits1(gb);
sps->transform_skip_context_enabled_flag = get_bits1(gb);
@@ -1105,6 +1108,26 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
av_log(avctx, AV_LOG_WARNING,
"cabac_bypass_alignment_enabled_flag not yet implemented\n");
}
+ if (sps->sps_scc_extension_flag) {
+ sps->sps_curr_pic_ref_enabled_flag = get_bits1(gb);
+ sps->palette_mode_enabled_flag = get_bits1(gb);
+ if (sps->palette_mode_enabled_flag) {
+ sps->palette_max_size = get_ue_golomb_long(gb);
+ sps->delta_palette_max_predictor_size = get_ue_golomb_long(gb);
+
+ sps->sps_palette_predictor_initializer_present_flag = get_bits1(gb);
+ if (sps->sps_palette_predictor_initializer_present_flag) {
+ sps->sps_num_palette_predictor_initializer_minus1 = get_ue_golomb_long(gb);
+ for (i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++) {
+ for (j = 0; j <= sps->sps_num_palette_predictor_initializer_minus1; j++)
+ sps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb);
+ }
+ }
+ }
+
+ sps->motion_vector_resolution_control_idc = get_bits(gb, 2);
+ sps->intra_boundary_filtering_disable_flag = get_bits1(gb);
+ }
}
if (apply_defdispwin) {
sps->output_window.left_offset += sps->vui.def_disp_win.left_offset;
@@ -1446,7 +1469,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
HEVCParamSets *ps)
{
HEVCSPS *sps = NULL;
- int i, ret = 0;
+ int i, j, ret = 0;
unsigned int pps_id = 0;
ptrdiff_t nal_size;
unsigned log2_parallel_merge_level_minus2;
@@ -1664,11 +1687,41 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
pps->pps_extension_present_flag = get_bits1(gb);
if (pps->pps_extension_present_flag) {
pps->pps_range_extensions_flag = get_bits1(gb);
- skip_bits(gb, 7); // pps_extension_7bits
+ skip_bits(gb, 2);
+ pps->pps_scc_extension_flag = get_bits1(gb);
+ skip_bits(gb, 4);
if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) {
if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0)
goto err;
}
+ if (pps->pps_scc_extension_flag) {
+ pps->pps_curr_pic_ref_enabled_flag = get_bits1(gb);
+ pps->residual_adaptive_colour_transform_enabled_flag = get_bits1(gb);
+
+ if (pps->residual_adaptive_colour_transform_enabled_flag) {
+ pps->pps_slice_act_qp_offsets_present_flag = get_bits1(gb);
+ pps->pps_act_y_qp_offset_plus5 = get_se_golomb(gb);
+ pps->pps_act_cb_qp_offset_plus5 = get_se_golomb(gb);
+ pps->pps_act_cr_qp_offset_plus3 = get_se_golomb(gb);
+ }
+
+ pps->pps_palette_predictor_initializer_present_flag = get_bits1(gb);
+ if (pps->pps_palette_predictor_initializer_present_flag) {
+ pps->pps_num_palette_predictor_initializer = get_ue_golomb_long(gb);
+ if (pps->pps_num_palette_predictor_initializer) {
+ pps->monochrome_palette_flag = get_bits1(gb);
+ pps->luma_bit_depth_entry_minus8 = get_ue_golomb_long(gb);
+
+ if (!pps->monochrome_palette_flag)
+ pps->chroma_bit_depth_entry_minus8 = get_ue_golomb_long(gb);
+
+ for (i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) {
+ for (j = 0; j < pps->pps_num_palette_predictor_initializer; j++)
+ pps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb);
+ }
+ }
+ }
+ }
}
ret = setup_pps(avctx, gb, pps, sps);
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 549e0bdf57..8dddf7ef8d 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -210,6 +210,18 @@ typedef struct HEVCSPS {
VUI vui;
PTL ptl;
+ int sps_extension_present_flag;
+ int sps_scc_extension_flag;
+ int sps_curr_pic_ref_enabled_flag;
+ int palette_mode_enabled_flag;
+ uint8_t palette_max_size;
+ uint8_t delta_palette_max_predictor_size;
+ uint8_t motion_vector_resolution_control_idc;
+ uint8_t sps_num_palette_predictor_initializer_minus1;
+ int sps_palette_predictor_initializer_present_flag;
+ int intra_boundary_filtering_disable_flag;
+ uint16_t palette_predictor_initializers[3][HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE];
+
uint8_t scaling_list_enable_flag;
int scaling_list_data_present_flag;
ScalingList scaling_list;
@@ -341,6 +353,20 @@ typedef struct HEVCPPS {
uint8_t log2_sao_offset_scale_luma;
uint8_t log2_sao_offset_scale_chroma;
+ int pps_scc_extension_flag;
+ int pps_curr_pic_ref_enabled_flag;
+ int residual_adaptive_colour_transform_enabled_flag;
+ int pps_slice_act_qp_offsets_present_flag;
+ int pps_palette_predictor_initializer_present_flag;
+ int pps_num_palette_predictor_initializer;
+ int monochrome_palette_flag;
+ int luma_bit_depth_entry_minus8;
+ int chroma_bit_depth_entry_minus8;
+ int pps_act_y_qp_offset_plus5;
+ int pps_act_cb_qp_offset_plus5;
+ int pps_act_cr_qp_offset_plus3;
+ uint16_t palette_predictor_initializers[3][HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE];
+
// Inferred parameters
unsigned int *column_width; ///< ColumnWidth
unsigned int *row_height; ///< RowHeight
--
2.39.2
[-- Attachment #19: 0018-hevc_ps-expose-log2_diff_max_min_transform_block_siz.patch --]
[-- Type: text/x-diff, Size: 3078 bytes --]
From 141df2aaa6e9e256cf5260b919fb9151982dabe0 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 7 Dec 2022 04:30:46 +0100
Subject: [PATCH 18/72] hevc_ps: expose log2_diff_max_min_transform_block_size
---
libavcodec/hevc_ps.c | 18 +++++++++---------
libavcodec/hevc_ps.h | 1 +
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index b03f59efef..2f0aff5a97 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -854,7 +854,6 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
{
HEVCWindow *ow;
int ret = 0;
- int log2_diff_max_min_transform_block_size;
int bit_depth_chroma, start;
int i, j;
@@ -982,12 +981,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
}
}
- sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3;
- sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb);
- sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2;
- log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb);
- sps->log2_max_trafo_size = log2_diff_max_min_transform_block_size +
- sps->log2_min_tb_size;
+ sps->log2_min_cb_size = get_ue_golomb_long(gb) + 3;
+ sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb);
+ sps->log2_min_tb_size = get_ue_golomb_long(gb) + 2;
+ sps->log2_diff_max_min_transform_block_size = get_ue_golomb_long(gb);
+ sps->log2_max_trafo_size = sps->log2_diff_max_min_transform_block_size +
+ sps->log2_min_tb_size;
if (sps->log2_min_cb_size < 3 || sps->log2_min_cb_size > 30) {
av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size", sps->log2_min_cb_size);
@@ -1004,8 +1003,9 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
return AVERROR_INVALIDDATA;
}
- if (log2_diff_max_min_transform_block_size < 0 || log2_diff_max_min_transform_block_size > 30) {
- av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", log2_diff_max_min_transform_block_size);
+ if (sps->log2_diff_max_min_transform_block_size > 30) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size",
+ sps->log2_diff_max_min_transform_block_size);
return AVERROR_INVALIDDATA;
}
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 8dddf7ef8d..88e73e97c8 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -253,6 +253,7 @@ typedef struct HEVCSPS {
unsigned int log2_max_trafo_size;
unsigned int log2_ctb_size;
unsigned int log2_min_pu_size;
+ unsigned int log2_diff_max_min_transform_block_size;
int max_transform_hierarchy_depth_inter;
int max_transform_hierarchy_depth_intra;
--
2.39.2
[-- Attachment #20: 0019-hevc_ps-expose-rps-fields.patch --]
[-- Type: text/x-diff, Size: 4900 bytes --]
From b0e8756c78c95ff93b908612b76d2013f79d5c2b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:25:48 +0100
Subject: [PATCH 19/72] hevc_ps: expose rps fields
---
libavcodec/hevc_ps.c | 37 ++++++++++++++++++-------------------
libavcodec/hevc_ps.h | 7 +++++++
2 files changed, 25 insertions(+), 19 deletions(-)
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 2f0aff5a97..745a4f270e 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -100,51 +100,50 @@ static void remove_vps(HEVCParamSets *s, int id)
int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header)
{
- uint8_t rps_predict = 0;
int delta_poc;
int k0 = 0;
int k = 0;
int i;
+ rps->rps_predict = 0;
+
if (rps != sps->st_rps && sps->nb_st_rps)
- rps_predict = get_bits1(gb);
+ rps->rps_predict = get_bits1(gb);
- if (rps_predict) {
+ if (rps->rps_predict) {
const ShortTermRPS *rps_ridx;
int delta_rps;
- unsigned abs_delta_rps;
- uint8_t use_delta_flag = 0;
- uint8_t delta_rps_sign;
if (is_slice_header) {
- unsigned int delta_idx = get_ue_golomb_long(gb) + 1;
- if (delta_idx > sps->nb_st_rps) {
+ rps->delta_idx = get_ue_golomb_long(gb) + 1;
+ if (rps->delta_idx > sps->nb_st_rps) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of delta_idx in slice header RPS: %d > %d.\n",
- delta_idx, sps->nb_st_rps);
+ rps->delta_idx, sps->nb_st_rps);
return AVERROR_INVALIDDATA;
}
- rps_ridx = &sps->st_rps[sps->nb_st_rps - delta_idx];
+ rps_ridx = &sps->st_rps[sps->nb_st_rps - rps->delta_idx];
rps->rps_idx_num_delta_pocs = rps_ridx->num_delta_pocs;
} else
rps_ridx = &sps->st_rps[rps - sps->st_rps - 1];
- delta_rps_sign = get_bits1(gb);
- abs_delta_rps = get_ue_golomb_long(gb) + 1;
- if (abs_delta_rps < 1 || abs_delta_rps > 32768) {
+ rps->delta_rps_sign = get_bits1(gb);
+ rps->abs_delta_rps = get_ue_golomb_long(gb) + 1;
+ if (rps->abs_delta_rps > 32768) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of abs_delta_rps: %d\n",
- abs_delta_rps);
+ rps->abs_delta_rps);
return AVERROR_INVALIDDATA;
}
- delta_rps = (1 - (delta_rps_sign << 1)) * abs_delta_rps;
+ delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps;
for (i = 0; i <= rps_ridx->num_delta_pocs; i++) {
int used = rps->used[k] = get_bits1(gb);
+ rps->use_delta_flag = 0;
if (!used)
- use_delta_flag = get_bits1(gb);
+ rps->use_delta_flag = get_bits1(gb);
- if (used || use_delta_flag) {
+ if (used || rps->use_delta_flag) {
if (i < rps_ridx->num_delta_pocs)
delta_poc = delta_rps + rps_ridx->delta_poc[i];
else
@@ -210,7 +209,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
if (rps->num_delta_pocs) {
prev = 0;
for (i = 0; i < rps->num_negative_pics; i++) {
- delta_poc = get_ue_golomb_long(gb) + 1;
+ delta_poc = rps->delta_poc_s0[i] = get_ue_golomb_long(gb) + 1;
if (delta_poc < 1 || delta_poc > 32768) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of delta_poc: %d\n",
@@ -223,7 +222,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
}
prev = 0;
for (i = 0; i < nb_positive_pics; i++) {
- delta_poc = get_ue_golomb_long(gb) + 1;
+ delta_poc = rps->delta_poc_s1[i] = get_ue_golomb_long(gb) + 1;
if (delta_poc < 1 || delta_poc > 32768) {
av_log(avctx, AV_LOG_ERROR,
"Invalid value of delta_poc: %d\n",
diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
index 88e73e97c8..3cdbf6abec 100644
--- a/libavcodec/hevc_ps.h
+++ b/libavcodec/hevc_ps.h
@@ -70,9 +70,16 @@ typedef struct HEVCHdrParams {
} HEVCHdrParams;
typedef struct ShortTermRPS {
+ uint8_t rps_predict;
+ unsigned int delta_idx;
+ uint8_t use_delta_flag;
+ uint8_t delta_rps_sign;
+ unsigned int abs_delta_rps;
unsigned int num_negative_pics;
int num_delta_pocs;
int rps_idx_num_delta_pocs;
+ int32_t delta_poc_s0[32];
+ int32_t delta_poc_s1[32];
int32_t delta_poc[32];
uint8_t used[32];
} ShortTermRPS;
--
2.39.2
[-- Attachment #21: 0020-hwcontext_vulkan-initialize-and-require-instance-ver.patch --]
[-- Type: text/x-diff, Size: 2363 bytes --]
From a35cd953f9af8f34836d53006d10e3890a30ebf1 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:15:04 +0100
Subject: [PATCH 20/72] hwcontext_vulkan: initialize and require instance
version 1.3
---
configure | 4 ++--
libavutil/hwcontext_vulkan.c | 2 +-
libavutil/hwcontext_vulkan.h | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/configure b/configure
index d38613309d..f0f15b9e87 100755
--- a/configure
+++ b/configure
@@ -7006,8 +7006,8 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.
"in maintaining it."
if enabled vulkan; then
- check_pkg_config_header_only vulkan "vulkan >= 1.2.189" "vulkan/vulkan.h" "defined VK_VERSION_1_2" ||
- check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_3) || (defined(VK_VERSION_1_2) && VK_HEADER_VERSION >= 189)"
+ check_pkg_config_header_only vulkan "vulkan >= 1.3.238" "vulkan/vulkan.h" "defined VK_VERSION_1_3" ||
+ check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) || (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 238)"
fi
if enabled x86; then
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 2a9b5f4aac..c87f39d072 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -673,7 +673,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
VkApplicationInfo application_info = {
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
.pEngineName = "libavutil",
- .apiVersion = VK_API_VERSION_1_2,
+ .apiVersion = VK_API_VERSION_1_3,
.engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
LIBAVUTIL_VERSION_MINOR,
LIBAVUTIL_VERSION_MICRO),
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index df86c85b3c..70c8379dc3 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -53,7 +53,7 @@ typedef struct AVVulkanDeviceContext {
PFN_vkGetInstanceProcAddr get_proc_addr;
/**
- * Vulkan instance. Must be at least version 1.2.
+ * Vulkan instance. Must be at least version 1.3.
*/
VkInstance inst;
--
2.39.2
[-- Attachment #22: 0021-hwcontext_vulkan-enable-support-for-YCbCr-samplers.patch --]
[-- Type: text/x-diff, Size: 1833 bytes --]
From f365b7902693a367d77032e13c2e099306308f44 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 02:37:14 +0100
Subject: [PATCH 21/72] hwcontext_vulkan: enable support for YCbCr samplers
---
libavutil/hwcontext_vulkan.c | 1 +
libavutil/vulkan_functions.h | 2 ++
2 files changed, 3 insertions(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c87f39d072..72850c03cf 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1378,6 +1378,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
goto end;
}
p->device_features_1_2.timelineSemaphore = 1;
+ p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
/* Setup queue family */
if ((err = setup_queue_families(ctx, &dev_info)))
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index d15a5d9a42..deb77495a2 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -155,6 +155,8 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipeline) \
\
/* Sampler */ \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSamplerYcbcrConversion) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \
\
--
2.39.2
[-- Attachment #23: 0022-hwcontext_vulkan-enable-VK_KHR_synchronization2-if-s.patch --]
[-- Type: text/x-diff, Size: 5364 bytes --]
From b6db2ca65db72b346ba08480df4a201f7e1caea9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 13 Mar 2022 09:06:06 +0100
Subject: [PATCH 22/72] hwcontext_vulkan: enable VK_KHR_synchronization2 if
supported
---
libavutil/hwcontext_vulkan.c | 17 +++++++++++++----
libavutil/vulkan_functions.h | 6 +++++-
2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 72850c03cf..1d0261c8fe 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -89,6 +89,7 @@ typedef struct VulkanDevicePriv {
/* Features */
VkPhysicalDeviceVulkan11Features device_features_1_1;
VkPhysicalDeviceVulkan12Features device_features_1_2;
+ VkPhysicalDeviceVulkan13Features device_features_1_3;
/* Queues */
uint32_t qfs[5];
@@ -346,7 +347,7 @@ static const VulkanOptExtension optional_device_exts[] = {
/* Misc or required by other extensions */
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
- { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
+ { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_SYNC2 },
/* Imports/exports */
{ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -1326,9 +1327,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
+ VkPhysicalDeviceVulkan13Features dev_features_1_3 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
+ .pNext = &timeline_features,
+ };
VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
- .pNext = &timeline_features,
+ .pNext = &dev_features_1_3,
};
VkPhysicalDeviceVulkan11Features dev_features_1_1 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
@@ -1340,8 +1345,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
};
VkDeviceCreateInfo dev_info = {
- .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
- .pNext = &hwctx->device_features,
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
};
hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -1349,6 +1353,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
p->device_features_1_1.pNext = &p->device_features_1_2;
p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
+ p->device_features_1_2.pNext = &p->device_features_1_3;
+ p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
ctx->free = vulkan_device_free;
/* Create an instance if not given one */
@@ -1379,6 +1385,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
}
p->device_features_1_2.timelineSemaphore = 1;
p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
+ p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
+
+ dev_info.pNext = &hwctx->device_features;
/* Setup queue family */
if ((err = setup_queue_families(ctx, &dev_info)))
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index deb77495a2..103bff3013 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -37,6 +37,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_EXTERNAL_WIN32_MEMORY = 1ULL << 6, /* VK_KHR_external_memory_win32 */
FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */
#endif
+ FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -145,7 +146,10 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \
- \
+ \
+ /* sync2 */ \
+ MACRO(1, 1, FF_VK_EXT_SYNC2, CmdPipelineBarrier2KHR) \
+ \
/* Pipeline */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \
--
2.39.2
[-- Attachment #24: 0023-hwcontext_vulkan-support-threadsafe-queue-and-frame-.patch --]
[-- Type: text/x-diff, Size: 19170 bytes --]
From 05e94e06667f305afe181c3b318d08b4e528ce09 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 15 Mar 2022 23:00:32 +0100
Subject: [PATCH 23/72] hwcontext_vulkan: support threadsafe queue and frame
operations
---
libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
libavutil/hwcontext_vulkan.h | 40 +++++++-
2 files changed, 167 insertions(+), 49 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 1d0261c8fe..5a06a6872d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -27,6 +27,7 @@
#include <dlfcn.h>
#endif
+#include <pthread.h>
#include <unistd.h>
#include "config.h"
@@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceVulkan13Features device_features_1_3;
/* Queues */
- uint32_t qfs[5];
- int num_qfs;
+ pthread_mutex_t **qf_mutex;
+ int nb_tot_qfs;
+ uint32_t img_qfs[5];
+ int nb_img_qfs;
/* Debug callback */
VkDebugUtilsMessengerEXT debug_ctx;
@@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
} VulkanFramesPriv;
typedef struct AVVkFrameInternal {
+ pthread_mutex_t update_mutex;
+
#if CONFIG_CUDA
/* Importing external memory into cuda is really expensive so we keep the
* memory imported all the time */
@@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
if (p->libvulkan)
dlclose(p->libvulkan);
+ for (int i = 0; i < p->nb_tot_qfs; i++)
+ av_freep(&p->qf_mutex[i]);
+ av_freep(&p->qf_mutex);
+
RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}
@@ -1436,13 +1445,26 @@ end:
return err;
}
+static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
+}
+
+static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
+}
+
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
int err;
- uint32_t queue_num;
+ uint32_t qf_num;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ VkQueueFamilyProperties *qf;
int graph_index, comp_index, tx_index, enc_index, dec_index;
/* Set device extension flags */
@@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
- if (!queue_num) {
+ vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
+ if (!qf_num) {
av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
return AVERROR_EXTERNAL;
}
+ qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
+ if (!qf)
+ return AVERROR(ENOMEM);
+
+ vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
+
+ p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
+ if (!p->qf_mutex)
+ return AVERROR(ENOMEM);
+ p->nb_tot_qfs = qf_num;
+
+ for (int i = 0; i < qf_num; i++) {
+ p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
+ if (!p->qf_mutex[i])
+ return AVERROR(ENOMEM);
+ for (int j = 0; j < qf[i].queueCount; j++)
+ pthread_mutex_init(&p->qf_mutex[i][j], NULL);
+ }
+
graph_index = hwctx->queue_family_index;
comp_index = hwctx->queue_family_comp_index;
tx_index = hwctx->queue_family_tx_index;
@@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
return AVERROR(EINVAL); \
} else if (fidx < 0 || ctx_qf < 0) { \
break; \
- } else if (ctx_qf >= queue_num) { \
+ } else if (ctx_qf >= qf_num) { \
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
- type, ctx_qf, queue_num); \
+ type, ctx_qf, qf_num); \
return AVERROR(EINVAL); \
} \
\
@@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
- p->qfs[p->num_qfs++] = ctx_qf; \
+ p->img_qfs[p->nb_img_qfs++] = ctx_qf; \
} while (0)
CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
@@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
#undef CHECK_QUEUE
+ if (!hwctx->lock_queue)
+ hwctx->lock_queue = lock_queue;
+ if (!hwctx->unlock_queue)
+ hwctx->unlock_queue = unlock_queue;
+
/* Get device capabilities */
vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
@@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
{
AVVkFrameInternal *internal = f->internal;
- if (!internal)
- return;
-
#if CONFIG_CUDA
if (internal->cuda_fc_ref) {
AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
@@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
uint32_t src_qf, dst_qf;
VkImageLayout new_layout;
VkAccessFlags new_access;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@@ -1944,6 +1989,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
};
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
+
+ if ((err = wait_start_exec_ctx(hwfc, ectx)))
+ return err;
+
+ vkfc->lock_frame(hwfc, frame);
+
for (int i = 0; i < planes; i++) {
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
sem_sig_val[i] = frame->sem_value[i] + 1;
@@ -1980,9 +2031,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
break;
}
- if ((err = wait_start_exec_ctx(hwfc, ectx)))
- return err;
-
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
@@ -2008,7 +2056,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 0, NULL, 0, NULL, planes, img_bar);
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ vkfc->unlock_frame(hwfc, frame);
+
+ return err;
}
static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
@@ -2090,10 +2141,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = usage,
.samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
@@ -2117,6 +2168,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
return AVERROR_EXTERNAL;
}
+ f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@@ -2161,10 +2213,10 @@ static void try_export_flags(AVHWFramesContext *hwfc,
VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
.pNext = NULL,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
VkPhysicalDeviceExternalImageFormatInfo enext = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
@@ -2259,6 +2311,16 @@ fail:
return NULL;
}
+static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+ pthread_mutex_lock(&vkf->internal->update_mutex);
+}
+
+static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+ pthread_mutex_unlock(&vkf->internal->update_mutex);
+}
+
static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
VulkanFramesPriv *fp = hwfc->internal->priv;
@@ -2421,6 +2483,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return AVERROR(ENOMEM);
}
+ if (!hwctx->lock_frame)
+ hwctx->lock_frame = lock_frame;
+ if (!hwctx->unlock_frame)
+ hwctx->unlock_frame = unlock_frame;
+
return 0;
}
@@ -2727,10 +2794,10 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
.samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
/* Image format verification */
@@ -2809,6 +2876,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
* offer us anything we could import and sync with, so instead
* just signal the semaphore we created. */
+ f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@@ -3017,20 +3085,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
CU_AD_FORMAT_UNSIGNED_INT8;
dst_f = (AVVkFrame *)frame->data[0];
-
dst_int = dst_f->internal;
- if (!dst_int || !dst_int->cuda_fc_ref) {
- if (!dst_f->internal)
- dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
-
- if (!dst_int)
- return AVERROR(ENOMEM);
+ if (!dst_int->cuda_fc_ref) {
dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
- if (!dst_int->cuda_fc_ref) {
- av_freep(&dst_f->internal);
+ if (!dst_int->cuda_fc_ref)
return AVERROR(ENOMEM);
- }
for (int i = 0; i < planes; i++) {
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
@@ -3704,13 +3764,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
return err;
}
-static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
+static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
AVBufferRef **bufs, size_t *buf_offsets,
const int *buf_stride, int w,
int h, enum AVPixelFormat pix_fmt, int to_buf)
{
int err;
AVVkFrame *frame = (AVVkFrame *)f->data[0];
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
@@ -3745,11 +3806,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
.waitSemaphoreCount = planes,
};
- for (int i = 0; i < planes; i++)
- sem_signal_values[i] = frame->sem_value[i] + 1;
+ vkfc->lock_frame(hwfc, frame);
if ((err = wait_start_exec_ctx(hwfc, ectx)))
- return err;
+ goto end;
+
+ for (int i = 0; i < planes; i++)
+ sem_signal_values[i] = frame->sem_value[i] + 1;
/* Change the image layout to something more optimal for transfers */
for (int i = 0; i < planes; i++) {
@@ -3824,14 +3887,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
if (!f->buf[ref])
break;
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
- return err;
+ goto end;
}
if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
- return err;
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
+ goto end;
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
} else {
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
}
+
+end:
+ vkfc->unlock_frame(hwfc, frame);
+ return err;
}
static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
@@ -3960,8 +4027,9 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
}
/* Copy buffers into/from image */
- err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
- swf->width, swf->height, swf->format, from);
+ err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets,
+ tmp.linesize, swf->width, swf->height, swf->format,
+ from);
if (from) {
/* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
@@ -4142,7 +4210,19 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
AVVkFrame *av_vk_frame_alloc(void)
{
- return av_mallocz(sizeof(AVVkFrame));
+ AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
+ if (!f)
+ return NULL;
+
+ f->internal = av_mallocz(sizeof(*f->internal));
+ if (!f->internal) {
+ av_free(f);
+ return NULL;
+ }
+
+ pthread_mutex_init(&f->internal->update_mutex, NULL);
+
+ return f;
}
const HWContextType ff_hwcontext_type_vulkan = {
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 70c8379dc3..406d8709c3 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -27,6 +27,8 @@
#include "pixfmt.h"
#include "frame.h"
+typedef struct AVVkFrame AVVkFrame;
+
/**
* @file
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext {
*/
int queue_family_decode_index;
int nb_decode_queues;
+
+ /**
+ * Locks a queue, preventing other threads from submitting any command
+ * buffers to this queue.
+ * If set to NULL, will be set to lavu-internal functions that utilize a
+ * mutex.
+ */
+ void (*lock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index);
+
+ /**
+ * Similar to lock_queue(), unlocks a queue. Must only be called after locking.
+ */
+ void (*unlock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index);
} AVVulkanDeviceContext;
/**
@@ -195,6 +210,23 @@ typedef struct AVVulkanFramesContext {
* av_hwframe_ctx_init().
*/
AVVkFrameFlags flags;
+
+ /**
+ * Locks a frame, preventing other threads from changing frame properties.
+ * Users SHOULD only ever lock just before command submission in order
+ * to get accurate frame properties, and unlock immediately after command
+ * submission without waiting for it to finish.
+ *
+ * If left NULL, this is set during frames context initialization to a
+ * lavu-internal function that locks a mutex stored in the frame's
+ * internal data.
+ */
+ void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
+
+ /**
+ * Similar to lock_frame(), unlocks a frame. Must only be called after locking.
+ */
+ void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
} AVVulkanFramesContext;
/*
@@ -210,7 +242,7 @@ typedef struct AVVulkanFramesContext {
* @note the size of this structure is not part of the ABI, to allocate
* you must use @av_vk_frame_alloc().
*/
-typedef struct AVVkFrame {
+struct AVVkFrame {
/**
* Vulkan images to which the memory is bound to.
*/
@@ -264,6 +296,12 @@ typedef struct AVVkFrame {
* Describes the binding offset of each plane to the VkDeviceMemory.
*/
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
+
+ /**
+ * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
+ * the image was allocated with VK_SHARING_MODE_CONCURRENT.
+ */
+ uint32_t queue_family[AV_NUM_DATA_POINTERS];
} AVVkFrame;
/**
--
2.39.2
[-- Attachment #25: 0024-hwcontext_vulkan-remove-contiguous-memory-hack.patch --]
[-- Type: text/x-diff, Size: 2600 bytes --]
From 197e5cfa63a2356a64ac6ae20024fa98fda26f43 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 20:32:49 +0100
Subject: [PATCH 24/72] hwcontext_vulkan: remove contiguous memory hack
---
libavutil/hwcontext_vulkan.c | 12 ------------
libavutil/hwcontext_vulkan.h | 4 +---
2 files changed, 1 insertion(+), 15 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 5a06a6872d..ab5b24f10c 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -112,9 +112,6 @@ typedef struct VulkanDevicePriv {
/* Nvidia */
int dev_is_nvidia;
-
- /* Intel */
- int dev_is_intel;
} VulkanDevicePriv;
typedef struct VulkanFramesPriv {
@@ -1501,7 +1498,6 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->hprops.minImportedHostPointerAlignment);
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
- p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
if (!qf_num) {
@@ -1620,8 +1616,6 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
return AVERROR_EXTERNAL;
}
- if (strstr(vendor, "Intel"))
- dev_select.vendor_id = 0x8086;
if (strstr(vendor, "AMD"))
dev_select.vendor_id = 0x1002;
@@ -2356,12 +2350,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
if (!hwctx->usage)
hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS;
- if (!(hwctx->flags & AV_VK_FRAME_FLAG_NONE)) {
- if (p->contiguous_planes == 1 ||
- ((p->contiguous_planes == -1) && p->dev_is_intel))
- hwctx->flags |= AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY;
- }
-
modifier_info = vk_find_struct(hwctx->create_pnext,
VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 406d8709c3..e89fa52927 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -160,9 +160,7 @@ typedef enum AVVkFrameFlags {
* device and tiling during av_hwframe_ctx_init(). */
AV_VK_FRAME_FLAG_NONE = (1ULL << 0),
- /* Image planes will be allocated in a single VkDeviceMemory, rather
- * than as per-plane VkDeviceMemory allocations. Required for exporting
- * to VAAPI on Intel devices. */
+ /* DEPRECATED: does nothing. */
AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1),
} AVVkFrameFlags;
--
2.39.2
[-- Attachment #26: 0025-hwcontext_vulkan-rename-vk_pixfmt_map-to-vk_pixfmt_p.patch --]
[-- Type: text/x-diff, Size: 1383 bytes --]
From 28903a643a7db85e6eef289a853a03b33b67be41 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 20:35:51 +0100
Subject: [PATCH 25/72] hwcontext_vulkan: rename vk_pixfmt_map to
vk_pixfmt_planar_map
---
libavutil/hwcontext_vulkan.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index ab5b24f10c..de5575c031 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal {
static const struct {
enum AVPixelFormat pixfmt;
- const VkFormat vkfmts[4];
-} vk_pixfmt_map[] = {
+ const VkFormat vkfmts[5];
+} vk_pixfmt_planar_map[] = {
{ AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
{ AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
{ AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
@@ -244,9 +244,9 @@ static const struct {
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
- for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
- if (vk_pixfmt_map[i].pixfmt == p)
- return vk_pixfmt_map[i].vkfmts;
+ for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++)
+ if (vk_pixfmt_planar_map[i].pixfmt == p)
+ return vk_pixfmt_planar_map[i].vkfmts;
return NULL;
}
--
2.39.2
[-- Attachment #27: 0026-hwcontext_vulkan-fix-minor-type-issue-in-VulkanQueue.patch --]
[-- Type: text/x-diff, Size: 772 bytes --]
From a62f75557a8b2d64fe88670b823d1e8500504bd2 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:52:15 +0100
Subject: [PATCH 26/72] hwcontext_vulkan: fix minor type issue in
VulkanQueueCtx.buf_deps_alloc_size
---
libavutil/hwcontext_vulkan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index de5575c031..8141e8c310 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -66,7 +66,7 @@ typedef struct VulkanQueueCtx {
/* Buffer dependencies */
AVBufferRef **buf_deps;
int nb_buf_deps;
- int buf_deps_alloc_size;
+ unsigned int buf_deps_alloc_size;
} VulkanQueueCtx;
typedef struct VulkanExecCtx {
--
2.39.2
[-- Attachment #28: 0027-hwcontext_vulkan-report-nonCoherentAtomSize.patch --]
[-- Type: text/x-diff, Size: 1140 bytes --]
From 0dec881653e9c9434a1b06ea212735a4c7b9caf8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 28 Dec 2022 05:55:17 +0100
Subject: [PATCH 27/72] hwcontext_vulkan: report nonCoherentAtomSize
---
libavutil/hwcontext_vulkan.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 8141e8c310..7e63c2350c 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1493,6 +1493,8 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n",
p->props.properties.limits.minMemoryMapAlignment);
+ av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n",
+ p->props.properties.limits.nonCoherentAtomSize);
if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n",
p->hprops.minImportedHostPointerAlignment);
--
2.39.2
[-- Attachment #29: 0028-hwcontext_vulkan-add-support-for-descriptor-buffers.patch --]
[-- Type: text/x-diff, Size: 6084 bytes --]
From a028bdcd05284bfb306558212646a309e2da4c24 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:15:02 +0100
Subject: [PATCH 28/72] hwcontext_vulkan: add support for descriptor buffers
---
libavutil/hwcontext_vulkan.c | 13 ++++++++++++-
libavutil/vulkan_functions.h | 9 +++++++++
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 7e63c2350c..60ff11ad3d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -91,6 +91,7 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceVulkan11Features device_features_1_1;
VkPhysicalDeviceVulkan12Features device_features_1_2;
VkPhysicalDeviceVulkan13Features device_features_1_3;
+ VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features;
/* Queues */
pthread_mutex_t **qf_mutex;
@@ -350,6 +351,7 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_SYNC2 },
+ { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, },
/* Imports/exports */
{ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -1333,9 +1335,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
+ VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT,
+ .pNext = &timeline_features,
+ };
VkPhysicalDeviceVulkan13Features dev_features_1_3 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
- .pNext = &timeline_features,
+ .pNext = &desc_buf_features,
};
VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
@@ -1361,6 +1367,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
p->device_features_1_2.pNext = &p->device_features_1_3;
p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
+ p->device_features_1_3.pNext = &p->desc_buf_features;
+ p->desc_buf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT;
ctx->free = vulkan_device_free;
/* Create an instance if not given one */
@@ -1390,8 +1398,11 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
goto end;
}
p->device_features_1_2.timelineSemaphore = 1;
+ p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress;
p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
+ p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer;
+ p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors;
dev_info.pNext = &hwctx->device_features;
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 103bff3013..f8739da8e5 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -38,6 +38,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */
#endif
FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
+ FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 9, /* VK_EXT_descriptor_buffer */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -121,6 +122,7 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateBuffer) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindBufferMemory) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferDeviceAddress) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyBuffer) \
\
/* Image */ \
@@ -142,6 +144,13 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorPool) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorSetLayout) \
\
+ /* Descriptor buffers */ \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutSizeEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutBindingOffsetEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdBindDescriptorBuffersEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdSetDescriptorBufferOffsetsEXT) \
+ \
/* DescriptorUpdateTemplate */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \
--
2.39.2
[-- Attachment #30: 0029-hwcontext_vulkan-add-functions-for-video-decoding.patch --]
[-- Type: text/x-diff, Size: 6637 bytes --]
From cc5ef22f90cc48ee604f6a27d28bb05237b9f2b7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:34:36 +0100
Subject: [PATCH 29/72] hwcontext_vulkan: add functions for video decoding
---
libavutil/hwcontext_vulkan.c | 6 ++++++
libavutil/vulkan.c | 8 +++++---
libavutil/vulkan_functions.h | 20 ++++++++++++++++++++
libavutil/vulkan_loader.h | 4 ++++
4 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 60ff11ad3d..c0e35d8d78 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -363,6 +363,12 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif
+
+ /* Video encoding/decoding */
+ { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE },
+ { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE },
+ { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 },
+ { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
};
/* Converts return values to strings */
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 403f0b1f27..6bf2c214b7 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -85,9 +85,11 @@ const char *ff_vk_ret2str(VkResult res)
CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
CASE(VK_ERROR_VALIDATION_FAILED_EXT);
CASE(VK_ERROR_INVALID_SHADER_NV);
- CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
- CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
- CASE(VK_ERROR_NOT_PERMITTED_EXT);
+ CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
default: return "Unknown error";
}
#undef CASE
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index f8739da8e5..65ab560d21 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -39,6 +39,10 @@ typedef enum FFVulkanExtensions {
#endif
FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 9, /* VK_EXT_descriptor_buffer */
+ FF_VK_EXT_VIDEO_QUEUE = 1ULL << 10, /* VK_KHR_video_queue */
+ FF_VK_EXT_VIDEO_DECODE_QUEUE = 1ULL << 11, /* VK_KHR_video_decode_queue */
+ FF_VK_EXT_VIDEO_DECODE_H264 = 1ULL << 12, /* VK_KHR_video_decode_h264 */
+ FF_VK_EXT_VIDEO_DECODE_H265 = 1ULL << 13, /* VK_KHR_video_decode_h265 */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -60,6 +64,8 @@ typedef enum FFVulkanExtensions {
MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDevice) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFeatures2) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceProperties) \
+ MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoCapabilitiesKHR) \
+ MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoFormatPropertiesKHR) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, DeviceWaitIdle) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDevice) \
\
@@ -159,6 +165,20 @@ typedef enum FFVulkanExtensions {
/* sync2 */ \
MACRO(1, 1, FF_VK_EXT_SYNC2, CmdPipelineBarrier2KHR) \
\
+ /* Video queue */ \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionParametersKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, GetVideoSessionMemoryRequirementsKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, BindVideoSessionMemoryKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdBeginVideoCodingKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdControlVideoCodingKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdEndVideoCodingKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionParametersKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionKHR) \
+ \
+ /* Video decoding */ \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \
+ \
/* Pipeline */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 3f1ee6aa46..5385e398bf 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -48,6 +48,10 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif
+ { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE },
+ { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE },
+ { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 },
+ { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
};
FFVulkanExtensions mask = 0x0;
--
2.39.2
[-- Attachment #31: 0030-hwcontext_vulkan-support-PREP_MODE_DECODING-in-prepa.patch --]
[-- Type: text/x-diff, Size: 5554 bytes --]
From 506c7daa8423efd56296868cce017642235b6186 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:18:21 +0100
Subject: [PATCH 30/72] hwcontext_vulkan: support PREP_MODE_DECODING in
prepare_frame()
---
libavutil/hwcontext_vulkan.c | 70 ++++++++++++++++++++++++++----------
1 file changed, 51 insertions(+), 19 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c0e35d8d78..e7c14fad74 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1969,7 +1969,9 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
enum PrepMode {
PREP_MODE_WRITE,
PREP_MODE_EXTERNAL_EXPORT,
- PREP_MODE_EXTERNAL_IMPORT
+ PREP_MODE_EXTERNAL_IMPORT,
+ PREP_MODE_DECODING_DST,
+ PREP_MODE_DECODING_DPB,
};
static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
@@ -1978,7 +1980,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
int err;
uint32_t src_qf, dst_qf;
VkImageLayout new_layout;
- VkAccessFlags new_access;
+ VkAccessFlags2 new_access;
AVVulkanFramesContext *vkfc = hwfc->hwctx;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
@@ -1986,7 +1988,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
- VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+ VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+ VkDependencyInfo dep_info;
VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -2042,32 +2045,55 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
s_info.pWaitDstStageMask = wait_st;
s_info.waitSemaphoreCount = planes;
break;
+ case PREP_MODE_DECODING_DST:
+ new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
+ new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+ src_qf = VK_QUEUE_FAMILY_IGNORED;
+ dst_qf = VK_QUEUE_FAMILY_IGNORED;
+ break;
+ case PREP_MODE_DECODING_DPB:
+ new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
+ new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
+ src_qf = VK_QUEUE_FAMILY_IGNORED;
+ dst_qf = VK_QUEUE_FAMILY_IGNORED;
+ break;
}
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
for (int i = 0; i < planes; i++) {
- img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
- img_bar[i].srcAccessMask = 0x0;
- img_bar[i].dstAccessMask = new_access;
- img_bar[i].oldLayout = frame->layout[i];
- img_bar[i].newLayout = new_layout;
- img_bar[i].srcQueueFamilyIndex = src_qf;
- img_bar[i].dstQueueFamilyIndex = dst_qf;
- img_bar[i].image = frame->img[i];
- img_bar[i].subresourceRange.levelCount = 1;
- img_bar[i].subresourceRange.layerCount = 1;
- img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ img_bar[i] = (VkImageMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+ .pNext = NULL,
+ .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
+ .srcAccessMask = 0x0,
+ .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+ .dstAccessMask = new_access,
+ .oldLayout = frame->layout[i],
+ .newLayout = new_layout,
+ .srcQueueFamilyIndex = src_qf,
+ .dstQueueFamilyIndex = dst_qf,
+ .image = frame->img[i],
+ .subresourceRange = (VkImageSubresourceRange) {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .levelCount = 1,
+ .layerCount = 1,
+ },
+ };
frame->layout[i] = img_bar[i].newLayout;
frame->access[i] = img_bar[i].dstAccessMask;
}
- vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, 0, NULL, 0, NULL, planes, img_bar);
+ dep_info = (VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = planes,
+ };
+
+ vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
vkfc->unlock_frame(hwfc, frame);
@@ -2308,7 +2334,13 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
if (err)
goto fail;
- err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
+ if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
+ !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
+ err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DPB);
+ else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
+ err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DST);
+ else
+ err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
if (err)
goto fail;
--
2.39.2
[-- Attachment #32: 0031-vulkan-lock-queues-before-submitting-operations.patch --]
[-- Type: text/x-diff, Size: 1087 bytes --]
From 6da405c60b7b04895a4395f5e226e8cc60e6552e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:04:28 +0100
Subject: [PATCH 31/72] vulkan: lock queues before submitting operations
---
libavutil/vulkan.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 6bf2c214b7..ad13b8f3cb 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
return AVERROR_EXTERNAL;
}
+ s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
+ e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
+
ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
+
+ s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
+ e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
+
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
ff_vk_ret2str(ret));
--
2.39.2
[-- Attachment #33: 0032-vulkan-define-VK_NO_PROTOTYPES.patch --]
[-- Type: text/x-diff, Size: 573 bytes --]
From 69c6d3dff6040feb9192be9364b064cce340ef3a Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:04:48 +0100
Subject: [PATCH 32/72] vulkan: define VK_NO_PROTOTYPES
---
libavutil/vulkan.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index d1ea1e24fb..7927b04454 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -19,6 +19,8 @@
#ifndef AVUTIL_VULKAN_H
#define AVUTIL_VULKAN_H
+#define VK_NO_PROTOTYPES
+
#include "pixdesc.h"
#include "bprint.h"
#include "hwcontext.h"
--
2.39.2
[-- Attachment #34: 0033-vulkan-add-additional-error-codes.patch --]
[-- Type: text/x-diff, Size: 1553 bytes --]
From 3049e9213948926ec2a3f42808f065c336eb0126 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:54:35 +0100
Subject: [PATCH 33/72] vulkan: add additional error codes
---
libavutil/vulkan.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ad13b8f3cb..f2846e628a 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -78,6 +78,12 @@ const char *ff_vk_ret2str(VkResult res)
CASE(VK_ERROR_TOO_MANY_OBJECTS);
CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
CASE(VK_ERROR_FRAGMENTED_POOL);
+ CASE(VK_ERROR_UNKNOWN);
+ CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+ CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ CASE(VK_ERROR_FRAGMENTATION);
+ CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
+ CASE(VK_PIPELINE_COMPILE_REQUIRED);
CASE(VK_ERROR_SURFACE_LOST_KHR);
CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
CASE(VK_SUBOPTIMAL_KHR);
@@ -90,6 +96,13 @@ const char *ff_vk_ret2str(VkResult res)
CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
+ CASE(VK_ERROR_NOT_PERMITTED_KHR);
+ CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
+ CASE(VK_THREAD_IDLE_KHR);
+ CASE(VK_THREAD_DONE_KHR);
+ CASE(VK_OPERATION_DEFERRED_KHR);
+ CASE(VK_OPERATION_NOT_DEFERRED_KHR);
default: return "Unknown error";
}
#undef CASE
--
2.39.2
[-- Attachment #35: 0034-vulkan-fix-comment-statement-about-exec_queue-blocki.patch --]
[-- Type: text/x-diff, Size: 919 bytes --]
From 630be2276afccbac78976d7c8a0f3662b72de248 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 21:41:59 +0100
Subject: [PATCH 34/72] vulkan: fix comment statement about exec_queue blocking
---
libavutil/vulkan.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 7927b04454..a8aa9d8a8b 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -386,9 +386,7 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
VkPipelineStageFlagBits in_wait_dst_flag);
/**
- * Submits a command buffer to the queue for execution.
- * Will block until execution has finished in order to simplify resource
- * management.
+ * Submits a command buffer to the queue for execution. Will not block.
*/
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
--
2.39.2
[-- Attachment #36: 0035-vulkan-add-pNext-argument-to-ff_vk_create_buf.patch --]
[-- Type: text/x-diff, Size: 3809 bytes --]
From d9c9bfa670126ea72a95a1808beb6bd0883cbb98 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 17 Mar 2022 12:23:56 +0100
Subject: [PATCH 35/72] vulkan: add pNext argument to ff_vk_create_buf()
---
libavfilter/vf_gblur_vulkan.c | 2 +-
libavfilter/vf_overlay_vulkan.c | 2 +-
libavfilter/vf_scale_vulkan.c | 2 +-
libavutil/vulkan.c | 4 ++--
libavutil/vulkan.h | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c
index d61f3c778c..c6360799a7 100644
--- a/libavfilter/vf_gblur_vulkan.c
+++ b/libavfilter/vf_gblur_vulkan.c
@@ -174,7 +174,7 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
RET(ff_vk_init_pipeline_layout(&s->vkctx, pl));
RET(ff_vk_init_compute_pipeline(&s->vkctx, pl));
- RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize,
+ RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0));
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index e87ee83000..bdf231f4ef 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -181,7 +181,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
} *par;
err = ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par),
+ sizeof(*par), NULL,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
if (err)
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index c140420896..31dc35569b 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -253,7 +253,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
}
RET(ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par),
+ sizeof(*par), NULL,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index f2846e628a..ae6adc5104 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -205,7 +205,7 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
return 0;
}
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
int err;
@@ -215,7 +215,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
VkBufferCreateInfo buf_spawn = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = NULL,
+ .pNext = pNext,
.usage = usage,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.size = size, /* Gets FFALIGNED during alloc if host visible
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index a8aa9d8a8b..2311928a8c 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -393,7 +393,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
/**
* Create a VkBuffer with the specified parameters.
*/
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
--
2.39.2
[-- Attachment #37: 0036-vulkan-add-ff_vk_qf_fill.patch --]
[-- Type: text/x-diff, Size: 2777 bytes --]
From da581e95cea93e9b628263aa28de945828f71967 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:03:58 +0100
Subject: [PATCH 36/72] vulkan: add ff_vk_qf_fill()
---
libavutil/vulkan.c | 25 +++++++++++++++++++++++++
libavutil/vulkan.h | 9 +++++++++
2 files changed, 34 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ae6adc5104..eceef295a8 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,6 +108,31 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
+/**
+ * Append a queue family index to s->qfs, unless it is already present among
+ * the s->nb_qfs entries collected so far.
+ */
+static void vk_qf_add_unique(FFVulkanContext *s, uint32_t qf_idx)
+{
+    for (int i = 0; i < s->nb_qfs; i++)
+        if (s->qfs[i] == qf_idx)
+            return;
+
+    s->qfs[s->nb_qfs++] = qf_idx;
+}
+
+void ff_vk_qf_fill(FFVulkanContext *s)
+{
+    const AVVulkanDeviceContext *hwctx = s->hwctx;
+
+    s->nb_qfs = 0;
+
+    /* Collect every distinct queue family index into s->qfs. Duplicates are
+     * only searched for among the entries written by this call, so stale or
+     * zeroed slots past nb_qfs can never suppress a queue family. */
+    if (hwctx->queue_family_index >= 0)
+        vk_qf_add_unique(s, hwctx->queue_family_index);
+
+    /* The transfer and compute indices are added without a validity check */
+    vk_qf_add_unique(s, hwctx->queue_family_tx_index);
+    vk_qf_add_unique(s, hwctx->queue_family_comp_index);
+
+    /* The decode and encode families are only added when they are set */
+    if (hwctx->queue_family_decode_index >= 0)
+        vk_qf_add_unique(s, hwctx->queue_family_decode_index);
+    if (hwctx->queue_family_encode_index >= 0)
+        vk_qf_add_unique(s, hwctx->queue_family_encode_index);
+}
+
void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
VkQueueFlagBits dev_family, int nb_queues)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 2311928a8c..7254c21cf7 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -203,6 +203,9 @@ typedef struct FFVulkanContext {
AVHWFramesContext *frames;
AVVulkanFramesContext *hwfc;
+ uint32_t qfs[5];
+ int nb_qfs;
+
FFVkSPIRVCompiler *spirv_compiler;
/* Properties */
@@ -245,6 +248,12 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
*/
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
+/**
+ * Setup the queue families from the hardware device context.
+ * Necessary for image creation to work.
+ */
+void ff_vk_qf_fill(FFVulkanContext *s);
+
/**
* Initialize a queue family with a specific number of queues.
* If nb_queues == 0, use however many queues the queue family has.
--
2.39.2
[-- Attachment #38: 0037-vulkan-add-ff_vk_image_create.patch --]
[-- Type: text/x-diff, Size: 4892 bytes --]
From 9da56b3fc3169588f97f590abeecb7ead3c18202 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:05:59 +0100
Subject: [PATCH 37/72] vulkan: add ff_vk_image_create()
---
libavutil/vulkan.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++
libavutil/vulkan.h | 11 ++++++
2 files changed, 100 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index eceef295a8..212f134466 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -401,6 +401,95 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}
+/**
+ * Create a 2D VkImage and a timeline semaphore in slot idx of the frame f,
+ * and initialize the per-slot tracking state (queue family, layout, access,
+ * semaphore value) along with the frame-wide flags and tiling.
+ *
+ * NOTE(review): mem and alloc_pnext are not used by this function; no memory
+ * is allocated or bound to the image here.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the image could not be created,
+ *         AVERROR_EXTERNAL if the semaphore could not be created (the image
+ *         is destroyed again in that case).
+ */
+int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx,
+                       int width, int height, VkFormat fmt, VkImageTiling tiling,
+                       VkImageUsageFlagBits usage, VkImageCreateFlags flags,
+                       void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    AVVulkanDeviceContext *hwctx = s->hwctx;
+
+    VkExportSemaphoreCreateInfo ext_sem_info = {
+        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
+#ifdef _WIN32
+        .handleTypes = IsWindows8OrGreater()
+            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
+            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
+#else
+        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+#endif
+    };
+
+    /* The export info is only chained in if the matching extension is enabled */
+    VkSemaphoreTypeCreateInfo sem_type_info = {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
+#ifdef _WIN32
+        .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
+#else
+        .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
+#endif
+        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
+        .initialValue = 0,
+    };
+
+    VkSemaphoreCreateInfo sem_spawn = {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+        .pNext = &sem_type_info,
+    };
+
+    /* Create the image */
+    VkImageCreateInfo create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .pNext = create_pnext,
+        .imageType = VK_IMAGE_TYPE_2D,
+        .format = fmt,
+        .extent.width = width,
+        .extent.height = height,
+        .extent.depth = 1,
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .flags = flags,
+        .tiling = tiling,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage = usage,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .pQueueFamilyIndices = s->qfs,
+        .queueFamilyIndexCount = s->nb_qfs,
+        /* Concurrent sharing is only needed with more than one queue family */
+        .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+                                       VK_SHARING_MODE_EXCLUSIVE,
+    };
+
+    ret = vk->CreateImage(hwctx->act_dev, &create_info,
+                          hwctx->alloc, &f->img[idx]);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR(EINVAL);
+    }
+
+    /* Create semaphore */
+    ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
+                              hwctx->alloc, &f->sem[idx]);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
+               ff_vk_ret2str(ret));
+        /* Do not leak the image created above; clear the handle so that a
+         * later cleanup of the frame does not destroy it a second time */
+        vk->DestroyImage(hwctx->act_dev, f->img[idx], hwctx->alloc);
+        f->img[idx] = VK_NULL_HANDLE;
+        return AVERROR_EXTERNAL;
+    }
+
+    f->queue_family[idx] = s->nb_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : s->qfs[0];
+    f->layout[idx]       = create_info.initialLayout;
+    f->access[idx]       = 0x0;
+    f->sem_value[idx]    = 0;
+
+    f->flags  = 0x0;
+    f->tiling = tiling;
+
+    return 0;
+}
+
int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
VkShaderStageFlagBits stage)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 7254c21cf7..69c099fa8f 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -423,6 +423,17 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
*/
void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
+/**
+ * Creates an image, allocates and binds memory in the given
+ * idx value of the dst frame. If mem is non-NULL, then no memory will be
+ * allocated, but instead the given memory will be bound to the image.
+ */
+int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx,
+ int width, int height, VkFormat fmt, VkImageTiling tiling,
+ VkImageUsageFlagBits usage, VkImageCreateFlags flags,
+ void *create_pnext,
+ VkDeviceMemory *mem, void *alloc_pnext);
+
/**
* Frees the main Vulkan context.
*/
--
2.39.2
[-- Attachment #39: 0038-vulkan-expose-ff_vk_alloc_mem.patch --]
[-- Type: text/x-diff, Size: 2666 bytes --]
From 661af851afe7dcb3c2982fab953aff2941b4e5b9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:03:34 +0100
Subject: [PATCH 38/72] vulkan: expose ff_vk_alloc_mem()
---
libavutil/vulkan.c | 15 ++++++++-------
libavutil/vulkan.h | 7 +++++++
2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 212f134466..7870de351d 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -174,9 +174,9 @@ void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
}
-static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
- VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
- VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
VkResult ret;
int index = -1;
@@ -225,7 +225,8 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
return AVERROR(ENOMEM);
}
- *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
+ if (mem_flags)
+ *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
return 0;
}
@@ -279,9 +280,9 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
if (use_ded_mem)
ded_alloc.buffer = buf->buf;
- err = vk_alloc_mem(s, &req.memoryRequirements, flags,
- use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
- &buf->flags, &buf->mem);
+ err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
+ use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ &buf->flags, &buf->mem);
if (err)
return err;
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 69c099fa8f..afc8bce999 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -254,6 +254,13 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
*/
void ff_vk_qf_fill(FFVulkanContext *s);
+/**
+ * Allocate device memory.
+ */
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+
/**
* Initialize a queue family with a specific number of queues.
* If nb_queues == 0, use however many queues the queue family has.
--
2.39.2
[-- Attachment #40: 0039-vulkan-support-ignoring-memory-properties-when-alloc.patch --]
[-- Type: text/x-diff, Size: 1648 bytes --]
From e2a8084132631c8fad25aa5a2850deb904e42847 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 29 Nov 2022 00:43:19 +0000
Subject: [PATCH 39/72] vulkan: support ignoring memory properties when
allocating
---
libavutil/vulkan.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7870de351d..b1553c6537 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -188,7 +188,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
};
/* Align if we need to */
- if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
alloc_info.allocationSize = req->size;
@@ -201,7 +201,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
continue;
/* The memory type flags must include our properties */
- if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+ if ((req_flags != UINT32_MAX) &&
+ ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
continue;
/* Found a suitable memory type */
@@ -210,7 +211,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
}
if (index < 0) {
- av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+ av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
req_flags);
return AVERROR(EINVAL);
}
--
2.39.2
[-- Attachment #41: 0040-vulkan-allow-alloc-pNext-in-ff_vk_create_buf.patch --]
[-- Type: text/x-diff, Size: 1878 bytes --]
From 6ac7455f51f0ea1d68b4be2c8cf3ef6f5ca9abde Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 17:43:27 +0100
Subject: [PATCH 40/72] vulkan: allow alloc pNext in ff_vk_create_buf
---
libavutil/vulkan.c | 5 +++--
libavutil/vulkan.h | 3 ++-
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index b1553c6537..0bb5b1eebf 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
return 0;
}
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
int err;
@@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
};
VkMemoryDedicatedAllocateInfo ded_alloc = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = NULL,
+ .pNext = alloc_pNext,
};
VkMemoryDedicatedRequirements ded_req = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index afc8bce999..65f24ca138 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -409,7 +409,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
/**
* Create a VkBuffer with the specified parameters.
*/
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
--
2.39.2
[-- Attachment #42: 0041-vulkan-do-not-wait-for-device-idle-when-destroying-b.patch --]
[-- Type: text/x-diff, Size: 786 bytes --]
From 8ce981bb551f37d27f9a11a36c4af7eb007011cb Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 16 Dec 2022 00:37:53 +0100
Subject: [PATCH 41/72] vulkan: do not wait for device idle when destroying
buffers
This should be done explicitly.
---
libavutil/vulkan.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 0bb5b1eebf..0250f5aa39 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -396,8 +396,6 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
if (!buf || !s->hwctx)
return;
- vk->DeviceWaitIdle(s->hwctx->act_dev);
-
if (buf->buf != VK_NULL_HANDLE)
vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
if (buf->mem != VK_NULL_HANDLE)
--
2.39.2
[-- Attachment #43: 0042-vulkan-add-size-tracking-to-buffer-structs.patch --]
[-- Type: text/x-diff, Size: 964 bytes --]
From 0f532a85d9d3fd09d8f35f61911edc8827ed26c0 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 16 Dec 2022 01:47:42 +0100
Subject: [PATCH 42/72] vulkan: add size tracking to buffer structs
---
libavutil/vulkan.c | 2 ++
libavutil/vulkan.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 0250f5aa39..faf5cd5508 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -295,6 +295,8 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
return AVERROR_EXTERNAL;
}
+ buf->size = size;
+
return 0;
}
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 65f24ca138..c993263324 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -94,6 +94,7 @@ typedef struct FFVkBuffer {
VkBuffer buf;
VkDeviceMemory mem;
VkMemoryPropertyFlagBits flags;
+ size_t size;
} FFVkBuffer;
typedef struct FFVkQueueFamilyCtx {
--
2.39.2
[-- Attachment #44: 0043-vulkan-use-device-properties-2-and-add-a-convenience.patch --]
[-- Type: text/x-diff, Size: 2388 bytes --]
From 054c1925dd67a5918fd42b894bb4ca966e60aec8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 19 Dec 2022 07:57:22 +0100
Subject: [PATCH 43/72] vulkan: use device properties 2 and add a convenience
loader function
---
libavutil/vulkan.c | 18 +++++++++++++++++-
libavutil/vulkan.h | 8 +++++++-
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index faf5cd5508..8a583248d1 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,6 +108,22 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
+void ff_vk_load_props(FFVulkanContext *s)
+{
+    FFVulkanFunctions *fn = &s->vkfn;
+
+    /* Memory properties do not depend on the structs set up below */
+    fn->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+
+    /* Reset both output structs. The driver properties are chained to the
+     * device properties via pNext, so one call is issued for the pair. */
+    s->driver_props = (VkPhysicalDeviceDriverProperties) {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+    };
+    s->props = (VkPhysicalDeviceProperties2) {
+        .pNext = &s->driver_props,
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
+    };
+    fn->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
+}
+
void ff_vk_qf_fill(FFVulkanContext *s)
{
s->nb_qfs = 0;
@@ -189,7 +205,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
/* Align if we need to */
if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
- req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
+ req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
alloc_info.allocationSize = req->size;
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index c993263324..0f6efd023e 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -193,7 +193,8 @@ typedef struct FFVulkanContext {
FFVulkanFunctions vkfn;
FFVulkanExtensions extensions;
- VkPhysicalDeviceProperties props;
+ VkPhysicalDeviceProperties2 props;
+ VkPhysicalDeviceDriverProperties driver_props;
VkPhysicalDeviceMemoryProperties mprops;
AVBufferRef *device_ref;
@@ -239,6 +240,11 @@ extern const VkComponentMapping ff_comp_identity_map;
*/
const char *ff_vk_ret2str(VkResult res);
+/**
+ * Loads props/mprops/driver_props
+ */
+void ff_vk_load_props(FFVulkanContext *s);
+
/**
* Returns 1 if the image is any sort of supported RGB
*/
--
2.39.2
[-- Attachment #45: 0044-vulkan-minor-indent-fix-add-support-for-synchronous-.patch --]
[-- Type: text/x-diff, Size: 2945 bytes --]
From 834645640497d6e371fa50c40ee9ef9700494851 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 05:02:50 +0100
Subject: [PATCH 44/72] vulkan: minor indent fix, add support for synchronous
submission/waiting
---
libavutil/vulkan.c | 20 ++++++++++++++++++--
libavutil/vulkan.h | 9 +++++++++
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 8a583248d1..b5e08ecc46 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -564,7 +564,7 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
/* Create command pool */
ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
- s->hwctx->alloc, &e->pool);
+ s->hwctx->alloc, &e->pool);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
ff_vk_ret2str(ret));
@@ -631,11 +631,13 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
- } else {
+ } else if (!q->synchronous) {
vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
}
+ q->synchronous = 0;
+
/* Discard queue dependencies */
ff_vk_discard_exec_deps(e);
@@ -788,9 +790,23 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
for (int i = 0; i < e->sem_sig_cnt; i++)
*e->sem_sig_val_dst[i] += 1;
+ q->submitted = 1;
+
return 0;
}
+void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+
+    /* Nothing to do if nothing was ever submitted on this queue, or if a
+     * previous call already waited on and reset the fence: waiting again
+     * would block with an infinite timeout on the unsignalled fence. */
+    if (!q->submitted || q->synchronous)
+        return;
+
+    vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
+    vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
+
+    /* Tells ff_vk_start_exec_recording() that the fence wait/reset for this
+     * queue has already been done */
+    q->synchronous = 1;
+}
+
int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
AVBufferRef **deps, int nb_deps)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 0f6efd023e..9ee9469305 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -143,6 +143,9 @@ typedef struct FFVkQueueCtx {
VkFence fence;
VkQueue queue;
+ int synchronous;
+ int submitted;
+
/* Buffer dependencies */
AVBufferRef **buf_deps;
int nb_buf_deps;
@@ -413,6 +416,12 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
*/
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
+/**
+ * Wait on a command buffer's execution. Mainly useful for debugging and
+ * development.
+ */
+void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e);
+
/**
* Create a VkBuffer with the specified parameters.
*/
--
2.39.2
[-- Attachment #46: 0045-vulkan-add-support-for-queries.patch --]
[-- Type: text/x-diff, Size: 7363 bytes --]
From f97d922f523914c0d1e9748876aa3002e0f5811c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 05:03:32 +0100
Subject: [PATCH 45/72] vulkan: add support for queries
---
libavutil/vulkan.c | 118 +++++++++++++++++++++++++++++++++++++++++++++
libavutil/vulkan.h | 30 ++++++++++++
2 files changed, 148 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index b5e08ecc46..de0c300c0e 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -592,6 +592,114 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
return 0;
}
+int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
+ int nb_queries, VkQueryType type,
+ int elem_64bits, void *create_pnext)
+{
+ VkResult ret;
+ size_t qd_size;
+ int nb_results = nb_queries;
+ int nb_statuses = 0 /* Once RADV has support, = nb_queries */;
+ int status_stride = 2;
+ int result_elem_size = elem_64bits ? 8 : 4;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkQueryPoolCreateInfo query_pool_info = {
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .pNext = create_pnext,
+ .queryType = type,
+ .queryCount = nb_queries*e->qf->nb_queues,
+ };
+ /* A context can only hold one query pool */
+ if (e->query.pool)
+ return AVERROR(EINVAL);
+
+ /* Video encode queries produce two results per query */
+ if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
+ status_stride = 3; /* skip,skip,result,skip,skip,result */
+ nb_results *= 2;
+ } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
+ status_stride = 1;
+ nb_results = 0; /* status-only queries carry no result values */
+ }
+
+ qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size;
+ e->query.data = av_mallocz(e->qf->nb_queues*qd_size);
+ if (!e->query.data)
+ return AVERROR(ENOMEM);
+
+ ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
+ s->hwctx->alloc, &e->query.pool);
+ if (ret != VK_SUCCESS) {
+ av_freep(&e->query.data); /* a retry would otherwise overwrite, and so leak, this buffer */
+ return AVERROR_EXTERNAL;
+ }
+
+ e->query.data_per_queue = qd_size;
+ e->query.nb_queries = nb_queries;
+ e->query.nb_results = nb_results;
+ e->query.nb_statuses = nb_statuses;
+ e->query.elem_64bits = elem_64bits;
+ e->query.status_stride = status_stride;
+ return 0;
+}
+
+int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
+ int query_idx, void **data, int64_t *status)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ uint8_t *qd = NULL;
+ int32_t *res32;
+ int64_t *res64;
+ int64_t res = 0;
+ VkQueryResultFlags qf = 0;
+ FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+
+ /* Nothing submitted on this queue: report NULL data and a zero status */
+ if (!q->submitted)
+ goto end;
+
+ qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue;
+ qf |= e->query.nb_results && e->query.nb_statuses ?
+ VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
+ qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0;
+ res32 = (int32_t *)(qd + e->query.nb_results*4);
+ res64 = (int64_t *)(qd + e->query.nb_results*8);
+
+ ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool,
+ query_idx,
+ e->query.nb_queries,
+ e->query.data_per_queue, qd,
+ e->query.elem_64bits ? 8 : 4, qf);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ /* Fold the statuses into one value; once negative, only a lower status replaces it */
+ if (e->query.nb_statuses && e->query.elem_64bits) {
+ for (int i = 0; i < e->query.nb_queries; i++) {
+ res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
+ res64[i] : res;
+ res64 += e->query.status_stride;
+ }
+ } else if (e->query.nb_statuses) {
+ for (int i = 0; i < e->query.nb_queries; i++) {
+ res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
+ res32[i] : res;
+ res32 += e->query.status_stride;
+ }
+ }
+
+end:
+ if (data)
+ *data = qd;
+ if (status)
+ *status = res;
+
+ return 0;
+}
+
void ff_vk_discard_exec_deps(FFVkExecContext *e)
{
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
@@ -648,6 +756,12 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
return AVERROR_EXTERNAL;
}
+ if (e->query.pool) {
+ e->query.idx = e->qf->cur_queue*e->query.nb_queries;
+ vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool,
+ e->query.idx, e->query.nb_queries);
+ }
+
return 0;
}
@@ -790,6 +904,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
for (int i = 0; i < e->sem_sig_cnt; i++)
*e->sem_sig_val_dst[i] += 1;
+ e->query.idx = e->qf->cur_queue*e->query.nb_queries;
q->submitted = 1;
return 0;
@@ -1483,7 +1598,10 @@ static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
if (e->pool)
vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+ if (e->query.pool)
+ vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc);
+ av_freep(&e->query.data);
av_freep(&e->bufs);
av_freep(&e->queues);
av_freep(&e->sem_sig);
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 9ee9469305..e222f67b5a 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -164,6 +164,19 @@ typedef struct FFVkExecContext {
VkCommandBuffer *bufs;
FFVkQueueCtx *queues;
+ struct {
+ int idx;
+ VkQueryPool pool;
+ uint8_t *data;
+
+ int nb_queries;
+ int nb_results;
+ int nb_statuses;
+ int elem_64bits;
+ size_t data_per_queue;
+ int status_stride;
+ } query;
+
AVBufferRef ***deps;
int *nb_deps;
int *dep_alloc_size;
@@ -367,6 +380,23 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
FFVkQueueFamilyCtx *qf);
+/**
+ * Create a query pool for a command context.
+ * elem_64bits exists to troll driver devs for compliance. All results
+ * and statuses returned should be 32 bits, unless this is set, then it's 64bits.
+ */
+int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
+ int nb_queries, VkQueryType type,
+ int elem_64bits, void *create_pnext);
+
+/**
+ * Get results for query.
+ * Returns 0 on success, or a negative AVERROR code on failure.
+ * Sets *data to the result buffer and *status to the status of the queries.
+ */
+int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
+ int query_idx, void **data, int64_t *status);
+
/**
* Begin recording to the command buffer. Previous execution must have been
* completed, which ff_vk_submit_exec_queue() will ensure.
--
2.39.2
[-- Attachment #47: 0046-vulkan-add-support-for-retrieving-queue-query-and-vi.patch --]
[-- Type: text/x-diff, Size: 7602 bytes --]
From 5422a554ad592c3b4a68c34490db201577f295ee Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 17:37:51 +0100
Subject: [PATCH 46/72] vulkan: add support for retrieving queue, query and
video properties
---
libavutil/vulkan.c | 87 ++++++++++++++++++++++++++++++------
libavutil/vulkan.h | 14 ++++--
libavutil/vulkan_functions.h | 1 +
3 files changed, 85 insertions(+), 17 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index de0c300c0e..d045ff83c1 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
-void ff_vk_load_props(FFVulkanContext *s)
+int ff_vk_load_props(FFVulkanContext *s)
{
+ uint32_t qc = 0;
FFVulkanFunctions *vk = &s->vkfn;
s->driver_props = (VkPhysicalDeviceDriverProperties) {
@@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s)
.pNext = &s->driver_props,
};
+
vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+ vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
+ /* With qf_props still NULL the call above only wrote the family count to qc */
+ if (s->qf_props)
+ return 0;
+
+ s->qf_props = av_mallocz(sizeof(*s->qf_props)*qc);
+ if (!s->qf_props)
+ return AVERROR(ENOMEM);
+
+ s->query_props = av_mallocz(sizeof(*s->query_props)*qc);
+ if (!s->query_props) {
+ av_freep(&s->qf_props);
+ return AVERROR(ENOMEM);
+ }
+
+ s->video_props = av_mallocz(sizeof(*s->video_props)*qc);
+ if (!s->video_props) {
+ av_freep(&s->qf_props);
+ av_freep(&s->query_props);
+ return AVERROR(ENOMEM);
+ }
+ /* Per-family pNext chain: qf_props -> video_props -> query_props */
+ for (uint32_t i = 0; i < qc; i++) {
+ s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR,
+ };
+ s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+ .pNext = &s->query_props[i],
+ };
+ s->qf_props[i] = (VkQueueFamilyProperties2) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+ .pNext = &s->video_props[i],
+ };
+ }
+ /* Second call, now with storage for qc entries, fills in the properties */
+ vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
+
+ return 0;
}
void ff_vk_qf_fill(FFVulkanContext *s)
@@ -149,40 +190,54 @@ void ff_vk_qf_fill(FFVulkanContext *s)
s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
}
-void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues)
+int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
+ int ret, num;
+
switch (dev_family) {
case VK_QUEUE_GRAPHICS_BIT:
- qf->queue_family = s->hwctx->queue_family_index;
- qf->actual_queues = s->hwctx->nb_graphics_queues;
+ ret = s->hwctx->queue_family_index;
+ num = s->hwctx->nb_graphics_queues;
break;
case VK_QUEUE_COMPUTE_BIT:
- qf->queue_family = s->hwctx->queue_family_comp_index;
- qf->actual_queues = s->hwctx->nb_comp_queues;
+ ret = s->hwctx->queue_family_comp_index;
+ num = s->hwctx->nb_comp_queues;
break;
case VK_QUEUE_TRANSFER_BIT:
- qf->queue_family = s->hwctx->queue_family_tx_index;
- qf->actual_queues = s->hwctx->nb_tx_queues;
+ ret = s->hwctx->queue_family_tx_index;
+ num = s->hwctx->nb_tx_queues;
break;
case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
- qf->queue_family = s->hwctx->queue_family_encode_index;
- qf->actual_queues = s->hwctx->nb_encode_queues;
+ ret = s->hwctx->queue_family_encode_index;
+ num = s->hwctx->nb_encode_queues;
break;
case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
- qf->queue_family = s->hwctx->queue_family_decode_index;
- qf->actual_queues = s->hwctx->nb_decode_queues;
+ ret = s->hwctx->queue_family_decode_index;
+ num = s->hwctx->nb_decode_queues;
break;
default:
av_assert0(0); /* Should never happen */
}
+ if (nb)
+ *nb = num;
+
+ return ret;
+}
+
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ VkQueueFlagBits dev_family, int nb_queues)
+{
+ int ret;
+
+ ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues);
+
if (!nb_queues)
qf->nb_queues = qf->actual_queues;
else
qf->nb_queues = nb_queues;
- return;
+ return ret;
}
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
@@ -1669,6 +1724,10 @@ void ff_vk_uninit(FFVulkanContext *s)
{
FFVulkanFunctions *vk = &s->vkfn;
+ av_freep(&s->query_props);
+ av_freep(&s->qf_props);
+ av_freep(&s->video_props);
+
if (s->spirv_compiler)
s->spirv_compiler->uninit(&s->spirv_compiler);
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index e222f67b5a..ccfa88f44f 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -212,6 +212,9 @@ typedef struct FFVulkanContext {
VkPhysicalDeviceProperties2 props;
VkPhysicalDeviceDriverProperties driver_props;
VkPhysicalDeviceMemoryProperties mprops;
+ VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
+ VkQueueFamilyVideoPropertiesKHR *video_props;
+ VkQueueFamilyProperties2 *qf_props;
AVBufferRef *device_ref;
AVHWDeviceContext *device;
@@ -259,7 +262,7 @@ const char *ff_vk_ret2str(VkResult res);
/**
* Loads props/mprops/driver_props
*/
-void ff_vk_load_props(FFVulkanContext *s);
+int ff_vk_load_props(FFVulkanContext *s);
/**
* Returns 1 if the image is any sort of supported RGB
@@ -284,12 +287,17 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+/**
+ * Get a queue family index and the number of queues. nb is optional.
+ */
+int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb);
+
/**
* Initialize a queue family with a specific number of queues.
* If nb_queues == 0, use however many queues the queue family has.
*/
-void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues);
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ VkQueueFlagBits dev_family, int nb_queues);
/**
* Rotate through the queues in a queue family.
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 65ab560d21..fa1650e895 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -77,6 +77,7 @@ typedef enum FFVulkanExtensions {
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFormatProperties2) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceImageFormatProperties2) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties) \
+ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties2) \
\
/* Command pool */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateCommandPool) \
--
2.39.2
[-- Attachment #48: 0047-vulkan-return-current-queue-index-from-ff_vk_qf_rota.patch --]
[-- Type: text/x-diff, Size: 1290 bytes --]
From 4632426c65f136ef70c4ab854a1076e1d1c868ff Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 28 Dec 2022 05:55:53 +0100
Subject: [PATCH 47/72] vulkan: return current queue index from
ff_vk_qf_rotate()
---
libavutil/vulkan.c | 3 ++-
libavutil/vulkan.h | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index d045ff83c1..cb8e08e02f 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -240,9 +240,10 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
return ret;
}
-void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
+int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
{
qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
+ return qf->cur_queue;
}
int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index ccfa88f44f..dd1bc9c440 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -302,7 +302,7 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
/**
* Rotate through the queues in a queue family.
*/
-void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
+int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
/**
* Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
--
2.39.2
[-- Attachment #49: 0048-vulkan-rewrite-dependency-handling-code.patch --]
[-- Type: text/x-diff, Size: 82373 bytes --]
From c1e607011ac764b46875add61c533ab2e49ab00e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 29 Dec 2022 21:16:21 +0100
Subject: [PATCH 48/72] vulkan: rewrite dependency handling code
---
libavutil/vulkan.c | 1350 ++++++++++++++++++++------------------------
libavutil/vulkan.h | 382 +++++--------
2 files changed, 749 insertions(+), 983 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index cb8e08e02f..17a5bd6f3f 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -27,27 +27,6 @@
#include "vulkan_shaderc.c"
#endif
-/* Generic macro for creating contexts which need to keep their addresses
- * if another context is created. */
-#define FN_CREATING(ctx, type, shortname, array, num) \
-static av_always_inline type *create_ ##shortname(ctx *dctx) \
-{ \
- type **array, *sctx = av_mallocz(sizeof(*sctx)); \
- if (!sctx) \
- return NULL; \
- \
- array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
- if (!array) { \
- av_free(sctx); \
- return NULL; \
- } \
- \
- dctx->array = array; \
- dctx->array[dctx->num++] = sctx; \
- \
- return sctx; \
-}
-
const VkComponentMapping ff_comp_identity_map = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -165,32 +144,7 @@ int ff_vk_load_props(FFVulkanContext *s)
return 0;
}
-void ff_vk_qf_fill(FFVulkanContext *s)
-{
- s->nb_qfs = 0;
-
- /* Simply fills in all unique queues into s->qfs */
- if (s->hwctx->queue_family_index >= 0)
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
- if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
- if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
- s->qfs[1] != s->hwctx->queue_family_comp_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
- if (s->hwctx->queue_family_decode_index >= 0 &&
- (s->qfs[0] != s->hwctx->queue_family_decode_index &&
- s->qfs[1] != s->hwctx->queue_family_decode_index &&
- s->qfs[2] != s->hwctx->queue_family_decode_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
- if (s->hwctx->queue_family_encode_index >= 0 &&
- (s->qfs[0] != s->hwctx->queue_family_encode_index &&
- s->qfs[1] != s->hwctx->queue_family_encode_index &&
- s->qfs[2] != s->hwctx->queue_family_encode_index &&
- s->qfs[3] != s->hwctx->queue_family_encode_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
-}
-
-int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
+static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
int ret, num;
@@ -226,790 +180,760 @@ int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
}
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues)
+ VkQueueFlagBits dev_family)
{
- int ret;
-
- ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues);
-
- if (!nb_queues)
- qf->nb_queues = qf->actual_queues;
- else
- qf->nb_queues = nb_queues;
-
- return ret;
-}
+ /* Fill in queue families from context if not done yet */
+ if (!s->nb_qfs) {
+ s->nb_qfs = 0;
+
+ /* Simply fills in all unique queues into s->qfs */
+ if (s->hwctx->queue_family_index >= 0)
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
+ if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
+ if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
+ s->qfs[1] != s->hwctx->queue_family_comp_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
+ if (s->hwctx->queue_family_decode_index >= 0 &&
+ (s->qfs[0] != s->hwctx->queue_family_decode_index &&
+ s->qfs[1] != s->hwctx->queue_family_decode_index &&
+ s->qfs[2] != s->hwctx->queue_family_decode_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
+ if (s->hwctx->queue_family_encode_index >= 0 &&
+ (s->qfs[0] != s->hwctx->queue_family_encode_index &&
+ s->qfs[1] != s->hwctx->queue_family_encode_index &&
+ s->qfs[2] != s->hwctx->queue_family_encode_index &&
+ s->qfs[3] != s->hwctx->queue_family_encode_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
+ }
-int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
-{
- qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
- return qf->cur_queue;
+ return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues));
}
-int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
- VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
- VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
{
- VkResult ret;
- int index = -1;
FFVulkanFunctions *vk = &s->vkfn;
- VkMemoryAllocateInfo alloc_info = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = alloc_extension,
- };
-
- /* Align if we need to */
- if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
- req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
-
- alloc_info.allocationSize = req->size;
-
- /* The vulkan spec requires memory types to be sorted in the "optimal"
- * order, so the first matching type we find will be the best/fastest one */
- for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
- /* The memory type must be supported by the requirements (bitfield) */
- if (!(req->memoryTypeBits & (1 << i)))
- continue;
-
- /* The memory type flags must include our properties */
- if ((req_flags != UINT32_MAX) &&
- ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
- continue;
+ for (int i = 0; i < pool->pool_size; i++) {
+ FFVkExecContext *e = &pool->contexts[i];
- /* Found a suitable memory type */
- index = i;
- break;
- }
+ if (e->fence) {
+ vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+ vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
+ }
- if (index < 0) {
- av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
- req_flags);
- return AVERROR(EINVAL);
+ ff_vk_exec_discard_deps(s, e);
+
+ av_free(e->frame_deps);
+ av_free(e->buf_deps);
+ av_free(e->queue_family_dst);
+ av_free(e->layout_dst);
+ av_free(e->access_dst);
+ av_free(e->frame_update);
+ av_free(e->frame_locked);
+ av_free(e->sem_sig);
+ av_free(e->sem_sig_val);
+ av_free(e->sem_sig_val_dst);
+ av_free(e->sem_wait);
+ av_free(e->sem_wait_dst);
+ av_free(e->sem_wait_val);
}
- alloc_info.memoryTypeIndex = index;
-
- ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
- s->hwctx->alloc, mem);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR(ENOMEM);
- }
-
- if (mem_flags)
- *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
-
- return 0;
+ if (pool->cmd_bufs)
+ vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool,
+ pool->pool_size, pool->cmd_bufs);
+ if (pool->cmd_buf_pool)
+ vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc);
+ if (pool->query_pool)
+ vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc);
+
+ av_free(pool->query_data);
+ av_free(pool->cmd_bufs);
+ av_free(pool->contexts);
}
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
- void *pNext, void *alloc_pNext,
- VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ FFVkExecPool *pool, int nb_contexts,
+ int nb_queries, VkQueryType query_type, int query_64bit,
+ void *query_create_pnext)
{
int err;
VkResult ret;
- int use_ded_mem;
FFVulkanFunctions *vk = &s->vkfn;
- VkBufferCreateInfo buf_spawn = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = pNext,
- .usage = usage,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- .size = size, /* Gets FFALIGNED during alloc if host visible
- but should be ok */
- };
+ VkCommandPoolCreateInfo cqueue_create;
+ VkCommandBufferAllocateInfo cbuf_create;
- VkBufferMemoryRequirementsInfo2 req_desc = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
- };
- VkMemoryDedicatedAllocateInfo ded_alloc = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = alloc_pNext,
- };
- VkMemoryDedicatedRequirements ded_req = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
- };
- VkMemoryRequirements2 req = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
- .pNext = &ded_req,
- };
+ atomic_init(&pool->idx, 0);
- ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
+ /* Create command pool */
+ cqueue_create = (VkCommandPoolCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+ VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = qf->queue_family,
+ };
+ ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+ s->hwctx->alloc, &pool->cmd_buf_pool);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+ av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ err = AVERROR_EXTERNAL;
+ goto fail;
}
- req_desc.buffer = buf->buf;
-
- vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
-
- /* In case the implementation prefers/requires dedicated allocation */
- use_ded_mem = ded_req.prefersDedicatedAllocation |
- ded_req.requiresDedicatedAllocation;
- if (use_ded_mem)
- ded_alloc.buffer = buf->buf;
-
- err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
- use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
- &buf->flags, &buf->mem);
- if (err)
- return err;
+ /* Allocate space for command buffers */
+ pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs));
+ if (!pool->cmd_bufs) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
- ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
+ /* Allocate command buffer */
+ cbuf_create = (VkCommandBufferAllocateInfo) {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandPool = pool->cmd_buf_pool,
+ .commandBufferCount = nb_contexts,
+ };
+ ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create,
+ pool->cmd_bufs);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+ av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ err = AVERROR_EXTERNAL;
+ goto fail;
}
- buf->size = size;
+ /* Query pool */
+ if (nb_queries) {
+ VkQueryPoolCreateInfo query_pool_info = {
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .pNext = query_create_pnext,
+ .queryType = query_type,
+ .queryCount = nb_queries*nb_contexts,
+ };
+ ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
+ s->hwctx->alloc, &pool->query_pool);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
- return 0;
-}
+ pool->nb_queries = nb_queries;
+ pool->query_status_stride = 2;
+ pool->query_results = nb_queries;
+ pool->query_statuses = 0; /* if radv supports it, nb_queries; */
+
+ /* Video encode queries produce two results per query */
+ if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
+ pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */
+ pool->query_results *= 2;
+ } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
+ pool->query_status_stride = 1;
+ pool->query_results = 0;
+ pool->query_statuses = nb_queries;
+ }
-int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
- int nb_buffers, int invalidate)
-{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
- VkMappedMemoryRange *inval_list = NULL;
- int inval_count = 0;
+ pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4);
- for (int i = 0; i < nb_buffers; i++) {
- ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
- VK_WHOLE_SIZE, 0, (void **)&mem[i]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ /* Allocate space for the query data */
+ pool->query_data = av_mallocz(nb_contexts*pool->qd_size);
+ if (!pool->query_data) {
+ err = AVERROR(ENOMEM);
+ goto fail;
}
}
- if (!invalidate)
- return 0;
-
- for (int i = 0; i < nb_buffers; i++) {
- const VkMappedMemoryRange ival_buf = {
- .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = buf[i].mem,
- .size = VK_WHOLE_SIZE,
- };
- if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
- continue;
- inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
- (++inval_count)*sizeof(*inval_list));
- if (!inval_list)
- return AVERROR(ENOMEM);
- inval_list[inval_count - 1] = ival_buf;
+ /* Allocate space for the contexts */
+ pool->contexts = av_mallocz(nb_contexts*sizeof(*pool->contexts));
+ if (!pool->contexts) {
+ err = AVERROR(ENOMEM);
+ goto fail;
}
- if (inval_count) {
- ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
- inval_list);
+ pool->pool_size = nb_contexts;
+
+ /* Init contexts */
+ for (int i = 0; i < pool->pool_size; i++) {
+ FFVkExecContext *e = &pool->contexts[i];
+
+ /* Fence */
+ VkFenceCreateInfo fence_create = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+ };
+ ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc,
+ &e->fence);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n",
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
+
+ e->parent = pool;
+
+ /* Query data */
+ e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i;
+ e->query_idx = nb_queries*i;
+
+ /* Command buffer */
+ e->buf = pool->cmd_bufs[i];
+
+ /* Queue index distribution */
+ e->qi = i % qf->nb_queues;
+ e->qf = qf->queue_family;
+ vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
+ e->qi, &e->queue);
}
return 0;
+
+fail:
+ ff_vk_exec_pool_free(s, pool);
+ return err;
}
-int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
- int flush)
+VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
+ void **data, int64_t *status)
{
- int err = 0;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- VkMappedMemoryRange *flush_list = NULL;
- int flush_count = 0;
+ const FFVkExecPool *pool = e->parent;
- if (flush) {
- for (int i = 0; i < nb_buffers; i++) {
- const VkMappedMemoryRange flush_buf = {
- .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = buf[i].mem,
- .size = VK_WHOLE_SIZE,
- };
- if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
- continue;
- flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
- (++flush_count)*sizeof(*flush_list));
- if (!flush_list)
- return AVERROR(ENOMEM);
- flush_list[flush_count - 1] = flush_buf;
- }
- }
+ int32_t *res32 = (int32_t *)e->query_data; /* statuses only exist when query_results is 0, so they start the slice */
+ int64_t *res64 = (int64_t *)e->query_data; /* same slice, read as 64-bit elements */
+ int64_t res = 0;
+ VkQueryResultFlags qf = 0;
- if (flush_count) {
- ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
- flush_list);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
- ff_vk_ret2str(ret));
- err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+ qf |= pool->query_64bit ?
+ VK_QUERY_RESULT_64_BIT : 0x0;
+ qf |= pool->query_statuses ?
+ VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
+
+ ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool,
+ e->query_idx,
+ pool->nb_queries,
+ pool->qd_size, e->query_data,
+ pool->query_64bit ? 8 : 4, qf);
+ if (ret != VK_SUCCESS)
+ return ret;
+
+ if (pool->query_statuses && pool->query_64bit) {
+ for (int i = 0; i < pool->query_statuses; i++) {
+ res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
+ res64[i] : res;
+ res64 += pool->query_status_stride;
+ }
+ } else if (pool->query_statuses) {
+ for (int i = 0; i < pool->query_statuses; i++) {
+ res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
+ res32[i] : res;
+ res32 += pool->query_status_stride;
}
}
- for (int i = 0; i < nb_buffers; i++)
- vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
+ if (data)
+ *data = e->query_data;
+ if (status)
+ *status = res;
- return err;
+ return VK_SUCCESS;
}
-void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool)
{
- FFVulkanFunctions *vk = &s->vkfn;
-
- if (!buf || !s->hwctx)
- return;
+ uint32_t idx = atomic_fetch_add_explicit(&pool->idx, 1, memory_order_relaxed); /* unsigned: stays a valid index after the counter wraps */
+ idx %= pool->pool_size;
+ return &pool->contexts[idx];
+}
- if (buf->buf != VK_NULL_HANDLE)
- vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
- if (buf->mem != VK_NULL_HANDLE)
- vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
}
-int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx,
- int width, int height, VkFormat fmt, VkImageTiling tiling,
- VkImageUsageFlagBits usage, VkImageCreateFlags flags,
- void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext)
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
{
- int err;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- AVVulkanDeviceContext *hwctx = s->hwctx;
-
- VkExportSemaphoreCreateInfo ext_sem_info = {
- .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
-#ifdef _WIN32
- .handleTypes = IsWindows8OrGreater()
- ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
- : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
-#else
- .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
-#endif
- };
+ const FFVkExecPool *pool = e->parent;
- VkSemaphoreTypeCreateInfo sem_type_info = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
-#ifdef _WIN32
- .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
-#else
- .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
-#endif
- .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
- .initialValue = 0,
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
- VkSemaphoreCreateInfo sem_spawn = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- .pNext = &sem_type_info,
- };
+ /* Block until this context's fence is signalled, then reset it so it can be passed to the next QueueSubmit */
+ vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+ vk->ResetFences(s->hwctx->act_dev, 1, &e->fence);
- /* Create the image */
- VkImageCreateInfo create_info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .pNext = create_pnext,
- .imageType = VK_IMAGE_TYPE_2D,
- .format = fmt,
- .extent.depth = 1,
- .mipLevels = 1,
- .arrayLayers = 1,
- .flags = flags,
- .tiling = tiling,
- .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- .usage = usage,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = s->qfs,
- .queueFamilyIndexCount = s->nb_qfs,
- .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
- };
+ /* Discard queue dependencies */
+ ff_vk_exec_discard_deps(s, e);
- ret = vk->CreateImage(hwctx->act_dev, &create_info,
- hwctx->alloc, &f->img[0]);
+ ret = vk->BeginCommandBuffer(e->buf, &cmd_start);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n",
+ av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
ff_vk_ret2str(ret));
- err = AVERROR(EINVAL);
- goto fail;
+ return AVERROR_EXTERNAL;
}
- /* Create semaphore */
- ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
- hwctx->alloc, &f->sem[0]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ if (pool->nb_queries)
+ vk->CmdResetQueryPool(e->buf, pool->query_pool,
+ e->query_idx, pool->nb_queries);
+
+ return 0;
+}
+
+void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
+{
+ for (int j = 0; j < e->nb_buf_deps; j++)
+ av_buffer_unref(&e->buf_deps[j]);
+ e->nb_buf_deps = 0;
+
+ for (int j = 0; j < e->nb_frame_deps; j++) {
+ if (e->frame_locked[j]) {
+ AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
+ s->hwfc->unlock_frame(s->frames, f);
+ e->frame_locked[j] = 0;
+ e->frame_update[j] = 0;
+ }
+ av_buffer_unref(&e->frame_deps[j]);
}
+ e->nb_frame_deps = 0;
- f->queue_family[0] = s->nb_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : s->qfs[0];
- f->layout[0] = create_info.initialLayout;
- f->access[0] = 0x0;
- f->sem_value[0] = 0;
+ e->sem_wait_cnt = 0;
+ e->sem_sig_cnt = 0;
+}
- f->flags = 0x0;
- f->tiling = tiling;
+int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
+ AVBufferRef **deps, int nb_deps, int ref)
+{
+ AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size,
+ (e->nb_buf_deps + nb_deps) * sizeof(*dst));
+ if (!dst) {
+ ff_vk_exec_discard_deps(s, e);
+ return AVERROR(ENOMEM);
+ }
- return 0;
+ e->buf_deps = dst;
-fail:
- return err;
+ for (int i = 0; i < nb_deps; i++) {
+ e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i];
+ if (!e->buf_deps[e->nb_buf_deps]) {
+ ff_vk_exec_discard_deps(s, e);
+ return AVERROR(ENOMEM);
+ }
+ e->nb_buf_deps++;
+ }
+
+ return 0;
}
-int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
- VkShaderStageFlagBits stage)
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
+ VkPipelineStageFlagBits in_wait_dst_flag)
{
- VkPushConstantRange *pc;
+ uint8_t *frame_locked;
+ uint8_t *frame_update;
+ AVBufferRef **frame_deps;
+ VkImageLayout *layout_dst;
+ uint32_t *queue_family_dst;
+ VkAccessFlagBits *access_dst;
+
+ AVVkFrame *f = (AVVkFrame *)vkfb->data;
+ int nb_images = ff_vk_count_images(f);
+
+#define ARR_REALLOC(str, arr, alloc_s, cnt) \
+ do { \
+ arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \
+ if (!arr) { \
+ ff_vk_exec_discard_deps(s, e); \
+ return AVERROR(ENOMEM); \
+ } \
+ str->arr = arr; \
+ } while (0)
+
+ for (int i = 0; i < nb_images; i++) {
+ VkSemaphore *sem_wait;
+ uint64_t *sem_wait_val;
+ VkPipelineStageFlagBits *sem_wait_dst;
+ VkSemaphore *sem_sig;
+ uint64_t *sem_sig_val;
+ uint64_t **sem_sig_val_dst;
+
+ ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
+ ARR_REALLOC(e, sem_wait_dst, &e->sem_wait_dst_alloc, e->sem_wait_cnt);
+ ARR_REALLOC(e, sem_wait_val, &e->sem_wait_val_alloc, e->sem_wait_cnt);
+ ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
+ ARR_REALLOC(e, sem_sig_val, &e->sem_sig_val_alloc, e->sem_sig_cnt);
+ ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_cnt);
- pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
- pl->push_consts_num + 1);
- if (!pl->push_consts)
+ e->sem_wait[e->sem_wait_cnt] = f->sem[i];
+ e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
+ e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
+ e->sem_wait_cnt++;
+
+ e->sem_sig[e->sem_sig_cnt] = f->sem[i];
+ e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
+ e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
+ e->sem_sig_cnt++;
+ }
+
+ ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps);
+ ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps);
+ ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps);
+
+ ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps);
+ ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
+ ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps);
+
+ e->frame_deps[e->nb_frame_deps] = av_buffer_ref(vkfb);
+ if (!e->frame_deps[e->nb_frame_deps]) {
+ ff_vk_exec_discard_deps(s, e);
return AVERROR(ENOMEM);
+ }
- pc = &pl->push_consts[pl->push_consts_num++];
- memset(pc, 0, sizeof(*pc));
+ s->hwfc->lock_frame(s->frames, f);
+ e->frame_locked[e->nb_frame_deps] = 1;
+ e->frame_update[e->nb_frame_deps] = 0;
- pc->stageFlags = stage;
- pc->offset = offset;
- pc->size = size;
+ e->nb_frame_deps++;
return 0;
}
-FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
-int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
- FFVkQueueFamilyCtx *qf)
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e,
+ AVBufferRef *vkfb,
+ VkImageMemoryBarrier2 *bar)
+{
+ int i;
+ for (i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data == vkfb->data)
+ break;
+ av_assert0(i < e->nb_frame_deps);
+
+ e->queue_family_dst[i] = bar->dstQueueFamilyIndex;
+ e->access_dst[i] = bar->dstAccessMask;
+ e->layout_dst[i] = bar->newLayout;
+ e->frame_update[i] = 1;
+}
+
+int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
{
VkResult ret;
- FFVkExecContext *e;
FFVulkanFunctions *vk = &s->vkfn;
- VkCommandPoolCreateInfo cqueue_create = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
- .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
- .queueFamilyIndex = qf->queue_family,
- };
- VkCommandBufferAllocateInfo cbuf_create = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
- .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
- .commandBufferCount = qf->nb_queues,
+ VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+ .pWaitSemaphoreValues = e->sem_wait_val,
+ .pSignalSemaphoreValues = e->sem_sig_val,
+ .waitSemaphoreValueCount = e->sem_wait_cnt,
+ .signalSemaphoreValueCount = e->sem_sig_cnt,
};
- e = create_exec_ctx(s);
- if (!e)
- return AVERROR(ENOMEM);
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .pNext = &s_timeline_sem_info,
- e->qf = qf;
+ .commandBufferCount = 1,
+ .pCommandBuffers = &e->buf,
- e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
- if (!e->queues)
- return AVERROR(ENOMEM);
+ .pWaitSemaphores = e->sem_wait,
+ .pWaitDstStageMask = e->sem_wait_dst,
+ .waitSemaphoreCount = e->sem_wait_cnt,
- e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
- if (!e->bufs)
- return AVERROR(ENOMEM);
+ .pSignalSemaphores = e->sem_sig,
+ .signalSemaphoreCount = e->sem_sig_cnt,
+ };
- /* Create command pool */
- ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
- s->hwctx->alloc, &e->pool);
+ ret = vk->EndCommandBuffer(e->buf);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+ av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
ff_vk_ret2str(ret));
+ ff_vk_exec_discard_deps(s, e);
return AVERROR_EXTERNAL;
}
- cbuf_create.commandPool = e->pool;
+ s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, e->qf, e->qi);
+ ret = vk->QueueSubmit(e->queue, 1, &s_info, e->fence);
+ s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, e->qf, e->qi);
- /* Allocate command buffer */
- ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+ av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
ff_vk_ret2str(ret));
+ ff_vk_exec_discard_deps(s, e);
return AVERROR_EXTERNAL;
}
- for (int i = 0; i < qf->nb_queues; i++) {
- FFVkQueueCtx *q = &e->queues[i];
- vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
- i % qf->actual_queues, &q->queue);
- }
+ for (int i = 0; i < e->sem_sig_cnt; i++)
+ *e->sem_sig_val_dst[i] += 1;
- *ctx = e;
+ /* Apply any pending layout/access/queue family updates to the frames, then unlock them */
+ for (int j = 0; j < e->nb_frame_deps; j++) {
+ if (e->frame_locked[j]) {
+ AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
+ if (e->frame_update[j]) {
+ int nb_images = ff_vk_count_images(f);
+ for (int i = 0; i < nb_images; i++) {
+ f->layout[i] = e->layout_dst[j];
+ f->access[i] = e->access_dst[j];
+ f->queue_family[i] = e->queue_family_dst[j];
+ }
+ }
+ s->hwfc->unlock_frame(s->frames, f);
+ e->frame_locked[j] = 0;
+ }
+ }
return 0;
}
-int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
- int nb_queries, VkQueryType type,
- int elem_64bits, void *create_pnext)
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
VkResult ret;
- size_t qd_size;
- int nb_results = nb_queries;
- int nb_statuses = 0 /* Once RADV has support, = nb_queries */;
- int status_stride = 2;
- int result_elem_size = elem_64bits ? 8 : 4;
+ int index = -1;
FFVulkanFunctions *vk = &s->vkfn;
- VkQueryPoolCreateInfo query_pool_info = {
- .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
- .pNext = create_pnext,
- .queryType = type,
- .queryCount = nb_queries*e->qf->nb_queues,
- };
- if (e->query.pool)
- return AVERROR(EINVAL);
-
- /* Video encode quieries produce two results per query */
- if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
- status_stride = 3; /* skip,skip,result,skip,skip,result */
- nb_results *= 2;
- } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
- status_stride = 1;
- nb_results *= 0;
- }
-
- qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size;
-
- e->query.data = av_mallocz(e->qf->nb_queues*qd_size);
- if (!e->query.data)
- return AVERROR(ENOMEM);
-
- ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
- s->hwctx->alloc, &e->query.pool);
- if (ret != VK_SUCCESS)
- return AVERROR_EXTERNAL;
+ VkMemoryAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = alloc_extension,
+ };
- e->query.data_per_queue = qd_size;
- e->query.nb_queries = nb_queries;
- e->query.nb_results = nb_results;
- e->query.nb_statuses = nb_statuses;
- e->query.elem_64bits = elem_64bits;
- e->query.status_stride = status_stride;
+ /* Align if we need to */
+ if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
- return 0;
-}
+ alloc_info.allocationSize = req->size;
-int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
- int query_idx, void **data, int64_t *status)
-{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
- uint8_t *qd;
- int32_t *res32;
- int64_t *res64;
- int64_t res = 0;
- VkQueryResultFlags qf = 0;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+ /* The vulkan spec requires memory types to be sorted in the "optimal"
+ * order, so the first matching type we find will be the best/fastest one */
+ for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
+ /* The memory type must be supported by the requirements (bitfield) */
+ if (!(req->memoryTypeBits & (1 << i)))
+ continue;
- if (!q->submitted) {
- *data = NULL;
- return 0;
- }
+ /* The memory type flags must include our properties */
+ if ((req_flags != UINT32_MAX) &&
+ ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
+ continue;
- qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue;
- qf |= e->query.nb_results && e->query.nb_statuses ?
- VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
- qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0;
- res32 = (int32_t *)(qd + e->query.nb_results*4);
- res64 = (int64_t *)(qd + e->query.nb_results*8);
-
- ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool,
- query_idx,
- e->query.nb_queries,
- e->query.data_per_queue, qd,
- e->query.elem_64bits ? 8 : 4, qf);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ /* Found a suitable memory type */
+ index = i;
+ break;
}
- if (e->query.nb_statuses && e->query.elem_64bits) {
- for (int i = 0; i < e->query.nb_queries; i++) {
- res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
- res64[i] : res;
- res64 += e->query.status_stride;
- }
- } else if (e->query.nb_statuses) {
- for (int i = 0; i < e->query.nb_queries; i++) {
- res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
- res32[i] : res;
- res32 += e->query.status_stride;
- }
+ if (index < 0) {
+ av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+ req_flags);
+ return AVERROR(EINVAL);
}
- if (data)
- *data = qd;
- if (status)
- *status = res;
-
- return 0;
-}
-
-void ff_vk_discard_exec_deps(FFVkExecContext *e)
-{
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+ alloc_info.memoryTypeIndex = index;
- for (int j = 0; j < q->nb_buf_deps; j++)
- av_buffer_unref(&q->buf_deps[j]);
- q->nb_buf_deps = 0;
+ ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
+ s->hwctx->alloc, mem);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR(ENOMEM);
+ }
- for (int j = 0; j < q->nb_frame_deps; j++)
- av_frame_free(&q->frame_deps[j]);
- q->nb_frame_deps = 0;
+ if (mem_flags)
+ *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
- e->sem_wait_cnt = 0;
- e->sem_sig_cnt = 0;
+ return 0;
}
-int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
+ int err;
VkResult ret;
+ int use_ded_mem;
FFVulkanFunctions *vk = &s->vkfn;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
- VkCommandBufferBeginInfo cmd_start = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ VkBufferCreateInfo buf_spawn = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = pNext,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .size = size, /* Gets FFALIGNED during alloc if host visible
+ but should be ok */
};
- /* Create the fence and don't wait for it initially */
- if (!q->fence) {
- VkFenceCreateInfo fence_spawn = {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- };
- ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
- &q->fence);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- } else if (!q->synchronous) {
- vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
+ VkBufferMemoryRequirementsInfo2 req_desc = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
+ };
+ VkMemoryDedicatedAllocateInfo ded_alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = alloc_pNext,
+ };
+ VkMemoryDedicatedRequirements ded_req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ };
+ VkMemoryRequirements2 req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+ .pNext = &ded_req,
+ };
+
+ ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
}
- q->synchronous = 0;
+ req_desc.buffer = buf->buf;
- /* Discard queue dependencies */
- ff_vk_discard_exec_deps(e);
+ vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
+
+ /* In case the implementation prefers/requires dedicated allocation */
+ use_ded_mem = ded_req.prefersDedicatedAllocation |
+ ded_req.requiresDedicatedAllocation;
+ if (use_ded_mem)
+ ded_alloc.buffer = buf->buf;
+
+ err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
+ use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ &buf->flags, &buf->mem);
+ if (err)
+ return err;
- ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
+ ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
+ av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
- if (e->query.pool) {
- e->query.idx = e->qf->cur_queue*e->query.nb_queries;
- vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool,
- e->query.idx, e->query.nb_queries);
- }
+ buf->size = size;
return 0;
}
-VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
-{
- return e->bufs[e->qf->cur_queue];
-}
-
-int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
- VkPipelineStageFlagBits in_wait_dst_flag)
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate)
{
- AVFrame **dst;
- AVVkFrame *f = (AVVkFrame *)frame->data[0];
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
- AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
- int planes = av_pix_fmt_count_planes(fc->sw_format);
-
- for (int i = 0; i < planes; i++) {
- e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
- (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
- if (!e->sem_wait) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
- (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
- if (!e->sem_wait_dst) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc,
- (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val));
- if (!e->sem_wait_val) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
- (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
- if (!e->sem_sig) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc,
- (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val));
- if (!e->sem_sig_val) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkMappedMemoryRange inval_list[64];
+ int inval_count = 0;
- e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
- (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
- if (!e->sem_sig_val_dst) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
+ for (int i = 0; i < nb_buffers; i++) {
+ ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
+ VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
}
-
- e->sem_wait[e->sem_wait_cnt] = f->sem[i];
- e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
- e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
- e->sem_wait_cnt++;
-
- e->sem_sig[e->sem_sig_cnt] = f->sem[i];
- e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
- e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
- e->sem_sig_cnt++;
}
- dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
- (q->nb_frame_deps + 1) * sizeof(*dst));
- if (!dst) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
+ if (!invalidate)
+ return 0;
+
+ for (int i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange ival_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ inval_list[inval_count++] = ival_buf;
}
- q->frame_deps = dst;
- q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
- if (!q->frame_deps[q->nb_frame_deps]) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
+ if (inval_count) {
+ ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+ inval_list);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
}
- q->nb_frame_deps++;
return 0;
}
-int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
+ int flush)
{
+ int err = 0;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
- VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
- .pWaitSemaphoreValues = e->sem_wait_val,
- .pSignalSemaphoreValues = e->sem_sig_val,
- .waitSemaphoreValueCount = e->sem_wait_cnt,
- .signalSemaphoreValueCount = e->sem_sig_cnt,
- };
-
- VkSubmitInfo s_info = {
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &s_timeline_sem_info,
-
- .commandBufferCount = 1,
- .pCommandBuffers = &e->bufs[e->qf->cur_queue],
-
- .pWaitSemaphores = e->sem_wait,
- .pWaitDstStageMask = e->sem_wait_dst,
- .waitSemaphoreCount = e->sem_wait_cnt,
-
- .pSignalSemaphores = e->sem_sig,
- .signalSemaphoreCount = e->sem_sig_cnt,
- };
+ VkMappedMemoryRange flush_list[64];
+ int flush_count = 0;
- ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ if (flush) {
+ for (int i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange flush_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ flush_list[flush_count++] = flush_buf;
+ }
}
- s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
- e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
-
- ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
-
- s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
- e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
-
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ if (flush_count) {
+ ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+ flush_list);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+ }
}
- for (int i = 0; i < e->sem_sig_cnt; i++)
- *e->sem_sig_val_dst[i] += 1;
-
- e->query.idx = e->qf->cur_queue*e->query.nb_queries;
- q->submitted = 1;
+ for (int i = 0; i < nb_buffers; i++)
+ vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
- return 0;
+ return err;
}
-void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
{
FFVulkanFunctions *vk = &s->vkfn;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
- if (!q->submitted)
+
+ if (!buf || !s->hwctx)
return;
- vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
- q->synchronous = 1;
+ if (buf->buf != VK_NULL_HANDLE)
+ vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
+ if (buf->mem != VK_NULL_HANDLE)
+ vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}
-int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef **deps, int nb_deps)
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+ VkShaderStageFlagBits stage)
{
- AVBufferRef **dst;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
- if (!deps || !nb_deps)
- return 0;
+ VkPushConstantRange *pc;
- dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
- (q->nb_buf_deps + nb_deps) * sizeof(*dst));
- if (!dst)
- goto err;
+ pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
+ pl->push_consts_num + 1);
+ if (!pl->push_consts)
+ return AVERROR(ENOMEM);
- q->buf_deps = dst;
+ pc = &pl->push_consts[pl->push_consts_num++];
+ memset(pc, 0, sizeof(*pc));
- for (int i = 0; i < nb_deps; i++) {
- q->buf_deps[q->nb_buf_deps] = deps[i];
- if (!q->buf_deps[q->nb_buf_deps])
- goto err;
- q->nb_buf_deps++;
- }
+ pc->stageFlags = stage;
+ pc->offset = offset;
+ pc->size = size;
return 0;
-
-err:
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
}
-FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
+FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
int unnorm_coords, VkFilter filt)
{
VkResult ret;
@@ -1030,10 +954,6 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
.unnormalizedCoordinates = unnorm_coords,
};
- FFVkSampler *sctx = create_sampler(s);
- if (!sctx)
- return NULL;
-
ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
s->hwctx->alloc, &sctx->sampler[0]);
if (ret != VK_SUCCESS) {
@@ -1048,6 +968,13 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
return sctx;
}
+void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ vk->DestroySampler(s->hwctx->act_dev, sctx->sampler[0],
+ s->hwctx->alloc);
+}
+
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
{
if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA ||
@@ -1122,7 +1049,7 @@ int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
}
/* Add to queue dependencies */
- err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
+ err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
if (err) {
av_buffer_unref(&buf);
return err;
@@ -1133,14 +1060,9 @@ int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
return 0;
}
-FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)
-FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
- VkShaderStageFlags stage)
+int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
+ VkShaderStageFlags stage)
{
- FFVkSPIRVShader *shd = create_shader(pl);
- if (!shd)
- return NULL;
-
av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
@@ -1152,10 +1074,10 @@ FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
GLSLC(0, );
- return shd;
+ return 0;
}
-void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3])
{
shd->local_size[0] = local_size[0];
shd->local_size[1] = local_size[1];
@@ -1166,7 +1088,7 @@ void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}
-void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
+void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio)
{
int line = 0;
const char *p = shd->src.str;
@@ -1188,7 +1110,13 @@ void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
av_bprint_finalize(&buf, NULL);
}
-int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
+void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
+}
+
+int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
const char *entrypoint)
{
int err;
@@ -1437,7 +1365,7 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
return;
}
- set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
+ /* TODO: per-queue remap disabled (was: set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue); set_id is used as-is */
vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
pl->desc_set[set_id],
@@ -1446,12 +1374,12 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
}
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
- VkShaderStageFlagBits stage, int offset,
- size_t size, void *src)
+ FFVulkanPipeline *pl,
+ VkShaderStageFlagBits stage,
+ int offset, size_t size, void *src)
{
FFVulkanFunctions *vk = &s->vkfn;
-
- vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
+ vk->CmdPushConstants(e->buf, pl->pipeline_layout,
stage, offset, size, src);
}
@@ -1558,17 +1486,8 @@ int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
return 0;
}
-FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
-{
- FFVulkanPipeline *pl = create_pipeline(s);
- if (pl)
- pl->qf = qf;
-
- return pl;
-}
-
-int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkQueueFamilyCtx *qf)
{
int i;
VkResult ret;
@@ -1579,6 +1498,8 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
.layout = pl->pipeline_layout,
};
+ pl->qf = qf;
+
for (i = 0; i < pl->shaders_num; i++) {
if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
pipe.stage = pl->shaders[i]->shader;
@@ -1603,73 +1524,24 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
return 0;
}
-void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
+void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
- vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);
+ vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
- for (int i = 0; i < pl->descriptor_sets_num; i++)
- pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
+ /* TODO: desc_staging is not refreshed here any more; the disabled per-queue copy, for each i < pl->descriptor_sets_num, was:
+  * pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue]; */
- vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
+ vk->CmdBindDescriptorSets(e->buf, pl->bind_point,
pl->pipeline_layout, 0,
pl->descriptor_sets_num,
(VkDescriptorSet *)pl->desc_staging,
0, NULL);
-
- e->bound_pl = pl;
-}
-
-static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
-{
- FFVulkanFunctions *vk = &s->vkfn;
-
- /* Make sure all queues have finished executing */
- for (int i = 0; i < e->qf->nb_queues; i++) {
- FFVkQueueCtx *q = &e->queues[i];
-
- if (q->fence) {
- vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
- }
-
- /* Free the fence */
- if (q->fence)
- vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
-
- /* Free buffer dependencies */
- for (int j = 0; j < q->nb_buf_deps; j++)
- av_buffer_unref(&q->buf_deps[j]);
- av_free(q->buf_deps);
-
- /* Free frame dependencies */
- for (int j = 0; j < q->nb_frame_deps; j++)
- av_frame_free(&q->frame_deps[j]);
- av_free(q->frame_deps);
- }
-
- if (e->bufs)
- vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
- if (e->pool)
- vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
- if (e->query.pool)
- vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc);
-
- av_freep(&e->query.data);
- av_freep(&e->bufs);
- av_freep(&e->queues);
- av_freep(&e->sem_sig);
- av_freep(&e->sem_sig_val);
- av_freep(&e->sem_sig_val_dst);
- av_freep(&e->sem_wait);
- av_freep(&e->sem_wait_dst);
- av_freep(&e->sem_wait_val);
- av_free(e);
}
-static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
@@ -1723,8 +1595,6 @@ static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
void ff_vk_uninit(FFVulkanContext *s)
{
- FFVulkanFunctions *vk = &s->vkfn;
-
av_freep(&s->query_props);
av_freep(&s->qf_props);
av_freep(&s->video_props);
@@ -1732,24 +1602,6 @@ void ff_vk_uninit(FFVulkanContext *s)
if (s->spirv_compiler)
s->spirv_compiler->uninit(&s->spirv_compiler);
- for (int i = 0; i < s->exec_ctx_num; i++)
- free_exec_ctx(s, s->exec_ctx[i]);
- av_freep(&s->exec_ctx);
-
- for (int i = 0; i < s->samplers_num; i++) {
- vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
- s->hwctx->alloc);
- av_free(s->samplers[i]);
- }
- av_freep(&s->samplers);
-
- for (int i = 0; i < s->pipelines_num; i++)
- free_pipeline(s, s->pipelines[i]);
- av_freep(&s->pipelines);
-
- av_freep(&s->scratch);
- s->scratch_size = 0;
-
av_buffer_unref(&s->device_ref);
av_buffer_unref(&s->frames_ref);
}
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index dd1bc9c440..a8f3c458fc 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -21,6 +21,8 @@
#define VK_NO_PROTOTYPES
+#include <stdatomic.h>
+
#include "pixdesc.h"
#include "bprint.h"
#include "hwcontext.h"
@@ -100,8 +102,6 @@ typedef struct FFVkBuffer {
typedef struct FFVkQueueFamilyCtx {
int queue_family;
int nb_queues;
- int cur_queue;
- int actual_queues;
} FFVkQueueFamilyCtx;
typedef struct FFVulkanPipeline {
@@ -139,71 +139,88 @@ typedef struct FFVulkanPipeline {
VkDescriptorPoolSize *pool_size_desc;
} FFVulkanPipeline;
-typedef struct FFVkQueueCtx {
- VkFence fence;
+typedef struct FFVkExecContext {
+ const struct FFVkExecPool *parent;
+
+ /* Queue for the execution context */
VkQueue queue;
+ int qf;
+ int qi;
- int synchronous;
- int submitted;
+ /* Command buffer for the context */
+ VkCommandBuffer buf;
+
+ /* Fence for the command buffer */
+ VkFence fence;
+
+ void *query_data;
+ int query_idx;
/* Buffer dependencies */
AVBufferRef **buf_deps;
int nb_buf_deps;
- int buf_deps_alloc_size;
+ unsigned int buf_deps_alloc_size;
/* Frame dependencies */
- AVFrame **frame_deps;
+ AVBufferRef **frame_deps;
+ unsigned int frame_deps_alloc_size;
int nb_frame_deps;
- int frame_deps_alloc_size;
-} FFVkQueueCtx;
-
-typedef struct FFVkExecContext {
- FFVkQueueFamilyCtx *qf;
-
- VkCommandPool pool;
- VkCommandBuffer *bufs;
- FFVkQueueCtx *queues;
-
- struct {
- int idx;
- VkQueryPool pool;
- uint8_t *data;
-
- int nb_queries;
- int nb_results;
- int nb_statuses;
- int elem_64bits;
- size_t data_per_queue;
- int status_stride;
- } query;
-
- AVBufferRef ***deps;
- int *nb_deps;
- int *dep_alloc_size;
-
- FFVulkanPipeline *bound_pl;
VkSemaphore *sem_wait;
- int sem_wait_alloc; /* Allocated sem_wait */
+ unsigned int sem_wait_alloc; /* Allocated sem_wait */
int sem_wait_cnt;
uint64_t *sem_wait_val;
- int sem_wait_val_alloc;
+ unsigned int sem_wait_val_alloc;
VkPipelineStageFlagBits *sem_wait_dst;
- int sem_wait_dst_alloc; /* Allocated sem_wait_dst */
+ unsigned int sem_wait_dst_alloc; /* Allocated sem_wait_dst */
VkSemaphore *sem_sig;
- int sem_sig_alloc; /* Allocated sem_sig */
+ unsigned int sem_sig_alloc; /* Allocated sem_sig */
int sem_sig_cnt;
uint64_t *sem_sig_val;
- int sem_sig_val_alloc;
+ unsigned int sem_sig_val_alloc;
uint64_t **sem_sig_val_dst;
- int sem_sig_val_dst_alloc;
+ unsigned int sem_sig_val_dst_alloc;
+
+ uint8_t *frame_locked;
+ unsigned int frame_locked_alloc_size;
+
+ VkAccessFlagBits *access_dst;
+ unsigned int access_dst_alloc;
+
+ VkImageLayout *layout_dst;
+ unsigned int layout_dst_alloc;
+
+ uint32_t *queue_family_dst;
+ unsigned int queue_family_dst_alloc;
+
+ uint8_t *frame_update;
+ unsigned int frame_update_alloc_size;
} FFVkExecContext;
+typedef struct FFVkExecPool {
+ FFVkQueueFamilyCtx *qf;
+ FFVkExecContext *contexts;
+ atomic_int_least64_t idx;
+
+ VkCommandPool cmd_buf_pool;
+ VkCommandBuffer *cmd_bufs;
+ int pool_size;
+
+ VkQueryPool query_pool;
+ void *query_data;
+ int query_results;
+ int query_statuses;
+ int query_64bit;
+ int query_status_stride;
+ int nb_queries;
+ size_t qd_size;
+} FFVkExecPool;
+
typedef struct FFVulkanContext {
const AVClass *class; /* Filters and encoders use this */
@@ -234,21 +251,6 @@ typedef struct FFVulkanContext {
int output_height;
enum AVPixelFormat output_format;
enum AVPixelFormat input_format;
-
- /* Samplers */
- FFVkSampler **samplers;
- int samplers_num;
-
- /* Exec contexts */
- FFVkExecContext **exec_ctx;
- int exec_ctx_num;
-
- /* Pipelines (each can have 1 shader of each type) */
- FFVulkanPipeline **pipelines;
- int pipelines_num;
-
- void *scratch; /* Scratch memory used only in functions */
- unsigned int scratch_size;
} FFVulkanContext;
/* Identity mapping - r = r, b = b, g = g, a = a */
@@ -260,244 +262,156 @@ extern const VkComponentMapping ff_comp_identity_map;
const char *ff_vk_ret2str(VkResult res);
/**
- * Loads props/mprops/driver_props
+ * Returns 1 if pixfmt is a usable RGB format.
*/
-int ff_vk_load_props(FFVulkanContext *s);
+int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
/**
- * Returns 1 if the image is any sort of supported RGB
+ * Returns the format to use for images in shaders.
*/
-int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
/**
- * Gets the glsl format string for a pixel format
+ * Loads props/mprops/driver_props
*/
-const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
+int ff_vk_load_props(FFVulkanContext *s);
/**
- * Setup the queue families from the hardware device context.
- * Necessary for image creation to work.
+ * Loads queue families into the main context.
+ * Chooses a QF and loads it into a context.
*/
void ff_vk_qf_fill(FFVulkanContext *s);
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ VkQueueFlagBits dev_family);
/**
- * Allocate device memory.
+ * Allocates/frees an execution pool.
*/
-int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
- VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
- VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ FFVkExecPool *pool, int nb_contexts,
+ int nb_queries, VkQueryType query_type, int query_64bit,
+ void *query_create_pnext);
+void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool);
/**
- * Get a queue family index and the number of queues. nb is optional.
+ * Retrieve an execution context from an execution pool. Threadsafe.
*/
-int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb);
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool);
/**
- * Initialize a queue family with a specific number of queues.
- * If nb_queues == 0, use however many queues the queue family has.
+ * Explicitly wait on an execution to be finished.
+ * Starting via ff_vk_exec_start() also waits on it.
*/
-int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues);
/**
- * Rotate through the queues in a queue family.
+ * Performs nb_queries queries and returns their results and statuses.
+ * Execution must have been waited on to produce valid results.
+ */
+VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
+ void **data, int64_t *status);
+
+/**
+ * Start/submit/wait an execution.
+ * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait()
+ * is not necessary (unless using it is just better).
*/
-int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e);
+int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e);
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
/**
- * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
+ * Execution dependency management.
+ * Can attach buffers to executions that will only be unref'd once the
+ * command buffer has finished executing.
+ * Adding a frame dep will *lock the frame*, until either the dependencies
+ * are discarded, the execution is submitted, or a failure happens.
+ * update_frame will update the frame's properties before it is unlocked,
+ * only if submission was successful.
*/
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, int unnorm_coords,
- VkFilter filt);
+int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
+ AVBufferRef **deps, int nb_deps, int ref);
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e,
+ AVBufferRef *vkfb, VkPipelineStageFlagBits in_wait_dst_flag);
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
+ VkImageMemoryBarrier2 *bar);
+void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e);
/**
- * Create an imageview.
- * Guaranteed to remain alive until the queue submission has finished executing,
- * and will be destroyed after that.
+ * Create an imageview and add it as a dependency to an execution.
*/
int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
VkImageView *v, VkImage img, VkFormat fmt,
const VkComponentMapping map);
/**
- * Define a push constant for a given stage into a pipeline.
- * Must be called before the pipeline layout has been initialized.
+ * Memory/buffer/image allocation helpers.
*/
-int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
- VkShaderStageFlagBits stage);
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
- * Inits a pipeline. Everything in it will be auto-freed when calling
- * ff_vk_filter_uninit().
+ * Buffer management code.
*/
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf);
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate);
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
+ int flush);
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
/**
- * Inits a shader for a specific pipeline. Will be auto-freed on uninit.
+ * Sampler management.
*/
-FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
- VkShaderStageFlags stage);
+FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
+ int unnorm_coords, VkFilter filt);
+void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx);
/**
- * Writes the workgroup size for a shader.
+ * Shader management.
*/
-void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
+ VkShaderStageFlags stage);
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio);
+int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
+ const char *entrypoint);
+void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd);
/**
- * Adds a descriptor set to the shader and registers them in the pipeline.
+ * Register a descriptor set.
+ * Update a descriptor set for execution.
*/
int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
int num, int only_print_to_shader);
-
-/**
- * Compiles the shader, entrypoint must be set to "main".
- */
-int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
- const char *entrypoint);
-
-/**
- * Pretty print shader, mainly used by shader compilers.
- */
-void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio);
-
-/**
- * Initializes the pipeline layout after all shaders and descriptor sets have
- * been finished.
- */
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
-
-/**
- * Initializes a compute pipeline. Will pick the first shader with the
- * COMPUTE flag set.
- */
-int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl);
-
-/**
- * Updates a descriptor set via the updaters defined.
- * Can be called immediately after pipeline creation, but must be called
- * at least once before queue submission.
- */
void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
int set_id);
/**
- * Init an execution context for command recording and queue submission.
- * WIll be auto-freed on uninit.
- */
-int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
- FFVkQueueFamilyCtx *qf);
-
-/**
- * Create a query pool for a command context.
- * elem_64bits exists to troll driver devs for compliance. All results
- * and statuses returned should be 32 bits, unless this is set, then it's 64bits.
- */
-int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
- int nb_queries, VkQueryType type,
- int elem_64bits, void *create_pnext);
-
-/**
- * Get results for query.
- * Returns the status of the query.
- * Sets *res to the status of the queries.
- */
-int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
- int query_idx, void **data, int64_t *status);
-
-/**
- * Begin recording to the command buffer. Previous execution must have been
- * completed, which ff_vk_submit_exec_queue() will ensure.
- */
-int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Add a command to bind the completed pipeline and its descriptor sets.
- * Must be called after ff_vk_start_exec_recording() and before submission.
- */
-void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
- FFVulkanPipeline *pl);
-
-/**
- * Updates push constants.
- * Must be called after binding a pipeline if any push constants were defined.
+ * Add/update push constants for execution.
*/
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+ VkShaderStageFlagBits stage);
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
- VkShaderStageFlagBits stage, int offset,
- size_t size, void *src);
+ FFVulkanPipeline *pl,
+ VkShaderStageFlagBits stage,
+ int offset, size_t size, void *src);
/**
- * Gets the command buffer to use for this submission from the exe context.
+ * Pipeline management.
*/
-VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e);
-
-/**
- * Adds a generic AVBufferRef as a queue depenency.
- */
-int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef **deps, int nb_deps);
-
-/**
- * Discards all queue dependencies
- */
-void ff_vk_discard_exec_deps(FFVkExecContext *e);
-
-/**
- * Adds a frame as a queue dependency. This also manages semaphore signalling.
- * Must be called before submission.
- */
-int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
- VkPipelineStageFlagBits in_wait_dst_flag);
-
-/**
- * Submits a command buffer to the queue for execution. Will not block.
- */
-int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Wait on a command buffer's execution. Mainly useful for debugging and
- * development.
- */
-void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Create a VkBuffer with the specified parameters.
- */
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
- void *pNext, void *alloc_pNext,
- VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
-
-/**
- * Maps the buffer to userspace. Set invalidate to 1 if reading the contents
- * is necessary.
- */
-int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
- int nb_buffers, int invalidate);
-
-/**
- * Unmaps the buffer from userspace. Set flush to 1 to write and sync.
- */
-int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
- int flush);
-
-/**
- * Frees a buffer.
- */
-void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
-
-/**
- * Creates an image, allocates and binds memory in the given
- * idx value of the dst frame. If mem is non-NULL, then no memory will be
- * allocated, but instead the given memory will be bound to the image.
- */
-int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx,
- int width, int height, VkFormat fmt, VkImageTiling tiling,
- VkImageUsageFlagBits usage, VkImageCreateFlags flags,
- void *create_pnext,
- VkDeviceMemory *mem, void *alloc_pnext);
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkQueueFamilyCtx *qf);
+int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
+void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanPipeline *pl);
+void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl);
/**
- * Frees the main Vulkan context.
+ * Frees main context.
*/
void ff_vk_uninit(FFVulkanContext *s);
--
2.39.2
[-- Attachment #50: 0049-vulkan-add-ff_vk_count_images.patch --]
[-- Type: text/x-diff, Size: 779 bytes --]
From 2aad41bb35392d7f2e300857a1b0f73b873ec601 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:37:18 +0100
Subject: [PATCH 49/72] vulkan: add ff_vk_count_images()
---
libavutil/vulkan.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index a8f3c458fc..e66ca59ef7 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -253,6 +253,15 @@ typedef struct FFVulkanContext {
enum AVPixelFormat input_format;
} FFVulkanContext;
+/* Returns the number of images in f: leading non-NULL img[] entries, capped at the array size. */
+static inline int ff_vk_count_images(AVVkFrame *f)
+{
+    int cnt = 0;
+    while (cnt < (int)(sizeof(f->img) / sizeof(f->img[0])) && f->img[cnt])
+        cnt++;
+    return cnt;
+}
+
/* Identity mapping - r = r, b = b, g = g, a = a */
extern const VkComponentMapping ff_comp_identity_map;
--
2.39.2
[-- Attachment #51: 0050-vulkan-rewrite-image-handling-code.patch --]
[-- Type: text/x-diff, Size: 64405 bytes --]
From dca500204539da2a17746db4125c476a29851305 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:38:10 +0100
Subject: [PATCH 50/72] vulkan: rewrite image handling code
---
libavutil/vulkan.c | 919 +++++++++++++++++++++++++--------------------
libavutil/vulkan.h | 166 ++++----
2 files changed, 612 insertions(+), 473 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 17a5bd6f3f..20ad269b0a 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -21,12 +23,6 @@
#include "vulkan.h"
#include "vulkan_loader.h"
-#if CONFIG_LIBGLSLANG
-#include "vulkan_glslang.c"
-#elif CONFIG_LIBSHADERC
-#include "vulkan_shaderc.c"
-#endif
-
const VkComponentMapping ff_comp_identity_map = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -92,15 +88,22 @@ int ff_vk_load_props(FFVulkanContext *s)
uint32_t qc = 0;
FFVulkanFunctions *vk = &s->vkfn;
+ s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT,
+ };
+ s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT,
+ .pNext = &s->hprops,
+ };
s->driver_props = (VkPhysicalDeviceDriverProperties) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+ .pNext = &s->desc_buf_props,
};
s->props = (VkPhysicalDeviceProperties2) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = &s->driver_props,
};
-
vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
@@ -373,6 +376,7 @@ int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
return AVERROR_EXTERNAL;
}
+ e->idx = i;
e->parent = pool;
/* Query data */
@@ -496,17 +500,21 @@ void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
for (int j = 0; j < e->nb_frame_deps; j++) {
if (e->frame_locked[j]) {
- AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
- s->hwfc->unlock_frame(s->frames, f);
+ AVFrame *f = e->frame_deps[j];
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ vkfc->unlock_frame(hwfc, vkf);
e->frame_locked[j] = 0;
e->frame_update[j] = 0;
}
- av_buffer_unref(&e->frame_deps[j]);
+ av_frame_free(&e->frame_deps[j]);
}
e->nb_frame_deps = 0;
e->sem_wait_cnt = 0;
e->sem_sig_cnt = 0;
+ e->sem_sig_val_dst_cnt = 0;
}
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
@@ -533,18 +541,25 @@ int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
return 0;
}
-int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
VkPipelineStageFlagBits in_wait_dst_flag)
{
uint8_t *frame_locked;
uint8_t *frame_update;
- AVBufferRef **frame_deps;
+ AVFrame **frame_deps;
VkImageLayout *layout_dst;
uint32_t *queue_family_dst;
VkAccessFlagBits *access_dst;
- AVVkFrame *f = (AVVkFrame *)vkfb->data;
- int nb_images = ff_vk_count_images(f);
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ int nb_images = ff_vk_count_images(vkf);
+
+ /* Don't add duplicates */
+ for (int i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == f->data[0])
+ return 1;
#define ARR_REALLOC(str, arr, alloc_s, cnt) \
do { \
@@ -569,17 +584,18 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef
ARR_REALLOC(e, sem_wait_val, &e->sem_wait_val_alloc, e->sem_wait_cnt);
ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
ARR_REALLOC(e, sem_sig_val, &e->sem_sig_val_alloc, e->sem_sig_cnt);
- ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_cnt);
+ ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);
- e->sem_wait[e->sem_wait_cnt] = f->sem[i];
+ e->sem_wait[e->sem_wait_cnt] = vkf->sem[i];
e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
- e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
+ e->sem_wait_val[e->sem_wait_cnt] = vkf->sem_value[i];
e->sem_wait_cnt++;
- e->sem_sig[e->sem_sig_cnt] = f->sem[i];
- e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
- e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
+ e->sem_sig[e->sem_sig_cnt] = vkf->sem[i];
+ e->sem_sig_val[e->sem_sig_cnt] = vkf->sem_value[i] + 1;
+ e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i];
e->sem_sig_cnt++;
+ e->sem_sig_val_dst_cnt++;
}
ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps);
@@ -590,13 +606,13 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef
ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps);
- e->frame_deps[e->nb_frame_deps] = av_buffer_ref(vkfb);
+ e->frame_deps[e->nb_frame_deps] = av_frame_clone(f);
if (!e->frame_deps[e->nb_frame_deps]) {
ff_vk_exec_discard_deps(s, e);
return AVERROR(ENOMEM);
}
- s->hwfc->lock_frame(s->frames, f);
+ vkfc->lock_frame(hwfc, vkf);
e->frame_locked[e->nb_frame_deps] = 1;
e->frame_update[e->nb_frame_deps] = 0;
@@ -605,22 +621,51 @@ int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef
return 0;
}
-void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef *vkfb,
- VkImageMemoryBarrier2 *bar)
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+ VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar)
{
int i;
for (i = 0; i < e->nb_frame_deps; i++)
- if (e->frame_deps[i]->data == vkfb->data)
+ if (e->frame_deps[i]->data[0] == f->data[0])
break;
av_assert0(i < e->nb_frame_deps);
+ /* Don't update duplicates */
+ if (nb_img_bar && !e->frame_update[i])
+ (*nb_img_bar)++;
+
e->queue_family_dst[i] = bar->dstQueueFamilyIndex;
e->access_dst[i] = bar->dstAccessMask;
e->layout_dst[i] = bar->newLayout;
e->frame_update[i] = 1;
}
+int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
+ VkSemaphore *dst, uint64_t *dst_val,
+ AVFrame *f)
+{
+ uint64_t **sem_sig_val_dst;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+
+ /* Reject unknown frames */
+ int i;
+ for (i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == f->data[0])
+ break;
+ if (i == e->nb_frame_deps)
+ return AVERROR(EINVAL);
+
+ ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);
+
+ *dst = vkf->sem[0];
+ *dst_val = vkf->sem_value[0];
+
+ e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val;
+ e->sem_sig_val_dst_cnt++;
+
+ return 0;
+}
+
int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
{
VkResult ret;
@@ -668,22 +713,26 @@ int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
return AVERROR_EXTERNAL;
}
- for (int i = 0; i < e->sem_sig_cnt; i++)
+ for (int i = 0; i < e->sem_sig_val_dst_cnt; i++)
*e->sem_sig_val_dst[i] += 1;
/* Unlock all frames */
for (int j = 0; j < e->nb_frame_deps; j++) {
if (e->frame_locked[j]) {
- AVVkFrame *f = (AVVkFrame *)e->frame_deps[j]->data;
+ AVFrame *f = e->frame_deps[j];
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+
if (e->frame_update[j]) {
- int nb_images = ff_vk_count_images(f);
+ int nb_images = ff_vk_count_images(vkf);
for (int i = 0; i < nb_images; i++) {
- f->layout[i] = e->layout_dst[j];
- f->access[i] = e->access_dst[j];
- f->queue_family[i] = e->queue_family_dst[j];
+ vkf->layout[i] = e->layout_dst[j];
+ vkf->access[i] = e->access_dst[j];
+ vkf->queue_family[i] = e->queue_family_dst[j];
}
}
- s->hwfc->unlock_frame(s->frames, f);
+ vkfc->unlock_frame(hwfc, vkf);
e->frame_locked[j] = 0;
}
}
@@ -767,6 +816,10 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
but should be ok */
};
+ VkMemoryAllocateFlagsInfo alloc_flags = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
+ .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
+ };
VkBufferMemoryRequirementsInfo2 req_desc = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
};
@@ -796,11 +849,18 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
/* In case the implementation prefers/requires dedicated allocation */
use_ded_mem = ded_req.prefersDedicatedAllocation |
ded_req.requiresDedicatedAllocation;
- if (use_ded_mem)
+ if (use_ded_mem) {
ded_alloc.buffer = buf->buf;
+ ded_alloc.pNext = alloc_pNext;
+ alloc_pNext = &ded_alloc;
+ }
- err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
- use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+ alloc_flags.pNext = alloc_pNext;
+ alloc_pNext = &alloc_flags;
+ }
+
+ err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext,
&buf->flags, &buf->mem);
if (err)
return err;
@@ -812,6 +872,14 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
return AVERROR_EXTERNAL;
}
+ if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+ VkBufferDeviceAddressInfo address_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+ .buffer = buf->buf,
+ };
+ buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
+ }
+
buf->size = size;
return 0;
@@ -933,8 +1001,8 @@ int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
return 0;
}
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
- int unnorm_coords, VkFilter filt)
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+ int unnorm_coords, VkFilter filt)
{
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
@@ -955,24 +1023,14 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
};
ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
- s->hwctx->alloc, &sctx->sampler[0]);
+ s->hwctx->alloc, sampler);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
ff_vk_ret2str(ret));
- return NULL;
+ return AVERROR_EXTERNAL;
}
- for (int i = 1; i < 4; i++)
- sctx->sampler[i] = sctx->sampler[0];
-
- return sctx;
-}
-
-void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx)
-{
- FFVulkanFunctions *vk = &s->vkfn;
- vk->DestroySampler(s->hwctx->act_dev, sctx->sampler[0],
- s->hwctx->alloc);
+ return 0;
}
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
@@ -995,69 +1053,131 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
}
typedef struct ImageViewCtx {
- VkImageView view;
+ VkImageView views[AV_NUM_DATA_POINTERS];
+ int nb_views;
} ImageViewCtx;
-static void destroy_imageview(void *opaque, uint8_t *data)
+static void destroy_imageviews(void *opaque, uint8_t *data)
{
FFVulkanContext *s = opaque;
FFVulkanFunctions *vk = &s->vkfn;
ImageViewCtx *iv = (ImageViewCtx *)data;
- vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
+ for (int i = 0; i < iv->nb_views; i++)
+ vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
+
av_free(iv);
}
-int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
- VkImageView *v, VkImage img, VkFormat fmt,
- const VkComponentMapping map)
+int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
+ VkImageView views[AV_NUM_DATA_POINTERS],
+ AVFrame *f)
{
int err;
+ VkResult ret;
AVBufferRef *buf;
FFVulkanFunctions *vk = &s->vkfn;
-
- VkImageViewCreateInfo imgview_spawn = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .pNext = NULL,
- .image = img,
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = fmt,
- .components = map,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- };
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ const int nb_images = ff_vk_count_images(vkf);
+ const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
ImageViewCtx *iv = av_mallocz(sizeof(*iv));
- VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
- s->hwctx->alloc, &iv->view);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ for (int i = 0; i < nb_planes; i++) {
+ VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_ASPECT_PLANE_0_BIT,
+ VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
+ VkImageViewCreateInfo view_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = NULL,
+ .image = vkf->img[FFMIN(i, nb_images - 1)],
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = rep_fmts[i],
+ .components = ff_comp_identity_map,
+ .subresourceRange = {
+ .aspectMask = plane_aspect[(nb_planes != 1) + i*(nb_planes != 1)],
+ .levelCount = 1,
+ .layerCount = 1,
+ },
+ };
+
+ ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
+ s->hwctx->alloc, &iv->views[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
+ iv->nb_views++;
}
- buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
+ buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageviews, s, 0);
if (!buf) {
- destroy_imageview(s, (uint8_t *)iv);
- return AVERROR(ENOMEM);
+ err = AVERROR(ENOMEM);
+ goto fail;
}
/* Add to queue dependencies */
err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
- if (err) {
+ if (err < 0)
av_buffer_unref(&buf);
- return err;
- }
- *v = iv->view;
+ memcpy(views, iv->views, nb_planes*sizeof(*views));
- return 0;
+ return err;
+
+fail:
+ for (int i = 0; i < iv->nb_views; i++)
+ vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
+ av_free(iv);
+ return err;
+}
+
+void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
+ AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
+ VkPipelineStageFlags src_stage,
+ VkPipelineStageFlags dst_stage,
+ VkAccessFlagBits new_access,
+ VkImageLayout new_layout,
+ uint32_t new_qf)
+{
+ int i, found;
+ AVVkFrame *vkf = (AVVkFrame *)pic->data[0];
+ const int nb_images = ff_vk_count_images(vkf);
+ for (i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == pic->data[0])
+ break;
+ found = (i < e->nb_frame_deps) && (e->frame_update[i]) ? i : -1;
+
+ for (int i = 0; i < nb_images; i++) {
+ bar[*nb_bar] = (VkImageMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+ .pNext = NULL,
+ .srcStageMask = src_stage,
+ .dstStageMask = dst_stage,
+ .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i],
+ .dstAccessMask = new_access,
+ .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0],
+ .newLayout = new_layout,
+ .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0],
+ .dstQueueFamilyIndex = new_qf,
+ .image = vkf->img[i],
+ .subresourceRange = (VkImageSubresourceRange) {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .layerCount = 1,
+ .levelCount = 1,
+ },
+ };
+ *nb_bar += 1;
+ }
+
+ ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL);
}
int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
@@ -1077,11 +1197,11 @@ int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *na
return 0;
}
-void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3])
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z)
{
- shd->local_size[0] = local_size[0];
- shd->local_size[1] = local_size[1];
- shd->local_size[2] = local_size[2];
+ shd->local_size[0] = x;
+ shd->local_size[1] = y;
+ shd->local_size[2] = z;
av_bprintf(&shd->src, "layout (local_size_x = %i, "
"local_size_y = %i, local_size_z = %i) in;\n\n",
@@ -1113,39 +1233,21 @@ void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio)
+/**
+ * Release what a shader holds: finalize its GLSL source AVBPrint and destroy
+ * the Vulkan shader module, if there is one.
+ */
void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
{
FFVulkanFunctions *vk = &s->vkfn;
- vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
+ av_bprint_finalize(&shd->src, NULL);
+
+ /* Skip the destroy call when no module handle is set */
+ if (shd->shader.module)
+ vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
}
-int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
- const char *entrypoint)
+int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
+ uint8_t *spirv, size_t spirv_size, const char *entrypoint)
{
- int err;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
VkShaderModuleCreateInfo shader_create;
- uint8_t *spirv;
- size_t spirv_size;
- void *priv;
shd->shader.pName = entrypoint;
- if (!s->spirv_compiler) {
-#if CONFIG_LIBGLSLANG
- s->spirv_compiler = ff_vk_glslang_init();
-#elif CONFIG_LIBSHADERC
- s->spirv_compiler = ff_vk_shaderc_init();
-#else
- return AVERROR(ENOSYS);
-#endif
- if (!s->spirv_compiler)
- return AVERROR(ENOMEM);
- }
-
- err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv,
- &spirv_size, entrypoint, &priv);
- if (err < 0)
- return err;
-
av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
shd->name, spirv_size);
@@ -1157,11 +1259,8 @@ int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
&shd->shader.module);
-
- s->spirv_compiler->free_shader(s->spirv_compiler, &priv);
-
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
+ av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n",
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
@@ -1190,132 +1289,88 @@ static const struct descriptor_props {
[VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, },
};
-int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
- int num, int only_print_to_shader)
+int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd,
+ FFVulkanDescriptorSetBinding *desc, int nb,
+ int read_only, int print_to_shader_only)
{
VkResult ret;
- VkDescriptorSetLayout *layout;
+ int has_sampler = 0;
FFVulkanFunctions *vk = &s->vkfn;
+ FFVulkanDescriptorSet *set;
+ VkDescriptorSetLayoutCreateInfo desc_create_layout;
- if (only_print_to_shader)
+ if (print_to_shader_only)
goto print;
- pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
- pl->desc_layout_num + pl->qf->nb_queues);
- if (!pl->desc_layout)
+ /* Actual layout allocated for the pipeline */
+ set = av_realloc_array(pl->desc_set, sizeof(*pl->desc_set),
+ pl->nb_descriptor_sets + 1);
+ if (!set)
return AVERROR(ENOMEM);
+ pl->desc_set = set;
+ set = &set[pl->nb_descriptor_sets];
+ memset(set, 0, sizeof(*set));
- pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized,
- sizeof(*pl->desc_set_initialized),
- pl->descriptor_sets_num + 1);
- if (!pl->desc_set_initialized)
+ set->binding = av_mallocz(nb*sizeof(*set->binding));
+ if (!set->binding)
return AVERROR(ENOMEM);
- pl->desc_set_initialized[pl->descriptor_sets_num] = 0;
- layout = &pl->desc_layout[pl->desc_layout_num];
-
- { /* Create descriptor set layout descriptions */
- VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
- VkDescriptorSetLayoutBinding *desc_binding;
-
- desc_binding = av_mallocz(sizeof(*desc_binding)*num);
- if (!desc_binding)
- return AVERROR(ENOMEM);
-
- for (int i = 0; i < num; i++) {
- desc_binding[i].binding = i;
- desc_binding[i].descriptorType = desc[i].type;
- desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
- desc_binding[i].stageFlags = desc[i].stages;
- desc_binding[i].pImmutableSamplers = desc[i].sampler ?
- desc[i].sampler->sampler :
- NULL;
- }
-
- desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- desc_create_layout.pBindings = desc_binding;
- desc_create_layout.bindingCount = num;
-
- for (int i = 0; i < pl->qf->nb_queues; i++) {
- ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
- s->hwctx->alloc, &layout[i]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
- "layout: %s\n", ff_vk_ret2str(ret));
- av_free(desc_binding);
- return AVERROR_EXTERNAL;
- }
- }
-
- av_free(desc_binding);
+ set->binding_offset = av_mallocz(nb*sizeof(*set->binding_offset));
+ if (!set->binding_offset) {
+ av_freep(&set->binding);
+ return AVERROR(ENOMEM);
}
- { /* Pool each descriptor by type and update pool counts */
- for (int i = 0; i < num; i++) {
- int j;
- for (j = 0; j < pl->pool_size_desc_num; j++)
- if (pl->pool_size_desc[j].type == desc[i].type)
- break;
- if (j >= pl->pool_size_desc_num) {
- pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
- sizeof(*pl->pool_size_desc),
- ++pl->pool_size_desc_num);
- if (!pl->pool_size_desc)
- return AVERROR(ENOMEM);
- memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
- }
- pl->pool_size_desc[j].type = desc[i].type;
- pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
- }
- }
+ desc_create_layout = (VkDescriptorSetLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = nb,
+ .pBindings = set->binding,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
+ };
- { /* Create template creation struct */
- VkDescriptorUpdateTemplateCreateInfo *dt;
- VkDescriptorUpdateTemplateEntry *des_entries;
+ for (int i = 0; i < nb; i++) {
+ set->binding[i].binding = i;
+ set->binding[i].descriptorType = desc[i].type;
+ set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
+ set->binding[i].stageFlags = desc[i].stages;
+ set->binding[i].pImmutableSamplers = desc[i].samplers;
- /* Freed after descriptor set initialization */
- des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
- if (!des_entries)
- return AVERROR(ENOMEM);
+ if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ has_sampler |= 1;
+ }
- for (int i = 0; i < num; i++) {
- des_entries[i].dstBinding = i;
- des_entries[i].descriptorType = desc[i].type;
- des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
- des_entries[i].dstArrayElement = 0;
- des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
- des_entries[i].stride = descriptor_props[desc[i].type].struct_size;
- }
+ set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+ if (has_sampler)
+ set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;
- pl->desc_template_info = av_realloc_array(pl->desc_template_info,
- sizeof(*pl->desc_template_info),
- pl->total_descriptor_sets + pl->qf->nb_queues);
- if (!pl->desc_template_info)
- return AVERROR(ENOMEM);
+ ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+ s->hwctx->alloc, &set->layout);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
- dt = &pl->desc_template_info[pl->total_descriptor_sets];
- memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);
+ vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size);
- for (int i = 0; i < pl->qf->nb_queues; i++) {
- dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
- dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
- dt[i].descriptorSetLayout = layout[i];
- dt[i].pDescriptorUpdateEntries = des_entries;
- dt[i].descriptorUpdateEntryCount = num;
- }
- }
+ set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment);
- pl->descriptor_sets_num++;
+ for (int i = 0; i < nb; i++)
+ vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout,
+ i, &set->binding_offset[i]);
- pl->desc_layout_num += pl->qf->nb_queues;
- pl->total_descriptor_sets += pl->qf->nb_queues;
+ set->read_only = read_only;
+ set->nb_bindings = nb;
+ pl->nb_descriptor_sets++;
print:
/* Write shader info */
- for (int i = 0; i < num; i++) {
+ for (int i = 0; i < nb; i++) {
const struct descriptor_props *prop = &descriptor_props[desc[i].type];
- GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
+ GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i);
if (desc[i].mem_layout)
GLSLA(", %s", desc[i].mem_layout);
@@ -1347,171 +1402,260 @@ print:
return 0;
}
-void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- int set_id)
+/**
+ * Allocate and map the descriptor buffers of a pipeline for an execution pool.
+ *
+ * For every descriptor set of pl a host-visible, host-coherent buffer is
+ * created and mapped into set->desc_mem. Read-only sets get a single copy
+ * (aligned_size bytes); other sets get pool->pool_size copies. The matching
+ * pl->desc_bind[] and pl->bound_buffer_indices[] entries are filled as well.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise. On failure,
+ * nothing allocated so far is released here; the caller is
+ * presumably expected to clean up via ff_vk_pipeline_free() -
+ * TODO confirm.
+ */
+int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
+ FFVulkanPipeline *pl)
{
- FFVulkanFunctions *vk = &s->vkfn;
+ int err;
- /* If a set has never been updated, update all queues' sets. */
- if (!pl->desc_set_initialized[set_id]) {
- for (int i = 0; i < pl->qf->nb_queues; i++) {
- int idx = set_id*pl->qf->nb_queues + i;
- vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
- pl->desc_set[idx],
- pl->desc_template[idx],
- s);
- }
- pl->desc_set_initialized[set_id] = 1;
- return;
- }
+ /* One binding info and one buffer index per descriptor set */
+ pl->desc_bind = av_mallocz(pl->nb_descriptor_sets*sizeof(*pl->desc_bind));
+ if (!pl->desc_bind)
+ return AVERROR(ENOMEM);
+
+ pl->bound_buffer_indices = av_mallocz(pl->nb_descriptor_sets*
+ sizeof(*pl->bound_buffer_indices));
+ if (!pl->bound_buffer_indices)
+ return AVERROR(ENOMEM);
-// set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
+ for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+ FFVulkanDescriptorSet *set = &pl->desc_set[i];
+ /* Number of copies of the set's descriptor data held in the buffer */
+ int nb = set->read_only ? 1 : pool->pool_size;
+
+ err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb,
+ NULL, NULL, set->usage,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+ if (err < 0)
+ return err;
+
+ err = ff_vk_map_buffers(s, &set->buf, &set->desc_mem, 1, 0);
+ if (err < 0)
+ return err;
+
+ pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
+ .usage = set->usage,
+ .address = set->buf.address,
+ };
+
+ /* Set i uses buffer i of desc_bind[] */
+ pl->bound_buffer_indices[i] = i;
+ }
- vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
- pl->desc_set[set_id],
- pl->desc_template[set_id],
- s);
+ return 0;
}
-void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
- FFVulkanPipeline *pl,
- VkShaderStageFlagBits stage,
- int offset, size_t size, void *src)
+/**
+ * Write a single descriptor into the mapped descriptor buffer of a set.
+ *
+ * The destination is set->desc_mem, plus the slice of execution context e
+ * (aligned_size * e->idx, or 0 for read-only sets), plus the offset of
+ * binding bind_idx, plus array_idx descriptors of desc_size bytes each.
+ * The descriptor data itself is produced by vkGetDescriptorEXT from
+ * desc_get_info.
+ */
+static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanDescriptorSet *set,
+ int bind_idx, int array_idx,
+ VkDescriptorGetInfoEXT *desc_get_info,
+ size_t desc_size)
{
FFVulkanFunctions *vk = &s->vkfn;
- vk->CmdPushConstants(e->buf, pl->pipeline_layout,
- stage, offset, size, src);
+ const size_t exec_offset = set->read_only ? 0 : set->aligned_size*e->idx;
+ void *desc = set->desc_mem + /* Base */
+ exec_offset + /* Execution context */
+ set->binding_offset[bind_idx] + /* Descriptor binding */
+ array_idx*desc_size; /* Array position */
+
+ vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc);
}
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
+/**
+ * Write a sampler descriptor for execution context e.
+ *
+ * @param set index of the descriptor set in pl->desc_set[]
+ * @param bind binding index inside that set
+ * @param offs array element inside the binding
+ * @param sampler pointer stored as-is in the VkDescriptorGetInfoEXT
+ *
+ * Only bindings declared as VK_DESCRIPTOR_TYPE_SAMPLER are accepted.
+ *
+ * @return 0 on success, AVERROR(EINVAL) for any other binding type
+ */
+int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkSampler *sampler)
{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
+ FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+ /* The descriptor type comes from the layout binding recorded for the set */
+ VkDescriptorGetInfoEXT desc_get_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+ .type = desc_set->binding[bind].descriptorType,
+ };
- pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
- if (!pl->desc_staging)
- return AVERROR(ENOMEM);
+ switch (desc_get_info.type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ desc_get_info.data.pSampler = sampler;
+ break;
+ default:
+ av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+ set, bind, desc_get_info.type);
+ return AVERROR(EINVAL);
+ break;
+ };
- { /* Init descriptor set pool */
- VkDescriptorPoolCreateInfo pool_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
- .poolSizeCount = pl->pool_size_desc_num,
- .pPoolSizes = pl->pool_size_desc,
- .maxSets = pl->total_descriptor_sets,
- };
+ update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info,
+ s->desc_buf_props.samplerDescriptorSize);
- ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
- s->hwctx->alloc, &pl->desc_pool);
- av_freep(&pl->pool_size_desc);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
- "pool: %s\n", ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+ return 0;
+}
- { /* Allocate descriptor sets */
- VkDescriptorSetAllocateInfo alloc_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
- .descriptorPool = pl->desc_pool,
- .descriptorSetCount = pl->total_descriptor_sets,
- .pSetLayouts = pl->desc_layout,
- };
+/**
+ * Write an image descriptor for execution context e.
+ *
+ * @param set index of the descriptor set in pl->desc_set[]
+ * @param bind binding index inside that set
+ * @param offs array element inside the binding
+ * @param view image view to describe
+ * @param layout image layout stored in the descriptor
+ * @param sampler sampler stored in the descriptor
+ *
+ * Accepted binding types are sampled image, storage image, input attachment
+ * and combined image sampler; the descriptor size is picked accordingly
+ * from s->desc_buf_props.
+ *
+ * @return 0 on success, AVERROR(EINVAL) for any other binding type
+ */
+int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkImageView view, VkImageLayout layout, VkSampler sampler)
+{
+ FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+ VkDescriptorGetInfoEXT desc_get_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+ .type = desc_set->binding[bind].descriptorType,
+ };
+ VkDescriptorImageInfo desc_img_info = {
+ .imageView = view,
+ .sampler = sampler,
+ .imageLayout = layout,
+ };
+ size_t desc_size;
- pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
- if (!pl->desc_set)
- return AVERROR(ENOMEM);
+ /* Every accepted type points at the same desc_img_info */
+ switch (desc_get_info.type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ desc_get_info.data.pSampledImage = &desc_img_info;
+ desc_size = s->desc_buf_props.sampledImageDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ desc_get_info.data.pStorageImage = &desc_img_info;
+ desc_size = s->desc_buf_props.storageImageDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ desc_get_info.data.pInputAttachmentImage = &desc_img_info;
+ desc_size = s->desc_buf_props.inputAttachmentDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ desc_get_info.data.pCombinedImageSampler = &desc_img_info;
+ desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize;
+ break;
+ default:
+ av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+ set, bind, desc_get_info.type);
+ return AVERROR(EINVAL);
+ break;
+ };
- ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
- pl->desc_set);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+ update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);
- { /* Finally create the pipeline layout */
- VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging,
- .pushConstantRangeCount = pl->push_consts_num,
- .pPushConstantRanges = pl->push_consts,
- };
+ return 0;
+}
- for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
- pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];
+/**
+ * Write a buffer descriptor for execution context e.
+ *
+ * @param set index of the descriptor set in pl->desc_set[]
+ * @param bind binding index inside that set
+ * @param offs array element inside the binding
+ * @param addr device address of the buffer range
+ * @param len size of the range
+ * @param fmt format stored in the descriptor address info
+ *
+ * Accepted binding types are uniform buffer, storage buffer, uniform texel
+ * buffer and storage texel buffer; the descriptor size is picked accordingly
+ * from s->desc_buf_props.
+ *
+ * @return 0 on success, AVERROR(EINVAL) for any other binding type
+ */
+int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt)
+{
+ FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+ VkDescriptorGetInfoEXT desc_get_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+ .type = desc_set->binding[bind].descriptorType,
+ };
+ /* VkDescriptorAddressInfoEXT carries sType/pNext like other extensible
+ * Vulkan structs, so sType must be set explicitly */
+ VkDescriptorAddressInfoEXT desc_buf_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT,
+ .address = addr,
+ .range = len,
+ .format = fmt,
+ };
+ size_t desc_size;
- ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
- s->hwctx->alloc, &pl->pipeline_layout);
- av_freep(&pl->push_consts);
- pl->push_consts_num = 0;
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+ /* Every accepted type points at the same desc_buf_info */
+ switch (desc_get_info.type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ desc_get_info.data.pUniformBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.uniformBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ desc_get_info.data.pStorageBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.storageBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ desc_get_info.data.pUniformTexelBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ desc_get_info.data.pStorageTexelBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize;
+ break;
+ default:
+ av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+ set, bind, desc_get_info.type);
+ return AVERROR(EINVAL);
+ break;
+ };
- { /* Descriptor template (for tightly packed descriptors) */
- VkDescriptorUpdateTemplateCreateInfo *dt;
+ update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);
- pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
- if (!pl->desc_template)
- return AVERROR(ENOMEM);
+ return 0;
+}
- /* Create update templates for the descriptor sets */
- for (int i = 0; i < pl->total_descriptor_sets; i++) {
- dt = &pl->desc_template_info[i];
- dt->pipelineLayout = pl->pipeline_layout;
- ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
- dt, s->hwctx->alloc,
- &pl->desc_template[i]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
- "template: %s\n", ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+/**
+ * Write one image descriptor per plane of a frame into a single binding.
+ *
+ * The plane count is derived from the software format of the frame's
+ * hardware frames context. views[i] is written to array element i of
+ * (set, binding), all with the same layout and sampler.
+ *
+ * NOTE(review): the return value of ff_vk_set_descriptor_image() is
+ * discarded, so an invalid binding type is only visible through the log.
+ * NOTE(review): f->hw_frames_ctx is dereferenced without a NULL check -
+ * presumably callers only pass hardware frames; confirm.
+ */
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, AVFrame *f,
+ VkImageView *views, int set, int binding,
+ VkImageLayout layout, VkSampler sampler)
+{
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
- /* Free the duplicated memory used for the template entries */
- for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
- dt = &pl->desc_template_info[i];
- av_free((void *)dt->pDescriptorUpdateEntries);
- }
+ for (int i = 0; i < nb_planes; i++)
+ ff_vk_set_descriptor_image(s, pl, e, set, binding, i,
+ views[i], layout, sampler);
+}
+
+/**
+ * Record a push constant update into the command buffer of e.
+ *
+ * Thin wrapper around vkCmdPushConstants using pl->pipeline_layout; stage,
+ * offset, size and src are forwarded unchanged.
+ */
+void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanPipeline *pl,
+ VkShaderStageFlagBits stage,
+ int offset, size_t size, void *src)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ vk->CmdPushConstants(e->buf, pl->pipeline_layout,
+ stage, offset, size, src);
+}
+
+/**
+ * Create pl->pipeline_layout from the layouts of all descriptor sets added
+ * to the pipeline plus its push constant ranges.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the temporary layout array cannot
+ * be allocated, AVERROR_EXTERNAL if vkCreatePipelineLayout fails
+ */
+static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkPipelineLayoutCreateInfo pipeline_layout_info;
+
+ /* Size by the element (a VkDescriptorSetLayout handle), not by the
+ * pointer: non-dispatchable handles are 64 bits wide even on targets
+ * with 32-bit pointers */
+ VkDescriptorSetLayout *desc_layouts = av_malloc(pl->nb_descriptor_sets*
+ sizeof(*desc_layouts));
+ if (!desc_layouts)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < pl->nb_descriptor_sets; i++)
+ desc_layouts[i] = pl->desc_set[i].layout;
+
+ /* Finally create the pipeline layout */
+ pipeline_layout_info = (VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pSetLayouts = desc_layouts,
+ .setLayoutCount = pl->nb_descriptor_sets,
+ .pushConstantRangeCount = pl->push_consts_num,
+ .pPushConstantRanges = pl->push_consts,
+ };
- av_freep(&pl->desc_template_info);
+ ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info,
+ s->hwctx->alloc, &pl->pipeline_layout);
+ /* The array is only needed for the duration of the create call */
+ av_free(desc_layouts);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
}
return 0;
}
int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkQueueFamilyCtx *qf)
+ FFVkSPIRVShader *shd)
{
- int i;
+ int err;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- VkComputePipelineCreateInfo pipe = {
+ VkComputePipelineCreateInfo pipeline_create_info;
+
+ err = init_pipeline_layout(s, pl);
+ if (err < 0)
+ return err;
+
+ pipeline_create_info = (VkComputePipelineCreateInfo) {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
.layout = pl->pipeline_layout,
+ .stage = shd->shader,
};
- pl->qf = qf;
-
- for (i = 0; i < pl->shaders_num; i++) {
- if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
- pipe.stage = pl->shaders[i]->shader;
- break;
- }
- }
- if (i == pl->shaders_num) {
- av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
- return AVERROR(EINVAL);
- }
-
- ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+ ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
+ &pipeline_create_info,
s->hwctx->alloc, &pl->pipeline);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
@@ -1520,77 +1664,57 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
}
pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
+ pl->wg_size[0] = shd->local_size[0];
+ pl->wg_size[1] = shd->local_size[1];
+ pl->wg_size[2] = shd->local_size[2];
return 0;
}
+/**
+ * Bind a pipeline and its descriptor buffers in the command buffer of e.
+ *
+ * Records, in order: the pipeline bind, the bind of all descriptor buffers
+ * in pl->desc_bind[], and the per-set buffer offsets. The offset of a set is
+ * 0 for read-only sets and aligned_size * e->idx otherwise.
+ *
+ * NOTE(review): offsets[] has a fixed size of 1024 entries and
+ * pl->nb_descriptor_sets is not checked against it.
+ */
-void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
+void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
+ VkDeviceSize offsets[1024];
- vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
-
-// for (int i = 0; i < pl->descriptor_sets_num; i++)
- // pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
+ for (int i = 0; i < pl->nb_descriptor_sets; i++)
+ offsets[i] = pl->desc_set[i].read_only ? 0 : pl->desc_set[i].aligned_size*e->idx;
- vk->CmdBindDescriptorSets(e->buf, pl->bind_point,
- pl->pipeline_layout, 0,
- pl->descriptor_sets_num,
- (VkDescriptorSet *)pl->desc_staging,
- 0, NULL);
+ /* Bind pipeline */
+ vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
+ /* Bind descriptor buffers */
+ vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind);
+ /* Binding offsets */
+ vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout,
+ 0, pl->nb_descriptor_sets,
+ pl->bound_buffer_indices, offsets);
}
void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
- for (int i = 0; i < pl->shaders_num; i++) {
- FFVkSPIRVShader *shd = pl->shaders[i];
- av_bprint_finalize(&shd->src, NULL);
- vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
- s->hwctx->alloc);
- av_free(shd);
- }
-
- vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
- vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
- s->hwctx->alloc);
+ if (pl->pipeline)
+ vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
+ if (pl->pipeline_layout)
+ vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
+ s->hwctx->alloc);
- for (int i = 0; i < pl->desc_layout_num; i++) {
- if (pl->desc_template && pl->desc_template[i])
- vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
- s->hwctx->alloc);
- if (pl->desc_layout && pl->desc_layout[i])
- vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
+ for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+ FFVulkanDescriptorSet *set = &pl->desc_set[i];
+ ff_vk_unmap_buffers(s, &set->buf, 1, 0);
+ ff_vk_free_buf(s, &set->buf);
+ if (set->layout)
+ vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout,
s->hwctx->alloc);
+ av_free(set->binding);
+ av_free(set->binding_offset);
}
- /* Also frees the descriptor sets */
- if (pl->desc_pool)
- vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
- s->hwctx->alloc);
-
- av_freep(&pl->desc_staging);
av_freep(&pl->desc_set);
- av_freep(&pl->shaders);
- av_freep(&pl->desc_layout);
- av_freep(&pl->desc_template);
- av_freep(&pl->desc_set_initialized);
+ av_freep(&pl->desc_bind);
av_freep(&pl->push_consts);
pl->push_consts_num = 0;
-
- /* Only freed in case of failure */
- av_freep(&pl->pool_size_desc);
- if (pl->desc_template_info) {
- for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
- VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
- av_free((void *)dt->pDescriptorUpdateEntries);
- }
- av_freep(&pl->desc_template_info);
- }
-
- av_free(pl);
}
void ff_vk_uninit(FFVulkanContext *s)
@@ -1599,9 +1723,6 @@ void ff_vk_uninit(FFVulkanContext *s)
av_freep(&s->qf_props);
av_freep(&s->video_props);
- if (s->spirv_compiler)
- s->spirv_compiler->uninit(&s->spirv_compiler);
-
av_buffer_unref(&s->device_ref);
av_buffer_unref(&s->frames_ref);
}
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index e66ca59ef7..1321fb8ba8 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -30,11 +30,6 @@
#include "hwcontext_vulkan.h"
#include "vulkan_loader.h"
-#define FF_VK_DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
- VK_IMAGE_USAGE_STORAGE_BIT | \
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
- VK_IMAGE_USAGE_TRANSFER_DST_BIT)
-
/* GLSL management macros */
#define INDENT(N) INDENT_##N
#define INDENT_0
@@ -59,6 +54,8 @@
goto fail; \
} while (0)
+/* Expands to a brace initializer holding x four times. Note that x is
+ * expanded, and therefore evaluated, four times. Presumably meant for the
+ * samplers[4] member of FFVulkanDescriptorSetBinding - TODO confirm. */
+#define DUP_SAMPLER(x) { x, x, x, x }
+
typedef struct FFVkSPIRVShader {
const char *name; /* Name for id/debugging purposes */
AVBPrint src;
@@ -66,19 +63,6 @@ typedef struct FFVkSPIRVShader {
VkPipelineShaderStageCreateInfo shader;
} FFVkSPIRVShader;
-typedef struct FFVkSPIRVCompiler {
- void *priv;
- int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx,
- struct FFVkSPIRVShader *shd, uint8_t **data,
- size_t *size, const char *entrypoint, void **opaque);
- void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque);
- void (*uninit)(struct FFVkSPIRVCompiler **ctx);
-} FFVkSPIRVCompiler;
-
-typedef struct FFVkSampler {
- VkSampler sampler[4];
-} FFVkSampler;
-
typedef struct FFVulkanDescriptorSetBinding {
const char *name;
VkDescriptorType type;
@@ -88,8 +72,7 @@ typedef struct FFVulkanDescriptorSetBinding {
uint32_t dimensions; /* Needed for e.g. sampler%iD */
uint32_t elems; /* 0 - scalar, 1 or more - vector */
VkShaderStageFlags stages;
- FFVkSampler *sampler; /* Sampler to use for all elems */
- void *updater; /* Pointer to VkDescriptor*Info */
+ VkSampler samplers[4]; /* Sampler to use for all elems */
} FFVulkanDescriptorSetBinding;
typedef struct FFVkBuffer {
@@ -97,6 +80,7 @@ typedef struct FFVkBuffer {
VkDeviceMemory mem;
VkMemoryPropertyFlagBits flags;
size_t size;
+ VkDeviceAddress address;
} FFVkBuffer;
typedef struct FFVkQueueFamilyCtx {
@@ -104,42 +88,45 @@ typedef struct FFVkQueueFamilyCtx {
int nb_queues;
} FFVkQueueFamilyCtx;
-typedef struct FFVulkanPipeline {
- FFVkQueueFamilyCtx *qf;
+/* State kept per descriptor set of a pipeline when descriptor buffers are
+ * used. */
+typedef struct FFVulkanDescriptorSet {
+ /* Layout created by ff_vk_pipeline_descriptor_set_add() */
+ VkDescriptorSetLayout layout;
+ /* Descriptor buffer and its host mapping, both set up in
+ * ff_vk_exec_pipeline_register() */
+ FFVkBuffer buf;
+ uint8_t *desc_mem;
+ /* Size reported by vkGetDescriptorSetLayoutSizeEXT for layout */
+ VkDeviceSize layout_size;
+ /* layout_size rounded up to the alignment named on the right */
+ VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */
+ VkDeviceSize total_size; /* Once registered to an exec context */
+ /* Buffer usage flags requested when buf is created */
+ VkBufferUsageFlags usage;
+ /* nb_bindings layout bindings and their byte offsets inside the set */
+ VkDescriptorSetLayoutBinding *binding;
+ VkDeviceSize *binding_offset;
+ int nb_bindings;
+
+ /* When set, buf holds a single copy of the descriptors instead of one
+ * copy per execution context */
+ int read_only;
+} FFVulkanDescriptorSet;
+
+typedef struct FFVulkanPipeline {
VkPipelineBindPoint bind_point;
/* Contexts */
VkPipelineLayout pipeline_layout;
VkPipeline pipeline;
- /* Shaders */
- FFVkSPIRVShader **shaders;
- int shaders_num;
-
/* Push consts */
VkPushConstantRange *push_consts;
int push_consts_num;
+ /* Workgroup size, copied from the shader's local_size by
+ * ff_vk_init_compute_pipeline() */
+ int wg_size[3];
+
/* Descriptors */
- VkDescriptorSetLayout *desc_layout;
- VkDescriptorPool desc_pool;
- VkDescriptorSet *desc_set;
- void **desc_staging;
- VkDescriptorSetLayoutBinding **desc_binding;
- VkDescriptorUpdateTemplate *desc_template;
- int *desc_set_initialized;
- int desc_layout_num;
- int descriptor_sets_num;
- int total_descriptor_sets;
- int pool_size_desc_num;
-
- /* Temporary, used to store data in between initialization stages */
- VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
- VkDescriptorPoolSize *pool_size_desc;
+ /* Array of nb_descriptor_sets sets, grown by
+ * ff_vk_pipeline_descriptor_set_add() */
+ FFVulkanDescriptorSet *desc_set;
+ /* Per-set buffer binding info and buffer indices, both allocated in
+ * ff_vk_exec_pipeline_register() */
+ VkDescriptorBufferBindingInfoEXT *desc_bind;
+ uint32_t *bound_buffer_indices;
+ int nb_descriptor_sets;
} FFVulkanPipeline;
typedef struct FFVkExecContext {
+ int idx;
const struct FFVkExecPool *parent;
/* Queue for the execution context */
@@ -162,7 +149,7 @@ typedef struct FFVkExecContext {
unsigned int buf_deps_alloc_size;
/* Frame dependencies */
- AVBufferRef **frame_deps;
+ AVFrame **frame_deps;
unsigned int frame_deps_alloc_size;
int nb_frame_deps;
@@ -185,6 +172,7 @@ typedef struct FFVkExecContext {
uint64_t **sem_sig_val_dst;
unsigned int sem_sig_val_dst_alloc;
+ int sem_sig_val_dst_cnt;
uint8_t *frame_locked;
unsigned int frame_locked_alloc_size;
@@ -229,6 +217,8 @@ typedef struct FFVulkanContext {
VkPhysicalDeviceProperties2 props;
VkPhysicalDeviceDriverProperties driver_props;
VkPhysicalDeviceMemoryProperties mprops;
+ VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;
+ VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props;
VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
VkQueueFamilyVideoPropertiesKHR *video_props;
VkQueueFamilyProperties2 *qf_props;
@@ -244,8 +234,6 @@ typedef struct FFVulkanContext {
uint32_t qfs[5];
int nb_qfs;
- FFVkSPIRVCompiler *spirv_compiler;
-
/* Properties */
int output_width;
int output_height;
@@ -286,15 +274,15 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
int ff_vk_load_props(FFVulkanContext *s);
/**
- * Loads queue families into the main context.
* Chooses a QF and loads it into a context.
*/
-void ff_vk_qf_fill(FFVulkanContext *s);
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
VkQueueFlagBits dev_family);
/**
* Allocates/frees an execution pool.
+ * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add()
+ * has been called.
*/
int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
FFVkExecPool *pool, int nb_contexts,
@@ -340,17 +328,28 @@ void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
AVBufferRef **deps, int nb_deps, int ref);
int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef *vkfb, VkPipelineStageFlagBits in_wait_dst_flag);
-void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef *vkfb,
- VkImageMemoryBarrier2 *bar);
+ AVFrame *f, VkPipelineStageFlagBits in_wait_dst_flag);
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+ VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar);
+int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
+ VkSemaphore *dst, uint64_t *dst_val,
+ AVFrame *f);
void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e);
/**
* Create an imageview and add it as a dependency to an execution.
*/
-int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
- VkImageView *v, VkImage img, VkFormat fmt,
- const VkComponentMapping map);
+int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
+ VkImageView views[AV_NUM_DATA_POINTERS],
+ AVFrame *f);
+
+void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
+ AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
+ VkPipelineStageFlags src_stage,
+ VkPipelineStageFlags dst_stage,
+ VkAccessFlagBits new_access,
+ VkImageLayout new_layout,
+ uint32_t new_qf);
/**
* Memory/buffer/image allocation helpers.
@@ -372,33 +371,22 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
/**
- * Sampler management.
+ * Create a sampler.
*/
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, FFVkSampler *sctx,
- int unnorm_coords, VkFilter filt);
-void ff_vk_sampler_free(FFVulkanContext *s, FFVkSampler *sctx);
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+ int unnorm_coords, VkFilter filt);
/**
* Shader management.
*/
int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
VkShaderStageFlags stage);
-void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z);
void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio);
-int ff_vk_shader_compile(FFVulkanContext *s, FFVkSPIRVShader *shd,
- const char *entrypoint);
+int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
+ uint8_t *spirv, size_t spirv_size, const char *entrypoint);
void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd);
-/**
- * Register a descriptor set.
- * Update a descriptor set for execution.
- */
-int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
- int num, int only_print_to_shader);
-void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- int set_id);
-
/**
* Add/update push constants for execution.
*/
@@ -410,15 +398,45 @@ void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
int offset, size_t size, void *src);
/**
- * Pipeline management.
+ * Add descriptor to a pipeline. Must be called before pipeline init.
*/
+int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd,
+ FFVulkanDescriptorSetBinding *desc, int nb,
+ int read_only, int print_to_shader_only);
+
+/* Initialize/free a pipeline. */
int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkQueueFamilyCtx *qf);
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
-void ff_vk_pipeline_bind_exec(FFVulkanContext *s, FFVkExecContext *e,
- FFVulkanPipeline *pl);
+ FFVkSPIRVShader *shd);
void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl);
+/**
+ * Register a pipeline with an exec pool.
+ * Pool may be NULL if all descriptor sets are read-only.
+ */
+int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
+ FFVulkanPipeline *pl);
+
+/* Bind pipeline */
+void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanPipeline *pl);
+
+/* Update sampler/image/buffer descriptors. e may be NULL for read-only descriptors. */
+int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkSampler *sampler);
+int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkImageView view, VkImageLayout layout, VkSampler sampler);
+int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt);
+
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, AVFrame *f,
+ VkImageView *views, int set, int binding,
+ VkImageLayout layout, VkSampler sampler);
+
/**
* Frees main context.
*/
--
2.39.2
[-- Attachment #52: 0051-hwcontext_vulkan-rewrite-to-support-multiplane-surfa.patch --]
[-- Type: text/x-diff, Size: 68673 bytes --]
From f36680714e0636288dacf687e766a9222fe04867 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:37:35 +0100
Subject: [PATCH 51/72] hwcontext_vulkan: rewrite to support multiplane
surfaces
---
libavutil/hwcontext_vulkan.c | 744 +++++++++++++++++------------------
libavutil/hwcontext_vulkan.h | 69 ++--
2 files changed, 411 insertions(+), 402 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index e7c14fad74..027ecc76b1 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -62,6 +64,8 @@ typedef struct VulkanQueueCtx {
VkFence fence;
VkQueue queue;
int was_synchronous;
+ int qf;
+ int qidx;
/* Buffer dependencies */
AVBufferRef **buf_deps;
@@ -116,6 +120,11 @@ typedef struct VulkanDevicePriv {
} VulkanDevicePriv;
typedef struct VulkanFramesPriv {
+ const VkFormat *fmts;
+ int nb_images;
+ VkImageAspectFlags aspect;
+ const struct FFVkFormatEntry *fmt;
+
/* Image conversions */
VulkanExecCtx conv_ctx;
@@ -145,112 +154,201 @@ typedef struct AVVkFrameInternal {
#endif
} AVVkFrameInternal;
-#define ADD_VAL_TO_LIST(list, count, val) \
- do { \
- list = av_realloc_array(list, sizeof(*list), ++count); \
- if (!list) { \
- err = AVERROR(ENOMEM); \
- goto fail; \
- } \
- list[count - 1] = av_strdup(val); \
- if (!list[count - 1]) { \
- err = AVERROR(ENOMEM); \
- goto fail; \
- } \
- } while(0)
-
-#define RELEASE_PROPS(props, count) \
- if (props) { \
- for (int i = 0; i < count; i++) \
- av_free((void *)((props)[i])); \
- av_free((void *)props); \
- }
+#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT)
+#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)
-static const struct {
+static const struct FFVkFormatEntry {
+ VkFormat vkf;
enum AVPixelFormat pixfmt;
- const VkFormat vkfmts[5];
-} vk_pixfmt_planar_map[] = {
- { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
-
- { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
- { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
- { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
- { AV_PIX_FMT_P012, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
- { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
-
- { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-
- { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
- { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-
- { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- /* There is no AV_PIX_FMT_YUVA420P12 */
- { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_VUYX, { VK_FORMAT_R8G8B8A8_UNORM } },
- { AV_PIX_FMT_XV36, { VK_FORMAT_R16G16B16A16_UNORM } },
-
- { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } },
- { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } },
- { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } },
- { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } },
- { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } },
- { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
- { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
- { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
- { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
- { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } },
- { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } },
-
- /* Lower priority as there's an endianess-dependent overlap between these
- * and rgba/bgr0, and PACK32 formats are more limited */
- { AV_PIX_FMT_BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
- { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
-
- { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
-
- { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
- { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+ VkImageAspectFlags aspect;
+ int vk_planes;
+ int nb_images;
+ int nb_images_fallback;
+ const VkFormat fallback[5];
+} vk_formats_list[] = {
+ /* Gray formats */
+ { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } },
+
+ /* RGB formats */
+ { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+ { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
+ { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } },
+ { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } },
+ { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } },
+ { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+ { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
+ { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
+ { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
+ { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
+
+ /* Planar RGB */
+ { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+ { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+
+ /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
+ { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+ /* Two-plane 422 YUV at 8, 10 and 16 bits */
+ { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+ /* Two-plane 444 YUV at 8, 10 and 16 bits */
+ { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+ /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */
+ { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+ /* Single plane 422 at 8, 10 and 12 bits */
+ { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+ { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
};
+static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list);
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
- for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++)
- if (vk_pixfmt_planar_map[i].pixfmt == p)
- return vk_pixfmt_planar_map[i].vkfmts;
+ for (int i = 0; i < nb_vk_formats_list; i++)
+ if (vk_formats_list[i].pixfmt == p)
+ return vk_formats_list[i].fallback;
+ return NULL;
+}
+
+static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p)
+{
+ for (int i = 0; i < nb_vk_formats_list; i++)
+ if (vk_formats_list[i].pixfmt == p)
+ return &vk_formats_list[i];
return NULL;
}
+/*
+ * Malitia pura, Khronos: format feature flags and image usage flags describe
+ * the same video capabilities with different bit values, so generate a pair
+ * of converters for the video decode/encode bits.
+ *
+ * FN_MAP_TO() emits "static dst_t map_<src>_to_<dst>(src_t mask2)". Inside it,
+ * mask1 is the result being built and mask2 is the function argument. Each
+ * MAP_TO() row pairs a format feature bit (flag1) with the matching image
+ * usage bit (flag2); MAP_TO() itself is redefined before each instantiation
+ * to select the conversion direction.
+ */
+#define FN_MAP_TO(dst_t, dst, src_t, src) \
+    static dst_t map_ ##src## _to_ ##dst(src_t mask2) \
+    { \
+        dst_t mask1 = 0x0; \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \
+               VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR, \
+               VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR); \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR, \
+               VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR); \
+        MAP_TO(mask1, mask2, VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR, \
+               VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); \
+        return mask1; \
+    }
+
+/* Image usage (argument, flag2) -> format features (result, flag1). */
+#define MAP_TO(mask1, mask2, flag1, flag2) if (mask2 & flag2) mask1 |= flag1;
+FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage)
+#undef MAP_TO
+/* Format features (argument, flag1) -> image usage (result, flag2).
+ * The argument must be the mask that is tested and the result the one that
+ * is written, otherwise the function can only ever return 0. */
+#define MAP_TO(mask1, mask2, flag1, flag2) if (mask2 & flag1) mask1 |= flag2;
+FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats)
+#undef MAP_TO
+#undef FN_MAP_TO
+
+/**
+ * Find the Vulkan format(s) to use for a pixel format and check that the
+ * device supports them with optimal tiling.
+ *
+ * The native format from vk_formats_list is reported when it supports all of
+ * the basic features (sampling, transfer source and destination). Multiplane
+ * formats that fail that check are reported as their list of per-plane
+ * fallback formats instead, provided the first fallback format is usable.
+ *
+ * @param dev_ctx          device to query
+ * @param p                pixel format to look up
+ * @param additional_usage extra image usage required by the caller; only the
+ *                         video decode/encode bits are checked, and all of
+ *                         the requested ones must be supported by the native
+ *                         format
+ * @param fmts             if non-NULL, set to the format list to create
+ *                         images with
+ * @param nb_images        if non-NULL, set to the number of images needed
+ * @param aspect           if non-NULL, set to the image aspect of the entry
+ * @param supported_usage  if non-NULL, set to the video decode/encode usage
+ *                         bits supported by the reported representation
+ * @return 0 on success, AVERROR(ENOTSUP) if the format is known but not
+ *         usable on this device, AVERROR(EINVAL) if the pixel format has no
+ *         entry in vk_formats_list
+ */
+static int av_vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
+                                 VkImageUsageFlags additional_usage, const VkFormat **fmts,
+                                 int *nb_images, VkImageAspectFlags *aspect,
+                                 VkImageUsageFlags *supported_usage)
+{
+    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+    VulkanDevicePriv *priv = dev_ctx->internal->priv;
+    FFVulkanFunctions *vk = &priv->vkfn;
+
+    VkFormatProperties2 prop = {
+        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+    };
+    const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
+                                                 VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
+                                                 VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
+    const VkFormatFeatureFlagBits2 full_flags = VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT |
+                                                basic_flags;
+
+    const VkFormatFeatureFlagBits2 additional_flags = map_usage_to_feats(additional_usage);
+
+    for (int i = 0; i < nb_vk_formats_list; i++) {
+        if (vk_formats_list[i].pixfmt == p) {
+            /* The field is a VkFormatFeatureFlags mask, so point at it with
+             * that type rather than with the FlagBits enum type. */
+            const VkFormatFeatureFlags *feat = &prop.formatProperties.optimalTilingFeatures;
+            VkFormatFeatureFlagBits2 feats_vk1, feats_vk2;
+            int basics, full, additional;
+
+            vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, vk_formats_list[i].vkf,
+                                                   &prop);
+
+            /* We want at least the basics supported: every basic bit has to
+             * be present, a single one is not enough to use the image. */
+            feats_vk1 = *feat;
+            basics = (feats_vk1 & basic_flags) == basic_flags;
+            /* Likewise, every requested additional feature must be there. */
+            additional = (feats_vk1 & additional_flags) == additional_flags;
+
+            /* If basics are not supported, OR we have multiplane images,
+             * check the fallback/single-plane rep for support. */
+            if (!basics || vk_formats_list[i].vk_planes > 1)
+                vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
+                                                       vk_formats_list[i].fallback[0],
+                                                       &prop);
+
+            /* prop now describes the fallback format if it was queried above,
+             * the native format otherwise. */
+            feats_vk2 = *feat;
+            /* NOTE(review): this passes if ANY bit of full_flags is set, so
+             * storage image support is not actually required here. Kept as
+             * is, since requiring it would reject formats that are accepted
+             * today - confirm the intended policy. */
+            full = !!(feats_vk2 & full_flags);
+
+            if (additional_flags && !additional) {
+                return AVERROR(ENOTSUP);
+            } else if (full && basics) {
+                /* Native representation: a single (possibly multiplane) image. */
+                if (fmts)
+                    *fmts = &vk_formats_list[i].vkf;
+                if (nb_images)
+                    *nb_images = 1;
+                if (aspect)
+                    *aspect = vk_formats_list[i].aspect;
+                if (supported_usage)
+                    *supported_usage = map_feats_to_usage(feats_vk1);
+                return 0;
+            } else if (full && (vk_formats_list[i].vk_planes > 1)) {
+                /* Fallback representation: one single-plane image per plane. */
+                if (fmts)
+                    *fmts = vk_formats_list[i].fallback;
+                if (nb_images)
+                    *nb_images = vk_formats_list[i].nb_images_fallback;
+                if (aspect)
+                    *aspect = vk_formats_list[i].aspect;
+                if (supported_usage)
+                    *supported_usage = map_feats_to_usage(feats_vk2);
+                return 0;
+            } else {
+                return AVERROR(ENOTSUP);
+            }
+        }
+    }
+
+    return AVERROR(EINVAL);
+}
+
static const void *vk_find_struct(const void *chain, VkStructureType stype)
{
const VkBaseInStructure *in = chain;
@@ -276,33 +374,6 @@ static void vk_link_struct(void *chain, void *in)
out->pNext = in;
}
-static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
- int linear)
-{
- AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
- VulkanDevicePriv *priv = dev_ctx->internal->priv;
- FFVulkanFunctions *vk = &priv->vkfn;
- const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
- int planes = av_pix_fmt_count_planes(p);
-
- if (!fmt)
- return 0;
-
- for (int i = 0; i < planes; i++) {
- VkFormatFeatureFlags flags;
- VkFormatProperties2 prop = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
- };
- vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
- flags = linear ? prop.formatProperties.linearTilingFeatures :
- prop.formatProperties.optimalTilingFeatures;
- if (!(flags & FF_VK_DEFAULT_USAGE_FLAGS))
- return 0;
- }
-
- return 1;
-}
-
static int load_libvulkan(AVHWDeviceContext *ctx)
{
AVVulkanDeviceContext *hwctx = ctx->hwctx;
@@ -435,6 +506,27 @@ static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
return 0;
}
+/*
+ * Append a copy of the string val to the av_malloc()ed string array list,
+ * which currently holds count entries. list and count must be lvalues.
+ *
+ * On allocation failure this sets the caller's "err" to AVERROR(ENOMEM) and
+ * jumps to the caller's "fail" label. list and count are left describing the
+ * entries stored so far, so they can still be released with RELEASE_PROPS().
+ */
+#define ADD_VAL_TO_LIST(list, count, val) \
+    do { \
+        /* Grow through a temporary: assigning the result straight to list \
+         * would lose the only pointer to the array if the call fails. */ \
+        void *tmp_list_ = av_realloc_array(list, (count) + 1, sizeof(*list)); \
+        if (!tmp_list_) { \
+            err = AVERROR(ENOMEM); \
+            goto fail; \
+        } \
+        list = tmp_list_; \
+        list[count] = av_strdup(val); \
+        if (!list[count]) { \
+            err = AVERROR(ENOMEM); \
+            goto fail; \
+        } \
+        /* Only count the entry once it is actually stored. */ \
+        count++; \
+    } while(0)
+
+/*
+ * Free the count strings stored in the array props, then the array itself.
+ * Does nothing if props is NULL.
+ */
+#define RELEASE_PROPS(props, count) \
+    if (props) { \
+        for (int i = 0; i < count; i++) \
+            av_free((void *)((props)[i])); \
+        av_free((void *)props); \
+    }
+
static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
const char * const **dst, uint32_t *num, int debug)
{
@@ -683,6 +775,10 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VkApplicationInfo application_info = {
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pApplicationName = "ffmpeg",
+ .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
+ LIBAVUTIL_VERSION_MINOR,
+ LIBAVUTIL_VERSION_MICRO),
.pEngineName = "libavutil",
.apiVersion = VK_API_VERSION_1_3,
.engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
@@ -1121,6 +1217,8 @@ static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
VulkanQueueCtx *q = &cmd->queues[i];
vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
q->was_synchronous = 1;
+ q->qf = queue_family_index;
+ q->qidx = i;
}
return 0;
@@ -1256,6 +1354,7 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
VkResult ret;
VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
FFVulkanFunctions *vk = &p->vkfn;
ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
@@ -1269,7 +1368,9 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
s_info->commandBufferCount = 1;
+ hwctx->lock_queue(hwfc->device_ctx, q->qf, q->qidx);
ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence);
+ hwctx->unlock_queue(hwfc->device_ctx, q->qf, q->qidx);
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n",
vk_ret2str(ret));
@@ -1284,7 +1385,6 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
q->was_synchronous = synchronous;
if (synchronous) {
- AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
vk->ResetFences(hwctx->act_dev, 1, &q->fence);
unref_exec_ctx_deps(hwfc, cmd);
@@ -1446,12 +1546,6 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
if (opt_d)
p->use_linear_images = strtol(opt_d->value, NULL, 10);
- opt_d = av_dict_get(opts, "contiguous_planes", NULL, 0);
- if (opt_d)
- p->contiguous_planes = strtol(opt_d->value, NULL, 10);
- else
- p->contiguous_planes = -1;
-
hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;
@@ -1690,8 +1784,10 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
int count = 0;
VulkanDevicePriv *p = ctx->internal->priv;
- for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
- count += pixfmt_is_supported(ctx, i, p->use_linear_images);
+ for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
+ count += av_vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
+ 0, NULL, NULL, NULL, NULL) >= 0;
+ }
#if CONFIG_CUDA
if (p->dev_is_nvidia)
@@ -1704,9 +1800,12 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
return AVERROR(ENOMEM);
count = 0;
- for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
- if (pixfmt_is_supported(ctx, i, p->use_linear_images))
- constraints->valid_sw_formats[count++] = i;
+ for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
+ if (av_vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
+ 0, NULL, NULL, NULL, NULL) >= 0) {
+ constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt;
+ }
+ }
#if CONFIG_CUDA
if (p->dev_is_nvidia)
@@ -1714,8 +1813,8 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
#endif
constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
- constraints->min_width = 0;
- constraints->min_height = 0;
+ constraints->min_width = 1;
+ constraints->min_height = 1;
constraints->max_width = p->props.properties.limits.maxImageDimension2D;
constraints->max_height = p->props.properties.limits.maxImageDimension2D;
@@ -1789,7 +1888,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
static void vulkan_free_internal(AVVkFrame *f)
{
- AVVkFrameInternal *internal = f->internal;
+ av_unused AVVkFrameInternal *internal = f->internal;
#if CONFIG_CUDA
if (internal->cuda_fc_ref) {
@@ -1829,17 +1928,22 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
- int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+ int nb_images = ff_vk_count_images(f);
+
+ VkSemaphoreWaitInfo sem_wait = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+ .pSemaphores = f->sem,
+ .pValues = f->sem_value,
+ .semaphoreCount = nb_images,
+ };
- /* We could use vkWaitSemaphores, but the validation layer seems to have
- * issues tracking command buffer execution state on uninit. */
- vk->DeviceWaitIdle(hwctx->act_dev);
+ vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
vulkan_free_internal(f);
- for (int i = 0; i < planes; i++) {
- vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
- vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+ for (int i = 0; i < nb_images; i++) {
+ vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
+ vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
}
@@ -1849,30 +1953,25 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
void *alloc_pnext, size_t alloc_pnext_stride)
{
- int err;
+ int img_cnt = 0, err;
VkResult ret;
AVHWDeviceContext *ctx = hwfc->device_ctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
AVVulkanFramesContext *hwfctx = hwfc->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
- VkMemoryRequirements cont_memory_requirements = { 0 };
- int cont_mem_size_list[AV_NUM_DATA_POINTERS] = { 0 };
- int cont_mem_size = 0;
-
AVVulkanDeviceContext *hwctx = ctx->hwctx;
- for (int i = 0; i < planes; i++) {
+ while (f->img[img_cnt]) {
int use_ded_mem;
VkImageMemoryRequirementsInfo2 req_desc = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
- .image = f->img[i],
+ .image = f->img[img_cnt],
};
VkMemoryDedicatedAllocateInfo ded_alloc = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
+ .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride),
};
VkMemoryDedicatedRequirements ded_req = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
@@ -1884,79 +1983,35 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
- if (f->tiling == VK_IMAGE_TILING_LINEAR)
+ if (hwfctx->tiling == VK_IMAGE_TILING_LINEAR)
req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
p->props.properties.limits.minMemoryMapAlignment);
- if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
- if (ded_req.requiresDedicatedAllocation) {
- av_log(hwfc, AV_LOG_ERROR, "Cannot allocate all planes in a single allocation, "
- "device requires dedicated image allocation!\n");
- return AVERROR(EINVAL);
- } else if (!i) {
- cont_memory_requirements = req.memoryRequirements;
- } else if (cont_memory_requirements.memoryTypeBits !=
- req.memoryRequirements.memoryTypeBits) {
- av_log(hwfc, AV_LOG_ERROR, "The memory requirements differ between plane 0 "
- "and %i, cannot allocate in a single region!\n",
- i);
- return AVERROR(EINVAL);
- }
-
- cont_mem_size_list[i] = FFALIGN(req.memoryRequirements.size,
- req.memoryRequirements.alignment);
- cont_mem_size += cont_mem_size_list[i];
- continue;
- }
-
/* In case the implementation prefers/requires dedicated allocation */
use_ded_mem = ded_req.prefersDedicatedAllocation |
ded_req.requiresDedicatedAllocation;
if (use_ded_mem)
- ded_alloc.image = f->img[i];
+ ded_alloc.image = f->img[img_cnt];
/* Allocate memory */
if ((err = alloc_mem(ctx, &req.memoryRequirements,
- f->tiling == VK_IMAGE_TILING_LINEAR ?
+ hwfctx->tiling == VK_IMAGE_TILING_LINEAR ?
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
- &f->flags, &f->mem[i])))
- return err;
-
- f->size[i] = req.memoryRequirements.size;
- bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
- bind_info[i].image = f->img[i];
- bind_info[i].memory = f->mem[i];
- }
-
- if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
- cont_memory_requirements.size = cont_mem_size;
-
- /* Allocate memory */
- if ((err = alloc_mem(ctx, &cont_memory_requirements,
- f->tiling == VK_IMAGE_TILING_LINEAR ?
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- (void *)(((uint8_t *)alloc_pnext)),
- &f->flags, &f->mem[0])))
+ &f->flags, &f->mem[img_cnt])))
return err;
- f->size[0] = cont_memory_requirements.size;
-
- for (int i = 0, offset = 0; i < planes; i++) {
- bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
- bind_info[i].image = f->img[i];
- bind_info[i].memory = f->mem[0];
- bind_info[i].memoryOffset = offset;
+ f->size[img_cnt] = req.memoryRequirements.size;
+ bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
+ bind_info[img_cnt].image = f->img[img_cnt];
+ bind_info[img_cnt].memory = f->mem[img_cnt];
- f->offset[i] = bind_info[i].memoryOffset;
- offset += cont_mem_size_list[i];
- }
+ img_cnt++;
}
/* Bind the allocated memory to the images */
- ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info);
+ ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
vk_ret2str(ret));
@@ -1982,11 +2037,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VkImageLayout new_layout;
VkAccessFlags2 new_access;
AVVulkanFramesContext *vkfc = hwfc->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
- AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
+ int nb_images = ff_vk_count_images(frame);
VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
VkDependencyInfo dep_info;
@@ -1994,14 +2048,14 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pSignalSemaphoreValues = sem_sig_val,
- .signalSemaphoreValueCount = planes,
+ .signalSemaphoreValueCount = nb_images,
};
VkSubmitInfo s_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &s_timeline_sem_info,
.pSignalSemaphores = frame->sem,
- .signalSemaphoreCount = planes,
+ .signalSemaphoreCount = nb_images,
};
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
@@ -2011,7 +2065,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
vkfc->lock_frame(hwfc, frame);
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_images; i++) {
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
sem_sig_val[i] = frame->sem_value[i] + 1;
}
@@ -2029,10 +2083,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
dst_qf = VK_QUEUE_FAMILY_IGNORED;
s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
- s_timeline_sem_info.waitSemaphoreValueCount = planes;
+ s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
s_info.pWaitSemaphores = frame->sem;
s_info.pWaitDstStageMask = wait_st;
- s_info.waitSemaphoreCount = planes;
+ s_info.waitSemaphoreCount = nb_images;
break;
case PREP_MODE_EXTERNAL_EXPORT:
new_layout = VK_IMAGE_LAYOUT_GENERAL;
@@ -2040,10 +2094,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
src_qf = VK_QUEUE_FAMILY_IGNORED;
dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
- s_timeline_sem_info.waitSemaphoreValueCount = planes;
+ s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
s_info.pWaitSemaphores = frame->sem;
s_info.pWaitDstStageMask = wait_st;
- s_info.waitSemaphoreCount = planes;
+ s_info.waitSemaphoreCount = nb_images;
break;
case PREP_MODE_DECODING_DST:
new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
@@ -2062,7 +2116,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_images; i++) {
img_bar[i] = (VkImageMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.pNext = NULL,
@@ -2077,8 +2131,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
.image = frame->img[i],
.subresourceRange = (VkImageSubresourceRange) {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
.levelCount = 1,
- .layerCount = 1,
},
};
@@ -2090,7 +2144,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
.pImageMemoryBarriers = img_bar,
- .imageMemoryBarrierCount = planes,
+ .imageMemoryBarrierCount = nb_images,
};
vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
@@ -2101,7 +2155,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
return err;
}
-static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
+static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format,
int frame_w, int frame_h, int plane)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
@@ -2120,17 +2174,17 @@ static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
VkImageTiling tiling, VkImageUsageFlagBits usage,
+ VkImageCreateFlags flags, int nb_layers,
void *create_pnext)
{
int err;
VkResult ret;
AVHWDeviceContext *ctx = hwfc->device_ctx;
VulkanDevicePriv *p = ctx->internal->priv;
+ VulkanFramesPriv *fp = hwfc->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
- enum AVPixelFormat format = hwfc->sw_format;
- const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
- const int planes = av_pix_fmt_count_planes(format);
+ AVVulkanFramesContext *frames = hwfc->hwctx;
VkExportSemaphoreCreateInfo ext_sem_info = {
.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
@@ -2165,17 +2219,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
return AVERROR(ENOMEM);
}
+ // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled)
+
/* Create the images */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < fp->nb_images; i++) {
VkImageCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = create_pnext,
.imageType = VK_IMAGE_TYPE_2D,
- .format = img_fmts[i],
+ .format = fp->fmts[i],
.extent.depth = 1,
.mipLevels = 1,
- .arrayLayers = 1,
- .flags = VK_IMAGE_CREATE_ALIAS_BIT,
+ .arrayLayers = nb_layers,
+ .flags = flags,
.tiling = tiling,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = usage,
@@ -2187,7 +2243,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
};
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
- format, hwfc->width, hwfc->height, i);
+ hwfc->sw_format, hwfc->width, hwfc->height, i);
ret = vk->CreateImage(hwctx->act_dev, &create_info,
hwctx->alloc, &f->img[i]);
@@ -2214,7 +2270,9 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
}
f->flags = 0x0;
+FF_DISABLE_DEPRECATION_WARNINGS
f->tiling = tiling;
+FF_ENABLE_DEPRECATION_WARNINGS
*frame = f;
return 0;
@@ -2296,41 +2354,23 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
AVVulkanFramesContext *hwctx = hwfc->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
VulkanFramesPriv *fp = hwfc->internal->priv;
- VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
VkExternalMemoryHandleTypeFlags e = 0x0;
- VkExternalMemoryImageCreateInfo eiinfo = {
- .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
- .pNext = hwctx->create_pnext,
- };
-
#ifdef _WIN32
if (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater()
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
#else
- if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
- try_export_flags(hwfc, &eiinfo.handleTypes, &e,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
- if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
- try_export_flags(hwfc, &eiinfo.handleTypes, &e,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
#endif
- for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
- eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
- eminfo[i].pNext = hwctx->alloc_pnext[i];
- eminfo[i].handleTypes = e;
- }
-
- err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
- eiinfo.handleTypes ? &eiinfo : NULL);
+ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
+ hwctx->nb_layers, hwctx->create_pnext);
if (err)
return NULL;
- err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo));
+ err = alloc_bind_mem(hwfc, f, NULL, 0);
if (err)
goto fail;
@@ -2389,103 +2429,44 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
VulkanFramesPriv *fp = hwfc->internal->priv;
AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
- const int has_modifiers = !!(p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS);
-
- /* Default tiling flags */
- hwctx->tiling = hwctx->tiling ? hwctx->tiling :
- has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
- p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
- VK_IMAGE_TILING_OPTIMAL;
-
- if (!hwctx->usage)
- hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS;
-
- modifier_info = vk_find_struct(hwctx->create_pnext,
- VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
-
- /* Get the supported modifiers if the user has not given any. */
- if (has_modifiers && !modifier_info) {
- const VkFormat *fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
- VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
- FFVulkanFunctions *vk = &p->vkfn;
- VkDrmFormatModifierPropertiesEXT *mod_props;
- uint64_t *modifiers;
- int modifier_count = 0;
-
- VkDrmFormatModifierPropertiesListEXT mod_props_list = {
- .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
- .pNext = NULL,
- .drmFormatModifierCount = 0,
- .pDrmFormatModifierProperties = NULL,
- };
- VkFormatProperties2 prop = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
- .pNext = &mod_props_list,
- };
-
- /* Get all supported modifiers */
- vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);
+ VkImageUsageFlagBits supported_usage;
- if (!mod_props_list.drmFormatModifierCount) {
- av_log(hwfc, AV_LOG_ERROR, "There are no supported modifiers for the given sw_format\n");
- return AVERROR(EINVAL);
- }
-
- /* Createa structure to hold the modifier list info */
- modifier_info = av_mallocz(sizeof(*modifier_info));
- if (!modifier_info)
- return AVERROR(ENOMEM);
-
- modifier_info->pNext = NULL;
- modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
+ /* Defaults */
+ if (!hwctx->nb_layers)
+ hwctx->nb_layers = 1;
- /* Add structure to the image creation pNext chain */
- if (!hwctx->create_pnext)
- hwctx->create_pnext = modifier_info;
- else
- vk_link_struct(hwctx->create_pnext, (void *)modifier_info);
+ /* VK_IMAGE_TILING_OPTIMAL == 0, so no need to check */
- /* Backup the allocated struct to be freed later */
- fp->modifier_info = modifier_info;
-
- /* Allocate list of modifiers */
- modifiers = av_mallocz(mod_props_list.drmFormatModifierCount *
- sizeof(*modifiers));
- if (!modifiers)
- return AVERROR(ENOMEM);
-
- modifier_info->pDrmFormatModifiers = modifiers;
+ if (!hwctx->usage)
+ hwctx->usage = VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT;
- /* Allocate a temporary list to hold all modifiers supported */
- mod_props = av_mallocz(mod_props_list.drmFormatModifierCount *
- sizeof(*mod_props));
- if (!mod_props)
- return AVERROR(ENOMEM);
+ err = av_vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, 0, /* drivers must fix feats. */
+ &fp->fmts, &fp->nb_images, &fp->aspect, &supported_usage);
+ if (err < 0)
+ return err;
- mod_props_list.pDrmFormatModifierProperties = mod_props;
+ fp->fmt = vk_find_format_entry(hwfc->sw_format);
- /* Finally get all modifiers from the device */
- vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);
+ /* Remove comments once drivers properly signal features for formats */
+ if (fp->fmt->vk_planes > 1) // || supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR)
+ hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
- /* Reject any modifiers that don't match our requirements */
- for (int i = 0; i < mod_props_list.drmFormatModifierCount; i++) {
- if (!(mod_props[i].drmFormatModifierTilingFeatures & hwctx->usage))
- continue;
+// fp->fmt = vk_find_format_entry(hwfc->sw_format);
- modifiers[modifier_count++] = mod_props[i].drmFormatModifier;
- }
+ if (!hwctx->img_flags) {
+ hwctx->img_flags = VK_IMAGE_CREATE_ALIAS_BIT;
+ if ((fp->fmt->vk_planes > 1 && fp->nb_images == 1) ||
+ (fp->fmt->vkf != fp->fmt->fallback[0]))
+ hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+ }
- if (!modifier_count) {
- av_log(hwfc, AV_LOG_ERROR, "None of the given modifiers supports"
- " the usage flags!\n");
- av_freep(&mod_props);
- return AVERROR(EINVAL);
- }
+ if (!hwctx->lock_frame)
+ hwctx->lock_frame = lock_frame;
- modifier_info->drmFormatModifierCount = modifier_count;
- av_freep(&mod_props);
- }
+ if (!hwctx->unlock_frame)
+ hwctx->unlock_frame = unlock_frame;
err = create_exec_ctx(hwfc, &fp->conv_ctx,
dev_hwctx->queue_family_comp_index,
@@ -2505,8 +2486,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return err;
/* Test to see if allocation will fail */
- err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
- hwctx->create_pnext);
+ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
+ hwctx->nb_layers, hwctx->create_pnext);
if (err)
return err;
@@ -2522,11 +2503,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return AVERROR(ENOMEM);
}
- if (!hwctx->lock_frame)
- hwctx->lock_frame = lock_frame;
- if (!hwctx->unlock_frame)
- hwctx->unlock_frame = unlock_frame;
-
return 0;
}
@@ -2602,7 +2578,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
const AVFrame *src, int flags)
{
VkResult ret;
- int err, mapped_mem_count = 0, mem_planes = 0;
+ int err, nb_mem = 0, mapped_mem_count = 0, mem_planes = 0;
AVVkFrame *f = (AVVkFrame *)src->data[0];
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
AVVulkanFramesContext *hwfctx = hwfc->hwctx;
@@ -2622,7 +2598,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
}
if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
- !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
+ !(hwfctx->tiling == VK_IMAGE_TILING_LINEAR)) {
av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
"and linear!\n");
err = AVERROR(EINVAL);
@@ -2632,35 +2608,35 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
dst->width = src->width;
dst->height = src->height;
- mem_planes = hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY ? 1 : planes;
- for (int i = 0; i < mem_planes; i++) {
+ for (int i = 0; i < AV_NUM_DATA_POINTERS; i++)
+ nb_mem += !!f->mem[i];
+
+ for (int i = 0; i < nb_mem; i++) {
ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0,
VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
- vk_ret2str(ret));
+ av_log(hwfc, AV_LOG_ERROR, "Failed to map %ith frame memory: %s\n",
+ i, vk_ret2str(ret));
err = AVERROR_EXTERNAL;
goto fail;
}
mapped_mem_count++;
}
- if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
- for (int i = 0; i < planes; i++)
- dst->data[i] = dst->data[0] + f->offset[i];
- }
+ for (int i = 0; i < planes; i++)
+ dst->data[i] = dst->data[i] + f->offset[i];
/* Check if the memory contents matter */
if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
!(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_mem; i++) {
map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
map_mem_ranges[i].size = VK_WHOLE_SIZE;
map_mem_ranges[i].memory = f->mem[i];
}
- ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes,
+ ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, nb_mem,
map_mem_ranges);
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
@@ -2702,25 +2678,25 @@ static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwma
{
AVVkFrame *f = hwmap->priv;
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ const int nb_images = ff_vk_count_images(f);
VkSemaphoreWaitInfo wait_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
.flags = 0x0,
.pSemaphores = f->sem,
.pValues = f->sem_value,
- .semaphoreCount = planes,
+ .semaphoreCount = nb_images,
};
vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);
vulkan_free_internal(f);
- for (int i = 0; i < planes; i++) {
- vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
- vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+ for (int i = 0; i < nb_images; i++) {
+ vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
+ vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
}
@@ -2790,7 +2766,9 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
goto fail;
}
+FF_DISABLE_DEPRECATION_WARNINGS
f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
+FF_ENABLE_DEPRECATION_WARNINGS
for (int i = 0; i < desc->nb_layers; i++) {
const int planes = desc->layers[i].nb_planes;
@@ -2828,7 +2806,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
.mipLevels = 1,
.arrayLayers = 1,
.flags = 0x0, /* ALIAS flag is implicit for imported images */
- .tiling = f->tiling,
+ .tiling = hwfctx->tiling,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
@@ -3498,7 +3476,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
drm_desc->layers[i].planes[0].object_index = FFMIN(i, drm_desc->nb_objects - 1);
- if (f->tiling == VK_IMAGE_TILING_OPTIMAL)
+ if (hwfctx ->tiling == VK_IMAGE_TILING_OPTIMAL)
continue;
vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
@@ -3818,7 +3796,10 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
int bar_num = 0;
VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
- const int planes = av_pix_fmt_count_planes(pix_fmt);
+ const int img_planes = fp->fmt->vk_planes;
+ const int nb_images = ff_vk_count_images(frame);
+ int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt);
+
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@@ -3831,8 +3812,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pWaitSemaphoreValues = frame->sem_value,
.pSignalSemaphoreValues = sem_signal_values,
- .waitSemaphoreValueCount = planes,
- .signalSemaphoreValueCount = planes,
+ .waitSemaphoreValueCount = nb_images,
+ .signalSemaphoreValueCount = nb_images,
};
VkSubmitInfo s_info = {
@@ -3841,8 +3822,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
.pSignalSemaphores = frame->sem,
.pWaitSemaphores = frame->sem,
.pWaitDstStageMask = sem_wait_dst,
- .signalSemaphoreCount = planes,
- .waitSemaphoreCount = planes,
+ .signalSemaphoreCount = nb_images,
+ .waitSemaphoreCount = nb_images,
};
vkfc->lock_frame(hwfc, frame);
@@ -3850,11 +3831,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
if ((err = wait_start_exec_ctx(hwfc, ectx)))
goto end;
- for (int i = 0; i < planes; i++)
+ for (int i = 0; i < nb_images; i++)
sem_signal_values[i] = frame->sem_value[i] + 1;
/* Change the image layout to something more optimal for transfers */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_images; i++) {
VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
@@ -3890,13 +3871,19 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
0, NULL, 0, NULL, bar_num, img_bar);
/* Schedule a copy for each plane */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < pixfmt_planes; i++) {
+ int idx = FFMIN(i, nb_images - 1);
+ VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_ASPECT_PLANE_0_BIT,
+ VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
VkBufferImageCopy buf_reg = {
.bufferOffset = buf_offsets[i],
.bufferRowLength = buf_stride[i] / desc->comp[i].step,
.imageSubresource.layerCount = 1,
- .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .imageSubresource.aspectMask = plane_aspect[(img_planes != 1) + i*(img_planes != 1)],
.imageOffset = { 0, 0, 0, },
};
@@ -3907,11 +3894,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
if (to_buf)
- vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
+ vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], frame->layout[idx],
vkbuf->buf, 1, &buf_reg);
else
- vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
- frame->layout[i], 1, &buf_reg);
+ vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx],
+ frame->layout[idx], 1, &buf_reg);
}
/* When uploading, do this asynchronously if the source is refcounted by
@@ -3928,7 +3915,7 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
goto end;
}
- if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
+ if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, pixfmt_planes)))
goto end;
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
} else {
@@ -3948,6 +3935,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
AVVkFrame *f = (AVVkFrame *)vkf->data[0];
AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+ AVVulkanFramesContext *fc = hwfc->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
@@ -3970,7 +3958,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
return AVERROR(EINVAL);
/* For linear, host visiable images */
- if (f->tiling == VK_IMAGE_TILING_LINEAR &&
+ if (fc->tiling == VK_IMAGE_TILING_LINEAR &&
f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
AVFrame *map = av_frame_alloc();
if (!map)
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index e89fa52927..13a40fa563 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -169,26 +169,31 @@ typedef enum AVVkFrameFlags {
*/
typedef struct AVVulkanFramesContext {
/**
- * Controls the tiling of allocated frames. If left as optimal tiling,
- * then during av_hwframe_ctx_init() will decide based on whether the device
- * supports DRM modifiers, or if the linear_images flag is set, otherwise
- * will allocate optimally-tiled images.
+ * Controls the tiling of allocated frames.
+ * If left as VK_IMAGE_TILING_OPTIMAL (0), will use optimal tiling.
+ * Can be set to VK_IMAGE_TILING_LINEAR to force linear images,
+ * or VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT to force DMABUF-backed
+ * images.
*/
VkImageTiling tiling;
/**
- * Defines extra usage of output frames. If left as 0, the following bits
- * are set: TRANSFER_SRC, TRANSFER_DST. SAMPLED and STORAGE.
+ * Defines extra usage of output frames. If non-zero, all flags MUST be
+ * supported by the VkFormat. Otherwise, will use supported flags amongst:
+ * - VK_IMAGE_USAGE_SAMPLED_BIT
+ * - VK_IMAGE_USAGE_STORAGE_BIT
+ * - VK_IMAGE_USAGE_TRANSFER_SRC_BIT
+ * - VK_IMAGE_USAGE_TRANSFER_DST_BIT
*/
VkImageUsageFlagBits usage;
/**
* Extension data for image creation.
- * If VkImageDrmFormatModifierListCreateInfoEXT is present in the chain,
- * and the device supports DRM modifiers, then images will be allocated
- * with the specific requested DRM modifiers.
+ * If DRM tiling is used, a VkImageDrmFormatModifierListCreateInfoEXT structure
+ * can be added to specify the exact modifier to use.
+ *
* Additional structures may be added at av_hwframe_ctx_init() time,
- * which will be freed automatically on uninit(), so users need only free
+ * which will be freed automatically on uninit(), so users must only free
* any structures they've allocated themselves.
*/
void *create_pnext;
@@ -209,6 +214,25 @@ typedef struct AVVulkanFramesContext {
*/
AVVkFrameFlags flags;
+ /**
+ * Flags to set during image creation. If unset, defaults to
+ * VK_IMAGE_CREATE_ALIAS_BIT.
+ */
+ VkImageCreateFlags img_flags;
+
+ /**
+ * Vulkan format for each image. MUST be compatible with the pixel format.
+ * If unset, will be automatically set.
+ * There are at most two compatible formats for a frame - a multiplane
+ * format, and a single-plane multi-image format.
+ */
+ VkFormat format[AV_NUM_DATA_POINTERS];
+
+ /**
+ * Number of layers each image will have.
+ */
+ int nb_layers;
+
/**
* Locks a frame, preventing other threads from changing frame properties.
* If set to NULL, will be set to lavu-internal functions that utilize a
@@ -228,14 +252,7 @@ typedef struct AVVulkanFramesContext {
} AVVulkanFramesContext;
/*
- * Frame structure, the VkFormat of the image will always match
- * the pool's sw_format.
- * All frames, imported or allocated, will be created with the
- * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed.
- *
- * If all queue family indices in the device context are the same,
- * images will be created with the EXCLUSIVE sharing mode. Otherwise, all images
- * will be created using the CONCURRENT sharing mode.
+ * Frame structure.
*
* @note the size of this structure is not part of the ABI, to allocate
* you must use @av_vk_frame_alloc().
@@ -248,8 +265,9 @@ struct AVVkFrame {
/**
* The same tiling must be used for all images in the frame.
+ * DEPRECATED: use AVVulkanFramesContext.tiling instead.
*/
- VkImageTiling tiling;
+ attribute_deprecated VkImageTiling tiling;
/**
* Memory backing the images. Could be less than the amount of planes,
@@ -265,13 +283,13 @@ struct AVVkFrame {
VkMemoryPropertyFlagBits flags;
/**
- * Updated after every barrier
+ * Updated after every barrier. One per VkImage.
*/
VkAccessFlagBits access[AV_NUM_DATA_POINTERS];
VkImageLayout layout[AV_NUM_DATA_POINTERS];
/**
- * Synchronization timeline semaphores, one for each sw_format plane.
+ * Synchronization timeline semaphores, one for each VkImage.
* Must not be freed manually. Must be waited on at every submission using
* the value in sem_value, and must be signalled at every submission,
* using an incremented value.
@@ -280,6 +298,7 @@ struct AVVkFrame {
/**
* Up to date semaphore value at which each image becomes accessible.
+ * One per VkImage.
* Clients must wait on this value when submitting a command queue,
* and increment it when signalling.
*/
@@ -291,16 +310,18 @@ struct AVVkFrame {
struct AVVkFrameInternal *internal;
/**
- * Describes the binding offset of each plane to the VkDeviceMemory.
+ * Describes the binding offset of each image to the VkDeviceMemory.
+ * One per VkImage.
*/
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
/**
* Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
* the image was allocated with the CONCURRENT concurrency option.
+ * One per VkImage.
*/
uint32_t queue_family[AV_NUM_DATA_POINTERS];
-} AVVkFrame;
+};
/**
* Allocates a single AVVkFrame and initializes everything as 0.
@@ -309,7 +330,7 @@ struct AVVkFrame {
AVVkFrame *av_vk_frame_alloc(void);
/**
- * Returns the format of each image up to the number of planes for a given sw_format.
+ * Returns the optimal format for a given sw_format, one for each plane.
* Returns NULL on unsupported formats.
*/
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p);
--
2.39.2
[-- Attachment #53: 0052-hwcontext_vulkan-don-t-change-properties-if-prepare_.patch --]
[-- Type: text/x-diff, Size: 2638 bytes --]
From a9ac0aa322a3369ccb5167ae1a8a984faf2e24d1 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:14:08 +0100
Subject: [PATCH 52/72] hwcontext_vulkan: don't change properties if
prepare_frame fails
---
libavutil/hwcontext_vulkan.c | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 027ecc76b1..75004037da 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2113,16 +2113,13 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
break;
}
- /* Change the image layout to something more optimal for writes.
- * This also signals the newly created semaphore, making it usable
- * for synchronization */
for (int i = 0; i < nb_images; i++) {
img_bar[i] = (VkImageMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.pNext = NULL,
.srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
- .srcAccessMask = 0x0,
.dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+ .srcAccessMask = frame->access[i],
.dstAccessMask = new_access,
.oldLayout = frame->layout[i],
.newLayout = new_layout,
@@ -2135,21 +2132,23 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
.levelCount = 1,
},
};
-
- frame->layout[i] = img_bar[i].newLayout;
- frame->access[i] = img_bar[i].dstAccessMask;
}
- dep_info = (VkDependencyInfo) {
- .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
- .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
- .pImageMemoryBarriers = img_bar,
- .imageMemoryBarrierCount = nb_images,
- };
-
- vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
+ vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = nb_images,
+ });
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ if (err >= 0) {
+ for (int i = 0; i < nb_images; i++) {
+ frame->layout[i] = img_bar[i].newLayout;
+ frame->access[i] = img_bar[i].dstAccessMask;
+ frame->queue_family[i] = img_bar[i].dstQueueFamilyIndex;
+ }
+ }
vkfc->unlock_frame(hwfc, frame);
return err;
--
2.39.2
[-- Attachment #54: 0053-hwcontext_vulkan-disable-host-mapping-frames-for-tra.patch --]
[-- Type: text/x-diff, Size: 1033 bytes --]
From 51c352d34c0ab2ae5eea1df1753d2a8d615c33d8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:14:24 +0100
Subject: [PATCH 53/72] hwcontext_vulkan: disable host-mapping frames for
transfers
Currently broken for multiplane surfaces.
---
libavutil/hwcontext_vulkan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 75004037da..647a072bdd 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -3946,7 +3946,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
const int planes = av_pix_fmt_count_planes(swf->format);
int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
- const int map_host = !!(p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY);
+ const int map_host = 0;
if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
--
2.39.2
[-- Attachment #55: 0054-hwcontext_vulkan-disable-all-mapping-code.patch --]
[-- Type: text/x-diff, Size: 5612 bytes --]
From a871a7d4ffe3f94488cd5091794e683c720bc5df Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:30:00 +0100
Subject: [PATCH 54/72] hwcontext_vulkan: disable all mapping code
Multiplane formats are currently not easy to map.
---
libavutil/hwcontext_vulkan.c | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 647a072bdd..761a63ddd7 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -44,7 +44,7 @@
#include "vulkan.h"
#include "vulkan_loader.h"
-#if CONFIG_LIBDRM
+#if 0
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
@@ -54,7 +54,7 @@
#endif
#endif
-#if CONFIG_CUDA
+#if 0
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
@@ -139,7 +139,7 @@ typedef struct VulkanFramesPriv {
typedef struct AVVkFrameInternal {
pthread_mutex_t update_mutex;
-#if CONFIG_CUDA
+#if 0
/* Importing external memory into cuda is really expensive so we keep the
* memory imported all the time */
AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */
@@ -1718,7 +1718,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
* by the following checks (e.g. non-PCIe ARM GPU), having an empty
* dev_select will mean it'll get picked. */
switch(src_ctx->type) {
-#if CONFIG_LIBDRM
+#if 0
#if CONFIG_VAAPI
case AV_HWDEVICE_TYPE_VAAPI: {
AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
@@ -1753,7 +1753,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}
#endif
-#if CONFIG_CUDA
+#if 0
case AV_HWDEVICE_TYPE_CUDA: {
AVHWDeviceContext *cuda_cu = src_ctx;
AVCUDADeviceContext *src_hwctx = src_ctx->hwctx;
@@ -1789,7 +1789,7 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
0, NULL, NULL, NULL, NULL) >= 0;
}
-#if CONFIG_CUDA
+#if 0
if (p->dev_is_nvidia)
count++;
#endif
@@ -1807,7 +1807,7 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
}
}
-#if CONFIG_CUDA
+#if 0
if (p->dev_is_nvidia)
constraints->valid_sw_formats[count++] = AV_PIX_FMT_CUDA;
#endif
@@ -1890,7 +1890,7 @@ static void vulkan_free_internal(AVVkFrame *f)
{
av_unused AVVkFrameInternal *internal = f->internal;
-#if CONFIG_CUDA
+#if 0
if (internal->cuda_fc_ref) {
AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
int planes = av_pix_fmt_count_planes(cuda_fc->sw_format);
@@ -2672,7 +2672,7 @@ fail:
return err;
}
-#if CONFIG_LIBDRM
+#if 0
static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
AVVkFrame *f = hwmap->priv;
@@ -2746,6 +2746,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ AVVulkanFramesContext *hwfctx = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
@@ -3076,7 +3077,7 @@ fail:
#endif
#endif
-#if CONFIG_CUDA
+#if 0
static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
AVBufferRef *cuda_hwfc,
const AVFrame *frame)
@@ -3346,7 +3347,7 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
switch (src->format) {
-#if CONFIG_LIBDRM
+#if 0
#if CONFIG_VAAPI
case AV_PIX_FMT_VAAPI:
if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
@@ -3365,7 +3366,7 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
}
}
-#if CONFIG_LIBDRM
+#if 0
typedef struct VulkanDRMMapping {
AVDRMFrameDescriptor drm_desc;
AVVkFrame *source;
@@ -3533,7 +3534,7 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
switch (dst->format) {
-#if CONFIG_LIBDRM
+#if 0
case AV_PIX_FMT_DRM_PRIME:
if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
return vulkan_map_to_drm(hwfc, dst, src, flags);
@@ -4091,7 +4092,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
switch (src->format) {
-#if CONFIG_CUDA
+#if 0
case AV_PIX_FMT_CUDA:
#ifdef _WIN32
if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
@@ -4110,7 +4111,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
}
}
-#if CONFIG_CUDA
+#if 0
static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
const AVFrame *src)
{
@@ -4209,7 +4210,7 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
av_unused VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
switch (dst->format) {
-#if CONFIG_CUDA
+#if 0
case AV_PIX_FMT_CUDA:
#ifdef _WIN32
if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
--
2.39.2
[-- Attachment #56: 0055-lavfi-add-lavfi-only-Vulkan-infrastructure.patch --]
[-- Type: text/x-diff, Size: 21753 bytes --]
From 6bd109733484568c98c2d08935d9c7f05ad7803c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:10:58 +0100
Subject: [PATCH 55/72] lavfi: add lavfi-only Vulkan infrastructure
---
libavfilter/Makefile | 6 +
libavfilter/vulkan_filter.c | 241 +++++++++++++++++++-
libavfilter/vulkan_filter.h | 25 ++
{libavutil => libavfilter}/vulkan_glslang.c | 19 +-
{libavutil => libavfilter}/vulkan_shaderc.c | 8 +-
libavfilter/vulkan_spirv.h | 45 ++++
6 files changed, 330 insertions(+), 14 deletions(-)
rename {libavutil => libavfilter}/vulkan_glslang.c (95%)
rename {libavutil => libavfilter}/vulkan_shaderc.c (96%)
create mode 100644 libavfilter/vulkan_spirv.h
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 0173b11870..f02e787d61 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -615,6 +615,10 @@ OBJS-$(CONFIG_AVSYNCTEST_FILTER) += src_avsynctest.o
OBJS-$(CONFIG_AMOVIE_FILTER) += src_movie.o
OBJS-$(CONFIG_MOVIE_FILTER) += src_movie.o
+# vulkan libs
+OBJS-$(CONFIG_LIBGLSLANG) += vulkan_glslang.o
+OBJS-$(CONFIG_LIBSHADERC) += vulkan_shaderc.o
+
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o
@@ -628,6 +632,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h
SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h
SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_filter.h
+SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h
+SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h
TOOLS = graph2dot
TESTPROGS = drawutils filtfmts formats integral
diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c
index e22541bd23..ad88931c4b 100644
--- a/libavfilter/vulkan_filter.c
+++ b/libavfilter/vulkan_filter.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -54,7 +56,6 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
int err;
AVFilterContext *avctx = inlink->dst;
FFVulkanContext *s = avctx->priv;
- FFVulkanFunctions *vk = &s->vkfn;
AVHWFramesContext *input_frames;
if (!inlink->hw_frames_ctx) {
@@ -85,8 +86,7 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
if (err < 0)
return err;
- vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props);
- vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+ ff_vk_load_props(s);
/* Default output parameters match input parameters. */
s->input_format = input_frames->sw_format;
@@ -189,3 +189,238 @@ int ff_vk_filter_init(AVFilterContext *avctx)
return 0;
}
+
+/**
+ * Record and submit one compute dispatch that reads in_f and writes out_f.
+ *
+ * @param vkctx     filter Vulkan context; provides the function table and the
+ *                  output dimensions the dispatch is sized from
+ * @param e         execution pool the execution context is taken from
+ * @param pl        compute pipeline to bind
+ * @param out_f     output frame, transitioned to VK_IMAGE_LAYOUT_GENERAL
+ * @param in_f      input frame, transitioned to
+ *                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
+ * @param sampler   sampler passed along with the input image descriptors
+ * @param push_src  push constant data, or NULL to skip the push constant update
+ * @param push_size size of the data at push_src, in bytes
+ * @return the value of ff_vk_exec_submit(); if one of the RET() steps fails,
+ *         err, after the dependencies added so far have been discarded
+ */
+int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
+                                FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
+                                VkSampler sampler, void *push_src, size_t push_size)
+{
+    int err = 0;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    VkImageView in_views[AV_NUM_DATA_POINTERS];
+    VkImageView out_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Update descriptors and init the exec context */
+    FFVkExecContext *exec = ff_vk_exec_get(e);
+    ff_vk_exec_start(vkctx, exec);
+
+    ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+
+    if (push_src)
+        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                               0, push_size, push_src);
+
+    /* Register both frames as dependencies of this execution
+     * (RET() presumably stores the error in err and jumps to fail) */
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+
+    RET(ff_vk_create_imageviews(vkctx, exec, in_views,  in_f));
+    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f));
+
+    /* Input goes to index 0 with the sampler, output to index 1 without one
+     * (presumably these are the binding indices within set 0 - confirm) */
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      sampler);
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, 1,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      NULL);
+
+    /* Collect the layout/access transitions for both frames into img_bar,
+     * then record them with a single pipeline barrier */
+    ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    /* Group counts in X/Y are rounded up so the whole output is covered.
+     * The Z count is taken from the third wg_size entry; using the Y entry
+     * here would launch redundant groups whenever the Y size is above 1. */
+    vk->CmdDispatch(exec->buf,
+                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
+                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+                    pl->wg_size[2]);
+
+    return ff_vk_exec_submit(vkctx, exec);
+fail:
+    ff_vk_exec_discard_deps(vkctx, exec);
+    return err;
+}
+
+/**
+ * Record and submit two compute dispatches in one execution: the first
+ * pipeline reads in and writes tmp, the second reads tmp and writes out.
+ *
+ * @param vkctx     filter Vulkan context; provides the function table and the
+ *                  output dimensions the dispatches are sized from
+ * @param e         execution pool the execution context is taken from
+ * @param pls       the two compute pipelines, in execution order
+ * @param out       output frame of the second pass
+ * @param tmp       intermediate frame, written by the first pass and read by
+ *                  the second; kept in VK_IMAGE_LAYOUT_GENERAL
+ * @param in        input frame of the first pass
+ * @param sampler   sampler passed along with the source image descriptors
+ * @param push_src  push constant data applied to both pipelines, or NULL to
+ *                  skip the push constant update
+ * @param push_size size of the data at push_src, in bytes
+ * @return the value of ff_vk_exec_submit(); if one of the RET() steps fails,
+ *         err, after the dependencies added so far have been discarded
+ */
+int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
+                               FFVulkanPipeline *pls[2],
+                               AVFrame *out, AVFrame *tmp, AVFrame *in,
+                               VkSampler sampler, void *push_src, size_t push_size)
+{
+    int err = 0;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    VkImageView in_views[AV_NUM_DATA_POINTERS];
+    VkImageView tmp_views[AV_NUM_DATA_POINTERS];
+    VkImageView out_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Update descriptors and init the exec context */
+    FFVkExecContext *exec = ff_vk_exec_get(e);
+    ff_vk_exec_start(vkctx, exec);
+
+    /* Register all three frames as dependencies of this execution
+     * (RET() presumably stores the error in err and jumps to fail) */
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+
+    RET(ff_vk_create_imageviews(vkctx, exec, in_views,  in));
+    RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp));
+    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
+
+    /* Collect the transitions for all frames into img_bar, then record them
+     * with a single pipeline barrier. tmp is both written and read, so it
+     * gets read|write access in the GENERAL layout. */
+    ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    /* NOTE(review): no barrier is recorded between the two dispatches below,
+     * although the second one reads what the first wrote to tmp - confirm
+     * whether one is needed here. */
+    for (int i = 0; i < 2; i++) {
+        FFVulkanPipeline *pl = pls[i];
+        /* Pass 0: in -> tmp, pass 1: tmp -> out */
+        AVFrame *src_f = !i ? in : tmp;
+        AVFrame *dst_f = !i ? tmp : out;
+        VkImageView *src_views = !i ? in_views : tmp_views;
+        VkImageView *dst_views = !i ? tmp_views : out_views;
+
+        ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+
+        if (push_src)
+            ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, push_size, push_src);
+
+        /* The source layout must match the barrier above: in is read-only
+         * optimal, tmp stays in GENERAL */
+        ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0,
+                                          !i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
+                                               VK_IMAGE_LAYOUT_GENERAL,
+                                          sampler);
+        ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1,
+                                          VK_IMAGE_LAYOUT_GENERAL,
+                                          NULL);
+
+        /* Group counts in X/Y are rounded up so the whole output is covered.
+         * The Z count is taken from the third wg_size entry; using the Y
+         * entry here would launch redundant groups whenever the Y size is
+         * above 1. */
+        vk->CmdDispatch(exec->buf,
+                        FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
+                        FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+                        pl->wg_size[2]);
+    }
+
+    return ff_vk_exec_submit(vkctx, exec);
+fail:
+    ff_vk_exec_discard_deps(vkctx, exec);
+    return err;
+}
+
+/**
+ * Record and submit one compute dispatch that reads in1 and in2 and writes
+ * out.
+ *
+ * @param vkctx     filter Vulkan context; provides the function table and the
+ *                  output dimensions the dispatch is sized from
+ * @param e         execution pool the execution context is taken from
+ * @param pl        compute pipeline to bind
+ * @param out       output frame, transitioned to VK_IMAGE_LAYOUT_GENERAL
+ * @param in1       first input frame, transitioned to
+ *                  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
+ * @param in2       second input frame, same layout as in1
+ * @param sampler   sampler passed along with both input image descriptors
+ * @param push_src  push constant data, or NULL to skip the push constant update
+ * @param push_size size of the data at push_src, in bytes
+ * @return the value of ff_vk_exec_submit(); if one of the RET() steps fails,
+ *         err, after the dependencies added so far have been discarded
+ */
+int ff_vk_filter_process_2in(FFVulkanContext *vkctx, FFVkExecPool *e,
+                             FFVulkanPipeline *pl,
+                             AVFrame *out, AVFrame *in1, AVFrame *in2,
+                             VkSampler sampler, void *push_src, size_t push_size)
+{
+    int err = 0;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    VkImageView in1_views[AV_NUM_DATA_POINTERS];
+    VkImageView in2_views[AV_NUM_DATA_POINTERS];
+    VkImageView out_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Update descriptors and init the exec context */
+    FFVkExecContext *exec = ff_vk_exec_get(e);
+    ff_vk_exec_start(vkctx, exec);
+
+    /* Register all three frames as dependencies of this execution
+     * (RET() presumably stores the error in err and jumps to fail) */
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in1,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in2,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
+                                 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
+
+    RET(ff_vk_create_imageviews(vkctx, exec, in1_views, in1));
+    RET(ff_vk_create_imageviews(vkctx, exec, in2_views, in2));
+    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
+
+    /* Collect the transitions for all frames into img_bar, then record them
+     * with a single pipeline barrier */
+    ff_vk_frame_barrier(vkctx, exec, in1, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, in2, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+
+    if (push_src)
+        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+                               0, push_size, push_src);
+
+    /* Inputs go to indices 0 and 1 with the sampler, the output to index 2
+     * without one (presumably binding indices within set 0 - confirm) */
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, in1, in1_views, 0, 0,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      sampler);
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, in2, in2_views, 0, 1,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      sampler);
+    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, 2,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      NULL);
+
+    /* Group counts in X/Y are rounded up so the whole output is covered.
+     * The Z count is taken from the third wg_size entry; using the Y entry
+     * here would launch redundant groups whenever the Y size is above 1. */
+    vk->CmdDispatch(exec->buf,
+                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
+                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+                    pl->wg_size[2]);
+
+    return ff_vk_exec_submit(vkctx, exec);
+fail:
+    ff_vk_exec_discard_deps(vkctx, exec);
+    return err;
+}
diff --git a/libavfilter/vulkan_filter.h b/libavfilter/vulkan_filter.h
index bfdb9b2d7d..2a2a0e6e97 100644
--- a/libavfilter/vulkan_filter.h
+++ b/libavfilter/vulkan_filter.h
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -31,4 +33,27 @@ int ff_vk_filter_config_input (AVFilterLink *inlink);
int ff_vk_filter_config_output (AVFilterLink *outlink);
int ff_vk_filter_config_output_inplace(AVFilterLink *outlink);
+/**
+ * Submit a compute shader with a single in and single out for execution.
+ */
+int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
+ VkSampler sampler, void *push_src, size_t push_size);
+
+/**
+ * Submit two compute shaders, run back to back (in -> tmp -> out), with a single in and a single out.
+ */
+int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pls[2],
+ AVFrame *out, AVFrame *tmp, AVFrame *in,
+ VkSampler sampler, void *push_src, size_t push_size);
+
+/**
+ * Submit a compute shader with two ins and a single out for execution.
+ */
+int ff_vk_filter_process_2in(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pl,
+ AVFrame *out, AVFrame *in1, AVFrame *in2,
+ VkSampler sampler, void *push_src, size_t push_size);
+
#endif /* AVFILTER_VULKAN_FILTER_H */
diff --git a/libavutil/vulkan_glslang.c b/libavfilter/vulkan_glslang.c
similarity index 95%
rename from libavutil/vulkan_glslang.c
rename to libavfilter/vulkan_glslang.c
index e7785f6d40..845a530ee0 100644
--- a/libavutil/vulkan_glslang.c
+++ b/libavfilter/vulkan_glslang.c
@@ -21,8 +21,9 @@
#include <glslang/build_info.h>
#include <glslang/Include/glslang_c_interface.h>
-#include "mem.h"
-#include "avassert.h"
+#include "vulkan_spirv.h"
+#include "libavutil/mem.h"
+#include "libavutil/avassert.h"
static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int glslc_refcount = 0;
@@ -176,11 +177,13 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
av_assert0(glslc_refcount);
+ *opaque = NULL;
+
if (!(glslc_shader = glslang_shader_create(&glslc_input)))
return AVERROR(ENOMEM);
if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n",
glslang_shader_get_info_log(glslc_shader),
glslang_shader_get_info_debug_log(glslc_shader));
@@ -189,7 +192,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
}
if (!glslang_shader_parse(glslc_shader, &glslc_input)) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n",
glslang_shader_get_info_log(glslc_shader),
glslang_shader_get_info_debug_log(glslc_shader));
@@ -206,7 +209,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT |
GLSLANG_MSG_VULKAN_RULES_BIT)) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n",
glslang_program_get_info_log(glslc_program),
glslang_program_get_info_debug_log(glslc_program));
@@ -219,10 +222,10 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
messages = glslang_program_SPIRV_get_messages(glslc_program);
if (messages) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_WARNING, "%s\n", messages);
} else {
- ff_vk_print_shader(avctx, shd, AV_LOG_VERBOSE);
+ ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE);
}
glslang_shader_delete(glslc_shader);
@@ -257,7 +260,7 @@ static void glslc_uninit(FFVkSPIRVCompiler **ctx)
av_freep(ctx);
}
-static FFVkSPIRVCompiler *ff_vk_glslang_init(void)
+FFVkSPIRVCompiler *ff_vk_glslang_init(void)
{
FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret));
if (!ret)
diff --git a/libavutil/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c
similarity index 96%
rename from libavutil/vulkan_shaderc.c
rename to libavfilter/vulkan_shaderc.c
index bd40edf187..38be1030ad 100644
--- a/libavutil/vulkan_shaderc.c
+++ b/libavfilter/vulkan_shaderc.c
@@ -18,7 +18,8 @@
#include <shaderc/shaderc.h>
-#include "mem.h"
+#include "libavutil/mem.h"
+#include "vulkan_spirv.h"
static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
FFVkSPIRVShader *shd, uint8_t **data,
@@ -43,6 +44,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
};
shaderc_compile_options_t opts = shaderc_compile_options_initialize();
+ *opaque = NULL;
if (!opts)
return AVERROR(ENOMEM);
@@ -65,7 +67,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE;
- ff_vk_print_shader(avctx, shd, loglevel);
+ ff_vk_shader_print(avctx, shd, loglevel);
if (message && (err || warn))
av_log(avctx, loglevel, "%s\n", message);
status = ret < FF_ARRAY_ELEMS(shdc_result) ? shdc_result[ret] : "unknown";
@@ -104,7 +106,7 @@ static void shdc_uninit(FFVkSPIRVCompiler **ctx)
av_freep(ctx);
}
-static FFVkSPIRVCompiler *ff_vk_shaderc_init(void)
+FFVkSPIRVCompiler *ff_vk_shaderc_init(void)
{
FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret));
if (!ret)
diff --git a/libavfilter/vulkan_spirv.h b/libavfilter/vulkan_spirv.h
new file mode 100644
index 0000000000..5638cd9696
--- /dev/null
+++ b/libavfilter/vulkan_spirv.h
@@ -0,0 +1,45 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VULKAN_SPIRV_H
+#define AVFILTER_VULKAN_SPIRV_H
+
+#include "libavutil/vulkan.h"
+
+#include "vulkan.h"
+#include "config.h"
+
+/* Handle to a SPIR-V compiler backend: a private state pointer plus the
+ * callbacks the backend fills in. Obtained through ff_vk_spirv_init(). */
+typedef struct FFVkSPIRVCompiler {
+ /* Backend-private state */
+ void *priv;
+ /* Compile shd for the given entrypoint; the result is handed back through
+ * data and size. Both backends in this series reset *opaque to NULL on
+ * entry (presumably it then carries the handle free_shader() expects -
+ * confirm). */
+ int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx,
+ struct FFVkSPIRVShader *shd, uint8_t **data,
+ size_t *size, const char *entrypoint, void **opaque);
+ /* Presumably releases what compile_shader() returned through opaque - confirm */
+ void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque);
+ /* Tear the compiler down; both backends finish with av_freep(ctx) */
+ void (*uninit)(struct FFVkSPIRVCompiler **ctx);
+} FFVkSPIRVCompiler;
+
+#if CONFIG_LIBGLSLANG
+FFVkSPIRVCompiler *ff_vk_glslang_init(void);
+#define ff_vk_spirv_init ff_vk_glslang_init
+#endif
+#if CONFIG_LIBSHADERC
+FFVkSPIRVCompiler *ff_vk_shaderc_init(void);
+#define ff_vk_spirv_init ff_vk_shaderc_init
+#endif
+
+#endif /* AVFILTER_VULKAN_SPIRV_H */
--
2.39.2
[-- Attachment #57: 0056-avgblur_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18269 bytes --]
From b14473b21aa057181ec85e0ea3bac3e5fa053875 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:19 +0100
Subject: [PATCH 56/72] avgblur_vulkan: port for the rewrite
---
libavfilter/vf_avgblur_vulkan.c | 339 ++++++++++----------------------
1 file changed, 108 insertions(+), 231 deletions(-)
diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
index d118ce802c..17b2167951 100644
--- a/libavfilter/vf_avgblur_vulkan.c
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,23 +21,20 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
-#define CGS 32
-
typedef struct AvgBlurVulkanContext {
FFVulkanContext vkctx;
int initialized;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl_hor;
- FFVulkanPipeline *pl_ver;
-
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo tmp_images[3];
- VkDescriptorImageInfo output_images[3];
+ VkSampler sampler;
+ FFVulkanPipeline pl_hor;
+ FFVkSPIRVShader shd_hor;
+ FFVulkanPipeline pl_ver;
+ FFVkSPIRVShader shd_ver;
int size_x;
int size_y;
@@ -71,18 +70,41 @@ static const char blur_kernel[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
- FFVkSPIRVShader *shd;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque;
AvgBlurVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
- FFVulkanDescriptorSetBinding desc_i[2] = {
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "avgblur_hor_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT));
+ RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "avgblur_ver_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT));
+ shd = &s->shd_hor;
+
+ ff_vk_shader_set_compute_sizes(shd, 32, 1, 1);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_img",
@@ -95,238 +117,79 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
},
};
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0));
- desc_i[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!desc_i[0].sampler)
- return AVERROR_EXTERNAL;
-
- { /* Create shader for the horizontal pass */
- desc_i[0].updater = s->input_images;
- desc_i[1].updater = s->tmp_images;
-
- s->pl_hor = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl_hor)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl_hor, "avgblur_compute_hor",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl_hor, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0));
-
- GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x - 1);
- GLSLC(0, #define INC(x) (ivec2(x, 0)) );
- GLSLC(0, #define DIR(var) (var.x) );
- GLSLD( blur_kernel );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->planes & (1 << i)) {
- GLSLF(2, distort(pos, %i); ,i);
- } else {
- GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- }
- GLSLC(1, } );
+ GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x - 1);
+ GLSLC(0, #define INC(x) (ivec2(x, 0)) );
+ GLSLC(0, #define DIR(var) (var.x) );
+ GLSLD( blur_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ for (int i = 0; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ if (s->planes & (1 << i)) {
+ GLSLF(2, distort(pos, %i); ,i);
+ } else {
+ GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
+ GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
}
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
-
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl_hor));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl_hor));
+ GLSLC(1, } );
}
-
- { /* Create shader for the vertical pass */
- desc_i[0].updater = s->tmp_images;
- desc_i[1].updater = s->output_images;
-
- s->pl_ver = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl_ver)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl_ver, "avgblur_compute_ver",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 });
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl_ver, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0));
-
- GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y - 1);
- GLSLC(0, #define INC(x) (ivec2(0, x)) );
- GLSLC(0, #define DIR(var) (var.y) );
- GLSLD( blur_kernel );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->planes & (1 << i)) {
- GLSLF(2, distort(pos, %i); ,i);
- } else {
- GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- }
- GLSLC(1, } );
+ GLSLC(0, } );
+
+ shd = &s->shd_ver;
+ ff_vk_shader_set_compute_sizes(shd, 1, 32, 1);
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0));
+
+ GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y - 1);
+ GLSLC(0, #define INC(x) (ivec2(0, x)) );
+ GLSLC(0, #define DIR(var) (var.y) );
+ GLSLD( blur_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ for (int i = 0; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ if (s->planes & (1 << i)) {
+ GLSLF(2, distort(pos, %i); ,i);
+ } else {
+ GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
+ GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
}
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
-
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl_ver));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl_ver));
+ GLSLC(1, } );
}
+ GLSLC(0, } );
+
+ RET(spv->compile_shader(spv, ctx, &s->shd_hor, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, &s->shd_hor, spv_data, spv_len, "main"));
+ RET(spv->compile_shader(spv, ctx, &s->shd_ver, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, &s->shd_ver, spv_data, spv_len, "main"));
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl_hor, &s->shd_hor));
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl_ver, &s->shd_ver));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl_hor));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl_ver));
s->initialized = 1;
return 0;
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f, AVFrame *in_f)
-{
- int err;
- VkCommandBuffer cmd_buf;
- AvgBlurVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- AVVkFrame *in = (AVVkFrame *)in_f->data[0];
- AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0];
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
-
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->tmp_images[i].imageView, tmp->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl_hor, 0);
- ff_vk_update_descriptor_set(vkctx, s->pl_ver, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = tmp->layout[i],
- .newLayout = s->tmp_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = tmp->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
- in->layout[i] = bar[0].newLayout;
- in->access[i] = bar[0].dstAccessMask;
-
- tmp->layout[i] = bar[1].newLayout;
- tmp->access[i] = bar[1].dstAccessMask;
-
- out->layout[i] = bar[2].newLayout;
- out->access[i] = bar[2].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_hor);
-
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_ver);
-
- vk->CmdDispatch(cmd_buf, s->vkctx.output_width,
- FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx,s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
}
static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
@@ -352,7 +215,9 @@ static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, tmp, in));
+ RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e,
+ (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver },
+ out, tmp, in, s->sampler, NULL, 0));
err = av_frame_copy_props(out, in);
if (err < 0)
@@ -373,6 +238,18 @@ fail:
static void avgblur_vulkan_uninit(AVFilterContext *avctx)
{
AvgBlurVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl_hor);
+ ff_vk_pipeline_free(vkctx, &s->pl_ver);
+ ff_vk_shader_free(vkctx, &s->shd_hor);
+ ff_vk_shader_free(vkctx, &s->shd_ver);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
ff_vk_uninit(&s->vkctx);
--
2.39.2
[-- Attachment #58: 0057-blend_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 16613 bytes --]
From 83edf3b91ffaed33b2103a6ba743487850f5325c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:43 +0100
Subject: [PATCH 57/72] blend_vulkan: port for the rewrite
---
libavfilter/vf_blend_vulkan.c | 315 +++++++++++-----------------------
1 file changed, 102 insertions(+), 213 deletions(-)
diff --git a/libavfilter/vf_blend_vulkan.c b/libavfilter/vf_blend_vulkan.c
index fcc21cbc8d..7ffdc9f3bd 100644
--- a/libavfilter/vf_blend_vulkan.c
+++ b/libavfilter/vf_blend_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* The blend modes are based on the blend.c.
*
* This file is part of FFmpeg.
@@ -22,12 +24,11 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#include "framesync.h"
#include "blend.h"
-#define CGS 32
-
#define IN_TOP 0
#define IN_BOTTOM 1
@@ -40,20 +41,18 @@ typedef struct FilterParamsVulkan {
typedef struct BlendVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
FFFrameSync fs;
- VkDescriptorImageInfo top_images[3];
- VkDescriptorImageInfo bottom_images[3];
- VkDescriptorImageInfo output_images[3];
+ int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
FilterParamsVulkan params[4];
double all_opacity;
enum BlendMode all_mode;
-
- int initialized;
} BlendVulkanContext;
#define DEFINE_BLEND_MODE(MODE, EXPR) \
@@ -125,223 +124,102 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
static av_cold int init_filter(AVFilterContext *avctx)
{
int err = 0;
- FFVkSampler *sampler;
- FFVkSPIRVShader *shd;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* tested at fail:, reachable before any compile_shader() call */
BlendVulkanContext *s = avctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!sampler)
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
-
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- {
- FFVulkanDescriptorSetBinding image_descs[] = {
- {
- .name = "top_images",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->top_images,
- .sampler = sampler,
- },
- {
- .name = "bottom_images",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->bottom_images,
- .sampler = sampler,
- },
- {
- .name = "output_images",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- shd = ff_vk_init_shader(s->pl, "blend_compute", image_descs[0].stages);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, CGS, 1 });
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
- for (int i = 0, j = 0; i < planes; i++) {
- for (j = 0; j < i; j++)
- if (s->params[i].blend_func == s->params[j].blend_func)
- break;
- /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */
- if (j == i) {
- GLSLD(s->params[i].blend_func);
- }
- }
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_images[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i);
- GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i);
- GLSLF(2, const float opacity = %f; ,s->params[i].opacity);
- GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend);
- GLSLC(0, );
- GLSLF(2, imageStore(output_images[%i], pos, dst); ,i);
- GLSLC(1, } );
- }
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
}
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
-
- s->initialized = 1;
-
-fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_frame, AVFrame *top_frame, AVFrame *bottom_frame)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- BlendVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *out = (AVVkFrame *)out_frame->data[0];
- AVVkFrame *top = (AVVkFrame *)top_frame->data[0];
- AVVkFrame *bottom = (AVVkFrame *)bottom_frame->data[0];
-
- AVHWFramesContext *top_fc = (AVHWFramesContext*)top_frame->hw_frames_ctx->data;
- AVHWFramesContext *bottom_fc = (AVHWFramesContext*)bottom_frame->hw_frames_ctx->data;
-
- const VkFormat *top_formats = av_vkfmt_from_pixfmt(top_fc->sw_format);
- const VkFormat *bottom_formats = av_vkfmt_from_pixfmt(bottom_fc->sw_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->top_images[i].imageView, top->img[i],
- top_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->bottom_images[i].imageView, bottom->img[i],
- bottom_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->top_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->bottom_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "blend_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "top_images",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "bottom_images",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_images",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0));
+
+ for (int i = 0, j = 0; i < planes; i++) {
+ for (j = 0; j < i; j++)
+ if (s->params[i].blend_func == s->params[j].blend_func)
+ break;
+ /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */
+ if (j == i) {
+ GLSLD(s->params[i].blend_func);
+ }
}
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = top->layout[i],
- .newLayout = s->top_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = top->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = bottom->layout[i],
- .newLayout = s->bottom_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = bottom->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- top->layout[i] = barriers[0].newLayout;
- top->access[i] = barriers[0].dstAccessMask;
-
- bottom->layout[i] = barriers[1].newLayout;
- bottom->access[i] = barriers[1].dstAccessMask;
-
- out->layout[i] = barriers[2].newLayout;
- out->access[i] = barriers[2].dstAccessMask;
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_images[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i);
+ GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i);
+ GLSLF(2, const float opacity = %f; ,s->params[i].opacity);
+ GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend);
+ GLSLC(0, );
+ GLSLF(2, imageStore(output_images[%i], pos, dst); ,i);
+ GLSLC(1, } );
}
+ GLSLC(0, } );
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS) / CGS,
- FFALIGN(s->vkctx.output_height, CGS) / CGS, 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, top_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, bottom_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+ RET(spv->compile_shader(spv, avctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
- ff_vk_qf_rotate(&s->qf);
-
- return 0;
+ s->initialized = 1;
fail:
- ff_vk_discard_exec_deps(s->exec);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
@@ -375,7 +253,9 @@ static int blend_frame(FFFrameSync *fs)
RET(init_filter(avctx));
}
- RET(process_frames(avctx, out, top, bottom));
+ RET(ff_vk_filter_process_2in(&s->vkctx, &s->e, &s->pl,
+ out, top, bottom,
+ s->sampler, NULL, 0));
return ff_filter_frame(outlink, out);
@@ -396,10 +276,19 @@ static av_cold int init(AVFilterContext *avctx)
static av_cold void uninit(AVFilterContext *avctx)
{
BlendVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
- ff_framesync_uninit(&s->fs);
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
ff_vk_uninit(&s->vkctx);
+ ff_framesync_uninit(&s->fs);
s->initialized = 0;
}
--
2.39.2
[-- Attachment #59: 0058-chromaber_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 14904 bytes --]
From 3328104c3ec2aa1412b5c8ea33ef8a96249acdd9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:53 +0100
Subject: [PATCH 58/72] chromaber_vulkan: port for the rewrite
---
libavfilter/vf_chromaber_vulkan.c | 288 ++++++++++--------------------
1 file changed, 99 insertions(+), 189 deletions(-)
diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c
index b9423e417e..24649f7b25 100644
--- a/libavfilter/vf_chromaber_vulkan.c
+++ b/libavfilter/vf_chromaber_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,21 +21,18 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
typedef struct ChromaticAberrationVulkanContext {
FFVulkanContext vkctx;
int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
-
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
/* Push constants / options */
struct {
@@ -68,205 +67,105 @@ static const char distort_chroma_kernel[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
- FFVkSampler *sampler;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* tested at fail:, reachable before any compile_shader() call */
ChromaticAberrationVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- /* Create a sampler */
- sampler = ff_vk_init_sampler(vkctx, 0, VK_FILTER_LINEAR);
- if (!sampler)
- return AVERROR_EXTERNAL;
-
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
/* Normalize options */
s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f;
s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f;
- { /* Create the shader */
- FFVulkanDescriptorSetBinding desc_i[2] = {
- {
- .name = "input_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
- .sampler = sampler,
- },
- {
- .name = "output_img",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "chromaber_compute",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
- GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
- GLSLC(1, vec2 dist; );
- GLSLC(0, }; );
- GLSLC(0, );
-
- ff_vk_add_push_constant(s->pl, 0, sizeof(s->opts),
- VK_SHADER_STAGE_COMPUTE_BIT);
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
-
- GLSLD( distort_chroma_kernel );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- if (planes == 1) {
- GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); );
- } else {
- GLSLC(1, ivec2 size = imageSize(output_img[0]); );
- GLSLC(1, vec2 npos = vec2(pos)/vec2(size); );
- GLSLC(1, vec4 res = texture(input_img[0], npos); );
- GLSLC(1, imageStore(output_img[0], pos, res); );
- for (int i = 1; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- GLSLF(2, distort_chroma(%i, size, pos); ,i);
- GLSLC(1, } else { );
- GLSLC(2, npos = vec2(pos)/vec2(size); );
- GLSLF(2, res = texture(input_img[%i], npos); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- GLSLC(1, } );
- }
- }
- GLSLC(0, } );
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "chromaber_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, vec2 dist; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
+
+ GLSLD( distort_chroma_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ if (planes == 1) {
+ GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); );
+ } else {
+ GLSLC(1, ivec2 size = imageSize(output_img[0]); );
+ GLSLC(1, vec2 npos = vec2(pos)/vec2(size); );
+ GLSLC(1, vec4 res = texture(input_img[0], npos); );
+ GLSLC(1, imageStore(output_img[0], pos, res); );
+ for (int i = 1; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ GLSLF(2, distort_chroma(%i, size, pos); ,i);
+ GLSLC(1, } else { );
+ GLSLC(2, npos = vec2(pos)/vec2(size); );
+ GLSLF(2, res = texture(input_img[%i], npos); ,i);
+ GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
+ GLSLC(1, } );
+ }
}
+ GLSLC(0, } );
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
s->initialized = 1;
return 0;
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- ChromaticAberrationVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- AVVkFrame *in = (AVVkFrame *)in_f->data[0];
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *ouput_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- ouput_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar[2] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
- in->layout[i] = bar[0].newLayout;
- in->access[i] = bar[0].dstAccessMask;
-
- out->layout[i] = bar[1].newLayout;
- out->access[i] = bar[1].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
- ff_vk_update_push_exec(vkctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT,
- 0, sizeof(s->opts), &s->opts);
-
- vk->CmdDispatch(cmd_buf,
- FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0],
- FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
- ff_vk_qf_rotate(&s->qf);
-
- return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
return err;
}
@@ -286,7 +185,8 @@ static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, in);
if (err < 0)
@@ -305,6 +205,16 @@ fail:
static void chromaber_vulkan_uninit(AVFilterContext *avctx)
{
ChromaticAberrationVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
ff_vk_uninit(&s->vkctx);
--
2.39.2
[-- Attachment #60: 0059-flip_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 13075 bytes --]
From f69abda00b625c1f9d69421e7c6bef6713a43f76 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:12:42 +0100
Subject: [PATCH 59/72] flip_vulkan: port for the rewrite
---
libavfilter/vf_flip_vulkan.c | 229 ++++++++++++-----------------------
1 file changed, 78 insertions(+), 151 deletions(-)
diff --git a/libavfilter/vf_flip_vulkan.c b/libavfilter/vf_flip_vulkan.c
index 0223786ef1..0330dce257 100644
--- a/libavfilter/vf_flip_vulkan.c
+++ b/libavfilter/vf_flip_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -20,10 +22,9 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
-#define CGS 32
-
enum FlipType {
FLIP_VERTICAL,
FLIP_HORIZONTAL,
@@ -32,32 +33,49 @@ enum FlipType {
typedef struct FlipVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
-
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
int initialized;
+ FFVulkanPipeline pl; /* compute pipeline, built in init_filter() */
+ FFVkExecPool e; /* execution pool the pipeline is registered with */
+ FFVkQueueFamilyCtx qf; /* queue family, set up with VK_QUEUE_COMPUTE_BIT */
+ FFVkSPIRVShader shd; /* the "flip_compute" shader */
+ VkSampler sampler; /* input sampler; destroyed in flip_vulkan_uninit() if set */
} FlipVulkanContext;
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType type)
{
int err = 0;
- FFVkSPIRVShader *shd;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* stays NULL until compile_shader() sets it; the fail path tests it before freeing */
FlipVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "flip_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
- FFVulkanDescriptorSetBinding image_descs[] = {
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_image",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_image",
@@ -67,167 +85,75 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
},
};
- image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- {
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl, "flip_compute", image_descs[0].stages);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_image[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- switch (type)
- {
- case FLIP_HORIZONTAL:
- GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i);
- break;
- case FLIP_VERTICAL:
- GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i);
- break;
- case FLIP_BOTH:
- GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i);
- break;
- default:
- GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i);
- break;
- }
- GLSLF(2, imageStore(output_image[%i], pos, res); ,i);
- GLSLC(1, } );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ for (int i = 0; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_image[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ switch (type)
+ {
+ case FLIP_HORIZONTAL:
+ GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i);
+ break;
+ case FLIP_VERTICAL:
+ GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i);
+ break;
+ case FLIP_BOTH:
+ GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i);
+ break;
+ default:
+ GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i);
+ break;
}
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+ GLSLF(2, imageStore(output_image[%i], pos, res); ,i);
+ GLSLC(1, } );
}
+ GLSLC(0, } );
+
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
s->initialized = 1;
fail:
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
static av_cold void flip_vulkan_uninit(AVFilterContext *avctx)
{
FlipVulkanContext *s = avctx->priv;
- ff_vk_uninit(&s->vkctx);
- s->initialized = 0;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- FlipVulkanContext *s = avctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *out = (AVVkFrame *)outframe->data[0];
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
+ FFVulkanFunctions *vk = &vkctx->vkfn;
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- in->layout[i] = barriers[0].newLayout;
- in->access[i] = barriers[0].dstAccessMask;
-
- out->layout[i] = barriers[1].newLayout;
- out->access[i] = barriers[1].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_qf_rotate(&s->qf);
+ ff_vk_uninit(&s->vkctx);
- return 0;
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
+ s->initialized = 0;
}
static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type)
@@ -247,7 +173,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type)
if (!s->initialized)
RET(init_filter(ctx, in, type));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, NULL, 0));
RET(av_frame_copy_props(out, in));
--
2.39.2
[-- Attachment #61: 0060-gblur_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 17658 bytes --]
From 369e41818f25c68097764dd417cd03b6984e3ce6 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:12:55 +0100
Subject: [PATCH 60/72] gblur_vulkan: port for the rewrite
---
libavfilter/vf_gblur_vulkan.c | 314 ++++++++++------------------------
1 file changed, 95 insertions(+), 219 deletions(-)
diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c
index c6360799a7..72308ffe83 100644
--- a/libavfilter/vf_gblur_vulkan.c
+++ b/libavfilter/vf_gblur_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -20,6 +22,7 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#define CGS 32
@@ -27,26 +30,23 @@
typedef struct GBlurVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl_hor;
- FFVulkanPipeline *pl_ver;
- FFVkBuffer params_buf_hor;
- FFVkBuffer params_buf_ver;
-
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo tmp_images[3];
- VkDescriptorImageInfo output_images[3];
- VkDescriptorBufferInfo params_desc_hor;
- VkDescriptorBufferInfo params_desc_ver;
int initialized;
+ FFVkExecPool e; /* execution pool both pipelines are registered with */
+ FFVkQueueFamilyCtx qf; /* queue family, set up with VK_QUEUE_COMPUTE_BIT */
+ VkSampler sampler; /* input sampler shared by both passes; destroyed in uninit if set */
+ FFVulkanPipeline pl_hor; /* horizontal pass pipeline */
+ FFVkSPIRVShader shd_hor; /* "gblur_hor_compute" shader */
+ FFVkBuffer params_hor; /* kernel weights for the horizontal pass (size, sigma) */
+ FFVulkanPipeline pl_ver; /* vertical pass pipeline */
+ FFVkSPIRVShader shd_ver; /* "gblur_ver_compute" shader */
+ FFVkBuffer params_ver; /* kernel weights for the vertical pass (sizeV, sigmaV) */
+
int size;
int sizeV;
int planes;
float sigma;
float sigmaV;
- AVFrame *tmpframe;
} GBlurVulkanContext;
static const char gblur_func[] = {
@@ -118,16 +118,17 @@ static av_cold void init_gaussian_params(GBlurVulkanContext *s)
s->sizeV = s->size;
else
init_kernel_size(s, &s->sizeV);
-
- s->tmpframe = NULL;
}
-static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd,
- FFVkBuffer *params_buf, VkDescriptorBufferInfo *params_desc,
- int ksize, float sigma)
+static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd, FFVkBuffer *params_buf,
+ int ksize, float sigma, FFVkSPIRVCompiler *spv)
{
int err = 0;
uint8_t *kernel_mapped;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* stays NULL until compile_shader() sets it; the fail path tests it before freeing */
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
@@ -137,7 +138,6 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
.mem_quali = "readonly",
.mem_layout = "std430",
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = NULL,
.buf_content = NULL,
};
@@ -145,10 +145,9 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
if (!kernel_def)
return AVERROR(ENOMEM);
- buf_desc.updater = params_desc;
buf_desc.buf_content = kernel_def;
- RET(ff_vk_add_descriptor_set(&s->vkctx, pl, shd, &buf_desc, 1, 0));
+ RET(ff_vk_pipeline_descriptor_set_add(&s->vkctx, pl, shd, &buf_desc, 1, 1, 0));
GLSLD( gblur_func );
GLSLC(0, void main() );
@@ -169,26 +168,31 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
}
GLSLC(0, } );
- RET(ff_vk_compile_shader(&s->vkctx, shd, "main"));
+ RET(spv->compile_shader(spv, s, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(&s->vkctx, shd, spv_data, spv_len, "main"));
- RET(ff_vk_init_pipeline_layout(&s->vkctx, pl));
- RET(ff_vk_init_compute_pipeline(&s->vkctx, pl));
+ RET(ff_vk_init_compute_pipeline(&s->vkctx, pl, shd));
+ RET(ff_vk_exec_pipeline_register(&s->vkctx, &s->e, pl));
- RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+ RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL,
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0));
init_gaussian_kernel((float *)kernel_mapped, sigma, ksize);
RET(ff_vk_unmap_buffers(&s->vkctx, params_buf, 1, 1));
- params_desc->buffer = params_buf->buf;
- params_desc->range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(&s->vkctx, pl, 1);
+ RET(ff_vk_set_descriptor_buffer(&s->vkctx, pl, NULL, 1, 0, 0,
+ params_buf->address, params_buf->size,
+ VK_FORMAT_UNDEFINED));
fail:
av_free(kernel_def);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
return err;
}
@@ -196,16 +200,35 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err = 0;
GBlurVulkanContext *s = ctx->priv;
- FFVkSPIRVShader *shd;
+ FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
- FFVulkanDescriptorSetBinding image_descs[] = {
+ FFVkSPIRVShader *shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "gblur_hor_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT));
+ RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "gblur_ver_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT));
+
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_images",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_images",
@@ -218,215 +241,64 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
},
};
- image_descs[0].sampler = ff_vk_init_sampler(&s->vkctx, 1, VK_FILTER_LINEAR);
- if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
-
init_gaussian_params(s);
- ff_vk_qf_init(&s->vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
{
- /* Create shader for the horizontal pass */
- image_descs[0].updater = s->input_images;
- image_descs[1].updater = s->tmp_images;
-
- s->pl_hor = ff_vk_create_pipeline(&s->vkctx, &s->qf);
- if (!s->pl_hor) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
-
- shd = ff_vk_init_shader(s->pl_hor, "gblur_compute_hor", image_descs[0].stages);
- if (!shd) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ shd = &s->shd_hor;
+ ff_vk_shader_set_compute_sizes(shd, 32, 1, 1);
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
- RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_hor, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0));
GLSLC(0, #define OFFSET (vec2(i, 0.0)));
- RET(init_gblur_pipeline(s, s->pl_hor, shd, &s->params_buf_hor, &s->params_desc_hor,
- s->size, s->sigma));
+ RET(init_gblur_pipeline(s, &s->pl_hor, shd, &s->params_hor, s->size, s->sigma, spv));
}
{
- /* Create shader for the vertical pass */
- image_descs[0].updater = s->tmp_images;
- image_descs[1].updater = s->output_images;
-
- s->pl_ver = ff_vk_create_pipeline(&s->vkctx, &s->qf);
- if (!s->pl_ver) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ shd = &s->shd_ver;
+ ff_vk_shader_set_compute_sizes(shd, 1, 32, 1);
- shd = ff_vk_init_shader(s->pl_ver, "gblur_compute_ver", image_descs[0].stages);
- if (!shd) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 });
- RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_ver, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0));
GLSLC(0, #define OFFSET (vec2(0.0, i)));
- RET(init_gblur_pipeline(s, s->pl_ver, shd, &s->params_buf_ver, &s->params_desc_ver,
- s->sizeV, s->sigmaV));
+ RET(init_gblur_pipeline(s, &s->pl_ver, shd, &s->params_ver, s->sizeV, s->sigmaV, spv));
}
- RET(ff_vk_create_exec_ctx(&s->vkctx, &s->exec, &s->qf));
-
s->initialized = 1;
fail:
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
static av_cold void gblur_vulkan_uninit(AVFilterContext *avctx)
{
GBlurVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
- av_frame_free(&s->tmpframe);
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl_hor);
+ ff_vk_pipeline_free(vkctx, &s->pl_ver);
+ ff_vk_shader_free(vkctx, &s->shd_hor);
+ ff_vk_shader_free(vkctx, &s->shd_ver);
+ ff_vk_free_buf(vkctx, &s->params_hor);
+ ff_vk_free_buf(vkctx, &s->params_ver);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_free_buf(&s->vkctx, &s->params_buf_hor);
- ff_vk_free_buf(&s->vkctx, &s->params_buf_ver);
ff_vk_uninit(&s->vkctx);
s->initialized = 0;
}
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
- int err;
- VkCommandBuffer cmd_buf;
- GBlurVulkanContext *s = avctx->priv;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
-
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *out = (AVVkFrame *)outframe->data[0];
- AVVkFrame *tmp = (AVVkFrame *)s->tmpframe->data[0];
-
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(&s->vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->input_images[i].imageView,
- in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->tmp_images[i].imageView,
- tmp->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->output_images[i].imageView,
- out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(&s->vkctx, s->pl_hor, 0);
- ff_vk_update_descriptor_set(&s->vkctx, s->pl_ver, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = tmp->layout[i],
- .newLayout = s->tmp_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = tmp->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- in->layout[i] = barriers[0].newLayout;
- in->access[i] = barriers[0].dstAccessMask;
-
- tmp->layout[i] = barriers[1].newLayout;
- tmp->access[i] = barriers[1].dstAccessMask;
-
- out->layout[i] = barriers[2].newLayout;
- out->access[i] = barriers[2].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_hor);
-
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
-
- ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_ver);
-
- vk->CmdDispatch(cmd_buf,s->vkctx.output_width,
- FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
-
- ff_vk_add_exec_dep(&s->vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(&s->vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(&s->vkctx, s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
-
- return 0;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
-}
-
static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
{
int err;
- AVFrame *out = NULL;
+ AVFrame *tmp = NULL, *out = NULL;
AVFilterContext *ctx = link->dst;
GBlurVulkanContext *s = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
@@ -437,28 +309,32 @@ static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
goto fail;
}
- if (!s->initialized) {
- RET(init_filter(ctx, in));
- s->tmpframe = ff_get_video_buffer(outlink, outlink->w, outlink->h);
- if (!s->tmpframe) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!tmp) {
+ err = AVERROR(ENOMEM);
+ goto fail;
}
- RET(process_frames(ctx, out, in));
+ if (!s->initialized)
+ RET(init_filter(ctx, in));
- RET(av_frame_copy_props(out, in));
+ RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e,
+ (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver },
+ out, tmp, in, s->sampler, NULL, 0));
+
+ err = av_frame_copy_props(out, in);
+ if (err < 0)
+ goto fail;
av_frame_free(&in);
+ av_frame_free(&tmp);
return ff_filter_frame(outlink, out);
fail:
av_frame_free(&in);
+ av_frame_free(&tmp);
av_frame_free(&out);
- av_frame_free(&s->tmpframe);
-
return err;
}
--
2.39.2
[-- Attachment #62: 0061-overlay_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18798 bytes --]
From 1a4987ea3171409cc15b7ea85c2d483cf155378e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:05 +0100
Subject: [PATCH 61/72] overlay_vulkan: port for the rewrite
---
libavfilter/vf_overlay_vulkan.c | 397 ++++++++++----------------------
1 file changed, 122 insertions(+), 275 deletions(-)
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index bdf231f4ef..694cb666d8 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,26 +21,26 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#include "framesync.h"
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
typedef struct OverlayVulkanContext {
FFVulkanContext vkctx;
+ FFFrameSync fs;
int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
- FFFrameSync fs;
- FFVkBuffer params_buf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo main_images[3];
- VkDescriptorImageInfo overlay_images[3];
- VkDescriptorImageInfo output_images[3];
- VkDescriptorBufferInfo params_desc;
+ /* Push constants / options */
+ struct {
+ int32_t o_offset[2*3];
+ int32_t o_size[2*3];
+ } opts;
int overlay_x;
int overlay_y;
@@ -80,279 +82,113 @@ static const char overlay_alpha[] = {
static av_cold int init_filter(AVFilterContext *ctx)
{
int err;
- FFVkSampler *sampler;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque;
OverlayVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_NEAREST);
- if (!sampler)
+ const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
+ const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
-
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- { /* Create the shader */
- const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
-
- FFVulkanDescriptorSetBinding desc_i[3] = {
- {
- .name = "main_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->main_images,
- .sampler = sampler,
- },
- {
- .name = "overlay_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->overlay_images,
- .sampler = sampler,
- },
- {
- .name = "output_img",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- FFVulkanDescriptorSetBinding desc_b = {
- .name = "params",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .mem_quali = "readonly",
- .mem_layout = "std430",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = &s->params_desc,
- .buf_content = "ivec2 o_offset[3], o_size[3];",
- };
-
- FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "overlay_compute",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
-
- GLSLD( overlay_noalpha );
- GLSLD( overlay_alpha );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- GLSLF(1, int planes = %i; ,planes);
- GLSLC(1, for (int i = 0; i < planes; i++) { );
- if (ialpha)
- GLSLC(2, overlay_alpha_opaque(i, pos); );
- else
- GLSLC(2, overlay_noalpha(i, pos); );
- GLSLC(1, } );
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- }
-
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
-
- { /* Create and update buffer */
- const AVPixFmtDescriptor *desc;
-
- /* NOTE: std430 requires the same identical struct layout, padding and
- * alignment as C, so we're allowed to do this, as this will map
- * exactly to what the shader recieves */
- struct {
- int32_t o_offset[2*3];
- int32_t o_size[2*3];
- } *par;
-
- err = ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par), NULL,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
- if (err)
- return err;
-
- err = ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0);
- if (err)
- return err;
-
- desc = av_pix_fmt_desc_get(s->vkctx.output_format);
-
- par->o_offset[0] = s->overlay_x;
- par->o_offset[1] = s->overlay_y;
- par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w;
- par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h;
- par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w;
- par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h;
-
- par->o_size[0] = s->overlay_w;
- par->o_size[1] = s->overlay_h;
- par->o_size[2] = par->o_size[0] >> desc->log2_chroma_w;
- par->o_size[3] = par->o_size[1] >> desc->log2_chroma_h;
- par->o_size[4] = par->o_size[0] >> desc->log2_chroma_w;
- par->o_size[5] = par->o_size[1] >> desc->log2_chroma_h;
-
- err = ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1);
- if (err)
- return err;
-
- s->params_desc.buffer = s->params_buf.buf;
- s->params_desc.range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 1);
}
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "overlay_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, ivec2 o_offset[3]; );
+ GLSLC(1, ivec2 o_size[3]; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "main_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "overlay_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0));
+
+ GLSLD( overlay_noalpha );
+ GLSLD( overlay_alpha );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, int planes = %i; ,planes);
+ GLSLC(1, for (int i = 0; i < planes; i++) { );
+ if (ialpha)
+ GLSLC(2, overlay_alpha_opaque(i, pos); );
+ else
+ GLSLC(2, overlay_noalpha(i, pos); );
+ GLSLC(1, } );
+ GLSLC(0, } );
+
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
+
+ s->opts.o_offset[0] = s->overlay_x;
+ s->opts.o_offset[1] = s->overlay_y;
+ s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h;
+ s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h;
+
+ s->opts.o_size[0] = s->overlay_w;
+ s->opts.o_size[1] = s->overlay_h;
+ s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h;
+ s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h;
s->initialized = 1;
- return 0;
-
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
- AVFrame *main_f, AVFrame *overlay_f)
-{
- int err;
- VkCommandBuffer cmd_buf;
- OverlayVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
- AVVkFrame *main = (AVVkFrame *)main_f->data[0];
- AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0];
-
- AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data;
- AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data;
-
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
- const VkFormat *main_sw_formats = av_vkfmt_from_pixfmt(main_fc->sw_format);
- const VkFormat *overlay_sw_formats = av_vkfmt_from_pixfmt(overlay_fc->sw_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->main_images[i].imageView, main->img[i],
- main_sw_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->overlay_images[i].imageView, overlay->img[i],
- overlay_sw_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->main_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar[3] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = main->layout[i],
- .newLayout = s->main_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = main->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = overlay->layout[i],
- .newLayout = s->overlay_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = overlay->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
- main->layout[i] = bar[0].newLayout;
- main->access[i] = bar[0].dstAccessMask;
-
- overlay->layout[i] = bar[1].newLayout;
- overlay->access[i] = bar[1].dstAccessMask;
-
- out->layout[i] = bar[2].newLayout;
- out->access[i] = bar[2].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
- vk->CmdDispatch(cmd_buf,
- FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0],
- FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, main_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, overlay_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
- ff_vk_qf_rotate(&s->qf);
-
- return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
return err;
}
@@ -394,7 +230,9 @@ static int overlay_vulkan_blend(FFFrameSync *fs)
goto fail;
}
- RET(process_frames(ctx, out, input_main, input_overlay));
+ RET(ff_vk_filter_process_2in(&s->vkctx, &s->e, &s->pl,
+ out, input_main, input_overlay,
+ s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, input_main);
if (err < 0)
@@ -443,8 +281,17 @@ static av_cold int overlay_vulkan_init(AVFilterContext *avctx)
static void overlay_vulkan_uninit(AVFilterContext *avctx)
{
OverlayVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_free_buf(&s->vkctx, &s->params_buf);
ff_vk_uninit(&s->vkctx);
ff_framesync_uninit(&s->fs);
--
2.39.2
[-- Attachment #63: 0062-scale_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18951 bytes --]
From 4ec8834fa164e172420cd162d4a51735fbddd986 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:32 +0100
Subject: [PATCH 62/72] scale_vulkan: port for the rewrite
---
libavfilter/vf_scale_vulkan.c | 365 ++++++++++++----------------------
1 file changed, 124 insertions(+), 241 deletions(-)
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 31dc35569b..84bd19c012 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,12 +21,11 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "scale_eval.h"
#include "internal.h"
#include "colorspace.h"
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
enum ScalerFunc {
F_BILINEAR = 0,
F_NEAREST,
@@ -35,15 +36,17 @@ enum ScalerFunc {
typedef struct ScaleVulkanContext {
FFVulkanContext vkctx;
+ int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
- FFVkBuffer params_buf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
- VkDescriptorBufferInfo params_desc;
+ /* Push constants / options */
+ struct {
+ float yuv_matrix[4][4];
+ } opts;
char *out_format_string;
char *w_expr;
@@ -51,8 +54,6 @@ typedef struct ScaleVulkanContext {
enum ScalerFunc scaler;
enum AVColorRange out_range;
-
- int initialized;
} ScaleVulkanContext;
static const char scale_bilinear[] = {
@@ -110,10 +111,15 @@ static const char write_444[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
- FFVkSampler *sampler;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* tested under fail: even if compile_shader never ran */
VkFilter sampler_mode;
ScaleVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
int crop_x = in->crop_left;
int crop_y = in->crop_top;
@@ -121,8 +127,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
int crop_h = in->height - (in->crop_top + in->crop_bottom);
int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
switch (s->scaler) {
case F_NEAREST:
sampler_mode = VK_FILTER_NEAREST;
@@ -132,264 +136,133 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
break;
};
- /* Create a sampler */
- sampler = ff_vk_init_sampler(vkctx, 0, sampler_mode);
- if (!sampler)
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
+ }
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- { /* Create the shader */
- FFVulkanDescriptorSetBinding desc_i[2] = {
- {
- .name = "input_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = in_planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
- .sampler = sampler,
- },
- {
- .name = "output_img",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = av_pix_fmt_count_planes(s->vkctx.output_format),
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- FFVulkanDescriptorSetBinding desc_b = {
- .name = "params",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .mem_quali = "readonly",
- .mem_layout = "std430",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = &s->params_desc,
- .buf_content = "mat4 yuv_matrix;",
- };
-
- FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "scale_compute",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
-
- GLSLD( scale_bilinear );
-
- if (s->vkctx.output_format != s->vkctx.input_format) {
- GLSLD( rgb2yuv );
- }
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "scale_compute", VK_SHADER_STAGE_COMPUTE_BIT));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, mat4 yuv_matrix; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = in_planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = av_pix_fmt_count_planes(s->vkctx.output_format),
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
- switch (s->vkctx.output_format) {
- case AV_PIX_FMT_NV12: GLSLD(write_nv12); break;
- case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
- case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
- default: break;
- }
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height);
- GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h);
- GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y);
- GLSLC(0, );
-
- if (s->vkctx.output_format == s->vkctx.input_format) {
- for (int i = 0; i < desc_i[1].elems; i++) {
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- switch (s->scaler) {
- case F_NEAREST:
- case F_BILINEAR:
- GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- break;
- };
- GLSLC(1, } );
- }
- } else {
- GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); );
- GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG);
- switch (s->vkctx.output_format) {
- case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break;
- case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break;
- case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break;
- default: return AVERROR(EINVAL);
- }
- }
+ GLSLD( scale_bilinear );
+
+ if (s->vkctx.output_format != s->vkctx.input_format) {
+ GLSLD( rgb2yuv );
+ }
- GLSLC(0, } );
+ switch (s->vkctx.output_format) {
+ case AV_PIX_FMT_NV12: GLSLD(write_nv12); break;
+ case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
+ case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
+ default: break;
+ }
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height);
+ GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h);
+ GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y);
+ GLSLC(0, );
+
+ if (s->vkctx.output_format == s->vkctx.input_format) {
+ for (int i = 0; i < desc[1].elems; i++) { /* desc[1] is output_img: one pass per output plane */
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ switch (s->scaler) {
+ case F_NEAREST:
+ case F_BILINEAR:
+ GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i);
+ GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
+ break;
+ };
+ GLSLC(1, } );
+ }
+ } else {
+ GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); );
+ GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG);
+ switch (s->vkctx.output_format) {
+ case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break;
+ case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break;
+ case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break;
+ default: spv->uninit(&spv); return AVERROR(EINVAL); /* nothing compiled yet, only the compiler to release */
+ }
}
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+ GLSLC(0, } );
if (s->vkctx.output_format != s->vkctx.input_format) {
const AVLumaCoefficients *lcoeffs;
double tmp_mat[3][3];
- struct {
- float yuv_matrix[4][4];
- } *par;
-
lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace);
if (!lcoeffs) {
av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n");
return AVERROR(EINVAL);
}
- RET(ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par), NULL,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
-
- RET(ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0));
-
ff_fill_rgb2yuv_table(lcoeffs, tmp_mat);
- memset(par, 0, sizeof(*par));
-
for (int y = 0; y < 3; y++)
for (int x = 0; x < 3; x++)
- par->yuv_matrix[x][y] = tmp_mat[x][y];
-
- par->yuv_matrix[3][3] = 1.0;
-
- RET(ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1));
-
- s->params_desc.buffer = s->params_buf.buf;
- s->params_desc.range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 1);
+ s->opts.yuv_matrix[x][y] = tmp_mat[x][y];
+ s->opts.yuv_matrix[3][3] = 1.0;
}
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
s->initialized = 1;
- return 0;
+ /* success falls through to fail: so the SPIR-V compiler and shader blob are freed too */
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- ScaleVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- AVVkFrame *in = (AVVkFrame *)in_f->data[0];
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
- VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2];
- int barrier_count = 0;
- const int planes = av_pix_fmt_count_planes(s->vkctx.input_format);
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- };
-
- memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier));
-
- in->layout[i] = bar.newLayout;
- in->access[i] = bar.dstAccessMask;
- }
-
- for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) {
- VkImageMemoryBarrier bar = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- };
-
- memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier));
-
- out->layout[i] = bar.newLayout;
- out->access[i] = bar.dstAccessMask;
- }
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, barrier_count, barriers);
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
- vk->CmdDispatch(cmd_buf,
- FFALIGN(vkctx->output_width, CGROUPS[0])/CGROUPS[0],
- FFALIGN(vkctx->output_height, CGROUPS[1])/CGROUPS[1], 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
}
static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
@@ -408,7 +281,8 @@ static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, in);
if (err < 0)
@@ -475,8 +349,17 @@ static int scale_vulkan_config_output(AVFilterLink *outlink)
static void scale_vulkan_uninit(AVFilterContext *avctx)
{
ScaleVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_free_buf(&s->vkctx, &s->params_buf);
ff_vk_uninit(&s->vkctx);
s->initialized = 0;
--
2.39.2
[-- Attachment #64: 0063-transpose_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 12391 bytes --]
From ec245a2b213f82a52b9a5120062ab4f620519100 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:43 +0100
Subject: [PATCH 63/72] transpose_vulkan: port for the rewrite
---
libavfilter/vf_transpose_vulkan.c | 223 ++++++++++--------------------
1 file changed, 75 insertions(+), 148 deletions(-)
diff --git a/libavfilter/vf_transpose_vulkan.c b/libavfilter/vf_transpose_vulkan.c
index 30d052e08c..36f286b219 100644
--- a/libavfilter/vf_transpose_vulkan.c
+++ b/libavfilter/vf_transpose_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -20,41 +22,59 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#include "transpose.h"
-#define CGS 32
-
typedef struct TransposeVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
+ int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
int dir;
int passthrough;
- int initialized;
} TransposeVulkanContext;
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
- int err = 0;
- FFVkSPIRVShader *shd;
+ int err;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque;
TransposeVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
+
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "transpose_compute", VK_SHADER_STAGE_COMPUTE_BIT));
- FFVulkanDescriptorSetBinding image_descs[] = {
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 1, 1);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_images",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_images",
@@ -64,154 +84,49 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
},
};
- image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
-
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- {
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl, "transpose_compute", image_descs[0].stages);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_images[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->dir == TRANSPOSE_CCLOCK)
- GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
- else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
- GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i);
- if (s->dir == TRANSPOSE_CLOCK)
- GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); );
- } else
- GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i);
- GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
- GLSLC(1, } );
- }
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
- }
-
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
- s->initialized = 1;
-
-fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- TransposeVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *out = (AVVkFrame *)outframe->data[0];
-
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- in->layout[i] = barriers[0].newLayout;
- in->access[i] = barriers[0].dstAccessMask;
-
- out->layout[i] = barriers[1].newLayout;
- out->access[i] = barriers[1].dstAccessMask;
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_images[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ if (s->dir == TRANSPOSE_CCLOCK)
+ GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
+ else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
+ GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i);
+ if (s->dir == TRANSPOSE_CLOCK)
+ GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); );
+ } else
+ GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i);
+ GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
+ GLSLC(1, } );
}
+ GLSLC(0, } );
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
- ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
+    s->initialized = 1;
+
+    /* The compiler instance and the compiled blob are only needed while the
+     * pipeline is being built. The success path returns without passing
+     * through the cleanup under fail:, so release them here as well. */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    spv->uninit(&spv);
+
return 0;
fail:
- ff_vk_discard_exec_deps(s->exec);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
@@ -235,7 +150,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, NULL, 0));
RET(av_frame_copy_props(out, in));
@@ -259,6 +175,17 @@ fail:
static av_cold void transpose_vulkan_uninit(AVFilterContext *avctx)
{
TransposeVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
+
ff_vk_uninit(&s->vkctx);
s->initialized = 0;
--
2.39.2
[-- Attachment #65: 0064-avcodec-add-AVHWAccel.free_frame_priv-callback.patch --]
[-- Type: text/x-diff, Size: 7769 bytes --]
From dbf81f602283527ea27d7ddac58e8ff648fc5557 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 18:03:05 +0100
Subject: [PATCH 64/72] avcodec: add AVHWAccel.free_frame_priv callback
---
libavcodec/av1dec.c | 4 ++--
libavcodec/avcodec.h | 8 ++++++++
libavcodec/decode.c | 19 +++++++++++++++++++
libavcodec/decode.h | 11 +++++++++++
libavcodec/h264_slice.c | 3 ++-
libavcodec/hevc_refs.c | 3 ++-
libavcodec/mpegpicture.c | 4 +++-
libavcodec/vp8.c | 2 +-
libavcodec/vp9.c | 2 +-
9 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
index d83c902f1f..d105835d51 100644
--- a/libavcodec/av1dec.c
+++ b/libavcodec/av1dec.c
@@ -24,6 +24,7 @@
#include "libavutil/pixdesc.h"
#include "libavutil/opt.h"
#include "avcodec.h"
+#include "decode.h"
#include "av1dec.h"
#include "bytestream.h"
#include "codec_internal.h"
@@ -836,8 +837,7 @@ static int av1_frame_alloc(AVCodecContext *avctx, AV1Frame *f)
if (avctx->hwaccel) {
const AVHWAccel *hwaccel = avctx->hwaccel;
if (hwaccel->frame_priv_data_size) {
- f->hwaccel_priv_buf =
- av_buffer_allocz(hwaccel->frame_priv_data_size);
+ f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, hwaccel);
if (!f->hwaccel_priv_buf) {
ret = AVERROR(ENOMEM);
goto fail;
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 17416791a6..6babfc7132 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2206,6 +2206,14 @@ typedef struct AVHWAccel {
* that avctx->hwaccel_priv_data is invalid.
*/
int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
+
+ /**
+ * Callback to free the hwaccel-specific frame data.
+ *
+ * @param avctx the codec context
+ * @param data the per-frame hardware accelerator private data to be freed.
+ */
+ void (*free_frame_priv)(AVCodecContext *avctx, void *data);
} AVHWAccel;
/**
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 93ecd36c2b..b9a2ec84f6 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1675,3 +1675,22 @@ int ff_copy_palette(void *dst, const AVPacket *src, void *logctx)
}
return 0;
}
+
+/* Allocate zeroed per-frame hwaccel private data of
+ * hwaccel->frame_priv_data_size bytes and wrap it in an AVBufferRef.
+ * hwaccel->free_frame_priv is installed as the buffer's free callback, with
+ * avctx as the opaque argument. Returns NULL if either allocation fails.
+ *
+ * NOTE(review): free_frame_priv is declared as
+ * void (*)(AVCodecContext *, void *) but is invoked by AVBuffer through the
+ * void (*)(void *, uint8_t *) type it is cast to here - confirm this is
+ * acceptable on all supported ABIs. */
+AVBufferRef *ff_alloc_hwaccel_frame_priv_data(AVCodecContext *avctx,
+                                              const AVHWAccel *hwaccel)
+{
+    AVBufferRef *wrapped = NULL;
+    uint8_t *priv = av_mallocz(hwaccel->frame_priv_data_size);
+
+    if (priv) {
+        wrapped = av_buffer_create(priv, hwaccel->frame_priv_data_size,
+                                   (void (*)(void *, uint8_t *))hwaccel->free_frame_priv,
+                                   avctx, 0);
+        /* The wrapper was never created, so the payload is still ours to free. */
+        if (!wrapped)
+            av_free(priv);
+    }
+
+    return wrapped;
+}
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 8430ffbd66..aa40baafc0 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -150,4 +150,15 @@ int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame, int flags);
int ff_side_data_update_matrix_encoding(AVFrame *frame,
enum AVMatrixEncoding matrix_encoding);
+/**
+ * Allocate zero-initialized per-frame hwaccel private data and wrap it in
+ * an AVBufferRef.
+ *
+ * @param avctx   The codec context, attached to the buffer as the opaque
+ *                value handed to the hwaccel's free_frame_priv callback
+ * @param hwaccel The hwaccel whose frame_priv_data_size gives the size of
+ *                the allocation
+ * @return A new buffer reference, or NULL if allocation failed
+ */
+AVBufferRef *ff_alloc_hwaccel_frame_priv_data(AVCodecContext *avctx,
+ const AVHWAccel *hwaccel);
+
#endif /* AVCODEC_DECODE_H */
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 8ac66b343c..c0aa31bcd9 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -33,6 +33,7 @@
#include "libavutil/pixdesc.h"
#include "libavutil/timecode.h"
#include "internal.h"
+#include "decode.h"
#include "cabac.h"
#include "cabac_functions.h"
#include "decode.h"
@@ -212,7 +213,7 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
const AVHWAccel *hwaccel = h->avctx->hwaccel;
av_assert0(!pic->hwaccel_picture_private);
if (hwaccel->frame_priv_data_size) {
- pic->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+ pic->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(h->avctx, hwaccel);
if (!pic->hwaccel_priv_buf)
return AVERROR(ENOMEM);
pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data;
diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
index 811e8feff8..30cbb8b37a 100644
--- a/libavcodec/hevc_refs.c
+++ b/libavcodec/hevc_refs.c
@@ -23,6 +23,7 @@
#include "libavutil/avassert.h"
+#include "decode.h"
#include "thread.h"
#include "hevc.h"
#include "hevcdec.h"
@@ -118,7 +119,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
const AVHWAccel *hwaccel = s->avctx->hwaccel;
av_assert0(!frame->hwaccel_picture_private);
if (hwaccel->frame_priv_data_size) {
- frame->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+ frame->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(s->avctx, hwaccel);
if (!frame->hwaccel_priv_buf)
goto fail;
frame->hwaccel_picture_private = frame->hwaccel_priv_buf->data;
diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c
index 977bc65191..a1d58f04b3 100644
--- a/libavcodec/mpegpicture.c
+++ b/libavcodec/mpegpicture.c
@@ -27,6 +27,8 @@
#include "avcodec.h"
#include "encode.h"
+#include "internal.h"
+#include "decode.h"
#include "motion_est.h"
#include "mpegpicture.h"
#include "mpegutils.h"
@@ -172,7 +174,7 @@ static int alloc_frame_buffer(AVCodecContext *avctx, Picture *pic,
if (avctx->hwaccel) {
assert(!pic->hwaccel_picture_private);
if (avctx->hwaccel->frame_priv_data_size) {
- pic->hwaccel_priv_buf = av_buffer_allocz(avctx->hwaccel->frame_priv_data_size);
+ pic->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, avctx->hwaccel);
if (!pic->hwaccel_priv_buf) {
av_log(avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
return -1;
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index db2419deaf..4c23eb5672 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -109,7 +109,7 @@ static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
if (s->avctx->hwaccel) {
const AVHWAccel *hwaccel = s->avctx->hwaccel;
if (hwaccel->frame_priv_data_size) {
- f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+ f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(s->avctx, hwaccel);
if (!f->hwaccel_priv_buf)
goto fail;
f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 7c0a246446..4f345f18db 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -136,7 +136,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
const AVHWAccel *hwaccel = avctx->hwaccel;
av_assert0(!f->hwaccel_picture_private);
if (hwaccel->frame_priv_data_size) {
- f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+ f->hwaccel_priv_buf = ff_alloc_hwaccel_frame_priv_data(avctx, hwaccel);
if (!f->hwaccel_priv_buf)
goto fail;
f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
--
2.39.2
[-- Attachment #66: 0065-avcodec-add-AVHWAccel.flush-callback.patch --]
[-- Type: text/x-diff, Size: 3020 bytes --]
From 93223fa95389c60c015cfcee22784a1bf0fdb05b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 6 Jan 2023 03:32:56 +0100
Subject: [PATCH 65/72] avcodec: add AVHWAccel.flush callback
---
libavcodec/av1dec.c | 3 +++
libavcodec/avcodec.h | 5 +++++
libavcodec/h264dec.c | 3 +++
libavcodec/hevcdec.c | 3 +++
libavcodec/vp8.c | 3 +++
libavcodec/vp9.c | 3 +++
6 files changed, 20 insertions(+)
diff --git a/libavcodec/av1dec.c b/libavcodec/av1dec.c
index d105835d51..3cbb80bcb5 100644
--- a/libavcodec/av1dec.c
+++ b/libavcodec/av1dec.c
@@ -1228,6 +1228,9 @@ static void av1_decode_flush(AVCodecContext *avctx)
s->raw_seq = NULL;
ff_cbs_flush(s->cbc);
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
#define OFFSET(x) offsetof(AV1DecContext, x)
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 6babfc7132..531998a78c 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2214,6 +2214,11 @@ typedef struct AVHWAccel {
* @param data the per-frame hardware accelerator private data to be freed.
*/
void (*free_frame_priv)(AVCodecContext *avctx, void *data);
+
+ /**
+ * Callback to flush the hwaccel state.
+ */
+ void (*flush)(AVCodecContext *avctx);
} AVHWAccel;
/**
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 2d691731c5..995bf17a8f 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -480,6 +480,9 @@ static void h264_decode_flush(AVCodecContext *avctx)
ff_h264_free_tables(h);
h->context_initialized = 0;
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
static int get_last_needed_nal(H264Context *h)
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 43cd963175..7c9b46240c 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -3682,6 +3682,9 @@ static void hevc_decode_flush(AVCodecContext *avctx)
av_buffer_unref(&s->rpu_buf);
s->max_ra = INT_MAX;
s->eos = 1;
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
#define OFFSET(x) offsetof(HEVCContext, x)
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 4c23eb5672..b591b82ad1 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -167,6 +167,9 @@ static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
if (free_mem)
free_buffers(s);
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
static void vp8_decode_flush(AVCodecContext *avctx)
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 4f345f18db..18c2b09f64 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -1791,6 +1791,9 @@ static void vp9_decode_flush(AVCodecContext *avctx)
vp9_frame_unref(avctx, &s->s.frames[i]);
for (i = 0; i < 8; i++)
ff_thread_release_ext_buffer(avctx, &s->s.refs[i]);
+
+    if (avctx->hwaccel && avctx->hwaccel->flush) /* hwaccel is NULL when no hwaccel is in use */
+        avctx->hwaccel->flush(avctx);
}
static av_cold int vp9_decode_init(AVCodecContext *avctx)
--
2.39.2
[-- Attachment #67: 0066-hwconfig-add-a-new-HWACCEL_CAP_THREAD_SAFE-for-threa.patch --]
[-- Type: text/x-diff, Size: 1369 bytes --]
From 99ce9693bcb6218ffe82bb5780827c1dca614092 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 01:06:52 +0100
Subject: [PATCH 66/72] hwconfig: add a new HWACCEL_CAP_THREAD_SAFE for
threadsafe hwaccels
Vulkan is fully threadsafe and stateless, so we can benefit from this.
---
libavcodec/hwconfig.h | 1 +
libavcodec/pthread_frame.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
index 721424912c..e6b78f0160 100644
--- a/libavcodec/hwconfig.h
+++ b/libavcodec/hwconfig.h
@@ -24,6 +24,7 @@
#define HWACCEL_CAP_ASYNC_SAFE (1 << 0)
+#define HWACCEL_CAP_THREAD_SAFE (1 << 1)
typedef struct AVCodecHWConfigInternal {
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 71edd6b3ec..15e8d96a79 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -204,7 +204,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
/* if the previous thread uses hwaccel then we take the lock to ensure
* the threads don't run concurrently */
- if (avctx->hwaccel) {
+ if (avctx->hwaccel && !(avctx->hwaccel->caps_internal & HWACCEL_CAP_THREAD_SAFE)) {
pthread_mutex_lock(&p->parent->hwaccel_mutex);
p->hwaccel_serializing = 1;
}
--
2.39.2
[-- Attachment #68: 0067-libavcodec-add-Vulkan-common-video-code.patch --]
[-- Type: text/x-diff, Size: 23311 bytes --]
From 2f30e4ddaf855b53cd3d8fd95a863b240bae0047 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 18 Dec 2022 08:31:03 +0100
Subject: [PATCH 67/72] libavcodec: add Vulkan common video code
---
configure | 2 +-
libavcodec/Makefile | 2 +
libavcodec/hwconfig.h | 2 +
libavcodec/vulkan.c | 19 ++
libavcodec/vulkan.h | 24 +++
libavcodec/vulkan_video.c | 417 ++++++++++++++++++++++++++++++++++++++
libavcodec/vulkan_video.h | 98 +++++++++
7 files changed, 563 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan.c
create mode 100644 libavcodec/vulkan.h
create mode 100644 libavcodec/vulkan_video.c
create mode 100644 libavcodec/vulkan_video.h
diff --git a/configure b/configure
index f0f15b9e87..91f715351c 100755
--- a/configure
+++ b/configure
@@ -326,7 +326,6 @@ External library support:
--disable-securetransport disable Secure Transport, needed for TLS support
on OSX if openssl and gnutls are not used [autodetect]
--enable-vapoursynth enable VapourSynth demuxer [no]
- --disable-vulkan disable Vulkan code [autodetect]
--disable-xlib disable xlib [autodetect]
--disable-zlib disable zlib [autodetect]
@@ -353,6 +352,7 @@ External library support:
--disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect]
--disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
--disable-videotoolbox disable VideoToolbox code [autodetect]
+ --disable-vulkan disable Vulkan code [autodetect]
Toolchain options:
--arch=ARCH select architecture [$arch]
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 4971832ff4..a45c32e564 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -976,6 +976,7 @@ OBJS-$(CONFIG_NVDEC) += nvdec.o
OBJS-$(CONFIG_VAAPI) += vaapi_decode.o
OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o
OBJS-$(CONFIG_VDPAU) += vdpau.o
+OBJS-$(CONFIG_VULKAN) += vulkan.o vulkan_video.o
OBJS-$(CONFIG_AV1_D3D11VA_HWACCEL) += dxva2_av1.o
OBJS-$(CONFIG_AV1_DXVA2_HWACCEL) += dxva2_av1.o
@@ -1284,6 +1285,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
+SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h
diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
index e6b78f0160..220b8a1e95 100644
--- a/libavcodec/hwconfig.h
+++ b/libavcodec/hwconfig.h
@@ -77,6 +77,8 @@ typedef struct AVCodecHWConfigInternal {
HW_CONFIG_HWACCEL(1, 1, 1, VDPAU, VDPAU, ff_ ## codec ## _vdpau_hwaccel)
#define HWACCEL_VIDEOTOOLBOX(codec) \
HW_CONFIG_HWACCEL(1, 1, 1, VIDEOTOOLBOX, VIDEOTOOLBOX, ff_ ## codec ## _videotoolbox_hwaccel)
+#define HWACCEL_VULKAN(codec) \
+ HW_CONFIG_HWACCEL(1, 1, 1, VULKAN, VULKAN, ff_ ## codec ## _vulkan_hwaccel)
#define HWACCEL_D3D11VA(codec) \
HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel)
diff --git a/libavcodec/vulkan.c b/libavcodec/vulkan.c
new file mode 100644
index 0000000000..fc8a1fa47b
--- /dev/null
+++ b/libavcodec/vulkan.c
@@ -0,0 +1,19 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/vulkan.c"
diff --git a/libavcodec/vulkan.h b/libavcodec/vulkan.h
new file mode 100644
index 0000000000..b15efd4add
--- /dev/null
+++ b/libavcodec/vulkan.h
@@ -0,0 +1,24 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_H
+#define AVCODEC_VULKAN_H
+
+#include "libavutil/vulkan.h"
+
+#endif /* AVCODEC_VULKAN_H */
diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c
new file mode 100644
index 0000000000..3e76109b26
--- /dev/null
+++ b/libavcodec/vulkan_video.c
@@ -0,0 +1,417 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+
+#include "codec_id.h"
+
+#include "vulkan_video.h"
+
+/* Per-codec Vulkan video requirements, indexed by codec ID.
+ * The initializers are positional and follow the FFVkCodecMap field order:
+ * encode_extension, encode_op, decode_extension, decode_op.
+ * Codec IDs without an explicit entry are zero-initialized. */
+const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO] = {
+ [AV_CODEC_ID_H264] = {
+#if CONFIG_VULKAN_ENCODE
+ /* encode_extension, encode_op */
+ FF_VK_EXT_VIDEO_ENCODE_H264 | FF_VK_EXT_SYNC2,
+ VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_EXT,
+#else
+ /* encode fields left empty when CONFIG_VULKAN_ENCODE is off */
+ 0,
+ 0,
+#endif
+ /* decode_extension, decode_op */
+ FF_VK_EXT_VIDEO_DECODE_H264 | FF_VK_EXT_SYNC2,
+ VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR,
+ },
+ [AV_CODEC_ID_HEVC] = {
+#if CONFIG_VULKAN_ENCODE
+ /* encode_extension, encode_op */
+ FF_VK_EXT_VIDEO_ENCODE_H265 | FF_VK_EXT_SYNC2,
+ VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_EXT,
+#else
+ /* encode fields left empty when CONFIG_VULKAN_ENCODE is off */
+ 0,
+ 0,
+#endif
+ /* decode_extension, decode_op */
+ FF_VK_EXT_VIDEO_DECODE_H265 | FF_VK_EXT_SYNC2,
+ VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR
+ },
+};
+
+/* Translate a VkFormat into the matching AVPixelFormat.
+ * On a match, *score is set to the value stored in the table (1 or 2) and the
+ * pixel format is returned. For an unlisted format AV_PIX_FMT_NONE is
+ * returned and *score is left untouched. */
+enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf, int *score)
+{
+    static const struct {
+        VkFormat           vkfmt;
+        enum AVPixelFormat pixfmt;
+        int                score;
+    } fmt_map[] = {
+        /* Mono */
+        { VK_FORMAT_R8_UNORM,                                          AV_PIX_FMT_GRAY8,     1 },
+        { VK_FORMAT_R10X6_UNORM_PACK16,                                AV_PIX_FMT_GRAY16,    2 },
+        { VK_FORMAT_R12X4_UNORM_PACK16,                                AV_PIX_FMT_GRAY16,    2 },
+        { VK_FORMAT_R16_UNORM,                                         AV_PIX_FMT_GRAY16,    1 },
+
+        /* RGB */
+        { VK_FORMAT_B8G8R8A8_UNORM,                                    AV_PIX_FMT_BGRA,      1 },
+        { VK_FORMAT_R8G8B8A8_UNORM,                                    AV_PIX_FMT_RGBA,      1 },
+        { VK_FORMAT_R8G8B8_UNORM,                                      AV_PIX_FMT_RGB24,     1 },
+        { VK_FORMAT_B8G8R8_UNORM,                                      AV_PIX_FMT_BGR24,     1 },
+
+        /* 420 (no entry for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16).
+         * NOTE(review): the 10-bit 3-plane 4:2:0 format maps to YUV420P16 while
+         * its 4:2:2 and 4:4:4 siblings map to the P10 variants - confirm this
+         * is intentional. */
+        { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,                          AV_PIX_FMT_NV12,      1 },
+        { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,                         AV_PIX_FMT_YUV420P,   1 },
+        { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,         AV_PIX_FMT_P010,      2 },
+        { VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,        AV_PIX_FMT_YUV420P16, 2 },
+        { VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,        AV_PIX_FMT_YUV420P12, 2 },
+        { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,                      AV_PIX_FMT_YUV420P16, 1 },
+
+        /* 422 (no entry for VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) */
+        { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,                          AV_PIX_FMT_NV16,      1 },
+        { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,                         AV_PIX_FMT_YUV422P,   1 },
+        { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,         AV_PIX_FMT_NV20,      2 },
+        { VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,        AV_PIX_FMT_YUV422P10, 2 },
+        { VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,        AV_PIX_FMT_YUV422P12, 2 },
+        { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,                      AV_PIX_FMT_YUV422P16, 1 },
+
+        /* 444 (no entries for the 10-bit and 12-bit 2-plane _EXT formats) */
+        { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT,                      AV_PIX_FMT_NV24,      1 },
+        { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,                         AV_PIX_FMT_YUV444P,   1 },
+        { VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,        AV_PIX_FMT_YUV444P10, 2 },
+        { VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,        AV_PIX_FMT_YUV444P12, 2 },
+        { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,                      AV_PIX_FMT_YUV444P16, 1 },
+    };
+
+    for (size_t idx = 0; idx < sizeof(fmt_map) / sizeof(fmt_map[0]); idx++) {
+        if (fmt_map[idx].vkfmt != vkf)
+            continue;
+        *score = fmt_map[idx].score;
+        return fmt_map[idx].pixfmt;
+    }
+
+    return AV_PIX_FMT_NONE;
+}
+
+/* Return the image aspect mask covering every plane of a VkFormat:
+ * the color aspect for single-plane formats, plane 0..1 or plane 0..2 bits
+ * for the listed multi-plane formats, and VK_IMAGE_ASPECT_NONE otherwise. */
+VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf)
+{
+    VkImageAspectFlags plane_bits;
+    int nb_planes;
+
+    switch (vkf) {
+    /* Single-plane gray and RGB formats */
+    case VK_FORMAT_R8_UNORM:
+    case VK_FORMAT_R10X6_UNORM_PACK16:
+    case VK_FORMAT_R12X4_UNORM_PACK16:
+    case VK_FORMAT_R16_UNORM:
+    case VK_FORMAT_B8G8R8A8_UNORM:
+    case VK_FORMAT_R8G8B8A8_UNORM:
+    case VK_FORMAT_R8G8B8_UNORM:
+    case VK_FORMAT_B8G8R8_UNORM:
+        nb_planes = 1;
+        break;
+
+    /* Two-plane (luma + interleaved chroma) formats */
+    case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+    case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+    case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
+    case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
+    case VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
+        nb_planes = 2;
+        break;
+
+    /* Three-plane formats */
+    case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
+    case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
+    case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
+    case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
+    case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
+    case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
+    case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
+    case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
+        nb_planes = 3;
+        break;
+
+    default:
+        return VK_IMAGE_ASPECT_NONE;
+    }
+
+    if (nb_planes == 1)
+        return VK_IMAGE_ASPECT_COLOR_BIT;
+
+    plane_bits = VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT;
+    if (nb_planes == 3)
+        plane_bits |= VK_IMAGE_ASPECT_PLANE_2_BIT;
+
+    return plane_bits;
+}
+
+/**
+ * Map a pixel format descriptor's chroma layout to the Vulkan video chroma
+ * subsampling flag.
+ *
+ * @param desc pixel format descriptor; nb_components, log2_chroma_w and
+ *             log2_chroma_h are read
+ * @return the monochrome, 4:4:4, 4:2:2 or 4:2:0 flag, or
+ *         VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR for any other layout
+ */
+VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc)
+{
+    if (desc->nb_components == 1)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR;
+    else if (!desc->log2_chroma_w && !desc->log2_chroma_h)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR;
+    /* 4:2:2 halves the chroma width only: log2_chroma_w == 1 and
+     * log2_chroma_h == 0. The opposite combination is 4:4:0, which Vulkan
+     * video has no flag for. */
+    else if (desc->log2_chroma_w == 1 && !desc->log2_chroma_h)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR;
+    else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1)
+        return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR;
+    return VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR;
+}
+
+/* Convert a component bit depth (8, 10 or 12) into the Vulkan video bit depth
+ * flag; every other depth yields VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR. */
+VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth)
+{
+    if (depth == 8)
+        return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR;
+    if (depth == 10)
+        return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR;
+    if (depth == 12)
+        return VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR;
+
+    return VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR;
+}
+
+/* AVBuffer free callback for pool entries created by alloc_data_buf():
+ * unmaps and frees the Vulkan buffer held in the FFVkVideoBuffer, then frees
+ * the FFVkVideoBuffer itself. opaque is the FFVulkanContext. */
+static void free_data_buf(void *opaque, uint8_t *data)
+{
+    FFVkVideoBuffer *vkbuf = (FFVkVideoBuffer *)data;
+    FFVulkanContext *vkctx = opaque;
+
+    ff_vk_unmap_buffers(vkctx, &vkbuf->buf, 1, 0);
+    ff_vk_free_buf(vkctx, &vkbuf->buf);
+    av_free(vkbuf);
+}
+
+/* Buffer pool allocator: returns a zeroed payload of `size` bytes wrapped in
+ * an AVBufferRef that is released through free_data_buf(), with `opaque`
+ * passed through as that callback's opaque argument.
+ * Returns NULL on allocation failure. */
+static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
+{
+    AVBufferRef *ref;
+    uint8_t *buf = av_mallocz(size);
+    if (!buf)
+        return NULL;
+
+    ref = av_buffer_create(buf, size, free_data_buf, opaque, 0);
+    /* Without a wrapper nothing else owns the payload, so free it here
+     * instead of leaking it. */
+    if (!ref)
+        av_free(buf);
+
+    return ref;
+}
+
+/**
+ * Get a buffer of at least `size` bytes from the session's buffer pool,
+ * creating the pool on first use.
+ *
+ * A pooled entry that is already large enough is returned unchanged.
+ * Otherwise its Vulkan buffer is freed and recreated with host-visible
+ * memory and mapped. The new size is at least 1 MiB, aligned to
+ * caps.minBitstreamBufferSizeAlignment and rounded up to a power of two.
+ *
+ * @param ctx          Vulkan context used for all buffer operations
+ * @param s            common video state holding the pool and capabilities
+ * @param buf          receives the buffer reference; set to NULL when getting,
+ *                     creating or mapping the buffer fails
+ * @param usage        usage flags passed to ff_vk_create_buf()
+ * @param create_pNext pNext chain passed to ff_vk_create_buf()
+ * @param size         minimum required size in bytes
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
+                           AVBufferRef **buf, VkBufferUsageFlags usage,
+                           void *create_pNext, size_t size)
+{
+    int err;
+    AVBufferRef *ref;
+    FFVkVideoBuffer *data;
+
+    if (!s->buf_pool) {
+        s->buf_pool = av_buffer_pool_init2(sizeof(FFVkVideoBuffer), ctx,
+                                           alloc_data_buf, NULL);
+        if (!s->buf_pool)
+            return AVERROR(ENOMEM);
+    }
+
+    *buf = ref = av_buffer_pool_get(s->buf_pool);
+    if (!ref)
+        return AVERROR(ENOMEM);
+
+    data = (FFVkVideoBuffer *)ref->data;
+
+    /* A recycled entry that is already big enough is reused as-is. */
+    if (data->buf.size >= size)
+        return 0;
+
+    /* No point in requesting anything smaller. */
+    size = FFMAX(size, 1024*1024);
+    size = FFALIGN(size, s->caps.minBitstreamBufferSizeAlignment);
+
+    /* Round up to the next power of two. Makes fragmentation management
+     * easier, and gives us ample headroom. The top set bit is smeared across
+     * the whole width of size_t, not just the low 32 bits. */
+    size--;
+    for (size_t shift = 1; shift < sizeof(size) * 8; shift <<= 1)
+        size |= size >> shift;
+    size++;
+
+    ff_vk_free_buf(ctx, &data->buf);
+    memset(data, 0, sizeof(FFVkVideoBuffer));
+
+    err = ff_vk_create_buf(ctx, &data->buf, size,
+                           create_pNext, NULL, usage,
+                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0)
+        goto fail;
+
+    /* Map the buffer */
+    err = ff_vk_map_buffers(ctx, &data->buf, &data->mem, 1, 0);
+    if (err < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    av_buffer_unref(&ref);
+    /* The reference was just released; do not leave the caller holding a
+     * dangling pointer to it. */
+    *buf = NULL;
+    return err;
+}
+
+/**
+ * Release everything held by an FFVkVideoCommon: the video session, the
+ * device memory allocations in mem[], the mem array itself and the buffer
+ * pool. The session handle and nb_mem are reset afterwards, so the function
+ * can be called on a partially initialized state.
+ *
+ * @param s      Vulkan context providing the device, allocator and functions
+ * @param common state to tear down
+ */
+av_cold void ff_vk_video_common_uninit(FFVulkanContext *s,
+                                       FFVkVideoCommon *common)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    if (common->session) {
+        vk->DestroyVideoSessionKHR(s->hwctx->act_dev, common->session,
+                                   s->hwctx->alloc);
+        /* VkVideoSessionKHR is a non-dispatchable handle, which is not a
+         * pointer type on every platform, so use VK_NULL_HANDLE over NULL. */
+        common->session = VK_NULL_HANDLE;
+    }
+
+    if (common->mem)
+        for (uint32_t i = 0; i < common->nb_mem; i++)
+            vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc);
+
+    av_freep(&common->mem);
+    /* Keep the count in sync with the array that was just freed. */
+    common->nb_mem = 0;
+
+    av_buffer_pool_uninit(&common->buf_pool);
+}
+
+/**
+ * Create a Vulkan video session and allocate and bind the device memory it
+ * asks for.
+ *
+ * On failure everything created so far is released through
+ * ff_vk_video_common_uninit().
+ *
+ * @param log            logging context for av_log()
+ * @param s              Vulkan context providing the device and functions
+ * @param common         state that receives the session and its memory
+ * @param session_create session creation parameters
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+                                    FFVkVideoCommon *common,
+                                    VkVideoSessionCreateInfoKHR *session_create)
+{
+    int err;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkVideoSessionMemoryRequirementsKHR *mem = NULL;
+    VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL;
+
+    /* Create session */
+    ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create,
+                                    s->hwctx->alloc, &common->session);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    /* First query: only the number of memory requirements */
+    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
+                                                   common->session,
+                                                   &common->nb_mem,
+                                                   NULL);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Allocate the zeroed bookkeeping arrays; av_calloc() also rejects a
+     * count whose byte size would overflow. */
+    common->mem = av_calloc(common->nb_mem, sizeof(*common->mem));
+    if (!common->mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    mem = av_calloc(common->nb_mem, sizeof(*mem));
+    if (!mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    bind_mem = av_calloc(common->nb_mem, sizeof(*bind_mem));
+    if (!bind_mem) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Only sType has to be set before the query; all other fields stay
+     * zeroed from the allocation. */
+    for (uint32_t i = 0; i < common->nb_mem; i++)
+        mem[i].sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_MEMORY_REQUIREMENTS_KHR;
+
+    /* Second query: the actual memory requirements */
+    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
+                                                   common->session, &common->nb_mem,
+                                                   mem);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Now allocate each requested memory.
+     * For ricing, could pool together memory that ends up in the same index. */
+    for (uint32_t i = 0; i < common->nb_mem; i++) {
+        err = ff_vk_alloc_mem(s, &mem[i].memoryRequirements,
+                              UINT32_MAX, NULL, NULL, &common->mem[i]);
+        if (err < 0)
+            goto fail;
+
+        bind_mem[i] = (VkBindVideoSessionMemoryInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_BIND_VIDEO_SESSION_MEMORY_INFO_KHR,
+            .memory = common->mem[i],
+            .memoryBindIndex = mem[i].memoryBindIndex,
+            .memoryOffset = 0,
+            .memorySize = mem[i].memoryRequirements.size,
+        };
+
+        /* memorySize is a 64-bit VkDeviceSize and memoryBindIndex a uint32_t;
+         * print both with fixed-width specifiers. */
+        av_log(log, AV_LOG_VERBOSE, "Allocating %"PRIu64" bytes in bind index %"PRIu32" for video session\n",
+               bind_mem[i].memorySize, bind_mem[i].memoryBindIndex);
+    }
+
+    /* Bind the allocated memory */
+    ret = vk->BindVideoSessionMemoryKHR(s->hwctx->act_dev, common->session,
+                                        common->nb_mem, bind_mem);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    av_freep(&mem);
+    av_freep(&bind_mem);
+
+    return 0;
+
+fail:
+    av_freep(&mem);
+    av_freep(&bind_mem);
+
+    ff_vk_video_common_uninit(s, common);
+    return err;
+}
diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h
new file mode 100644
index 0000000000..5e2676a282
--- /dev/null
+++ b/libavcodec/vulkan_video.h
@@ -0,0 +1,98 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_VIDEO_H
+#define AVCODEC_VULKAN_VIDEO_H
+
+#include "codec_id.h"
+#include "vulkan.h"
+
+#include <vk_video/vulkan_video_codecs_common.h>
+
+/* Unpack a packed codec std header version: the major number sits in bits 22
+ * and up, the minor number in the 10 bits starting at bit 12, and the patch
+ * number in the low 12 bits. The argument is parenthesized so expressions
+ * such as (a | b) are unpacked as a whole. */
+#define CODEC_VER_MAJ(ver) ((ver) >> 22)
+#define CODEC_VER_MIN(ver) (((ver) >> 12) & ((1 << 10) - 1))
+#define CODEC_VER_PAT(ver) ((ver) & ((1 << 12) - 1))
+/* Expands to three comma-separated values (major, minor, patch), intended to
+ * feed three consecutive format arguments. */
+#define CODEC_VER(ver) CODEC_VER_MAJ(ver), CODEC_VER_MIN(ver), CODEC_VER_PAT(ver)
+
+/**
+ * Per-codec mapping to the Vulkan video extension and codec operation used
+ * for encoding and for decoding.
+ */
+typedef struct FFVkCodecMap {
+ /* Encode-side counterparts of the two decode fields below; not referenced
+ * by the decoding code (NOTE(review): confirm semantics with the encoder) */
+ FFVulkanExtensions encode_extension;
+ VkVideoCodecOperationFlagBitsKHR encode_op;
+ /* Extension flag that must be present in the device's enabled extensions
+ * for this codec to be decoded; zero is reported as "unsupported codec" */
+ FFVulkanExtensions decode_extension;
+ /* Codec operation placed into the video profile; zero is rejected early */
+ VkVideoCodecOperationFlagBitsKHR decode_op;
+} FFVkCodecMap;
+
+/**
+ * State shared by a Vulkan video session: the session handle, the device
+ * memory bound to it and the capabilities it was created against.
+ * Note that the struct tag (FFVkVideoSession) and the typedef name
+ * (FFVkVideoCommon) differ; code refers to it by the typedef name.
+ */
+typedef struct FFVkVideoSession {
+ /* Video session handle */
+ VkVideoSessionKHR session;
+ /* Array of nb_mem device memory allocations bound to the session */
+ VkDeviceMemory *mem;
+ uint32_t nb_mem;
+ /* Video capabilities, filled in when the decoder queries the device */
+ VkVideoCapabilitiesKHR caps;
+
+ /* Buffer pool (NOTE(review): presumably the pool that
+ * ff_vk_video_get_buffer() draws from; confirm in vulkan_video.c) */
+ AVBufferPool *buf_pool;
+} FFVkVideoCommon;
+
+/**
+ * Table of per-codec Vulkan video mappings.
+ * Index is codec_id; the table only has room for IDs below
+ * AV_CODEC_ID_FIRST_AUDIO.
+ */
+extern const FFVkCodecMap ff_vk_codec_map[AV_CODEC_ID_FIRST_AUDIO];
+
+/**
+ * Get pixfmt from a Vulkan format.
+ *
+ * @param vkf   Vulkan format to look up
+ * @param score receives a score for the match; the decoder prefers
+ *              formats with a lower score
+ * @return the matching pixel format; callers check for AV_PIX_FMT_NONE
+ */
+enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf, int *score);
+
+/**
+ * Get aspect bits which include all planes from a VkFormat.
+ */
+VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf);
+
+/**
+ * Get Vulkan's chroma subsampling from a pixfmt descriptor.
+ */
+VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc);
+
+/**
+ * Get Vulkan's bit depth from an [8:12] integer.
+ */
+VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth);
+
+/**
+ * A Vulkan buffer together with a host pointer to its contents.
+ */
+typedef struct FFVkVideoBuffer {
+ /* The Vulkan buffer (handle, memory and memory property flags) */
+ FFVkBuffer buf;
+ /* Host-accessible pointer the bitstream data is written through */
+ uint8_t *mem;
+} FFVkVideoBuffer;
+
+/**
+ * Get a mapped FFVkVideoBuffer with a specific guaranteed minimum size
+ * from a pool.
+ *
+ * @param buf  receives a reference whose data pointer is the FFVkVideoBuffer
+ * @param size minimum size requested for the buffer
+ * @return a negative value on failure
+ */
+int ff_vk_video_get_buffer(FFVulkanContext *ctx, FFVkVideoCommon *s,
+ AVBufferRef **buf, VkBufferUsageFlags usage,
+ void *create_pNext, size_t size);
+
+/**
+ * Initialize video session, allocating and binding necessary memory.
+ *
+ * @param log            logging context
+ * @param s              Vulkan context
+ * @param common         session state to initialize
+ * @param session_create creation parameters for the video session
+ * @return NOTE(review): presumably 0 on success and a negative AVERROR code
+ *         on failure; confirm against the definition in vulkan_video.c
+ */
+int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+ FFVkVideoCommon *common,
+ VkVideoSessionCreateInfoKHR *session_create);
+
+/**
+ * Free video session and required resources.
+ */
+void ff_vk_video_common_uninit(FFVulkanContext *s, FFVkVideoCommon *common);
+
+#endif /* AVCODEC_VULKAN_VIDEO_H */
--
2.39.2
[-- Attachment #69: 0068-libavcodec-add-Vulkan-common-video-decoding-code.patch --]
[-- Type: text/x-diff, Size: 53050 bytes --]
From d3f2fa8e530dc94c9058149a2cee92196c7adb33 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 16 Jan 2023 07:23:27 +0100
Subject: [PATCH 68/72] libavcodec: add Vulkan common video decoding code
---
libavcodec/Makefile | 2 +-
libavcodec/vulkan_decode.c | 1135 ++++++++++++++++++++++++++++++++++++
libavcodec/vulkan_decode.h | 163 ++++++
3 files changed, 1299 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan_decode.c
create mode 100644 libavcodec/vulkan_decode.h
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index a45c32e564..eabf4eb43e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1285,7 +1285,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
-SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h
+SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_decode.h
SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
new file mode 100644
index 0000000000..582968e1da
--- /dev/null
+++ b/libavcodec/vulkan_decode.c
@@ -0,0 +1,1135 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_video.h"
+#include "vulkan_decode.h"
+#include "config_components.h"
+
+/* Extension properties of the codec std headers, one per enabled hwaccel.
+ * NOTE(review): presumably defined in the per-codec hwaccel sources; confirm. */
+#if CONFIG_H264_VULKAN_HWACCEL
+extern const VkExtensionProperties ff_vk_dec_h264_ext;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+extern const VkExtensionProperties ff_vk_dec_hevc_ext;
+#endif
+
+/* Lookup table from codec ID to the codec's extension properties. It is read
+ * when logging the compiled-in std header version next to the one reported
+ * by the driver. Only codecs whose hwaccel is enabled get an entry. */
+static const VkExtensionProperties *dec_ext[] = {
+#if CONFIG_H264_VULKAN_HWACCEL
+ [AV_CODEC_ID_H264] = &ff_vk_dec_h264_ext,
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ [AV_CODEC_ID_HEVC] = &ff_vk_dec_hevc_ext,
+#endif
+};
+
+/**
+ * Create a 2D image view, in the decoder's picture format, over the first
+ * image of a Vulkan frame.
+ *
+ * @param ctx      decode context supplying the device, picture format and
+ *                 YCbCr sampler conversion
+ * @param dst_view receives the created view
+ * @param aspect   receives the aspect bits derived from the picture format;
+ *                 only written on success
+ * @param src      frame whose first image is viewed
+ * @return 0 on success, AVERROR_EXTERNAL if the view could not be created
+ */
+static int vk_decode_create_view(FFVulkanDecodeContext *ctx, VkImageView *dst_view,
+                                 VkImageAspectFlags *aspect, AVVkFrame *src)
+{
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    VkImageAspectFlags fmt_aspects = ff_vk_aspect_bits_from_vkfmt(ctx->pic_format);
+
+    /* The view is chained to the context's YCbCr conversion */
+    VkSamplerYcbcrConversionInfo ycbcr_info = {
+        .sType      = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO,
+        .conversion = ctx->yuv_sampler,
+    };
+    VkImageViewCreateInfo view_info = {
+        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext      = &ycbcr_info,
+        .image      = src->img[0],
+        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
+        .format     = ctx->pic_format,
+        .components = {
+            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+        },
+        .subresourceRange = {
+            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
+            .levelCount     = 1,
+            .baseArrayLayer = 0,
+            .layerCount     = VK_REMAINING_ARRAY_LAYERS,
+        },
+    };
+
+    if (vk->CreateImageView(ctx->s.hwctx->act_dev, &view_info,
+                            ctx->s.hwctx->alloc, dst_view) != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    *aspect = fmt_aspects;
+    return 0;
+}
+
+/**
+ * Allocate a frame backed by a buffer from the dedicated DPB frames pool.
+ *
+ * @param ctx decode context holding the DPB hardware frames context
+ * @return a new frame whose data[0] points at the pooled buffer's data, or
+ *         NULL if any allocation failed (nothing is leaked in that case)
+ */
+static AVFrame *vk_get_dpb_pool(FFVulkanDecodeContext *ctx)
+{
+    AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+    AVFrame *avf = av_frame_alloc();
+    if (!avf)
+        return NULL;
+
+    avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref);
+    if (!avf->hw_frames_ctx)
+        goto fail;
+
+    avf->buf[0] = av_buffer_pool_get(dpb_frames->pool);
+    if (!avf->buf[0])
+        goto fail;
+
+    avf->data[0] = avf->buf[0]->data;
+
+    return avf;
+
+fail:
+    /* av_frame_free() resets avf to NULL, so bail out right away instead of
+     * carrying on and dereferencing it */
+    av_frame_free(&avf);
+    return NULL;
+}
+
+/**
+ * Prepare the per-picture decode state: reset the slice accounting and, if the
+ * picture has no reference view yet, set up its slice buffer and image views.
+ *
+ * @param ctx        decode context
+ * @param pic        frame the picture decodes into
+ * @param vkpic      per-picture state to prepare
+ * @param is_current nonzero if this is the picture about to be decoded; a
+ *                   slice buffer and an output view are then created
+ * @param alloc_dpb  nonzero if references live in a dedicated DPB (either the
+ *                   context's layered image or a frame from the DPB pool);
+ *                   zero makes the output view double as the reference view
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic,
+                               FFVulkanDecodePicture *vkpic, int is_current,
+                               int alloc_dpb)
+{
+    int ret;
+
+    /* Slice accounting always starts from scratch */
+    vkpic->nb_slices   = 0;
+    vkpic->slices_size = 0;
+
+    /* The picture already has a reference view: nothing left to set up */
+    if (vkpic->img_view_ref)
+        return 0;
+
+    /* Pre-allocate slice buffer with a reasonable default */
+    if (is_current) {
+        uint64_t min_alloc = 4096;
+        if (0)
+            min_alloc = 2*ctx->s.hprops.minImportedHostPointerAlignment;
+
+        vkpic->slices = av_fast_realloc(NULL, &vkpic->slices_size_max, min_alloc);
+        if (!vkpic->slices)
+            return AVERROR(ENOMEM);
+
+        if (0)
+            vkpic->slices_size += ctx->s.hprops.minImportedHostPointerAlignment;
+    }
+
+    vkpic->dpb_frame    = NULL;
+    vkpic->dpb_vkf      = NULL;
+    vkpic->img_view_ref = NULL;
+    vkpic->img_view_out = NULL;
+
+    if (alloc_dpb) {
+        if (ctx->layered_dpb) {
+            /* All references share the context's layered image */
+            vkpic->img_view_ref   = ctx->layered_view;
+            vkpic->img_aspect_ref = ctx->layered_aspect;
+        } else {
+            /* Each picture gets its own DPB frame and a view onto it */
+            vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+            if (!vkpic->dpb_frame)
+                return AVERROR(ENOMEM);
+
+            vkpic->dpb_vkf = (AVVkFrame *)vkpic->dpb_frame->data[0];
+
+            ret = vk_decode_create_view(ctx, &vkpic->img_view_ref,
+                                        &vkpic->img_aspect_ref,
+                                        vkpic->dpb_vkf);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    if (is_current || !alloc_dpb) {
+        ret = vk_decode_create_view(ctx, &vkpic->img_view_out,
+                                    &vkpic->img_aspect,
+                                    (AVVkFrame *)pic->buf[0]->data);
+        if (ret < 0)
+            return ret;
+
+        /* Without a dedicated DPB the output image is also the reference */
+        if (!alloc_dpb) {
+            vkpic->img_view_ref   = vkpic->img_view_out;
+            vkpic->img_aspect_ref = vkpic->img_aspect;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Append one slice to a picture's accumulated slice data.
+ *
+ * The slice's start offset is recorded in the picture's offsets array, then
+ * an optional 3-byte start code and the slice payload are appended to the
+ * picture's slice buffer. Both arrays grow on demand.
+ *
+ * @param vp            per-picture state owning the slice buffer and offsets
+ * @param data          slice payload
+ * @param size          payload size in bytes
+ * @param add_startcode nonzero to prefix the payload with 0x000001
+ * @param nb_slices     in: number of slices already added; out: incremented
+ *                      by one on success
+ * @param offsets       receives the (possibly reallocated) offsets array
+ * @return 0 on success, AVERROR(ENOMEM) if either array could not grow
+ */
+int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp,
+                           const uint8_t *data, size_t size, int add_startcode,
+                           uint32_t *nb_slices, const uint32_t **offsets)
+{
+    static const uint8_t startcode_prefix[3] = { 0x0, 0x0, 0x1 };
+    const size_t startcode_len = add_startcode ? sizeof(startcode_prefix) : 0;
+    const int nb = *nb_slices;
+    uint8_t *slices;
+    uint32_t *slice_off;
+
+    /* Size by the element type, not by the pointer */
+    slice_off = av_fast_realloc(vp->slice_off, &vp->slice_off_max,
+                                (nb + 1)*sizeof(*slice_off));
+    if (!slice_off)
+        return AVERROR(ENOMEM);
+
+    *offsets = vp->slice_off = slice_off;
+    slice_off[nb] = vp->slices_size;
+
+    slices = av_fast_realloc(vp->slices, &vp->slices_size_max,
+                             vp->slices_size + size + startcode_len);
+    if (!slices)
+        return AVERROR(ENOMEM);
+
+    vp->slices = slices;
+
+    /* Startcode */
+    memcpy(slices + vp->slices_size, startcode_prefix, startcode_len);
+
+    /* Slice data */
+    memcpy(slices + vp->slices_size + startcode_len, data, size);
+
+    *nb_slices = nb + 1;
+    vp->nb_slices++;
+    vp->slices_size += startcode_len + size;
+
+    return 0;
+}
+
+/**
+ * Reset the video session by submitting a begin / control(reset) / end
+ * video coding sequence that uses the context's empty session parameters.
+ *
+ * @param avctx codec context whose hwaccel_priv_data is the decode context
+ */
+void ff_vk_decode_flush(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VkVideoBeginCodingInfoKHR begin_info = {
+        .sType                  = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
+        .videoSession           = ctx->common.session,
+        .videoSessionParameters = ctx->empty_session_params,
+    };
+    VkVideoCodingControlInfoKHR reset_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
+        .flags = VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR,
+    };
+    VkVideoEndCodingInfoKHR end_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
+    };
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool);
+    VkCommandBuffer cmd;
+
+    /* Begin recording, then pick up the command buffer to record into */
+    ff_vk_exec_start(&ctx->s, exec);
+    cmd = exec->buf;
+
+    vk->CmdBeginVideoCodingKHR(cmd, &begin_info);
+    vk->CmdControlVideoCodingKHR(cmd, &reset_info);
+    vk->CmdEndVideoCodingKHR(cmd, &end_info);
+
+    ff_vk_exec_submit(&ctx->s, exec);
+}
+
+/**
+ * AVBuffer free callback for a heap-allocated FFVkVideoBuffer: releases the
+ * Vulkan buffer it wraps, then the structure itself.
+ *
+ * @param opaque the FFVulkanContext the buffer was created with
+ * @param data   the FFVkVideoBuffer to destroy
+ */
+static void host_map_buf_free(void *opaque, uint8_t *data)
+{
+    FFVkVideoBuffer *vbuf = (FFVkVideoBuffer *)data;
+    FFVulkanContext *vkctx = opaque;
+
+    ff_vk_free_buf(vkctx, &vbuf->buf);
+    av_free(vbuf);
+}
+
+/**
+ * Record and submit the decode commands for one picture.
+ *
+ * The slice data accumulated in vp is copied into a bitstream buffer, the
+ * output image (and, when references are not layered, every reference image
+ * that has no dedicated DPB frame) gets a layout-transition barrier, and a
+ * begin / decode / end video coding sequence is recorded and submitted.
+ *
+ * @param avctx codec context whose hwaccel_priv_data is the decode context
+ * @param pic   output frame
+ * @param vp    per-picture decode state belonging to pic
+ * @param rpic  reference frames, vp->decode_info.referenceSlotCount entries;
+ *              only read when the DPB is not layered
+ * @param rvkp  per-picture decode state matching each entry of rpic
+ * @return the result of ff_vk_exec_submit() when everything was recorded,
+ *         otherwise a negative AVERROR code
+ */
+int ff_vk_decode_frame(AVCodecContext *avctx,
+                       AVFrame *pic, FFVulkanDecodePicture *vp,
+                       AVFrame *rpic[], FFVulkanDecodePicture *rvkp[])
+{
+    int err;
+    VkResult ret;
+    VkCommandBuffer cmd_buf;
+    FFVkVideoBuffer *sd_buf;
+
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    /* Output */
+    AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data;
+
+    /* Quirks */
+    const int layered_dpb = ctx->layered_dpb;
+
+    VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)vp->session_params->data;
+    VkVideoBeginCodingInfoKHR decode_start = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
+        .videoSession = ctx->common.session,
+        .videoSessionParameters = *par,
+        .referenceSlotCount = vp->decode_info.referenceSlotCount,
+        .pReferenceSlots = vp->decode_info.pReferenceSlots,
+    };
+    VkVideoEndCodingInfoKHR decode_end = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
+    };
+
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+    AVBufferRef *sd_ref = NULL;
+    size_t data_size = FFALIGN(vp->slices_size, ctx->common.caps.minBitstreamBufferSizeAlignment);
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool);
+
+    /* Report the status query result of the submission that previously used
+     * this execution context, if queries are enabled */
+    if (ctx->exec_pool.nb_queries) {
+        int64_t prev_sub_res = 0;
+        ff_vk_exec_wait(&ctx->s, exec);
+        ret = ff_vk_exec_get_query(&ctx->s, exec, NULL, &prev_sub_res);
+        if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+
+        /* prev_sub_res is an int64_t, so print it with PRId64 rather than
+         * assuming long is 64 bits wide */
+        if (ret == VK_SUCCESS)
+            av_log(avctx, prev_sub_res < 0 ? AV_LOG_ERROR : AV_LOG_DEBUG,
+                   "Result of previous frame decoding: %"PRId64"\n", prev_sub_res);
+    }
+
+    /* Host-pointer import of the slice data; currently disabled by the
+     * constant condition, so the copy path below is always taken */
+    if (0) {
+        size_t req_size;
+        VkExternalMemoryBufferCreateInfo create_desc = {
+            .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
+            .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+            .pNext = &ctx->profile_list,
+        };
+
+        VkImportMemoryHostPointerInfoEXT import_desc = {
+            .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+        };
+
+        VkMemoryHostPointerPropertiesEXT p_props = {
+            .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
+        };
+
+        /* Align slices pointer */
+        import_desc.pHostPointer = (void *)FFALIGN((uintptr_t)vp->slices,
+                                                   ctx->s.hprops.minImportedHostPointerAlignment);
+
+        req_size = FFALIGN(data_size,
+                           ctx->s.hprops.minImportedHostPointerAlignment);
+
+        ret = vk->GetMemoryHostPointerPropertiesEXT(ctx->s.hwctx->act_dev,
+                                                    import_desc.handleType,
+                                                    import_desc.pHostPointer,
+                                                    &p_props);
+
+        if (ret == VK_SUCCESS) {
+            sd_buf = av_mallocz(sizeof(*sd_buf));
+            if (!sd_buf)
+                return AVERROR(ENOMEM);
+
+            err = ff_vk_create_buf(&ctx->s, &sd_buf->buf, req_size,
+                                   &create_desc, &import_desc,
+                                   VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
+                                   VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+            if (err < 0) {
+                av_free(sd_buf);
+                return err; /* This shouldn't error out, unless it's critical */
+            } else {
+                size_t neg_offs = (uint8_t *)import_desc.pHostPointer - vp->slices;
+
+                sd_ref = av_buffer_create((uint8_t *)sd_buf, sizeof(*sd_buf),
+                                          host_map_buf_free, &ctx->s, 0);
+                if (!sd_ref) {
+                    ff_vk_free_buf(&ctx->s, &sd_buf->buf);
+                    av_free(sd_buf);
+                    return AVERROR(ENOMEM);
+                }
+
+                for (int i = 0; i < vp->nb_slices; i++)
+                    vp->slice_off[i] -= neg_offs;
+
+                sd_buf->mem = vp->slices;
+            }
+        }
+    }
+
+    if (!sd_ref) {
+        err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &sd_ref,
+                                     VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
+                                     &ctx->profile_list, data_size);
+        if (err < 0)
+            return err;
+
+        sd_buf = (FFVkVideoBuffer *)sd_ref->data;
+
+        /* Copy the slices data to the buffer */
+        memcpy(sd_buf->mem, vp->slices, vp->slices_size);
+    }
+
+    /* Flush if needed */
+    if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        VkMappedMemoryRange flush_buf = {
+            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = sd_buf->buf.mem,
+            .offset = 0,
+            .size = FFALIGN(vp->slices_size,
+                            ctx->s.props.properties.limits.nonCoherentAtomSize),
+        };
+
+        ret = vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &flush_buf);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   ff_vk_ret2str(ret));
+            av_buffer_unref(&sd_ref);
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    vp->decode_info.srcBuffer = sd_buf->buf.buf;
+    vp->decode_info.srcBufferOffset = 0;
+    vp->decode_info.srcBufferRange = data_size;
+
+    /* Start command buffer recording */
+    ff_vk_exec_start(&ctx->s, exec);
+    cmd_buf = exec->buf;
+
+    /* Slices */
+    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &sd_ref, 1, 0);
+    if (err < 0)
+        return err;
+
+    /* Parameters */
+    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->session_params, 1, 0);
+    if (err < 0)
+        return err;
+
+    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic,
+                                   VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+    if (err < 0)
+        return err;
+
+    err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                      pic);
+    if (err < 0)
+        return err;
+
+    /* Output image - change layout, as it comes from a pool */
+    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+        .pNext = NULL,
+        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+        .srcAccessMask = vkf->access[0],
+        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
+        .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR,
+        .oldLayout = vkf->layout[0],
+        .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR,
+        .srcQueueFamilyIndex = vkf->queue_family[0],
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = vkf->img[0],
+        .subresourceRange = (VkImageSubresourceRange) {
+            .aspectMask = vp->img_aspect,
+            .layerCount = 1,
+            .levelCount = 1,
+        },
+    };
+    ff_vk_exec_update_frame(&ctx->s, exec, pic,
+                            &img_bar[nb_img_bar], &nb_img_bar);
+
+    /* Reference for the current image, if existing and not layered */
+    if (vp->dpb_frame) {
+        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
+                                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+        if (err < 0)
+            return err;
+    }
+
+    if (!layered_dpb) {
+        /* All references (apart from the current) for non-layered refs */
+
+        for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) {
+            AVFrame *ref_frame = rpic[i];
+            FFVulkanDecodePicture *rvp = rvkp[i];
+            AVFrame *ref = rvp->dpb_frame ? rvp->dpb_frame : ref_frame;
+
+            err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref,
+                                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+            if (err < 0)
+                return err;
+
+            if (err == 0) {
+                err = ff_vk_exec_mirror_sem_value(&ctx->s, exec,
+                                                  &rvp->sem, &rvp->sem_value,
+                                                  ref);
+                if (err < 0)
+                    return err;
+            }
+
+            if (!rvp->dpb_frame) {
+                /* The AVVkFrame lives behind data[0]; casting the data[]
+                 * array itself would reinterpret the frame's pointer table */
+                AVVkFrame *rvkf = (AVVkFrame *)ref->data[0];
+
+                img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
+                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+                    .pNext = NULL,
+                    .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                    .srcAccessMask = rvkf->access[0],
+                    .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
+                    .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR |
+                                     VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR,
+                    .oldLayout = rvkf->layout[0],
+                    .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR,
+                    .srcQueueFamilyIndex = rvkf->queue_family[0],
+                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                    .image = rvkf->img[0],
+                    .subresourceRange = (VkImageSubresourceRange) {
+                        .aspectMask = rvp->img_aspect_ref,
+                        .layerCount = 1,
+                        .levelCount = 1,
+                    },
+                };
+                ff_vk_exec_update_frame(&ctx->s, exec, ref,
+                                        &img_bar[nb_img_bar], &nb_img_bar);
+            }
+        }
+    } else if (vp->decode_info.referenceSlotCount ||
+               vp->img_view_out != vp->img_view_ref) {
+        /* Single barrier for a single layered ref */
+        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->layered_frame,
+                                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+        if (err < 0)
+            return err;
+    }
+
+    /* Change image layout */
+    vk->CmdPipelineBarrier2KHR(cmd_buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+    });
+
+    /* Start, use parameters, decode and end decoding */
+    vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start);
+
+    /* Start status query TODO: remove check when radv gets support */
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdBeginQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0, 0);
+
+    vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info);
+
+    /* End status query */
+    if (ctx->exec_pool.nb_queries)
+        vk->CmdEndQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0);
+
+    vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end);
+
+    /* End recording and submit for execution */
+    return ff_vk_exec_submit(&ctx->s, exec);
+}
+
+/**
+ * Release everything a per-picture decode state owns.
+ *
+ * With a decode context available, the picture's semaphore (if any) is
+ * waited on first, and the image views created for the picture are
+ * destroyed. Without one, only the slice buffers and the DPB frame are freed.
+ *
+ * @param ctx decode context, may be NULL
+ * @param vp  per-picture state to clean up
+ */
+void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp)
+{
+    // TODO: investigate why this happens
+    FFVulkanFunctions *vk = ctx ? &ctx->s.vkfn : NULL;
+
+    if (ctx && vp->sem) {
+        /* We do not have to lock the frame here because we're not interested
+         * in the actual current semaphore value, but only that it's later than
+         * the time we submitted the image for decoding. */
+        VkSemaphoreWaitInfo wait_info = {
+            .sType          = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+            .semaphoreCount = 1,
+            .pSemaphores    = &vp->sem,
+            .pValues        = &vp->sem_value,
+        };
+
+        vk->WaitSemaphores(ctx->s.hwctx->act_dev, &wait_info, UINT64_MAX);
+    }
+
+    /* Free slices data
+     * TODO: use a pool in the decode context instead to avoid per-frame allocs. */
+    av_freep(&vp->slices);
+    av_freep(&vp->slice_off);
+
+    if (ctx) {
+        /* Destroy image view (out), unless it doubles as the reference view */
+        if (vp->img_view_out && vp->img_view_out != vp->img_view_ref)
+            vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_out, ctx->s.hwctx->alloc);
+
+        /* Destroy image view (ref, unlayered) */
+        if (vp->dpb_vkf && vp->img_view_ref)
+            vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_ref, ctx->s.hwctx->alloc);
+    }
+
+    av_frame_free(&vp->dpb_frame);
+}
+
+/* Since to even get decoder capabilities, we have to initialize quite a lot,
+ * this function does initialization and saves it to hwaccel_priv_data if
+ * available. */
+static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_ref,
+ int *width_align, int *height_align,
+ enum AVPixelFormat *pix_fmt, int *dpb_dedicate)
+{
+ VkResult ret;
+ int err, max_level, score = INT32_MAX;
+ const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id];
+ AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
+ AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data;
+ AVVulkanDeviceContext *hwctx = device->hwctx;
+ enum AVPixelFormat context_format = frames->sw_format;
+ int context_format_was_found = 0;
+ int base_profile, cur_profile = avctx->profile;
+
+ int dedicated_dpb;
+ int layered_dpb;
+
+ FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+ FFVulkanExtensions local_extensions = 0x0;
+ FFVulkanExtensions *extensions = ctx ? &ctx->s.extensions : &local_extensions;
+ FFVulkanFunctions local_vk = { 0 };
+ FFVulkanFunctions *vk = ctx ? &ctx->s.vkfn : &local_vk;
+ VkVideoCapabilitiesKHR local_caps = { 0 };
+ VkVideoCapabilitiesKHR *caps = ctx ? &ctx->common.caps : &local_caps;
+ VkVideoDecodeCapabilitiesKHR local_dec_caps = { 0 };
+ VkVideoDecodeCapabilitiesKHR *dec_caps = ctx ? &ctx->dec_caps : &local_dec_caps;
+ VkVideoDecodeUsageInfoKHR local_usage = { 0 };
+ VkVideoDecodeUsageInfoKHR *usage = ctx ? &ctx->usage : &local_usage;
+ VkVideoProfileInfoKHR local_profile = { 0 };
+ VkVideoProfileInfoKHR *profile = ctx ? &ctx->profile : &local_profile;
+ VkVideoProfileListInfoKHR local_profile_list = { 0 };
+ VkVideoProfileListInfoKHR *profile_list = ctx ? &ctx->profile_list : &local_profile_list;
+
+ VkVideoDecodeH264ProfileInfoKHR local_h264_profile = { 0 };
+ VkVideoDecodeH264ProfileInfoKHR *h264_profile = ctx ? &ctx->h264_profile : &local_h264_profile;
+
+ VkVideoDecodeH264ProfileInfoKHR local_h265_profile = { 0 };
+ VkVideoDecodeH264ProfileInfoKHR *h265_profile = ctx ? &ctx->h265_profile : &local_h265_profile;
+
+ VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
+ .pNext = profile_list,
+ };
+ VkVideoDecodeH264CapabilitiesKHR h264_caps = {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR,
+ };
+ VkVideoDecodeH265CapabilitiesKHR h265_caps = {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR,
+ };
+ VkVideoFormatPropertiesKHR *ret_info;
+ uint32_t nb_out_fmts = 0;
+
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+ if (!desc)
+ return AVERROR(EINVAL);
+
+ if (ctx && ctx->init)
+ return 0;
+
+ if (!vk_codec->decode_op)
+ return AVERROR(EINVAL);
+
+ *extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions,
+ hwctx->nb_enabled_dev_extensions);
+
+ if (!(*extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
+ av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
+ VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
+ return AVERROR(ENOSYS);
+ } else if (!vk_codec->decode_extension) {
+ av_log(avctx, AV_LOG_ERROR, "Unsupported codec for Vulkan decoding: %s!\n",
+ avcodec_get_name(avctx->codec_id));
+ return AVERROR(ENOSYS);
+ } else if (!(vk_codec->decode_extension & *extensions)) {
+ av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n",
+ avcodec_get_name(avctx->codec_id));
+ return AVERROR(ENOSYS);
+ }
+
+ err = ff_vk_load_functions(device, vk, *extensions, 1, 1);
+ if (err < 0)
+ return err;
+
+repeat:
+ if (avctx->codec_id == AV_CODEC_ID_H264) {
+ base_profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;
+ dec_caps->pNext = &h264_caps;
+ usage->pNext = h264_profile;
+ h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR;
+ h264_profile->stdProfileIdc = cur_profile;
+ h264_profile->pictureLayout = avctx->field_order == AV_FIELD_PROGRESSIVE ?
+ VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR :
+ VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR;
+ } else if (avctx->codec_id == AV_CODEC_ID_H265) {
+ base_profile = FF_PROFILE_HEVC_MAIN;
+ dec_caps->pNext = &h265_caps;
+ usage->pNext = h265_profile;
+ h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR;
+ h265_profile->stdProfileIdc = cur_profile;
+ }
+
+ usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR;
+ usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR;
+
+ profile->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
+ /* NOTE: NVIDIA's implementation fails if the USAGE hint is inserted.
+ * Remove this once it's fixed. */
+ profile->pNext = usage->pNext;
+ profile->videoCodecOperation = vk_codec->decode_op;
+ profile->chromaSubsampling = ff_vk_subsampling_from_av_desc(desc);
+ profile->lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth);
+ profile->chromaBitDepth = profile->lumaBitDepth;
+
+ profile_list->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
+ profile_list->profileCount = 1;
+ profile_list->pProfiles = profile;
+
+ /* Get the capabilities of the decoder for the given profile */
+ caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
+ caps->pNext = dec_caps;
+ dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR;
+ /* dec_caps->pNext already filled in */
+
+ ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile,
+ caps);
+ if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR &&
+ avctx->flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH &&
+ cur_profile != base_profile) {
+ cur_profile = base_profile;
+ av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting "
+ "again with profile %s\n",
+ avcodec_get_name(avctx->codec_id),
+ avcodec_profile_name(avctx->codec_id, avctx->profile),
+ avcodec_profile_name(avctx->codec_id, base_profile));
+ goto repeat;
+ } else if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) {
+ av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: "
+ "%s profile \"%s\" not supported!\n",
+ avcodec_get_name(avctx->codec_id),
+ avcodec_profile_name(avctx->codec_id, cur_profile));
+ return AVERROR(EINVAL);
+ } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) {
+ av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: "
+ "format (%s) not supported!\n",
+ av_get_pix_fmt_name(avctx->sw_pix_fmt));
+ return AVERROR(EINVAL);
+ } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT ||
+ ret == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+ return AVERROR(EINVAL);
+ } else if (ret != VK_SUCCESS) {
+ return AVERROR_EXTERNAL;
+ }
+
+ max_level = avctx->codec_id == AV_CODEC_ID_H264 ? h264_caps.maxLevelIdc :
+ avctx->codec_id == AV_CODEC_ID_H265 ? h265_caps.maxLevelIdc :
+ 0;
+
+ if (ctx) {
+ av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile \"%s\":\n",
+ avcodec_get_name(avctx->codec_id),
+ avcodec_profile_name(avctx->codec_id, avctx->profile));
+ av_log(avctx, AV_LOG_VERBOSE, " Maximum level: %i\n",
+ max_level);
+ av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n",
+ caps->minCodedExtent.width, caps->maxCodedExtent.width);
+ av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n",
+ caps->minCodedExtent.height, caps->maxCodedExtent.height);
+ av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n",
+ caps->pictureAccessGranularity.width);
+ av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n",
+ caps->pictureAccessGranularity.height);
+ av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n",
+ caps->minBitstreamBufferOffsetAlignment);
+ av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n",
+ caps->minBitstreamBufferSizeAlignment);
+ av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n",
+ caps->maxDpbSlots);
+ av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n",
+ caps->maxActiveReferencePictures);
+ av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n",
+ CODEC_VER(caps->stdHeaderVersion.specVersion),
+ CODEC_VER(dec_ext[avctx->codec_id]->specVersion));
+ av_log(avctx, AV_LOG_VERBOSE, " Decode modes:%s%s%s\n",
+ dec_caps->flags ? "" :
+ " invalid",
+ dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR ?
+ " reuse_dst_dpb" : "",
+ dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR ?
+ " dedicated_dpb" : "");
+ av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n",
+ caps->flags ? "" :
+ " none",
+ caps->flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ?
+ " protected" : "",
+ caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ?
+ " separate_references" : "");
+ }
+
+ /* Check if decoding is possible with the given parameters */
+ if (avctx->coded_width < caps->minCodedExtent.width ||
+ avctx->coded_height < caps->minCodedExtent.height ||
+ avctx->coded_width > caps->maxCodedExtent.width ||
+ avctx->coded_height > caps->maxCodedExtent.height)
+ return AVERROR(EINVAL);
+
+ if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) &&
+ avctx->level > max_level)
+ return AVERROR(EINVAL);
+
+ /* Some basic sanity checking */
+ if (!(dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR |
+ VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR))) {
+ av_log(avctx, AV_LOG_ERROR, "Buggy driver signals invalid decoding mode: neither "
+ "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR nor "
+ "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR are set!\n");
+ return AVERROR_EXTERNAL;
+ } else if ((dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR |
+ VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) ==
+ VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) &&
+ !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR)) {
+ av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: "
+ "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set "
+ "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ /* TODO: make dedicated_dpb tunable */
+ dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR);
+ layered_dpb = !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
+
+ if (dedicated_dpb) {
+ fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+ } else {
+ fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
+ VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT;
+ }
+
+ /* Get the format of the images necessary */
+ ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev,
+ &fmt_info,
+ &nb_out_fmts, NULL);
+ if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+ (!nb_out_fmts && ret == VK_SUCCESS)) {
+ return AVERROR(EINVAL);
+ } else if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts);
+ if (!ret_info)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < nb_out_fmts; i++)
+ ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR;
+
+ ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev,
+ &fmt_info,
+ &nb_out_fmts, ret_info);
+ if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+ (!nb_out_fmts && ret == VK_SUCCESS)) {
+ av_free(ret_info);
+ return AVERROR(EINVAL);
+ } else if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+ ff_vk_ret2str(ret));
+ av_free(ret_info);
+ return AVERROR_EXTERNAL;
+ }
+
+ if (ctx) {
+ ctx->dedicated_dpb = dedicated_dpb;
+ ctx->layered_dpb = layered_dpb;
+ ctx->init = 1;
+ }
+
+ *pix_fmt = AV_PIX_FMT_NONE;
+
+ av_log(avctx, AV_LOG_DEBUG, "Pixel format list for decoding:\n");
+ for (int i = 0; i < nb_out_fmts; i++) {
+ int tmp_score;
+ enum AVPixelFormat tmp = ff_vk_pix_fmt_from_vkfmt(ret_info[i].format,
+ &tmp_score);
+ const AVPixFmtDescriptor *tmp_desc = av_pix_fmt_desc_get(tmp);
+ if (tmp == AV_PIX_FMT_NONE || !tmp_desc)
+ continue;
+
+ av_log(avctx, AV_LOG_DEBUG, " %i - %s (%i), score %i\n", i,
+ av_get_pix_fmt_name(tmp), ret_info[i].format, tmp_score);
+
+ if (context_format == tmp || tmp_score < score) {
+ if (ctx)
+ ctx->pic_format = ret_info[i].format;
+ *pix_fmt = tmp;
+ context_format_was_found |= context_format == tmp;
+ if (context_format_was_found)
+ break;
+ }
+ }
+
+ if (*pix_fmt == AV_PIX_FMT_NONE) {
+ av_log(avctx, AV_LOG_ERROR, "No valid pixel format for decoding!\n");
+ return AVERROR(EINVAL);
+ }
+
+ if (width_align)
+ *width_align = caps->pictureAccessGranularity.width;
+ if (height_align)
+ *height_align = caps->pictureAccessGranularity.height;
+ if (dpb_dedicate)
+ *dpb_dedicate = dedicated_dpb;
+
+ av_free(ret_info);
+
+ av_log(avctx, AV_LOG_VERBOSE, "Chosen frames format: %s\n",
+ av_get_pix_fmt_name(*pix_fmt));
+
+ if (context_format != AV_PIX_FMT_NONE && !context_format_was_found) {
+ av_log(avctx, AV_LOG_ERROR, "Frames context had a pixel format set which "
+ "was not available for decoding into!\n");
+ return AVERROR(EINVAL);
+ }
+
+ return *pix_fmt == AV_PIX_FMT_NONE ? AVERROR(EINVAL) : 0;
+}
+
+/**
+ * Fill in a hardware frames context for Vulkan decoding of the stream in avctx.
+ *
+ * The software pixel format, the alignment requirements and the DPB mode are
+ * obtained from vulkan_decode_check_init(); the coded dimensions are then
+ * rounded up to that alignment and the image usage flags are set.
+ *
+ * @return the non-negative result of vulkan_decode_check_init() on success,
+ *         its negative error code otherwise
+ */
+int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
+{
+    AVHWFramesContext *hwframes = (AVHWFramesContext*)hw_frames_ctx->data;
+    AVVulkanFramesContext *vkframes = hwframes->hwctx;
+    int align_w, align_h, separate_dpb;
+    int ret;
+
+    ret = vulkan_decode_check_init(avctx, hw_frames_ctx, &align_w, &align_h,
+                                   &hwframes->sw_format, &separate_dpb);
+    if (ret < 0)
+        return ret;
+
+    hwframes->format = AV_PIX_FMT_VULKAN;
+    hwframes->width  = FFALIGN(avctx->coded_width, align_w);
+    hwframes->height = FFALIGN(avctx->coded_height, align_h);
+
+    /* Output images are decode targets that can also be sampled and copied from. */
+    vkframes->tiling = VK_IMAGE_TILING_OPTIMAL;
+    vkframes->usage  = VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
+                       VK_IMAGE_USAGE_SAMPLED_BIT |
+                       VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+
+    /* Without a dedicated DPB the output images are also used as DPB images. */
+    if (!separate_dpb)
+        vkframes->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+
+    return ret;
+}
+
+/**
+ * Destroy a VkVideoSessionParametersKHR stored in a heap allocation.
+ *
+ * @param opaque the FFVulkanDecodeContext providing the device and functions
+ * @param data   allocation holding the handle; freed after the handle is destroyed
+ */
+void ff_vk_decode_free_params(void *opaque, uint8_t *data)
+{
+    FFVulkanDecodeContext *dec = opaque;
+    FFVulkanContext *s = &dec->s;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkVideoSessionParametersKHR *params = (VkVideoSessionParametersKHR *)data;
+
+    vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, *params,
+                                         s->hwctx->alloc);
+
+    /* The handle lives in its own allocation, release that as well. */
+    av_free(params);
+}
+
+/**
+ * Tear down the Vulkan decoder state stored in
+ * avctx->internal->hwaccel_priv_data.
+ *
+ * This is also the error path of ff_vk_decode_init(), so it may run on a
+ * partially initialized context: every Vulkan handle is only destroyed if it
+ * was actually created.
+ *
+ * @return always 0
+ */
+int ff_vk_decode_uninit(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    /* Wait on and free execution pool */
+    ff_vk_exec_pool_free(s, &ctx->exec_pool);
+
+    /* Destroy layered view */
+    if (ctx->layered_view)
+        vk->DestroyImageView(s->hwctx->act_dev, ctx->layered_view, s->hwctx->alloc);
+
+    /* This also frees all references from this pool */
+    av_frame_free(&ctx->layered_frame);
+    av_buffer_unref(&ctx->dpb_hwfc_ref);
+
+    /* Destroy parameters */
+    if (ctx->empty_session_params)
+        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
+                                             ctx->empty_session_params,
+                                             s->hwctx->alloc);
+
+    ff_vk_video_common_uninit(s, &ctx->common);
+
+    /* Guarded like the other handles: init can fail before the sampler exists */
+    if (ctx->yuv_sampler)
+        vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler,
+                                          s->hwctx->alloc);
+
+    av_buffer_pool_uninit(&ctx->tmp_pool);
+
+    ff_vk_uninit(s);
+
+    return 0;
+}
+
+/**
+ * Initialize the Vulkan decoder stored in avctx->internal->hwaccel_priv_data.
+ *
+ * Obtains the hardware frames context, validates the stream against the
+ * driver capabilities, then creates the execution pool, the video session,
+ * the YCbCr sampler conversion, the optional dedicated/layered DPB and an
+ * empty set of session parameters.
+ *
+ * Once the device context is known, every failure goes through the "fail"
+ * label so that ff_vk_decode_uninit() releases whatever was created so far.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vk_decode_init(AVCodecContext *avctx)
+{
+    int err, qf, cxpos = 0, cypos = 0, nb_q = 0;
+    VkResult ret;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
+    };
+    VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params :
+                 avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params :
+                 NULL,
+    };
+    VkVideoSessionCreateInfoKHR session_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR,
+    };
+    VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
+        .components = ff_comp_identity_map,
+        .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY,
+        .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */
+    };
+
+    err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN);
+    if (err < 0)
+        return err;
+
+    /* s->hwctx is not set yet, so these two failures cannot use the common
+     * cleanup path (it dereferences s->hwctx); undo the references by hand. */
+    s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
+    if (!s->frames_ref)
+        return AVERROR(ENOMEM);
+    s->frames = (AVHWFramesContext *)s->frames_ref->data;
+    s->hwfc = s->frames->hwctx;
+
+    s->device_ref = av_buffer_ref(s->frames->device_ref);
+    if (!s->device_ref) {
+        s->frames = NULL;
+        s->hwfc = NULL;
+        av_buffer_unref(&s->frames_ref);
+        return AVERROR(ENOMEM);
+    }
+    s->device = (AVHWDeviceContext *)s->device_ref->data;
+    s->hwctx = s->device->hwctx;
+
+    /* Get parameters, capabilities and final pixel/vulkan format */
+    err = vulkan_decode_check_init(avctx, s->frames_ref, NULL, NULL,
+                                   &ctx->sw_format, NULL);
+    if (err < 0)
+        goto fail;
+
+    /* Load all properties */
+    err = ff_vk_load_props(s);
+    if (err < 0)
+        goto fail;
+
+    /* Create queue context */
+    qf = ff_vk_qf_init(s, &ctx->qf_dec, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+
+    /* Check for support */
+    if (!(s->video_props[qf].videoCodecOperations &
+          ff_vk_codec_map[avctx->codec_id].decode_op)) {
+        av_log(avctx, AV_LOG_ERROR, "Decoding %s not supported on the given "
+               "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf);
+        /* Go through the cleanup path instead of leaking the references */
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* TODO: enable when stable and tested. */
+    if (s->query_props[qf].queryResultStatusSupport)
+        nb_q = 1;
+
+    /* Create decode exec context.
+     * 4 async contexts per thread seems like a good number. */
+    err = ff_vk_exec_pool_init(s, &ctx->qf_dec, &ctx->exec_pool, 4*avctx->thread_count,
+                               nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0,
+                               &ctx->profile);
+    if (err < 0)
+        goto fail;
+
+    session_create.pVideoProfile = &ctx->profile;
+    session_create.flags = 0x0;
+    session_create.queueFamilyIndex = s->hwctx->queue_family_decode_index;
+    session_create.maxCodedExtent = ctx->common.caps.maxCodedExtent;
+    session_create.maxDpbSlots = ctx->common.caps.maxDpbSlots;
+    session_create.maxActiveReferencePictures = ctx->common.caps.maxActiveReferencePictures;
+    session_create.pictureFormat = ctx->pic_format;
+    session_create.referencePictureFormat = session_create.pictureFormat;
+    session_create.pStdHeaderVersion = dec_ext[avctx->codec_id];
+
+    err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
+    if (err < 0)
+        goto fail;
+
+    /* Get sampler */
+    av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location);
+    yuv_sampler_info.xChromaOffset = cxpos >> 7;
+    yuv_sampler_info.yChromaOffset = cypos >> 7;
+    yuv_sampler_info.format = ctx->pic_format;
+    ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info,
+                                           s->hwctx->alloc, &ctx->yuv_sampler);
+    if (ret != VK_SUCCESS) {
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* If doing an out-of-place decoding, create a DPB pool */
+    if (ctx->dedicated_dpb) {
+        AVHWFramesContext *dpb_frames;
+        AVVulkanFramesContext *dpb_hwfc;
+
+        ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->device_ref);
+        if (!ctx->dpb_hwfc_ref) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+        dpb_frames->format = s->frames->format;
+        dpb_frames->sw_format = s->frames->sw_format;
+        dpb_frames->width = s->frames->width;
+        dpb_frames->height = s->frames->height;
+
+        dpb_hwfc = dpb_frames->hwctx;
+        dpb_hwfc->create_pnext = &ctx->profile_list;
+        dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL;
+        dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
+                          VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */
+
+        if (ctx->layered_dpb)
+            dpb_hwfc->nb_layers = ctx->common.caps.maxDpbSlots;
+
+        err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref);
+        if (err < 0)
+            goto fail;
+
+        if (ctx->layered_dpb) {
+            ctx->layered_frame = vk_get_dpb_pool(ctx);
+            if (!ctx->layered_frame) {
+                err = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            err = vk_decode_create_view(ctx, &ctx->layered_view, &ctx->layered_aspect,
+                                        (AVVkFrame *)ctx->layered_frame->data);
+            if (err < 0)
+                goto fail;
+        }
+    }
+
+    session_params_create.videoSession = ctx->common.session;
+    ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
+                                              s->hwctx->alloc, &ctx->empty_session_params);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        /* Everything above has been created by now; release it */
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    ff_vk_decode_flush(avctx);
+
+    av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization sucessful\n");
+
+    return 0;
+
+fail:
+    ff_vk_decode_uninit(avctx);
+
+    return err;
+}
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
new file mode 100644
index 0000000000..9f9676bbfa
--- /dev/null
+++ b/libavcodec/vulkan_decode.h
@@ -0,0 +1,163 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_DECODE_H
+#define AVCODEC_VULKAN_DECODE_H
+
+#include "decode.h"
+#include "hwconfig.h"
+#include "internal.h"
+
+#include "vulkan_video.h"
+
+/**
+ * Per-decoder Vulkan state, stored in avctx->internal->hwaccel_priv_data.
+ */
+typedef struct FFVulkanDecodeContext {
+    FFVulkanContext s;
+    FFVkVideoCommon common;
+
+    int dedicated_dpb; /* Oddity  #1 - separate DPB images */
+    int layered_dpb;   /* Madness #1 - layered DPB images */
+
+    AVBufferRef *dpb_hwfc_ref;  /* Only used for dedicated_dpb */
+
+    AVFrame *layered_frame;     /* Only used for layered_dpb */
+    VkImageView layered_view;
+    VkImageAspectFlags layered_aspect;
+
+    /* Codec-specific profile structs; each must use its own codec's type */
+    VkVideoDecodeH264ProfileInfoKHR h264_profile;
+    VkVideoDecodeH265ProfileInfoKHR h265_profile;
+    /* Parameter-less session parameters created at init time */
+    VkVideoSessionParametersKHR empty_session_params;
+
+    VkSamplerYcbcrConversion yuv_sampler;
+    VkVideoDecodeUsageInfoKHR usage;
+    VkVideoProfileInfoKHR profile;
+    VkVideoDecodeCapabilitiesKHR dec_caps;
+    VkVideoProfileListInfoKHR profile_list;
+    VkFormat pic_format;          /* Vulkan format chosen for the decoded pictures */
+    enum AVPixelFormat sw_format; /* Matching software pixel format */
+    int init;                     /* Set to 1 once the capability check has filled the fields above */
+
+    AVBufferRef *session_params;
+
+    FFVkQueueFamilyCtx qf_dec;
+    FFVkExecPool exec_pool;
+
+    AVBufferPool *tmp_pool; /* Pool for temporary data, if needed (HEVC) */
+    size_t tmp_pool_ele_size;
+
+    uint16_t last_ref_frames_in_use;
+} FFVulkanDecodeContext;
+
+/**
+ * Per-picture Vulkan decoding state: image views, synchronization, the
+ * reference slot descriptions and the accumulated slice data.
+ */
+typedef struct FFVulkanDecodePicture {
+ AVFrame *dpb_frame; /* Only used for out-of-place decoding. */
+ AVVkFrame *dpb_vkf; /* Only used for out-of-place decoding. */
+
+ VkImageView img_view_ref; /* Image representation view (reference) */
+ VkImageView img_view_out; /* Image representation view (output-only) */
+ VkImageAspectFlags img_aspect; /* Image plane mask bits */
+ VkImageAspectFlags img_aspect_ref; /* Only used for out-of-place decoding */
+
+ VkSemaphore sem;
+ uint64_t sem_value;
+
+ /* State */
+ int update_params;
+ AVBufferRef *session_params;
+
+ /* Current picture */
+ VkVideoPictureResourceInfoKHR ref;
+ VkVideoReferenceSlotInfoKHR ref_slot;
+
+ /* Picture refs. H264 has the maximum number of refs (36) of any supported codec. */
+ VkVideoPictureResourceInfoKHR refs [36];
+ VkVideoReferenceSlotInfoKHR ref_slots[36];
+
+ /* Main decoding struct */
+ AVBufferRef *params_buf;
+ VkVideoDecodeInfoKHR decode_info;
+
+ /* Slice data */
+ uint8_t *slices;
+ size_t slices_size;
+ unsigned int slices_size_max;
+ uint32_t *slice_off;
+ unsigned int slice_off_max;
+ uint32_t nb_slices;
+} FFVulkanDecodePicture;
+
+/**
+ * Initialize decoder.
+ *
+ * The decoder state is taken from avctx->internal->hwaccel_priv_data.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int ff_vk_decode_init(AVCodecContext *avctx);
+
+/**
+ * Initialize hw_frames_ctx with the parameters needed to decode the stream
+ * using the parameters from avctx.
+ *
+ * Sets the software format, the aligned width/height, the Vulkan pixel
+ * format, optimal tiling and the image usage flags.
+ *
+ * NOTE: if avctx->internal->hwaccel_priv_data exists, will partially initialize
+ * the context.
+ *
+ * @return a non-negative value on success, a negative AVERROR code on failure
+ */
+int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
+
+/**
+ * Prepare a frame, creates the image view, and sets up the dpb fields.
+ */
+int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic,
+ FFVulkanDecodePicture *vkpic, int is_current,
+ int alloc_dpb);
+
+/**
+ * Add slice data to frame.
+ */
+int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp,
+ const uint8_t *data, size_t size, int add_startcode,
+ uint32_t *nb_slices, const uint32_t **offsets);
+
+/**
+ * Decode a frame.
+ */
+int ff_vk_decode_frame(AVCodecContext *avctx,
+ AVFrame *pic, FFVulkanDecodePicture *vp,
+ AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]);
+
+/**
+ * Free a frame and its state.
+ */
+void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp);
+
+/**
+ * Get an FFVkBuffer suitable for decoding from.
+ */
+int ff_vk_get_decode_buffer(FFVulkanDecodeContext *ctx, AVBufferRef **buf,
+ void *create_pNext, size_t size);
+
+/**
+ * Free VkVideoSessionParametersKHR.
+ *
+ * @param opaque the FFVulkanDecodeContext used to destroy the parameters
+ * @param data allocation holding one VkVideoSessionParametersKHR handle;
+ * the handle is destroyed and the allocation itself is freed
+ */
+void ff_vk_decode_free_params(void *opaque, uint8_t *data);
+
+/**
+ * Flush decoder.
+ */
+void ff_vk_decode_flush(AVCodecContext *avctx);
+
+/**
+ * Free decoder.
+ *
+ * Also used as the error path of ff_vk_decode_init().
+ *
+ * @return 0
+ */
+int ff_vk_decode_uninit(AVCodecContext *avctx);
+
+#endif /* AVCODEC_VULKAN_DECODE_H */
--
2.39.2
[-- Attachment #70: 0069-h264dec-add-hwaccel_params_buf.patch --]
[-- Type: text/x-diff, Size: 2737 bytes --]
From e26c514b35f5c87321a8fa6c6eb70b54220a92ed Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 17 Jan 2023 05:01:45 +0100
Subject: [PATCH 69/72] h264dec: add hwaccel_params_buf
---
libavcodec/h264_slice.c | 4 ++++
libavcodec/h264dec.c | 4 ++++
libavcodec/h264dec.h | 2 ++
3 files changed, 10 insertions(+)
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index c0aa31bcd9..0c7f80c018 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -347,6 +347,10 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
return ret;
}
+ ret = av_buffer_replace(&h->hwaccel_params_buf, h1->hwaccel_params_buf);
+ if (ret < 0)
+ return ret;
+
ret = av_buffer_replace(&h->ps.pps_ref, h1->ps.pps_ref);
if (ret < 0)
return ret;
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 995bf17a8f..f6059da950 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -341,6 +341,7 @@ static av_cold int h264_decode_end(AVCodecContext *avctx)
H264Context *h = avctx->priv_data;
int i;
+ av_buffer_unref(&h->hwaccel_params_buf);
ff_h264_remove_all_refs(h);
ff_h264_free_tables(h);
@@ -470,6 +471,7 @@ static void h264_decode_flush(AVCodecContext *avctx)
ff_h264_flush_change(h);
ff_h264_sei_uninit(&h->sei);
+ av_buffer_unref(&h->hwaccel_params_buf);
for (i = 0; i < H264_MAX_PICTURE_COUNT; i++)
ff_h264_unref_picture(h, &h->DPB[i]);
@@ -669,6 +671,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
avpriv_request_sample(avctx, "data partitioning");
break;
case H264_NAL_SEI:
+ av_buffer_unref(&h->hwaccel_params_buf);
if (h->setup_finished) {
avpriv_request_sample(avctx, "Late SEI");
break;
@@ -682,6 +685,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
break;
case H264_NAL_SPS: {
GetBitContext tmp_gb = nal->gb;
+ av_buffer_unref(&h->hwaccel_params_buf);
if (avctx->hwaccel && avctx->hwaccel->decode_params) {
ret = avctx->hwaccel->decode_params(avctx,
nal->type,
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 1b18aba71f..5b1620c3f1 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -342,6 +342,8 @@ typedef struct H264Context {
H264Picture cur_pic;
H264Picture last_pic_for_ec;
+ AVBufferRef *hwaccel_params_buf;
+
H264SliceContext *slice_ctx;
int nb_slice_ctx;
int nb_slice_ctx_queued;
--
2.39.2
[-- Attachment #71: 0070-h264dec-add-Vulkan-hwaccel.patch --]
[-- Type: text/x-diff, Size: 27544 bytes --]
From b5ff58808482bedf12b981ee1c03dd95099a9332 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 01:13:01 +0100
Subject: [PATCH 70/72] h264dec: add Vulkan hwaccel
Thanks to Dave Airlie for figuring out a lot of the parameters.
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/h264_slice.c | 12 +-
libavcodec/h264dec.c | 3 +
libavcodec/hwaccels.h | 1 +
libavcodec/vulkan_h264.c | 521 +++++++++++++++++++++++++++++++++++++++
6 files changed, 539 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan_h264.c
diff --git a/configure b/configure
index 91f715351c..60973c38b3 100755
--- a/configure
+++ b/configure
@@ -3034,6 +3034,8 @@ h264_vdpau_hwaccel_deps="vdpau"
h264_vdpau_hwaccel_select="h264_decoder"
h264_videotoolbox_hwaccel_deps="videotoolbox"
h264_videotoolbox_hwaccel_select="h264_decoder"
+h264_vulkan_hwaccel_deps="vulkan"
+h264_vulkan_hwaccel_select="h264_decoder"
hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
hevc_d3d11va_hwaccel_select="hevc_decoder"
hevc_d3d11va2_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index eabf4eb43e..4c9db167a5 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -992,6 +992,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o
OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o
OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o
OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
+OBJS-$(CONFIG_H264_VULKAN_HWACCEL) += vulkan_decode.o vulkan_h264.o
OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o
OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o
OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 0c7f80c018..50d138e2a9 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -782,7 +782,8 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
CONFIG_H264_NVDEC_HWACCEL + \
CONFIG_H264_VAAPI_HWACCEL + \
CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \
- CONFIG_H264_VDPAU_HWACCEL)
+ CONFIG_H264_VDPAU_HWACCEL + \
+ CONFIG_H264_VULKAN_HWACCEL)
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
const enum AVPixelFormat *choices = pix_fmts;
int i;
@@ -803,6 +804,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
#if CONFIG_H264_VIDEOTOOLBOX_HWACCEL
if (h->avctx->colorspace != AVCOL_SPC_RGB)
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_H264_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
if (CHROMA444(h)) {
if (h->avctx->colorspace == AVCOL_SPC_RGB) {
@@ -815,6 +819,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
*fmt++ = AV_PIX_FMT_YUV420P10;
break;
case 12:
+#if CONFIG_H264_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
if (CHROMA444(h)) {
if (h->avctx->colorspace == AVCOL_SPC_RGB) {
*fmt++ = AV_PIX_FMT_GBRP12;
@@ -840,6 +847,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
#if CONFIG_H264_VDPAU_HWACCEL
*fmt++ = AV_PIX_FMT_VDPAU;
#endif
+#if CONFIG_H264_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
#if CONFIG_H264_NVDEC_HWACCEL
*fmt++ = AV_PIX_FMT_CUDA;
#endif
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index f6059da950..15a6e74829 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -1100,6 +1100,9 @@ const FFCodec ff_h264_decoder = {
#endif
#if CONFIG_H264_VIDEOTOOLBOX_HWACCEL
HWACCEL_VIDEOTOOLBOX(h264),
+#endif
+#if CONFIG_H264_VULKAN_HWACCEL
+ HWACCEL_VULKAN(h264),
#endif
NULL
},
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index aca55831f3..23d0843c76 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -36,6 +36,7 @@ extern const AVHWAccel ff_h264_nvdec_hwaccel;
extern const AVHWAccel ff_h264_vaapi_hwaccel;
extern const AVHWAccel ff_h264_vdpau_hwaccel;
extern const AVHWAccel ff_h264_videotoolbox_hwaccel;
+extern const AVHWAccel ff_h264_vulkan_hwaccel;
extern const AVHWAccel ff_hevc_d3d11va_hwaccel;
extern const AVHWAccel ff_hevc_d3d11va2_hwaccel;
extern const AVHWAccel ff_hevc_dxva2_hwaccel;
diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c
new file mode 100644
index 0000000000..241a7d8f5b
--- /dev/null
+++ b/libavcodec/vulkan_h264.c
@@ -0,0 +1,521 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264dec.h"
+#include "h264_ps.h"
+
+#include "vulkan_decode.h"
+
+/* Name and spec version of the Vulkan Video std header used for H.264 decoding. */
+const VkExtensionProperties ff_vk_dec_h264_ext = {
+ .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME,
+ .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION,
+};
+
+/**
+ * H.264-specific per-picture data; vk_h264_fill_pict() reads it from
+ * H264Picture.hwaccel_picture_private. Wraps the codec-independent
+ * FFVulkanDecodePicture and adds the H.264 std/KHR structures.
+ */
+typedef struct H264VulkanDecodePicture {
+ FFVulkanDecodeContext *ctx;
+ FFVulkanDecodePicture vp;
+
+ /* Current picture */
+ StdVideoDecodeH264ReferenceInfo h264_ref;
+ VkVideoDecodeH264DpbSlotInfoKHR vkh264_ref;
+
+ /* Picture refs */
+ H264Picture *ref_src [H264_MAX_PICTURE_COUNT];
+ StdVideoDecodeH264ReferenceInfo h264_refs [H264_MAX_PICTURE_COUNT];
+ VkVideoDecodeH264DpbSlotInfoKHR vkh264_refs[H264_MAX_PICTURE_COUNT];
+
+ /* Current picture (contd.) */
+ StdVideoDecodeH264PictureInfo h264pic;
+ VkVideoDecodeH264PictureInfoKHR h264_pic_info;
+} H264VulkanDecodePicture;
+
+/**
+ * Prepare a picture for use as a DPB entry and describe it to Vulkan.
+ *
+ * Calls ff_vk_decode_prepare_frame() on the picture's frame, then fills the
+ * four caller-provided structures, which end up chained to each other:
+ * ref_slot -> (pNext) vkh264_ref -> h264_ref, and ref_slot -> ref.
+ *
+ * @param ref_src if non-NULL, receives pic
+ * @param is_current passed through to ff_vk_decode_prepare_frame()
+ * @param picture_structure PICT_TOP_FIELD/PICT_BOTTOM_FIELD bits for the field flags
+ * @param dpb_slot_index slot index; also the array layer when the DPB is layered
+ * @return 0 on success, the negative error of ff_vk_decode_prepare_frame() otherwise
+ */
+static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
+ VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */
+ VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */
+ VkVideoDecodeH264DpbSlotInfoKHR *vkh264_ref, /* Goes in ^ */
+ StdVideoDecodeH264ReferenceInfo *h264_ref, /* Goes in ^ */
+ H264Picture *pic, int is_current, int picture_structure,
+ int dpb_slot_index)
+{
+ FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+ H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+ FFVulkanDecodePicture *vkpic = &hp->vp;
+
+ /* A DPB image is requested only when the decoder uses a dedicated DPB */
+ int err = ff_vk_decode_prepare_frame(ctx, pic->f, vkpic, is_current,
+ ctx->dedicated_dpb);
+ if (err < 0)
+ return err;
+
+ /* Long-term references are identified by pic_id, short-term ones by frame_num */
+ *h264_ref = (StdVideoDecodeH264ReferenceInfo) {
+ .FrameNum = pic->long_ref ? pic->pic_id : pic->frame_num, /* TODO: kinda sure */
+ .PicOrderCnt = { pic->field_poc[0], pic->field_poc[1] },
+ .flags = (StdVideoDecodeH264ReferenceInfoFlags) {
+ .top_field_flag = !!(picture_structure & PICT_TOP_FIELD),
+ .bottom_field_flag = !!(picture_structure & PICT_BOTTOM_FIELD),
+ .used_for_long_term_reference = pic->reference && pic->long_ref,
+ .is_non_existing = 0,
+ },
+ };
+
+ *vkh264_ref = (VkVideoDecodeH264DpbSlotInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR,
+ .pStdReferenceInfo = h264_ref,
+ };
+
+ /* With a layered DPB every slot is a layer of one image, otherwise layer 0 */
+ *ref = (VkVideoPictureResourceInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+ .codedOffset = (VkOffset2D){ 0, 0 },
+ .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
+ .baseArrayLayer = ctx->layered_dpb ? dpb_slot_index : 0,
+ .imageViewBinding = vkpic->img_view_ref,
+ };
+
+ *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+ .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+ .pNext = vkh264_ref,
+ .slotIndex = dpb_slot_index, /* TODO: kinda sure */
+ .pPictureResource = ref,
+ };
+
+ if (ref_src)
+ *ref_src = pic;
+
+ return 0;
+}
+
+/**
+ * Convert a parsed H.264 SPS into the Vulkan Video std structures.
+ *
+ * All four outputs are fully overwritten. vksps ends up pointing at
+ * vksps_scaling and vksps_vui, vksps_vui points at vksps_vui_header, and
+ * vksps->pOffsetForRefFrame points into sps itself, so every argument must
+ * outlive any use of vksps.
+ *
+ * @param sps              parsed sequence parameter set (source)
+ * @param vksps_scaling    receives the scaling lists
+ * @param vksps_vui_header receives the HRD parameters
+ * @param vksps_vui        receives the VUI parameters
+ * @param vksps            receives the sequence parameter set itself
+ */
+static void set_sps(const SPS *sps,
+                    StdVideoH264ScalingLists *vksps_scaling,
+                    StdVideoH264HrdParameters *vksps_vui_header,
+                    StdVideoH264SequenceParameterSetVui *vksps_vui,
+                    StdVideoH264SequenceParameterSet *vksps)
+{
+    *vksps_scaling = (StdVideoH264ScalingLists) {
+        .scaling_list_present_mask = sps->scaling_matrix_present_mask,
+        .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */
+    };
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_matrix4[i],
+               STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**sps->scaling_matrix4));
+
+    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_matrix8[i],
+               STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**sps->scaling_matrix8));
+
+    *vksps_vui_header = (StdVideoH264HrdParameters) {
+        .cpb_cnt_minus1 = sps->cpb_cnt - 1,
+        .bit_rate_scale = sps->bit_rate_scale,
+        .initial_cpb_removal_delay_length_minus1 = sps->initial_cpb_removal_delay_length - 1,
+        .cpb_removal_delay_length_minus1 = sps->cpb_removal_delay_length - 1,
+        .dpb_output_delay_length_minus1 = sps->dpb_output_delay_length - 1,
+        .time_offset_length = sps->time_offset_length,
+    };
+
+    for (int i = 0; i < sps->cpb_cnt; i++) {
+        vksps_vui_header->bit_rate_value_minus1[i] = sps->bit_rate_value[i] - 1;
+        vksps_vui_header->cpb_size_value_minus1[i] = sps->cpb_size_value[i] - 1;
+        vksps_vui_header->cbr_flag[i] = sps->cpr_flag[i];
+    }
+
+    *vksps_vui = (StdVideoH264SequenceParameterSetVui) {
+        .aspect_ratio_idc = sps->vui.aspect_ratio_idc,
+        .sar_width = sps->vui.sar.num,
+        .sar_height = sps->vui.sar.den,
+        .video_format = sps->vui.video_format,
+        .colour_primaries = sps->vui.colour_primaries,
+        .transfer_characteristics = sps->vui.transfer_characteristics,
+        .matrix_coefficients = sps->vui.matrix_coeffs,
+        .num_units_in_tick = sps->num_units_in_tick,
+        .time_scale = sps->time_scale,
+        .pHrdParameters = vksps_vui_header,
+        .max_num_reorder_frames = sps->num_reorder_frames,
+        .max_dec_frame_buffering = sps->max_dec_frame_buffering,
+        .flags = (StdVideoH264SpsVuiFlags) {
+            .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag,
+            .overscan_info_present_flag = sps->vui.overscan_info_present_flag,
+            .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag,
+            .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag,
+            .video_full_range_flag = sps->vui.video_full_range_flag,
+            .color_description_present_flag = sps->vui.colour_description_present_flag,
+            /* Use the parsed flag: chroma_location is an enum and would be
+             * truncated to its low bit when stored in a 1-bit flag. */
+            .chroma_loc_info_present_flag = sps->vui.chroma_loc_info_present_flag,
+            .timing_info_present_flag = sps->timing_info_present_flag,
+            .fixed_frame_rate_flag = sps->fixed_frame_rate_flag,
+            .bitstream_restriction_flag = sps->bitstream_restriction_flag,
+            .nal_hrd_parameters_present_flag = sps->nal_hrd_parameters_present_flag,
+            .vcl_hrd_parameters_present_flag = sps->vcl_hrd_parameters_present_flag,
+        },
+    };
+
+    *vksps = (StdVideoH264SequenceParameterSet) {
+        .profile_idc = sps->profile_idc,
+        .level_idc = sps->level_idc,
+        .seq_parameter_set_id = sps->sps_id,
+        .chroma_format_idc = sps->chroma_format_idc,
+        .bit_depth_luma_minus8 = sps->bit_depth_luma - 8,
+        .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8,
+        .log2_max_frame_num_minus4 = sps->log2_max_frame_num - 4,
+        .pic_order_cnt_type = sps->poc_type,
+        /* Only written for POC type 0, zero otherwise */
+        .log2_max_pic_order_cnt_lsb_minus4 = sps->poc_type ? 0 : sps->log2_max_poc_lsb - 4,
+        .offset_for_non_ref_pic = sps->offset_for_non_ref_pic,
+        .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field,
+        .num_ref_frames_in_pic_order_cnt_cycle = sps->poc_cycle_length,
+        .max_num_ref_frames = sps->ref_frame_count,
+        .pic_width_in_mbs_minus1 = sps->mb_width - 1,
+        /* mb_height is divided by two when frame_mbs_only_flag is 0 */
+        .pic_height_in_map_units_minus1 = (sps->mb_height/(2 - sps->frame_mbs_only_flag)) - 1,
+        .frame_crop_left_offset = sps->crop_left,
+        .frame_crop_right_offset = sps->crop_right,
+        .frame_crop_top_offset = sps->crop_top,
+        .frame_crop_bottom_offset = sps->crop_bottom,
+        .flags = (StdVideoH264SpsFlags) {
+            .constraint_set0_flag = (sps->constraint_set_flags >> 0) & 0x1,
+            .constraint_set1_flag = (sps->constraint_set_flags >> 1) & 0x1,
+            .constraint_set2_flag = (sps->constraint_set_flags >> 2) & 0x1,
+            .constraint_set3_flag = (sps->constraint_set_flags >> 3) & 0x1,
+            .constraint_set4_flag = (sps->constraint_set_flags >> 4) & 0x1,
+            .constraint_set5_flag = (sps->constraint_set_flags >> 5) & 0x1,
+            .direct_8x8_inference_flag = sps->direct_8x8_inference_flag,
+            .mb_adaptive_frame_field_flag = sps->mb_aff,
+            .frame_mbs_only_flag = sps->frame_mbs_only_flag,
+            .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag,
+            .separate_colour_plane_flag = sps->residual_color_transform_flag,
+            .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag,
+            .qpprime_y_zero_transform_bypass_flag = sps->transform_bypass,
+            .frame_cropping_flag = sps->crop,
+            .seq_scaling_matrix_present_flag = sps->scaling_matrix_present,
+            .vui_parameters_present_flag = sps->vui_parameters_present_flag,
+        },
+        .pOffsetForRefFrame = sps->offset_for_ref_frame,
+        .pScalingLists = vksps_scaling,
+        .pSequenceParameterSetVui = vksps_vui,
+    };
+}
+
+/**
+ * Convert a parsed H.264 PPS into the Vulkan Video std structures.
+ *
+ * Both outputs are fully overwritten; vkpps ends up pointing at
+ * vkpps_scaling, which must therefore outlive any use of vkpps.
+ *
+ * @param pps parsed picture parameter set (source)
+ * @param sps not referenced by this function
+ * @param vkpps_scaling receives the scaling lists
+ * @param vkpps receives the picture parameter set itself
+ */
+static void set_pps(const PPS *pps, const SPS *sps,
+ StdVideoH264ScalingLists *vkpps_scaling,
+ StdVideoH264PictureParameterSet *vkpps)
+{
+ *vkpps_scaling = (StdVideoH264ScalingLists) {
+ .scaling_list_present_mask = pps->pic_scaling_matrix_present_mask,
+ .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */
+ };
+
+ /* Copy the 4x4 and 8x8 scaling matrices list by list */
+ for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++)
+ memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_matrix4[i],
+ STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**pps->scaling_matrix4));
+
+ for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++)
+ memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_matrix8[i],
+ STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**pps->scaling_matrix8));
+
+ /* The std struct stores several values as "minus N" offsets of the parsed ones */
+ *vkpps = (StdVideoH264PictureParameterSet) {
+ .seq_parameter_set_id = pps->sps_id,
+ .pic_parameter_set_id = pps->pps_id,
+ .num_ref_idx_l0_default_active_minus1 = pps->ref_count[0] - 1,
+ .num_ref_idx_l1_default_active_minus1 = pps->ref_count[1] - 1,
+ .weighted_bipred_idc = pps->weighted_bipred_idc,
+ .pic_init_qp_minus26 = pps->init_qp - 26,
+ .pic_init_qs_minus26 = pps->init_qs - 26,
+ .chroma_qp_index_offset = pps->chroma_qp_index_offset[0],
+ .second_chroma_qp_index_offset = pps->chroma_qp_index_offset[1],
+ .flags = (StdVideoH264PpsFlags) {
+ .transform_8x8_mode_flag = pps->transform_8x8_mode,
+ .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present,
+ .constrained_intra_pred_flag = pps->constrained_intra_pred,
+ .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present,
+ .weighted_pred_flag = pps->weighted_pred,
+ .bottom_field_pic_order_in_frame_present_flag = pps->pic_order_present,
+ .entropy_coding_mode_flag = pps->cabac,
+ .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag,
+ },
+ .pScalingLists = vkpps_scaling,
+ };
+}
+
+/**
+ * Create a Vulkan video session parameters object containing every SPS and
+ * PPS currently held by the H.264 decoder.
+ *
+ * @param avctx codec context; its hwaccel private data supplies the Vulkan
+ *              function table, device and video session
+ * @param buf   on success receives a new AVBufferRef that wraps the
+ *              VkVideoSessionParametersKHR handle and is released through
+ *              ff_vk_decode_free_params(); left untouched on failure
+ * @return 0 on success, AVERROR(ENOMEM) or AVERROR_EXTERNAL on failure
+ */
+static int vk_h264_create_params(AVCodecContext *avctx, AVBufferRef **buf)
+{
+    VkResult ret;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    const H264Context *h = avctx->priv_data;
+
+    /* SPS */
+    StdVideoH264ScalingLists vksps_scaling[MAX_SPS_COUNT];
+    StdVideoH264HrdParameters vksps_vui_header[MAX_SPS_COUNT];
+    StdVideoH264SequenceParameterSetVui vksps_vui[MAX_SPS_COUNT];
+    StdVideoH264SequenceParameterSet vksps[MAX_SPS_COUNT];
+
+    /* PPS */
+    StdVideoH264ScalingLists vkpps_scaling[MAX_PPS_COUNT];
+    StdVideoH264PictureParameterSet vkpps[MAX_PPS_COUNT];
+
+    VkVideoDecodeH264SessionParametersAddInfoKHR h264_params_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR,
+        .pStdSPSs = vksps,
+        .stdSPSCount = 0,
+        .pStdPPSs = vkpps,
+        .stdPPSCount = 0,
+    };
+    VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pParametersAddInfo = &h264_params_info,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = &h264_params,
+        .videoSession = ctx->common.session,
+        .videoSessionParametersTemplate = NULL,
+    };
+
+    AVBufferRef *tmp;
+    VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par));
+    if (!par)
+        return AVERROR(ENOMEM);
+
+    /* SPS list. Scan every slot instead of stopping at the first empty one:
+     * the scan must never run past the MAX_SPS_COUNT-sized arrays above, and
+     * parameter set IDs need not be contiguous. Present entries are packed.
+     * NOTE(review): assumes h->ps.sps_list has MAX_SPS_COUNT slots, matching
+     * the local arrays - confirm against the H264ParamSets declaration. */
+    for (int i = 0; i < MAX_SPS_COUNT; i++) {
+        const uint32_t idx = h264_params_info.stdSPSCount;
+        const SPS *sps_l;
+
+        if (!h->ps.sps_list[i])
+            continue;
+
+        sps_l = (const SPS *)h->ps.sps_list[i]->data;
+        set_sps(sps_l, &vksps_scaling[idx], &vksps_vui_header[idx],
+                &vksps_vui[idx], &vksps[idx]);
+        h264_params_info.stdSPSCount++;
+    }
+
+    /* PPS list, same bounded and packed scan (same assumption on the table
+     * size, here MAX_PPS_COUNT). */
+    for (int i = 0; i < MAX_PPS_COUNT; i++) {
+        const uint32_t idx = h264_params_info.stdPPSCount;
+        const PPS *pps_l;
+
+        if (!h->ps.pps_list[i])
+            continue;
+
+        pps_l = (const PPS *)h->ps.pps_list[i]->data;
+        set_pps(pps_l, pps_l->sps, &vkpps_scaling[idx], &vkpps[idx]);
+        h264_params_info.stdPPSCount++;
+    }
+
+    h264_params.maxStdSPSCount = h264_params_info.stdSPSCount;
+    h264_params.maxStdPPSCount = h264_params_info.stdPPSCount;
+
+    /* Create session parameters */
+    ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create,
+                                              ctx->s.hwctx->alloc, par);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        /* No Vulkan object was created, so only the handle storage needs
+         * releasing. */
+        av_free(par);
+        return AVERROR_EXTERNAL;
+    }
+
+    tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params,
+                           ctx, 0);
+    if (!tmp) {
+        /* The buffer never took ownership; run its free callback by hand. */
+        ff_vk_decode_free_params(ctx, (uint8_t *)par);
+        return AVERROR(ENOMEM);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %u SPS %u PPS\n",
+           h264_params_info.stdSPSCount, h264_params_info.stdPPSCount);
+
+    *buf = tmp;
+
+    return 0;
+}
+
+/**
+ * Locate a picture inside the decoder's DPB array.
+ *
+ * @return the index of pic within h->DPB, or 0 when pic is not one of its
+ *         entries
+ */
+static int vk_h264_dpb_slot(const H264Context *h, const H264Picture *pic)
+{
+    for (int slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++)
+        if (pic == &h->DPB[slot])
+            return slot;
+
+    return 0;
+}
+
+/**
+ * Prepare the Vulkan decode description of the current picture.
+ *
+ * Creates the session parameters on first use, fills the setup slot for the
+ * current picture and one reference slot per short-term and long-term
+ * reference, then initialises the picture info and decode info structures
+ * that later calls complete and submit.
+ *
+ * @param avctx  codec context
+ * @param buffer unused
+ * @param size   unused
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_h264_start_frame(AVCodecContext *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t size)
+{
+    int err;
+    int dpb_slot_index;
+    int nb_refs = 0; /* reference slots actually filled so far */
+    H264Context *h = avctx->priv_data;
+    H264Picture *pic = h->cur_pic_ptr;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+
+    if (!h->hwaccel_params_buf) {
+        err = vk_h264_create_params(avctx, &h->hwaccel_params_buf);
+        if (err < 0)
+            return err;
+    }
+
+    /* Each picture holds its own reference to the session parameters. */
+    vp->session_params = av_buffer_ref(h->hwaccel_params_buf);
+    if (!vp->session_params)
+        return AVERROR(ENOMEM);
+
+    /* Fill in main slot */
+    dpb_slot_index = vk_h264_dpb_slot(h, pic);
+    err = vk_h264_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref,
+                            &hp->vkh264_ref, &hp->h264_ref, pic, 1,
+                            h->picture_structure, dpb_slot_index);
+    if (err < 0)
+        return err;
+
+    /* Fill in short-term references */
+    for (int i = 0; i < h->short_ref_count; i++) {
+        dpb_slot_index = vk_h264_dpb_slot(h, h->short_ref[i]);
+        err = vk_h264_fill_pict(avctx, &hp->ref_src[nb_refs], &vp->ref_slots[nb_refs],
+                                &vp->refs[nb_refs], &hp->vkh264_refs[nb_refs],
+                                &hp->h264_refs[nb_refs], h->short_ref[i], 0,
+                                h->DPB[dpb_slot_index].picture_structure,
+                                dpb_slot_index);
+        if (err < 0)
+            return err;
+        nb_refs++;
+    }
+
+    /* Fill in long-term refs. The table is indexed by long-term frame index
+     * and may contain holes, so walk it with its own index r, skip empty
+     * entries, and look up the DPB slot of the same entry that is described
+     * (the lookup previously used the output index instead of r). */
+    for (int r = 0, found = 0;
+         r < FF_ARRAY_ELEMS(h->long_ref) && found < h->long_ref_count; r++) {
+        if (!h->long_ref[r])
+            continue;
+
+        dpb_slot_index = vk_h264_dpb_slot(h, h->long_ref[r]);
+        err = vk_h264_fill_pict(avctx, &hp->ref_src[nb_refs], &vp->ref_slots[nb_refs],
+                                &vp->refs[nb_refs], &hp->vkh264_refs[nb_refs],
+                                &hp->h264_refs[nb_refs], h->long_ref[r], 0,
+                                h->DPB[dpb_slot_index].picture_structure,
+                                dpb_slot_index);
+        if (err < 0)
+            return err;
+        nb_refs++;
+        found++;
+    }
+
+    hp->h264pic = (StdVideoDecodeH264PictureInfo) {
+        .seq_parameter_set_id = pic->pps->sps_id,
+        .pic_parameter_set_id = pic->pps->pps_id,
+        .frame_num = h->poc.frame_num,
+        .idr_pic_id = h->poc.idr_pic_id,
+        .PicOrderCnt[0] = pic->field_poc[0],
+        .PicOrderCnt[1] = pic->field_poc[1],
+        .flags = (StdVideoDecodeH264PictureInfoFlags) {
+            .field_pic_flag = FIELD_PICTURE(h),
+            /* Assumed intra until a non-intra slice is added. */
+            .is_intra = 1,
+            .IdrPicFlag = h->picture_idr,
+            .bottom_field_flag = !!(h->picture_structure & PICT_BOTTOM_FIELD),
+            .is_reference = h->nal_ref_idc != 0,
+
+            // TODO: Not sure about this
+            .complementary_field_pair = h->first_field && FIELD_PICTURE(h),
+        },
+    };
+
+    /* Slices are appended later; start with none. */
+    hp->h264_pic_info = (VkVideoDecodeH264PictureInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR,
+        .pStdPictureInfo = &hp->h264pic,
+        .sliceCount = 0,
+    };
+
+    vp->decode_info = (VkVideoDecodeInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR,
+        .pNext = &hp->h264_pic_info,
+        .flags = 0x0,
+        .pSetupReferenceSlot = &vp->ref_slot,
+        /* Only advertise the slots that were really filled above. */
+        .referenceSlotCount = nb_refs,
+        .pReferenceSlots = vp->ref_slots,
+        .dstPictureResource = (VkVideoPictureResourceInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+            .codedOffset = (VkOffset2D){ 0, 0 },
+            .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
+            .baseArrayLayer = 0,
+            .imageViewBinding = vp->img_view_out,
+        },
+    };
+
+    hp->ctx = ctx;
+
+    return 0;
+}
+
+/**
+ * Append one slice to the current picture's pending decode operation.
+ *
+ * @param avctx codec context
+ * @param data  slice data
+ * @param size  size of data in bytes
+ * @return 0 on success, the negative error from ff_vk_decode_add_slice()
+ *         otherwise
+ */
+static int vk_h264_decode_slice(AVCodecContext *avctx,
+                                const uint8_t *data,
+                                uint32_t size)
+{
+    const H264Context *h = avctx->priv_data;
+    H264VulkanDecodePicture *hp = h->cur_pic_ptr->hwaccel_picture_private;
+    int slice_type;
+    int ret;
+
+    ret = ff_vk_decode_add_slice(&hp->vp, data, size, 1,
+                                 &hp->h264_pic_info.sliceCount,
+                                 &hp->h264_pic_info.pSliceOffsets);
+    if (ret < 0)
+        return ret;
+
+    /* The frame is only intra if all of its slices are intra (I or SI);
+     * the first non-intra slice clears the flag for good. */
+    slice_type = h->slice_ctx[0].slice_type;
+    if (!(slice_type == AV_PICTURE_TYPE_I || slice_type == AV_PICTURE_TYPE_SI))
+        hp->h264pic.flags.is_intra = 0;
+
+    return 0;
+}
+
+/**
+ * Submit the current picture for decoding.
+ *
+ * Collects, for every reference slot recorded in the picture's decode info,
+ * the referenced frame and its Vulkan decode picture, then hands everything
+ * to ff_vk_decode_frame().
+ *
+ * @param avctx codec context
+ * @return the result of ff_vk_decode_frame()
+ */
+static int vk_h264_end_frame(AVCodecContext *avctx)
+{
+    const H264Context *h = avctx->priv_data;
+    H264Picture *pic = h->cur_pic_ptr;
+    H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+    FFVulkanDecodePicture *rvp[H264_MAX_PICTURE_COUNT] = { 0 };
+    AVFrame *rav[H264_MAX_PICTURE_COUNT] = { 0 };
+
+    /* referenceSlotCount is an unsigned 32-bit count; iterate with a
+     * matching type to avoid a signed/unsigned comparison. */
+    for (uint32_t i = 0; i < vp->decode_info.referenceSlotCount; i++) {
+        H264Picture *rp = hp->ref_src[i];
+        H264VulkanDecodePicture *rhp = rp->hwaccel_picture_private;
+
+        rvp[i] = &rhp->vp;
+        rav[i] = rp->f;
+    }
+
+    /* Cast the byte count so the argument matches %lu whatever integer type
+     * slices_size has; sliceCount is unsigned, hence %u. */
+    av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %u slices\n",
+           (unsigned long)vp->slices_size, hp->h264_pic_info.sliceCount);
+
+    return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp);
+}
+
+/**
+ * Release the hwaccel private data attached to a picture.
+ *
+ * @param avctx unused
+ * @param data  the H264VulkanDecodePicture to tear down and free
+ */
+static void vk_h264_free_frame_priv(AVCodecContext *avctx, void *data)
+{
+    H264VulkanDecodePicture *pic_priv = data;
+    FFVulkanDecodePicture *vkpic = &pic_priv->vp;
+
+    /* Releases the per-frame Vulkan resources; per the original author's
+     * note this also destroys the session parameters. */
+    ff_vk_decode_free_frame(pic_priv->ctx, vkpic);
+
+    /* Finally drop the private structure itself. */
+    av_free(pic_priv);
+}
+
+/* Hardware acceleration descriptor for H.264 decoding through Vulkan. */
+const AVHWAccel ff_h264_vulkan_hwaccel = {
+    /* Identification */
+    .name                 = "h264_vulkan",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_H264,
+    .pix_fmt              = AV_PIX_FMT_VULKAN,
+
+    /* Context lifetime, handled by the shared Vulkan decode code */
+    .init                 = ff_vk_decode_init,
+    .uninit               = ff_vk_decode_uninit,
+    .flush                = ff_vk_decode_flush,
+    .frame_params         = ff_vk_frame_params,
+    .priv_data_size       = sizeof(FFVulkanDecodeContext),
+
+    /* Per-picture callbacks implemented in this file */
+    .start_frame          = vk_h264_start_frame,
+    .decode_slice         = vk_h264_decode_slice,
+    .end_frame            = vk_h264_end_frame,
+    .free_frame_priv      = vk_h264_free_frame_priv,
+    .frame_priv_data_size = sizeof(H264VulkanDecodePicture),
+
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
--
2.39.2
[-- Attachment #72: 0071-hevcdec-add-hwaccel_params_buf.patch --]
[-- Type: text/x-diff, Size: 2828 bytes --]
From 756f3a7daf18f402ec56a7f52ea8742d905edf18 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 17 Jan 2023 05:02:02 +0100
Subject: [PATCH 71/72] hevcdec: add hwaccel_params_buf
---
libavcodec/hevcdec.c | 9 +++++++++
libavcodec/hevcdec.h | 2 ++
2 files changed, 11 insertions(+)
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 7c9b46240c..5df831688c 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2969,6 +2969,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
switch (s->nal_unit_type) {
case HEVC_NAL_VPS:
+ av_buffer_unref(&s->hwaccel_params_buf);
if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
ret = s->avctx->hwaccel->decode_params(s->avctx,
nal->type,
@@ -2982,6 +2983,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
goto fail;
break;
case HEVC_NAL_SPS:
+ av_buffer_unref(&s->hwaccel_params_buf);
if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
ret = s->avctx->hwaccel->decode_params(s->avctx,
nal->type,
@@ -2996,6 +2998,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
goto fail;
break;
case HEVC_NAL_PPS:
+ av_buffer_unref(&s->hwaccel_params_buf);
if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
ret = s->avctx->hwaccel->decode_params(s->avctx,
nal->type,
@@ -3455,6 +3458,7 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
ff_dovi_ctx_unref(&s->dovi_ctx);
av_buffer_unref(&s->rpu_buf);
+ av_buffer_unref(&s->hwaccel_params_buf);
av_freep(&s->md5_ctx);
@@ -3606,6 +3610,10 @@ static int hevc_update_thread_context(AVCodecContext *dst,
if (ret < 0)
return ret;
+ ret = av_buffer_replace(&s->hwaccel_params_buf, s0->hwaccel_params_buf);
+ if (ret < 0)
+ return ret;
+
ret = av_buffer_replace(&s->rpu_buf, s0->rpu_buf);
if (ret < 0)
return ret;
@@ -3683,6 +3691,7 @@ static void hevc_decode_flush(AVCodecContext *avctx)
s->max_ra = INT_MAX;
s->eos = 1;
+ av_buffer_unref(&s->hwaccel_params_buf);
if (avctx->hwaccel->flush)
avctx->hwaccel->flush(avctx);
}
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 15c4113bdd..774cd95947 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -509,6 +509,8 @@ typedef struct HEVCContext {
uint8_t *sao_pixel_buffer_h[3];
uint8_t *sao_pixel_buffer_v[3];
+ AVBufferRef *hwaccel_params_buf;
+
HEVCParamSets ps;
HEVCSEI sei;
struct AVMD5 *md5_ctx;
--
2.39.2
[-- Attachment #73: 0072-hevcdec-add-Vulkan-hwaccel.patch --]
[-- Type: text/x-diff, Size: 50457 bytes --]
From d47cb5940bc4808fea572b530eb1b9bf11159540 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 08:27:18 +0100
Subject: [PATCH 72/72] hevcdec: add Vulkan hwaccel
Thanks to Dave Airlie for figuring out a lot of the parameters.
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/hevcdec.c | 27 +-
libavcodec/hwaccels.h | 1 +
libavcodec/vulkan_hevc.c | 904 +++++++++++++++++++++++++++++++++++++++
5 files changed, 934 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/vulkan_hevc.c
diff --git a/configure b/configure
index 60973c38b3..8f7b918565 100755
--- a/configure
+++ b/configure
@@ -3050,6 +3050,8 @@ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
hevc_vdpau_hwaccel_select="hevc_decoder"
hevc_videotoolbox_hwaccel_deps="videotoolbox"
hevc_videotoolbox_hwaccel_select="hevc_decoder"
+hevc_vulkan_hwaccel_deps="vulkan"
+hevc_vulkan_hwaccel_select="hevc_decoder"
mjpeg_nvdec_hwaccel_deps="nvdec"
mjpeg_nvdec_hwaccel_select="mjpeg_decoder"
mjpeg_vaapi_hwaccel_deps="vaapi"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 4c9db167a5..6aa304071a 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -999,6 +999,7 @@ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o
OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o
OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o
OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o
+OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL) += vulkan_decode.o vulkan_hevc.o
OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o
OBJS-$(CONFIG_MJPEG_VAAPI_HWACCEL) += vaapi_mjpeg.o
OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL) += nvdec_mpeg12.o
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 5df831688c..0ad6418f8d 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -405,7 +405,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
CONFIG_HEVC_NVDEC_HWACCEL + \
CONFIG_HEVC_VAAPI_HWACCEL + \
CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
- CONFIG_HEVC_VDPAU_HWACCEL)
+ CONFIG_HEVC_VDPAU_HWACCEL + \
+ CONFIG_HEVC_VULKAN_HWACCEL)
enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
switch (sps->pix_fmt) {
@@ -429,6 +430,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
break;
case AV_PIX_FMT_YUV420P10:
@@ -445,6 +449,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
#if CONFIG_HEVC_VDPAU_HWACCEL
*fmt++ = AV_PIX_FMT_VDPAU;
#endif
@@ -464,6 +471,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
break;
case AV_PIX_FMT_YUV422P:
@@ -473,11 +483,17 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
break;
case AV_PIX_FMT_YUV444P10:
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
*fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
case AV_PIX_FMT_YUV420P12:
case AV_PIX_FMT_YUV444P12:
@@ -487,6 +503,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#if CONFIG_HEVC_VDPAU_HWACCEL
*fmt++ = AV_PIX_FMT_VDPAU;
#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
+#endif
#if CONFIG_HEVC_NVDEC_HWACCEL
*fmt++ = AV_PIX_FMT_CUDA;
#endif
@@ -494,6 +513,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
case AV_PIX_FMT_YUV422P12:
#if CONFIG_HEVC_VAAPI_HWACCEL
*fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ *fmt++ = AV_PIX_FMT_VULKAN;
#endif
break;
}
@@ -3752,6 +3774,9 @@ const FFCodec ff_hevc_decoder = {
#endif
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
HWACCEL_VIDEOTOOLBOX(hevc),
+#endif
+#if CONFIG_HEVC_VULKAN_HWACCEL
+ HWACCEL_VULKAN(hevc),
#endif
NULL
},
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 23d0843c76..a7c74d07cb 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -44,6 +44,7 @@ extern const AVHWAccel ff_hevc_nvdec_hwaccel;
extern const AVHWAccel ff_hevc_vaapi_hwaccel;
extern const AVHWAccel ff_hevc_vdpau_hwaccel;
extern const AVHWAccel ff_hevc_videotoolbox_hwaccel;
+extern const AVHWAccel ff_hevc_vulkan_hwaccel;
extern const AVHWAccel ff_mjpeg_nvdec_hwaccel;
extern const AVHWAccel ff_mjpeg_vaapi_hwaccel;
extern const AVHWAccel ff_mpeg1_nvdec_hwaccel;
diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
new file mode 100644
index 0000000000..f4991d8f82
--- /dev/null
+++ b/libavcodec/vulkan_hevc.c
@@ -0,0 +1,904 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "hevcdec.h"
+#include "hevc_ps.h"
+
+#include "vulkan_decode.h"
+
+/* Name and version of the H.265 decode std-video headers this file targets. */
+const VkExtensionProperties ff_vk_dec_hevc_ext = {
+    .specVersion   = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION,
+    .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME,
+};
+
+/*
+ * Backing storage for the auxiliary std-video structures that accompany one
+ * StdVideoH265SequenceParameterSet. The member types mirror the output
+ * parameters of set_sps() (scaling lists, HRD header, VUI, profile/tier/level,
+ * DPB manager, palette, per-sub-layer HRD parameters, short-term and
+ * long-term reference sets).
+ * NOTE(review): presumably one instance is passed to set_sps() per SPS -
+ * confirm against the caller.
+ */
+typedef struct HEVCHeaderSPS {
+ StdVideoH265ScalingLists scaling;
+ StdVideoH265HrdParameters vui_header;
+ StdVideoH265SequenceParameterSetVui vui;
+ StdVideoH265ProfileTierLevel ptl;
+ StdVideoH265DecPicBufMgr dpbm;
+ StdVideoH265PredictorPaletteEntries pal;
+ /* One entry per sub-layer, NAL and VCL variants kept separately */
+ StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS];
+ StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS];
+ StdVideoH265ShortTermRefPicSet str[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
+ StdVideoH265LongTermRefPicsSps ltr[HEVC_MAX_LONG_TERM_REF_PICS];
+} HEVCHeaderSPS;
+
+/*
+ * Backing storage for the auxiliary std-video structures of one PPS: its
+ * scaling lists and its predictor palette entries.
+ */
+typedef struct HEVCHeaderPPS {
+ StdVideoH265ScalingLists scaling;
+ StdVideoH265PredictorPaletteEntries pal;
+} HEVCHeaderPPS;
+
+/*
+ * Per-sub-layer HRD parameters (NAL and VCL variants) for one HRD parameter
+ * set of a VPS; get_data_set_buf() budgets one of these per
+ * vps_num_hrd_parameters.
+ */
+typedef struct HEVCHeaderVPSSet {
+ StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS];
+ StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS];
+} HEVCHeaderVPSSet;
+
+/*
+ * Backing storage for the auxiliary std-video structures of one VPS.
+ * The structure ends in a flexible array member, so its real size depends on
+ * how many HEVCHeaderVPSSet entries follow it.
+ */
+typedef struct HEVCHeaderVPS {
+ StdVideoH265ProfileTierLevel ptl;
+ StdVideoH265DecPicBufMgr dpbm;
+ StdVideoH265HrdParameters hdr[HEVC_MAX_LAYER_SETS];
+ /* Variable-length tail of per-HRD sub-layer parameter sets */
+ HEVCHeaderVPSSet sls[];
+} HEVCHeaderVPS;
+
+/*
+ * Layout of the temporary buffer that holds every std-video parameter set
+ * handed to Vulkan, together with the auxiliary structures each one points
+ * to. get_data_set_buf() sizes the buffer that backs this layout.
+ */
+typedef struct HEVCHeaderSet {
+    StdVideoH265SequenceParameterSet sps[HEVC_MAX_SPS_COUNT];
+    HEVCHeaderSPS                    hsps[HEVC_MAX_SPS_COUNT];
+
+    StdVideoH265PictureParameterSet  pps[HEVC_MAX_PPS_COUNT];
+    HEVCHeaderPPS                    hpps[HEVC_MAX_PPS_COUNT];
+
+    /* One entry per possible VPS. This was sized with the PPS limit, which
+     * disagreed with the HEVC_MAX_VPS_COUNT entries that get_data_set_buf()
+     * budgets for this array. */
+    StdVideoH265VideoParameterSet    vps[HEVC_MAX_VPS_COUNT];
+    /* NOTE(review): HEVCHeaderVPS itself ends in a flexible array member, so
+     * an array of it is not valid ISO C and its elements cannot be indexed
+     * with a fixed stride - confirm how the code that fills it walks it. */
+    HEVCHeaderVPS                    hvps[];
+} HEVCHeaderSet;
+
+/**
+ * Get a pooled buffer large enough to hold a HEVCHeaderSet for the given
+ * video parameter sets.
+ *
+ * The pool in s is re-created whenever the required size exceeds the element
+ * size it was created with.
+ *
+ * @param s        decode context owning the temporary buffer pool
+ * @param data_buf receives the buffer on success; untouched on failure
+ * @param nb_vps   number of entries of vps_list to account for
+ * @param vps_list VPS buffers; each non-NULL entry contributes storage for
+ *                 its vps_num_hrd_parameters HRD sets
+ * @return 0 on success, AVERROR(EINVAL) if nb_vps is out of range,
+ *         AVERROR(ENOMEM) on allocation failure
+ */
+static int get_data_set_buf(FFVulkanDecodeContext *s, AVBufferRef **data_buf,
+                            int nb_vps, AVBufferRef * const vps_list[HEVC_MAX_VPS_COUNT])
+{
+    size_t buf_size;
+
+    /* vps_list has HEVC_MAX_VPS_COUNT entries; a negative count would also
+     * wrap around in the size computation below. */
+    if (nb_vps < 0 || nb_vps > HEVC_MAX_VPS_COUNT)
+        return AVERROR(EINVAL);
+
+    /* Use the structure sizes themselves rather than summing their members
+     * by hand, so padding and the declared array lengths are always
+     * covered. This is never smaller than the member-wise sum. */
+    buf_size = sizeof(HEVCHeaderSet) + sizeof(HEVCHeaderVPS)*nb_vps;
+
+    for (int i = 0; i < nb_vps; i++) {
+        const HEVCVPS *vps;
+
+        if (!vps_list[i])
+            continue;
+
+        vps = (const HEVCVPS *)vps_list[i]->data;
+        buf_size += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters;
+    }
+
+    if (buf_size > s->tmp_pool_ele_size) {
+        av_buffer_pool_uninit(&s->tmp_pool);
+        /* Keep the recorded size at 0 until the new pool really exists. */
+        s->tmp_pool_ele_size = 0;
+        s->tmp_pool = av_buffer_pool_init(buf_size, NULL);
+        if (!s->tmp_pool)
+            return AVERROR(ENOMEM);
+        s->tmp_pool_ele_size = buf_size;
+    }
+
+    *data_buf = av_buffer_pool_get(s->tmp_pool);
+    if (!(*data_buf))
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+/*
+ * Per-picture hwaccel private data for the HEVC Vulkan decoder; reached
+ * through HEVCFrame.hwaccel_picture_private (see vk_hevc_fill_pict()).
+ */
+typedef struct HEVCVulkanDecodePicture {
+ FFVulkanDecodeContext *ctx;
+ FFVulkanDecodePicture vp;
+
+ /* Current picture */
+ StdVideoDecodeH265ReferenceInfo h265_ref;
+ VkVideoDecodeH265DpbSlotInfoKHR vkh265_ref;
+
+ /* Picture refs */
+ HEVCFrame *ref_src [HEVC_MAX_REFS];
+ StdVideoDecodeH265ReferenceInfo h265_refs [HEVC_MAX_REFS];
+ VkVideoDecodeH265DpbSlotInfoKHR vkh265_refs[HEVC_MAX_REFS];
+
+ /* Current picture (contd.) */
+ StdVideoDecodeH265PictureInfo h265pic;
+ VkVideoDecodeH265PictureInfoKHR h265_pic_info;
+} HEVCVulkanDecodePicture;
+
+/**
+ * Describe one picture (the current one or a reference) as a Vulkan
+ * reference slot.
+ *
+ * Prepares the frame through ff_vk_decode_prepare_frame() and then fills the
+ * chain ref_slot -> vkh265_ref -> h265_ref plus the picture resource ref.
+ *
+ * @param avctx      codec context
+ * @param ref_src    if non-NULL, receives pic
+ * @param ref_slot   reference slot info to fill (head of the chain)
+ * @param ref        picture resource to fill; pointed to by ref_slot
+ * @param vkh265_ref H.265 DPB slot info to fill; chained from ref_slot
+ * @param h265_ref   std reference info to fill; pointed to by vkh265_ref
+ * @param pic        picture being described
+ * @param is_current passed through to ff_vk_decode_prepare_frame()
+ * @param pic_id     slot index; also the array layer when the DPB is layered
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
+                             VkVideoReferenceSlotInfoKHR *ref_slot,       /* Main structure */
+                             VkVideoPictureResourceInfoKHR *ref,          /* Goes in ^ */
+                             VkVideoDecodeH265DpbSlotInfoKHR *vkh265_ref, /* Goes in ^ */
+                             StdVideoDecodeH265ReferenceInfo *h265_ref,   /* Goes in ^ */
+                             HEVCFrame *pic, int is_current, int pic_id)
+{
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vkpic = &hp->vp;
+
+    int err = ff_vk_decode_prepare_frame(ctx, pic->frame, vkpic, is_current,
+                                         ctx->dedicated_dpb);
+    if (err < 0)
+        return err;
+
+    *h265_ref = (StdVideoDecodeH265ReferenceInfo) {
+        .flags = (StdVideoDecodeH265ReferenceInfoFlags) {
+            /* Normalise to 0/1: the destination is a one-bit flag, so
+             * storing the raw masked value would lose any flag bit that is
+             * not bit 0. */
+            .used_for_long_term_reference = !!(pic->flags & HEVC_FRAME_FLAG_LONG_REF),
+            .unused_for_reference = 0,
+        },
+        .PicOrderCntVal = pic->poc,
+    };
+
+    *vkh265_ref = (VkVideoDecodeH265DpbSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR,
+        .pStdReferenceInfo = h265_ref,
+    };
+
+    *ref = (VkVideoPictureResourceInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+        .codedOffset = (VkOffset2D){ 0, 0 },
+        .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height },
+        /* With a layered DPB every picture lives in its own array layer. */
+        .baseArrayLayer = ctx->layered_dpb ? pic_id : 0,
+        .imageViewBinding = vkpic->img_view_ref,
+    };
+
+    *ref_slot = (VkVideoReferenceSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+        .pNext = vkh265_ref,
+        .slotIndex = pic_id,
+        .pPictureResource = ref,
+    };
+
+    if (ref_src)
+        *ref_src = pic;
+
+    return 0;
+}
+
+static void set_sps(const HEVCSPS *sps, int sps_idx,
+ StdVideoH265ScalingLists *vksps_scaling,
+ StdVideoH265HrdParameters *vksps_vui_header,
+ StdVideoH265SequenceParameterSetVui *vksps_vui,
+ StdVideoH265SequenceParameterSet *vksps,
+ StdVideoH265SubLayerHrdParameters *slhdrnal,
+ StdVideoH265SubLayerHrdParameters *slhdrvcl,
+ StdVideoH265ProfileTierLevel *ptl,
+ StdVideoH265DecPicBufMgr *dpbm,
+ StdVideoH265PredictorPaletteEntries *pal,
+ StdVideoH265ShortTermRefPicSet *str,
+ StdVideoH265LongTermRefPicsSps *ltr)
+{
+ for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++)
+ memcpy(vksps_scaling->ScalingList4x4[i], sps->scaling_list.sl[0][i],
+ STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList4x4));
+
+ for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++)
+ memcpy(vksps_scaling->ScalingList8x8[i], sps->scaling_list.sl[1][i],
+ STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList8x8));
+
+ for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++)
+ memcpy(vksps_scaling->ScalingList16x16[i], sps->scaling_list.sl[2][i],
+ STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList16x16));
+
+ for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++)
+ memcpy(vksps_scaling->ScalingList32x32[i], sps->scaling_list.sl[3][i],
+ STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vksps_scaling->ScalingList32x32));
+
+ memcpy(vksps_scaling->ScalingListDCCoef16x16, sps->scaling_list.sl_dc[0],
+ STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef16x16));
+
+ memcpy(vksps_scaling->ScalingListDCCoef32x32, sps->scaling_list.sl_dc[1],
+ STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(*vksps_scaling->ScalingListDCCoef32x32));
+
+ *vksps_vui_header = (StdVideoH265HrdParameters) {
+ .flags = (StdVideoH265HrdFlags) {
+ .nal_hrd_parameters_present_flag = sps->hdr.flags.nal_hrd_parameters_present_flag,
+ .vcl_hrd_parameters_present_flag = sps->hdr.flags.vcl_hrd_parameters_present_flag,
+ .sub_pic_hrd_params_present_flag = sps->hdr.flags.sub_pic_hrd_params_present_flag,
+ .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->hdr.flags.sub_pic_cpb_params_in_pic_timing_sei_flag,
+ .fixed_pic_rate_general_flag = sps->hdr.flags.fixed_pic_rate_general_flag,
+ .fixed_pic_rate_within_cvs_flag = sps->hdr.flags.fixed_pic_rate_within_cvs_flag,
+ .low_delay_hrd_flag = sps->hdr.flags.low_delay_hrd_flag,
+ },
+ .tick_divisor_minus2 = sps->hdr.tick_divisor_minus2,
+ .du_cpb_removal_delay_increment_length_minus1 = sps->hdr.du_cpb_removal_delay_increment_length_minus1,
+ .dpb_output_delay_du_length_minus1 = sps->hdr.dpb_output_delay_du_length_minus1,
+ .bit_rate_scale = sps->hdr.bit_rate_scale,
+ .cpb_size_scale = sps->hdr.cpb_size_scale,
+ .cpb_size_du_scale = sps->hdr.cpb_size_du_scale,
+ .initial_cpb_removal_delay_length_minus1 = sps->hdr.initial_cpb_removal_delay_length_minus1,
+ .au_cpb_removal_delay_length_minus1 = sps->hdr.au_cpb_removal_delay_length_minus1,
+ .dpb_output_delay_length_minus1 = sps->hdr.dpb_output_delay_length_minus1,
+ /* Reserved - 3*16 bits */
+ .pSubLayerHrdParametersNal = slhdrnal,
+ .pSubLayerHrdParametersNal = slhdrvcl,
+ };
+
+ memcpy(vksps_vui_header->cpb_cnt_minus1, sps->hdr.cpb_cnt_minus1,
+ STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->cpb_cnt_minus1));
+ memcpy(vksps_vui_header->elemental_duration_in_tc_minus1, sps->hdr.elemental_duration_in_tc_minus1,
+ STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->elemental_duration_in_tc_minus1));
+
+ memcpy(slhdrnal, sps->hdr.nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrnal));
+ memcpy(slhdrvcl, sps->hdr.vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrvcl));
+
+ *vksps_vui = (StdVideoH265SequenceParameterSetVui) {
+ .flags = (StdVideoH265SpsVuiFlags) {
+ .aspect_ratio_info_present_flag = sps->vui.common.aspect_ratio_info_present_flag,
+ .overscan_info_present_flag = sps->vui.common.overscan_info_present_flag,
+ .overscan_appropriate_flag = sps->vui.common.overscan_appropriate_flag,
+ .video_signal_type_present_flag = sps->vui.common.video_signal_type_present_flag,
+ .video_full_range_flag = sps->vui.common.video_full_range_flag,
+ .colour_description_present_flag = sps->vui.common.colour_description_present_flag,
+ .chroma_loc_info_present_flag = sps->vui.common.chroma_loc_info_present_flag,
+ .neutral_chroma_indication_flag = sps->vui.neutra_chroma_indication_flag,
+ .field_seq_flag = sps->vui.field_seq_flag,
+ .frame_field_info_present_flag = sps->vui.frame_field_info_present_flag,
+ .default_display_window_flag = sps->vui.default_display_window_flag,
+ .vui_timing_info_present_flag = sps->vui.vui_timing_info_present_flag,
+ .vui_poc_proportional_to_timing_flag = sps->vui.vui_poc_proportional_to_timing_flag,
+ .vui_hrd_parameters_present_flag = sps->vui.vui_hrd_parameters_present_flag,
+ .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag,
+ .tiles_fixed_structure_flag = sps->vui.tiles_fixed_structure_flag,
+ .motion_vectors_over_pic_boundaries_flag = sps->vui.motion_vectors_over_pic_boundaries_flag,
+ .restricted_ref_pic_lists_flag = sps->vui.restricted_ref_pic_lists_flag,
+ },
+ .aspect_ratio_idc = sps->vui.common.aspect_ratio_idc,
+ .sar_width = sps->vui.common.sar.num,
+ .sar_height = sps->vui.common.sar.den,
+ .video_format = sps->vui.common.video_format,
+ .colour_primaries = sps->vui.common.colour_primaries,
+ .transfer_characteristics = sps->vui.common.transfer_characteristics,
+ .matrix_coeffs = sps->vui.common.matrix_coeffs,
+ .chroma_sample_loc_type_top_field = sps->vui.common.chroma_sample_loc_type_top_field,
+ .chroma_sample_loc_type_bottom_field = sps->vui.common.chroma_sample_loc_type_bottom_field,
+ /* Reserved */
+ /* Reserved */
+ .def_disp_win_left_offset = sps->vui.def_disp_win.left_offset,
+ .def_disp_win_right_offset = sps->vui.def_disp_win.right_offset,
+ .def_disp_win_top_offset = sps->vui.def_disp_win.top_offset,
+ .def_disp_win_bottom_offset = sps->vui.def_disp_win.bottom_offset,
+ .vui_num_units_in_tick = sps->vui.vui_num_units_in_tick,
+ .vui_time_scale = sps->vui.vui_time_scale,
+ .vui_num_ticks_poc_diff_one_minus1 = sps->vui.vui_num_ticks_poc_diff_one_minus1,
+ .min_spatial_segmentation_idc = sps->vui.min_spatial_segmentation_idc,
+ .max_bytes_per_pic_denom = sps->vui.max_bytes_per_pic_denom,
+ .max_bits_per_min_cu_denom = sps->vui.max_bits_per_min_cu_denom,
+ .log2_max_mv_length_horizontal = sps->vui.log2_max_mv_length_horizontal,
+ .log2_max_mv_length_vertical = sps->vui.log2_max_mv_length_vertical,
+ .pHrdParameters = vksps_vui_header,
+ };
+
+ *ptl = (StdVideoH265ProfileTierLevel) {
+ .flags = (StdVideoH265ProfileTierLevelFlags) {
+ .general_tier_flag = sps->ptl.general_ptl.tier_flag,
+ .general_progressive_source_flag = sps->ptl.general_ptl.progressive_source_flag,
+ .general_interlaced_source_flag = sps->ptl.general_ptl.interlaced_source_flag,
+ .general_non_packed_constraint_flag = sps->ptl.general_ptl.non_packed_constraint_flag,
+ .general_frame_only_constraint_flag = sps->ptl.general_ptl.frame_only_constraint_flag,
+ },
+ .general_profile_idc = sps->ptl.general_ptl.profile_idc,
+ .general_level_idc = sps->ptl.general_ptl.level_idc,
+ };
+
+ for (int i = 0; i < sps->max_sub_layers; i++) {
+ dpbm->max_latency_increase_plus1[i] = sps->temporal_layer[i].max_latency_increase + 1;
+ dpbm->max_dec_pic_buffering_minus1[i] = sps->temporal_layer[i].max_dec_pic_buffering - 1;
+ dpbm->max_num_reorder_pics[i] = sps->temporal_layer[i].num_reorder_pics;
+ }
+
+ for (int i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++)
+ for (int j = 0; j <= sps->sps_num_palette_predictor_initializer_minus1; j++)
+ pal->PredictorPaletteEntries[i][j] = sps->palette_predictor_initializers[i][j];
+
+ for (int i = 0; i < sps->nb_st_rps; i++) {
+ str[i] = (StdVideoH265ShortTermRefPicSet) {
+ .flags = (StdVideoH265ShortTermRefPicSetFlags) {
+ .inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict,
+ .delta_rps_sign = sps->st_rps[i].delta_rps_sign,
+ },
+ .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1,
+ .use_delta_flag = sps->st_rps[i].use_delta_flag,
+ .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1,
+ /* Spec fucked this up
+ .used_by_curr_pic_flag =
+ .used_by_curr_pic_s0_flag =
+ .used_by_curr_pic_s1_flag =
+ */
+ /* Reserved */
+ /* Reserved */
+ /* Reserved */
+ .num_negative_pics = sps->st_rps[i].num_negative_pics,
+ .num_positive_pics = sps->st_rps[i].num_delta_pocs - sps->st_rps[i].num_negative_pics,
+ };
+
+ for (int j = 0; j < str[i].num_negative_pics; j++)
+ str[i].delta_poc_s0_minus1[j] = sps->st_rps[i].delta_poc_s0[j] - 1;
+
+ for (int j = 0; j < str[i].num_positive_pics; j++)
+ str[i].delta_poc_s1_minus1[j] = sps->st_rps[i].delta_poc_s1[j] - 1;
+ }
+
+ for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) {
+ ltr[i] = (StdVideoH265LongTermRefPicsSps) {
+ .used_by_curr_pic_lt_sps_flag = sps->used_by_curr_pic_lt_sps_flag[i],
+ /* Spec fucked this up too*/
+ .lt_ref_pic_poc_lsb_sps[0] = sps->lt_ref_pic_poc_lsb_sps[i],
+ };
+ }
+
+ *vksps = (StdVideoH265SequenceParameterSet) {
+ .flags = (StdVideoH265SpsFlags) {
+ .sps_temporal_id_nesting_flag = sps->temporal_id_nesting_flag,
+ .separate_colour_plane_flag = sps->separate_colour_plane_flag,
+ .conformance_window_flag = sps->conformance_window_flag,
+ .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info_flag,
+ .scaling_list_enabled_flag = sps->scaling_list_enable_flag,
+ .sps_scaling_list_data_present_flag = sps->scaling_list_data_present_flag,
+ .amp_enabled_flag = sps->amp_enabled_flag,
+ .sample_adaptive_offset_enabled_flag = sps->sao_enabled,
+ .pcm_enabled_flag = sps->pcm_enabled_flag,
+ .pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag,
+ .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag,
+ .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag,
+ .strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag,
+ .vui_parameters_present_flag = sps->vui_present,
+ .sps_extension_present_flag = sps->sps_extension_present_flag,
+ .sps_range_extension_flag = sps->sps_range_extension_flag,
+ .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag,
+ .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag,
+ .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag,
+ .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag,
+ .extended_precision_processing_flag = sps->extended_precision_processing_flag,
+ .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag,
+ .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag,
+ .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag,
+ .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag,
+ .sps_scc_extension_flag = sps->sps_scc_extension_flag,
+ .sps_curr_pic_ref_enabled_flag = sps->sps_curr_pic_ref_enabled_flag,
+ .palette_mode_enabled_flag = sps->palette_mode_enabled_flag,
+ .sps_palette_predictor_initializers_present_flag = sps->sps_palette_predictor_initializer_present_flag,
+ .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disable_flag,
+ },
+ .chroma_format_idc = sps->chroma_format_idc,
+ .pic_width_in_luma_samples = sps->width,
+ .pic_height_in_luma_samples = sps->height,
+ .sps_video_parameter_set_id = sps->vps_id,
+ .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1,
+ .sps_seq_parameter_set_id = sps_idx,
+ .bit_depth_luma_minus8 = sps->bit_depth - 8,
+ .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8,
+ .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4,
+ .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3,
+ .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size,
+ .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2,
+ .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_transform_block_size,
+ .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter,
+ .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra,
+ .num_short_term_ref_pic_sets = sps->nb_st_rps,
+ .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps,
+ .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1,
+ .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1,
+ .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3,
+ .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size,
+ /* Reserved */
+ /* Reserved */
+ .palette_max_size = sps->palette_max_size,
+ .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size,
+ .motion_vector_resolution_control_idc = sps->motion_vector_resolution_control_idc,
+ .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializer_minus1,
+ .conf_win_left_offset = sps->pic_conf_win.left_offset,
+ .conf_win_right_offset = sps->pic_conf_win.right_offset,
+ .conf_win_top_offset = sps->pic_conf_win.top_offset,
+ .conf_win_bottom_offset = sps->pic_conf_win.bottom_offset,
+ .pProfileTierLevel = ptl,
+ .pDecPicBufMgr = dpbm,
+ .pScalingLists = vksps_scaling,
+ .pShortTermRefPicSet = str,
+ .pLongTermRefPicsSps = ltr,
+ .pSequenceParameterSetVui = vksps_vui,
+ .pPredictorPaletteEntries = pal,
+ };
+}
+
+/**
+ * Fill the Vulkan video std PPS structure from a parsed HEVC PPS.
+ *
+ * @param pps           source picture parameter set
+ * @param sps           SPS referenced by the PPS; only its vps_id is read here
+ * @param vkpps_scaling destination for the PPS scaling lists; vkpps points at it
+ * @param vkpps         destination structure, fully overwritten
+ * @param pal           destination for the palette predictor entries; vkpps
+ *                      points at it
+ */
+static void set_pps(const HEVCPPS *pps, const HEVCSPS *sps,
+                    StdVideoH265ScalingLists *vkpps_scaling,
+                    StdVideoH265PictureParameterSet *vkpps,
+                    StdVideoH265PredictorPaletteEntries *pal)
+{
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList4x4[i], pps->scaling_list.sl[0][i],
+               STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList4x4));
+
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList8x8[i], pps->scaling_list.sl[1][i],
+               STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList8x8));
+
+    /* Each 16x16 list must be copied with the 16x16 element count; using the
+     * 4x4 count here would leave most of every list unset. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList16x16[i], pps->scaling_list.sl[2][i],
+               STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList16x16));
+
+    /* NOTE(review): the HEVC parser may store the two 32x32 matrices at
+     * matrixId 0 and 3 rather than 0 and 1 - confirm whether sl[3][i] and
+     * sl_dc[1][i] below should be indexed with i * 3. */
+    for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++)
+        memcpy(vkpps_scaling->ScalingList32x32[i], pps->scaling_list.sl[3][i],
+               STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS * sizeof(**vkpps_scaling->ScalingList32x32));
+
+    /* The DC coefficient arrays hold one value per list, so they are sized by
+     * the list count, not by the per-list element count. */
+    memcpy(vkpps_scaling->ScalingListDCCoef16x16, pps->scaling_list.sl_dc[0],
+           STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * sizeof(*vkpps_scaling->ScalingListDCCoef16x16));
+
+    memcpy(vkpps_scaling->ScalingListDCCoef32x32, pps->scaling_list.sl_dc[1],
+           STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * sizeof(*vkpps_scaling->ScalingListDCCoef32x32));
+
+    *vkpps = (StdVideoH265PictureParameterSet) {
+        .flags = (StdVideoH265PpsFlags) {
+            .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag,
+            .output_flag_present_flag = pps->output_flag_present_flag,
+            .sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag,
+            .cabac_init_present_flag = pps->cabac_init_present_flag,
+            .constrained_intra_pred_flag = pps->constrained_intra_pred_flag,
+            .transform_skip_enabled_flag = pps->transform_skip_enabled_flag,
+            .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag,
+            .pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag,
+            .weighted_pred_flag = pps->weighted_pred_flag,
+            .weighted_bipred_flag = pps->weighted_bipred_flag,
+            .transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag,
+            .tiles_enabled_flag = pps->tiles_enabled_flag,
+            .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag,
+            .uniform_spacing_flag = pps->uniform_spacing_flag,
+            .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag,
+            .pps_loop_filter_across_slices_enabled_flag = pps->seq_loop_filter_across_slices_enabled_flag,
+            .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag,
+            .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag,
+            .pps_deblocking_filter_disabled_flag = pps->disable_dbf,
+            .pps_scaling_list_data_present_flag = pps->scaling_list_data_present_flag,
+            .lists_modification_present_flag = pps->lists_modification_present_flag,
+            .slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag,
+            .pps_extension_present_flag = pps->pps_extension_present_flag,
+            .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag,
+            .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag,
+            .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag,
+            .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag,
+            .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag,
+            .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializer_present_flag,
+            .monochrome_palette_flag = pps->monochrome_palette_flag,
+            .pps_range_extension_flag = pps->pps_range_extensions_flag,
+        },
+        .pps_pic_parameter_set_id = pps->pps_id,
+        .pps_seq_parameter_set_id = pps->sps_id,
+        .sps_video_parameter_set_id = sps->vps_id,
+        .num_extra_slice_header_bits = pps->num_extra_slice_header_bits,
+        .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1,
+        .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1,
+        .init_qp_minus26 = pps->pic_init_qp_minus26,
+        .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth,
+        .pps_cb_qp_offset = pps->cb_qp_offset,
+        .pps_cr_qp_offset = pps->cr_qp_offset,
+        .pps_beta_offset_div2 = pps->beta_offset >> 1,
+        .pps_tc_offset_div2 = pps->tc_offset >> 1,
+        .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2,
+        .log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size - 2,
+        .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth,
+        .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1,
+        .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma,
+        .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma,
+        .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset_plus5,
+        .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset_plus5,
+        .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset_plus3,
+        .pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializer,
+        .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry_minus8,
+        .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry_minus8,
+        .num_tile_columns_minus1 = pps->num_tile_columns - 1,
+        .num_tile_rows_minus1 = pps->num_tile_rows - 1,
+        .pScalingLists = vkpps_scaling,
+        .pPredictorPaletteEntries = pal,
+    };
+
+    /* One palette component when monochrome, three otherwise */
+    for (int i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) {
+        for (int j = 0; j < pps->pps_num_palette_predictor_initializer; j++)
+            pal->PredictorPaletteEntries[i][j] = pps->palette_predictor_initializers[i][j];
+    }
+
+    /* Only num_tile_columns - 1 widths and num_tile_rows - 1 heights are
+     * written; the last column and row are not stored. */
+    for (int i = 0; i < pps->num_tile_columns - 1; i++)
+        vkpps->column_width_minus1[i] = pps->column_width[i] - 1;
+
+    for (int i = 0; i < pps->num_tile_rows - 1; i++)
+        vkpps->row_height_minus1[i] = pps->row_height[i] - 1;
+
+    for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) {
+        vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i];
+        vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i];
+    }
+}
+
+/**
+ * Fill the Vulkan video std VPS structure from a parsed HEVC VPS.
+ *
+ * @param vps     source video parameter set
+ * @param vkvps   destination structure, fully overwritten
+ * @param ptl     destination for the general profile/tier/level; vkvps points at it
+ * @param dpbm    destination for the per-sub-layer DPB management values;
+ *                vkvps points at it
+ * @param sls_hdr destination array of HRD parameter sets, one entry per
+ *                vps->vps_num_hrd_parameters; vkvps points at it
+ * @param sls     backing storage for the NAL/VCL sub-layer HRD parameters that
+ *                each sls_hdr entry points at
+ */
+static void set_vps(const HEVCVPS *vps,
+                    StdVideoH265VideoParameterSet *vkvps,
+                    StdVideoH265ProfileTierLevel *ptl,
+                    StdVideoH265DecPicBufMgr *dpbm,
+                    StdVideoH265HrdParameters *sls_hdr,
+                    HEVCHeaderVPSSet sls[])
+{
+    for (int i = 0; i < vps->vps_num_hrd_parameters; i++) {
+        const HEVCHdrParams *src = &vps->hdr[i];
+
+        sls_hdr[i] = (StdVideoH265HrdParameters) {
+            .flags = (StdVideoH265HrdFlags) {
+                .nal_hrd_parameters_present_flag = src->flags.nal_hrd_parameters_present_flag,
+                .vcl_hrd_parameters_present_flag = src->flags.vcl_hrd_parameters_present_flag,
+                .sub_pic_hrd_params_present_flag = src->flags.sub_pic_hrd_params_present_flag,
+                .sub_pic_cpb_params_in_pic_timing_sei_flag = src->flags.sub_pic_cpb_params_in_pic_timing_sei_flag,
+                .fixed_pic_rate_general_flag = src->flags.fixed_pic_rate_general_flag,
+                .fixed_pic_rate_within_cvs_flag = src->flags.fixed_pic_rate_within_cvs_flag,
+                .low_delay_hrd_flag = src->flags.low_delay_hrd_flag,
+            },
+            .tick_divisor_minus2 = src->tick_divisor_minus2,
+            .du_cpb_removal_delay_increment_length_minus1 = src->du_cpb_removal_delay_increment_length_minus1,
+            .dpb_output_delay_du_length_minus1 = src->dpb_output_delay_du_length_minus1,
+            .bit_rate_scale = src->bit_rate_scale,
+            .cpb_size_scale = src->cpb_size_scale,
+            .cpb_size_du_scale = src->cpb_size_du_scale,
+            .initial_cpb_removal_delay_length_minus1 = src->initial_cpb_removal_delay_length_minus1,
+            .au_cpb_removal_delay_length_minus1 = src->au_cpb_removal_delay_length_minus1,
+            .dpb_output_delay_length_minus1 = src->dpb_output_delay_length_minus1,
+            /* Reserved - 3*16 bits */
+            /* NAL and VCL sub-layer parameters each get their own pointer;
+             * initialising the NAL member twice would leave VCL unset. */
+            .pSubLayerHrdParametersNal = sls[i].nal_hdr,
+            .pSubLayerHrdParametersVcl = sls[i].vcl_hdr,
+        };
+
+        memcpy(sls_hdr[i].cpb_cnt_minus1, src->cpb_cnt_minus1,
+               STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].cpb_cnt_minus1));
+        memcpy(sls_hdr[i].elemental_duration_in_tc_minus1, src->elemental_duration_in_tc_minus1,
+               STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].elemental_duration_in_tc_minus1));
+
+        /* The pointers stored above refer to this storage, filled here */
+        memcpy(sls[i].nal_hdr, src->nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].nal_hdr));
+        memcpy(sls[i].vcl_hdr, src->vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].vcl_hdr));
+    }
+
+    *ptl = (StdVideoH265ProfileTierLevel) {
+        .flags = (StdVideoH265ProfileTierLevelFlags) {
+            .general_tier_flag = vps->ptl.general_ptl.tier_flag,
+            .general_progressive_source_flag = vps->ptl.general_ptl.progressive_source_flag,
+            .general_interlaced_source_flag = vps->ptl.general_ptl.interlaced_source_flag,
+            .general_non_packed_constraint_flag = vps->ptl.general_ptl.non_packed_constraint_flag,
+            .general_frame_only_constraint_flag = vps->ptl.general_ptl.frame_only_constraint_flag,
+        },
+        .general_profile_idc = vps->ptl.general_ptl.profile_idc,
+        .general_level_idc = vps->ptl.general_ptl.level_idc,
+    };
+
+    for (int i = 0; i < vps->vps_max_sub_layers; i++) {
+        dpbm->max_latency_increase_plus1[i] = vps->vps_max_latency_increase[i] + 1;
+        dpbm->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering[i] - 1;
+        dpbm->max_num_reorder_pics[i] = vps->vps_num_reorder_pics[i];
+    }
+
+    *vkvps = (StdVideoH265VideoParameterSet) {
+        .flags = (StdVideoH265VpsFlags) {
+            .vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag,
+            .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag,
+            .vps_timing_info_present_flag = vps->vps_timing_info_present_flag,
+            .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag,
+        },
+        .vps_video_parameter_set_id = vps->vps_id,
+        .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers - 1,
+        /* Reserved */
+        /* Reserved */
+        .vps_num_units_in_tick = vps->vps_num_units_in_tick,
+        .vps_time_scale = vps->vps_time_scale,
+        .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one - 1,
+        /* Reserved */
+        .pDecPicBufMgr = dpbm,
+        .pHrdParameters = sls_hdr,
+        .pProfileTierLevel = ptl,
+    };
+}
+
+/**
+ * Create a Vulkan video session parameters object holding every SPS, PPS and
+ * VPS currently listed in the decoder's parameter set tables.
+ *
+ * @param avctx codec context; its priv_data is the HEVCContext and its
+ *              hwaccel_priv_data the FFVulkanDecodeContext
+ * @param buf   on success, receives a new AVBufferRef wrapping the created
+ *              VkVideoSessionParametersKHR handle; it is released through
+ *              ff_vk_decode_free_params()
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_hevc_create_params(AVCodecContext *avctx, AVBufferRef **buf)
+{
+    int err;
+    VkResult ret;
+    const HEVCContext *h = avctx->priv_data;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VkVideoDecodeH265SessionParametersAddInfoKHR h265_params_info = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR,
+        .stdSPSCount = 0,
+        .stdPPSCount = 0,
+        .stdVPSCount = 0,
+    };
+    VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pParametersAddInfo = &h265_params_info,
+    };
+    VkVideoSessionParametersCreateInfoKHR session_params_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .pNext = &h265_params,
+        .videoSession = ctx->common.session,
+        .videoSessionParametersTemplate = NULL,
+    };
+
+    int nb_vps = 0;
+    AVBufferRef *data_set;
+    HEVCHeaderSet *hdr;
+
+    AVBufferRef *tmp;
+    VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par));
+    if (!par)
+        return AVERROR(ENOMEM);
+
+    /* NOTE(review): this and the SPS/PPS loops below stop at the first empty
+     * list entry and have no upper bound - confirm the lists are dense and
+     * NULL-terminated in practice. */
+    for (int i = 0; h->ps.vps_list[i]; i++)
+        nb_vps++;
+
+    err = get_data_set_buf(ctx, &data_set, nb_vps, h->ps.vps_list);
+    if (err < 0) {
+        /* par is not owned by any buffer yet, so it must be freed here */
+        av_free(par);
+        return err;
+    }
+
+    hdr = (HEVCHeaderSet *)data_set->data;
+
+    h265_params_info.pStdSPSs = hdr->sps;
+    h265_params_info.pStdPPSs = hdr->pps;
+    h265_params_info.pStdVPSs = hdr->vps;
+
+    /* SPS list */
+    for (int i = 0; h->ps.sps_list[i]; i++) {
+        const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[i]->data;
+        set_sps(sps_l, i, &hdr->hsps[i].scaling, &hdr->hsps[i].vui_header,
+                &hdr->hsps[i].vui, &hdr->sps[i], hdr->hsps[i].nal_hdr,
+                hdr->hsps[i].vcl_hdr, &hdr->hsps[i].ptl, &hdr->hsps[i].dpbm,
+                &hdr->hsps[i].pal, hdr->hsps[i].str, hdr->hsps[i].ltr);
+        h265_params_info.stdSPSCount++;
+    }
+
+    /* PPS list */
+    for (int i = 0; h->ps.pps_list[i]; i++) {
+        const HEVCPPS *pps_l = (const HEVCPPS *)h->ps.pps_list[i]->data;
+        const HEVCSPS *sps_l = (const HEVCSPS *)h->ps.sps_list[pps_l->sps_id]->data;
+        set_pps(pps_l, sps_l, &hdr->hpps[i].scaling, &hdr->pps[i], &hdr->hpps[i].pal);
+        h265_params_info.stdPPSCount++;
+    }
+
+    /* VPS list */
+    for (int i = 0; i < nb_vps; i++) {
+        const HEVCVPS *vps_l = (const HEVCVPS *)h->ps.vps_list[i]->data;
+        set_vps(vps_l, &hdr->vps[i], &hdr->hvps[i].ptl, &hdr->hvps[i].dpbm,
+                hdr->hvps[i].hdr, hdr->hvps[i].sls);
+        h265_params_info.stdVPSCount++;
+    }
+
+    /* The object is sized to hold exactly the sets added at creation */
+    h265_params.maxStdSPSCount = h265_params_info.stdSPSCount;
+    h265_params.maxStdPPSCount = h265_params_info.stdPPSCount;
+    h265_params.maxStdVPSCount = h265_params_info.stdVPSCount;
+
+    /* Create session parameters */
+    ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, &session_params_create,
+                                              ctx->s.hwctx->alloc, par);
+    /* The staging data is released right after the create call, whether or
+     * not it succeeded */
+    av_buffer_unref(&data_set);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n",
+               ff_vk_ret2str(ret));
+        /* No Vulkan object was created; only the handle storage is freed */
+        av_free(par);
+        return AVERROR_EXTERNAL;
+    }
+
+    tmp = av_buffer_create((uint8_t *)par, sizeof(*par), ff_vk_decode_free_params,
+                           ctx, 0);
+    if (!tmp) {
+        ff_vk_decode_free_params(ctx, (uint8_t *)par);
+        return AVERROR(ENOMEM);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS %i VPS\n",
+           h265_params_info.stdSPSCount, h265_params_info.stdPPSCount,
+           h265_params_info.stdVPSCount);
+
+    *buf = tmp;
+
+    return 0;
+}
+
+/**
+ * hwaccel start_frame callback: prepare the per-picture Vulkan decode state.
+ *
+ * Creates the session parameters on first use, fills the std picture info,
+ * collects the reference slots from the DPB, translates the current reference
+ * picture sets into DPB indices and sets up the decode info structure.
+ *
+ * @param avctx  codec context
+ * @param buffer unused
+ * @param size   unused
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_hevc_start_frame(AVCodecContext *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t size)
+{
+    int err;
+    HEVCContext *h = avctx->priv_data;
+    HEVCFrame *pic = h->ref;
+    FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data;
+    HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &hp->vp;
+    const HEVCSPS *sps = h->ps.sps;
+    const HEVCPPS *pps = h->ps.pps;
+    int nb_refs = 0;
+
+    /* Reference picture sets to translate, each with its destination index
+     * array and that array's capacity */
+    const struct {
+        int      rps;
+        uint8_t *dst;
+        int      nb_entries;
+    } rps_map[] = {
+        { ST_CURR_BEF, hp->h265pic.RefPicSetStCurrBefore,
+          FF_ARRAY_ELEMS(hp->h265pic.RefPicSetStCurrBefore) },
+        { ST_CURR_AFT, hp->h265pic.RefPicSetStCurrAfter,
+          FF_ARRAY_ELEMS(hp->h265pic.RefPicSetStCurrAfter) },
+        { LT_CURR,     hp->h265pic.RefPicSetLtCurr,
+          FF_ARRAY_ELEMS(hp->h265pic.RefPicSetLtCurr) },
+    };
+
+    if (!h->hwaccel_params_buf) {
+        err = vk_hevc_create_params(avctx, &h->hwaccel_params_buf);
+        if (err < 0)
+            return err;
+    }
+
+    vp->session_params = av_buffer_ref(h->hwaccel_params_buf);
+    if (!vp->session_params)
+        return AVERROR(ENOMEM);
+
+    hp->h265pic = (StdVideoDecodeH265PictureInfo) {
+        .flags = (StdVideoDecodeH265PictureInfoFlags) {
+            .IrapPicFlag = IS_IRAP(h),
+            .IdrPicFlag = IS_IDR(h),
+            .IsReference = h->nal_unit_type < 16 ? h->nal_unit_type & 1 : 1,
+            .short_term_ref_pic_set_sps_flag = h->sh.short_term_ref_pic_set_sps_flag,
+        },
+        .sps_video_parameter_set_id = sps->vps_id,
+        .pps_seq_parameter_set_id = pps->sps_id,
+        .pps_pic_parameter_set_id = pps->pps_id,
+        .NumDeltaPocsOfRefRpsIdx = h->sh.short_term_rps ? h->sh.short_term_rps->rps_idx_num_delta_pocs : 0,
+        .PicOrderCntVal = h->poc,
+        .NumBitsForSTRefPicSetInSlice = !h->sh.short_term_ref_pic_set_sps_flag ?
+                                        h->sh.bits_used_for_short_term_rps : 0,
+    };
+
+    /* Fill in references */
+    for (int i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) {
+        const HEVCFrame *ref = &h->DPB[i];
+        int idx = nb_refs;
+
+        if (!(ref->flags & (HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF)))
+            continue;
+
+        /* The current picture goes into the setup slot and is not counted
+         * among the references */
+        if (ref == pic) {
+            err = vk_hevc_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref,
+                                    &hp->vkh265_ref, &hp->h265_ref, pic, 1, i);
+            if (err < 0)
+                return err;
+
+            continue;
+        }
+
+        err = vk_hevc_fill_pict(avctx, &hp->ref_src[idx], &vp->ref_slots[idx],
+                                &vp->refs[idx], &hp->vkh265_refs[idx],
+                                &hp->h265_refs[idx], (HEVCFrame *)ref, 0, i);
+        if (err < 0)
+            return err;
+
+        nb_refs++;
+    }
+
+    /* Translate each reference picture set into DPB indices. Entries are
+     * preset to 0xff; an entry keeps that value if its frame is not found in
+     * the DPB. Writes are clamped to the destination array size. */
+    for (int k = 0; k < FF_ARRAY_ELEMS(rps_map); k++) {
+        uint8_t *dst = rps_map[k].dst;
+
+        memset(dst, 0xff, rps_map[k].nb_entries * sizeof(*dst));
+
+        for (int i = 0; i < h->rps[rps_map[k].rps].nb_refs &&
+                        i < rps_map[k].nb_entries; i++) {
+            const HEVCFrame *frame = h->rps[rps_map[k].rps].ref[i];
+            for (int j = 0; j < FF_ARRAY_ELEMS(h->DPB); j++) {
+                if (&h->DPB[j] == frame) {
+                    dst[i] = j;
+                    break;
+                }
+            }
+        }
+    }
+
+    /* The slice count starts at zero; vk_hevc_decode_slice() passes the count
+     * and the offsets pointer to ff_vk_decode_add_slice() for each slice */
+    hp->h265_pic_info = (VkVideoDecodeH265PictureInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR,
+        .pStdPictureInfo = &hp->h265pic,
+        .sliceSegmentCount = 0,
+        .pSliceSegmentOffsets = vp->slice_off,
+    };
+
+    vp->decode_info = (VkVideoDecodeInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR,
+        .pNext = &hp->h265_pic_info,
+        .flags = 0x0,
+        .pSetupReferenceSlot = &vp->ref_slot,
+        .referenceSlotCount = nb_refs,
+        .pReferenceSlots = vp->ref_slots,
+        .dstPictureResource = (VkVideoPictureResourceInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+            .codedOffset = (VkOffset2D){ 0, 0 },
+            .codedExtent = (VkExtent2D){ pic->frame->width, pic->frame->height },
+            .baseArrayLayer = 0,
+            .imageViewBinding = vp->img_view_out,
+        },
+    };
+
+    /* Kept so vk_hevc_free_frame_priv() can release the frame resources */
+    hp->ctx = ctx;
+
+    return 0;
+}
+
+/**
+ * hwaccel decode_slice callback: hand one slice's data to
+ * ff_vk_decode_add_slice() for the current picture, together with the
+ * picture's slice segment count and offsets pointer.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_hevc_decode_slice(AVCodecContext *avctx,
+                                const uint8_t *data,
+                                uint32_t size)
+{
+    const HEVCContext *s = avctx->priv_data;
+    HEVCVulkanDecodePicture *pic_priv = s->ref->hwaccel_picture_private;
+    VkVideoDecodeH265PictureInfoKHR *info = &pic_priv->h265_pic_info;
+    int ret;
+
+    ret = ff_vk_decode_add_slice(&pic_priv->vp, data, size, 1,
+                                 &info->sliceSegmentCount,
+                                 &info->pSliceSegmentOffsets);
+
+    /* Only negative results are propagated; anything else is success */
+    return ret < 0 ? ret : 0;
+}
+
+/**
+ * hwaccel end_frame callback: gather the frames and Vulkan picture state of
+ * every reference recorded for the current picture, then submit the decode.
+ *
+ * @return the result of ff_vk_decode_frame()
+ */
+static int vk_hevc_end_frame(AVCodecContext *avctx)
+{
+    const HEVCContext *s = avctx->priv_data;
+    HEVCFrame *cur = s->ref;
+    HEVCVulkanDecodePicture *cur_priv = cur->hwaccel_picture_private;
+    FFVulkanDecodePicture *cur_vp = &cur_priv->vp;
+    FFVulkanDecodePicture *ref_vps[HEVC_MAX_REFS] = { 0 };
+    AVFrame *ref_frames[HEVC_MAX_REFS] = { 0 };
+
+    /* One entry per reference slot filled in by start_frame */
+    for (int n = 0; n < cur_vp->decode_info.referenceSlotCount; n++) {
+        const HEVCFrame *src = cur_priv->ref_src[n];
+        HEVCVulkanDecodePicture *src_priv = src->hwaccel_picture_private;
+
+        ref_frames[n] = src->frame;
+        ref_vps[n]    = &src_priv->vp;
+    }
+
+    /* NOTE(review): %lu assumes slices_size is an unsigned long - confirm */
+    av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %lu bytes, %i slices\n",
+           cur_vp->slices_size, cur_priv->h265_pic_info.sliceSegmentCount);
+
+    return ff_vk_decode_frame(avctx, cur->frame, cur_vp, ref_frames, ref_vps);
+}
+
+/**
+ * hwaccel free_frame_priv callback: release the Vulkan resources attached to
+ * a picture's private data, then the private data itself.
+ *
+ * @param avctx codec context (not used here)
+ * @param data  the picture's HEVCVulkanDecodePicture
+ */
+static void vk_hevc_free_frame_priv(AVCodecContext *avctx, void *data)
+{
+    HEVCVulkanDecodePicture *pic_priv = data;
+
+    /* Frame resources go first: they are reached through the private data
+     * that is freed right after */
+    ff_vk_decode_free_frame(pic_priv->ctx, &pic_priv->vp);
+    av_free(pic_priv);
+}
+
+/* Vulkan hwaccel descriptor for HEVC */
+const AVHWAccel ff_hevc_vulkan_hwaccel = {
+    .name                 = "hevc_vulkan",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_HEVC,
+    .pix_fmt              = AV_PIX_FMT_VULKAN,
+
+    /* Codec-specific per-frame callbacks */
+    .start_frame          = vk_hevc_start_frame,
+    .decode_slice         = vk_hevc_decode_slice,
+    .end_frame            = vk_hevc_end_frame,
+    .free_frame_priv      = vk_hevc_free_frame_priv,
+    .frame_priv_data_size = sizeof(HEVCVulkanDecodePicture),
+
+    /* Common ff_vk_* entry points */
+    .init                 = ff_vk_decode_init,
+    .flush                = ff_vk_decode_flush,
+    .uninit               = ff_vk_decode_uninit,
+    .frame_params         = ff_vk_frame_params,
+    .priv_data_size       = sizeof(FFVulkanDecodeContext),
+    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
--
2.39.2
[-- Attachment #74: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 3:43 [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding Lynne
@ 2023-02-17 9:08 ` Jean-Baptiste Kempf
2023-02-17 9:45 ` Hendrik Leppkes
` (2 more replies)
2023-02-18 19:02 ` Michael Niedermayer
` (4 subsequent siblings)
5 siblings, 3 replies; 34+ messages in thread
From: Jean-Baptiste Kempf @ 2023-02-17 9:08 UTC (permalink / raw)
To: Lynne, ffmpeg-devel
Hello,
On Fri, 17 Feb 2023, at 04:43, Lynne wrote:
> This small patchset mostly rewrites Vulkan to enable using multiplane images,
This is not small. We're talking about thousands of lines of code;
And this changes numerous h264 and hevc headers, and adds a callback to AVHWAccel.
And that is besides the full rewrite of some Vulkan files...
This will take a long time to review.
--
Jean-Baptiste Kempf - President
+33 672 704 734
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 9:08 ` Jean-Baptiste Kempf
@ 2023-02-17 9:45 ` Hendrik Leppkes
2023-02-17 10:45 ` Lynne
2023-02-17 11:04 ` Kieran Kunhya
[not found] ` <CAK+ULv780c=z_dig_FAhPJ2poZ8u2_QOnnPUmV3SSiYoaQZ+tw@mail.gmail.com-NOU29aV----9>
2 siblings, 1 reply; 34+ messages in thread
From: Hendrik Leppkes @ 2023-02-17 9:45 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Fri, Feb 17, 2023 at 10:09 AM Jean-Baptiste Kempf <jb@videolan.org> wrote:
>
> Hello,
>
> On Fri, 17 Feb 2023, at 04:43, Lynne wrote:
> > This small patchset mostly rewrites Vulkan to enable using multiplane images,
>
> This is not small. We're talking about thousands of lines of code;
>
> And this changes numerous h264 and hevc headers, and adds callback to the make AVHWAccel.
>
> And that is besides the full rewrite of some Vulkan files...
>
> This will take a long time to review.
>
I would agree, this set is too large to try to skirt it in just under
the deadline of a new release, with the potential of needing to fix
stuff later.
Just have it land on master after the branch, and there is no time
pressure to review it, or make numerous fixes on the release branch.
PS:
Can you please send it as a proper patchset? Reviewing 72 patches off
of one mail is not really fitting the workflow at all.
- Hendrik
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 9:45 ` Hendrik Leppkes
@ 2023-02-17 10:45 ` Lynne
0 siblings, 0 replies; 34+ messages in thread
From: Lynne @ 2023-02-17 10:45 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 17, 2023, 10:45 by h.leppkes@gmail.com:
> On Fri, Feb 17, 2023 at 10:09 AM Jean-Baptiste Kempf <jb@videolan.org> wrote:
>
>>
>> Hello,
>>
>> On Fri, 17 Feb 2023, at 04:43, Lynne wrote:
>> > This small patchset mostly rewrites Vulkan to enable using multiplane images,
>>
>> This is not small. We're talking about thousands of lines of code;
>>
>> And this changes numerous h264 and hevc headers, and adds callback to the make AVHWAccel.
>>
>> And that is besides the full rewrite of some Vulkan files...
>>
>> This will take a long time to review.
>>
>
> I would agree, this set is too large to try to skirt it in just under
> the deadline of a new release, with the potential of needing to fix
> stuff later.
> Just have it land on master after the branch, and there is no time
> pressure to review it, or make numerous fixes on the release branch.
>
The code's been tested by a lot of folks already, and importantly,
passes the validation layers (with some warnings due to spec issues
that the workgroup agreed to fix).
The output is also conformant on all implementations.
All the code needs is some looking over to make sure I haven't done
something silly. Valgrind output is clean too.
It is a lot of code in terms of diff, but rather than reviewing each patch
individually, you should just try looking at the final files directly, that way,
you only have a few thousand lines to look at, rather than a few
thousand times twenty.
I've abstracted all the low-level Vulkan stuff from the decoding patchset,
so the output is high-level enough that anyone who's written hwaccels can
understand it.
> PS:
> Can you please send it as a proper patchset? Reviewing 72 patches off
> of one mail is not really fitting the workflow at all.
>
I'll do that.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 9:08 ` Jean-Baptiste Kempf
2023-02-17 9:45 ` Hendrik Leppkes
@ 2023-02-17 11:04 ` Kieran Kunhya
[not found] ` <CAK+ULv780c=z_dig_FAhPJ2poZ8u2_QOnnPUmV3SSiYoaQZ+tw@mail.gmail.com-NOU29aV----9>
2 siblings, 0 replies; 34+ messages in thread
From: Kieran Kunhya @ 2023-02-17 11:04 UTC (permalink / raw)
To: FFmpeg development discussions and patches; +Cc: Lynne
On Fri, 17 Feb 2023 at 09:09, Jean-Baptiste Kempf <jb@videolan.org> wrote:
> Hello,
>
> On Fri, 17 Feb 2023, at 04:43, Lynne wrote:
> > This small patchset mostly rewrites Vulkan to enable using multiplane
> images,
>
> This is not small. We're talking about thousands of lines of code;
>
> And this changes numerous h264 and hevc headers, and adds callback to the
> make AVHWAccel.
>
> And that is besides the full rewrite of some Vulkan files...
>
> This will take a long time to review.
>
I agree.
Kieran
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
[not found] ` <CAK+ULv780c=z_dig_FAhPJ2poZ8u2_QOnnPUmV3SSiYoaQZ+tw@mail.gmail.com-NOU29aV----9>
@ 2023-02-17 11:52 ` Lynne
2023-02-17 15:45 ` Michael Niedermayer
0 siblings, 1 reply; 34+ messages in thread
From: Lynne @ 2023-02-17 11:52 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 17, 2023, 12:05 by kierank@obe.tv:
> On Fri, 17 Feb 2023 at 09:09, Jean-Baptiste Kempf <jb@videolan.org> wrote:
>
>> Hello,
>>
>> On Fri, 17 Feb 2023, at 04:43, Lynne wrote:
>> > This small patchset mostly rewrites Vulkan to enable using multiplane
>> images,
>>
>> This is not small. We're talking about thousands of lines of code;
>>
>> And this changes numerous h264 and hevc headers, and adds callback to the
>> make AVHWAccel.
>>
>> And that is besides the full rewrite of some Vulkan files...
>>
>> This will take a long time to review.
>>
>
> I agree.
>
The codec changes are simple and separate, so it's not that bad to read.
There's no more than a 100 lines or so added to the H264 parser.
The 72 patch figure may be scary, but that's because I didn't want to
squash my vulkan changes behind large rewrite commits. Otherwise,
this would be a 30-commit patchset.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 11:52 ` Lynne
@ 2023-02-17 15:45 ` Michael Niedermayer
2023-02-17 16:35 ` Lynne
0 siblings, 1 reply; 34+ messages in thread
From: Michael Niedermayer @ 2023-02-17 15:45 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 1773 bytes --]
On Fri, Feb 17, 2023 at 12:52:21PM +0100, Lynne wrote:
> Feb 17, 2023, 12:05 by kierank@obe.tv:
>
> > On Fri, 17 Feb 2023 at 09:09, Jean-Baptiste Kempf <jb@videolan.org> wrote:
> >
> >> Hello,
> >>
> >> On Fri, 17 Feb 2023, at 04:43, Lynne wrote:
> >> > This small patchset mostly rewrites Vulkan to enable using multiplane
> >> images,
> >>
> >> This is not small. We're talking about thousands of lines of code;
> >>
> >> And this changes numerous h264 and hevc headers, and adds callback to the
> >> make AVHWAccel.
> >>
> >> And that is besides the full rewrite of some Vulkan files...
> >>
> >> This will take a long time to review.
> >>
> >
> > I agree.
> >
>
> The codec changes are simple and separate, so it's not that bad to read.
> There's no more than a 100 lines or so added to the H264 parser.
> The 72 patch figure may be scary, but that's because I didn't want to
> squash my vulkan changes behind large rewrite commits. Otherwise,
> this would be a 30-commit patchset.
Some of these patches are very simple, yes. Others probably not (I only looked at
the first few).
That said, I was never a friend of large last-minute patch-sets before releases.
I think the real deciding factor is the community's opinion, and
3 developers are already against this going in before the branching. So
unless there's a shift in opinion, I think there are more people for applying
this after branching release/6.0 than before.
Is it an issue if it's only in 6.1 in 6 months?
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
In fact, the RIAA has been known to suggest that students drop out
of college or go to community college in order to be able to afford
settlements. -- The RIAA
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 15:45 ` Michael Niedermayer
@ 2023-02-17 16:35 ` Lynne
0 siblings, 0 replies; 34+ messages in thread
From: Lynne @ 2023-02-17 16:35 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 17, 2023, 16:45 by michael@niedermayer.cc:
> On Fri, Feb 17, 2023 at 12:52:21PM +0100, Lynne wrote:
>
>> Feb 17, 2023, 12:05 by kierank@obe.tv:
>>
>> > On Fri, 17 Feb 2023 at 09:09, Jean-Baptiste Kempf <jb@videolan.org> wrote:
>> >
>> >> Hello,
>> >>
>> >> On Fri, 17 Feb 2023, at 04:43, Lynne wrote:
>> >> > This small patchset mostly rewrites Vulkan to enable using multiplane
>> >> images,
>> >>
>> >> This is not small. We're talking about thousands of lines of code;
>> >>
>> >> And this changes numerous h264 and hevc headers, and adds callback to the
>> >> make AVHWAccel.
>> >>
>> >> And that is besides the full rewrite of some Vulkan files...
>> >>
>> >> This will take a long time to review.
>> >>
>> >
>> > I agree.
>> >
>>
>> The codec changes are simple and separate, so it's not that bad to read.
>> There's no more than a 100 lines or so added to the H264 parser.
>> The 72 patch figure may be scary, but that's because I didn't want to
>> squash my vulkan changes behind large rewrite commits. Otherwise,
>> this would be a 30-commit patchset.
>>
>
> some of these patches are very simple, yes. Others probably not (i only looked at
> the first few)
> That said i was never a friend of large last minute patch-sets before releases.
> I think the real deciding factor is the communities opinion and
> 3 Developers already are against this going in before the branching. So
> unless theres a shift in oppinion i think there are more people for applying
> this after branching release/6.0 than before.
>
> Is it an issue if its only in 6.1 in 6months ?
>
It's quite a long ways away, and this is by far the most complete
implementation of the spec which everyone is testing against.
This is the first vendor-independent, OS-independent and
hardware-independent video decoding API, and a lot of important
users like browsers and media players want to use it.
Not having it in this release would mean that all those users will
use Gstreamer instead, or nothing at all, instead choosing
proprietary libraries.
6.0 will still have at least a few days of testing after tagging, which
is enough to discover and fix major issues, if they're found.
A single dev so far has really suggested leaving it out of 6.0,
with a few others just saying that it'll take time to review,
but I think reviews are done by looking at code rather than
speculating, and at least one person has said they'll be able to
look at it tomorrow.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 3:43 [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding Lynne
2023-02-17 9:08 ` Jean-Baptiste Kempf
@ 2023-02-18 19:02 ` Michael Niedermayer
2023-02-19 0:08 ` Lynne
2023-02-20 16:51 ` Anton Khirnov
` (3 subsequent siblings)
5 siblings, 1 reply; 34+ messages in thread
From: Michael Niedermayer @ 2023-02-18 19:02 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 2869 bytes --]
On Fri, Feb 17, 2023 at 04:43:50AM +0100, Lynne wrote:
> This small patchset mostly rewrites Vulkan to enable using multiplane images,
> and implements video decode support. Also, many numerous bugs and issues
> were fixed, as well as having quite a lot of performance improvements.
>
> The patchset can be viewed here as well:
> https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
>
> Patches attached.
>
[...]
> av1dec.c | 3 +++
> avcodec.h | 5 +++++
> h264dec.c | 3 +++
> hevcdec.c | 3 +++
> vp8.c | 3 +++
> vp9.c | 3 +++
> 6 files changed, 20 insertions(+)
> 122f9df511e4680d0027afae5d4f9f2f1880874e 0065-avcodec-add-AVHWAccel.flush-callback.patch
> From 93223fa95389c60c015cfcee22784a1bf0fdb05b Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Fri, 6 Jan 2023 03:32:56 +0100
> Subject: [PATCH 65/72] avcodec: add AVHWAccel.flush callback
this patch seems to break fate-vp8-size-change
==5117== Invalid read of size 8
==5117== at 0xD50598: vp8_decode_flush_impl (vp8.c:171)
==5117== by 0xD5ACB3: ff_vp8_decode_free (vp8.c:2869)
==5117== by 0x84CA73: avcodec_close (avcodec.c:448)
==5117== by 0x644BB5: avformat_find_stream_info (demux.c:2969)
==5117== by 0x243F70: ifile_open (ffmpeg_demux.c:985)
==5117== by 0x25F60D: open_files (ffmpeg_opt.c:1244)
==5117== by 0x25F7C4: ffmpeg_parse_options (ffmpeg_opt.c:1283)
==5117== by 0x278003: main (ffmpeg.c:4160)
==5117== Address 0x70 is not stack'd, malloc'd or (recently) free'd
==5117==
==5117==
==5117== Process terminating with default action of signal 11 (SIGSEGV)
==5117== Access not within mapped region at address 0x70
==5117== at 0xD50598: vp8_decode_flush_impl (vp8.c:171)
==5117== by 0xD5ACB3: ff_vp8_decode_free (vp8.c:2869)
==5117== by 0x84CA73: avcodec_close (avcodec.c:448)
==5117== by 0x644BB5: avformat_find_stream_info (demux.c:2969)
==5117== by 0x243F70: ifile_open (ffmpeg_demux.c:985)
==5117== by 0x25F60D: open_files (ffmpeg_opt.c:1244)
==5117== by 0x25F7C4: ffmpeg_parse_options (ffmpeg_opt.c:1283)
==5117== by 0x278003: main (ffmpeg.c:4160)
==5117== If you believe this happened as a result of a stack
==5117== overflow in your program's main thread (unlikely but
==5117== possible), you can try to increase the size of the
==5117== main thread stack using the --main-stacksize= flag.
==5117== The main thread stack size used in this run was 8388608.
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
If you drop bombs on a foreign country and kill a hundred thousand
innocent people, expect your government to call the consequence
"unprovoked inhuman terrorist attacks" and use it to justify dropping
more bombs and killing more people. The technology changed, the idea is old.
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-18 19:02 ` Michael Niedermayer
@ 2023-02-19 0:08 ` Lynne
2023-02-19 15:40 ` Michael Niedermayer
0 siblings, 1 reply; 34+ messages in thread
From: Lynne @ 2023-02-19 0:08 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 18, 2023, 20:03 by michael@niedermayer.cc:
> On Fri, Feb 17, 2023 at 04:43:50AM +0100, Lynne wrote:
>
>> This small patchset mostly rewrites Vulkan to enable using multiplane images,
>> and implements video decode support. Also, many numerous bugs and issues
>> were fixed, as well as having quite a lot of performance improvements.
>>
>> The patchset can be viewed here as well:
>> https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
>>
>> Patches attached.
>>
>
> [...]
>
>
>> av1dec.c | 3 +++
>> avcodec.h | 5 +++++
>> h264dec.c | 3 +++
>> hevcdec.c | 3 +++
>> vp8.c | 3 +++
>> vp9.c | 3 +++
>> 6 files changed, 20 insertions(+)
>> 122f9df511e4680d0027afae5d4f9f2f1880874e 0065-avcodec-add-AVHWAccel.flush-callback.patch
>> From 93223fa95389c60c015cfcee22784a1bf0fdb05b Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Fri, 6 Jan 2023 03:32:56 +0100
>> Subject: [PATCH 65/72] avcodec: add AVHWAccel.flush callback
>>
>
> this patch seems to break fate-vp8-size-change
>
Thanks, philipl also reported this, fixed in my branch
https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 0:08 ` Lynne
@ 2023-02-19 15:40 ` Michael Niedermayer
2023-02-19 15:44 ` Kieran Kunhya
` (2 more replies)
0 siblings, 3 replies; 34+ messages in thread
From: Michael Niedermayer @ 2023-02-19 15:40 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 1807 bytes --]
Hi
On Sun, Feb 19, 2023 at 01:08:02AM +0100, Lynne wrote:
>
> Feb 18, 2023, 20:03 by michael@niedermayer.cc:
>
> > On Fri, Feb 17, 2023 at 04:43:50AM +0100, Lynne wrote:
> >
> >> This small patchset mostly rewrites Vulkan to enable using multiplane images,
> >> and implements video decode support. Also, many numerous bugs and issues
> >> were fixed, as well as having quite a lot of performance improvements.
> >>
> >> The patchset can be viewed here as well:
> >> https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
> >>
> >> Patches attached.
> >>
> >
> > [...]
> >
> >
> >> av1dec.c | 3 +++
> >> avcodec.h | 5 +++++
> >> h264dec.c | 3 +++
> >> hevcdec.c | 3 +++
> >> vp8.c | 3 +++
> >> vp9.c | 3 +++
> >> 6 files changed, 20 insertions(+)
> >> 122f9df511e4680d0027afae5d4f9f2f1880874e 0065-avcodec-add-AVHWAccel.flush-callback.patch
> >> From 93223fa95389c60c015cfcee22784a1bf0fdb05b Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Fri, 6 Jan 2023 03:32:56 +0100
> >> Subject: [PATCH 65/72] avcodec: add AVHWAccel.flush callback
> >>
> >
> > this patch seems to break fate-vp8-size-change
> >
>
> Thanks, philipl also reported this, fixed in my branch
> https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
OK, that works. That said,
is there consensus that I should create the release branch "now"?
It seems no review of these patches is going on in public, and we should do
the release "soon". I am asking as I don't want to surprise anyone by
making the branch before giving a final call.
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
The greatest way to live with honor in this world is to be what we pretend
to be. -- Socrates
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 15:40 ` Michael Niedermayer
@ 2023-02-19 15:44 ` Kieran Kunhya
2023-02-19 16:53 ` Lynne
[not found] ` <NOea74V--3-9@lynne.ee-NOeaB9K--R-9>
2 siblings, 0 replies; 34+ messages in thread
From: Kieran Kunhya @ 2023-02-19 15:44 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Sun, 19 Feb 2023 at 15:40, Michael Niedermayer <michael@niedermayer.cc>
wrote:
> ok that works, that said
> is there consensus that i should create the release branch "now"?
> It seems no review is going on in public of these patches and we should do
> the release "soon", i am asking as i dont want to just surprise anyone with
> making the branch before giving a final call
>
> thx
>
Yes, Vulkan can wait for 6.1.
Kieran
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 15:40 ` Michael Niedermayer
2023-02-19 15:44 ` Kieran Kunhya
@ 2023-02-19 16:53 ` Lynne
2023-02-19 16:56 ` Jean-Baptiste Kempf
2023-02-19 18:50 ` Michael Niedermayer
[not found] ` <NOea74V--3-9@lynne.ee-NOeaB9K--R-9>
2 siblings, 2 replies; 34+ messages in thread
From: Lynne @ 2023-02-19 16:53 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 19, 2023, 16:40 by michael@niedermayer.cc:
> Hi
>
>
> On Sun, Feb 19, 2023 at 01:08:02AM +0100, Lynne wrote:
>
>>
>> Feb 18, 2023, 20:03 by michael@niedermayer.cc:
>>
>> > On Fri, Feb 17, 2023 at 04:43:50AM +0100, Lynne wrote:
>> >
>> >> This small patchset mostly rewrites Vulkan to enable using multiplane images,
>> >> and implements video decode support. Also, many numerous bugs and issues
>> >> were fixed, as well as having quite a lot of performance improvements.
>> >>
>> >> The patchset can be viewed here as well:
>> >> https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
>> >>
>> >> Patches attached.
>> >>
>> >
>> > [...]
>> >
>> >
>> >> av1dec.c | 3 +++
>> >> avcodec.h | 5 +++++
>> >> h264dec.c | 3 +++
>> >> hevcdec.c | 3 +++
>> >> vp8.c | 3 +++
>> >> vp9.c | 3 +++
>> >> 6 files changed, 20 insertions(+)
>> >> 122f9df511e4680d0027afae5d4f9f2f1880874e 0065-avcodec-add-AVHWAccel.flush-callback.patch
>> >> From 93223fa95389c60c015cfcee22784a1bf0fdb05b Mon Sep 17 00:00:00 2001
>> >> From: Lynne <dev@lynne.ee>
>> >> Date: Fri, 6 Jan 2023 03:32:56 +0100
>> >> Subject: [PATCH 65/72] avcodec: add AVHWAccel.flush callback
>> >>
>> >
>> > this patch seems to break fate-vp8-size-change
>> >
>>
>> Thanks, philipl also reported this, fixed in my branch
>>
>> https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
>>
>
> ok that works, that said
> is there consensus that i should create the release branch "now"?
> It seems no review is going on in public of these patches and we should do
> the release "soon", i am asking as i dont want to just surprise anyone with
> making the branch before giving a final call
>
We need a few more days, folks are reviewing the patches
mainly on IRC and on github, since they're large.
Functionality is on-par with the current code, and it fixes
so much, apart from adding new features, I really don't want
anyone to use the old code.
The old code is so bad, even if this patchset is broken in
some ways, it would still be a big improvement over the old code.
Most of the code is code I maintain, and has been tested and
partially reviewed already by two developers who know Vulkan
and GPU code in general. I'd still like to have this in 6.0, so
I think what I should ask is if there are any objections to merging
this as-is, and fixing any issues during the testing period.
Otherwise, I'd like for 6.1 to be released no later than April.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 16:53 ` Lynne
@ 2023-02-19 16:56 ` Jean-Baptiste Kempf
2023-02-19 16:58 ` Lynne
2023-02-19 19:32 ` Niklas Haas
2023-02-19 18:50 ` Michael Niedermayer
1 sibling, 2 replies; 34+ messages in thread
From: Jean-Baptiste Kempf @ 2023-02-19 16:56 UTC (permalink / raw)
To: ffmpeg-devel
On Sun, 19 Feb 2023, at 17:53, Lynne wrote:
> Otherwise, I'd like for 6.1 to be released no later than April.
Sure, that would be the best solution, instead of rushing code right now.
And prepare proper testing for this, with numerous different workstations and laptops.
Best,
--
Jean-Baptiste Kempf - President
+33 672 704 734
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
[not found] ` <NOea74V--3-9@lynne.ee-NOeaB9K--R-9>
@ 2023-02-19 16:57 ` Lynne
2023-02-19 17:36 ` Kieran Kunhya
0 siblings, 1 reply; 34+ messages in thread
From: Lynne @ 2023-02-19 16:57 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 19, 2023, 17:54 by dev@lynne.ee:
> Feb 19, 2023, 16:40 by michael@niedermayer.cc:
>
>> Hi
>>
>>
>> On Sun, Feb 19, 2023 at 01:08:02AM +0100, Lynne wrote:
>>
>>>
>>> Feb 18, 2023, 20:03 by michael@niedermayer.cc:
>>>
>>> > On Fri, Feb 17, 2023 at 04:43:50AM +0100, Lynne wrote:
>>> >
>>> >> This small patchset mostly rewrites Vulkan to enable using multiplane images,
>>> >> and implements video decode support. Also, many numerous bugs and issues
>>> >> were fixed, as well as having quite a lot of performance improvements.
>>> >>
>>> >> The patchset can be viewed here as well:
>>> >> https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
>>> >>
>>> >> Patches attached.
>>> >>
>>> >
>>> > [...]
>>> >
>>> >
>>> >> av1dec.c | 3 +++
>>> >> avcodec.h | 5 +++++
>>> >> h264dec.c | 3 +++
>>> >> hevcdec.c | 3 +++
>>> >> vp8.c | 3 +++
>>> >> vp9.c | 3 +++
>>> >> 6 files changed, 20 insertions(+)
>>> >> 122f9df511e4680d0027afae5d4f9f2f1880874e 0065-avcodec-add-AVHWAccel.flush-callback.patch
>>> >> From 93223fa95389c60c015cfcee22784a1bf0fdb05b Mon Sep 17 00:00:00 2001
>>> >> From: Lynne <dev@lynne.ee>
>>> >> Date: Fri, 6 Jan 2023 03:32:56 +0100
>>> >> Subject: [PATCH 65/72] avcodec: add AVHWAccel.flush callback
>>> >>
>>> >
>>> > this patch seems to break fate-vp8-size-change
>>> >
>>>
>>> Thanks, philipl also reported this, fixed in my branch
>>>
>>> https://github.com/cyanreg/FFmpeg/tree/vulkan_staging
>>>
>>
>> ok that works, that said
>> is there consensus that i should create the release branch "now"?
>> It seems no review is going on in public of these patches and we should do
>> the release "soon", i am asking as i dont want to just surprise anyone with
>> making the branch before giving a final call
>>
>
> We need a few more days, folks are reviewing the patches
> mainly on IRC and on github, since they're large.
> Functionality is on-par with the current code, and it fixes
> so much, apart from adding new features, I really don't want
> anyone to use the old code.
> The old code is so bad, even if this patchset is broken in
> some ways, it would still be a big improvement over the old code.
>
> Most of the code is code I maintain, and has been tested and
> partially reviewed already by two developers who know Vulkan
> and GPU code in general. I'd still like to have this in 6.0, so
> I think what I should ask is if there are any objections to merging
> this as-is, and fixing any issues during the testing period.
>
> Otherwise, I'd like for 6.1 to be released no later than April.
>
Obviously, if we merge it now, and big enough issues are found
which we couldn't fix immediately, I'd have no problem reverting
the Vulkan patches from the 6.0 branch.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 16:56 ` Jean-Baptiste Kempf
@ 2023-02-19 16:58 ` Lynne
2023-02-19 17:02 ` Jean-Baptiste Kempf
2023-02-19 19:32 ` Niklas Haas
1 sibling, 1 reply; 34+ messages in thread
From: Lynne @ 2023-02-19 16:58 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 19, 2023, 17:57 by jb@videolan.org:
> On Sun, 19 Feb 2023, at 17:53, Lynne wrote:
>
>> Otherwise, I'd like for 6.1 to be released no later than April.
>>
>
> Sure, that would be the best solution, instead of rushing code right now.
>
> And prepare proper testing for this, with numerous different workstations and laptops.
>
I have properly tested it, 4 different configurations no less.
My AMD desktop with RADV, my 1080Ti with NVIDIA's beta drivers,
my Intel laptop with ANV, and my Intel laptop's 960M.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 16:58 ` Lynne
@ 2023-02-19 17:02 ` Jean-Baptiste Kempf
0 siblings, 0 replies; 34+ messages in thread
From: Jean-Baptiste Kempf @ 2023-02-19 17:02 UTC (permalink / raw)
To: ffmpeg-devel
On Sun, 19 Feb 2023, at 17:58, Lynne wrote:
> Feb 19, 2023, 17:57 by jb@videolan.org:
>
>> On Sun, 19 Feb 2023, at 17:53, Lynne wrote:
>>
>>> Otherwise, I'd like for 6.1 to be released no later than April.
>>>
>>
>> Sure, that would be the best solution, instead of rushing code right now.
>>
>> And prepare proper testing for this, with numerous different workstations and laptops.
>>
>
> I have properly tested it, 4 different configurations no less.
> My AMD desktop with RADV, my 1080Ti with NVIDIA's beta drivers,
> my Intel laptop with ANV, and my Intel laptop's 960M.
Not everyone has the same machines, distributions, OSes or even FFmpeg utilization.
--
Jean-Baptiste Kempf - President
+33 672 704 734
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 16:57 ` Lynne
@ 2023-02-19 17:36 ` Kieran Kunhya
2023-02-19 17:42 ` Kieran Kunhya
0 siblings, 1 reply; 34+ messages in thread
From: Kieran Kunhya @ 2023-02-19 17:36 UTC (permalink / raw)
To: FFmpeg development discussions and patches
>
> Obviously, if we merge it now, and big enough issues are found
> which we couldn't fix immediately, I'd have no problem reverting
> the Vulkan patches from the 6.0 branch.
A major LTS release is not your development sandbox.
Kieran
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 17:36 ` Kieran Kunhya
@ 2023-02-19 17:42 ` Kieran Kunhya
2023-02-19 18:46 ` Lynne
0 siblings, 1 reply; 34+ messages in thread
From: Kieran Kunhya @ 2023-02-19 17:42 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Sun, 19 Feb 2023 at 17:36, Kieran Kunhya <kierank@obe.tv> wrote:
> Obviously, if we merge it now, and big enough issues are found
>> which we couldn't fix immediately, I'd have no problem reverting
>> the Vulkan patches from the 6.0 branch.
>
>
> A major LTS release is not your development sandbox.
>
> Kieran
>
Correction, 6.0 is not an LTS. Nonetheless, it's not your sandbox.
Kieran
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 17:42 ` Kieran Kunhya
@ 2023-02-19 18:46 ` Lynne
2023-02-19 21:59 ` Kieran Kunhya
0 siblings, 1 reply; 34+ messages in thread
From: Lynne @ 2023-02-19 18:46 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 19, 2023, 18:43 by kierank@obe.tv:
> On Sun, 19 Feb 2023 at 17:36, Kieran Kunhya <kierank@obe.tv> wrote:
>
>> Obviously, if we merge it now, and big enough issues are found
>>
>>> which we couldn't fix immediately, I'd have no problem reverting
>>> the Vulkan patches from the 6.0 branch.
>>>
>>
>>
>> A major LTS release is not your development sandbox.
>>
>> Kieran
>>
>
> Correction, 6.0 is not an LTS. Nonetheless, it's not your sandbox.
>
If new features don't go in, the project dies.
Everyone but me seems to dislike new features.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 16:53 ` Lynne
2023-02-19 16:56 ` Jean-Baptiste Kempf
@ 2023-02-19 18:50 ` Michael Niedermayer
2023-02-19 19:02 ` Lynne
1 sibling, 1 reply; 34+ messages in thread
From: Michael Niedermayer @ 2023-02-19 18:50 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 603 bytes --]
On Sun, Feb 19, 2023 at 05:53:45PM +0100, Lynne wrote:
[...]
> Otherwise, I'd like for 6.1 to be released no later than April.
ok
release early, release often, that makes sense
but rushing things or delaying things can be problematic. It doesn't have to be,
but it can. I'd much rather do another release than rush and squeeze
this between all the constraints.
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Freedom in capitalist society always remains about the same as it was in
ancient Greek republics: Freedom for slave owners. -- Vladimir Lenin
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 18:50 ` Michael Niedermayer
@ 2023-02-19 19:02 ` Lynne
2023-02-19 19:44 ` Michael Niedermayer
0 siblings, 1 reply; 34+ messages in thread
From: Lynne @ 2023-02-19 19:02 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 19, 2023, 19:51 by michael@niedermayer.cc:
> On Sun, Feb 19, 2023 at 05:53:45PM +0100, Lynne wrote:
> [...]
>
>> Otherwise, I'd like for 6.1 to be released no later than April.
>>
>
> ok
> release early, release often, that makes sense
> but rush things, delay things can be probelmatic. It doesnt have to be
> but it can. id much rather do another release than to rush and squeeze
> this betwen all the constraints
>
> thx
>
I wanted to push my dct deprecation patches, can I push them
to both 6.0 and master?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 16:56 ` Jean-Baptiste Kempf
2023-02-19 16:58 ` Lynne
@ 2023-02-19 19:32 ` Niklas Haas
1 sibling, 0 replies; 34+ messages in thread
From: Niklas Haas @ 2023-02-19 19:32 UTC (permalink / raw)
To: ffmpeg-devel
+1 on early 6.1 to get this in
On Sun, 19 Feb 2023 17:56:25 +0100 "Jean-Baptiste Kempf" <jb@videolan.org> wrote:
> On Sun, 19 Feb 2023, at 17:53, Lynne wrote:
> > Otherwise, I'd like for 6.1 to be released no later than April.
>
> Sure, that would be the best solution, instead of rushing code right now.
>
> And prepare proper testing for this, with numerous different workstations and laptops.
>
> Best,
>
> --
> Jean-Baptiste Kempf - President
> +33 672 704 734
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 19:02 ` Lynne
@ 2023-02-19 19:44 ` Michael Niedermayer
2023-02-19 20:00 ` Lynne
0 siblings, 1 reply; 34+ messages in thread
From: Michael Niedermayer @ 2023-02-19 19:44 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 999 bytes --]
On Sun, Feb 19, 2023 at 08:02:24PM +0100, Lynne wrote:
> Feb 19, 2023, 19:51 by michael@niedermayer.cc:
>
> > On Sun, Feb 19, 2023 at 05:53:45PM +0100, Lynne wrote:
> > [...]
> >
> >> Otherwise, I'd like for 6.1 to be released no later than April.
> >>
> >
> > ok
> > release early, release often, that makes sense
> > but rush things, delay things can be probelmatic. It doesnt have to be
> > but it can. id much rather do another release than to rush and squeeze
> > this betwen all the constraints
> >
> > thx
> >
>
> I wanted to push my dct deprecation patches, can I push them
> to both 6.0 and master?
I have no opinion on this.
If the patches pass review and it's before the release they could be
backported.
I think someone said it produces thousands of warnings though, that
may be non-ideal
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
He who knows, does not speak. He who speaks, does not know. -- Lao Tsu
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 19:44 ` Michael Niedermayer
@ 2023-02-19 20:00 ` Lynne
2023-02-19 20:14 ` Michael Niedermayer
0 siblings, 1 reply; 34+ messages in thread
From: Lynne @ 2023-02-19 20:00 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Feb 19, 2023, 20:44 by michael@niedermayer.cc:
> On Sun, Feb 19, 2023 at 08:02:24PM +0100, Lynne wrote:
>
>> Feb 19, 2023, 19:51 by michael@niedermayer.cc:
>>
>> > On Sun, Feb 19, 2023 at 05:53:45PM +0100, Lynne wrote:
>> > [...]
>> >
>> >> Otherwise, I'd like for 6.1 to be released no later than April.
>> >>
>> >
>> > ok
>> > release early, release often, that makes sense
>> > but rush things, delay things can be probelmatic. It doesnt have to be
>> > but it can. id much rather do another release than to rush and squeeze
>> > this betwen all the constraints
>> >
>> > thx
>> >
>>
>> I wanted to push my dct deprecation patches, can I push them
>> to both 6.0 and master?
>>
>
> i have no oppinion on this.
> If the patches pass review and its before the release they could be
> backported.
> I think someone said it produces thoiusands of warnings though, that
> may be non ideal
>
There are no new warnings at all in v3 of the patch, only API
users will get warnings.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 20:00 ` Lynne
@ 2023-02-19 20:14 ` Michael Niedermayer
0 siblings, 0 replies; 34+ messages in thread
From: Michael Niedermayer @ 2023-02-19 20:14 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 1504 bytes --]
On Sun, Feb 19, 2023 at 09:00:19PM +0100, Lynne wrote:
> Feb 19, 2023, 20:44 by michael@niedermayer.cc:
>
> > On Sun, Feb 19, 2023 at 08:02:24PM +0100, Lynne wrote:
> >
> >> Feb 19, 2023, 19:51 by michael@niedermayer.cc:
> >>
> >> > On Sun, Feb 19, 2023 at 05:53:45PM +0100, Lynne wrote:
> >> > [...]
> >> >
> >> >> Otherwise, I'd like for 6.1 to be released no later than April.
> >> >>
> >> >
> >> > ok
> >> > release early, release often, that makes sense
> >> > but rush things, delay things can be probelmatic. It doesnt have to be
> >> > but it can. id much rather do another release than to rush and squeeze
> >> > this betwen all the constraints
> >> >
> >> > thx
> >> >
> >>
> >> I wanted to push my dct deprecation patches, can I push them
> >> to both 6.0 and master?
> >>
> >
> > i have no oppinion on this.
> > If the patches pass review and its before the release they could be
> > backported.
> > I think someone said it produces thoiusands of warnings though, that
> > may be non ideal
> >
>
> There are no new warnings at all in v3 of the patch, only API
> users will get warnings.
then i see no problem with it
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
If the United States is serious about tackling the national security threats
related to an insecure 5G network, it needs to rethink the extent to which it
values corporate profits and government espionage over security.-Bruce Schneier
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 18:46 ` Lynne
@ 2023-02-19 21:59 ` Kieran Kunhya
2023-02-19 23:50 ` Neal Gompa
0 siblings, 1 reply; 34+ messages in thread
From: Kieran Kunhya @ 2023-02-19 21:59 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Sun, 19 Feb 2023 at 18:46, Lynne <dev@lynne.ee> wrote:
> Feb 19, 2023, 18:43 by kierank@obe.tv:
>
> > On Sun, 19 Feb 2023 at 17:36, Kieran Kunhya <kierank@obe.tv> wrote:
> >
> >> Obviously, if we merge it now, and big enough issues are found
> >>
> >>> which we couldn't fix immediately, I'd have no problem reverting
> >>> the Vulkan patches from the 6.0 branch.
> >>>
> >>
> >>
> >> A major LTS release is not your development sandbox.
> >>
> >> Kieran
> >>
> >
> > Correction, 6.0 is not an LTS. Nonetheless, it's not your sandbox.
> >
>
> If new features don't go in, the project dies.
> Everyone but seems to dislike new features.
>
Sure, then put your features in early in the dev cycle, not days before a
major release.
Kieran
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 21:59 ` Kieran Kunhya
@ 2023-02-19 23:50 ` Neal Gompa
2023-02-20 5:13 ` Jean-Baptiste Kempf
2023-02-20 9:18 ` Hendrik Leppkes
0 siblings, 2 replies; 34+ messages in thread
From: Neal Gompa @ 2023-02-19 23:50 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Sun, Feb 19, 2023 at 5:00 PM Kieran Kunhya <kierank@obe.tv> wrote:
>
> On Sun, 19 Feb 2023 at 18:46, Lynne <dev@lynne.ee> wrote:
>
> > Feb 19, 2023, 18:43 by kierank@obe.tv:
> >
> > > On Sun, 19 Feb 2023 at 17:36, Kieran Kunhya <kierank@obe.tv> wrote:
> > >
> > >> Obviously, if we merge it now, and big enough issues are found
> > >>
> > >>> which we couldn't fix immediately, I'd have no problem reverting
> > >>> the Vulkan patches from the 6.0 branch.
> > >>>
> > >>
> > >>
> > >> A major LTS release is not your development sandbox.
> > >>
> > >> Kieran
> > >>
> > >
> > > Correction, 6.0 is not an LTS. Nonetheless, it's not your sandbox.
> > >
> >
> > If new features don't go in, the project dies.
> > Everyone but seems to dislike new features.
> >
>
> Sure, then put your features in early in the dev cycle, not days before a
> major release.
>
This is not a reasonable response, especially to someone who is
volunteering their time to develop features for a project. If it
misses 6.0, it sucks.
My concern though is that major version bumps are supposed to be
ABI/API breaks, if this patchset doesn't land in 6.0, does that mean
it'd have to be pushed off to 7.0 if it makes public API changes? If
that's the case, I'd rather request 6.0 to be delayed to get this to land.
--
真実はいつも一つ!/ Always, there's only one truth!
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 23:50 ` Neal Gompa
@ 2023-02-20 5:13 ` Jean-Baptiste Kempf
2023-02-20 9:18 ` Hendrik Leppkes
1 sibling, 0 replies; 34+ messages in thread
From: Jean-Baptiste Kempf @ 2023-02-20 5:13 UTC (permalink / raw)
To: ffmpeg-devel
On Mon, 20 Feb 2023, at 00:50, Neal Gompa wrote:
> This is not a reasonable response, especially to someone who is
> volunteering their time to develop features for a project. If it
> misses 6.0, it sucks.
>
> My concern though is that major version bumps are supposed to be
> ABI/API breaks, if this patchset doesn't land in 6.0, does that mean
> it'd have to be pushed off to 7.0 if it makes public API changes? If
> that's the case, I'd rather request 6.0 to be delayed to get this to land.
Adding an API call does not break the API. Adding an API can, in a large percentage of cases, be done without breaking ABI (Sure, it might mean that the structures are not as beautiful as they should be, but...)
I don't see the issue here.
In general, adding big changes should be done at the beginning of a new cycle, not at the end.
jb
--
Jean-Baptiste Kempf - President
+33 672 704 734
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-19 23:50 ` Neal Gompa
2023-02-20 5:13 ` Jean-Baptiste Kempf
@ 2023-02-20 9:18 ` Hendrik Leppkes
1 sibling, 0 replies; 34+ messages in thread
From: Hendrik Leppkes @ 2023-02-20 9:18 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Mon, Feb 20, 2023 at 12:50 AM Neal Gompa <ngompa13@gmail.com> wrote:
>
> On Sun, Feb 19, 2023 at 5:00 PM Kieran Kunhya <kierank@obe.tv> wrote:
> >
> > On Sun, 19 Feb 2023 at 18:46, Lynne <dev@lynne.ee> wrote:
> >
> > > Feb 19, 2023, 18:43 by kierank@obe.tv:
> > >
> > > > On Sun, 19 Feb 2023 at 17:36, Kieran Kunhya <kierank@obe.tv> wrote:
> > > >
> > > >> Obviously, if we merge it now, and big enough issues are found
> > > >>
> > > >>> which we couldn't fix immediately, I'd have no problem reverting
> > > >>> the Vulkan patches from the 6.0 branch.
> > > >>>
> > > >>
> > > >>
> > > >> A major LTS release is not your development sandbox.
> > > >>
> > > >> Kieran
> > > >>
> > > >
> > > > Correction, 6.0 is not an LTS. Nonetheless, it's not your sandbox.
> > > >
> > >
> > > If new features don't go in, the project dies.
> > > Everyone but seems to dislike new features.
> > >
> >
> > Sure, then put your features in early in the dev cycle, not days before a
> > major release.
> >
>
> This is not a reasonable response, especially to someone who is
> volunteering their time to develop features for a project. If it
> misses 6.0, it sucks.
>
I think it's quite reasonable to think of the stability and quality of
the release.
If a feature is rushed into a release and you get a buggy version of
it in 6.0, that also sucks - in fact it may suck more for those people
that were using those features this is going to replace.
- Hendrik
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 3:43 [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding Lynne
2023-02-17 9:08 ` Jean-Baptiste Kempf
2023-02-18 19:02 ` Michael Niedermayer
@ 2023-02-20 16:51 ` Anton Khirnov
2023-02-20 16:56 ` Anton Khirnov
` (2 subsequent siblings)
5 siblings, 0 replies; 34+ messages in thread
From: Anton Khirnov @ 2023-02-20 16:51 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-02-17 04:43:50)
> From a9ae85816dfaa8791f974348825fc8ba9209423d Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Thu, 10 Mar 2022 18:08:53 +0100
> Subject: [PATCH 05/72] h264_parser: expose idr_pic_id
>
> Vulkan needs it.
> ---
> libavcodec/h264_parse.h | 1 +
> libavcodec/h264_parser.c | 2 +-
> 2 files changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/h264_parse.h b/libavcodec/h264_parse.h
> index 4ee863df66..4ba0add4f2 100644
> --- a/libavcodec/h264_parse.h
> +++ b/libavcodec/h264_parse.h
> @@ -85,6 +85,7 @@ typedef struct H264POCContext {
> int delta_poc_bottom;
> int delta_poc[2];
> int frame_num;
> + int idr_pic_id;
> int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
> int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
> int frame_num_offset; ///< for POC type 2
> diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
> index 46134a1c48..1c330484c1 100644
> --- a/libavcodec/h264_parser.c
> +++ b/libavcodec/h264_parser.c
> @@ -432,7 +432,7 @@ static inline int parse_nal_units(AVCodecParserContext *s,
> }
>
> if (nal.type == H264_NAL_IDR_SLICE)
> - get_ue_golomb_long(&nal.gb); /* idr_pic_id */
> + p->poc.idr_pic_id = get_ue_golomb_long(&nal.gb); /* idr_pic_id */
I don't see how a parser change can be related to anything vulkan.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 3:43 [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding Lynne
` (2 preceding siblings ...)
2023-02-20 16:51 ` Anton Khirnov
@ 2023-02-20 16:56 ` Anton Khirnov
2023-02-20 17:21 ` Anton Khirnov
2023-02-20 17:40 ` Anton Khirnov
5 siblings, 0 replies; 34+ messages in thread
From: Anton Khirnov @ 2023-02-20 16:56 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-02-17 04:43:50)
> From 1279c6011c610fdb054cd9eea7a6f07c94f69f29 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 14 Dec 2022 00:09:08 +0100
> Subject: [PATCH 08/72] h264_ps: expose bit rate and CPB size fields
>
> ---
> libavcodec/h264_ps.c | 8 ++++----
> libavcodec/h264_ps.h | 4 ++++
> 2 files changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
> index d9df570718..fc8715876a 100644
> --- a/libavcodec/h264_ps.c
> +++ b/libavcodec/h264_ps.c
> @@ -113,12 +113,12 @@ static inline int decode_hrd_parameters(GetBitContext *gb, void *logctx,
> return AVERROR_INVALIDDATA;
> }
>
> - get_bits(gb, 4); /* bit_rate_scale */
> + sps->bit_rate_scale = get_bits(gb, 4);
> get_bits(gb, 4); /* cpb_size_scale */
> for (i = 0; i < cpb_count; i++) {
> - get_ue_golomb_long(gb); /* bit_rate_value_minus1 */
> - get_ue_golomb_long(gb); /* cpb_size_value_minus1 */
> - get_bits1(gb); /* cbr_flag */
> + sps->bit_rate_value[i] = get_ue_golomb_long(gb) + 1; /* bit_rate_value_minus1 + 1 */
> + sps->cpb_size_value[i] = get_ue_golomb_long(gb) + 1; /* cpb_size_value_minus1 + 1 */
> + sps->cpr_flag[i] = get_bits1(gb);
> }
> sps->initial_cpb_removal_delay_length = get_bits(gb, 5) + 1;
> sps->cpb_removal_delay_length = get_bits(gb, 5) + 1;
> diff --git a/libavcodec/h264_ps.h b/libavcodec/h264_ps.h
> index 906bab7214..03bd0227d6 100644
> --- a/libavcodec/h264_ps.h
> +++ b/libavcodec/h264_ps.h
> @@ -89,6 +89,10 @@ typedef struct SPS {
> int pic_struct_present_flag;
> int time_offset_length;
> int cpb_cnt; ///< See H.264 E.1.2
> + int bit_rate_scale;
> + uint32_t bit_rate_value[32]; ///< bit_rate_value_minus1 + 1
> + uint32_t cpb_size_value[32]; ///< cpb_size_value_minus1 + 1
> + uint8_t cpr_flag[32];
If only there was a way to store 32 flags in less than 32 bytes.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 3:43 [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding Lynne
` (3 preceding siblings ...)
2023-02-20 16:56 ` Anton Khirnov
@ 2023-02-20 17:21 ` Anton Khirnov
2023-02-20 17:40 ` Anton Khirnov
5 siblings, 0 replies; 34+ messages in thread
From: Anton Khirnov @ 2023-02-20 17:21 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-02-17 04:43:50)
> From 52ab3cd8d165a838be92189c87c54915efc1c7e5 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 11 Jan 2023 05:20:32 +0100
> Subject: [PATCH 10/72] h264dec: track picture_structure in H264Picture
>
> ---
> libavcodec/h264_picture.c | 1 +
> libavcodec/h264_slice.c | 1 +
> libavcodec/h264dec.h | 1 +
> 3 files changed, 3 insertions(+)
>
> diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
> index 2661ff4698..0348166c43 100644
> --- a/libavcodec/h264_picture.c
> +++ b/libavcodec/h264_picture.c
> @@ -80,6 +80,7 @@ static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src)
> dst->mbaff = src->mbaff;
> dst->field_picture = src->field_picture;
> dst->reference = src->reference;
> + dst->picture_structure = src->picture_structure;
> dst->recovered = src->recovered;
> dst->invalid_gap = src->invalid_gap;
> dst->sei_recovery_frame_cnt = src->sei_recovery_frame_cnt;
> diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
> index 6188c74632..8ac66b343c 100644
> --- a/libavcodec/h264_slice.c
> +++ b/libavcodec/h264_slice.c
> @@ -491,6 +491,7 @@ static int h264_frame_start(H264Context *h)
> pic->reference = h->droppable ? 0 : h->picture_structure;
> pic->f->coded_picture_number = h->coded_picture_number++;
> pic->field_picture = h->picture_structure != PICT_FRAME;
> + pic->picture_structure = h->picture_structure;
How does this make sense? picture_structure in slice header tells you
whether you're currently decoding a frame or a field, but a decoded
H264Picture is always a full frame (i.e. two fields).
This code marks field-coded pictures as whatever the second coded field
was.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding
2023-02-17 3:43 [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding Lynne
` (4 preceding siblings ...)
2023-02-20 17:21 ` Anton Khirnov
@ 2023-02-20 17:40 ` Anton Khirnov
5 siblings, 0 replies; 34+ messages in thread
From: Anton Khirnov @ 2023-02-20 17:40 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-02-17 04:43:50)
> From 4645f1fb3249f8249fdebaf9b3edffc848b9af3c Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 14 Dec 2022 00:18:42 +0100
> Subject: [PATCH 17/72] hevc_ps: expose and parse scc range extension fields
>
> ---
> libavcodec/hevc.h | 2 ++
> libavcodec/hevc_ps.c | 63 ++++++++++++++++++++++++++++++++++++++++----
> libavcodec/hevc_ps.h | 26 ++++++++++++++++++
> 3 files changed, 86 insertions(+), 5 deletions(-)
>
> diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
> index 1804755327..913c7d4e2e 100644
> --- a/libavcodec/hevc.h
> +++ b/libavcodec/hevc.h
> @@ -154,6 +154,8 @@ enum {
> // get near that, though, so set a lower limit here with the maximum
> // possible value for 4K video (at most 135 16x16 Ctb rows).
> HEVC_MAX_ENTRY_POINT_OFFSETS = HEVC_MAX_TILE_COLUMNS * 135,
> +
> + HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE = 128,
> };
>
>
> diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
> index a740da9f82..b03f59efef 100644
> --- a/libavcodec/hevc_ps.c
> +++ b/libavcodec/hevc_ps.c
> @@ -856,7 +856,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
> int ret = 0;
> int log2_diff_max_min_transform_block_size;
> int bit_depth_chroma, start;
> - int i;
> + int i, j;
>
> // Coded parameters
>
> @@ -1077,9 +1077,12 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
> if (sps->vui_present)
> decode_vui(gb, avctx, apply_defdispwin, sps);
>
> - if (get_bits1(gb)) { // sps_extension_flag
> + sps->sps_extension_present_flag = get_bits1(gb);
> + if (sps->sps_extension_present_flag) { // sps_extension_flag
// department of redundancy department called
> sps->sps_range_extension_flag = get_bits1(gb);
> - skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7);
> + skip_bits(gb, 2);
> + sps->sps_scc_extension_flag = get_bits1(gb);
> + skip_bits(gb, 4);
> if (sps->sps_range_extension_flag) {
> sps->transform_skip_rotation_enabled_flag = get_bits1(gb);
> sps->transform_skip_context_enabled_flag = get_bits1(gb);
> @@ -1105,6 +1108,26 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
> av_log(avctx, AV_LOG_WARNING,
> "cabac_bypass_alignment_enabled_flag not yet implemented\n");
> }
> + if (sps->sps_scc_extension_flag) {
> + sps->sps_curr_pic_ref_enabled_flag = get_bits1(gb);
> + sps->palette_mode_enabled_flag = get_bits1(gb);
> + if (sps->palette_mode_enabled_flag) {
> + sps->palette_max_size = get_ue_golomb_long(gb);
> + sps->delta_palette_max_predictor_size = get_ue_golomb_long(gb);
> +
> + sps->sps_palette_predictor_initializer_present_flag = get_bits1(gb);
> + if (sps->sps_palette_predictor_initializer_present_flag) {
> + sps->sps_num_palette_predictor_initializer_minus1 = get_ue_golomb_long(gb);
> + for (i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++) {
> + for (j = 0; j <= sps->sps_num_palette_predictor_initializer_minus1; j++)
> + sps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb);
> + }
> + }
> + }
> +
> + sps->motion_vector_resolution_control_idc = get_bits(gb, 2);
> + sps->intra_boundary_filtering_disable_flag = get_bits1(gb);
> + }
> }
> if (apply_defdispwin) {
> sps->output_window.left_offset += sps->vui.def_disp_win.left_offset;
> @@ -1446,7 +1469,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
> HEVCParamSets *ps)
> {
> HEVCSPS *sps = NULL;
> - int i, ret = 0;
> + int i, j, ret = 0;
> unsigned int pps_id = 0;
> ptrdiff_t nal_size;
> unsigned log2_parallel_merge_level_minus2;
> @@ -1664,11 +1687,41 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
> pps->pps_extension_present_flag = get_bits1(gb);
> if (pps->pps_extension_present_flag) {
> pps->pps_range_extensions_flag = get_bits1(gb);
> - skip_bits(gb, 7); // pps_extension_7bits
> + skip_bits(gb, 2);
> + pps->pps_scc_extension_flag = get_bits1(gb);
> + skip_bits(gb, 4);
> if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps->pps_range_extensions_flag) {
> if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0)
> goto err;
> }
> + if (pps->pps_scc_extension_flag) {
> + pps->pps_curr_pic_ref_enabled_flag = get_bits1(gb);
> + pps->residual_adaptive_colour_transform_enabled_flag = get_bits1(gb);
> +
> + if (pps->residual_adaptive_colour_transform_enabled_flag) {
> + pps->pps_slice_act_qp_offsets_present_flag = get_bits1(gb);
> + pps->pps_act_y_qp_offset_plus5 = get_se_golomb(gb);
> + pps->pps_act_cb_qp_offset_plus5 = get_se_golomb(gb);
> + pps->pps_act_cr_qp_offset_plus3 = get_se_golomb(gb);
> + }
> +
> + pps->pps_palette_predictor_initializer_present_flag = get_bits1(gb);
> + if (pps->pps_palette_predictor_initializer_present_flag) {
> + pps->pps_num_palette_predictor_initializer = get_ue_golomb_long(gb);
> + if (pps->pps_num_palette_predictor_initializer) {
> + pps->monochrome_palette_flag = get_bits1(gb);
> + pps->luma_bit_depth_entry_minus8 = get_ue_golomb_long(gb);
> +
> + if (!pps->monochrome_palette_flag)
> + pps->chroma_bit_depth_entry_minus8 = get_ue_golomb_long(gb);
> +
> + for (i = 0; i < (pps->monochrome_palette_flag ? 1 : 3); i++) {
> + for (j = 0; j < pps->pps_num_palette_predictor_initializer; j++)
You're allowed to declare loop variables inside loops now.
> + pps->palette_predictor_initializers[i][j] = get_ue_golomb_long(gb);
> + }
> + }
> + }
> + }
> }
>
> ret = setup_pps(avctx, gb, pps, sps);
> diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
> index 549e0bdf57..8dddf7ef8d 100644
> --- a/libavcodec/hevc_ps.h
> +++ b/libavcodec/hevc_ps.h
> @@ -210,6 +210,18 @@ typedef struct HEVCSPS {
> VUI vui;
> PTL ptl;
>
> + int sps_extension_present_flag;
> + int sps_scc_extension_flag;
> + int sps_curr_pic_ref_enabled_flag;
> + int palette_mode_enabled_flag;
> + uint8_t palette_max_size;
> + uint8_t delta_palette_max_predictor_size;
> + uint8_t motion_vector_resolution_control_idc;
> + uint8_t sps_num_palette_predictor_initializer_minus1;
> + int sps_palette_predictor_initializer_present_flag;
> + int intra_boundary_filtering_disable_flag;
> + uint16_t palette_predictor_initializers[3][HEVC_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE];
> +
> uint8_t scaling_list_enable_flag;
> int scaling_list_data_present_flag;
> ScalingList scaling_list;
> @@ -341,6 +353,20 @@ typedef struct HEVCPPS {
> uint8_t log2_sao_offset_scale_luma;
> uint8_t log2_sao_offset_scale_chroma;
>
> + int pps_scc_extension_flag;
> + int pps_curr_pic_ref_enabled_flag;
> + int residual_adaptive_colour_transform_enabled_flag;
> + int pps_slice_act_qp_offsets_present_flag;
> + int pps_palette_predictor_initializer_present_flag;
> + int pps_num_palette_predictor_initializer;
> + int monochrome_palette_flag;
> + int luma_bit_depth_entry_minus8;
> + int chroma_bit_depth_entry_minus8;
> + int pps_act_y_qp_offset_plus5;
> + int pps_act_cb_qp_offset_plus5;
> + int pps_act_cr_qp_offset_plus3;
All those pps_ and sps_ prefixes are redundant pleonasms and make the
code more cluttered.
The _plusminus100500 are quite annoying too.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 34+ messages in thread
end of thread, other threads:[~2023-02-20 17:41 UTC | newest]
Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-17 3:43 [FFmpeg-devel] [PATCH 00/72] Implement support for Vulkan multiplane images and video decoding Lynne
2023-02-17 9:08 ` Jean-Baptiste Kempf
2023-02-17 9:45 ` Hendrik Leppkes
2023-02-17 10:45 ` Lynne
2023-02-17 11:04 ` Kieran Kunhya
[not found] ` <CAK+ULv780c=z_dig_FAhPJ2poZ8u2_QOnnPUmV3SSiYoaQZ+tw@mail.gmail.com-NOU29aV----9>
2023-02-17 11:52 ` Lynne
2023-02-17 15:45 ` Michael Niedermayer
2023-02-17 16:35 ` Lynne
2023-02-18 19:02 ` Michael Niedermayer
2023-02-19 0:08 ` Lynne
2023-02-19 15:40 ` Michael Niedermayer
2023-02-19 15:44 ` Kieran Kunhya
2023-02-19 16:53 ` Lynne
2023-02-19 16:56 ` Jean-Baptiste Kempf
2023-02-19 16:58 ` Lynne
2023-02-19 17:02 ` Jean-Baptiste Kempf
2023-02-19 19:32 ` Niklas Haas
2023-02-19 18:50 ` Michael Niedermayer
2023-02-19 19:02 ` Lynne
2023-02-19 19:44 ` Michael Niedermayer
2023-02-19 20:00 ` Lynne
2023-02-19 20:14 ` Michael Niedermayer
[not found] ` <NOea74V--3-9@lynne.ee-NOeaB9K--R-9>
2023-02-19 16:57 ` Lynne
2023-02-19 17:36 ` Kieran Kunhya
2023-02-19 17:42 ` Kieran Kunhya
2023-02-19 18:46 ` Lynne
2023-02-19 21:59 ` Kieran Kunhya
2023-02-19 23:50 ` Neal Gompa
2023-02-20 5:13 ` Jean-Baptiste Kempf
2023-02-20 9:18 ` Hendrik Leppkes
2023-02-20 16:51 ` Anton Khirnov
2023-02-20 16:56 ` Anton Khirnov
2023-02-20 17:21 ` Anton Khirnov
2023-02-20 17:40 ` Anton Khirnov
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git