* [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
@ 2023-04-24 15:56 Lynne
2023-04-28 13:28 ` Niklas Haas
` (15 more replies)
0 siblings, 16 replies; 49+ messages in thread
From: Lynne @ 2023-04-24 15:56 UTC (permalink / raw)
To: Ffmpeg Devel
[-- Attachment #1: Type: text/plain, Size: 143 bytes --]
This is part two of the Vulkan patchset, which contains all the
hwcontext and vulkan.c rewrites, as well as the filtering changes.
55 patches attached.
[-- Attachment #2: 0021-lavu-add-12-bit-2-plane-422-and-444-pixel-formats.patch --]
[-- Type: text/x-diff, Size: 5054 bytes --]
From 01b176369d0480b8b15d405cc4a357bdecb3522d Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sat, 25 Feb 2023 09:34:54 +0100
Subject: [PATCH 21/97] lavu: add 12-bit 2-plane 422 and 444 pixel formats
---
libavutil/pixdesc.c | 48 +++++++++++++++++++++++++++++++++++++++++
libavutil/pixfmt.h | 8 +++++++
tests/ref/fate/imgutils | 4 ++++
3 files changed, 60 insertions(+)
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 62a2ae08d9..e1e0dd2a9e 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -2717,6 +2717,54 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
.flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT |
AV_PIX_FMT_FLAG_ALPHA,
},
+ [AV_PIX_FMT_P212BE] = {
+ .name = "p212be",
+ .nb_components = 3,
+ .log2_chroma_w = 1,
+ .log2_chroma_h = 0,
+ .comp = {
+ { 0, 2, 0, 4, 12 }, /* Y */
+ { 1, 4, 0, 4, 12 }, /* U */
+ { 1, 4, 2, 4, 12 }, /* V */
+ },
+ .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE,
+ },
+ [AV_PIX_FMT_P212LE] = {
+ .name = "p212le",
+ .nb_components = 3,
+ .log2_chroma_w = 1,
+ .log2_chroma_h = 0,
+ .comp = {
+ { 0, 2, 0, 4, 12 }, /* Y */
+ { 1, 4, 0, 4, 12 }, /* U */
+ { 1, 4, 2, 4, 12 }, /* V */
+ },
+ .flags = AV_PIX_FMT_FLAG_PLANAR,
+ },
+ [AV_PIX_FMT_P412BE] = {
+ .name = "p412be",
+ .nb_components = 3,
+ .log2_chroma_w = 0,
+ .log2_chroma_h = 0,
+ .comp = {
+ { 0, 2, 0, 4, 12 }, /* Y */
+ { 1, 4, 0, 4, 12 }, /* U */
+ { 1, 4, 2, 4, 12 }, /* V */
+ },
+ .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE,
+ },
+ [AV_PIX_FMT_P412LE] = {
+ .name = "p412le",
+ .nb_components = 3,
+ .log2_chroma_w = 0,
+ .log2_chroma_h = 0,
+ .comp = {
+ { 0, 2, 0, 4, 12 }, /* Y */
+ { 1, 4, 0, 4, 12 }, /* U */
+ { 1, 4, 2, 4, 12 }, /* V */
+ },
+ .flags = AV_PIX_FMT_FLAG_PLANAR,
+ },
};
static const char * const color_range_names[] = {
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index 37c2c79e01..63e07ba64f 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -420,6 +420,12 @@ enum AVPixelFormat {
AV_PIX_FMT_RGBAF32BE, ///< IEEE-754 single precision packed RGBA 32:32:32:32, 128bpp, RGBARGBA..., big-endian
AV_PIX_FMT_RGBAF32LE, ///< IEEE-754 single precision packed RGBA 32:32:32:32, 128bpp, RGBARGBA..., little-endian
+ AV_PIX_FMT_P212BE, ///< interleaved chroma YUV 4:2:2, 24bpp, data in the high bits, big-endian
+ AV_PIX_FMT_P212LE, ///< interleaved chroma YUV 4:2:2, 24bpp, data in the high bits, little-endian
+
+ AV_PIX_FMT_P412BE, ///< interleaved chroma YUV 4:4:4, 36bpp, data in the high bits, big-endian
+ AV_PIX_FMT_P412LE, ///< interleaved chroma YUV 4:4:4, 36bpp, data in the high bits, little-endian
+
AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
};
@@ -518,6 +524,8 @@ enum AVPixelFormat {
#define AV_PIX_FMT_P210 AV_PIX_FMT_NE(P210BE, P210LE)
#define AV_PIX_FMT_P410 AV_PIX_FMT_NE(P410BE, P410LE)
+#define AV_PIX_FMT_P212 AV_PIX_FMT_NE(P212BE, P212LE)
+#define AV_PIX_FMT_P412 AV_PIX_FMT_NE(P412BE, P412LE)
#define AV_PIX_FMT_P216 AV_PIX_FMT_NE(P216BE, P216LE)
#define AV_PIX_FMT_P416 AV_PIX_FMT_NE(P416BE, P416LE)
diff --git a/tests/ref/fate/imgutils b/tests/ref/fate/imgutils
index e79ec7e4b3..02a755f2b7 100644
--- a/tests/ref/fate/imgutils
+++ b/tests/ref/fate/imgutils
@@ -262,3 +262,7 @@ rgbf32be planes: 1, linesizes: 768 0 0 0, plane_sizes: 36864 0
rgbf32le planes: 1, linesizes: 768 0 0 0, plane_sizes: 36864 0 0 0, plane_offsets: 0 0 0, total_size: 36864
rgbaf32be planes: 1, linesizes: 1024 0 0 0, plane_sizes: 49152 0 0 0, plane_offsets: 0 0 0, total_size: 49152
rgbaf32le planes: 1, linesizes: 1024 0 0 0, plane_sizes: 49152 0 0 0, plane_offsets: 0 0 0, total_size: 49152
+p212be planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 6144 0 0, plane_offsets: 6144 0 0, total_size: 12288
+p212le planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 6144 0 0, plane_offsets: 6144 0 0, total_size: 12288
+p412be planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 0 0, plane_offsets: 6144 0 0, total_size: 18432
+p412le planes: 2, linesizes: 128 256 0 0, plane_sizes: 6144 12288 0 0, plane_offsets: 6144 0 0, total_size: 18432
--
2.40.0
[-- Attachment #3: 0022-lsws-add-in-out-support-for-the-new-12-bit-2-plane-4.patch --]
[-- Type: text/x-diff, Size: 3501 bytes --]
From 82997b1b0a6c6e2dfb63865028ba17a1ef7ae8de Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sat, 25 Feb 2023 09:36:58 +0100
Subject: [PATCH 22/97] lsws: add in/out support for the new 12-bit 2-plane 422
and 444 pixfmts
---
libswscale/input.c | 8 ++++++++
libswscale/utils.c | 4 ++++
tests/ref/fate/sws-pixdesc-query | 26 ++++++++++++++++++++++++++
3 files changed, 38 insertions(+)
diff --git a/libswscale/input.c b/libswscale/input.c
index d5676062a2..41795c636e 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1452,9 +1452,13 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
c->chrToYV12 = p010BEToUV_c;
break;
case AV_PIX_FMT_P012LE:
+ case AV_PIX_FMT_P212LE:
+ case AV_PIX_FMT_P412LE:
c->chrToYV12 = p012LEToUV_c;
break;
case AV_PIX_FMT_P012BE:
+ case AV_PIX_FMT_P212BE:
+ case AV_PIX_FMT_P412BE:
c->chrToYV12 = p012BEToUV_c;
break;
case AV_PIX_FMT_P016LE:
@@ -1944,9 +1948,13 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
c->lumToYV12 = p010BEToY_c;
break;
case AV_PIX_FMT_P012LE:
+ case AV_PIX_FMT_P212LE:
+ case AV_PIX_FMT_P412LE:
c->lumToYV12 = p012LEToY_c;
break;
case AV_PIX_FMT_P012BE:
+ case AV_PIX_FMT_P212BE:
+ case AV_PIX_FMT_P412BE:
c->lumToYV12 = p012BEToY_c;
break;
case AV_PIX_FMT_GRAYF32LE:
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 925c536bf1..a3a7a40750 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -248,8 +248,12 @@ static const FormatEntry format_entries[] = {
[AV_PIX_FMT_X2BGR10LE] = { 1, 1 },
[AV_PIX_FMT_P210BE] = { 1, 1 },
[AV_PIX_FMT_P210LE] = { 1, 1 },
+ [AV_PIX_FMT_P212BE] = { 1, 1 },
+ [AV_PIX_FMT_P212LE] = { 1, 1 },
[AV_PIX_FMT_P410BE] = { 1, 1 },
[AV_PIX_FMT_P410LE] = { 1, 1 },
+ [AV_PIX_FMT_P412BE] = { 1, 1 },
+ [AV_PIX_FMT_P412LE] = { 1, 1 },
[AV_PIX_FMT_P216BE] = { 1, 1 },
[AV_PIX_FMT_P216LE] = { 1, 1 },
[AV_PIX_FMT_P416BE] = { 1, 1 },
diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query
index 14156a383c..fd7f2aefc0 100644
--- a/tests/ref/fate/sws-pixdesc-query
+++ b/tests/ref/fate/sws-pixdesc-query
@@ -67,8 +67,12 @@ isNBPS:
p012le
p210be
p210le
+ p212be
+ p212le
p410be
p410le
+ p412be
+ p412le
x2bgr10be
x2bgr10le
x2rgb10be
@@ -160,8 +164,10 @@ isBE:
p012be
p016be
p210be
+ p212be
p216be
p410be
+ p412be
p416be
rgb444be
rgb48be
@@ -226,10 +232,14 @@ isYUV:
p016le
p210be
p210le
+ p212be
+ p212le
p216be
p216le
p410be
p410le
+ p412be
+ p412le
p416be
p416le
uyvy422
@@ -338,10 +348,14 @@ isPlanarYUV:
p016le
p210be
p210le
+ p212be
+ p212le
p216be
p216le
p410be
p410le
+ p412be
+ p412le
p416be
p416le
yuv410p
@@ -431,10 +445,14 @@ isSemiPlanarYUV:
p016le
p210be
p210le
+ p212be
+ p212le
p216be
p216le
p410be
p410le
+ p412be
+ p412le
p416be
p416le
@@ -853,10 +871,14 @@ Planar:
p016le
p210be
p210le
+ p212be
+ p212le
p216be
p216le
p410be
p410le
+ p412be
+ p412le
p416be
p416le
yuv410p
@@ -1029,8 +1051,12 @@ DataInHighBits:
p012le
p210be
p210le
+ p212be
+ p212le
p410be
p410le
+ p412be
+ p412le
xv36be
xv36le
xyz12be
--
2.40.0
[-- Attachment #4: 0023-hwcontext_vulkan-initialize-and-require-instance-ver.patch --]
[-- Type: text/x-diff, Size: 2363 bytes --]
From 1de5bf4281b19847fc45556431850d772180269e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:15:04 +0100
Subject: [PATCH 23/97] hwcontext_vulkan: initialize and require instance
version 1.3
---
configure | 4 ++--
libavutil/hwcontext_vulkan.c | 2 +-
libavutil/hwcontext_vulkan.h | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/configure b/configure
index 549ed1401c..b3732dabe9 100755
--- a/configure
+++ b/configure
@@ -7015,8 +7015,8 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.
"in maintaining it."
if enabled vulkan; then
- check_pkg_config_header_only vulkan "vulkan >= 1.2.189" "vulkan/vulkan.h" "defined VK_VERSION_1_2" ||
- check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_3) || (defined(VK_VERSION_1_2) && VK_HEADER_VERSION >= 189)"
+ check_pkg_config_header_only vulkan "vulkan >= 1.3.238" "vulkan/vulkan.h" "defined VK_VERSION_1_3" ||
+ check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) || (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 238)"
fi
if enabled x86; then
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index ffd4f5dec4..4185fb6110 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -673,7 +673,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
VkApplicationInfo application_info = {
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
.pEngineName = "libavutil",
- .apiVersion = VK_API_VERSION_1_2,
+ .apiVersion = VK_API_VERSION_1_3,
.engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
LIBAVUTIL_VERSION_MINOR,
LIBAVUTIL_VERSION_MICRO),
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index df86c85b3c..70c8379dc3 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -53,7 +53,7 @@ typedef struct AVVulkanDeviceContext {
PFN_vkGetInstanceProcAddr get_proc_addr;
/**
- * Vulkan instance. Must be at least version 1.2.
+ * Vulkan instance. Must be at least version 1.3.
*/
VkInstance inst;
--
2.40.0
[-- Attachment #5: 0024-hwcontext_vulkan-enable-support-for-YCbCr-samplers.patch --]
[-- Type: text/x-diff, Size: 1833 bytes --]
From fa2f04e1db708cabaeedd8ee72aad125cdc5aff7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 02:37:14 +0100
Subject: [PATCH 24/97] hwcontext_vulkan: enable support for YCbCr samplers
---
libavutil/hwcontext_vulkan.c | 1 +
libavutil/vulkan_functions.h | 2 ++
2 files changed, 3 insertions(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 4185fb6110..f8cc2d9dbd 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1378,6 +1378,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
goto end;
}
p->device_features_1_2.timelineSemaphore = 1;
+ p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
/* Setup queue family */
if ((err = setup_queue_families(ctx, &dev_info)))
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index d15a5d9a42..deb77495a2 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -155,6 +155,8 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipeline) \
\
/* Sampler */ \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSamplerYcbcrConversion) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \
\
--
2.40.0
[-- Attachment #6: 0025-hwcontext_vulkan-enable-VK_KHR_synchronization2-if-s.patch --]
[-- Type: text/x-diff, Size: 5364 bytes --]
From 69fdfe279d87d9e95af72f15cdaead471e8cb611 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 13 Mar 2022 09:06:06 +0100
Subject: [PATCH 25/97] hwcontext_vulkan: enable VK_KHR_synchronization2 if
supported
---
libavutil/hwcontext_vulkan.c | 17 +++++++++++++----
libavutil/vulkan_functions.h | 6 +++++-
2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index f8cc2d9dbd..894b4b83f3 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -89,6 +89,7 @@ typedef struct VulkanDevicePriv {
/* Features */
VkPhysicalDeviceVulkan11Features device_features_1_1;
VkPhysicalDeviceVulkan12Features device_features_1_2;
+ VkPhysicalDeviceVulkan13Features device_features_1_3;
/* Queues */
uint32_t qfs[5];
@@ -346,7 +347,7 @@ static const VulkanOptExtension optional_device_exts[] = {
/* Misc or required by other extensions */
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
- { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
+ { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_SYNC2 },
/* Imports/exports */
{ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -1326,9 +1327,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
+ VkPhysicalDeviceVulkan13Features dev_features_1_3 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
+ .pNext = &timeline_features,
+ };
VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
- .pNext = &timeline_features,
+ .pNext = &dev_features_1_3,
};
VkPhysicalDeviceVulkan11Features dev_features_1_1 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
@@ -1340,8 +1345,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
};
VkDeviceCreateInfo dev_info = {
- .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
- .pNext = &hwctx->device_features,
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
};
hwctx->device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
@@ -1349,6 +1353,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->device_features_1_1.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
p->device_features_1_1.pNext = &p->device_features_1_2;
p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
+ p->device_features_1_2.pNext = &p->device_features_1_3;
+ p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
ctx->free = vulkan_device_free;
/* Create an instance if not given one */
@@ -1379,6 +1385,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
}
p->device_features_1_2.timelineSemaphore = 1;
p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
+ p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
+
+ dev_info.pNext = &hwctx->device_features;
/* Setup queue family */
if ((err = setup_queue_families(ctx, &dev_info)))
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index deb77495a2..103bff3013 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -37,6 +37,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_EXTERNAL_WIN32_MEMORY = 1ULL << 6, /* VK_KHR_external_memory_win32 */
FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */
#endif
+ FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -145,7 +146,10 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \
- \
+ \
+ /* sync2 */ \
+ MACRO(1, 1, FF_VK_EXT_SYNC2, CmdPipelineBarrier2KHR) \
+ \
/* Pipeline */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \
--
2.40.0
[-- Attachment #7: 0026-hwcontext_vulkan-support-threadsafe-queue-and-frame-.patch --]
[-- Type: text/x-diff, Size: 19170 bytes --]
From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 15 Mar 2022 23:00:32 +0100
Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
operations
---
libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
libavutil/hwcontext_vulkan.h | 40 +++++++-
2 files changed, 167 insertions(+), 49 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 894b4b83f3..b0db59b2d8 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -27,6 +27,7 @@
#include <dlfcn.h>
#endif
+#include <pthread.h>
#include <unistd.h>
#include "config.h"
@@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceVulkan13Features device_features_1_3;
/* Queues */
- uint32_t qfs[5];
- int num_qfs;
+ pthread_mutex_t **qf_mutex;
+ int nb_tot_qfs;
+ uint32_t img_qfs[5];
+ int nb_img_qfs;
/* Debug callback */
VkDebugUtilsMessengerEXT debug_ctx;
@@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
} VulkanFramesPriv;
typedef struct AVVkFrameInternal {
+ pthread_mutex_t update_mutex;
+
#if CONFIG_CUDA
/* Importing external memory into cuda is really expensive so we keep the
* memory imported all the time */
@@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
if (p->libvulkan)
dlclose(p->libvulkan);
+ for (int i = 0; i < p->nb_tot_qfs; i++)
+ av_freep(&p->qf_mutex[i]);
+ av_freep(&p->qf_mutex);
+
RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}
@@ -1436,13 +1445,26 @@ end:
return err;
}
+static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
+}
+
+static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
+}
+
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
int err;
- uint32_t queue_num;
+ uint32_t qf_num;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ VkQueueFamilyProperties *qf;
int graph_index, comp_index, tx_index, enc_index, dec_index;
/* Set device extension flags */
@@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
- if (!queue_num) {
+ vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
+ if (!qf_num) {
av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
return AVERROR_EXTERNAL;
}
+ qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
+ if (!qf)
+ return AVERROR(ENOMEM);
+
+ vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
+
+ p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
+ if (!p->qf_mutex)
+ return AVERROR(ENOMEM);
+ p->nb_tot_qfs = qf_num;
+
+ for (int i = 0; i < qf_num; i++) {
+ p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
+ if (!p->qf_mutex[i])
+ return AVERROR(ENOMEM);
+ for (int j = 0; j < qf[i].queueCount; j++)
+ pthread_mutex_init(&p->qf_mutex[i][j], NULL);
+ }
+
graph_index = hwctx->queue_family_index;
comp_index = hwctx->queue_family_comp_index;
tx_index = hwctx->queue_family_tx_index;
@@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
return AVERROR(EINVAL); \
} else if (fidx < 0 || ctx_qf < 0) { \
break; \
- } else if (ctx_qf >= queue_num) { \
+ } else if (ctx_qf >= qf_num) { \
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
- type, ctx_qf, queue_num); \
+ type, ctx_qf, qf_num); \
return AVERROR(EINVAL); \
} \
\
@@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
- p->qfs[p->num_qfs++] = ctx_qf; \
+ p->img_qfs[p->nb_img_qfs++] = ctx_qf; \
} while (0)
CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
@@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
#undef CHECK_QUEUE
+ if (!hwctx->lock_queue)
+ hwctx->lock_queue = lock_queue;
+ if (!hwctx->unlock_queue)
+ hwctx->unlock_queue = unlock_queue;
+
/* Get device capabilities */
vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
@@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
{
AVVkFrameInternal *internal = f->internal;
- if (!internal)
- return;
-
#if CONFIG_CUDA
if (internal->cuda_fc_ref) {
AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
@@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
uint32_t src_qf, dst_qf;
VkImageLayout new_layout;
VkAccessFlags new_access;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@@ -1944,6 +1989,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
};
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
+
+ if ((err = wait_start_exec_ctx(hwfc, ectx)))
+ return err;
+
+ vkfc->lock_frame(hwfc, frame);
+
for (int i = 0; i < planes; i++) {
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
sem_sig_val[i] = frame->sem_value[i] + 1;
@@ -1980,9 +2031,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
break;
}
- if ((err = wait_start_exec_ctx(hwfc, ectx)))
- return err;
-
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
@@ -2008,7 +2056,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 0, NULL, 0, NULL, planes, img_bar);
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ vkfc->unlock_frame(hwfc, frame);
+
+ return err;
}
static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
@@ -2090,10 +2141,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = usage,
.samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
@@ -2117,6 +2168,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
return AVERROR_EXTERNAL;
}
+ f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@@ -2161,10 +2213,10 @@ static void try_export_flags(AVHWFramesContext *hwfc,
VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
.pNext = NULL,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
VkPhysicalDeviceExternalImageFormatInfo enext = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
@@ -2259,6 +2311,16 @@ fail:
return NULL;
}
+static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+ pthread_mutex_lock(&vkf->internal->update_mutex);
+}
+
+static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+ pthread_mutex_unlock(&vkf->internal->update_mutex);
+}
+
static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
VulkanFramesPriv *fp = hwfc->internal->priv;
@@ -2421,6 +2483,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return AVERROR(ENOMEM);
}
+ if (!hwctx->lock_frame)
+ hwctx->lock_frame = lock_frame;
+ if (!hwctx->unlock_frame)
+ hwctx->unlock_frame = unlock_frame;
+
return 0;
}
@@ -2727,10 +2794,10 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
.samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
/* Image format verification */
@@ -2809,6 +2876,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
* offer us anything we could import and sync with, so instead
* just signal the semaphore we created. */
+ f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@@ -3017,20 +3085,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
CU_AD_FORMAT_UNSIGNED_INT8;
dst_f = (AVVkFrame *)frame->data[0];
-
dst_int = dst_f->internal;
- if (!dst_int || !dst_int->cuda_fc_ref) {
- if (!dst_f->internal)
- dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
-
- if (!dst_int)
- return AVERROR(ENOMEM);
+ if (!dst_int->cuda_fc_ref) {
dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
- if (!dst_int->cuda_fc_ref) {
- av_freep(&dst_f->internal);
+ if (!dst_int->cuda_fc_ref)
return AVERROR(ENOMEM);
- }
for (int i = 0; i < planes; i++) {
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
@@ -3704,13 +3764,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
return err;
}
-static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
+static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
AVBufferRef **bufs, size_t *buf_offsets,
const int *buf_stride, int w,
int h, enum AVPixelFormat pix_fmt, int to_buf)
{
int err;
AVVkFrame *frame = (AVVkFrame *)f->data[0];
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
@@ -3745,11 +3806,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
.waitSemaphoreCount = planes,
};
- for (int i = 0; i < planes; i++)
- sem_signal_values[i] = frame->sem_value[i] + 1;
+ vkfc->lock_frame(hwfc, frame);
if ((err = wait_start_exec_ctx(hwfc, ectx)))
- return err;
+ goto end;
+
+ for (int i = 0; i < planes; i++)
+ sem_signal_values[i] = frame->sem_value[i] + 1;
/* Change the image layout to something more optimal for transfers */
for (int i = 0; i < planes; i++) {
@@ -3824,14 +3887,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
if (!f->buf[ref])
break;
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
- return err;
+ goto end;
}
if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
- return err;
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
+ goto end;
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
} else {
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
}
+
+end:
+ vkfc->unlock_frame(hwfc, frame);
+ return err;
}
static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
@@ -3960,8 +4027,9 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
}
/* Copy buffers into/from image */
- err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
- swf->width, swf->height, swf->format, from);
+ err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets,
+ tmp.linesize, swf->width, swf->height, swf->format,
+ from);
if (from) {
/* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
@@ -4142,7 +4210,19 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
AVVkFrame *av_vk_frame_alloc(void)
{
- return av_mallocz(sizeof(AVVkFrame));
+ AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
+ if (!f)
+ return NULL;
+
+ f->internal = av_mallocz(sizeof(*f->internal));
+ if (!f->internal) {
+ av_free(f);
+ return NULL;
+ }
+
+ pthread_mutex_init(&f->internal->update_mutex, NULL);
+
+ return f;
}
const HWContextType ff_hwcontext_type_vulkan = {
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 70c8379dc3..406d8709c3 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -27,6 +27,8 @@
#include "pixfmt.h"
#include "frame.h"
+typedef struct AVVkFrame AVVkFrame;
+
/**
* @file
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext {
*/
int queue_family_decode_index;
int nb_decode_queues;
+
+ /**
+ * Locks a queue, preventing other threads from submitting any command
+ * buffers to this queue.
+ * If set to NULL, will be set to lavu-internal functions that utilize a
+ * mutex.
+ */
+ void (*lock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index);
+
+ /**
+ * Similar to lock_queue(), unlocks a queue. Must only be called after locking.
+ */
+ void (*unlock_queue)(struct AVHWDeviceContext *ctx, int queue_family, int index);
} AVVulkanDeviceContext;
/**
@@ -195,6 +210,23 @@ typedef struct AVVulkanFramesContext {
* av_hwframe_ctx_init().
*/
AVVkFrameFlags flags;
+
+ /**
+ * Locks a frame, preventing other threads from changing frame properties.
+ * If set to NULL, will be set to lavu-internal functions that utilize a
+ * mutex.
+ * Users SHOULD only ever lock just before command submission in order
+ * to get accurate frame properties, and unlock immediately after command
+ * submission without waiting for it to finish.
+ *
+ * If unset, will be set to lavu-internal functions that utilize a mutex.
+ */
+ void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
+
+ /**
+ * Similar to lock_frame(), unlocks a frame. Must only be called after locking.
+ */
+ void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
} AVVulkanFramesContext;
/*
@@ -210,7 +242,7 @@ typedef struct AVVulkanFramesContext {
* @note the size of this structure is not part of the ABI, to allocate
* you must use @av_vk_frame_alloc().
*/
-typedef struct AVVkFrame {
+struct AVVkFrame {
/**
* Vulkan images to which the memory is bound to.
*/
@@ -264,6 +296,12 @@ typedef struct AVVkFrame {
* Describes the binding offset of each plane to the VkDeviceMemory.
*/
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
+
+ /**
+ * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
+ * the image was allocated with VK_SHARING_MODE_CONCURRENT.
+ */
+ uint32_t queue_family[AV_NUM_DATA_POINTERS];
} AVVkFrame;
/**
--
2.40.0
[-- Attachment #8: 0027-hwcontext_vulkan-remove-contiguous-memory-hack.patch --]
[-- Type: text/x-diff, Size: 2600 bytes --]
From c50347a552f5c7c2e3fcf20ef9a1ad4f1a419918 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 20:32:49 +0100
Subject: [PATCH 27/97] hwcontext_vulkan: remove contiguous memory hack
---
libavutil/hwcontext_vulkan.c | 12 ------------
libavutil/hwcontext_vulkan.h | 4 +---
2 files changed, 1 insertion(+), 15 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index b0db59b2d8..67b4357dd1 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -112,9 +112,6 @@ typedef struct VulkanDevicePriv {
/* Nvidia */
int dev_is_nvidia;
-
- /* Intel */
- int dev_is_intel;
} VulkanDevicePriv;
typedef struct VulkanFramesPriv {
@@ -1501,7 +1498,6 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->hprops.minImportedHostPointerAlignment);
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
- p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
if (!qf_num) {
@@ -1620,8 +1616,6 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
return AVERROR_EXTERNAL;
}
- if (strstr(vendor, "Intel"))
- dev_select.vendor_id = 0x8086;
if (strstr(vendor, "AMD"))
dev_select.vendor_id = 0x1002;
@@ -2356,12 +2350,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
if (!hwctx->usage)
hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS;
- if (!(hwctx->flags & AV_VK_FRAME_FLAG_NONE)) {
- if (p->contiguous_planes == 1 ||
- ((p->contiguous_planes == -1) && p->dev_is_intel))
- hwctx->flags |= AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY;
- }
-
modifier_info = vk_find_struct(hwctx->create_pnext,
VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 406d8709c3..e89fa52927 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -160,9 +160,7 @@ typedef enum AVVkFrameFlags {
* device and tiling during av_hwframe_ctx_init(). */
AV_VK_FRAME_FLAG_NONE = (1ULL << 0),
- /* Image planes will be allocated in a single VkDeviceMemory, rather
- * than as per-plane VkDeviceMemory allocations. Required for exporting
- * to VAAPI on Intel devices. */
+ /* DEPRECATED: does nothing. */
AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1),
} AVVkFrameFlags;
--
2.40.0
[-- Attachment #9: 0028-hwcontext_vulkan-rename-vk_pixfmt_map-to-vk_pixfmt_p.patch --]
[-- Type: text/x-diff, Size: 1383 bytes --]
From 287ec5138511a4760f2c66e94bd80f794cd9f7a3 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 20:35:51 +0100
Subject: [PATCH 28/97] hwcontext_vulkan: rename vk_pixfmt_map to
vk_pixfmt_planar_map
---
libavutil/hwcontext_vulkan.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 67b4357dd1..9eacbb4d2e 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal {
static const struct {
enum AVPixelFormat pixfmt;
- const VkFormat vkfmts[4];
-} vk_pixfmt_map[] = {
+ const VkFormat vkfmts[5];
+} vk_pixfmt_planar_map[] = {
{ AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
{ AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
{ AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
@@ -244,9 +244,9 @@ static const struct {
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
- for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_map); i++)
- if (vk_pixfmt_map[i].pixfmt == p)
- return vk_pixfmt_map[i].vkfmts;
+ for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++)
+ if (vk_pixfmt_planar_map[i].pixfmt == p)
+ return vk_pixfmt_planar_map[i].vkfmts;
return NULL;
}
--
2.40.0
[-- Attachment #10: 0029-hwcontext_vulkan-fix-minor-type-issue-in-VulkanQueue.patch --]
[-- Type: text/x-diff, Size: 772 bytes --]
From 3618d186001c6da0b2bd0219ef226614fd384b3e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 14 Dec 2022 00:52:15 +0100
Subject: [PATCH 29/97] hwcontext_vulkan: fix minor type issue in
VulkanQueueCtx.buf_deps_alloc_size
---
libavutil/hwcontext_vulkan.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 9eacbb4d2e..ec05428903 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -66,7 +66,7 @@ typedef struct VulkanQueueCtx {
/* Buffer dependencies */
AVBufferRef **buf_deps;
int nb_buf_deps;
- int buf_deps_alloc_size;
+ unsigned int buf_deps_alloc_size;
} VulkanQueueCtx;
typedef struct VulkanExecCtx {
--
2.40.0
[-- Attachment #11: 0030-hwcontext_vulkan-report-nonCoherentAtomSize.patch --]
[-- Type: text/x-diff, Size: 1140 bytes --]
From 5e541daf24989d8ded25f8796453e1a1816e2a29 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 28 Dec 2022 05:55:17 +0100
Subject: [PATCH 30/97] hwcontext_vulkan: report nonCoherentAtomSize
---
libavutil/hwcontext_vulkan.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index ec05428903..2bc075d66f 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1493,6 +1493,8 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n",
p->props.properties.limits.minMemoryMapAlignment);
+ av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n",
+ p->props.properties.limits.nonCoherentAtomSize);
if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n",
p->hprops.minImportedHostPointerAlignment);
--
2.40.0
[-- Attachment #12: 0031-hwcontext_vulkan-add-support-for-descriptor-buffers.patch --]
[-- Type: text/x-diff, Size: 6728 bytes --]
From 75ae7c85e3b9a9f834da6d35f84e1099fa169816 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:15:02 +0100
Subject: [PATCH 31/97] hwcontext_vulkan: add support for descriptor buffers
---
libavutil/hwcontext_vulkan.c | 13 ++++++++++++-
libavutil/vulkan_functions.h | 9 +++++++++
libavutil/vulkan_loader.h | 1 +
3 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 2bc075d66f..96bc575291 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -91,6 +91,7 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceVulkan11Features device_features_1_1;
VkPhysicalDeviceVulkan12Features device_features_1_2;
VkPhysicalDeviceVulkan13Features device_features_1_3;
+ VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features;
/* Queues */
pthread_mutex_t **qf_mutex;
@@ -350,6 +351,7 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_SYNC2 },
+ { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, },
/* Imports/exports */
{ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -1333,9 +1335,13 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
};
+ VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT,
+ .pNext = &timeline_features,
+ };
VkPhysicalDeviceVulkan13Features dev_features_1_3 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
- .pNext = &timeline_features,
+ .pNext = &desc_buf_features,
};
VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
@@ -1361,6 +1367,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
p->device_features_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
p->device_features_1_2.pNext = &p->device_features_1_3;
p->device_features_1_3.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
+ p->device_features_1_3.pNext = &p->desc_buf_features;
+ p->desc_buf_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT;
ctx->free = vulkan_device_free;
/* Create an instance if not given one */
@@ -1390,8 +1398,11 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
goto end;
}
p->device_features_1_2.timelineSemaphore = 1;
+ p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress;
p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
+ p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer;
+ p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors;
dev_info.pNext = &hwctx->device_features;
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 103bff3013..f8739da8e5 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -38,6 +38,7 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_EXTERNAL_WIN32_SEM = 1ULL << 7, /* VK_KHR_external_semaphore_win32 */
#endif
FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
+ FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 9, /* VK_EXT_descriptor_buffer */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -121,6 +122,7 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateBuffer) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindBufferMemory) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferDeviceAddress) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyBuffer) \
\
/* Image */ \
@@ -142,6 +144,13 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorPool) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorSetLayout) \
\
+ /* Descriptor buffers */ \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutSizeEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutBindingOffsetEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdBindDescriptorBuffersEXT) \
+ MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdSetDescriptorBufferOffsetsEXT) \
+ \
/* DescriptorUpdateTemplate */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 3f1ee6aa46..e08777db17 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -48,6 +48,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif
+ { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, },
};
FFVulkanExtensions mask = 0x0;
--
2.40.0
[-- Attachment #13: 0032-hwcontext_vulkan-do-not-require-libdrm-to-map-VAAPI-.patch --]
[-- Type: text/x-diff, Size: 1624 bytes --]
From 6b178b7f2b8d5cc87022148a18e30084c50ea124 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 14 Mar 2023 22:10:05 +0100
Subject: [PATCH 32/97] hwcontext_vulkan: do not require libdrm to map VAAPI
devices
VAAPI is sadly on its way to becoming multiplatform.
---
libavutil/hwcontext_vulkan.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 96bc575291..318ccb622e 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -42,14 +42,17 @@
#include "vulkan.h"
#include "vulkan_loader.h"
+#if CONFIG_VAAPI
+#include "hwcontext_vaapi.h"
+#endif
+
#if CONFIG_LIBDRM
-#include <xf86drm.h>
-#include <drm_fourcc.h>
-#include "hwcontext_drm.h"
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
-#include "hwcontext_vaapi.h"
#endif
+#include <xf86drm.h>
+#include <drm_fourcc.h>
+#include "hwcontext_drm.h"
#endif
#if CONFIG_CUDA
@@ -1618,7 +1621,6 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
* by the following checks (e.g. non-PCIe ARM GPU), having an empty
* dev_select will mean it'll get picked. */
switch(src_ctx->type) {
-#if CONFIG_LIBDRM
#if CONFIG_VAAPI
case AV_HWDEVICE_TYPE_VAAPI: {
AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
@@ -1635,6 +1637,7 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
}
#endif
+#if CONFIG_LIBDRM
case AV_HWDEVICE_TYPE_DRM: {
AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
--
2.40.0
[-- Attachment #14: 0033-hwcontext_vulkan-use-VK_EXT_physical_device_drm-to-d.patch --]
[-- Type: text/x-diff, Size: 7221 bytes --]
From 5cd49775b5d1e25a4d3d2857421d2bc3b4a45ed2 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 14 Mar 2023 22:30:18 +0100
Subject: [PATCH 33/97] hwcontext_vulkan: use VK_EXT_physical_device_drm to
derive DRM to Vulkan
Finally, a way to directly identify a Vulkan device from a DRM device!
---
libavutil/hwcontext_vulkan.c | 58 ++++++++++++++++++++++++++++++++----
libavutil/vulkan_functions.h | 1 +
libavutil/vulkan_loader.h | 1 +
3 files changed, 55 insertions(+), 5 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 318ccb622e..b762c3422d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -50,6 +50,8 @@
#if CONFIG_VAAPI
#include <va/va_drmcommon.h>
#endif
+#include <sys/sysmacros.h>
+#include <sys/stat.h>
#include <xf86drm.h>
#include <drm_fourcc.h>
#include "hwcontext_drm.h"
@@ -355,6 +357,7 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG },
{ VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, FF_VK_EXT_SYNC2 },
{ VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, },
+ { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM },
/* Imports/exports */
{ VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -761,8 +764,11 @@ fail:
typedef struct VulkanDeviceSelection {
uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */
int has_uuid;
- const char *name; /* Will use this second unless NULL */
- uint32_t pci_device; /* Will use this third unless 0x0 */
+ uint32_t drm_major; /* Will use this second unless !has_drm */
+ uint32_t drm_minor; /* Will use this second unless !has_drm */
+ uint32_t has_drm; /* has drm node info */
+ const char *name; /* Will use this third unless NULL */
+ uint32_t pci_device; /* Will use this fourth unless 0x0 */
uint32_t vendor_id; /* Last resort to find something deterministic */
int index; /* Finally fall back to index */
} VulkanDeviceSelection;
@@ -789,6 +795,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
VkPhysicalDevice *devices = NULL;
VkPhysicalDeviceIDProperties *idp = NULL;
VkPhysicalDeviceProperties2 *prop = NULL;
+ VkPhysicalDeviceDrmPropertiesEXT *drm_prop = NULL;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL);
@@ -821,8 +828,20 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
goto end;
}
+ if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
+ drm_prop = av_calloc(num, sizeof(*drm_prop));
+ if (!drm_prop) {
+ err = AVERROR(ENOMEM);
+ goto end;
+ }
+ }
+
av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
for (int i = 0; i < num; i++) {
+ if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) {
+ drm_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT;
+ idp[i].pNext = &drm_prop[i];
+ }
idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
prop[i].pNext = &idp[i];
@@ -844,6 +863,20 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n");
err = AVERROR(ENODEV);
goto end;
+ } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) {
+ for (int i = 0; i < num; i++) {
+ if ((select->drm_major == drm_prop[i].primaryMajor &&
+ select->drm_minor == drm_prop[i].primaryMinor) ||
+ (select->drm_major == drm_prop[i].renderMajor &&
+ select->drm_minor == drm_prop[i].renderMinor)) {
+ choice = i;
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n",
+ select->drm_major, select->drm_minor);
+ err = AVERROR(ENODEV);
+ goto end;
} else if (select->name) {
av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
for (int i = 0; i < num; i++) {
@@ -903,6 +936,7 @@ end:
av_free(devices);
av_free(prop);
av_free(idp);
+ av_free(drm_prop);
return err;
}
@@ -1639,12 +1673,26 @@ static int vulkan_device_derive(AVHWDeviceContext *ctx,
#endif
#if CONFIG_LIBDRM
case AV_HWDEVICE_TYPE_DRM: {
+ int err;
+ struct stat drm_node_info;
+ drmDevice *drm_dev_info;
AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
- drmDevice *drm_dev_info;
- int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
+ err = fstat(src_hwctx->fd, &drm_node_info);
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to get node info from DRM fd: %s!\n",
+ av_err2str(AVERROR(errno)));
+ return AVERROR_EXTERNAL;
+ }
+
+ dev_select.drm_major = major(drm_node_info.st_dev);
+ dev_select.drm_minor = minor(drm_node_info.st_dev);
+ dev_select.has_drm = 1;
+
+ err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
if (err) {
- av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd!\n");
+ av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd: %s!\n",
+ av_err2str(AVERROR(errno)));
return AVERROR_EXTERNAL;
}
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index f8739da8e5..801e5334e2 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -39,6 +39,7 @@ typedef enum FFVulkanExtensions {
#endif
FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 9, /* VK_EXT_descriptor_buffer */
+ FF_VK_EXT_DEVICE_DRM = 1ULL << 10, /* VK_EXT_physical_device_drm */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index e08777db17..2e6dfb4f4f 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -44,6 +44,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_SEM },
{ VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_HOST_MEMORY },
{ VK_EXT_DEBUG_UTILS_EXTENSION_NAME, FF_VK_EXT_DEBUG_UTILS },
+ { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM },
#ifdef _WIN32
{ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
--
2.40.0
[-- Attachment #15: 0034-hwcontext_vulkan-add-functions-for-video-decoding.patch --]
[-- Type: text/x-diff, Size: 6714 bytes --]
From 18ce0859f09bb08b501fe2656b897368cb3a3086 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:34:36 +0100
Subject: [PATCH 34/97] hwcontext_vulkan: add functions for video decoding
---
libavutil/hwcontext_vulkan.c | 6 ++++++
libavutil/vulkan.c | 8 +++++---
libavutil/vulkan_functions.h | 20 ++++++++++++++++++++
libavutil/vulkan_loader.h | 4 ++++
4 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index b762c3422d..6df829b2ba 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -369,6 +369,12 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif
+
+ /* Video encoding/decoding */
+ { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE },
+ { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE },
+ { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 },
+ { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
};
/* Converts return values to strings */
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 403f0b1f27..6bf2c214b7 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -85,9 +85,11 @@ const char *ff_vk_ret2str(VkResult res)
CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
CASE(VK_ERROR_VALIDATION_FAILED_EXT);
CASE(VK_ERROR_INVALID_SHADER_NV);
- CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
- CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
- CASE(VK_ERROR_NOT_PERMITTED_EXT);
+ CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
default: return "Unknown error";
}
#undef CASE
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 801e5334e2..660fea2331 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -40,6 +40,10 @@ typedef enum FFVulkanExtensions {
FF_VK_EXT_SYNC2 = 1ULL << 8, /* VK_KHR_synchronization2 */
FF_VK_EXT_DESCRIPTOR_BUFFER = 1ULL << 9, /* VK_EXT_descriptor_buffer */
FF_VK_EXT_DEVICE_DRM = 1ULL << 10, /* VK_EXT_physical_device_drm */
+ FF_VK_EXT_VIDEO_QUEUE = 1ULL << 11, /* VK_KHR_video_queue */
+ FF_VK_EXT_VIDEO_DECODE_QUEUE = 1ULL << 12, /* VK_KHR_video_decode_queue */
+ FF_VK_EXT_VIDEO_DECODE_H264 = 1ULL << 13, /* VK_KHR_video_decode_h264 */
+ FF_VK_EXT_VIDEO_DECODE_H265 = 1ULL << 14, /* VK_KHR_video_decode_h265 */
FF_VK_EXT_NO_FLAG = 1ULL << 31,
} FFVulkanExtensions;
@@ -61,6 +65,8 @@ typedef enum FFVulkanExtensions {
MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDevice) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFeatures2) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceProperties) \
+ MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoCapabilitiesKHR) \
+ MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoFormatPropertiesKHR) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, DeviceWaitIdle) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDevice) \
\
@@ -160,6 +166,20 @@ typedef enum FFVulkanExtensions {
/* sync2 */ \
MACRO(1, 1, FF_VK_EXT_SYNC2, CmdPipelineBarrier2KHR) \
\
+ /* Video queue */ \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionParametersKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, GetVideoSessionMemoryRequirementsKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, BindVideoSessionMemoryKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdBeginVideoCodingKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdControlVideoCodingKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdEndVideoCodingKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionParametersKHR) \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionKHR) \
+ \
+ /* Video decoding */ \
+ MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \
+ \
/* Pipeline */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 2e6dfb4f4f..5380e21303 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -50,6 +50,10 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
#endif
{ VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, },
+ { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE },
+ { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE },
+ { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 },
+ { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
};
FFVulkanExtensions mask = 0x0;
--
2.40.0
[-- Attachment #16: 0035-hwcontext_vulkan-support-PREP_MODE_DECODING-in-prepa.patch --]
[-- Type: text/x-diff, Size: 5554 bytes --]
From 1a35987d8fa0ce24e72cf09f826c4dd8458b191e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 15:18:21 +0100
Subject: [PATCH 35/97] hwcontext_vulkan: support PREP_MODE_DECODING in
prepare_frame()
---
libavutil/hwcontext_vulkan.c | 70 ++++++++++++++++++++++++++----------
1 file changed, 51 insertions(+), 19 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 6df829b2ba..4c6229fe0c 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2020,7 +2020,9 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
enum PrepMode {
PREP_MODE_WRITE,
PREP_MODE_EXTERNAL_EXPORT,
- PREP_MODE_EXTERNAL_IMPORT
+ PREP_MODE_EXTERNAL_IMPORT,
+ PREP_MODE_DECODING_DST,
+ PREP_MODE_DECODING_DPB,
};
static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
@@ -2029,7 +2031,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
int err;
uint32_t src_qf, dst_qf;
VkImageLayout new_layout;
- VkAccessFlags new_access;
+ VkAccessFlags2 new_access;
AVVulkanFramesContext *vkfc = hwfc->hwctx;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
@@ -2037,7 +2039,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
- VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+ VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+ VkDependencyInfo dep_info;
VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -2093,32 +2096,55 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
s_info.pWaitDstStageMask = wait_st;
s_info.waitSemaphoreCount = planes;
break;
+ case PREP_MODE_DECODING_DST:
+ new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
+ new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
+ src_qf = VK_QUEUE_FAMILY_IGNORED;
+ dst_qf = VK_QUEUE_FAMILY_IGNORED;
+ break;
+ case PREP_MODE_DECODING_DPB:
+ new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
+ new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
+ src_qf = VK_QUEUE_FAMILY_IGNORED;
+ dst_qf = VK_QUEUE_FAMILY_IGNORED;
+ break;
}
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
for (int i = 0; i < planes; i++) {
- img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
- img_bar[i].srcAccessMask = 0x0;
- img_bar[i].dstAccessMask = new_access;
- img_bar[i].oldLayout = frame->layout[i];
- img_bar[i].newLayout = new_layout;
- img_bar[i].srcQueueFamilyIndex = src_qf;
- img_bar[i].dstQueueFamilyIndex = dst_qf;
- img_bar[i].image = frame->img[i];
- img_bar[i].subresourceRange.levelCount = 1;
- img_bar[i].subresourceRange.layerCount = 1;
- img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ img_bar[i] = (VkImageMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+ .pNext = NULL,
+ .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
+ .srcAccessMask = 0x0,
+ .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+ .dstAccessMask = new_access,
+ .oldLayout = frame->layout[i],
+ .newLayout = new_layout,
+ .srcQueueFamilyIndex = src_qf,
+ .dstQueueFamilyIndex = dst_qf,
+ .image = frame->img[i],
+ .subresourceRange = (VkImageSubresourceRange) {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .levelCount = 1,
+ .layerCount = 1,
+ },
+ };
frame->layout[i] = img_bar[i].newLayout;
frame->access[i] = img_bar[i].dstAccessMask;
}
- vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_TRANSFER_BIT,
- 0, 0, NULL, 0, NULL, planes, img_bar);
+ dep_info = (VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = planes,
+ };
+
+ vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
vkfc->unlock_frame(hwfc, frame);
@@ -2359,7 +2385,13 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
if (err)
goto fail;
- err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
+ if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
+ !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
+ err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DPB);
+ else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
+ err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DST);
+ else
+ err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
if (err)
goto fail;
--
2.40.0
[-- Attachment #17: 0036-hwcontext_vulkan-load-query-related-functions.patch --]
[-- Type: text/x-diff, Size: 1980 bytes --]
From 3239bb4079e3840e04919c08af5ec87c82b34271 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 10:09:34 +0100
Subject: [PATCH 36/97] hwcontext_vulkan: load query-related functions
Needed for both encoding and decoding.
---
libavutil/vulkan_functions.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 660fea2331..6396d8d714 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -163,6 +163,15 @@ typedef enum FFVulkanExtensions {
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \
\
+ /* Queries */ \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetQueryPoolResults) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, ResetQueryPool) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdBeginQuery) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdEndQuery) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdResetQueryPool) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyQueryPool) \
+ \
/* sync2 */ \
MACRO(1, 1, FF_VK_EXT_SYNC2, CmdPipelineBarrier2KHR) \
\
--
2.40.0
[-- Attachment #18: 0037-hwcontext_vulkan-enable-GPU-assisted-validation-when.patch --]
[-- Type: text/x-diff, Size: 1793 bytes --]
From 88e92ce054692b86ad3a04e18b56b21349d32d2b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 14 Mar 2023 20:45:45 +0100
Subject: [PATCH 37/97] hwcontext_vulkan: enable GPU-assisted validation when
debugging
---
libavutil/hwcontext_vulkan.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 4c6229fe0c..96afeb471e 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -695,6 +695,9 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
LIBAVUTIL_VERSION_MINOR,
LIBAVUTIL_VERSION_MICRO),
};
+ VkValidationFeaturesEXT validation_features = {
+ .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
+ };
VkInstanceCreateInfo inst_props = {
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
.pApplicationInfo = &application_info,
@@ -725,6 +728,17 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
if (err < 0)
goto fail;
+ if (debug_mode) {
+ VkValidationFeatureEnableEXT feat_list[] = {
+ VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT,
+ VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT,
+ VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
+ };
+ validation_features.pEnabledValidationFeatures = feat_list;
+ validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list);
+ inst_props.pNext = &validation_features;
+ }
+
/* Try to create the instance */
ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);
--
2.40.0
[-- Attachment #19: 0038-vulkan-lock-queues-before-submitting-operations.patch --]
[-- Type: text/x-diff, Size: 1087 bytes --]
From 956f043e9f233675856336e028cc8ee7e35c71f5 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:04:28 +0100
Subject: [PATCH 38/97] vulkan: lock queues before submitting operations
---
libavutil/vulkan.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 6bf2c214b7..ad13b8f3cb 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
return AVERROR_EXTERNAL;
}
+ s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
+ e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
+
ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
+
+ s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
+ e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
+
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
ff_vk_ret2str(ret));
--
2.40.0
[-- Attachment #20: 0039-vulkan-define-VK_NO_PROTOTYPES.patch --]
[-- Type: text/x-diff, Size: 573 bytes --]
From d81aa7b001995a8cf65590934a7b75a51a63b192 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:04:48 +0100
Subject: [PATCH 39/97] vulkan: define VK_NO_PROTOTYPES
---
libavutil/vulkan.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 90922c6cf3..11ea8d609e 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -19,6 +19,8 @@
#ifndef AVUTIL_VULKAN_H
#define AVUTIL_VULKAN_H
+#define VK_NO_PROTOTYPES
+
#include "pixdesc.h"
#include "bprint.h"
#include "hwcontext.h"
--
2.40.0
[-- Attachment #21: 0040-vulkan-add-additional-error-codes.patch --]
[-- Type: text/x-diff, Size: 1553 bytes --]
From 5d903e29cf72172aff97fb57ee14b433e73dd498 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:54:35 +0100
Subject: [PATCH 40/97] vulkan: add additional error codes
---
libavutil/vulkan.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ad13b8f3cb..f2846e628a 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -78,6 +78,12 @@ const char *ff_vk_ret2str(VkResult res)
CASE(VK_ERROR_TOO_MANY_OBJECTS);
CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
CASE(VK_ERROR_FRAGMENTED_POOL);
+ CASE(VK_ERROR_UNKNOWN);
+ CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+ CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ CASE(VK_ERROR_FRAGMENTATION);
+ CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
+ CASE(VK_PIPELINE_COMPILE_REQUIRED);
CASE(VK_ERROR_SURFACE_LOST_KHR);
CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
CASE(VK_SUBOPTIMAL_KHR);
@@ -90,6 +96,13 @@ const char *ff_vk_ret2str(VkResult res)
CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR);
CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR);
CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR);
+ CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
+ CASE(VK_ERROR_NOT_PERMITTED_KHR);
+ CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
+ CASE(VK_THREAD_IDLE_KHR);
+ CASE(VK_THREAD_DONE_KHR);
+ CASE(VK_OPERATION_DEFERRED_KHR);
+ CASE(VK_OPERATION_NOT_DEFERRED_KHR);
default: return "Unknown error";
}
#undef CASE
--
2.40.0
[-- Attachment #22: 0041-vulkan-fix-comment-statement-about-exec_queue-blocki.patch --]
[-- Type: text/x-diff, Size: 919 bytes --]
From ea5d840aa0eabbb690b164802c7e2fb96262ce21 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 10 Mar 2022 21:41:59 +0100
Subject: [PATCH 41/97] vulkan: fix comment statement about exec_queue blocking
---
libavutil/vulkan.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 11ea8d609e..107c12a746 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -390,9 +390,7 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
VkPipelineStageFlagBits in_wait_dst_flag);
/**
- * Submits a command buffer to the queue for execution.
- * Will block until execution has finished in order to simplify resource
- * management.
+ * Submits a command buffer to the queue for execution. Will not block.
*/
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
--
2.40.0
[-- Attachment #23: 0042-vulkan-add-pNext-argument-to-ff_vk_create_buf.patch --]
[-- Type: text/x-diff, Size: 3809 bytes --]
From 74b3996cb235e80dfb140b107946b4657371b7ea Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 17 Mar 2022 12:23:56 +0100
Subject: [PATCH 42/97] vulkan: add pNext argument to ff_vk_create_buf()
---
libavfilter/vf_gblur_vulkan.c | 2 +-
libavfilter/vf_overlay_vulkan.c | 2 +-
libavfilter/vf_scale_vulkan.c | 2 +-
libavutil/vulkan.c | 4 ++--
libavutil/vulkan.h | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c
index e6ffc8c073..80d1dc61c0 100644
--- a/libavfilter/vf_gblur_vulkan.c
+++ b/libavfilter/vf_gblur_vulkan.c
@@ -174,7 +174,7 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
RET(ff_vk_init_pipeline_layout(&s->vkctx, pl));
RET(ff_vk_init_compute_pipeline(&s->vkctx, pl));
- RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize,
+ RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0));
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index 6db7baddfd..7a66cf12ad 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -181,7 +181,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
} *par;
err = ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par),
+ sizeof(*par), NULL,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
if (err)
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 3b09f0dcc1..d14b32277d 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -253,7 +253,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
}
RET(ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par),
+ sizeof(*par), NULL,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index f2846e628a..ae6adc5104 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -205,7 +205,7 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
return 0;
}
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
int err;
@@ -215,7 +215,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
VkBufferCreateInfo buf_spawn = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = NULL,
+ .pNext = pNext,
.usage = usage,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.size = size, /* Gets FFALIGNED during alloc if host visible
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 107c12a746..c6cfb779fc 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -397,7 +397,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
/**
* Create a VkBuffer with the specified parameters.
*/
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
--
2.40.0
[-- Attachment #24: 0043-vulkan-add-ff_vk_qf_fill.patch --]
[-- Type: text/x-diff, Size: 2777 bytes --]
From 420269d507cdc3e81180a687b806d8481d21bcb4 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:03:58 +0100
Subject: [PATCH 43/97] vulkan: add ff_vk_qf_fill()
---
libavutil/vulkan.c | 25 +++++++++++++++++++++++++
libavutil/vulkan.h | 9 +++++++++
2 files changed, 34 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index ae6adc5104..eceef295a8 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,6 +108,31 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
+/* Fills s->qfs/s->nb_qfs with the unique queue family indices taken from
+ * the hardware device context (graphics, transfer, compute, decode, encode,
+ * in that order). */
+void ff_vk_qf_fill(FFVulkanContext *s)
+{
+    const int families[] = {
+        s->hwctx->queue_family_index,
+        s->hwctx->queue_family_tx_index,
+        s->hwctx->queue_family_comp_index,
+        s->hwctx->queue_family_decode_index,
+        s->hwctx->queue_family_encode_index,
+    };
+    const int nb_families = sizeof(families) / sizeof(families[0]);
+
+    s->nb_qfs = 0;
+
+    for (int i = 0; i < nb_families; i++) {
+        int known = 0;
+
+        /* Negative indices are not valid queue families and are skipped */
+        if (families[i] < 0)
+            continue;
+
+        /* Only compare against entries written during this call; slots at
+         * or past nb_qfs may hold stale values. */
+        for (int j = 0; j < s->nb_qfs; j++) {
+            if (s->qfs[j] == (uint32_t)families[i]) {
+                known = 1;
+                break;
+            }
+        }
+
+        if (!known)
+            s->qfs[s->nb_qfs++] = families[i];
+    }
+}
+
void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
VkQueueFlagBits dev_family, int nb_queues)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index c6cfb779fc..4540c3eda1 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -207,6 +207,9 @@ typedef struct FFVulkanContext {
AVHWFramesContext *frames;
AVVulkanFramesContext *hwfc;
+ uint32_t qfs[5];
+ int nb_qfs;
+
FFVkSPIRVCompiler *spirv_compiler;
/* Properties */
@@ -249,6 +252,12 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
*/
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
+/**
+ * Setup the queue families from the hardware device context.
+ * Necessary for image creation to work.
+ */
+void ff_vk_qf_fill(FFVulkanContext *s);
+
/**
* Initialize a queue family with a specific number of queues.
* If nb_queues == 0, use however many queues the queue family has.
--
2.40.0
[-- Attachment #25: 0044-vulkan-add-ff_vk_image_create.patch --]
[-- Type: text/x-diff, Size: 4892 bytes --]
From 9fd818a7dab5189b351b17b691d7b0588e6d7c4f Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 13:05:59 +0100
Subject: [PATCH 44/97] vulkan: add ff_vk_image_create()
---
libavutil/vulkan.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++
libavutil/vulkan.h | 11 ++++++
2 files changed, 100 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index eceef295a8..212f134466 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -401,6 +401,95 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}
+/* Creates a 2D image of width x height and a timeline semaphore, storing
+ * both in slot 0 of the frame, and initializes the frame's sync state.
+ * Returns 0 on success, AVERROR(EINVAL) if image creation fails and
+ * AVERROR_EXTERNAL if semaphore creation fails.
+ * NOTE(review): idx, mem and alloc_pnext are accepted but not used by this
+ * function; no memory is allocated or bound here - confirm against callers. */
+int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx,
+                       int width, int height, VkFormat fmt, VkImageTiling tiling,
+                       VkImageUsageFlagBits usage, VkImageCreateFlags flags,
+                       void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext)
+{
+    int err;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    AVVulkanDeviceContext *hwctx = s->hwctx;
+
+    VkExportSemaphoreCreateInfo ext_sem_info = {
+        .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
+#ifdef _WIN32
+        .handleTypes = IsWindows8OrGreater()
+            ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
+            : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
+#else
+        .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+#endif
+    };
+
+    /* The export info is only chained in when the matching external
+     * semaphore extension is enabled. */
+    VkSemaphoreTypeCreateInfo sem_type_info = {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
+#ifdef _WIN32
+        .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
+#else
+        .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
+#endif
+        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
+        .initialValue = 0,
+    };
+
+    VkSemaphoreCreateInfo sem_spawn = {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+        .pNext = &sem_type_info,
+    };
+
+    /* Create the image */
+    VkImageCreateInfo create_info = {
+        .sType                 = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .pNext                 = create_pnext,
+        .imageType             = VK_IMAGE_TYPE_2D,
+        .format                = fmt,
+        .extent.width          = width,
+        .extent.height         = height,
+        .extent.depth          = 1,
+        .mipLevels             = 1,
+        .arrayLayers           = 1,
+        .flags                 = flags,
+        .tiling                = tiling,
+        .initialLayout         = VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage                 = usage,
+        .samples               = VK_SAMPLE_COUNT_1_BIT,
+        .pQueueFamilyIndices   = s->qfs,
+        .queueFamilyIndexCount = s->nb_qfs,
+        .sharingMode           = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+                                                 VK_SHARING_MODE_EXCLUSIVE,
+    };
+
+    ret = vk->CreateImage(hwctx->act_dev, &create_info,
+                          hwctx->alloc, &f->img[0]);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n",
+               ff_vk_ret2str(ret));
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* Create semaphore */
+    ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
+                              hwctx->alloc, &f->sem[0]);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
+               ff_vk_ret2str(ret));
+        /* Do not leak the image created just above */
+        vk->DestroyImage(hwctx->act_dev, f->img[0], hwctx->alloc);
+        f->img[0] = VK_NULL_HANDLE;
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* With more than one queue family the image is created as concurrent,
+     * so no single family is recorded as its owner. */
+    f->queue_family[0] = s->nb_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : s->qfs[0];
+    f->layout[0] = create_info.initialLayout;
+    f->access[0] = 0x0;
+    f->sem_value[0] = 0;
+
+    f->flags = 0x0;
+    f->tiling = tiling;
+
+    return 0;
+
+fail:
+    return err;
+}
+
int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
VkShaderStageFlagBits stage)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 4540c3eda1..a0baba7fc8 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -427,6 +427,17 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
*/
void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
+/**
+ * Creates an image, allocates and binds memory in the given
+ * idx value of the dst frame. If mem is non-NULL, then no memory will be
+ * allocated, but instead the given memory will be bound to the image.
+ */
+int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx,
+ int width, int height, VkFormat fmt, VkImageTiling tiling,
+ VkImageUsageFlagBits usage, VkImageCreateFlags flags,
+ void *create_pnext,
+ VkDeviceMemory *mem, void *alloc_pnext);
+
/**
* Frees the main Vulkan context.
*/
--
2.40.0
[-- Attachment #26: 0045-vulkan-expose-ff_vk_alloc_mem.patch --]
[-- Type: text/x-diff, Size: 2666 bytes --]
From 4db3bdbd5fddef0d75cfb1b03e8298c9149735b9 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 23 Nov 2022 14:03:34 +0100
Subject: [PATCH 45/97] vulkan: expose ff_vk_alloc_mem()
---
libavutil/vulkan.c | 15 ++++++++-------
libavutil/vulkan.h | 7 +++++++
2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 212f134466..7870de351d 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -174,9 +174,9 @@ void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
}
-static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
- VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
- VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
VkResult ret;
int index = -1;
@@ -225,7 +225,8 @@ static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
return AVERROR(ENOMEM);
}
- *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
+ if (mem_flags)
+ *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
return 0;
}
@@ -279,9 +280,9 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
if (use_ded_mem)
ded_alloc.buffer = buf->buf;
- err = vk_alloc_mem(s, &req.memoryRequirements, flags,
- use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
- &buf->flags, &buf->mem);
+ err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
+ use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ &buf->flags, &buf->mem);
if (err)
return err;
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index a0baba7fc8..85836a7807 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -258,6 +258,13 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
*/
void ff_vk_qf_fill(FFVulkanContext *s);
+/**
+ * Allocate device memory.
+ */
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+
/**
* Initialize a queue family with a specific number of queues.
* If nb_queues == 0, use however many queues the queue family has.
--
2.40.0
[-- Attachment #27: 0046-vulkan-support-ignoring-memory-properties-when-alloc.patch --]
[-- Type: text/x-diff, Size: 1648 bytes --]
From 25b9c80fd28c91b72e22a60bc484f58466bbd067 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 29 Nov 2022 00:43:19 +0000
Subject: [PATCH 46/97] vulkan: support ignoring memory properties when
allocating
---
libavutil/vulkan.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7870de351d..b1553c6537 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -188,7 +188,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
};
/* Align if we need to */
- if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
alloc_info.allocationSize = req->size;
@@ -201,7 +201,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
continue;
/* The memory type flags must include our properties */
- if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+ if ((req_flags != UINT32_MAX) &&
+ ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
continue;
/* Found a suitable memory type */
@@ -210,7 +211,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
}
if (index < 0) {
- av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+ av_log(s->device, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
req_flags);
return AVERROR(EINVAL);
}
--
2.40.0
[-- Attachment #28: 0047-vulkan-allow-alloc-pNext-in-ff_vk_create_buf.patch --]
[-- Type: text/x-diff, Size: 1878 bytes --]
From e20962a956444224b34d82f9a5936fae7e43bdf6 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 15 Dec 2022 17:43:27 +0100
Subject: [PATCH 47/97] vulkan: allow alloc pNext in ff_vk_create_buf
---
libavutil/vulkan.c | 5 +++--
libavutil/vulkan.h | 3 ++-
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index b1553c6537..0bb5b1eebf 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
return 0;
}
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
int err;
@@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
};
VkMemoryDedicatedAllocateInfo ded_alloc = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = NULL,
+ .pNext = alloc_pNext,
};
VkMemoryDedicatedRequirements ded_req = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 85836a7807..d75be26977 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
/**
* Create a VkBuffer with the specified parameters.
*/
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
--
2.40.0
[-- Attachment #29: 0048-vulkan-do-not-wait-for-device-idle-when-destroying-b.patch --]
[-- Type: text/x-diff, Size: 786 bytes --]
From 3cf6cb281680b71c0db38fae50eed62902e11097 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 16 Dec 2022 00:37:53 +0100
Subject: [PATCH 48/97] vulkan: do not wait for device idle when destroying
buffers
This should be done explicitly.
---
libavutil/vulkan.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 0bb5b1eebf..0250f5aa39 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -396,8 +396,6 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
if (!buf || !s->hwctx)
return;
- vk->DeviceWaitIdle(s->hwctx->act_dev);
-
if (buf->buf != VK_NULL_HANDLE)
vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
if (buf->mem != VK_NULL_HANDLE)
--
2.40.0
[-- Attachment #30: 0049-vulkan-add-size-tracking-to-buffer-structs.patch --]
[-- Type: text/x-diff, Size: 964 bytes --]
From bf288980d0c2034acda25e378ec25b6268986c3e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 16 Dec 2022 01:47:42 +0100
Subject: [PATCH 49/97] vulkan: add size tracking to buffer structs
---
libavutil/vulkan.c | 2 ++
libavutil/vulkan.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 0250f5aa39..faf5cd5508 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -295,6 +295,8 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
return AVERROR_EXTERNAL;
}
+ buf->size = size;
+
return 0;
}
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index d75be26977..f2c4a79102 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -94,6 +94,7 @@ typedef struct FFVkBuffer {
VkBuffer buf;
VkDeviceMemory mem;
VkMemoryPropertyFlagBits flags;
+ size_t size;
} FFVkBuffer;
typedef struct FFVkQueueFamilyCtx {
--
2.40.0
[-- Attachment #31: 0050-vulkan-use-device-properties-2-and-add-a-convenience.patch --]
[-- Type: text/x-diff, Size: 2388 bytes --]
From 736eaadce249af4eae5978d781efaa815a241d44 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 19 Dec 2022 07:57:22 +0100
Subject: [PATCH 50/97] vulkan: use device properties 2 and add a convenience
loader function
---
libavutil/vulkan.c | 18 +++++++++++++++++-
libavutil/vulkan.h | 8 +++++++-
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index faf5cd5508..8a583248d1 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,6 +108,22 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
+/* Queries the physical device and stores its general properties, driver
+ * properties and memory properties in the context. */
+void ff_vk_load_props(FFVulkanContext *s)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkPhysicalDeviceDriverProperties *drv = &s->driver_props;
+    VkPhysicalDeviceProperties2 *dev_props = &s->props;
+
+    /* Reset both structs; the driver properties are chained behind the
+     * main properties so a single query fills both. */
+    *drv = (VkPhysicalDeviceDriverProperties) {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+    };
+    *dev_props = (VkPhysicalDeviceProperties2) {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
+        .pNext = drv,
+    };
+
+    vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, dev_props);
+    vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+}
+
void ff_vk_qf_fill(FFVulkanContext *s)
{
s->nb_qfs = 0;
@@ -189,7 +205,7 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
/* Align if we need to */
if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
- req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
+ req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment);
alloc_info.allocationSize = req->size;
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index f2c4a79102..2cd2c1f8fa 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -197,7 +197,8 @@ typedef struct FFVulkanContext {
FFVulkanFunctions vkfn;
FFVulkanExtensions extensions;
- VkPhysicalDeviceProperties props;
+ VkPhysicalDeviceProperties2 props;
+ VkPhysicalDeviceDriverProperties driver_props;
VkPhysicalDeviceMemoryProperties mprops;
AVBufferRef *device_ref;
@@ -243,6 +244,11 @@ extern const VkComponentMapping ff_comp_identity_map;
*/
const char *ff_vk_ret2str(VkResult res);
+/**
+ * Loads props/mprops/driver_props
+ */
+void ff_vk_load_props(FFVulkanContext *s);
+
/**
* Returns 1 if the image is any sort of supported RGB
*/
--
2.40.0
[-- Attachment #32: 0051-vulkan-minor-indent-fix-add-support-for-synchronous-.patch --]
[-- Type: text/x-diff, Size: 2945 bytes --]
From aa1b1b408493c6237b56d8157e39acee7ea2bac8 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 05:02:50 +0100
Subject: [PATCH 51/97] vulkan: minor indent fix, add support for synchronous
submission/waiting
---
libavutil/vulkan.c | 20 ++++++++++++++++++--
libavutil/vulkan.h | 9 +++++++++
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 8a583248d1..b5e08ecc46 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -564,7 +564,7 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
/* Create command pool */
ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
- s->hwctx->alloc, &e->pool);
+ s->hwctx->alloc, &e->pool);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
ff_vk_ret2str(ret));
@@ -631,11 +631,13 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
- } else {
+ } else if (!q->synchronous) {
vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
}
+ q->synchronous = 0;
+
/* Discard queue dependencies */
ff_vk_discard_exec_deps(e);
@@ -788,9 +790,23 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
for (int i = 0; i < e->sem_sig_cnt; i++)
*e->sem_sig_val_dst[i] += 1;
+ q->submitted = 1;
+
return 0;
}
+/* Waits for the current queue's fence and resets it, then marks the queue
+ * as synchronous. Does nothing if the current queue has not been submitted. */
+void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    FFVkQueueCtx *cur = e->queues + e->qf->cur_queue;
+
+    if (cur->submitted) {
+        vk->WaitForFences(s->hwctx->act_dev, 1, &cur->fence, VK_TRUE, UINT64_MAX);
+        vk->ResetFences(s->hwctx->act_dev, 1, &cur->fence);
+        /* Recorded so ff_vk_start_exec_recording() skips its own wait */
+        cur->synchronous = 1;
+    }
+}
+
int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
AVBufferRef **deps, int nb_deps)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 2cd2c1f8fa..a17cc4a34e 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -147,6 +147,9 @@ typedef struct FFVkQueueCtx {
VkFence fence;
VkQueue queue;
+ int synchronous;
+ int submitted;
+
/* Buffer dependencies */
AVBufferRef **buf_deps;
int nb_buf_deps;
@@ -417,6 +420,12 @@ int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
*/
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
+/**
+ * Wait on a command buffer's execution. Mainly useful for debugging and
+ * development.
+ */
+void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e);
+
/**
* Create a VkBuffer with the specified parameters.
*/
--
2.40.0
[-- Attachment #33: 0052-vulkan-add-support-for-queries.patch --]
[-- Type: text/x-diff, Size: 7363 bytes --]
From 65617d9347c25f88bfe769ce7a5474196e843d5b Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 05:03:32 +0100
Subject: [PATCH 52/97] vulkan: add support for queries
---
libavutil/vulkan.c | 118 +++++++++++++++++++++++++++++++++++++++++++++
libavutil/vulkan.h | 30 ++++++++++++
2 files changed, 148 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index b5e08ecc46..de0c300c0e 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -592,6 +592,114 @@ int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
return 0;
}
+/* Creates a query pool holding nb_queries queries for every queue of the
+ * execution context, plus a zeroed host buffer to receive the results.
+ * Returns 0 on success, AVERROR(EINVAL) if the context already has a pool,
+ * AVERROR(ENOMEM) if the result buffer cannot be allocated and
+ * AVERROR_EXTERNAL if the pool cannot be created. */
+int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
+                                     int nb_queries, VkQueryType type,
+                                     int elem_64bits, void *create_pnext)
+{
+    VkResult ret;
+    size_t qd_size;
+    int nb_results = nb_queries;
+    int nb_statuses = 0 /* Once RADV has support, = nb_queries */;
+    int status_stride = 2;
+    int result_elem_size = elem_64bits ? 8 : 4;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkQueryPoolCreateInfo query_pool_info = {
+        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+        .pNext = create_pnext,
+        .queryType = type,
+        .queryCount = nb_queries*e->qf->nb_queues,
+    };
+
+    if (e->query.pool)
+        return AVERROR(EINVAL);
+
+    /* Video encode queries produce two results per query */
+    if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
+        status_stride = 3; /* skip,skip,result,skip,skip,result */
+        nb_results *= 2;
+    } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
+        status_stride = 1;
+        nb_results = 0;
+    }
+
+    /* Size of the result data for one queue */
+    qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size;
+
+    e->query.data = av_mallocz(e->qf->nb_queues*qd_size);
+    if (!e->query.data)
+        return AVERROR(ENOMEM);
+
+    ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
+                              s->hwctx->alloc, &e->query.pool);
+    if (ret != VK_SUCCESS) {
+        /* Release the buffer so a later retry does not overwrite and leak it */
+        av_freep(&e->query.data);
+        return AVERROR_EXTERNAL;
+    }
+
+    e->query.data_per_queue = qd_size;
+    e->query.nb_queries     = nb_queries;
+    e->query.nb_results     = nb_results;
+    e->query.nb_statuses    = nb_statuses;
+    e->query.elem_64bits    = elem_64bits;
+    e->query.status_stride  = status_stride;
+
+    return 0;
+}
+
+/* Reads the query pool results of the current queue into the context's
+ * host buffer.
+ * data and status are both optional. If non-NULL, *data receives a pointer
+ * to the current queue's result data (NULL if the queue has not been
+ * submitted) and *status receives the value folded from the per-query
+ * statuses (left at 0 when no statuses are tracked).
+ * Returns 0 on success, or when nothing was submitted, and AVERROR_EXTERNAL
+ * if the results cannot be retrieved. */
+int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
+                                     int query_idx, void **data, int64_t *status)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    uint8_t *qd;
+    int32_t *res32;
+    int64_t *res64;
+    int64_t res = 0;
+    VkQueryResultFlags qf = 0;
+    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+
+    if (!q->submitted) {
+        /* data is optional here too, as on the success path below */
+        if (data)
+            *data = NULL;
+        return 0;
+    }
+
+    qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue;
+    qf |= e->query.nb_results && e->query.nb_statuses ?
+          VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
+    qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0;
+    /* Status words are read starting right after the result words */
+    res32 = (int32_t *)(qd + e->query.nb_results*4);
+    res64 = (int64_t *)(qd + e->query.nb_results*8);
+
+    ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool,
+                                  query_idx,
+                                  e->query.nb_queries,
+                                  e->query.data_per_queue, qd,
+                                  e->query.elem_64bits ? 8 : 4, qf);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    /* Fold the statuses into one value: any value lower than the running
+     * result wins; otherwise, while the result is non-negative, a larger
+     * value wins.
+     * NOTE(review): each iteration both indexes with i and advances the
+     * pointer by status_stride - confirm this is the intended stride. */
+    if (e->query.nb_statuses && e->query.elem_64bits) {
+        for (int i = 0; i < e->query.nb_queries; i++) {
+            res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
+                  res64[i] : res;
+            res64 += e->query.status_stride;
+        }
+    } else if (e->query.nb_statuses) {
+        for (int i = 0; i < e->query.nb_queries; i++) {
+            res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
+                  res32[i] : res;
+            res32 += e->query.status_stride;
+        }
+    }
+
+    if (data)
+        *data = qd;
+    if (status)
+        *status = res;
+
+    return 0;
+}
+
void ff_vk_discard_exec_deps(FFVkExecContext *e)
{
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
@@ -648,6 +756,12 @@ int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
return AVERROR_EXTERNAL;
}
+ if (e->query.pool) {
+ e->query.idx = e->qf->cur_queue*e->query.nb_queries;
+ vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool,
+ e->query.idx, e->query.nb_queries);
+ }
+
return 0;
}
@@ -790,6 +904,7 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
for (int i = 0; i < e->sem_sig_cnt; i++)
*e->sem_sig_val_dst[i] += 1;
+ e->query.idx = e->qf->cur_queue*e->query.nb_queries;
q->submitted = 1;
return 0;
@@ -1483,7 +1598,10 @@ static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
if (e->pool)
vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+ if (e->query.pool)
+ vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc);
+ av_freep(&e->query.data);
av_freep(&e->bufs);
av_freep(&e->queues);
av_freep(&e->sem_sig);
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index a17cc4a34e..4bd1c9fc00 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -168,6 +168,19 @@ typedef struct FFVkExecContext {
VkCommandBuffer *bufs;
FFVkQueueCtx *queues;
+ struct {
+ int idx;
+ VkQueryPool pool;
+ uint8_t *data;
+
+ int nb_queries;
+ int nb_results;
+ int nb_statuses;
+ int elem_64bits;
+ size_t data_per_queue;
+ int status_stride;
+ } query;
+
AVBufferRef ***deps;
int *nb_deps;
int *dep_alloc_size;
@@ -371,6 +384,23 @@ void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
FFVkQueueFamilyCtx *qf);
+/**
+ * Create a query pool for a command context.
+ * elem_64bits exists to troll driver devs for compliance. All results
+ * and statuses returned should be 32 bits, unless this is set, then it's 64bits.
+ */
+int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
+ int nb_queries, VkQueryType type,
+ int elem_64bits, void *create_pnext);
+
+/**
+ * Get results for query.
+ * Returns 0 on success, or a negative AVERROR code on failure.
+ * Sets *data to the query data and *status to the status of the queries
+ * (both pointers are optional). */
+int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
+ int query_idx, void **data, int64_t *status);
+
/**
* Begin recording to the command buffer. Previous execution must have been
* completed, which ff_vk_submit_exec_queue() will ensure.
--
2.40.0
[-- Attachment #34: 0053-vulkan-add-support-for-retrieving-queue-query-and-vi.patch --]
[-- Type: text/x-diff, Size: 7602 bytes --]
From 786a7d08bc90a88f77057fc31d0943dcb91e4558 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 22 Dec 2022 17:37:51 +0100
Subject: [PATCH 53/97] vulkan: add support for retrieving queue, query and
video properties
---
libavutil/vulkan.c | 87 ++++++++++++++++++++++++++++++------
libavutil/vulkan.h | 14 ++++--
libavutil/vulkan_functions.h | 1 +
3 files changed, 85 insertions(+), 17 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index de0c300c0e..d045ff83c1 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
-void ff_vk_load_props(FFVulkanContext *s)
+int ff_vk_load_props(FFVulkanContext *s)
{
+ uint32_t qc = 0;
FFVulkanFunctions *vk = &s->vkfn;
s->driver_props = (VkPhysicalDeviceDriverProperties) {
@@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s)
.pNext = &s->driver_props,
};
+
vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+ vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
+
+ if (s->qf_props)
+ return 0;
+
+    /* One entry per queue family in each of the three arrays; they are
+     * chained together through pNext below. */
+    s->qf_props = av_mallocz(sizeof(*s->qf_props)*qc);
+    if (!s->qf_props)
+        return AVERROR(ENOMEM);
+
+    s->query_props = av_mallocz(sizeof(*s->query_props)*qc);
+    if (!s->query_props) { /* was testing qf_props again, missing this failure */
+        av_freep(&s->qf_props);
+        return AVERROR(ENOMEM);
+    }
+
+    s->video_props = av_mallocz(sizeof(*s->video_props)*qc);
+    if (!s->video_props) {
+        av_freep(&s->qf_props);
+        av_freep(&s->query_props);
+        return AVERROR(ENOMEM);
+    }
+
+ for (uint32_t i = 0; i < qc; i++) {
+ s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR,
+ };
+ s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+ .pNext = &s->query_props[i],
+ };
+ s->qf_props[i] = (VkQueueFamilyProperties2) {
+ .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+ .pNext = &s->video_props[i],
+ };
+ }
+
+ vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
+
+ return 0;
}
void ff_vk_qf_fill(FFVulkanContext *s)
@@ -149,40 +190,54 @@ void ff_vk_qf_fill(FFVulkanContext *s)
s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
}
-void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues)
+int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
+ int ret, num;
+
switch (dev_family) {
case VK_QUEUE_GRAPHICS_BIT:
- qf->queue_family = s->hwctx->queue_family_index;
- qf->actual_queues = s->hwctx->nb_graphics_queues;
+ ret = s->hwctx->queue_family_index;
+ num = s->hwctx->nb_graphics_queues;
break;
case VK_QUEUE_COMPUTE_BIT:
- qf->queue_family = s->hwctx->queue_family_comp_index;
- qf->actual_queues = s->hwctx->nb_comp_queues;
+ ret = s->hwctx->queue_family_comp_index;
+ num = s->hwctx->nb_comp_queues;
break;
case VK_QUEUE_TRANSFER_BIT:
- qf->queue_family = s->hwctx->queue_family_tx_index;
- qf->actual_queues = s->hwctx->nb_tx_queues;
+ ret = s->hwctx->queue_family_tx_index;
+ num = s->hwctx->nb_tx_queues;
break;
case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
- qf->queue_family = s->hwctx->queue_family_encode_index;
- qf->actual_queues = s->hwctx->nb_encode_queues;
+ ret = s->hwctx->queue_family_encode_index;
+ num = s->hwctx->nb_encode_queues;
break;
case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
- qf->queue_family = s->hwctx->queue_family_decode_index;
- qf->actual_queues = s->hwctx->nb_decode_queues;
+ ret = s->hwctx->queue_family_decode_index;
+ num = s->hwctx->nb_decode_queues;
break;
default:
av_assert0(0); /* Should never happen */
}
+ if (nb)
+ *nb = num;
+
+ return ret;
+}
+
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ VkQueueFlagBits dev_family, int nb_queues)
+{
+ int ret;
+
+ ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues);
+
if (!nb_queues)
qf->nb_queues = qf->actual_queues;
else
qf->nb_queues = nb_queues;
- return;
+ return ret;
}
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
@@ -1669,6 +1724,10 @@ void ff_vk_uninit(FFVulkanContext *s)
{
FFVulkanFunctions *vk = &s->vkfn;
+ av_freep(&s->query_props);
+ av_freep(&s->qf_props);
+ av_freep(&s->video_props);
+
if (s->spirv_compiler)
s->spirv_compiler->uninit(&s->spirv_compiler);
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 4bd1c9fc00..4c38dbc2e6 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
VkPhysicalDeviceProperties2 props;
VkPhysicalDeviceDriverProperties driver_props;
VkPhysicalDeviceMemoryProperties mprops;
+ VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
+ VkQueueFamilyVideoPropertiesKHR *video_props;
+ VkQueueFamilyProperties2 *qf_props;
AVBufferRef *device_ref;
AVHWDeviceContext *device;
@@ -263,7 +266,7 @@ const char *ff_vk_ret2str(VkResult res);
/**
* Loads props/mprops/driver_props
*/
-void ff_vk_load_props(FFVulkanContext *s);
+int ff_vk_load_props(FFVulkanContext *s);
/**
* Returns 1 if the image is any sort of supported RGB
@@ -288,12 +291,17 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+/**
+ * Get a queue family index and the number of queues. nb is optional.
+ */
+int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb);
+
/**
* Initialize a queue family with a specific number of queues.
* If nb_queues == 0, use however many queues the queue family has.
*/
-void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues);
+int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ VkQueueFlagBits dev_family, int nb_queues);
/**
* Rotate through the queues in a queue family.
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 6396d8d714..212681d475 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -78,6 +78,7 @@ typedef enum FFVulkanExtensions {
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFormatProperties2) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceImageFormatProperties2) \
MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties) \
+ MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties2) \
\
/* Command pool */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateCommandPool) \
--
2.40.0
[-- Attachment #35: 0054-vulkan-return-current-queue-index-from-ff_vk_qf_rota.patch --]
[-- Type: text/x-diff, Size: 1290 bytes --]
From e1f5bb2578042432e80700eac827e780a3b8b1f7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 28 Dec 2022 05:55:53 +0100
Subject: [PATCH 54/97] vulkan: return current queue index from
ff_vk_qf_rotate()
---
libavutil/vulkan.c | 3 ++-
libavutil/vulkan.h | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index d045ff83c1..cb8e08e02f 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -240,9 +240,10 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
return ret;
}
-void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
+int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
{
qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
+ return qf->cur_queue;
}
int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 4c38dbc2e6..3f887a782e 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -306,7 +306,7 @@ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
/**
* Rotate through the queues in a queue family.
*/
-void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
+int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
/**
* Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
--
2.40.0
[-- Attachment #36: 0055-vulkan-rewrite-to-support-all-necessary-features.patch --]
[-- Type: text/x-diff, Size: 123609 bytes --]
From 6b5301aa29b63b90d04505c9386822b2e207a038 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 29 Dec 2022 21:16:21 +0100
Subject: [PATCH 55/97] vulkan: rewrite to support all necessary features
---
libavutil/vulkan.c | 2145 ++++++++++++++++++----------------
libavutil/vulkan.h | 515 ++++----
libavutil/vulkan_functions.h | 1 +
3 files changed, 1344 insertions(+), 1317 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index cb8e08e02f..9d607ee1ce 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -21,33 +23,6 @@
#include "vulkan.h"
#include "vulkan_loader.h"
-#if CONFIG_LIBGLSLANG
-#include "vulkan_glslang.c"
-#elif CONFIG_LIBSHADERC
-#include "vulkan_shaderc.c"
-#endif
-
-/* Generic macro for creating contexts which need to keep their addresses
- * if another context is created. */
-#define FN_CREATING(ctx, type, shortname, array, num) \
-static av_always_inline type *create_ ##shortname(ctx *dctx) \
-{ \
- type **array, *sctx = av_mallocz(sizeof(*sctx)); \
- if (!sctx) \
- return NULL; \
- \
- array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
- if (!array) { \
- av_free(sctx); \
- return NULL; \
- } \
- \
- dctx->array = array; \
- dctx->array[dctx->num++] = sctx; \
- \
- return sctx; \
-}
-
const VkComponentMapping ff_comp_identity_map = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -113,15 +88,22 @@ int ff_vk_load_props(FFVulkanContext *s)
uint32_t qc = 0;
FFVulkanFunctions *vk = &s->vkfn;
+ s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT,
+ };
+ s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT,
+ .pNext = &s->hprops,
+ };
s->driver_props = (VkPhysicalDeviceDriverProperties) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+ .pNext = &s->desc_buf_props,
};
s->props = (VkPhysicalDeviceProperties2) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = &s->driver_props,
};
-
vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
@@ -165,32 +147,7 @@ int ff_vk_load_props(FFVulkanContext *s)
return 0;
}
-void ff_vk_qf_fill(FFVulkanContext *s)
-{
- s->nb_qfs = 0;
-
- /* Simply fills in all unique queues into s->qfs */
- if (s->hwctx->queue_family_index >= 0)
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
- if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
- if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
- s->qfs[1] != s->hwctx->queue_family_comp_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
- if (s->hwctx->queue_family_decode_index >= 0 &&
- (s->qfs[0] != s->hwctx->queue_family_decode_index &&
- s->qfs[1] != s->hwctx->queue_family_decode_index &&
- s->qfs[2] != s->hwctx->queue_family_decode_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
- if (s->hwctx->queue_family_encode_index >= 0 &&
- (s->qfs[0] != s->hwctx->queue_family_encode_index &&
- s->qfs[1] != s->hwctx->queue_family_encode_index &&
- s->qfs[2] != s->hwctx->queue_family_encode_index &&
- s->qfs[3] != s->hwctx->queue_family_encode_index))
- s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
-}
-
-int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
+static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
{
int ret, num;
@@ -226,24 +183,552 @@ int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb)
}
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues)
+ VkQueueFlagBits dev_family)
{
- int ret;
+ /* Fill in queue families from context if not done yet */
+ if (!s->nb_qfs) {
+ s->nb_qfs = 0;
+
+ /* Simply fills in all unique queues into s->qfs */
+ if (s->hwctx->queue_family_index >= 0)
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index;
+ if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index)
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index;
+ if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index &&
+ s->qfs[1] != s->hwctx->queue_family_comp_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index;
+ if (s->hwctx->queue_family_decode_index >= 0 &&
+ (s->qfs[0] != s->hwctx->queue_family_decode_index &&
+ s->qfs[1] != s->hwctx->queue_family_decode_index &&
+ s->qfs[2] != s->hwctx->queue_family_decode_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index;
+ if (s->hwctx->queue_family_encode_index >= 0 &&
+ (s->qfs[0] != s->hwctx->queue_family_encode_index &&
+ s->qfs[1] != s->hwctx->queue_family_encode_index &&
+ s->qfs[2] != s->hwctx->queue_family_encode_index &&
+ s->qfs[3] != s->hwctx->queue_family_encode_index))
+ s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index;
+ }
- ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues);
+ return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues));
+}
- if (!nb_queues)
- qf->nb_queues = qf->actual_queues;
- else
- qf->nb_queues = nb_queues;
+void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool)
+{ /* Release everything owned by an execution pool: per-context fences and
+   * dependency arrays, then the command buffers, command pool, query pool
+   * and the host-side arrays. */
+ FFVulkanFunctions *vk = &s->vkfn;
- return ret;
+ for (int i = 0; i < pool->pool_size; i++) {
+ FFVkExecContext *e = &pool->contexts[i];
+ /* Wait on the fence (no timeout) before destroying it */
+ if (e->fence) {
+ vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+ vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
+ }
+ /* Drop buffer/frame dependencies still held by this context */
+ ff_vk_exec_discard_deps(s, e);
+ /* Release the per-context dependency and semaphore arrays */
+ av_free(e->frame_deps);
+ av_free(e->buf_deps);
+ av_free(e->queue_family_dst);
+ av_free(e->layout_dst);
+ av_free(e->access_dst);
+ av_free(e->frame_update);
+ av_free(e->frame_locked);
+ av_free(e->sem_sig);
+ av_free(e->sem_sig_val_dst);
+ av_free(e->sem_wait);
+ }
+ /* Pool-wide Vulkan objects */
+ if (pool->cmd_bufs)
+ vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool,
+ pool->pool_size, pool->cmd_bufs);
+ if (pool->cmd_buf_pool)
+ vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc);
+ if (pool->query_pool)
+ vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc);
+ /* Host-side storage; the pointers in pool are not reset to NULL */
+ av_free(pool->query_data);
+ av_free(pool->cmd_bufs);
+ av_free(pool->contexts);
+}
+
+/*
+ * Initialize an execution pool: one command pool, nb_contexts command
+ * buffers, fences and contexts and, if nb_queries is non-zero, a query pool
+ * holding nb_queries queries per context plus host storage for the results.
+ *
+ * query_64bit selects 8-byte (instead of 4-byte) result elements;
+ * query_create_pnext is chained into VkQueryPoolCreateInfo.
+ *
+ * Returns 0 on success or a negative AVERROR code. On failure everything
+ * created so far is released through ff_vk_exec_pool_free().
+ */
+int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+                         FFVkExecPool *pool, int nb_contexts,
+                         int nb_queries, VkQueryType query_type, int query_64bit,
+                         const void *query_create_pnext)
+{
+    int err;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    VkCommandPoolCreateInfo cqueue_create;
+    VkCommandBufferAllocateInfo cbuf_create;
+
+    atomic_init(&pool->idx, 0);
+
+    /* Create command pool */
+    cqueue_create = (VkCommandPoolCreateInfo) {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+        .flags              = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+                              VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+        .queueFamilyIndex   = qf->queue_family,
+    };
+    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+                                s->hwctx->alloc, &pool->cmd_buf_pool);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+               ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Allocate space for command buffers */
+    pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs));
+    if (!pool->cmd_bufs) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Allocate command buffer */
+    cbuf_create = (VkCommandBufferAllocateInfo) {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+        .commandPool        = pool->cmd_buf_pool,
+        .commandBufferCount = nb_contexts,
+    };
+    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create,
+                                     pool->cmd_bufs);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+               ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Query pool */
+    if (nb_queries) {
+        VkQueryPoolCreateInfo query_pool_info = {
+            .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+            .pNext = query_create_pnext,
+            .queryType = query_type,
+            .queryCount = nb_queries*nb_contexts,
+        };
+        ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
+                                  s->hwctx->alloc, &pool->query_pool);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n",
+                   ff_vk_ret2str(ret));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+
+        pool->nb_queries = nb_queries;
+        /* ff_vk_exec_get_query() reads this to pick the element size and
+         * the 64-bit result flag, so it has to be recorded here. */
+        pool->query_64bit = query_64bit;
+        pool->query_status_stride = 2;
+        pool->query_results = nb_queries;
+        pool->query_statuses = 0; /* if radv supports it, nb_queries; */
+
+#if CONFIG_VULKAN_ENCODE
+        /* Video encode queries produce two results per query */
+        if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
+            pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */
+            pool->query_results *= 2;
+        } else
+#endif
+        if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
+            pool->query_status_stride = 1;
+            pool->query_results = 0;
+            pool->query_statuses = nb_queries;
+        }
+
+        /* Bytes of query data per context */
+        pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4);
+
+        /* Allocate space for the query data */
+        pool->query_data = av_mallocz(nb_contexts*pool->qd_size);
+        if (!pool->query_data) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
+
+    /* Allocate space for the contexts */
+    pool->contexts = av_mallocz(nb_contexts*sizeof(*pool->contexts));
+    if (!pool->contexts) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    pool->pool_size = nb_contexts;
+
+    /* Init contexts */
+    for (int i = 0; i < pool->pool_size; i++) {
+        FFVkExecContext *e = &pool->contexts[i];
+
+        /* Fence, created signalled so the first wait on it returns at once */
+        VkFenceCreateInfo fence_create = {
+            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+            .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+        };
+        ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc,
+                              &e->fence);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n",
+                   ff_vk_ret2str(ret));
+            /* Go through the common cleanup instead of returning directly,
+             * which leaked the pools, buffers and fences created so far.
+             * Contexts are zero-allocated, so untouched ones have no fence. */
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+
+        e->idx = i;
+        e->parent = pool;
+
+        /* Query data */
+        e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i;
+        e->query_idx = nb_queries*i;
+
+        /* Command buffer */
+        e->buf = pool->cmd_bufs[i];
+
+        /* Queue index distribution */
+        e->qi = i % qf->nb_queues;
+        e->qf = qf->queue_family;
+        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
+                           e->qi, &e->queue);
+    }
+
+    return 0;
+
+fail:
+    ff_vk_exec_pool_free(s, pool);
+    return err;
+}
+
+/*
+ * Fetch the query results of an execution context into e->query_data.
+ *
+ * On success returns VK_SUCCESS, stores e->query_data in *data (if given)
+ * and a combined status in *status (if given): the lowest negative status
+ * seen, otherwise the highest one, or 0 when the pool has no statuses.
+ * Any other VkResult from GetQueryPoolResults is returned unchanged and the
+ * outputs are left untouched.
+ */
+VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
+                              void **data, int64_t *status)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    const FFVkExecPool *pool = e->parent;
+
+    int32_t *res32 = e->query_data;
+    int64_t *res64 = e->query_data;
+    int64_t res = 0;
+    VkQueryResultFlags qf = 0;
+
+    qf |= pool->query_64bit ?
+          VK_QUERY_RESULT_64_BIT : 0x0;
+    qf |= pool->query_statuses ?
+          VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
+
+    ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool,
+                                  e->query_idx,
+                                  pool->nb_queries,
+                                  pool->qd_size, e->query_data,
+                                  pool->query_64bit ? 8 : 4, qf);
+    if (ret != VK_SUCCESS)
+        return ret;
+
+    if (pool->query_statuses) {
+        const int stride = pool->query_status_stride;
+
+        for (int i = 0; i < pool->query_statuses; i++) {
+            /* The status is the last element of each stride-sized group.
+             * The previous code indexed with i and also advanced the
+             * pointer by the stride, reading element i*(stride + 1), which
+             * runs past the data as soon as there is more than one status. */
+            const int     pos = i*stride + (stride - 1);
+            const int64_t cur = pool->query_64bit ? res64[pos] : res32[pos];
+
+            if (cur < res || (res >= 0 && cur > res))
+                res = cur;
+        }
+    }
+
+    if (data)
+        *data = e->query_data;
+    if (status)
+        *status = res;
+
+    return VK_SUCCESS;
+}
+
+/*
+ * Return the next execution context of the pool, cycling through the
+ * contexts in round-robin order.
+ */
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool)
+{
+    /* Unsigned on purpose: with a signed index the counter eventually turns
+     * negative, and a negative % result would index before the array. */
+    uint32_t idx = atomic_fetch_add_explicit(&pool->idx, 1, memory_order_relaxed);
+    idx %= pool->pool_size;
+    return &pool->contexts[idx];
+}
+/**
+ * Block, without timeout, until the fence of execution context e is
+ * signalled.
+ */
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+}
+
+/*
+ * Begin recording into the command buffer of execution context e.
+ * Waits for the context's fence, resets it, drops the dependencies left
+ * over from the previous use and, if the pool has queries, records a reset
+ * of this context's query range.
+ * Returns 0 on success or AVERROR_EXTERNAL if recording could not start.
+ */
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    const FFVkExecPool *pool = e->parent;
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    /* Wait until the fence of this context is signalled, then reset it so
+     * the next submission can signal it again. */
+    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+    vk->ResetFences(s->hwctx->act_dev, 1, &e->fence);
+
+    /* Discard queue dependencies */
+    ff_vk_exec_discard_deps(s, e);
+
+    ret = vk->BeginCommandBuffer(e->buf, &cmd_start);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to start command recording: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (pool->nb_queries)
+        vk->CmdResetQueryPool(e->buf, pool->query_pool,
+                              e->query_idx, pool->nb_queries);
+
+    return 0;
+}
+/**
+ * Drop every dependency attached to e: unreference the buffer deps, unlock
+ * and release the frame deps, and reset the semaphore wait/signal counters.
+ * The arrays themselves are kept for reuse.
+ */
+void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
+{
+ for (int j = 0; j < e->nb_buf_deps; j++)
+ av_buffer_unref(&e->buf_deps[j]);
+ e->nb_buf_deps = 0;
+ /* Unlock frames that are still locked; free those that have a buf[0] */
+ for (int j = 0; j < e->nb_frame_deps; j++) {
+ AVFrame *f = e->frame_deps[j];
+ if (e->frame_locked[j]) {
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ vkfc->unlock_frame(hwfc, vkf);
+ e->frame_locked[j] = 0;
+ e->frame_update[j] = 0;
+ }
+ if (f->buf[0])
+ av_frame_free(&e->frame_deps[j]);
+ }
+ e->nb_frame_deps = 0;
+ /* Forget all queued semaphore waits and signals */
+ e->sem_wait_cnt = 0;
+ e->sem_sig_cnt = 0;
+ e->sem_sig_val_dst_cnt = 0;
+}
+/**
+ * Attach nb_deps buffer references to e; they are unreferenced by
+ * ff_vk_exec_discard_deps(). If ref is non-zero a new reference to each
+ * buffer is taken, otherwise the given references are stored as they are.
+ * Returns 0, or AVERROR(ENOMEM) after discarding all dependencies of e.
+ */
+int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
+ AVBufferRef **deps, int nb_deps, int ref)
+{
+ AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size,
+ (e->nb_buf_deps + nb_deps) * sizeof(*dst));
+ if (!dst) {
+ ff_vk_exec_discard_deps(s, e);
+ return AVERROR(ENOMEM);
+ }
+
+ e->buf_deps = dst;
+
+ for (int i = 0; i < nb_deps; i++) {
+ e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i];
+ if (!e->buf_deps[e->nb_buf_deps]) {
+ ff_vk_exec_discard_deps(s, e);
+ return AVERROR(ENOMEM);
+ }
+ e->nb_buf_deps++;
+ }
+
+ return 0;
+}
+/**
+ * Add frame f as a dependency of e: the frame is locked and, for each of
+ * its images, a semaphore wait (current value, stage wait_stage) and a
+ * semaphore signal (current value + 1, stage signal_stage) are queued.
+ * Returns 0 on success, 1 if the frame had already been added, or
+ * AVERROR(ENOMEM) after discarding all dependencies of e.
+ */
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+ VkPipelineStageFlagBits2 wait_stage,
+ VkPipelineStageFlagBits2 signal_stage)
+{
+ uint8_t *frame_locked;
+ uint8_t *frame_update;
+ AVFrame **frame_deps;
+ VkImageLayout *layout_dst;
+ uint32_t *queue_family_dst;
+ VkAccessFlagBits *access_dst;
+
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ int nb_images = ff_vk_count_images(vkf);
+
+ /* Don't add duplicates */
+ for (int i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == f->data[0])
+ return 1;
+ /* Grow str->arr so index cnt is valid; on failure drop all deps and return */
+#define ARR_REALLOC(str, arr, alloc_s, cnt) \
+ do { \
+ arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \
+ if (!arr) { \
+ ff_vk_exec_discard_deps(s, e); \
+ return AVERROR(ENOMEM); \
+ } \
+ str->arr = arr; \
+ } while (0)
+
+ ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps);
+ ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps);
+ ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps);
+
+ ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps);
+ ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps);
+ ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps);
+ /* Frames with a buf[0] are cloned; others are stored by pointer */
+ e->frame_deps[e->nb_frame_deps] = f->buf[0] ? av_frame_clone(f) : f;
+ if (!e->frame_deps[e->nb_frame_deps]) {
+ ff_vk_exec_discard_deps(s, e);
+ return AVERROR(ENOMEM);
+ }
+ /* Stays locked until ff_vk_exec_submit() or ff_vk_exec_discard_deps() */
+ vkfc->lock_frame(hwfc, vkf);
+ e->frame_locked[e->nb_frame_deps] = 1;
+ e->frame_update[e->nb_frame_deps] = 0;
+ e->nb_frame_deps++;
+ /* One semaphore wait/signal pair per image of the frame */
+ for (int i = 0; i < nb_images; i++) {
+ VkSemaphoreSubmitInfo *sem_wait;
+ VkSemaphoreSubmitInfo *sem_sig;
+ uint64_t **sem_sig_val_dst;
+
+ ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
+ ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt);
+ ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);
+
+ e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+ .semaphore = vkf->sem[i],
+ .value = vkf->sem_value[i],
+ .stageMask = wait_stage,
+ };
+
+ e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+ .semaphore = vkf->sem[i],
+ .value = vkf->sem_value[i] + 1,
+ .stageMask = signal_stage,
+ };
+ /* The stored value is incremented by ff_vk_exec_submit() */
+ e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i];
+ e->sem_sig_val_dst_cnt++;
+ }
+
+ return 0;
+}
+/**
+ * Record the destination queue family, access mask and layout of barrier
+ * bar for frame f; ff_vk_exec_submit() writes them into the AVVkFrame.
+ * f must already be a frame dependency of e (asserted). If nb_img_bar is
+ * given, it is incremented the first time a given frame is updated.
+ */
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+ VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar)
+{
+ int i;
+ for (i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == f->data[0])
+ break;
+ av_assert0(i < e->nb_frame_deps);
+
+ /* Don't update duplicates */
+ if (nb_img_bar && !e->frame_update[i])
+ (*nb_img_bar)++;
+ /* Pending state, applied to the frame on submission */
+ e->queue_family_dst[i] = bar->dstQueueFamilyIndex;
+ e->access_dst[i] = bar->dstAccessMask;
+ e->layout_dst[i] = bar->newLayout;
+ e->frame_update[i] = 1;
+}
+/**
+ * Copy the first semaphore of frame f and its current value into *dst and
+ * *dst_val, and register dst_val so that ff_vk_exec_submit() increments it
+ * together with the other registered semaphore values.
+ * Returns 0, AVERROR(EINVAL) if f is not a frame dependency of e, or
+ * AVERROR(ENOMEM) after discarding all dependencies of e.
+ */
+int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
+ VkSemaphore *dst, uint64_t *dst_val,
+ AVFrame *f)
+{
+ uint64_t **sem_sig_val_dst;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+
+ /* Reject unknown frames */
+ int i;
+ for (i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == f->data[0])
+ break;
+ if (i == e->nb_frame_deps)
+ return AVERROR(EINVAL);
+ /* ARR_REALLOC is the macro defined inside ff_vk_exec_add_dep_frame() */
+ ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt);
+
+ *dst = vkf->sem[0];
+ *dst_val = vkf->sem_value[0];
+
+ e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val;
+ e->sem_sig_val_dst_cnt++;
+
+ return 0;
}
-int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
+int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e)
{
- qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
- return qf->cur_queue;
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+ .commandBuffer = e->buf,
+ };
+ VkSubmitInfo2 submit_info = (VkSubmitInfo2) {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
+ .pCommandBufferInfos = &cmd_buf_info,
+ .commandBufferInfoCount = 1,
+ .pWaitSemaphoreInfos = e->sem_wait,
+ .waitSemaphoreInfoCount = e->sem_wait_cnt,
+ .pSignalSemaphoreInfos = e->sem_sig,
+ .signalSemaphoreInfoCount = e->sem_sig_cnt,
+ };
+ /* Finish recording and submit e->buf to e->queue with the queued
+  * semaphore waits/signals; e->fence is passed to the submission. Returns
+  * 0, or AVERROR_EXTERNAL after discarding all dependencies of e. */
+ ret = vk->EndCommandBuffer(e->buf);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
+ ff_vk_ret2str(ret));
+ ff_vk_exec_discard_deps(s, e);
+ return AVERROR_EXTERNAL;
+ }
+ /* The queue is locked only for the duration of the submission call */
+ s->hwctx->lock_queue(s->device, e->qf, e->qi);
+ ret = vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence);
+ s->hwctx->unlock_queue(s->device, e->qf, e->qi);
+ /* NOTE(review): on the error paths e->fence was reset by ff_vk_exec_start() and is presumably never signalled, so a later untimed wait on it may block - confirm */
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ ff_vk_ret2str(ret));
+ ff_vk_exec_discard_deps(s, e);
+ return AVERROR_EXTERNAL;
+ }
+ /* Bump every registered semaphore value to match the queued signals */
+ for (int i = 0; i < e->sem_sig_val_dst_cnt; i++)
+ *e->sem_sig_val_dst[i] += 1;
+
+ /* Unlock all frames */
+ for (int j = 0; j < e->nb_frame_deps; j++) {
+ if (e->frame_locked[j]) {
+ AVFrame *f = e->frame_deps[j];
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ /* Apply the state recorded by ff_vk_exec_update_frame() to all images */
+ if (e->frame_update[j]) {
+ int nb_images = ff_vk_count_images(vkf);
+ for (int i = 0; i < nb_images; i++) {
+ vkf->layout[i] = e->layout_dst[j];
+ vkf->access[i] = e->access_dst[j];
+ vkf->queue_family[i] = e->queue_family_dst[j];
+ }
+ }
+ vkfc->unlock_frame(hwfc, vkf);
+ e->frame_locked[j] = 0;
+ }
+ }
+
+ return 0;
}
int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
@@ -322,6 +807,10 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
but should be ok */
};
+ VkMemoryAllocateFlagsInfo alloc_flags = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
+ .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
+ };
VkBufferMemoryRequirementsInfo2 req_desc = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
};
@@ -351,11 +840,18 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
/* In case the implementation prefers/requires dedicated allocation */
use_ded_mem = ded_req.prefersDedicatedAllocation |
ded_req.requiresDedicatedAllocation;
- if (use_ded_mem)
+ if (use_ded_mem) {
ded_alloc.buffer = buf->buf;
+ ded_alloc.pNext = alloc_pNext;
+ alloc_pNext = &ded_alloc;
+ }
- err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags,
- use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+ alloc_flags.pNext = alloc_pNext;
+ alloc_pNext = &alloc_flags;
+ }
+
+ err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext,
&buf->flags, &buf->mem);
if (err)
return err;
@@ -367,27 +863,72 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
return AVERROR_EXTERNAL;
}
+ if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+ VkBufferDeviceAddressInfo address_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+ .buffer = buf->buf,
+ };
+ buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
+ }
+
buf->size = size;
return 0;
}
-int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
+static void destroy_avvkbuf(void *opaque, uint8_t *data)
+{ /* Free callback passed to av_buffer_create() by ff_vk_create_avbuf():
+   * opaque is the FFVulkanContext, data the heap-allocated FFVkBuffer. */
+ FFVulkanContext *s = opaque;
+ FFVkBuffer *buf = (FFVkBuffer *)data;
+ ff_vk_free_buf(s, buf);
+ av_free(buf);
+}
+/**
+ * Create a Vulkan buffer with ff_vk_create_buf() and wrap it in an
+ * AVBufferRef whose data pointer is the FFVkBuffer; the buffer is destroyed
+ * by destroy_avvkbuf(), the free callback given to av_buffer_create().
+ * On success stores the reference in *ref and returns 0; otherwise returns
+ * a negative AVERROR code and leaves *ref untouched.
+ */
+int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size,
+ void *pNext, void *alloc_pNext,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+{
+ int err;
+ AVBufferRef *buf;
+ FFVkBuffer *vkb = av_mallocz(sizeof(*vkb));
+ if (!vkb)
+ return AVERROR(ENOMEM);
+
+ err = ff_vk_create_buf(s, vkb, size, pNext, alloc_pNext, usage, flags);
+ if (err < 0) {
+ av_free(vkb);
+ return err;
+ }
+ /* s is passed as the opaque pointer for destroy_avvkbuf() */
+ buf = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), destroy_avvkbuf, s, 0);
+ if (!buf) {
+ destroy_avvkbuf(s, (uint8_t *)vkb);
+ return AVERROR(ENOMEM);
+ }
+
+ *ref = buf;
+
+ return 0;
+}
+
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[],
int nb_buffers, int invalidate)
{
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- VkMappedMemoryRange *inval_list = NULL;
+ VkMappedMemoryRange inval_list[64];
int inval_count = 0;
for (int i = 0; i < nb_buffers; i++) {
- ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
- VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+ void *dst;
+ ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0,
+ VK_WHOLE_SIZE, 0, &dst);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
+ mem[i] = dst;
}
if (!invalidate)
@@ -396,16 +937,12 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
for (int i = 0; i < nb_buffers; i++) {
const VkMappedMemoryRange ival_buf = {
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = buf[i].mem,
+ .memory = buf[i]->mem,
.size = VK_WHOLE_SIZE,
};
- if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
continue;
- inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
- (++inval_count)*sizeof(*inval_list));
- if (!inval_list)
- return AVERROR(ENOMEM);
- inval_list[inval_count - 1] = ival_buf;
+ inval_list[inval_count++] = ival_buf;
}
if (inval_count) {
@@ -421,29 +958,25 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
return 0;
}
-int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers,
int flush)
{
int err = 0;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- VkMappedMemoryRange *flush_list = NULL;
+ VkMappedMemoryRange flush_list[64];
int flush_count = 0;
if (flush) {
for (int i = 0; i < nb_buffers; i++) {
const VkMappedMemoryRange flush_buf = {
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = buf[i].mem,
+ .memory = buf[i]->mem,
.size = VK_WHOLE_SIZE,
};
- if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
continue;
- flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
- (++flush_count)*sizeof(*flush_list));
- if (!flush_list)
- return AVERROR(ENOMEM);
- flush_list[flush_count - 1] = flush_buf;
+ flush_list[flush_count++] = flush_buf;
}
}
@@ -458,7 +991,7 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
}
for (int i = 0; i < nb_buffers; i++)
- vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
+ vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem);
return err;
}
@@ -470,547 +1003,105 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
if (!buf || !s->hwctx)
return;
+ if (buf->mapped_mem)
+ ff_vk_unmap_buffer(s, buf, 0);
if (buf->buf != VK_NULL_HANDLE)
vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
if (buf->mem != VK_NULL_HANDLE)
vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}
-int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx,
- int width, int height, VkFormat fmt, VkImageTiling tiling,
- VkImageUsageFlagBits usage, VkImageCreateFlags flags,
- void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext)
+static void free_data_buf(void *opaque, uint8_t *data)
{
- int err;
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
- AVVulkanDeviceContext *hwctx = s->hwctx;
-
- VkExportSemaphoreCreateInfo ext_sem_info = {
- .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
-#ifdef _WIN32
- .handleTypes = IsWindows8OrGreater()
- ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
- : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
-#else
- .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
-#endif
- };
+ FFVulkanContext *ctx = opaque;
+ FFVkBuffer *buf = (FFVkBuffer *)data;
+ ff_vk_free_buf(ctx, buf);
+ av_free(data);
+}
- VkSemaphoreTypeCreateInfo sem_type_info = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
-#ifdef _WIN32
- .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
-#else
- .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
-#endif
- .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
- .initialValue = 0,
- };
-
- VkSemaphoreCreateInfo sem_spawn = {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
- .pNext = &sem_type_info,
- };
-
- /* Create the image */
- VkImageCreateInfo create_info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
- .pNext = create_pnext,
- .imageType = VK_IMAGE_TYPE_2D,
- .format = fmt,
- .extent.depth = 1,
- .mipLevels = 1,
- .arrayLayers = 1,
- .flags = flags,
- .tiling = tiling,
- .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
- .usage = usage,
- .samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = s->qfs,
- .queueFamilyIndexCount = s->nb_qfs,
- .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
- };
-
- ret = vk->CreateImage(hwctx->act_dev, &create_info,
- hwctx->alloc, &f->img[0]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n",
- ff_vk_ret2str(ret));
- err = AVERROR(EINVAL);
- goto fail;
- }
-
- /* Create semaphore */
- ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn,
- hwctx->alloc, &f->sem[0]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
-
- f->queue_family[0] = s->nb_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : s->qfs[0];
- f->layout[0] = create_info.initialLayout;
- f->access[0] = 0x0;
- f->sem_value[0] = 0;
-
- f->flags = 0x0;
- f->tiling = tiling;
-
- return 0;
-
-fail:
- return err;
-}
-
-int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
- VkShaderStageFlagBits stage)
-{
- VkPushConstantRange *pc;
-
- pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
- pl->push_consts_num + 1);
- if (!pl->push_consts)
- return AVERROR(ENOMEM);
-
- pc = &pl->push_consts[pl->push_consts_num++];
- memset(pc, 0, sizeof(*pc));
-
- pc->stageFlags = stage;
- pc->offset = offset;
- pc->size = size;
-
- return 0;
-}
-
-FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
-int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
- FFVkQueueFamilyCtx *qf)
+static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
{
- VkResult ret;
- FFVkExecContext *e;
- FFVulkanFunctions *vk = &s->vkfn;
-
- VkCommandPoolCreateInfo cqueue_create = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
- .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
- .queueFamilyIndex = qf->queue_family,
- };
- VkCommandBufferAllocateInfo cbuf_create = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
- .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
- .commandBufferCount = qf->nb_queues,
- };
-
- e = create_exec_ctx(s);
- if (!e)
- return AVERROR(ENOMEM);
-
- e->qf = qf;
-
- e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
- if (!e->queues)
- return AVERROR(ENOMEM);
-
- e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
- if (!e->bufs)
- return AVERROR(ENOMEM);
-
- /* Create command pool */
- ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
- s->hwctx->alloc, &e->pool);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
-
- cbuf_create.commandPool = e->pool;
-
- /* Allocate command buffer */
- ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
-
- for (int i = 0; i < qf->nb_queues; i++) {
- FFVkQueueCtx *q = &e->queues[i];
- vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
- i % qf->actual_queues, &q->queue);
- }
-
- *ctx = e;
+ uint8_t *buf = av_mallocz(size);
+ if (!buf)
+ return NULL;
- return 0;
+ return av_buffer_create(buf, size, free_data_buf, opaque, 0);
}
-int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
- int nb_queries, VkQueryType type,
- int elem_64bits, void *create_pnext)
+int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
+ AVBufferRef **buf, VkBufferUsageFlags usage,
+ void *create_pNext, size_t size,
+ VkMemoryPropertyFlagBits mem_props)
{
- VkResult ret;
- size_t qd_size;
- int nb_results = nb_queries;
- int nb_statuses = 0 /* Once RADV has support, = nb_queries */;
- int status_stride = 2;
- int result_elem_size = elem_64bits ? 8 : 4;
- FFVulkanFunctions *vk = &s->vkfn;
- VkQueryPoolCreateInfo query_pool_info = {
- .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
- .pNext = create_pnext,
- .queryType = type,
- .queryCount = nb_queries*e->qf->nb_queues,
- };
-
- if (e->query.pool)
- return AVERROR(EINVAL);
+ int err;
+ AVBufferRef *ref;
+ FFVkBuffer *data;
- /* Video encode quieries produce two results per query */
- if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) {
- status_stride = 3; /* skip,skip,result,skip,skip,result */
- nb_results *= 2;
- } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
- status_stride = 1;
- nb_results *= 0;
+ if (!(*buf_pool)) {
+ *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx,
+ alloc_data_buf, NULL);
+ if (!(*buf_pool))
+ return AVERROR(ENOMEM);
}
- qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size;
-
- e->query.data = av_mallocz(e->qf->nb_queues*qd_size);
- if (!e->query.data)
+ *buf = ref = av_buffer_pool_get(*buf_pool);
+ if (!ref)
return AVERROR(ENOMEM);
- ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
- s->hwctx->alloc, &e->query.pool);
- if (ret != VK_SUCCESS)
- return AVERROR_EXTERNAL;
-
- e->query.data_per_queue = qd_size;
- e->query.nb_queries = nb_queries;
- e->query.nb_results = nb_results;
- e->query.nb_statuses = nb_statuses;
- e->query.elem_64bits = elem_64bits;
- e->query.status_stride = status_stride;
-
- return 0;
-}
-
-int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
- int query_idx, void **data, int64_t *status)
-{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
- uint8_t *qd;
- int32_t *res32;
- int64_t *res64;
- int64_t res = 0;
- VkQueryResultFlags qf = 0;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
+ data = (FFVkBuffer *)ref->data;
+ data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
+ data->access = VK_ACCESS_2_NONE;
- if (!q->submitted) {
- *data = NULL;
+ if (data->size >= size)
return 0;
- }
- qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue;
- qf |= e->query.nb_results && e->query.nb_statuses ?
- VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0;
- qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0;
- res32 = (int32_t *)(qd + e->query.nb_results*4);
- res64 = (int64_t *)(qd + e->query.nb_results*8);
-
- ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool,
- query_idx,
- e->query.nb_queries,
- e->query.data_per_queue, qd,
- e->query.elem_64bits ? 8 : 4, qf);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
+ ff_vk_free_buf(ctx, data);
+ memset(data, 0, sizeof(*data));
- if (e->query.nb_statuses && e->query.elem_64bits) {
- for (int i = 0; i < e->query.nb_queries; i++) {
- res = (res64[i] < res) || (res >= 0 && res64[i] > res) ?
- res64[i] : res;
- res64 += e->query.status_stride;
- }
- } else if (e->query.nb_statuses) {
- for (int i = 0; i < e->query.nb_queries; i++) {
- res = (res32[i] < res) || (res >= 0 && res32[i] > res) ?
- res32[i] : res;
- res32 += e->query.status_stride;
- }
- }
-
- if (data)
- *data = qd;
- if (status)
- *status = res;
-
- return 0;
-}
+ av_log(ctx, AV_LOG_DEBUG, "Allocating buffer of %lu bytes for pool %p\n",
+ size, *buf_pool);
-void ff_vk_discard_exec_deps(FFVkExecContext *e)
-{
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
- for (int j = 0; j < q->nb_buf_deps; j++)
- av_buffer_unref(&q->buf_deps[j]);
- q->nb_buf_deps = 0;
-
- for (int j = 0; j < q->nb_frame_deps; j++)
- av_frame_free(&q->frame_deps[j]);
- q->nb_frame_deps = 0;
-
- e->sem_wait_cnt = 0;
- e->sem_sig_cnt = 0;
-}
-
-int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
-{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
- VkCommandBufferBeginInfo cmd_start = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
- };
-
- /* Create the fence and don't wait for it initially */
- if (!q->fence) {
- VkFenceCreateInfo fence_spawn = {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- };
- ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
- &q->fence);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- } else if (!q->synchronous) {
- vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
- }
-
- q->synchronous = 0;
-
- /* Discard queue dependencies */
- ff_vk_discard_exec_deps(e);
-
- ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
-
- if (e->query.pool) {
- e->query.idx = e->qf->cur_queue*e->query.nb_queries;
- vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool,
- e->query.idx, e->query.nb_queries);
+ err = ff_vk_create_buf(ctx, data, size,
+ create_pNext, NULL, usage,
+ mem_props);
+ if (err < 0) {
+ av_buffer_unref(&ref);
+ return err;
}
- return 0;
-}
-
-VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
-{
- return e->bufs[e->qf->cur_queue];
-}
-
-int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
- VkPipelineStageFlagBits in_wait_dst_flag)
-{
- AVFrame **dst;
- AVVkFrame *f = (AVVkFrame *)frame->data[0];
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
- AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
- int planes = av_pix_fmt_count_planes(fc->sw_format);
-
- for (int i = 0; i < planes; i++) {
- e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
- (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
- if (!e->sem_wait) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
- (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
- if (!e->sem_wait_dst) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc,
- (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val));
- if (!e->sem_wait_val) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
- (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
- if (!e->sem_sig) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
+ if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+ err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0);
+ if (err < 0) {
+ av_buffer_unref(&ref);
+ return err;
}
-
- e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc,
- (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val));
- if (!e->sem_sig_val) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
- (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
- if (!e->sem_sig_val_dst) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- e->sem_wait[e->sem_wait_cnt] = f->sem[i];
- e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
- e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
- e->sem_wait_cnt++;
-
- e->sem_sig[e->sem_sig_cnt] = f->sem[i];
- e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
- e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
- e->sem_sig_cnt++;
}
- dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
- (q->nb_frame_deps + 1) * sizeof(*dst));
- if (!dst) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
-
- q->frame_deps = dst;
- q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
- if (!q->frame_deps[q->nb_frame_deps]) {
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
- }
- q->nb_frame_deps++;
-
return 0;
}
-int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
-{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
- VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
- .pWaitSemaphoreValues = e->sem_wait_val,
- .pSignalSemaphoreValues = e->sem_sig_val,
- .waitSemaphoreValueCount = e->sem_wait_cnt,
- .signalSemaphoreValueCount = e->sem_sig_cnt,
- };
-
- VkSubmitInfo s_info = {
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &s_timeline_sem_info,
-
- .commandBufferCount = 1,
- .pCommandBuffers = &e->bufs[e->qf->cur_queue],
-
- .pWaitSemaphores = e->sem_wait,
- .pWaitDstStageMask = e->sem_wait_dst,
- .waitSemaphoreCount = e->sem_wait_cnt,
-
- .pSignalSemaphores = e->sem_sig,
- .signalSemaphoreCount = e->sem_sig_cnt,
- };
-
- ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
-
- s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
- e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
-
- ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
-
- s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
- e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
-
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
-
- for (int i = 0; i < e->sem_sig_cnt; i++)
- *e->sem_sig_val_dst[i] += 1;
-
- e->query.idx = e->qf->cur_queue*e->query.nb_queries;
- q->submitted = 1;
-
- return 0;
-}
-
-void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
-{
- FFVulkanFunctions *vk = &s->vkfn;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
- if (!q->submitted)
- return;
-
- vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
- q->synchronous = 1;
-}
-
-int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef **deps, int nb_deps)
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+ VkShaderStageFlagBits stage)
{
- AVBufferRef **dst;
- FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
-
- if (!deps || !nb_deps)
- return 0;
+ VkPushConstantRange *pc;
- dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
- (q->nb_buf_deps + nb_deps) * sizeof(*dst));
- if (!dst)
- goto err;
+ pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
+ pl->push_consts_num + 1);
+ if (!pl->push_consts)
+ return AVERROR(ENOMEM);
- q->buf_deps = dst;
+ pc = &pl->push_consts[pl->push_consts_num++];
+ memset(pc, 0, sizeof(*pc));
- for (int i = 0; i < nb_deps; i++) {
- q->buf_deps[q->nb_buf_deps] = deps[i];
- if (!q->buf_deps[q->nb_buf_deps])
- goto err;
- q->nb_buf_deps++;
- }
+ pc->stageFlags = stage;
+ pc->offset = offset;
+ pc->size = size;
return 0;
-
-err:
- ff_vk_discard_exec_deps(e);
- return AVERROR(ENOMEM);
}
-FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
- int unnorm_coords, VkFilter filt)
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+ int unnorm_coords, VkFilter filt)
{
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
@@ -1030,22 +1121,15 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
.unnormalizedCoordinates = unnorm_coords,
};
- FFVkSampler *sctx = create_sampler(s);
- if (!sctx)
- return NULL;
-
ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
- s->hwctx->alloc, &sctx->sampler[0]);
+ s->hwctx->alloc, sampler);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
ff_vk_ret2str(ret));
- return NULL;
+ return AVERROR_EXTERNAL;
}
- for (int i = 1; i < 4; i++)
- sctx->sampler[i] = sctx->sampler[0];
-
- return sctx;
+ return 0;
}
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
@@ -1068,79 +1152,137 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
}
typedef struct ImageViewCtx {
- VkImageView view;
+ VkImageView views[AV_NUM_DATA_POINTERS];
+ int nb_views;
} ImageViewCtx;
-static void destroy_imageview(void *opaque, uint8_t *data)
+static void destroy_imageviews(void *opaque, uint8_t *data)
{
FFVulkanContext *s = opaque;
FFVulkanFunctions *vk = &s->vkfn;
ImageViewCtx *iv = (ImageViewCtx *)data;
- vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
+ for (int i = 0; i < iv->nb_views; i++)
+ vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
+
av_free(iv);
}
-int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
- VkImageView *v, VkImage img, VkFormat fmt,
- const VkComponentMapping map)
+int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
+ VkImageView views[AV_NUM_DATA_POINTERS],
+ AVFrame *f)
{
int err;
+ VkResult ret;
AVBufferRef *buf;
FFVulkanFunctions *vk = &s->vkfn;
-
- VkImageViewCreateInfo imgview_spawn = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .pNext = NULL,
- .image = img,
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = fmt,
- .components = map,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- };
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ const int nb_images = ff_vk_count_images(vkf);
+ const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
ImageViewCtx *iv = av_mallocz(sizeof(*iv));
- VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
- s->hwctx->alloc, &iv->view);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ for (int i = 0; i < nb_planes; i++) {
+ VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_ASPECT_PLANE_0_BIT,
+ VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
+ VkImageViewCreateInfo view_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = NULL,
+ .image = vkf->img[FFMIN(i, nb_images - 1)],
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = rep_fmts[i],
+ .components = ff_comp_identity_map,
+ .subresourceRange = {
+ .aspectMask = plane_aspect[(nb_planes != nb_images) +
+ i*(nb_planes != nb_images)],
+ .levelCount = 1,
+ .layerCount = 1,
+ },
+ };
+
+ ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
+ s->hwctx->alloc, &iv->views[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
+ iv->nb_views++;
}
- buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
+ buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageviews, s, 0);
if (!buf) {
- destroy_imageview(s, (uint8_t *)iv);
- return AVERROR(ENOMEM);
+ err = AVERROR(ENOMEM);
+ goto fail;
}
/* Add to queue dependencies */
- err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
- if (err) {
+ err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0);
+ if (err < 0)
av_buffer_unref(&buf);
- return err;
- }
- *v = iv->view;
+ memcpy(views, iv->views, nb_planes*sizeof(*views));
- return 0;
+ return err;
+
+fail:
+ for (int i = 0; i < iv->nb_views; i++)
+ vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc);
+ av_free(iv);
+ return err;
}
-FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)
-FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
- VkShaderStageFlags stage)
+void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
+ AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
+ VkPipelineStageFlags src_stage,
+ VkPipelineStageFlags dst_stage,
+ VkAccessFlagBits new_access,
+ VkImageLayout new_layout,
+ uint32_t new_qf)
{
- FFVkSPIRVShader *shd = create_shader(pl);
- if (!shd)
- return NULL;
+ int i, found;
+ AVVkFrame *vkf = (AVVkFrame *)pic->data[0];
+ const int nb_images = ff_vk_count_images(vkf);
+ for (i = 0; i < e->nb_frame_deps; i++)
+ if (e->frame_deps[i]->data[0] == pic->data[0])
+ break;
+ found = (i < e->nb_frame_deps) && (e->frame_update[i]) ? i : -1;
+
+ for (int i = 0; i < nb_images; i++) {
+ bar[*nb_bar] = (VkImageMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+ .pNext = NULL,
+ .srcStageMask = src_stage,
+ .dstStageMask = dst_stage,
+ .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i],
+ .dstAccessMask = new_access,
+ .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0],
+ .newLayout = new_layout,
+ .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0],
+ .dstQueueFamilyIndex = new_qf,
+ .image = vkf->img[i],
+ .subresourceRange = (VkImageSubresourceRange) {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .layerCount = 1,
+ .levelCount = 1,
+ },
+ };
+ *nb_bar += 1;
+ }
+
+ ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL);
+}
+int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
+ VkShaderStageFlags stage)
+{
av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
@@ -1151,22 +1293,24 @@ FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
GLSLF(0, #version %i ,460);
GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
GLSLC(0, );
+ GLSLC(0, #extension GL_EXT_buffer_reference : require );
+ GLSLC(0, #extension GL_EXT_buffer_reference2 : require );
- return shd;
+ return 0;
}
-void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z)
{
- shd->local_size[0] = local_size[0];
- shd->local_size[1] = local_size[1];
- shd->local_size[2] = local_size[2];
+ shd->local_size[0] = x;
+ shd->local_size[1] = y;
+ shd->local_size[2] = z;
av_bprintf(&shd->src, "layout (local_size_x = %i, "
"local_size_y = %i, local_size_z = %i) in;\n\n",
shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}
-void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
+void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio)
{
int line = 0;
const char *p = shd->src.str;
@@ -1188,36 +1332,24 @@ void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
av_bprint_finalize(&buf, NULL);
}
-int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
- const char *entrypoint)
+void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd)
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ av_bprint_finalize(&shd->src, NULL);
+
+ if (shd->shader.module)
+ vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc);
+}
+
+int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
+ uint8_t *spirv, size_t spirv_size, const char *entrypoint)
{
- int err;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
VkShaderModuleCreateInfo shader_create;
- uint8_t *spirv;
- size_t spirv_size;
- void *priv;
shd->shader.pName = entrypoint;
- if (!s->spirv_compiler) {
-#if CONFIG_LIBGLSLANG
- s->spirv_compiler = ff_vk_glslang_init();
-#elif CONFIG_LIBSHADERC
- s->spirv_compiler = ff_vk_shaderc_init();
-#else
- return AVERROR(ENOSYS);
-#endif
- if (!s->spirv_compiler)
- return AVERROR(ENOMEM);
- }
-
- err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv,
- &spirv_size, entrypoint, &priv);
- if (err < 0)
- return err;
-
av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
shd->name, spirv_size);
@@ -1229,11 +1361,8 @@ int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
&shd->shader.module);
-
- s->spirv_compiler->free_shader(s->spirv_compiler, &priv);
-
if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
+ av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n",
ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
@@ -1262,132 +1391,88 @@ static const struct descriptor_props {
[VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, },
};
-int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
- int num, int only_print_to_shader)
+int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd,
+ FFVulkanDescriptorSetBinding *desc, int nb,
+ int read_only, int print_to_shader_only)
{
VkResult ret;
- VkDescriptorSetLayout *layout;
+ int has_sampler = 0;
FFVulkanFunctions *vk = &s->vkfn;
+ FFVulkanDescriptorSet *set;
+ VkDescriptorSetLayoutCreateInfo desc_create_layout;
- if (only_print_to_shader)
+ if (print_to_shader_only)
goto print;
- pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
- pl->desc_layout_num + pl->qf->nb_queues);
- if (!pl->desc_layout)
+ /* Actual layout allocated for the pipeline */
+ set = av_realloc_array(pl->desc_set, sizeof(*pl->desc_set),
+ pl->nb_descriptor_sets + 1);
+ if (!set)
return AVERROR(ENOMEM);
+ pl->desc_set = set;
+ set = &set[pl->nb_descriptor_sets];
+ memset(set, 0, sizeof(*set));
- pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized,
- sizeof(*pl->desc_set_initialized),
- pl->descriptor_sets_num + 1);
- if (!pl->desc_set_initialized)
+ set->binding = av_mallocz(nb*sizeof(*set->binding));
+ if (!set->binding)
return AVERROR(ENOMEM);
- pl->desc_set_initialized[pl->descriptor_sets_num] = 0;
- layout = &pl->desc_layout[pl->desc_layout_num];
-
- { /* Create descriptor set layout descriptions */
- VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
- VkDescriptorSetLayoutBinding *desc_binding;
-
- desc_binding = av_mallocz(sizeof(*desc_binding)*num);
- if (!desc_binding)
- return AVERROR(ENOMEM);
-
- for (int i = 0; i < num; i++) {
- desc_binding[i].binding = i;
- desc_binding[i].descriptorType = desc[i].type;
- desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
- desc_binding[i].stageFlags = desc[i].stages;
- desc_binding[i].pImmutableSamplers = desc[i].sampler ?
- desc[i].sampler->sampler :
- NULL;
- }
-
- desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
- desc_create_layout.pBindings = desc_binding;
- desc_create_layout.bindingCount = num;
-
- for (int i = 0; i < pl->qf->nb_queues; i++) {
- ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
- s->hwctx->alloc, &layout[i]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
- "layout: %s\n", ff_vk_ret2str(ret));
- av_free(desc_binding);
- return AVERROR_EXTERNAL;
- }
- }
-
- av_free(desc_binding);
+ set->binding_offset = av_mallocz(nb*sizeof(*set->binding_offset));
+ if (!set->binding_offset) {
+ av_freep(&set->binding);
+ return AVERROR(ENOMEM);
}
- { /* Pool each descriptor by type and update pool counts */
- for (int i = 0; i < num; i++) {
- int j;
- for (j = 0; j < pl->pool_size_desc_num; j++)
- if (pl->pool_size_desc[j].type == desc[i].type)
- break;
- if (j >= pl->pool_size_desc_num) {
- pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
- sizeof(*pl->pool_size_desc),
- ++pl->pool_size_desc_num);
- if (!pl->pool_size_desc)
- return AVERROR(ENOMEM);
- memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
- }
- pl->pool_size_desc[j].type = desc[i].type;
- pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
- }
- }
+ desc_create_layout = (VkDescriptorSetLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = nb,
+ .pBindings = set->binding,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
+ };
- { /* Create template creation struct */
- VkDescriptorUpdateTemplateCreateInfo *dt;
- VkDescriptorUpdateTemplateEntry *des_entries;
+ for (int i = 0; i < nb; i++) {
+ set->binding[i].binding = i;
+ set->binding[i].descriptorType = desc[i].type;
+ set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
+ set->binding[i].stageFlags = desc[i].stages;
+ set->binding[i].pImmutableSamplers = desc[i].samplers;
- /* Freed after descriptor set initialization */
- des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
- if (!des_entries)
- return AVERROR(ENOMEM);
+ if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ has_sampler |= 1;
+ }
- for (int i = 0; i < num; i++) {
- des_entries[i].dstBinding = i;
- des_entries[i].descriptorType = desc[i].type;
- des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
- des_entries[i].dstArrayElement = 0;
- des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
- des_entries[i].stride = descriptor_props[desc[i].type].struct_size;
- }
+ set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+ if (has_sampler)
+ set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT;
- pl->desc_template_info = av_realloc_array(pl->desc_template_info,
- sizeof(*pl->desc_template_info),
- pl->total_descriptor_sets + pl->qf->nb_queues);
- if (!pl->desc_template_info)
- return AVERROR(ENOMEM);
+ ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+ s->hwctx->alloc, &set->layout);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
- dt = &pl->desc_template_info[pl->total_descriptor_sets];
- memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);
+ vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size);
- for (int i = 0; i < pl->qf->nb_queues; i++) {
- dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
- dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
- dt[i].descriptorSetLayout = layout[i];
- dt[i].pDescriptorUpdateEntries = des_entries;
- dt[i].descriptorUpdateEntryCount = num;
- }
- }
+ set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment);
- pl->descriptor_sets_num++;
+ for (int i = 0; i < nb; i++)
+ vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout,
+ i, &set->binding_offset[i]);
- pl->desc_layout_num += pl->qf->nb_queues;
- pl->total_descriptor_sets += pl->qf->nb_queues;
+ set->read_only = read_only;
+ set->nb_bindings = nb;
+ pl->nb_descriptor_sets++;
print:
/* Write shader info */
- for (int i = 0; i < num; i++) {
+ for (int i = 0; i < nb; i++) {
const struct descriptor_props *prop = &descriptor_props[desc[i].type];
- GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
+ GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i);
if (desc[i].mem_layout)
GLSLA(", %s", desc[i].mem_layout);
@@ -1412,185 +1497,268 @@ print:
else if (desc[i].elems > 0)
GLSLA("[%i]", desc[i].elems);
- GLSLA(";\n");
+ GLSLA(";");
+ GLSLA("\n");
}
GLSLA("\n");
return 0;
}
-void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- int set_id)
+int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
+ FFVulkanPipeline *pl) /* Creates and maps one descriptor buffer per descriptor set of pl; returns 0 or a negative error code. */
{
- FFVulkanFunctions *vk = &s->vkfn;
+ int err;
- /* If a set has never been updated, update all queues' sets. */
- if (!pl->desc_set_initialized[set_id]) {
- for (int i = 0; i < pl->qf->nb_queues; i++) {
- int idx = set_id*pl->qf->nb_queues + i;
- vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
- pl->desc_set[idx],
- pl->desc_template[idx],
- s);
- }
- pl->desc_set_initialized[set_id] = 1;
- return;
- }
+ pl->desc_bind = av_mallocz(pl->nb_descriptor_sets*sizeof(*pl->desc_bind)); /* one binding info per descriptor set */
+ if (!pl->desc_bind)
+ return AVERROR(ENOMEM);
+
+ pl->bound_buffer_indices = av_mallocz(pl->nb_descriptor_sets*
+ sizeof(*pl->bound_buffer_indices));
+ if (!pl->bound_buffer_indices)
+ return AVERROR(ENOMEM);
- set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
+ for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+ FFVulkanDescriptorSet *set = &pl->desc_set[i];
+ int nb = set->read_only ? 1 : pool->pool_size; /* read-only sets hold a single copy, others pool->pool_size copies */
+
+ err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb,
+ NULL, NULL, set->usage,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+ if (err < 0)
+ return err; /* NOTE(review): nothing allocated so far is released here; presumably the caller runs ff_vk_pipeline_free() on failure - confirm */
+
+ err = ff_vk_map_buffer(s, &set->buf, &set->desc_mem, 0); /* desc_mem is the pointer descriptors are later written through */
+ if (err < 0)
+ return err;
+
+ pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
+ .usage = set->usage,
+ .address = set->buf.address,
+ };
+
+ pl->bound_buffer_indices[i] = i; /* set i is backed by desc_bind[i] */
+ }
- vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
- pl->desc_set[set_id],
- pl->desc_template[set_id],
- s);
+ return 0;
}
-void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
- VkShaderStageFlagBits stage, int offset,
- size_t size, void *src)
+static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanDescriptorSet *set,
+ int bind_idx, int array_idx,
+ VkDescriptorGetInfoEXT *desc_get_info,
+ size_t desc_size) /* Writes one descriptor of desc_size bytes into the mapped memory of set, at the slot computed below. */
{
FFVulkanFunctions *vk = &s->vkfn;
+ const size_t exec_offset = set->read_only ? 0 : set->aligned_size*e->idx; /* read-only sets have one copy; others are indexed by the execution context */
+ void *desc = set->desc_mem + /* Base */
+ exec_offset + /* Execution context */
+ set->binding_offset[bind_idx] + /* Descriptor binding */
+ array_idx*desc_size; /* Array position */
- vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
- stage, offset, size, src);
+ vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); /* NOTE(review): bind_idx/array_idx are not range-checked here - callers must pass valid indices */
}
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
+int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkSampler *sampler) /* Writes a sampler descriptor for array element offs of binding bind in descriptor set set. Returns 0, or AVERROR(EINVAL) if the binding is not a sampler. */
{
- VkResult ret;
- FFVulkanFunctions *vk = &s->vkfn;
-
- pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
- if (!pl->desc_staging)
- return AVERROR(ENOMEM);
+ FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+ VkDescriptorGetInfoEXT desc_get_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+ .type = desc_set->binding[bind].descriptorType, /* type comes from the binding stored in the set, not from the caller */
+ };
- { /* Init descriptor set pool */
- VkDescriptorPoolCreateInfo pool_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
- .poolSizeCount = pl->pool_size_desc_num,
- .pPoolSizes = pl->pool_size_desc,
- .maxSets = pl->total_descriptor_sets,
- };
+ switch (desc_get_info.type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ desc_get_info.data.pSampler = sampler;
+ break;
+ default:
+ av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+ set, bind, desc_get_info.type);
+ return AVERROR(EINVAL);
+ break; /* unreachable after the return above */
+ };
- ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
- s->hwctx->alloc, &pl->desc_pool);
- av_freep(&pl->pool_size_desc);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
- "pool: %s\n", ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+ update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info,
+ s->desc_buf_props.samplerDescriptorSize);
- { /* Allocate descriptor sets */
- VkDescriptorSetAllocateInfo alloc_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
- .descriptorPool = pl->desc_pool,
- .descriptorSetCount = pl->total_descriptor_sets,
- .pSetLayouts = pl->desc_layout,
- };
+ return 0;
+}
- pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
- if (!pl->desc_set)
- return AVERROR(ENOMEM);
+int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkImageView view, VkImageLayout layout, VkSampler sampler) /* Writes an image descriptor for array element offs of binding bind in descriptor set set. Returns 0, or AVERROR(EINVAL) if the binding is not one of the image types handled below. */
+{
+ FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+ VkDescriptorGetInfoEXT desc_get_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+ .type = desc_set->binding[bind].descriptorType, /* type comes from the binding stored in the set, not from the caller */
+ };
+ VkDescriptorImageInfo desc_img_info = {
+ .imageView = view,
+ .sampler = sampler,
+ .imageLayout = layout,
+ };
+ size_t desc_size; /* set by every case that falls through to the update call */
- ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
- pl->desc_set);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+ switch (desc_get_info.type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ desc_get_info.data.pSampledImage = &desc_img_info;
+ desc_size = s->desc_buf_props.sampledImageDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ desc_get_info.data.pStorageImage = &desc_img_info;
+ desc_size = s->desc_buf_props.storageImageDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ desc_get_info.data.pInputAttachmentImage = &desc_img_info;
+ desc_size = s->desc_buf_props.inputAttachmentDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ desc_get_info.data.pCombinedImageSampler = &desc_img_info;
+ desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize;
+ break;
+ default:
+ av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+ set, bind, desc_get_info.type);
+ return AVERROR(EINVAL);
+ break; /* unreachable after the return above */
+ };
- { /* Finally create the pipeline layout */
- VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging,
- .pushConstantRangeCount = pl->push_consts_num,
- .pPushConstantRanges = pl->push_consts,
- };
+ update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);
- for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
- pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];
+ return 0;
+}
- ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
- s->hwctx->alloc, &pl->pipeline_layout);
- av_freep(&pl->push_consts);
- pl->push_consts_num = 0;
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt) /* Writes a buffer descriptor (addr, len, fmt) for array element offs of binding bind in descriptor set set. Returns 0, or AVERROR(EINVAL) if the binding is not one of the buffer types handled below. */
+{
+ FFVulkanDescriptorSet *desc_set = &pl->desc_set[set];
+ VkDescriptorGetInfoEXT desc_get_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
+ .type = desc_set->binding[bind].descriptorType, /* type comes from the binding stored in the set, not from the caller */
+ };
+ VkDescriptorAddressInfoEXT desc_buf_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT, /* required by the Vulkan spec; was left zero-initialized before */
+ .address = addr,
+ .range = len,
+ .format = fmt,
+ };
+ size_t desc_size; /* set by every case that falls through to the update call */
- { /* Descriptor template (for tightly packed descriptors) */
- VkDescriptorUpdateTemplateCreateInfo *dt;
+ switch (desc_get_info.type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ desc_get_info.data.pUniformBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.uniformBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ desc_get_info.data.pStorageBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.storageBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ desc_get_info.data.pUniformTexelBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ desc_get_info.data.pStorageTexelBuffer = &desc_buf_info;
+ desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize;
+ break;
+ default:
+ av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n",
+ set, bind, desc_get_info.type);
+ return AVERROR(EINVAL);
+ break; /* unreachable after the return above */
+ };
- pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
- if (!pl->desc_template)
- return AVERROR(ENOMEM);
+ update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size);
- /* Create update templates for the descriptor sets */
- for (int i = 0; i < pl->total_descriptor_sets; i++) {
- dt = &pl->desc_template_info[i];
- dt->pipelineLayout = pl->pipeline_layout;
- ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
- dt, s->hwctx->alloc,
- &pl->desc_template[i]);
- if (ret != VK_SUCCESS) {
- av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
- "template: %s\n", ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
+ return 0;
+}
- /* Free the duplicated memory used for the template entries */
- for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
- dt = &pl->desc_template_info[i];
- av_free((void *)dt->pDescriptorUpdateEntries);
- }
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, AVFrame *f,
+ VkImageView *views, int set, int binding,
+ VkImageLayout layout, VkSampler sampler) /* Writes views[i] into array element i of the given binding, once per plane of the frame's software pixel format. */
+{
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); /* NOTE(review): views must hold at least nb_planes entries - not checked here */
- av_freep(&pl->desc_template_info);
- }
+ for (int i = 0; i < nb_planes; i++)
+ ff_vk_set_descriptor_image(s, pl, e, set, binding, i, /* NOTE(review): the int result (AVERROR(EINVAL) on a type mismatch) is discarded */
+ views[i], layout, sampler);
+}
- return 0;
+void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanPipeline *pl,
+ VkShaderStageFlagBits stage,
+ int offset, size_t size, void *src) /* Records a push-constant update of size bytes from src at offset into the command buffer of e. */
+{
+ FFVulkanFunctions *vk = &s->vkfn;
+ vk->CmdPushConstants(e->buf, pl->pipeline_layout, /* uses the layout of the pipeline passed in by the caller */
+ stage, offset, size, src);
}
-FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
+static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) /* Creates pl->pipeline_layout from the layouts of all descriptor sets plus the push-constant ranges; returns 0, AVERROR(ENOMEM) or AVERROR_EXTERNAL. */
{
- FFVulkanPipeline *pl = create_pipeline(s);
- if (pl)
- pl->qf = qf;
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ VkPipelineLayoutCreateInfo pipeline_layout_info;
+
+ VkDescriptorSetLayout *desc_layouts = av_malloc(pl->nb_descriptor_sets*
+ sizeof(*desc_layouts)); /* element size, not pointer size: the handle is 64-bit even where pointers are 32-bit */
+ if (!desc_layouts)
+ return AVERROR(ENOMEM);
+
+ for (int i = 0; i < pl->nb_descriptor_sets; i++)
+ desc_layouts[i] = pl->desc_set[i].layout;
+
+ /* Finally create the pipeline layout */
+ pipeline_layout_info = (VkPipelineLayoutCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .pSetLayouts = desc_layouts,
+ .setLayoutCount = pl->nb_descriptor_sets,
+ .pushConstantRangeCount = pl->push_consts_num,
+ .pPushConstantRanges = pl->push_consts,
+ };
+
+ ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info,
+ s->hwctx->alloc, &pl->pipeline_layout);
+ av_free(desc_layouts); /* temporary array, only needed for the create call */
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
- return pl;
+ return 0;
}
-int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd)
{
- int i;
+ int err;
VkResult ret;
FFVulkanFunctions *vk = &s->vkfn;
- VkComputePipelineCreateInfo pipe = {
+ VkComputePipelineCreateInfo pipeline_create_info;
+
+ err = init_pipeline_layout(s, pl);
+ if (err < 0)
+ return err;
+
+ pipeline_create_info = (VkComputePipelineCreateInfo) {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
.layout = pl->pipeline_layout,
+ .stage = shd->shader,
};
- for (i = 0; i < pl->shaders_num; i++) {
- if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
- pipe.stage = pl->shaders[i]->shader;
- break;
- }
- }
- if (i == pl->shaders_num) {
- av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
- return AVERROR(EINVAL);
- }
-
- ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+ ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
+ &pipeline_create_info,
s->hwctx->alloc, &pl->pipeline);
if (ret != VK_SUCCESS) {
av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
@@ -1599,157 +1767,68 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
}
pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
+ pl->wg_size[0] = shd->local_size[0];
+ pl->wg_size[1] = shd->local_size[1];
+ pl->wg_size[2] = shd->local_size[2];
return 0;
}
-void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
+void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, /* Records binding of the pipeline, its descriptor buffers and their per-set offsets into the command buffer of e. */
FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
+ VkDeviceSize offsets[1024]; /* NOTE(review): fixed capacity; pl->nb_descriptor_sets is not checked against it below */
- vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);
-
- for (int i = 0; i < pl->descriptor_sets_num; i++)
- pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
+ /* Bind pipeline */
+ vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
- vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
- pl->pipeline_layout, 0,
- pl->descriptor_sets_num,
- (VkDescriptorSet *)pl->desc_staging,
- 0, NULL);
+ if (pl->nb_descriptor_sets) {
+ for (int i = 0; i < pl->nb_descriptor_sets; i++)
+ offsets[i] = pl->desc_set[i].read_only ? 0 : pl->desc_set[i].aligned_size*e->idx; /* same per-context offset that update_set_descriptor() writes at */
- e->bound_pl = pl;
-}
-
-static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
-{
- FFVulkanFunctions *vk = &s->vkfn;
-
- /* Make sure all queues have finished executing */
- for (int i = 0; i < e->qf->nb_queues; i++) {
- FFVkQueueCtx *q = &e->queues[i];
-
- if (q->fence) {
- vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
- }
-
- /* Free the fence */
- if (q->fence)
- vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
-
- /* Free buffer dependencies */
- for (int j = 0; j < q->nb_buf_deps; j++)
- av_buffer_unref(&q->buf_deps[j]);
- av_free(q->buf_deps);
-
- /* Free frame dependencies */
- for (int j = 0; j < q->nb_frame_deps; j++)
- av_frame_free(&q->frame_deps[j]);
- av_free(q->frame_deps);
+ /* Bind descriptor buffers */
+ vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind);
+ /* Binding offsets */
+ vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout,
+ 0, pl->nb_descriptor_sets,
+ pl->bound_buffer_indices, offsets);
}
-
- if (e->bufs)
- vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
- if (e->pool)
- vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
- if (e->query.pool)
- vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc);
-
- av_freep(&e->query.data);
- av_freep(&e->bufs);
- av_freep(&e->queues);
- av_freep(&e->sem_sig);
- av_freep(&e->sem_sig_val);
- av_freep(&e->sem_sig_val_dst);
- av_freep(&e->sem_wait);
- av_freep(&e->sem_wait_dst);
- av_freep(&e->sem_wait_val);
- av_free(e);
}
-static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl) /* Destroys the pipeline, its layout and every descriptor set's buffer and layout, then frees the arrays owned by pl. pl itself is not freed. */
{
FFVulkanFunctions *vk = &s->vkfn;
- for (int i = 0; i < pl->shaders_num; i++) {
- FFVkSPIRVShader *shd = pl->shaders[i];
- av_bprint_finalize(&shd->src, NULL);
- vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
- s->hwctx->alloc);
- av_free(shd);
- }
-
- vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
- vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
- s->hwctx->alloc);
+ if (pl->pipeline)
+ vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
+ if (pl->pipeline_layout)
+ vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
+ s->hwctx->alloc);
- for (int i = 0; i < pl->desc_layout_num; i++) {
- if (pl->desc_template && pl->desc_template[i])
- vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
- s->hwctx->alloc);
- if (pl->desc_layout && pl->desc_layout[i])
- vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
+ for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+ FFVulkanDescriptorSet *set = &pl->desc_set[i];
+ if (set->buf.mem) /* only unmap when the buffer has memory attached */
+ ff_vk_unmap_buffer(s, &set->buf, 0);
+ ff_vk_free_buf(s, &set->buf);
+ if (set->layout)
+ vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout,
s->hwctx->alloc);
+ av_free(set->binding);
+ av_free(set->binding_offset);
}
- /* Also frees the descriptor sets */
- if (pl->desc_pool)
- vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
- s->hwctx->alloc);
-
- av_freep(&pl->desc_staging);
av_freep(&pl->desc_set);
- av_freep(&pl->shaders);
- av_freep(&pl->desc_layout);
- av_freep(&pl->desc_template);
- av_freep(&pl->desc_set_initialized);
+ av_freep(&pl->desc_bind);
+ av_freep(&pl->bound_buffer_indices); /* allocated in ff_vk_exec_pipeline_register(); was leaked before */
av_freep(&pl->push_consts);
pl->push_consts_num = 0;
-
- /* Only freed in case of failure */
- av_freep(&pl->pool_size_desc);
- if (pl->desc_template_info) {
- for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
- VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
- av_free((void *)dt->pDescriptorUpdateEntries);
- }
- av_freep(&pl->desc_template_info);
- }
-
- av_free(pl);
}
void ff_vk_uninit(FFVulkanContext *s)
{
- FFVulkanFunctions *vk = &s->vkfn;
-
av_freep(&s->query_props);
av_freep(&s->qf_props);
av_freep(&s->video_props);
- if (s->spirv_compiler)
- s->spirv_compiler->uninit(&s->spirv_compiler);
-
- for (int i = 0; i < s->exec_ctx_num; i++)
- free_exec_ctx(s, s->exec_ctx[i]);
- av_freep(&s->exec_ctx);
-
- for (int i = 0; i < s->samplers_num; i++) {
- vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
- s->hwctx->alloc);
- av_free(s->samplers[i]);
- }
- av_freep(&s->samplers);
-
- for (int i = 0; i < s->pipelines_num; i++)
- free_pipeline(s, s->pipelines[i]);
- av_freep(&s->pipelines);
-
- av_freep(&s->scratch);
- s->scratch_size = 0;
-
- av_buffer_unref(&s->device_ref);
av_buffer_unref(&s->frames_ref);
}
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 3f887a782e..b0921810c6 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -21,6 +21,8 @@
#define VK_NO_PROTOTYPES
+#include <stdatomic.h>
+
#include "pixdesc.h"
#include "bprint.h"
#include "hwcontext.h"
@@ -28,11 +30,6 @@
#include "hwcontext_vulkan.h"
#include "vulkan_loader.h"
-#define FF_VK_DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
- VK_IMAGE_USAGE_STORAGE_BIT | \
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
- VK_IMAGE_USAGE_TRANSFER_DST_BIT)
-
/* GLSL management macros */
#define INDENT(N) INDENT_##N
#define INDENT_0
@@ -57,6 +54,8 @@
goto fail; \
} while (0)
+#define DUP_SAMPLER(x) { x, x, x, x }
+
typedef struct FFVkSPIRVShader {
const char *name; /* Name for id/debugging purposes */
AVBPrint src;
@@ -64,19 +63,6 @@ typedef struct FFVkSPIRVShader {
VkPipelineShaderStageCreateInfo shader;
} FFVkSPIRVShader;
-typedef struct FFVkSPIRVCompiler {
- void *priv;
- int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx,
- struct FFVkSPIRVShader *shd, uint8_t **data,
- size_t *size, const char *entrypoint, void **opaque);
- void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque);
- void (*uninit)(struct FFVkSPIRVCompiler **ctx);
-} FFVkSPIRVCompiler;
-
-typedef struct FFVkSampler {
- VkSampler sampler[4];
-} FFVkSampler;
-
typedef struct FFVulkanDescriptorSetBinding {
const char *name;
VkDescriptorType type;
@@ -86,8 +72,7 @@ typedef struct FFVulkanDescriptorSetBinding {
uint32_t dimensions; /* Needed for e.g. sampler%iD */
uint32_t elems; /* 0 - scalar, 1 or more - vector */
VkShaderStageFlags stages;
- FFVkSampler *sampler; /* Sampler to use for all elems */
- void *updater; /* Pointer to VkDescriptor*Info */
+ VkSampler samplers[4]; /* Sampler to use for all elems */
} FFVulkanDescriptorSetBinding;
typedef struct FFVkBuffer {
@@ -95,119 +80,133 @@ typedef struct FFVkBuffer {
VkDeviceMemory mem;
VkMemoryPropertyFlagBits flags;
size_t size;
+ VkDeviceAddress address;
+
+ /* Local use only */
+ VkPipelineStageFlags2 stage;
+ VkAccessFlags2 access;
+
+ /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE */
+ uint8_t *mapped_mem;
} FFVkBuffer;
typedef struct FFVkQueueFamilyCtx {
int queue_family;
int nb_queues;
- int cur_queue;
- int actual_queues;
} FFVkQueueFamilyCtx;
-typedef struct FFVulkanPipeline {
- FFVkQueueFamilyCtx *qf;
+typedef struct FFVulkanDescriptorSet { /* One descriptor set of a pipeline: its layout plus the buffer its descriptors are written into */
+ VkDescriptorSetLayout layout; /* created with CreateDescriptorSetLayout, destroyed in ff_vk_pipeline_free() */
+ FFVkBuffer buf; /* descriptor buffer, created in ff_vk_exec_pipeline_register() */
+ uint8_t *desc_mem; /* mapping of buf obtained via ff_vk_map_buffer() */
+ VkDeviceSize layout_size; /* value reported by GetDescriptorSetLayoutSizeEXT */
+ VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */
+ VkDeviceSize total_size; /* Once registered to an exec context */
+ VkBufferUsageFlags usage; /* usage flags used for buf and for its binding info */
+
+ VkDescriptorSetLayoutBinding *binding; /* indexed by binding number; descriptorType is read when writing descriptors */
+ VkDeviceSize *binding_offset; /* per-binding offsets from GetDescriptorSetLayoutBindingOffsetEXT */
+ int nb_bindings;
+ int read_only; /* nonzero: a single copy is allocated instead of one per execution context */
+} FFVulkanDescriptorSet;
+
+typedef struct FFVulkanPipeline { /* A pipeline with its layout, push-constant ranges and descriptor sets */
VkPipelineBindPoint bind_point;
/* Contexts */
VkPipelineLayout pipeline_layout;
VkPipeline pipeline;
- /* Shaders */
- FFVkSPIRVShader **shaders;
- int shaders_num;
-
/* Push consts */
VkPushConstantRange *push_consts;
int push_consts_num;
+ /* Workgroup */
+ int wg_size[3]; /* copied from the shader's local_size in ff_vk_init_compute_pipeline() */
+
/* Descriptors */
- VkDescriptorSetLayout *desc_layout;
- VkDescriptorPool desc_pool;
- VkDescriptorSet *desc_set;
-#if VK_USE_64_BIT_PTR_DEFINES == 1
- void **desc_staging;
-#else
- uint64_t *desc_staging;
-#endif
- VkDescriptorSetLayoutBinding **desc_binding;
- VkDescriptorUpdateTemplate *desc_template;
- int *desc_set_initialized;
- int desc_layout_num;
- int descriptor_sets_num;
- int total_descriptor_sets;
- int pool_size_desc_num;
-
- /* Temporary, used to store data in between initialization stages */
- VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
- VkDescriptorPoolSize *pool_size_desc;
+ FFVulkanDescriptorSet *desc_set; /* nb_descriptor_sets entries */
+ VkDescriptorBufferBindingInfoEXT *desc_bind; /* one per set, filled in ff_vk_exec_pipeline_register() */
+ uint32_t *bound_buffer_indices; /* one per set, filled in ff_vk_exec_pipeline_register() with the identity mapping */
+ int nb_descriptor_sets;
} FFVulkanPipeline;
-typedef struct FFVkQueueCtx {
- VkFence fence;
+typedef struct FFVkExecContext { /* State of one execution: command buffer, fence, dependencies and semaphores */
+ int idx; /* selects this context's slice of each non-read-only descriptor buffer */
+ const struct FFVkExecPool *parent; /* presumably the pool this context belongs to - TODO confirm */
+
+ /* Queue for the execution context */
VkQueue queue;
+ int qf; /* NOTE(review): looks like the queue family index - confirm */
+ int qi; /* NOTE(review): looks like the queue index within the family - confirm */
+
+ /* Command buffer for the context */
+ VkCommandBuffer buf;
+
+ /* Fence for the command buffer */
+ VkFence fence;
- int synchronous;
- int submitted;
+ void *query_data;
+ int query_idx;
/* Buffer dependencies */
AVBufferRef **buf_deps;
int nb_buf_deps;
- int buf_deps_alloc_size;
+ unsigned int buf_deps_alloc_size;
/* Frame dependencies */
AVFrame **frame_deps;
+ unsigned int frame_deps_alloc_size;
int nb_frame_deps;
- int frame_deps_alloc_size;
-} FFVkQueueCtx;
-
-typedef struct FFVkExecContext {
- FFVkQueueFamilyCtx *qf;
- VkCommandPool pool;
- VkCommandBuffer *bufs;
- FFVkQueueCtx *queues;
-
- struct {
- int idx;
- VkQueryPool pool;
- uint8_t *data;
-
- int nb_queries;
- int nb_results;
- int nb_statuses;
- int elem_64bits;
- size_t data_per_queue;
- int status_stride;
- } query;
+ VkSemaphoreSubmitInfo *sem_wait;
+ unsigned int sem_wait_alloc;
+ int sem_wait_cnt;
- AVBufferRef ***deps;
- int *nb_deps;
- int *dep_alloc_size;
+ VkSemaphoreSubmitInfo *sem_sig;
+ unsigned int sem_sig_alloc;
+ int sem_sig_cnt;
- FFVulkanPipeline *bound_pl;
+ uint64_t **sem_sig_val_dst;
+ unsigned int sem_sig_val_dst_alloc;
+ int sem_sig_val_dst_cnt;
- VkSemaphore *sem_wait;
- int sem_wait_alloc; /* Allocated sem_wait */
- int sem_wait_cnt;
+ uint8_t *frame_locked;
+ unsigned int frame_locked_alloc_size;
- uint64_t *sem_wait_val;
- int sem_wait_val_alloc;
+ VkAccessFlagBits *access_dst;
+ unsigned int access_dst_alloc;
- VkPipelineStageFlagBits *sem_wait_dst;
- int sem_wait_dst_alloc; /* Allocated sem_wait_dst */
+ VkImageLayout *layout_dst;
+ unsigned int layout_dst_alloc;
- VkSemaphore *sem_sig;
- int sem_sig_alloc; /* Allocated sem_sig */
- int sem_sig_cnt;
+ uint32_t *queue_family_dst;
+ unsigned int queue_family_dst_alloc;
- uint64_t *sem_sig_val;
- int sem_sig_val_alloc;
-
- uint64_t **sem_sig_val_dst;
- int sem_sig_val_dst_alloc;
+ uint8_t *frame_update;
+ unsigned int frame_update_alloc_size;
} FFVkExecContext;
+typedef struct FFVkExecPool { /* A set of execution contexts together with their command pool and query pool */
+ FFVkQueueFamilyCtx *qf;
+ FFVkExecContext *contexts; /* presumably pool_size entries - TODO confirm against ff_vk_exec_pool_init() */
+ atomic_int_least64_t idx; /* NOTE(review): looks like the counter ff_vk_exec_get() uses to pick a context - confirm */
+
+ VkCommandPool cmd_buf_pool;
+ VkCommandBuffer *cmd_bufs;
+ int pool_size; /* also the number of copies ff_vk_exec_pipeline_register() allocates for non-read-only descriptor sets */
+
+ VkQueryPool query_pool;
+ void *query_data;
+ int query_results;
+ int query_statuses;
+ int query_64bit;
+ int query_status_stride;
+ int nb_queries;
+ size_t qd_size;
+} FFVkExecPool;
+
typedef struct FFVulkanContext {
const AVClass *class; /* Filters and encoders use this */
@@ -216,14 +215,16 @@ typedef struct FFVulkanContext {
VkPhysicalDeviceProperties2 props;
VkPhysicalDeviceDriverProperties driver_props;
VkPhysicalDeviceMemoryProperties mprops;
+ VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;
+ VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props;
VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
VkQueueFamilyVideoPropertiesKHR *video_props;
VkQueueFamilyProperties2 *qf_props;
- AVBufferRef *device_ref;
AVHWDeviceContext *device;
AVVulkanDeviceContext *hwctx;
+ AVBufferRef *input_frames_ref;
AVBufferRef *frames_ref;
AVHWFramesContext *frames;
AVVulkanFramesContext *hwfc;
@@ -231,28 +232,11 @@ typedef struct FFVulkanContext {
uint32_t qfs[5];
int nb_qfs;
- FFVkSPIRVCompiler *spirv_compiler;
-
/* Properties */
int output_width;
int output_height;
enum AVPixelFormat output_format;
enum AVPixelFormat input_format;
-
- /* Samplers */
- FFVkSampler **samplers;
- int samplers_num;
-
- /* Exec contexts */
- FFVkExecContext **exec_ctx;
- int exec_ctx_num;
-
- /* Pipelines (each can have 1 shader of each type) */
- FFVulkanPipeline **pipelines;
- int pipelines_num;
-
- void *scratch; /* Scratch memory used only in functions */
- unsigned int scratch_size;
} FFVulkanContext;
/* Identity mapping - r = r, b = b, g = g, a = a */
@@ -264,244 +248,207 @@ extern const VkComponentMapping ff_comp_identity_map;
const char *ff_vk_ret2str(VkResult res);
/**
- * Loads props/mprops/driver_props
- */
-int ff_vk_load_props(FFVulkanContext *s);
-
-/**
- * Returns 1 if the image is any sort of supported RGB
+ * Returns 1 if pixfmt is a usable RGB format.
*/
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
/**
- * Gets the glsl format string for a pixel format
+ * Returns the format to use for images in shaders.
*/
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
/**
- * Setup the queue families from the hardware device context.
- * Necessary for image creation to work.
- */
-void ff_vk_qf_fill(FFVulkanContext *s);
-
-/**
- * Allocate device memory.
- */
-int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
- VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
- VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
-
-/**
- * Get a queue family index and the number of queues. nb is optional.
+ * Loads props/mprops/driver_props
*/
-int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb);
+int ff_vk_load_props(FFVulkanContext *s);
/**
- * Initialize a queue family with a specific number of queues.
- * If nb_queues == 0, use however many queues the queue family has.
+ * Chooses a QF and loads it into a context.
*/
int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
- VkQueueFlagBits dev_family, int nb_queues);
-
-/**
- * Rotate through the queues in a queue family.
- */
-int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
-
-/**
- * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
- */
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, int unnorm_coords,
- VkFilter filt);
+ VkQueueFlagBits dev_family);
/**
- * Create an imageview.
- * Guaranteed to remain alive until the queue submission has finished executing,
- * and will be destroyed after that.
+ * Allocates/frees an execution pool.
+ * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add()
+ * has been called.
*/
-int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
- VkImageView *v, VkImage img, VkFormat fmt,
- const VkComponentMapping map);
+int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+ FFVkExecPool *pool, int nb_contexts,
+ int nb_queries, VkQueryType query_type, int query_64bit,
+ const void *query_create_pnext);
+void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool);
/**
- * Define a push constant for a given stage into a pipeline.
- * Must be called before the pipeline layout has been initialized.
+ * Retrieve an execution context from a pool. Threadsafe.
*/
-int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
- VkShaderStageFlagBits stage);
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool);
/**
- * Inits a pipeline. Everything in it will be auto-freed when calling
- * ff_vk_filter_uninit().
+ * Performs nb_queries queries and returns their results and statuses.
+ * Execution must have been waited on to produce valid results.
*/
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf);
+VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
+ void **data, int64_t *status);
/**
- * Inits a shader for a specific pipeline. Will be auto-freed on uninit.
+ * Start/submit/wait an execution.
+ * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait()
+ * is not necessary (unless using it is just better).
*/
-FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
- VkShaderStageFlags stage);
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e);
+int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e);
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
/**
- * Writes the workgroup size for a shader.
+ * Execution dependency management.
+ * Can attach buffers to executions that will only be unref'd once the
+ * buffer has finished executing.
+ * Adding a frame dep will *lock the frame*, until either the dependencies
+ * are discarded, the execution is submitted, or a failure happens.
+ * update_frame will update the frame's properties before it is unlocked,
+ * only if submission was successful.
*/
-void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]);
+int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
+ AVBufferRef **deps, int nb_deps, int ref);
+int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+ VkPipelineStageFlagBits2 wait_stage,
+ VkPipelineStageFlagBits2 signal_stage);
+void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f,
+ VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar);
+int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
+ VkSemaphore *dst, uint64_t *dst_val,
+ AVFrame *f);
+void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e);
/**
- * Adds a descriptor set to the shader and registers them in the pipeline.
+ * Create an imageview and add it as a dependency to an execution.
*/
-int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
- int num, int only_print_to_shader);
+int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
+ VkImageView views[AV_NUM_DATA_POINTERS],
+ AVFrame *f);
-/**
- * Compiles the shader, entrypoint must be set to "main".
- */
-int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
- const char *entrypoint);
+void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
+ AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
+ VkPipelineStageFlags src_stage,
+ VkPipelineStageFlags dst_stage,
+ VkAccessFlagBits new_access,
+ VkImageLayout new_layout,
+ uint32_t new_qf);
/**
- * Pretty print shader, mainly used by shader compilers.
+ * Memory/buffer/image allocation helpers.
*/
-void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio);
-
-/**
- * Initializes the pipeline layout after all shaders and descriptor sets have
- * been finished.
- */
-int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl);
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+ void *pNext, void *alloc_pNext,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
+int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size,
+ void *pNext, void *alloc_pNext,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
- * Initializes a compute pipeline. Will pick the first shader with the
- * COMPUTE flag set.
+ * Buffer management code.
*/
-int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl);
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[],
+ int nb_buffers, int invalidate);
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers,
+ int flush);
-/**
- * Updates a descriptor set via the updaters defined.
- * Can be called immediately after pipeline creation, but must be called
- * at least once before queue submission.
- */
-void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
- int set_id);
+/**
+ * Single-buffer convenience wrapper around ff_vk_map_buffers().
+ * Wraps buf in a one-element array and forwards it with nb_buffers = 1;
+ * mem and invalidate are passed through unchanged.
+ * Returns whatever ff_vk_map_buffers() returns.
+ */
+static inline int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem,
+ int invalidate)
+{
+ return ff_vk_map_buffers(s, (FFVkBuffer *[]){ buf }, mem,
+ 1, invalidate);
+}
-/**
- * Init an execution context for command recording and queue submission.
- * WIll be auto-freed on uninit.
- */
-int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
- FFVkQueueFamilyCtx *qf);
+/**
+ * Single-buffer convenience wrapper around ff_vk_unmap_buffers().
+ * Wraps buf in a one-element array and forwards it with nb_buffers = 1;
+ * flush is passed through unchanged.
+ * Returns whatever ff_vk_unmap_buffers() returns.
+ */
+static inline int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
+{
+ return ff_vk_unmap_buffers(s, (FFVkBuffer *[]){ buf }, 1, flush);
+}
-/**
- * Create a query pool for a command context.
- * elem_64bits exists to troll driver devs for compliance. All results
- * and statuses returned should be 32 bits, unless this is set, then it's 64bits.
- */
-int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
- int nb_queries, VkQueryType type,
- int elem_64bits, void *create_pnext);
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
-/**
- * Get results for query.
- * Returns the status of the query.
- * Sets *res to the status of the queries.
- */
-int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
- int query_idx, void **data, int64_t *status);
+/** Initialize a pool and create AVBufferRefs containing FFVkBuffer.
+ * Threadsafe to use. Buffers are automatically mapped on creation if
+ * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT is set in mem_props. Users should
+ * synchronize access themselves. Mainly meant for device-local buffers. */
+int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
+ AVBufferRef **buf, VkBufferUsageFlags usage,
+ void *create_pNext, size_t size,
+ VkMemoryPropertyFlagBits mem_props);
/**
- * Begin recording to the command buffer. Previous execution must have been
- * completed, which ff_vk_submit_exec_queue() will ensure.
+ * Create a sampler.
*/
-int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e);
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+ int unnorm_coords, VkFilter filt);
/**
- * Add a command to bind the completed pipeline and its descriptor sets.
- * Must be called after ff_vk_start_exec_recording() and before submission.
+ * Shader management.
*/
-void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
- FFVulkanPipeline *pl);
+int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
+ VkShaderStageFlags stage);
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z);
+void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio);
+int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
+ uint8_t *spirv, size_t spirv_size, const char *entrypoint);
+void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd);
/**
- * Updates push constants.
- * Must be called after binding a pipeline if any push constants were defined.
+ * Add/update push constants for execution.
*/
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+ VkShaderStageFlagBits stage);
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
- VkShaderStageFlagBits stage, int offset,
- size_t size, void *src);
+ FFVulkanPipeline *pl,
+ VkShaderStageFlagBits stage,
+ int offset, size_t size, void *src);
/**
- * Gets the command buffer to use for this submission from the exe context.
+ * Add descriptor to a pipeline. Must be called before pipeline init.
*/
-VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e);
+int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd,
+ FFVulkanDescriptorSetBinding *desc, int nb,
+ int read_only, int print_to_shader_only);
-/**
- * Adds a generic AVBufferRef as a queue depenency.
- */
-int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
- AVBufferRef **deps, int nb_deps);
-
-/**
- * Discards all queue dependencies
- */
-void ff_vk_discard_exec_deps(FFVkExecContext *e);
+/* Initialize/free a pipeline. */
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd);
+void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl);
/**
- * Adds a frame as a queue dependency. This also manages semaphore signalling.
- * Must be called before submission.
+ * Register a pipeline with an exec pool.
+ * Pool may be NULL if all descriptor sets are read-only.
*/
-int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
- VkPipelineStageFlagBits in_wait_dst_flag);
+int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool,
+ FFVulkanPipeline *pl);
-/**
- * Submits a command buffer to the queue for execution. Will not block.
- */
-int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Wait on a command buffer's execution. Mainly useful for debugging and
- * development.
- */
-void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e);
-
-/**
- * Create a VkBuffer with the specified parameters.
- */
-int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
- void *pNext, void *alloc_pNext,
- VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
-
-/**
- * Maps the buffer to userspace. Set invalidate to 1 if reading the contents
- * is necessary.
- */
-int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
- int nb_buffers, int invalidate);
-
-/**
- * Unmaps the buffer from userspace. Set flush to 1 to write and sync.
- */
-int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
- int flush);
+/* Bind pipeline */
+void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanPipeline *pl);
-/**
- * Frees a buffer.
- */
-void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
+/* Update sampler/image/buffer descriptors. e may be NULL for read-only descriptors. */
+int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkSampler *sampler);
+int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkImageView view, VkImageLayout layout, VkSampler sampler);
+int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, int set, int bind, int offs,
+ VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt);
-/**
- * Creates an image, allocates and binds memory in the given
- * idx value of the dst frame. If mem is non-NULL, then no memory will be
- * allocated, but instead the given memory will be bound to the image.
- */
-int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx,
- int width, int height, VkFormat fmt, VkImageTiling tiling,
- VkImageUsageFlagBits usage, VkImageCreateFlags flags,
- void *create_pnext,
- VkDeviceMemory *mem, void *alloc_pnext);
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkExecContext *e, AVFrame *f,
+ VkImageView *views, int set, int binding,
+ VkImageLayout layout, VkSampler sampler);
/**
- * Frees the main Vulkan context.
+ * Frees main context.
*/
void ff_vk_uninit(FFVulkanContext *s);
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 212681d475..24b096af10 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -94,6 +94,7 @@ typedef enum FFVulkanExtensions {
/* Queue */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetDeviceQueue) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit2) \
\
/* Fences */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateFence) \
--
2.40.0
[-- Attachment #37: 0056-vulkan-add-ff_vk_count_images.patch --]
[-- Type: text/x-diff, Size: 779 bytes --]
From efb12fceb5217b160ad552b05d1ce136c77863ca Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:37:18 +0100
Subject: [PATCH 56/97] vulkan: add ff_vk_count_images()
---
libavutil/vulkan.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index b0921810c6..449ffbd334 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -239,6 +239,15 @@ typedef struct FFVulkanContext {
enum AVPixelFormat input_format;
} FFVulkanContext;
+/**
+ * Counts the images in a frame: the number of leading non-NULL entries
+ * of f->img.
+ * The scan is bounded by the size of the img array, because a frame that
+ * uses every slot has no NULL entry for the loop to stop on.
+ */
+static inline int ff_vk_count_images(AVVkFrame *f)
+{
+    int cnt = 0;
+    while (cnt < FF_ARRAY_ELEMS(f->img) && f->img[cnt])
+        cnt++;
+
+    return cnt;
+}
+
/* Identity mapping - r = r, b = b, g = g, a = a */
extern const VkComponentMapping ff_comp_identity_map;
--
2.40.0
[-- Attachment #38: 0057-vulkan-enable-forcing-of-full-subgroups.patch --]
[-- Type: text/x-diff, Size: 3709 bytes --]
From e9ac86c283feb19a3520ddc57a4fb6314b0b343e Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 6 Mar 2023 00:19:12 +0100
Subject: [PATCH 57/97] vulkan: enable forcing of full subgroups
---
libavutil/vulkan.c | 15 +++++++++++++--
libavutil/vulkan.h | 4 +++-
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 9d607ee1ce..70d162772c 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -91,9 +91,13 @@ int ff_vk_load_props(FFVulkanContext *s)
s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT,
};
+ s->subgroup_props = (VkPhysicalDeviceSubgroupSizeControlProperties) {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES,
+ .pNext = &s->hprops,
+ };
s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT,
- .pNext = &s->hprops,
+ .pNext = &s->subgroup_props,
};
s->driver_props = (VkPhysicalDeviceDriverProperties) {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
@@ -1281,13 +1285,20 @@ void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
}
int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
- VkShaderStageFlags stage)
+ VkShaderStageFlags stage, uint32_t required_subgroup_size)
{
av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shd->shader.stage = stage;
+ if (required_subgroup_size) {
+ shd->shader.flags |= VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT;
+ shd->shader.pNext = &shd->subgroup_info;
+ shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO;
+ shd->subgroup_info.requiredSubgroupSize = required_subgroup_size;
+ }
+
shd->name = name;
GLSLF(0, #version %i ,460);
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 449ffbd334..1d5ece3f09 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -61,6 +61,7 @@ typedef struct FFVkSPIRVShader {
AVBPrint src;
int local_size[3]; /* Compute shader workgroup sizes */
VkPipelineShaderStageCreateInfo shader;
+ VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroup_info;
} FFVkSPIRVShader;
typedef struct FFVulkanDescriptorSetBinding {
@@ -217,6 +218,7 @@ typedef struct FFVulkanContext {
VkPhysicalDeviceMemoryProperties mprops;
VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops;
VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props;
+ VkPhysicalDeviceSubgroupSizeControlProperties subgroup_props;
VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
VkQueueFamilyVideoPropertiesKHR *video_props;
VkQueueFamilyProperties2 *qf_props;
@@ -399,7 +401,7 @@ int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
* Shader management.
*/
int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
- VkShaderStageFlags stage);
+ VkShaderStageFlags stage, uint32_t required_subgroup_size);
void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z);
void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio);
int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
--
2.40.0
[-- Attachment #39: 0058-vulkan-make-GLSL-macro-functions-semicolumn-safe.patch --]
[-- Type: text/x-diff, Size: 1961 bytes --]
From fe7fc855e72c9099de05760229cfeba8bc107f85 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 24 Mar 2023 02:22:06 +0100
Subject: [PATCH 58/97] vulkan: make GLSL macro functions semicolumn-safe
---
libavutil/vulkan.h | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 1d5ece3f09..4de233b0e5 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -40,12 +40,28 @@
#define INDENT_5 INDENT_4 INDENT_1
#define INDENT_6 INDENT_5 INDENT_1
#define C(N, S) INDENT(N) #S "\n"
-#define GLSLC(N, S) av_bprintf(&shd->src, C(N, S))
-#define GLSLA(...) av_bprintf(&shd->src, __VA_ARGS__)
-#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__)
-#define GLSLD(D) GLSLC(0, ); \
- av_bprint_append_data(&shd->src, D, strlen(D)); \
- GLSLC(0, )
+
+/* Shader source emission helpers. Every macro appends to shd->src, so a
+ * variable named shd must be in scope wherever they are used. Each body is
+ * wrapped in do { } while (0) so that the macro expands to a single
+ * statement and can be followed by a semicolon like a function call. */
+
+/* Append S, stringified via C(N, S), as one line. The resulting string is
+ * passed to av_bprintf() as the format string. */
+#define GLSLC(N, S) \
+ do { \
+ av_bprintf(&shd->src, C(N, S)); \
+ } while (0)
+
+/* Append printf-style formatted text; the arguments are forwarded to
+ * av_bprintf() as given. */
+#define GLSLA(...) \
+ do { \
+ av_bprintf(&shd->src, __VA_ARGS__); \
+ } while (0)
+
+/* Like GLSLC, but the stringified line is the format string for the
+ * trailing arguments. */
+#define GLSLF(N, S, ...) \
+ do { \
+ av_bprintf(&shd->src, C(N, S), __VA_ARGS__); \
+ } while (0)
+
+/* Append the string D verbatim, with a newline before and after it.
+ * D is expanded twice (once for strlen()), so it should have no side
+ * effects. */
+#define GLSLD(D) \
+ do { \
+ av_bprintf(&shd->src, "\n"); \
+ av_bprint_append_data(&shd->src, D, strlen(D)); \
+ av_bprintf(&shd->src, "\n"); \
+ } while (0)
/* Helper, pretty much every Vulkan return value needs to be checked */
#define RET(x) \
--
2.40.0
[-- Attachment #40: 0059-hwcontext_vulkan-rewrite-to-support-multiplane-surfa.patch --]
[-- Type: text/x-diff, Size: 74862 bytes --]
From 89e47afc304aaf01c9c25a328ddfde37873e1f89 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 11 Jan 2023 09:37:35 +0100
Subject: [PATCH 59/97] hwcontext_vulkan: rewrite to support multiplane
surfaces
---
libavutil/hwcontext_vulkan.c | 791 +++++++++++++++++++----------------
libavutil/hwcontext_vulkan.h | 73 ++--
2 files changed, 474 insertions(+), 390 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 96afeb471e..ffca0b2fc3 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -67,6 +69,8 @@ typedef struct VulkanQueueCtx {
VkFence fence;
VkQueue queue;
int was_synchronous;
+ int qf;
+ int qidx;
/* Buffer dependencies */
AVBufferRef **buf_deps;
@@ -116,6 +120,9 @@ typedef struct VulkanDevicePriv {
/* Option to allocate all image planes in a single allocation */
int contiguous_planes;
+ /* Disable multiplane images */
+ int disable_multiplane;
+
/* Nvidia */
int dev_is_nvidia;
} VulkanDevicePriv;
@@ -150,112 +157,207 @@ typedef struct AVVkFrameInternal {
#endif
} AVVkFrameInternal;
-#define ADD_VAL_TO_LIST(list, count, val) \
- do { \
- list = av_realloc_array(list, sizeof(*list), ++count); \
- if (!list) { \
- err = AVERROR(ENOMEM); \
- goto fail; \
- } \
- list[count - 1] = av_strdup(val); \
- if (!list[count - 1]) { \
- err = AVERROR(ENOMEM); \
- goto fail; \
- } \
- } while(0)
+#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT)
+#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)
-#define RELEASE_PROPS(props, count) \
- if (props) { \
- for (int i = 0; i < count; i++) \
- av_free((void *)((props)[i])); \
- av_free((void *)props); \
- }
-
-static const struct {
+static const struct FFVkFormatEntry {
+ VkFormat vkf;
enum AVPixelFormat pixfmt;
- const VkFormat vkfmts[5];
-} vk_pixfmt_planar_map[] = {
- { AV_PIX_FMT_GRAY8, { VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_GRAY16, { VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_GRAYF32, { VK_FORMAT_R32_SFLOAT } },
-
- { AV_PIX_FMT_NV12, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
- { AV_PIX_FMT_NV21, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
- { AV_PIX_FMT_P010, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
- { AV_PIX_FMT_P012, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
- { AV_PIX_FMT_P016, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
-
- { AV_PIX_FMT_NV16, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-
- { AV_PIX_FMT_NV24, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
- { AV_PIX_FMT_NV42, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
-
- { AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV420P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUV444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUV444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- /* There is no AV_PIX_FMT_YUVA420P12 */
- { AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
-
- { AV_PIX_FMT_VUYX, { VK_FORMAT_R8G8B8A8_UNORM } },
- { AV_PIX_FMT_XV36, { VK_FORMAT_R16G16B16A16_UNORM } },
-
- { AV_PIX_FMT_BGRA, { VK_FORMAT_B8G8R8A8_UNORM } },
- { AV_PIX_FMT_RGBA, { VK_FORMAT_R8G8B8A8_UNORM } },
- { AV_PIX_FMT_RGB24, { VK_FORMAT_R8G8B8_UNORM } },
- { AV_PIX_FMT_BGR24, { VK_FORMAT_B8G8R8_UNORM } },
- { AV_PIX_FMT_RGB48, { VK_FORMAT_R16G16B16_UNORM } },
- { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
- { AV_PIX_FMT_RGBA64, { VK_FORMAT_R16G16B16A16_UNORM } },
- { AV_PIX_FMT_RGB565, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
- { AV_PIX_FMT_BGR565, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
- { AV_PIX_FMT_BGR0, { VK_FORMAT_B8G8R8A8_UNORM } },
- { AV_PIX_FMT_RGB0, { VK_FORMAT_R8G8B8A8_UNORM } },
-
- /* Lower priority as there's an endianess-dependent overlap between these
- * and rgba/bgr0, and PACK32 formats are more limited */
- { AV_PIX_FMT_BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
- { AV_PIX_FMT_0BGR32, { VK_FORMAT_A8B8G8R8_UNORM_PACK32 } },
-
- { AV_PIX_FMT_X2RGB10, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
-
- { AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
- { AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
- { AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
- { AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+ VkImageAspectFlags aspect;
+ int vk_planes;
+ int nb_images;
+ int nb_images_fallback;
+ const VkFormat fallback[5];
+} vk_formats_list[] = {
+ /* Gray formats */
+ { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } },
+
+ /* RGB formats */
+ { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+ { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
+ { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } },
+ { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } },
+ { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } },
+ { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+ { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } },
+ { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } },
+ { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } },
+ { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } },
+
+ /* Planar RGB */
+ { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+ { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
+
+ /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */
+ { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+ /* Two-plane 422 YUV at 8, 10, 12 and 16 bits */
+ { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+ /* Two-plane 444 YUV at 8, 10, 12 and 16 bits */
+ { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } },
+ { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+ { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } },
+
+ /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */
+ { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+ { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
+
+ /* Single plane 422 at 8, 10 and 12 bits */
+ { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } },
+ { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
+ { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } },
};
+static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list);
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p)
{
- for (enum AVPixelFormat i = 0; i < FF_ARRAY_ELEMS(vk_pixfmt_planar_map); i++)
- if (vk_pixfmt_planar_map[i].pixfmt == p)
- return vk_pixfmt_planar_map[i].vkfmts;
+    /* Linear scan of the format table; the first entry listed for p wins and
+     * its fallback format array is handed back to the caller. */
+    for (int idx = 0; idx < nb_vk_formats_list; idx++) {
+        if (vk_formats_list[idx].pixfmt != p)
+            continue;
+        return vk_formats_list[idx].fallback;
+    }
return NULL;
}
+/* Look up the vk_formats_list entry describing pixel format p.
+ * Returns a pointer to the first matching entry, or NULL if p is not listed. */
+static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p)
+{
+    const struct FFVkFormatEntry *entry = vk_formats_list;
+    const struct FFVkFormatEntry *const end = vk_formats_list + nb_vk_formats_list;
+
+    while (entry < end) {
+        if (entry->pixfmt == p)
+            return entry;
+        entry++;
+    }
+
+    return NULL;
+}
+
+/* "Pure malice, Khronos" (originally: Malitia pura, Khronos).
+ *
+ * FN_MAP_TO(dst_t, dst_name, src_t, src_name) defines a static helper named
+ * map_<src_name>_to_<dst_name>() which takes a src_t bitmask `src` and
+ * returns a dst_t bitmask `dst`, starting from 0 and applying MAP_TO() once
+ * for every (format feature flag, image usage flag) pair listed in the body.
+ *
+ * MAP_TO(flag1, flag2) is not defined here: it must be defined right before
+ * each FN_MAP_TO expansion and decides the direction of the translation.
+ * The two expansions below produce:
+ *   - map_usage_to_feats(): sets flag1 (feature) when src has flag2 (usage);
+ *   - map_feats_to_usage(): sets flag2 (usage) when src has flag1 (feature).
+ * The helpers are tagged av_unused, presumably so that an unused direction
+ * does not trigger an unused-function warning. */
+#define FN_MAP_TO(dst_t, dst_name, src_t, src_name) \
+ static av_unused dst_t map_ ##src_name## _to_ ##dst_name(src_t src) \
+ { \
+ dst_t dst = 0x0; \
+ MAP_TO(VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT, \
+ VK_IMAGE_USAGE_SAMPLED_BIT); \
+ MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT, \
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT); \
+ MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT, \
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT); \
+ MAP_TO(VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT, \
+ VK_IMAGE_USAGE_STORAGE_BIT); \
+ MAP_TO(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT, \
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); \
+ MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \
+ VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); \
+ MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR, \
+ VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR); \
+ MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR, \
+ VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR); \
+ MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR, \
+ VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); \
+ return dst; \
+ }
+
+#define MAP_TO(flag1, flag2) if (src & flag2) dst |= flag1;
+FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage)
+#undef MAP_TO
+#define MAP_TO(flag1, flag2) if (src & flag1) dst |= flag2;
+FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats)
+#undef MAP_TO
+#undef FN_MAP_TO
+
+/**
+ * Select the Vulkan format(s) used to back software pixel format p.
+ *
+ * The first vk_formats_list entry whose pixfmt equals p is examined. Its
+ * native format (vkf) is preferred; the entry's fallback formats are used
+ * when the native format lacks the basic features, or when the entry has more
+ * than one Vulkan plane and disable_multiplane is set. A format is considered
+ * usable only if it advertises sampled-image, transfer-source and
+ * transfer-destination support for the requested tiling (any tiling other
+ * than VK_IMAGE_TILING_LINEAR is checked against the optimal-tiling
+ * features). For the fallback path only fallback[0] is queried.
+ *
+ * Every output pointer is optional and may be NULL.
+ *
+ * @param dev_ctx            device whose physical device is queried
+ * @param p                  software pixel format to look up
+ * @param tiling             image tiling the features are checked for
+ * @param fmts               receives the chosen format(s); only fmts[0] is
+ *                           written when the native format is chosen
+ * @param nb_images          receives the number of images (1 for the native
+ *                           format, nb_images_fallback otherwise)
+ * @param aspect             receives the entry's aspect mask
+ * @param supported_usage    receives the image usage flags derived from the
+ *                           chosen format's features
+ * @param disable_multiplane if nonzero, never pick a native format with more
+ *                           than one Vulkan plane
+ * @return 0 on success, AVERROR(ENOTSUP) if neither the native nor the
+ *         fallback format is usable, AVERROR(EINVAL) if p is not in the table
+ */
+static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
+                              VkImageTiling tiling,
+                              VkFormat fmts[AV_NUM_DATA_POINTERS],
+                              int *nb_images, VkImageAspectFlags *aspect,
+                              VkImageUsageFlags *supported_usage, int disable_multiplane)
+{
+    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+    VulkanDevicePriv *priv = dev_ctx->internal->priv;
+    FFVulkanFunctions *vk = &priv->vkfn;
+
+    const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
+                                                 VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
+                                                 VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
+
+    for (int i = 0; i < nb_vk_formats_list; i++) {
+        if (vk_formats_list[i].pixfmt == p) {
+            VkFormatProperties2 prop = {
+                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+            };
+            VkFormatFeatureFlagBits2 feats_primary, feats_secondary;
+            int basics_primary = 0, basics_secondary = 0;
+
+            vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
+                                                   vk_formats_list[i].vkf,
+                                                   &prop);
+
+            feats_primary = tiling == VK_IMAGE_TILING_LINEAR ?
+                            prop.formatProperties.linearTilingFeatures :
+                            prop.formatProperties.optimalTilingFeatures;
+            basics_primary = (feats_primary & basic_flags) == basic_flags;
+
+            if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) {
+                vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
+                                                       vk_formats_list[i].fallback[0],
+                                                       &prop);
+                feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ?
+                                  prop.formatProperties.linearTilingFeatures :
+                                  prop.formatProperties.optimalTilingFeatures;
+                basics_secondary = (feats_secondary & basic_flags) == basic_flags;
+            } else {
+                /* Native and fallback formats are identical: reuse the query
+                 * result so feats_secondary is never read uninitialized by
+                 * the fallback branch below. */
+                feats_secondary  = feats_primary;
+                basics_secondary = basics_primary;
+            }
+
+            if (basics_primary && !(disable_multiplane && vk_formats_list[i].vk_planes > 1)) {
+                /* Native format: a single image holds the whole frame. */
+                if (fmts)
+                    fmts[0] = vk_formats_list[i].vkf;
+                if (nb_images)
+                    *nb_images = 1;
+                if (aspect)
+                    *aspect = vk_formats_list[i].aspect;
+                if (supported_usage)
+                    *supported_usage = map_feats_to_usage(feats_primary);
+                return 0;
+            } else if (basics_secondary) {
+                /* Fallback: one format per fallback image. */
+                if (fmts) {
+                    for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++)
+                        fmts[j] = vk_formats_list[i].fallback[j];
+                }
+                if (nb_images)
+                    *nb_images = vk_formats_list[i].nb_images_fallback;
+                if (aspect)
+                    *aspect = vk_formats_list[i].aspect;
+                if (supported_usage)
+                    *supported_usage = map_feats_to_usage(feats_secondary);
+                return 0;
+            } else {
+                return AVERROR(ENOTSUP);
+            }
+        }
+    }
+
+    return AVERROR(EINVAL);
+}
+
static const void *vk_find_struct(const void *chain, VkStructureType stype)
{
const VkBaseInStructure *in = chain;
@@ -281,33 +383,6 @@ static void vk_link_struct(void *chain, void *in)
out->pNext = in;
}
-static int pixfmt_is_supported(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
- int linear)
-{
- AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
- VulkanDevicePriv *priv = dev_ctx->internal->priv;
- FFVulkanFunctions *vk = &priv->vkfn;
- const VkFormat *fmt = av_vkfmt_from_pixfmt(p);
- int planes = av_pix_fmt_count_planes(p);
-
- if (!fmt)
- return 0;
-
- for (int i = 0; i < planes; i++) {
- VkFormatFeatureFlags flags;
- VkFormatProperties2 prop = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
- };
- vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt[i], &prop);
- flags = linear ? prop.formatProperties.linearTilingFeatures :
- prop.formatProperties.optimalTilingFeatures;
- if (!(flags & FF_VK_DEFAULT_USAGE_FLAGS))
- return 0;
- }
-
- return 1;
-}
-
static int load_libvulkan(AVHWDeviceContext *ctx)
{
AVVulkanDeviceContext *hwctx = ctx->hwctx;
@@ -441,6 +516,27 @@ static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEX
return 0;
}
+/* Append a heap-allocated copy of the string val to the array list, which
+ * currently holds count entries.
+ * The array is grown through a temporary so that list stays valid (and is not
+ * leaked) when the reallocation fails, and count is incremented only once the
+ * new slot has been filled, so a cleanup that walks count entries never sees
+ * an unfilled slot.
+ * Requires a variable named err and a label named fail in the enclosing
+ * function; on allocation failure err is set to AVERROR(ENOMEM) and control
+ * jumps to fail. */
+#define ADD_VAL_TO_LIST(list, count, val)                                      \
+    do {                                                                       \
+        void *add_val_new_list_ = av_realloc_array(list, (count) + 1,          \
+                                                   sizeof(*list));             \
+        if (!add_val_new_list_) {                                              \
+            err = AVERROR(ENOMEM);                                             \
+            goto fail;                                                         \
+        }                                                                      \
+        list = add_val_new_list_;                                              \
+        list[count] = av_strdup(val);                                          \
+        if (!list[count]) {                                                    \
+            err = AVERROR(ENOMEM);                                             \
+            goto fail;                                                         \
+        }                                                                      \
+        count++;                                                               \
+    } while(0)
+
+/* Free an array of count heap-allocated entries together with the array
+ * itself; nothing is done when props is NULL.
+ * Arguments are parenthesized and the loop index uses a macro-private name so
+ * that an argument expression mentioning `i` is not captured. The macro is
+ * deliberately kept as a bare if-statement so existing call sites expand
+ * exactly as before. */
+#define RELEASE_PROPS(props, count)                                            \
+    if (props) {                                                               \
+        for (int release_idx_ = 0; release_idx_ < (count); release_idx_++)     \
+            av_free((void *)((props)[release_idx_]));                          \
+        av_free((void *)(props));                                              \
+    }
+
static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
const char * const **dst, uint32_t *num, int debug)
{
@@ -689,6 +785,10 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VkApplicationInfo application_info = {
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pApplicationName = "ffmpeg",
+ .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
+ LIBAVUTIL_VERSION_MINOR,
+ LIBAVUTIL_VERSION_MICRO),
.pEngineName = "libavutil",
.apiVersion = VK_API_VERSION_1_3,
.engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
@@ -1172,6 +1272,8 @@ static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
VulkanQueueCtx *q = &cmd->queues[i];
vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
q->was_synchronous = 1;
+ q->qf = queue_family_index;
+ q->qidx = i;
}
return 0;
@@ -1307,6 +1409,7 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
VkResult ret;
VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
FFVulkanFunctions *vk = &p->vkfn;
ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
@@ -1320,7 +1423,9 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
s_info->commandBufferCount = 1;
+ hwctx->lock_queue(hwfc->device_ctx, q->qf, q->qidx);
ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence);
+ hwctx->unlock_queue(hwfc->device_ctx, q->qf, q->qidx);
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n",
vk_ret2str(ret));
@@ -1335,7 +1440,6 @@ static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
q->was_synchronous = synchronous;
if (synchronous) {
- AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
vk->ResetFences(hwctx->act_dev, 1, &q->fence);
unref_exec_ctx_deps(hwfc, cmd);
@@ -1497,11 +1601,9 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
if (opt_d)
p->use_linear_images = strtol(opt_d->value, NULL, 10);
- opt_d = av_dict_get(opts, "contiguous_planes", NULL, 0);
+ opt_d = av_dict_get(opts, "disable_multiplane", NULL, 0);
if (opt_d)
- p->contiguous_planes = strtol(opt_d->value, NULL, 10);
- else
- p->contiguous_planes = -1;
+ p->disable_multiplane = strtol(opt_d->value, NULL, 10);
hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames;
hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount;
@@ -1755,8 +1857,12 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
int count = 0;
VulkanDevicePriv *p = ctx->internal->priv;
- for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
- count += pixfmt_is_supported(ctx, i, p->use_linear_images);
+ for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
+ count += vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
+ p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
+ VK_IMAGE_TILING_OPTIMAL,
+ NULL, NULL, NULL, NULL, 0) >= 0;
+ }
#if CONFIG_CUDA
if (p->dev_is_nvidia)
@@ -1769,9 +1875,14 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
return AVERROR(ENOMEM);
count = 0;
- for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
- if (pixfmt_is_supported(ctx, i, p->use_linear_images))
- constraints->valid_sw_formats[count++] = i;
+ for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) {
+ if (vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt,
+ p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
+ VK_IMAGE_TILING_OPTIMAL,
+ NULL, NULL, NULL, NULL, 0) >= 0) {
+ constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt;
+ }
+ }
#if CONFIG_CUDA
if (p->dev_is_nvidia)
@@ -1779,8 +1890,8 @@ static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
#endif
constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
- constraints->min_width = 0;
- constraints->min_height = 0;
+ constraints->min_width = 1;
+ constraints->min_height = 1;
constraints->max_width = p->props.properties.limits.maxImageDimension2D;
constraints->max_height = p->props.properties.limits.maxImageDimension2D;
@@ -1854,7 +1965,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
static void vulkan_free_internal(AVVkFrame *f)
{
- AVVkFrameInternal *internal = f->internal;
+ av_unused AVVkFrameInternal *internal = f->internal;
#if CONFIG_CUDA
if (internal->cuda_fc_ref) {
@@ -1894,17 +2005,22 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
- int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+ int nb_images = ff_vk_count_images(f);
- /* We could use vkWaitSemaphores, but the validation layer seems to have
- * issues tracking command buffer execution state on uninit. */
- vk->DeviceWaitIdle(hwctx->act_dev);
+ VkSemaphoreWaitInfo sem_wait = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+ .pSemaphores = f->sem,
+ .pValues = f->sem_value,
+ .semaphoreCount = nb_images,
+ };
+
+ vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
vulkan_free_internal(f);
- for (int i = 0; i < planes; i++) {
- vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
- vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+ for (int i = 0; i < nb_images; i++) {
+ vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
+ vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
}
@@ -1914,30 +2030,25 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
void *alloc_pnext, size_t alloc_pnext_stride)
{
- int err;
+ int img_cnt = 0, err;
VkResult ret;
AVHWDeviceContext *ctx = hwfc->device_ctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
AVVulkanFramesContext *hwfctx = hwfc->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
- VkMemoryRequirements cont_memory_requirements = { 0 };
- int cont_mem_size_list[AV_NUM_DATA_POINTERS] = { 0 };
- int cont_mem_size = 0;
-
AVVulkanDeviceContext *hwctx = ctx->hwctx;
- for (int i = 0; i < planes; i++) {
+ while (f->img[img_cnt]) {
int use_ded_mem;
VkImageMemoryRequirementsInfo2 req_desc = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
- .image = f->img[i],
+ .image = f->img[img_cnt],
};
VkMemoryDedicatedAllocateInfo ded_alloc = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
+ .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride),
};
VkMemoryDedicatedRequirements ded_req = {
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
@@ -1953,32 +2064,11 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size,
p->props.properties.limits.minMemoryMapAlignment);
- if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
- if (ded_req.requiresDedicatedAllocation) {
- av_log(hwfc, AV_LOG_ERROR, "Cannot allocate all planes in a single allocation, "
- "device requires dedicated image allocation!\n");
- return AVERROR(EINVAL);
- } else if (!i) {
- cont_memory_requirements = req.memoryRequirements;
- } else if (cont_memory_requirements.memoryTypeBits !=
- req.memoryRequirements.memoryTypeBits) {
- av_log(hwfc, AV_LOG_ERROR, "The memory requirements differ between plane 0 "
- "and %i, cannot allocate in a single region!\n",
- i);
- return AVERROR(EINVAL);
- }
-
- cont_mem_size_list[i] = FFALIGN(req.memoryRequirements.size,
- req.memoryRequirements.alignment);
- cont_mem_size += cont_mem_size_list[i];
- continue;
- }
-
/* In case the implementation prefers/requires dedicated allocation */
use_ded_mem = ded_req.prefersDedicatedAllocation |
ded_req.requiresDedicatedAllocation;
if (use_ded_mem)
- ded_alloc.image = f->img[i];
+ ded_alloc.image = f->img[img_cnt];
/* Allocate memory */
if ((err = alloc_mem(ctx, &req.memoryRequirements,
@@ -1986,42 +2076,19 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
- &f->flags, &f->mem[i])))
+ &f->flags, &f->mem[img_cnt])))
return err;
- f->size[i] = req.memoryRequirements.size;
- bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
- bind_info[i].image = f->img[i];
- bind_info[i].memory = f->mem[i];
- }
-
- if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
- cont_memory_requirements.size = cont_mem_size;
+ f->size[img_cnt] = req.memoryRequirements.size;
+ bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
+ bind_info[img_cnt].image = f->img[img_cnt];
+ bind_info[img_cnt].memory = f->mem[img_cnt];
- /* Allocate memory */
- if ((err = alloc_mem(ctx, &cont_memory_requirements,
- f->tiling == VK_IMAGE_TILING_LINEAR ?
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- (void *)(((uint8_t *)alloc_pnext)),
- &f->flags, &f->mem[0])))
- return err;
-
- f->size[0] = cont_memory_requirements.size;
-
- for (int i = 0, offset = 0; i < planes; i++) {
- bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
- bind_info[i].image = f->img[i];
- bind_info[i].memory = f->mem[0];
- bind_info[i].memoryOffset = offset;
-
- f->offset[i] = bind_info[i].memoryOffset;
- offset += cont_mem_size_list[i];
- }
+ img_cnt++;
}
/* Bind the allocated memory to the images */
- ret = vk->BindImageMemory2(hwctx->act_dev, planes, bind_info);
+ ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
vk_ret2str(ret));
@@ -2047,11 +2114,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VkImageLayout new_layout;
VkAccessFlags2 new_access;
AVVulkanFramesContext *vkfc = hwfc->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
- AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
+ int nb_images = ff_vk_count_images(frame);
VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
VkDependencyInfo dep_info;
@@ -2059,14 +2125,14 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pSignalSemaphoreValues = sem_sig_val,
- .signalSemaphoreValueCount = planes,
+ .signalSemaphoreValueCount = nb_images,
};
VkSubmitInfo s_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &s_timeline_sem_info,
.pSignalSemaphores = frame->sem,
- .signalSemaphoreCount = planes,
+ .signalSemaphoreCount = nb_images,
};
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
@@ -2076,7 +2142,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
vkfc->lock_frame(hwfc, frame);
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_images; i++) {
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
sem_sig_val[i] = frame->sem_value[i] + 1;
}
@@ -2094,10 +2160,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
dst_qf = VK_QUEUE_FAMILY_IGNORED;
s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
- s_timeline_sem_info.waitSemaphoreValueCount = planes;
+ s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
s_info.pWaitSemaphores = frame->sem;
s_info.pWaitDstStageMask = wait_st;
- s_info.waitSemaphoreCount = planes;
+ s_info.waitSemaphoreCount = nb_images;
break;
case PREP_MODE_EXTERNAL_EXPORT:
new_layout = VK_IMAGE_LAYOUT_GENERAL;
@@ -2105,10 +2171,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
src_qf = VK_QUEUE_FAMILY_IGNORED;
dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
- s_timeline_sem_info.waitSemaphoreValueCount = planes;
+ s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
s_info.pWaitSemaphores = frame->sem;
s_info.pWaitDstStageMask = wait_st;
- s_info.waitSemaphoreCount = planes;
+ s_info.waitSemaphoreCount = nb_images;
break;
case PREP_MODE_DECODING_DST:
new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
@@ -2127,7 +2193,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_images; i++) {
img_bar[i] = (VkImageMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.pNext = NULL,
@@ -2142,8 +2208,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
.image = frame->img[i],
.subresourceRange = (VkImageSubresourceRange) {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .layerCount = VK_REMAINING_ARRAY_LAYERS,
.levelCount = 1,
- .layerCount = 1,
},
};
@@ -2155,7 +2221,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
.pImageMemoryBarriers = img_bar,
- .imageMemoryBarrierCount = planes,
+ .imageMemoryBarrierCount = nb_images,
};
vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
@@ -2166,7 +2232,7 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
return err;
}
-static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
+static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format,
int frame_w, int frame_h, int plane)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
@@ -2185,17 +2251,17 @@ static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
VkImageTiling tiling, VkImageUsageFlagBits usage,
+ VkImageCreateFlags flags, int nb_layers,
void *create_pnext)
{
int err;
VkResult ret;
+ AVVulkanFramesContext *hwfc_vk = hwfc->hwctx;
AVHWDeviceContext *ctx = hwfc->device_ctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
- enum AVPixelFormat format = hwfc->sw_format;
- const VkFormat *img_fmts = av_vkfmt_from_pixfmt(format);
- const int planes = av_pix_fmt_count_planes(format);
+ AVVulkanFramesContext *frames = hwfc->hwctx;
VkExportSemaphoreCreateInfo ext_sem_info = {
.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
@@ -2230,17 +2296,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
return AVERROR(ENOMEM);
}
+ // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled)
+
/* Create the images */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; (hwfc_vk->format[i] != VK_FORMAT_UNDEFINED); i++) {
VkImageCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = create_pnext,
.imageType = VK_IMAGE_TYPE_2D,
- .format = img_fmts[i],
+ .format = hwfc_vk->format[i],
.extent.depth = 1,
.mipLevels = 1,
- .arrayLayers = 1,
- .flags = VK_IMAGE_CREATE_ALIAS_BIT,
+ .arrayLayers = nb_layers,
+ .flags = flags,
.tiling = tiling,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = usage,
@@ -2252,7 +2320,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
};
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
- format, hwfc->width, hwfc->height, i);
+ hwfc->sw_format, hwfc->width, hwfc->height, i);
ret = vk->CreateImage(hwctx->act_dev, &create_info,
hwctx->alloc, &f->img[i]);
@@ -2361,8 +2429,8 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
AVVulkanFramesContext *hwctx = hwfc->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
VulkanFramesPriv *fp = hwfc->internal->priv;
- VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
VkExternalMemoryHandleTypeFlags e = 0x0;
+ VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS];
VkExternalMemoryImageCreateInfo eiinfo = {
.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
@@ -2378,10 +2446,6 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
try_export_flags(hwfc, &eiinfo.handleTypes, &e,
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
-
- if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
- try_export_flags(hwfc, &eiinfo.handleTypes, &e,
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
#endif
for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
@@ -2390,8 +2454,8 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
eminfo[i].handleTypes = e;
}
- err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
- eiinfo.handleTypes ? &eiinfo : NULL);
+ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
+ hwctx->nb_layers, eiinfo.handleTypes ? &eiinfo : NULL);
if (err)
return NULL;
@@ -2454,104 +2518,89 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
VulkanFramesPriv *fp = hwfc->internal->priv;
AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
- const int has_modifiers = !!(p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS);
-
- /* Default tiling flags */
- hwctx->tiling = hwctx->tiling ? hwctx->tiling :
- has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
- p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
- VK_IMAGE_TILING_OPTIMAL;
-
- if (!hwctx->usage)
- hwctx->usage = FF_VK_DEFAULT_USAGE_FLAGS;
-
- modifier_info = vk_find_struct(hwctx->create_pnext,
- VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
-
- /* Get the supported modifiers if the user has not given any. */
- if (has_modifiers && !modifier_info) {
- const VkFormat *fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
- VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
- FFVulkanFunctions *vk = &p->vkfn;
- VkDrmFormatModifierPropertiesEXT *mod_props;
- uint64_t *modifiers;
- int modifier_count = 0;
-
- VkDrmFormatModifierPropertiesListEXT mod_props_list = {
- .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
- .pNext = NULL,
- .drmFormatModifierCount = 0,
- .pDrmFormatModifierProperties = NULL,
- };
- VkFormatProperties2 prop = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
- .pNext = &mod_props_list,
- };
-
- /* Get all supported modifiers */
- vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);
-
- if (!mod_props_list.drmFormatModifierCount) {
- av_log(hwfc, AV_LOG_ERROR, "There are no supported modifiers for the given sw_format\n");
- return AVERROR(EINVAL);
- }
-
- /* Createa structure to hold the modifier list info */
- modifier_info = av_mallocz(sizeof(*modifier_info));
- if (!modifier_info)
- return AVERROR(ENOMEM);
-
- modifier_info->pNext = NULL;
- modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
-
- /* Add structure to the image creation pNext chain */
- if (!hwctx->create_pnext)
- hwctx->create_pnext = modifier_info;
- else
- vk_link_struct(hwctx->create_pnext, (void *)modifier_info);
+ VkImageUsageFlagBits supported_usage;
+ const struct FFVkFormatEntry *fmt;
+ int disable_multiplane = p->disable_multiplane ||
+ (hwctx->flags & AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE);
- /* Backup the allocated struct to be freed later */
- fp->modifier_info = modifier_info;
+ /* Defaults */
+ if (!hwctx->nb_layers)
+ hwctx->nb_layers = 1;
- /* Allocate list of modifiers */
- modifiers = av_mallocz(mod_props_list.drmFormatModifierCount *
- sizeof(*modifiers));
- if (!modifiers)
- return AVERROR(ENOMEM);
-
- modifier_info->pDrmFormatModifiers = modifiers;
-
- /* Allocate a temporary list to hold all modifiers supported */
- mod_props = av_mallocz(mod_props_list.drmFormatModifierCount *
- sizeof(*mod_props));
- if (!mod_props)
- return AVERROR(ENOMEM);
-
- mod_props_list.pDrmFormatModifierProperties = mod_props;
+ /* VK_IMAGE_TILING_OPTIMAL == 0, can't check for it really */
+ if (p->use_linear_images &&
+ (hwctx->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT))
+ hwctx->tiling = VK_IMAGE_TILING_LINEAR;
- /* Finally get all modifiers from the device */
- vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt[0], &prop);
- /* Reject any modifiers that don't match our requirements */
- for (int i = 0; i < mod_props_list.drmFormatModifierCount; i++) {
- if (!(mod_props[i].drmFormatModifierTilingFeatures & hwctx->usage))
- continue;
+ fmt = vk_find_format_entry(hwfc->sw_format);
+ if (!fmt) {
+ av_log(hwfc, AV_LOG_ERROR, "Unsupported pixel format: %s!\n",
+ av_get_pix_fmt_name(hwfc->sw_format));
+ return AVERROR(EINVAL);
+ }
- modifiers[modifier_count++] = mod_props[i].drmFormatModifier;
+ if (hwctx->format[0] != VK_FORMAT_UNDEFINED) {
+ if (hwctx->format[0] != fmt->vkf) {
+ for (int i = 0; i < fmt->nb_images_fallback; i++) {
+ if (hwctx->format[i] != fmt->fallback[i]) {
+ av_log(hwfc, AV_LOG_ERROR, "Incompatible Vulkan format given "
+ "for the current sw_format %s!\n",
+ av_get_pix_fmt_name(hwfc->sw_format));
+ return AVERROR(EINVAL);
+ }
+ }
}
- if (!modifier_count) {
- av_log(hwfc, AV_LOG_ERROR, "None of the given modifiers supports"
- " the usage flags!\n");
- av_freep(&mod_props);
+ /* Check if the sw_format itself is supported */
+ err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
+ hwctx->tiling, NULL,
+ NULL, NULL, &supported_usage, 0);
+ if (err < 0) {
+ av_log(hwfc, AV_LOG_ERROR, "Unsupported sw format: %s!\n",
+ av_get_pix_fmt_name(hwfc->sw_format));
return AVERROR(EINVAL);
}
+ } else {
+ err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format,
+ hwctx->tiling, hwctx->format, NULL,
+ NULL, &supported_usage,
+ disable_multiplane);
+ if (err < 0)
+ return err;
+ }
- modifier_info->drmFormatModifierCount = modifier_count;
- av_freep(&mod_props);
+ /* Image usage flags */
+ if (!hwctx->usage) {
+ hwctx->usage = supported_usage & (VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR);
+ }
+
+ /* Image creation flags.
+ * Only fill them in automatically if the image is not going to be used as
+ * a DPB-only image, and we have SAMPLED/STORAGE bits set. */
+ if (!hwctx->img_flags) {
+ int is_lone_dpb = (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
+ !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR);
+ int sampleable = hwctx->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT);
+ if (sampleable && !is_lone_dpb) {
+ hwctx->img_flags = VK_IMAGE_CREATE_ALIAS_BIT;
+ if ((fmt->vk_planes > 1) && (hwctx->format[0] == fmt->vkf))
+ hwctx->img_flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
+ VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
+ }
}
+ if (!hwctx->lock_frame)
+ hwctx->lock_frame = lock_frame;
+
+ if (!hwctx->unlock_frame)
+ hwctx->unlock_frame = unlock_frame;
+
err = create_exec_ctx(hwfc, &fp->conv_ctx,
dev_hwctx->queue_family_comp_index,
dev_hwctx->nb_comp_queues);
@@ -2570,8 +2619,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return err;
/* Test to see if allocation will fail */
- err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
- hwctx->create_pnext);
+ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags,
+ hwctx->nb_layers, hwctx->create_pnext);
if (err)
return err;
@@ -2587,11 +2636,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return AVERROR(ENOMEM);
}
- if (!hwctx->lock_frame)
- hwctx->lock_frame = lock_frame;
- if (!hwctx->unlock_frame)
- hwctx->unlock_frame = unlock_frame;
-
return 0;
}
@@ -2667,7 +2711,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
const AVFrame *src, int flags)
{
VkResult ret;
- int err, mapped_mem_count = 0, mem_planes = 0;
+ int err, nb_mem = 0, mapped_mem_count = 0, mem_planes = 0;
AVVkFrame *f = (AVVkFrame *)src->data[0];
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
AVVulkanFramesContext *hwfctx = hwfc->hwctx;
@@ -2687,7 +2731,7 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
}
if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
- !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
+ !(hwfctx->tiling == VK_IMAGE_TILING_LINEAR)) {
av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
"and linear!\n");
err = AVERROR(EINVAL);
@@ -2697,35 +2741,35 @@ static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
dst->width = src->width;
dst->height = src->height;
- mem_planes = hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY ? 1 : planes;
- for (int i = 0; i < mem_planes; i++) {
+ for (int i = 0; i < AV_NUM_DATA_POINTERS; i++)
+ nb_mem += !!f->mem[i];
+
+ for (int i = 0; i < nb_mem; i++) {
ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0,
VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
- vk_ret2str(ret));
+ av_log(hwfc, AV_LOG_ERROR, "Failed to map %ith frame memory: %s\n",
+ i, vk_ret2str(ret));
err = AVERROR_EXTERNAL;
goto fail;
}
mapped_mem_count++;
}
- if (hwfctx->flags & AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY) {
- for (int i = 0; i < planes; i++)
- dst->data[i] = dst->data[0] + f->offset[i];
- }
+ for (int i = 0; i < planes; i++)
+ dst->data[i] = dst->data[i] + f->offset[i];
/* Check if the memory contents matter */
if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
!(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_mem; i++) {
map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
map_mem_ranges[i].size = VK_WHOLE_SIZE;
map_mem_ranges[i].memory = f->mem[i];
}
- ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, planes,
+ ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, nb_mem,
map_mem_ranges);
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
@@ -2767,25 +2811,25 @@ static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwma
{
AVVkFrame *f = hwmap->priv;
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ const int nb_images = ff_vk_count_images(f);
VkSemaphoreWaitInfo wait_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
.flags = 0x0,
.pSemaphores = f->sem,
.pValues = f->sem_value,
- .semaphoreCount = planes,
+ .semaphoreCount = nb_images,
};
vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX);
vulkan_free_internal(f);
- for (int i = 0; i < planes; i++) {
- vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
- vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+ for (int i = 0; i < nb_images; i++) {
+ vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
+ vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
}
@@ -2836,6 +2880,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ AVVulkanFramesContext *hwfctx = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
@@ -2892,8 +2937,8 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
.extent.depth = 1,
.mipLevels = 1,
.arrayLayers = 1,
- .flags = 0x0, /* ALIAS flag is implicit for imported images */
- .tiling = f->tiling,
+ .flags = 0x0,
+ .tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
@@ -2986,7 +3031,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
f->sem_value[i] = 0;
}
- for (int i = 0; i < desc->nb_objects; i++) {
+ for (int i = 0; i < desc->nb_layers; i++) {
/* Memory requirements */
VkImageMemoryRequirementsInfo2 req_desc = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
@@ -3004,9 +3049,13 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
VkMemoryFdPropertiesKHR fdmp = {
.sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
};
+ /* This assumes that a layer will never be constructed from multiple
+ * objects. If that were to happen in the real world, this code would
+ * need to import each plane separately.
+ */
VkImportMemoryFdInfoKHR idesc = {
.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
- .fd = dup(desc->objects[i].fd),
+ .fd = dup(desc->objects[desc->layers[i].planes[0].object_index].fd),
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
VkMemoryDedicatedAllocateInfo ded_alloc = {
@@ -3060,7 +3109,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
bind_info[bind_counts].pNext = planes > 1 ? &plane_info[bind_counts] : NULL;
bind_info[bind_counts].image = f->img[i];
- bind_info[bind_counts].memory = f->mem[desc->layers[i].planes[j].object_index];
+ bind_info[bind_counts].memory = f->mem[i];
/* Offset is already signalled via pPlaneLayouts above */
bind_info[bind_counts].memoryOffset = 0;
@@ -3437,13 +3486,13 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
case AV_PIX_FMT_VAAPI:
- if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
+ if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
return vulkan_map_from_vaapi(hwfc, dst, src, flags);
else
return AVERROR(ENOSYS);
#endif
case AV_PIX_FMT_DRM_PRIME:
- if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
+ if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
return vulkan_map_from_drm(hwfc, dst, src, flags);
else
return AVERROR(ENOSYS);
@@ -3623,13 +3672,13 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
switch (dst->format) {
#if CONFIG_LIBDRM
case AV_PIX_FMT_DRM_PRIME:
- if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
+ if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
return vulkan_map_to_drm(hwfc, dst, src, flags);
else
return AVERROR(ENOSYS);
#if CONFIG_VAAPI
case AV_PIX_FMT_VAAPI:
- if (p->extensions & (FF_VK_EXT_EXTERNAL_DMABUF_MEMORY | FF_VK_EXT_DRM_MODIFIER_FLAGS))
+ if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
return vulkan_map_to_vaapi(hwfc, dst, src, flags);
else
return AVERROR(ENOSYS);
@@ -3883,7 +3932,9 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
int bar_num = 0;
VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
- const int planes = av_pix_fmt_count_planes(pix_fmt);
+ const int nb_images = ff_vk_count_images(frame);
+ int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt);
+
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@@ -3896,8 +3947,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.pWaitSemaphoreValues = frame->sem_value,
.pSignalSemaphoreValues = sem_signal_values,
- .waitSemaphoreValueCount = planes,
- .signalSemaphoreValueCount = planes,
+ .waitSemaphoreValueCount = nb_images,
+ .signalSemaphoreValueCount = nb_images,
};
VkSubmitInfo s_info = {
@@ -3906,8 +3957,8 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
.pSignalSemaphores = frame->sem,
.pWaitSemaphores = frame->sem,
.pWaitDstStageMask = sem_wait_dst,
- .signalSemaphoreCount = planes,
- .waitSemaphoreCount = planes,
+ .signalSemaphoreCount = nb_images,
+ .waitSemaphoreCount = nb_images,
};
vkfc->lock_frame(hwfc, frame);
@@ -3915,11 +3966,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
if ((err = wait_start_exec_ctx(hwfc, ectx)))
goto end;
- for (int i = 0; i < planes; i++)
+ for (int i = 0; i < nb_images; i++)
sem_signal_values[i] = frame->sem_value[i] + 1;
/* Change the image layout to something more optimal for transfers */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < nb_images; i++) {
VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
@@ -3955,13 +4006,20 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
0, NULL, 0, NULL, bar_num, img_bar);
/* Schedule a copy for each plane */
- for (int i = 0; i < planes; i++) {
+ for (int i = 0; i < pixfmt_planes; i++) {
+ int idx = FFMIN(i, nb_images - 1);
+ VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+ VK_IMAGE_ASPECT_PLANE_0_BIT,
+ VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
VkBufferImageCopy buf_reg = {
.bufferOffset = buf_offsets[i],
.bufferRowLength = buf_stride[i] / desc->comp[i].step,
.imageSubresource.layerCount = 1,
- .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .imageSubresource.aspectMask = plane_aspect[(pixfmt_planes != nb_images) +
+ i*(pixfmt_planes != nb_images)],
.imageOffset = { 0, 0, 0, },
};
@@ -3972,11 +4030,11 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
if (to_buf)
- vk->CmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
+ vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], frame->layout[idx],
vkbuf->buf, 1, &buf_reg);
else
- vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
- frame->layout[i], 1, &buf_reg);
+ vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx],
+ frame->layout[idx], 1, &buf_reg);
}
/* When uploading, do this asynchronously if the source is refcounted by
@@ -3993,7 +4051,7 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
goto end;
}
- if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
+ if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, pixfmt_planes)))
goto end;
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
} else {
@@ -4013,6 +4071,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
AVVkFrame *f = (AVVkFrame *)vkf->data[0];
AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
+ AVVulkanFramesContext *fc = hwfc->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
@@ -4035,7 +4094,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
return AVERROR(EINVAL);
/* For linear, host visiable images */
- if (f->tiling == VK_IMAGE_TILING_LINEAR &&
+ if (fc->tiling == VK_IMAGE_TILING_LINEAR &&
f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
AVFrame *map = av_frame_alloc();
if (!map)
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index e89fa52927..056cdc3fdb 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -162,6 +162,10 @@ typedef enum AVVkFrameFlags {
/* DEPRECATED: does nothing. */
AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1),
+
+ /* Disables multiplane images.
+ * This is required to export/import images from CUDA. */
+ AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE = (1ULL << 2),
} AVVkFrameFlags;
/**
@@ -169,26 +173,32 @@ typedef enum AVVkFrameFlags {
*/
typedef struct AVVulkanFramesContext {
/**
- * Controls the tiling of allocated frames. If left as optimal tiling,
- * then during av_hwframe_ctx_init() will decide based on whether the device
- * supports DRM modifiers, or if the linear_images flag is set, otherwise
- * will allocate optimally-tiled images.
+ * Controls the tiling of allocated frames.
+ * If left as VK_IMAGE_TILING_OPTIMAL (0), will use optimal tiling.
+ * Can be set to VK_IMAGE_TILING_LINEAR to force linear images,
+ * or VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT to force DMABUF-backed
+ * images.
+ * @note Imported frames from other APIs ignore this.
*/
VkImageTiling tiling;
/**
- * Defines extra usage of output frames. If left as 0, the following bits
- * are set: TRANSFER_SRC, TRANSFER_DST. SAMPLED and STORAGE.
+ * Defines extra usage of output frames. If non-zero, all flags MUST be
+ * supported by the VkFormat. Otherwise, will use supported flags amongst:
+ * - VK_IMAGE_USAGE_SAMPLED_BIT
+ * - VK_IMAGE_USAGE_STORAGE_BIT
+ * - VK_IMAGE_USAGE_TRANSFER_SRC_BIT
+ * - VK_IMAGE_USAGE_TRANSFER_DST_BIT
*/
VkImageUsageFlagBits usage;
/**
* Extension data for image creation.
- * If VkImageDrmFormatModifierListCreateInfoEXT is present in the chain,
- * and the device supports DRM modifiers, then images will be allocated
- * with the specific requested DRM modifiers.
+ * If DRM tiling is used, a VkImageDrmFormatModifierListCreateInfoEXT structure
+ * can be added to specify the exact modifier to use.
+ *
* Additional structures may be added at av_hwframe_ctx_init() time,
- * which will be freed automatically on uninit(), so users need only free
+ * which will be freed automatically on uninit(), so users must only free
* any structures they've allocated themselves.
*/
void *create_pnext;
@@ -209,6 +219,25 @@ typedef struct AVVulkanFramesContext {
*/
AVVkFrameFlags flags;
+ /**
+ * Flags to set during image creation. If unset, defaults to
+ * VK_IMAGE_CREATE_ALIAS_BIT.
+ */
+ VkImageCreateFlags img_flags;
+
+ /**
+ * Vulkan format for each image. MUST be compatible with the pixel format.
+ * If unset, will be automatically set.
+ * There are at most two compatible formats for a frame - a multiplane
+ * format, and a single-plane multi-image format.
+ */
+ VkFormat format[AV_NUM_DATA_POINTERS];
+
+ /**
+ * Number of layers each image will have.
+ */
+ int nb_layers;
+
/**
* Locks a frame, preventing other threads from changing frame properties.
* If set to NULL, will be set to lavu-internal functions that utilize a
@@ -228,14 +257,7 @@ typedef struct AVVulkanFramesContext {
} AVVulkanFramesContext;
/*
- * Frame structure, the VkFormat of the image will always match
- * the pool's sw_format.
- * All frames, imported or allocated, will be created with the
- * VK_IMAGE_CREATE_ALIAS_BIT flag set, so the memory may be aliased if needed.
- *
- * If all queue family indices in the device context are the same,
- * images will be created with the EXCLUSIVE sharing mode. Otherwise, all images
- * will be created using the CONCURRENT sharing mode.
+ * Frame structure.
*
* @note the size of this structure is not part of the ABI, to allocate
* you must use @av_vk_frame_alloc().
@@ -247,7 +269,7 @@ struct AVVkFrame {
VkImage img[AV_NUM_DATA_POINTERS];
/**
- * The same tiling must be used for all images in the frame.
+ * Tiling for the frame.
*/
VkImageTiling tiling;
@@ -265,13 +287,13 @@ struct AVVkFrame {
VkMemoryPropertyFlagBits flags;
/**
- * Updated after every barrier
+ * Updated after every barrier. One per VkImage.
*/
VkAccessFlagBits access[AV_NUM_DATA_POINTERS];
VkImageLayout layout[AV_NUM_DATA_POINTERS];
/**
- * Synchronization timeline semaphores, one for each sw_format plane.
+ * Synchronization timeline semaphores, one for each VkImage.
* Must not be freed manually. Must be waited on at every submission using
* the value in sem_value, and must be signalled at every submission,
* using an incremented value.
@@ -280,6 +302,7 @@ struct AVVkFrame {
/**
* Up to date semaphore value at which each image becomes accessible.
+ * One per VkImage.
* Clients must wait on this value when submitting a command queue,
* and increment it when signalling.
*/
@@ -291,16 +314,18 @@ struct AVVkFrame {
struct AVVkFrameInternal *internal;
/**
- * Describes the binding offset of each plane to the VkDeviceMemory.
+ * Describes the binding offset of each image to the VkDeviceMemory.
+ * One per VkImage.
*/
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
/**
* Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
* the image was allocated with the CONCURRENT concurrency option.
+ * One per VkImage.
*/
uint32_t queue_family[AV_NUM_DATA_POINTERS];
-} AVVkFrame;
+};
/**
* Allocates a single AVVkFrame and initializes everything as 0.
@@ -309,7 +334,7 @@ struct AVVkFrame {
AVVkFrame *av_vk_frame_alloc(void);
/**
- * Returns the format of each image up to the number of planes for a given sw_format.
+ * Returns the optimal format for a given sw_format, one for each plane.
* Returns NULL on unsupported formats.
*/
const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p);
--
2.40.0
[-- Attachment #41: 0060-hwcontext_vulkan-remove-linear-host_visible-fast-pat.patch --]
[-- Type: text/x-diff, Size: 7086 bytes --]
From ed1c61196b2b7f8a07545498e3ba27758c7257f7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sat, 18 Feb 2023 14:55:14 +0100
Subject: [PATCH 60/97] hwcontext_vulkan: remove linear+host_visible "fast"
path
The idea was that it's faster to map linear images and copy them
via regular memcpy. This is a very niche use case, and an inconsistently
useful one, as it would only really be faster on a few Intel GPUs.
Even then, using the non-cached memcpy would've been better.
Instead, scrap this code. Drivers are better at figuring out
what copy to use, and if we're host-mapping, it should actually be
just as fast, if not faster.
---
libavutil/hwcontext_vulkan.c | 158 +----------------------------------
1 file changed, 2 insertions(+), 156 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index ffca0b2fc3..c4df542c8f 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2668,144 +2668,6 @@ static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
return 0;
}
-typedef struct VulkanMapping {
- AVVkFrame *frame;
- int flags;
-} VulkanMapping;
-
-static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
-{
- VulkanMapping *map = hwmap->priv;
- AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
- VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
-
- /* Check if buffer needs flushing */
- if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
- !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
- VkResult ret;
- VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
-
- for (int i = 0; i < planes; i++) {
- flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
- flush_ranges[i].memory = map->frame->mem[i];
- flush_ranges[i].size = VK_WHOLE_SIZE;
- }
-
- ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, planes,
- flush_ranges);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
- vk_ret2str(ret));
- }
- }
-
- for (int i = 0; i < planes; i++)
- vk->UnmapMemory(hwctx->act_dev, map->frame->mem[i]);
-
- av_free(map);
-}
-
-static int vulkan_map_frame_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
- const AVFrame *src, int flags)
-{
- VkResult ret;
- int err, nb_mem = 0, mapped_mem_count = 0, mem_planes = 0;
- AVVkFrame *f = (AVVkFrame *)src->data[0];
- AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- AVVulkanFramesContext *hwfctx = hwfc->hwctx;
- const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
- VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
-
- VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
- if (!map)
- return AVERROR(EINVAL);
-
- if (src->format != AV_PIX_FMT_VULKAN) {
- av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
- av_get_pix_fmt_name(src->format));
- err = AVERROR(EINVAL);
- goto fail;
- }
-
- if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
- !(hwfctx->tiling == VK_IMAGE_TILING_LINEAR)) {
- av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
- "and linear!\n");
- err = AVERROR(EINVAL);
- goto fail;
- }
-
- dst->width = src->width;
- dst->height = src->height;
-
- for (int i = 0; i < AV_NUM_DATA_POINTERS; i++)
- nb_mem += !!f->mem[i];
-
- for (int i = 0; i < nb_mem; i++) {
- ret = vk->MapMemory(hwctx->act_dev, f->mem[i], 0,
- VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Failed to map %ith frame memory: %s\n",
- i, vk_ret2str(ret));
- err = AVERROR_EXTERNAL;
- goto fail;
- }
- mapped_mem_count++;
- }
-
- for (int i = 0; i < planes; i++)
- dst->data[i] = dst->data[i] + f->offset[i];
-
- /* Check if the memory contents matter */
- if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
- !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
- VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
- for (int i = 0; i < nb_mem; i++) {
- map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
- map_mem_ranges[i].size = VK_WHOLE_SIZE;
- map_mem_ranges[i].memory = f->mem[i];
- }
-
- ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, nb_mem,
- map_mem_ranges);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
- vk_ret2str(ret));
- err = AVERROR_EXTERNAL;
- goto fail;
- }
- }
-
- for (int i = 0; i < planes; i++) {
- VkImageSubresource sub = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- };
- VkSubresourceLayout layout;
- vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout);
- dst->linesize[i] = layout.rowPitch;
- }
-
- map->frame = f;
- map->flags = flags;
-
- err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
- &vulkan_unmap_frame, map);
- if (err < 0)
- goto fail;
-
- return 0;
-
-fail:
- for (int i = 0; i < mapped_mem_count; i++)
- vk->UnmapMemory(hwctx->act_dev, f->mem[i]);
-
- av_free(map);
- return err;
-}
-
#if CONFIG_LIBDRM
static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
{
@@ -3685,8 +3547,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
#endif
#endif
default:
- return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
+ break;
}
+ return AVERROR(ENOSYS);
}
typedef struct ImageBuffer {
@@ -4093,23 +3956,6 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
if (swf->width > hwfc->width || swf->height > hwfc->height)
return AVERROR(EINVAL);
- /* For linear, host visiable images */
- if (fc->tiling == VK_IMAGE_TILING_LINEAR &&
- f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
- AVFrame *map = av_frame_alloc();
- if (!map)
- return AVERROR(ENOMEM);
- map->format = swf->format;
-
- err = vulkan_map_frame_to_mem(hwfc, map, vkf, AV_HWFRAME_MAP_WRITE);
- if (err)
- return err;
-
- err = av_frame_copy((AVFrame *)(from ? swf : map), from ? map : swf);
- av_frame_free(&map);
- return err;
- }
-
/* Create buffers */
for (int i = 0; i < planes; i++) {
size_t req_size;
--
2.40.0
[-- Attachment #42: 0061-hwcontext_vulkan-don-t-change-properties-if-prepare_.patch --]
[-- Type: text/x-diff, Size: 2772 bytes --]
From f5adba6f9456eaf6e058159b6f01f633c935d998 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 04:14:08 +0100
Subject: [PATCH 61/97] hwcontext_vulkan: don't change properties if
prepare_frame fails
---
libavutil/hwcontext_vulkan.c | 33 ++++++++++++++++-----------------
1 file changed, 16 insertions(+), 17 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c4df542c8f..294233a71b 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2190,16 +2190,13 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
break;
}
- /* Change the image layout to something more optimal for writes.
- * This also signals the newly created semaphore, making it usable
- * for synchronization */
for (int i = 0; i < nb_images; i++) {
img_bar[i] = (VkImageMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
.pNext = NULL,
- .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
- .srcAccessMask = 0x0,
- .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+ .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
+ .srcAccessMask = frame->access[i],
.dstAccessMask = new_access,
.oldLayout = frame->layout[i],
.newLayout = new_layout,
@@ -2212,21 +2209,23 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
.levelCount = 1,
},
};
-
- frame->layout[i] = img_bar[i].newLayout;
- frame->access[i] = img_bar[i].dstAccessMask;
}
- dep_info = (VkDependencyInfo) {
- .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
- .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
- .pImageMemoryBarriers = img_bar,
- .imageMemoryBarrierCount = nb_images,
- };
-
- vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
+ vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = nb_images,
+ });
err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ if (err >= 0) {
+ for (int i = 0; i < nb_images; i++) {
+ frame->layout[i] = img_bar[i].newLayout;
+ frame->access[i] = img_bar[i].dstAccessMask;
+ frame->queue_family[i] = img_bar[i].dstQueueFamilyIndex;
+ }
+ }
vkfc->unlock_frame(hwfc, frame);
return err;
--
2.40.0
[-- Attachment #43: 0062-hwcontext_vulkan-remove-duplicate-code-port-to-use-g.patch --]
[-- Type: text/x-diff, Size: 67850 bytes --]
From 6218c25baf01856e23084d4e10d2e0d8ffac3fa1 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 2 Mar 2023 13:02:25 +0100
Subject: [PATCH 62/97] hwcontext_vulkan: remove duplicate code, port to use
generic vulkan utils
---
libavutil/Makefile | 2 +-
libavutil/hwcontext_vulkan.c | 1004 +++++++---------------------------
libavutil/vulkan.h | 13 +
3 files changed, 207 insertions(+), 812 deletions(-)
diff --git a/libavutil/Makefile b/libavutil/Makefile
index dc9012f9a8..bd9c6f9e32 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -195,7 +195,7 @@ OBJS-$(CONFIG_QSV) += hwcontext_qsv.o
OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o
OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o
OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o
-OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o
+OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o vulkan.o
OBJS-$(!CONFIG_VULKAN) += hwcontext_stub.o
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 294233a71b..85ac48f307 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -78,18 +78,13 @@ typedef struct VulkanQueueCtx {
unsigned int buf_deps_alloc_size;
} VulkanQueueCtx;
-typedef struct VulkanExecCtx {
- VkCommandPool pool;
- VkCommandBuffer *bufs;
- VulkanQueueCtx *queues;
- int nb_queues;
- int cur_queue_idx;
-} VulkanExecCtx;
-
typedef struct VulkanDevicePriv {
/* Vulkan library and loader functions */
void *libvulkan;
- FFVulkanFunctions vkfn;
+
+ FFVulkanContext vkctx;
+ FFVkQueueFamilyCtx compute_qf;
+ FFVkQueueFamilyCtx transfer_qf;
/* Properties */
VkPhysicalDeviceProperties2 props;
@@ -111,9 +106,6 @@ typedef struct VulkanDevicePriv {
/* Debug callback */
VkDebugUtilsMessengerEXT debug_ctx;
- /* Extensions */
- FFVulkanExtensions extensions;
-
/* Settings */
int use_linear_images;
@@ -129,11 +121,11 @@ typedef struct VulkanDevicePriv {
typedef struct VulkanFramesPriv {
/* Image conversions */
- VulkanExecCtx conv_ctx;
+ FFVkExecPool compute_exec;
/* Image transfers */
- VulkanExecCtx upload_ctx;
- VulkanExecCtx download_ctx;
+ FFVkExecPool upload_exec;
+ FFVkExecPool download_exec;
/* Modifier info list to free at uninit */
VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
@@ -292,7 +284,7 @@ static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
{
AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
VulkanDevicePriv *priv = dev_ctx->internal->priv;
- FFVulkanFunctions *vk = &priv->vkfn;
+ FFVulkanFunctions *vk = &priv->vkctx.vkfn;
const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT |
VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
@@ -358,31 +350,6 @@ static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
return AVERROR(EINVAL);
}
-static const void *vk_find_struct(const void *chain, VkStructureType stype)
-{
- const VkBaseInStructure *in = chain;
- while (in) {
- if (in->sType == stype)
- return in;
-
- in = in->pNext;
- }
-
- return NULL;
-}
-
-static void vk_link_struct(void *chain, void *in)
-{
- VkBaseOutStructure *out = chain;
- if (!in)
- return;
-
- while (out->pNext)
- out = out->pNext;
-
- out->pNext = in;
-}
-
static int load_libvulkan(AVHWDeviceContext *ctx)
{
AVVulkanDeviceContext *hwctx = ctx->hwctx;
@@ -452,47 +419,6 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 },
};
-/* Converts return values to strings */
-static const char *vk_ret2str(VkResult res)
-{
-#define CASE(VAL) case VAL: return #VAL
- switch (res) {
- CASE(VK_SUCCESS);
- CASE(VK_NOT_READY);
- CASE(VK_TIMEOUT);
- CASE(VK_EVENT_SET);
- CASE(VK_EVENT_RESET);
- CASE(VK_INCOMPLETE);
- CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
- CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
- CASE(VK_ERROR_INITIALIZATION_FAILED);
- CASE(VK_ERROR_DEVICE_LOST);
- CASE(VK_ERROR_MEMORY_MAP_FAILED);
- CASE(VK_ERROR_LAYER_NOT_PRESENT);
- CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
- CASE(VK_ERROR_FEATURE_NOT_PRESENT);
- CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
- CASE(VK_ERROR_TOO_MANY_OBJECTS);
- CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
- CASE(VK_ERROR_FRAGMENTED_POOL);
- CASE(VK_ERROR_SURFACE_LOST_KHR);
- CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
- CASE(VK_SUBOPTIMAL_KHR);
- CASE(VK_ERROR_OUT_OF_DATE_KHR);
- CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
- CASE(VK_ERROR_VALIDATION_FAILED_EXT);
- CASE(VK_ERROR_INVALID_SHADER_NV);
- CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
- CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
- CASE(VK_ERROR_NOT_PERMITTED_EXT);
- CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
- CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
- CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
- default: return "Unknown error";
- }
-#undef CASE
-}
-
static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
VkDebugUtilsMessageTypeFlagsEXT messageType,
const VkDebugUtilsMessengerCallbackDataEXT *data,
@@ -543,7 +469,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
const char *tstr;
const char **extension_names = NULL;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
int err = 0, found, extensions_found = 0;
@@ -606,7 +532,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
continue;
av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
- p->extensions |= optional_exts[i].flag;
+ p->vkctx.extensions |= optional_exts[i].flag;
ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
}
@@ -622,7 +548,7 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts,
if (found) {
av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr);
ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
- p->extensions |= FF_VK_EXT_DEBUG_UTILS;
+ p->vkctx.extensions |= FF_VK_EXT_DEBUG_UTILS;
} else {
av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
tstr);
@@ -674,7 +600,7 @@ static int check_validation_layers(AVHWDeviceContext *ctx, AVDictionary *opts,
int found = 0, err = 0;
VulkanDevicePriv *priv = ctx->internal->priv;
- FFVulkanFunctions *vk = &priv->vkfn;
+ FFVulkanFunctions *vk = &priv->vkctx.vkfn;
uint32_t sup_layer_count;
VkLayerProperties *sup_layers;
@@ -781,7 +707,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
int err = 0, debug_mode = 0;
VkResult ret;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VkApplicationInfo application_info = {
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
@@ -809,7 +735,7 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
return err;
}
- err = ff_vk_load_functions(ctx, vk, p->extensions, 0, 0);
+ err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 0, 0);
if (err < 0) {
av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n");
return err;
@@ -845,12 +771,12 @@ static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
/* Check for errors */
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR_EXTERNAL;
goto fail;
}
- err = ff_vk_load_functions(ctx, vk, p->extensions, 1, 0);
+ err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 0);
if (err < 0) {
av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n");
goto fail;
@@ -911,7 +837,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
uint32_t num;
VkResult ret;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
VkPhysicalDevice *devices = NULL;
VkPhysicalDeviceIDProperties *idp = NULL;
VkPhysicalDeviceProperties2 *prop = NULL;
@@ -920,7 +846,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL);
if (ret != VK_SUCCESS || !num) {
- av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
+ av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", ff_vk_ret2str(ret));
return AVERROR(ENODEV);
}
@@ -931,7 +857,7 @@ static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR(ENODEV);
goto end;
}
@@ -1091,7 +1017,7 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
float *weights;
VkQueueFamilyProperties *qf = NULL;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
int graph_index, comp_index, tx_index, enc_index, dec_index;
@@ -1219,241 +1145,10 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
return 0;
}
-static int create_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
- int queue_family_index, int num_queues)
-{
- VkResult ret;
- AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
-
- VkCommandPoolCreateInfo cqueue_create = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
- .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
- .queueFamilyIndex = queue_family_index,
- };
- VkCommandBufferAllocateInfo cbuf_create = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
- .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
- .commandBufferCount = num_queues,
- };
-
- cmd->nb_queues = num_queues;
-
- /* Create command pool */
- ret = vk->CreateCommandPool(hwctx->act_dev, &cqueue_create,
- hwctx->alloc, &cmd->pool);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Command pool creation failure: %s\n",
- vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
-
- cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
- if (!cmd->bufs)
- return AVERROR(ENOMEM);
-
- cbuf_create.commandPool = cmd->pool;
-
- /* Allocate command buffer */
- ret = vk->AllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
- vk_ret2str(ret));
- av_freep(&cmd->bufs);
- return AVERROR_EXTERNAL;
- }
-
- cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
- if (!cmd->queues)
- return AVERROR(ENOMEM);
-
- for (int i = 0; i < num_queues; i++) {
- VulkanQueueCtx *q = &cmd->queues[i];
- vk->GetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
- q->was_synchronous = 1;
- q->qf = queue_family_index;
- q->qidx = i;
- }
-
- return 0;
-}
-
-static void free_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
-{
- AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
-
- if (cmd->queues) {
- for (int i = 0; i < cmd->nb_queues; i++) {
- VulkanQueueCtx *q = &cmd->queues[i];
-
- /* Make sure all queues have finished executing */
- if (q->fence && !q->was_synchronous) {
- vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(hwctx->act_dev, 1, &q->fence);
- }
-
- /* Free the fence */
- if (q->fence)
- vk->DestroyFence(hwctx->act_dev, q->fence, hwctx->alloc);
-
- /* Free buffer dependencies */
- for (int j = 0; j < q->nb_buf_deps; j++)
- av_buffer_unref(&q->buf_deps[j]);
- av_free(q->buf_deps);
- }
- }
-
- if (cmd->bufs)
- vk->FreeCommandBuffers(hwctx->act_dev, cmd->pool, cmd->nb_queues, cmd->bufs);
- if (cmd->pool)
- vk->DestroyCommandPool(hwctx->act_dev, cmd->pool, hwctx->alloc);
-
- av_freep(&cmd->queues);
- av_freep(&cmd->bufs);
- cmd->pool = VK_NULL_HANDLE;
-}
-
-static VkCommandBuffer get_buf_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
-{
- return cmd->bufs[cmd->cur_queue_idx];
-}
-
-static void unref_exec_ctx_deps(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
-{
- VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
-
- for (int j = 0; j < q->nb_buf_deps; j++)
- av_buffer_unref(&q->buf_deps[j]);
- q->nb_buf_deps = 0;
-}
-
-static int wait_start_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd)
-{
- VkResult ret;
- AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
- VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
-
- VkCommandBufferBeginInfo cmd_start = {
- .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
- .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
- };
-
- /* Create the fence and don't wait for it initially */
- if (!q->fence) {
- VkFenceCreateInfo fence_spawn = {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- };
- ret = vk->CreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
- &q->fence);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
- vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- } else if (!q->was_synchronous) {
- vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(hwctx->act_dev, 1, &q->fence);
- }
-
- /* Discard queue dependencies */
- unref_exec_ctx_deps(hwfc, cmd);
-
- ret = vk->BeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
- vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
-
- return 0;
-}
-
-static int add_buf_dep_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
- AVBufferRef * const *deps, int nb_deps)
-{
- AVBufferRef **dst;
- VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
-
- if (!deps || !nb_deps)
- return 0;
-
- dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
- (q->nb_buf_deps + nb_deps) * sizeof(*dst));
- if (!dst)
- goto err;
-
- q->buf_deps = dst;
-
- for (int i = 0; i < nb_deps; i++) {
- q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]);
- if (!q->buf_deps[q->nb_buf_deps])
- goto err;
- q->nb_buf_deps++;
- }
-
- return 0;
-
-err:
- unref_exec_ctx_deps(hwfc, cmd);
- return AVERROR(ENOMEM);
-}
-
-static int submit_exec_ctx(AVHWFramesContext *hwfc, VulkanExecCtx *cmd,
- VkSubmitInfo *s_info, AVVkFrame *f, int synchronous)
-{
- VkResult ret;
- VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
- VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
- FFVulkanFunctions *vk = &p->vkfn;
-
- ret = vk->EndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
- vk_ret2str(ret));
- unref_exec_ctx_deps(hwfc, cmd);
- return AVERROR_EXTERNAL;
- }
-
- s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
- s_info->commandBufferCount = 1;
-
- hwctx->lock_queue(hwfc->device_ctx, q->qf, q->qidx);
- ret = vk->QueueSubmit(q->queue, 1, s_info, q->fence);
- hwctx->unlock_queue(hwfc->device_ctx, q->qf, q->qidx);
- if (ret != VK_SUCCESS) {
- av_log(hwfc, AV_LOG_ERROR, "Queue submission failure: %s\n",
- vk_ret2str(ret));
- unref_exec_ctx_deps(hwfc, cmd);
- return AVERROR_EXTERNAL;
- }
-
- if (f)
- for (int i = 0; i < s_info->signalSemaphoreCount; i++)
- f->sem_value[i]++;
-
- q->was_synchronous = synchronous;
-
- if (synchronous) {
- vk->WaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
- vk->ResetFences(hwctx->act_dev, 1, &q->fence);
- unref_exec_ctx_deps(hwfc, cmd);
- } else { /* Rotate queues */
- cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues;
- }
-
- return 0;
-}
-
static void vulkan_device_free(AVHWDeviceContext *ctx)
{
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
if (hwctx->act_dev)
@@ -1485,7 +1180,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
VkResult ret;
AVDictionaryEntry *opt_d;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
/*
@@ -1588,7 +1283,7 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
for (int i = 0; i < dev_info.enabledExtensionCount; i++)
av_free((void *)dev_info.ppEnabledExtensionNames[i]);
av_free((void *)dev_info.ppEnabledExtensionNames);
@@ -1630,7 +1325,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
uint32_t qf_num;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
VkQueueFamilyProperties *qf;
int graph_index, comp_index, tx_index, enc_index, dec_index;
@@ -1639,13 +1334,13 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) {
if (!strcmp(hwctx->enabled_dev_extensions[i],
optional_device_exts[j].name)) {
- p->extensions |= optional_device_exts[j].flag;
+ p->vkctx.extensions |= optional_device_exts[j].flag;
break;
}
}
}
- err = ff_vk_load_functions(ctx, vk, p->extensions, 1, 1);
+ err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 1);
if (err < 0) {
av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n");
return err;
@@ -1665,7 +1360,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->props.properties.limits.minMemoryMapAlignment);
av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n",
p->props.properties.limits.nonCoherentAtomSize);
- if (p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
+ if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n",
p->hprops.minImportedHostPointerAlignment);
@@ -1748,6 +1443,13 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
/* Get device capabilities */
vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
+ p->vkctx.device = ctx;
+ p->vkctx.hwctx = hwctx;
+
+ ff_vk_load_props(&p->vkctx);
+ ff_vk_qf_init(&p->vkctx, &p->compute_qf, VK_QUEUE_COMPUTE_BIT);
+ ff_vk_qf_init(&p->vkctx, &p->transfer_qf, VK_QUEUE_TRANSFER_BIT);
+
return 0;
}
@@ -1912,7 +1614,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
VkResult ret;
int index = -1;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
@@ -1954,7 +1656,7 @@ static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
dev_hwctx->alloc, mem);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
return AVERROR(ENOMEM);
}
@@ -2004,7 +1706,7 @@ static void vulkan_frame_free(void *opaque, uint8_t *data)
AVHWFramesContext *hwfc = opaque;
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
int nb_images = ff_vk_count_images(f);
VkSemaphoreWaitInfo sem_wait = {
@@ -2034,8 +1736,7 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
VkResult ret;
AVHWDeviceContext *ctx = hwfc->device_ctx;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
- AVVulkanFramesContext *hwfctx = hwfc->hwctx;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
AVVulkanDeviceContext *hwctx = ctx->hwctx;
@@ -2091,7 +1792,7 @@ static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f,
ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
@@ -2106,129 +1807,84 @@ enum PrepMode {
PREP_MODE_DECODING_DPB,
};
-static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
+static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx,
AVVkFrame *frame, enum PrepMode pmode)
{
int err;
- uint32_t src_qf, dst_qf;
- VkImageLayout new_layout;
- VkAccessFlags2 new_access;
- AVVulkanFramesContext *vkfc = hwfc->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
- uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
- int nb_images = ff_vk_count_images(frame);
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
+ VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
+ int nb_img_bar = 0;
- VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
- VkDependencyInfo dep_info;
+ uint32_t dst_qf = VK_QUEUE_FAMILY_IGNORED;
+ VkImageLayout new_layout;
+ VkAccessFlags2 new_access;
- VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
- .pSignalSemaphoreValues = sem_sig_val,
- .signalSemaphoreValueCount = nb_images,
+ /* This is dirty - but it works. The vulkan.c dependency system doesn't
+ * free non-refcounted frames, and non-refcounted hardware frames cannot
+ * happen anywhere outside of here. */
+ AVBufferRef tmp_ref = {
+ .data = (uint8_t *)hwfc,
};
-
- VkSubmitInfo s_info = {
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &s_timeline_sem_info,
- .pSignalSemaphores = frame->sem,
- .signalSemaphoreCount = nb_images,
+ AVFrame tmp_frame = {
+ .data[0] = (uint8_t *)frame,
+ .hw_frames_ctx = &tmp_ref,
};
- VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
+ VkCommandBuffer cmd_buf;
+ FFVkExecContext *exec = ff_vk_exec_get(ectx);
+ cmd_buf = exec->buf;
+ ff_vk_exec_start(&p->vkctx, exec);
- if ((err = wait_start_exec_ctx(hwfc, ectx)))
+ err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, &tmp_frame,
+ VK_PIPELINE_STAGE_2_NONE,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT);
+ if (err < 0)
return err;
- vkfc->lock_frame(hwfc, frame);
-
- for (int i = 0; i < nb_images; i++) {
- wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
- sem_sig_val[i] = frame->sem_value[i] + 1;
- }
-
switch (pmode) {
case PREP_MODE_WRITE:
new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
- src_qf = VK_QUEUE_FAMILY_IGNORED;
- dst_qf = VK_QUEUE_FAMILY_IGNORED;
break;
case PREP_MODE_EXTERNAL_IMPORT:
new_layout = VK_IMAGE_LAYOUT_GENERAL;
new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
- src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
- dst_qf = VK_QUEUE_FAMILY_IGNORED;
- s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
- s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
- s_info.pWaitSemaphores = frame->sem;
- s_info.pWaitDstStageMask = wait_st;
- s_info.waitSemaphoreCount = nb_images;
break;
case PREP_MODE_EXTERNAL_EXPORT:
new_layout = VK_IMAGE_LAYOUT_GENERAL;
new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
- src_qf = VK_QUEUE_FAMILY_IGNORED;
dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
- s_timeline_sem_info.pWaitSemaphoreValues = frame->sem_value;
- s_timeline_sem_info.waitSemaphoreValueCount = nb_images;
- s_info.pWaitSemaphores = frame->sem;
- s_info.pWaitDstStageMask = wait_st;
- s_info.waitSemaphoreCount = nb_images;
break;
case PREP_MODE_DECODING_DST:
new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR;
new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
- src_qf = VK_QUEUE_FAMILY_IGNORED;
- dst_qf = VK_QUEUE_FAMILY_IGNORED;
break;
case PREP_MODE_DECODING_DPB:
new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR;
new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
- src_qf = VK_QUEUE_FAMILY_IGNORED;
- dst_qf = VK_QUEUE_FAMILY_IGNORED;
break;
}
- for (int i = 0; i < nb_images; i++) {
- img_bar[i] = (VkImageMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
- .pNext = NULL,
- .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
- .srcAccessMask = frame->access[i],
- .dstAccessMask = new_access,
- .oldLayout = frame->layout[i],
- .newLayout = new_layout,
- .srcQueueFamilyIndex = src_qf,
- .dstQueueFamilyIndex = dst_qf,
- .image = frame->img[i],
- .subresourceRange = (VkImageSubresourceRange) {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .layerCount = VK_REMAINING_ARRAY_LAYERS,
- .levelCount = 1,
- },
- };
- }
+ ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_NONE,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ new_access, new_layout, dst_qf);
- vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &(VkDependencyInfo) {
+ vk->CmdPipelineBarrier2KHR(cmd_buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
- .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
.pImageMemoryBarriers = img_bar,
- .imageMemoryBarrierCount = nb_images,
+ .imageMemoryBarrierCount = nb_img_bar,
});
- err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
- if (err >= 0) {
- for (int i = 0; i < nb_images; i++) {
- frame->layout[i] = img_bar[i].newLayout;
- frame->access[i] = img_bar[i].dstAccessMask;
- frame->queue_family[i] = img_bar[i].dstQueueFamilyIndex;
- }
- }
- vkfc->unlock_frame(hwfc, frame);
+ err = ff_vk_exec_submit(&p->vkctx, exec);
+ if (err < 0)
+ return err;
- return err;
+ /* We can do this because there are no real dependencies */
+ ff_vk_exec_discard_deps(&p->vkctx, exec);
+
+ return 0;
}
static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format,
@@ -2258,9 +1914,8 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
AVVulkanFramesContext *hwfc_vk = hwfc->hwctx;
AVHWDeviceContext *ctx = hwfc->device_ctx;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
- AVVulkanFramesContext *frames = hwfc->hwctx;
VkExportSemaphoreCreateInfo ext_sem_info = {
.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
@@ -2276,9 +1931,9 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
VkSemaphoreTypeCreateInfo sem_type_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
#ifdef _WIN32
- .pNext = p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
+ .pNext = p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL,
#else
- .pNext = p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
+ .pNext = p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL,
#endif
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
.initialValue = 0,
@@ -2325,7 +1980,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
hwctx->alloc, &f->img[i]);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR(EINVAL);
goto fail;
}
@@ -2335,7 +1990,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
hwctx->alloc, &f->sem[i]);
if (ret != VK_SUCCESS) {
av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
@@ -2366,11 +2021,11 @@ static void try_export_flags(AVHWFramesContext *hwfc,
AVVulkanFramesContext *hwctx = hwfc->hwctx;
AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
- vk_find_struct(hwctx->create_pnext,
- VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
+ ff_vk_find_struct(hwctx->create_pnext,
+ VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
int nb_mods;
@@ -2437,12 +2092,12 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
};
#ifdef _WIN32
- if (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
+ if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater()
? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
: VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
#else
- if (p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
+ if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
try_export_flags(hwfc, &eiinfo.handleTypes, &e,
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
#endif
@@ -2464,11 +2119,11 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) &&
!(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
- err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DPB);
+ err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB);
else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)
- err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_DECODING_DST);
+ err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST);
else
- err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_WRITE);
+ err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE);
if (err)
goto fail;
@@ -2496,6 +2151,7 @@ static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
+ VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
VulkanFramesPriv *fp = hwfc->internal->priv;
if (fp->modifier_info) {
@@ -2504,9 +2160,9 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
av_freep(&fp->modifier_info);
}
- free_exec_ctx(hwfc, &fp->conv_ctx);
- free_exec_ctx(hwfc, &fp->upload_ctx);
- free_exec_ctx(hwfc, &fp->download_ctx);
+ ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec);
+ ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec);
+ ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec);
}
static int vulkan_frames_init(AVHWFramesContext *hwfc)
@@ -2515,7 +2171,6 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
AVVkFrame *f;
AVVulkanFramesContext *hwctx = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
- AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
VkImageUsageFlagBits supported_usage;
const struct FFVkFormatEntry *fmt;
@@ -2600,20 +2255,18 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
if (!hwctx->unlock_frame)
hwctx->unlock_frame = unlock_frame;
- err = create_exec_ctx(hwfc, &fp->conv_ctx,
- dev_hwctx->queue_family_comp_index,
- dev_hwctx->nb_comp_queues);
+ err = ff_vk_exec_pool_init(&p->vkctx, &p->compute_qf, &fp->compute_exec,
+ p->compute_qf.nb_queues*4, 0, 0, 0, NULL);
if (err)
return err;
- err = create_exec_ctx(hwfc, &fp->upload_ctx,
- dev_hwctx->queue_family_tx_index,
- dev_hwctx->nb_tx_queues);
+ err = ff_vk_exec_pool_init(&p->vkctx, &p->transfer_qf, &fp->upload_exec,
+ p->transfer_qf.nb_queues*4, 0, 0, 0, NULL);
if (err)
return err;
- err = create_exec_ctx(hwfc, &fp->download_ctx,
- dev_hwctx->queue_family_tx_index, 1);
+ err = ff_vk_exec_pool_init(&p->vkctx, &p->transfer_qf, &fp->download_exec,
+ p->transfer_qf.nb_queues*4, 0, 0, 0, NULL);
if (err)
return err;
@@ -2673,7 +2326,7 @@ static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwma
AVVkFrame *f = hwmap->priv;
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
const int nb_images = ff_vk_count_images(f);
VkSemaphoreWaitInfo wait_info = {
@@ -2740,8 +2393,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
AVHWDeviceContext *ctx = hwfc->device_ctx;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
- AVVulkanFramesContext *hwfctx = hwfc->hwctx;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
VulkanFramesPriv *fp = hwfc->internal->priv;
const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES];
@@ -2845,7 +2497,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
&fmt_props, &props_ret);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR_EXTERNAL;
goto fail;
}
@@ -2868,7 +2520,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
hwctx->alloc, &f->img[i]);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR(EINVAL);
goto fail;
}
@@ -2877,7 +2529,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
hwctx->alloc, &f->sem[i]);
if (ret != VK_SUCCESS) {
av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
}
@@ -2931,7 +2583,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
idesc.fd, &fdmp);
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR_EXTERNAL;
close(idesc.fd);
goto fail;
@@ -2983,12 +2635,12 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR_EXTERNAL;
goto fail;
}
- err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_IMPORT);
+ err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_IMPORT);
if (err)
goto fail;
@@ -3088,7 +2740,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data;
AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx;
@@ -3148,7 +2800,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
&ext_desc.handle.win32.handle);
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR_EXTERNAL;
goto fail;
}
@@ -3176,7 +2828,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
&ext_desc.handle.fd);
if (ret != VK_SUCCESS) {
av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR_EXTERNAL;
goto fail;
}
@@ -3219,7 +2871,7 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
#endif
if (ret != VK_SUCCESS) {
av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
- vk_ret2str(ret));
+ ff_vk_ret2str(ret));
err = AVERROR_EXTERNAL;
goto fail;
}
@@ -3267,7 +2919,7 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
dst_f = (AVVkFrame *)dst->data[0];
- err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT);
+ err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
if (err < 0)
return err;
@@ -3327,7 +2979,7 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
av_log(hwfc, AV_LOG_VERBOSE, "Transfered CUDA image to Vulkan!\n");
- return err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT);
+ return err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);
fail:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@@ -3347,13 +2999,13 @@ static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
#if CONFIG_LIBDRM
#if CONFIG_VAAPI
case AV_PIX_FMT_VAAPI:
- if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
+ if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
return vulkan_map_from_vaapi(hwfc, dst, src, flags);
else
return AVERROR(ENOSYS);
#endif
case AV_PIX_FMT_DRM_PRIME:
- if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
+ if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
return vulkan_map_from_drm(hwfc, dst, src, flags);
else
return AVERROR(ENOSYS);
@@ -3394,7 +3046,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
VkResult ret;
AVVkFrame *f = (AVVkFrame *)src->data[0];
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
VulkanFramesPriv *fp = hwfc->internal->priv;
AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
AVVulkanFramesContext *hwfctx = hwfc->hwctx;
@@ -3412,7 +3064,7 @@ static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
if (!drm_desc)
return AVERROR(ENOMEM);
- err = prepare_frame(hwfc, &fp->conv_ctx, f, PREP_MODE_EXTERNAL_EXPORT);
+ err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_EXPORT);
if (err < 0)
goto end;
@@ -3533,13 +3185,13 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
switch (dst->format) {
#if CONFIG_LIBDRM
case AV_PIX_FMT_DRM_PRIME:
- if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
+ if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
return vulkan_map_to_drm(hwfc, dst, src, flags);
else
return AVERROR(ENOSYS);
#if CONFIG_VAAPI
case AV_PIX_FMT_VAAPI:
- if (p->extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
+ if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS)
return vulkan_map_to_vaapi(hwfc, dst, src, flags);
else
return AVERROR(ENOSYS);
@@ -3551,29 +3203,6 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
return AVERROR(ENOSYS);
}
-typedef struct ImageBuffer {
- VkBuffer buf;
- VkDeviceMemory mem;
- VkMemoryPropertyFlagBits flags;
- int mapped_mem;
-} ImageBuffer;
-
-static void free_buf(void *opaque, uint8_t *data)
-{
- AVHWDeviceContext *ctx = opaque;
- AVVulkanDeviceContext *hwctx = ctx->hwctx;
- VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
- ImageBuffer *vkbuf = (ImageBuffer *)data;
-
- if (vkbuf->buf)
- vk->DestroyBuffer(hwctx->act_dev, vkbuf->buf, hwctx->alloc);
- if (vkbuf->mem)
- vk->FreeMemory(hwctx->act_dev, vkbuf->mem, hwctx->alloc);
-
- av_free(data);
-}
-
static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height)
{
size_t size;
@@ -3583,202 +3212,6 @@ static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height)
return size;
}
-static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
- VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
- size_t size, uint32_t req_memory_bits, int host_mapped,
- void *create_pnext, void *alloc_pnext)
-{
- int err;
- VkResult ret;
- int use_ded_mem;
- AVVulkanDeviceContext *hwctx = ctx->hwctx;
- VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
-
- VkBufferCreateInfo buf_spawn = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .pNext = create_pnext,
- .usage = usage,
- .size = size,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- };
-
- VkBufferMemoryRequirementsInfo2 req_desc = {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
- };
- VkMemoryDedicatedAllocateInfo ded_alloc = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
- .pNext = alloc_pnext,
- };
- VkMemoryDedicatedRequirements ded_req = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
- };
- VkMemoryRequirements2 req = {
- .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
- .pNext = &ded_req,
- };
-
- ImageBuffer *vkbuf = av_mallocz(sizeof(*vkbuf));
- if (!vkbuf)
- return AVERROR(ENOMEM);
-
- vkbuf->mapped_mem = host_mapped;
-
- ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf);
- if (ret != VK_SUCCESS) {
- av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
- vk_ret2str(ret));
- err = AVERROR_EXTERNAL;
- goto fail;
- }
-
- req_desc.buffer = vkbuf->buf;
-
- vk->GetBufferMemoryRequirements2(hwctx->act_dev, &req_desc, &req);
-
- /* In case the implementation prefers/requires dedicated allocation */
- use_ded_mem = ded_req.prefersDedicatedAllocation |
- ded_req.requiresDedicatedAllocation;
- if (use_ded_mem)
- ded_alloc.buffer = vkbuf->buf;
-
- /* Additional requirements imposed on us */
- if (req_memory_bits)
- req.memoryRequirements.memoryTypeBits &= req_memory_bits;
-
- err = alloc_mem(ctx, &req.memoryRequirements, flags,
- use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
- &vkbuf->flags, &vkbuf->mem);
- if (err)
- goto fail;
-
- ret = vk->BindBufferMemory(hwctx->act_dev, vkbuf->buf, vkbuf->mem, 0);
- if (ret != VK_SUCCESS) {
- av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
- vk_ret2str(ret));
- err = AVERROR_EXTERNAL;
- goto fail;
- }
-
- *buf = av_buffer_create((uint8_t *)vkbuf, sizeof(*vkbuf), free_buf, ctx, 0);
- if (!(*buf)) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
-
- return 0;
-
-fail:
- free_buf(ctx, (uint8_t *)vkbuf);
- return err;
-}
-
-/* Skips mapping of host mapped buffers but still invalidates them */
-static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[],
- int nb_buffers, int invalidate)
-{
- VkResult ret;
- AVVulkanDeviceContext *hwctx = ctx->hwctx;
- VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
- VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
- int invalidate_count = 0;
-
- for (int i = 0; i < nb_buffers; i++) {
- ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
- if (vkbuf->mapped_mem)
- continue;
-
- ret = vk->MapMemory(hwctx->act_dev, vkbuf->mem, 0,
- VK_WHOLE_SIZE, 0, (void **)&mem[i]);
- if (ret != VK_SUCCESS) {
- av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
- vk_ret2str(ret));
- return AVERROR_EXTERNAL;
- }
- }
-
- if (!invalidate)
- return 0;
-
- for (int i = 0; i < nb_buffers; i++) {
- ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
- const VkMappedMemoryRange ival_buf = {
- .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = vkbuf->mem,
- .size = VK_WHOLE_SIZE,
- };
-
- /* For host imported memory Vulkan says to use platform-defined
- * sync methods, but doesn't really say not to call flush or invalidate
- * on original host pointers. It does explicitly allow to do that on
- * host-mapped pointers which are then mapped again using vkMapMemory,
- * but known implementations return the original pointers when mapped
- * again. */
- if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
- continue;
-
- invalidate_ctx[invalidate_count++] = ival_buf;
- }
-
- if (invalidate_count) {
- ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
- invalidate_ctx);
- if (ret != VK_SUCCESS)
- av_log(ctx, AV_LOG_WARNING, "Failed to invalidate memory: %s\n",
- vk_ret2str(ret));
- }
-
- return 0;
-}
-
-static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
- int nb_buffers, int flush)
-{
- int err = 0;
- VkResult ret;
- AVVulkanDeviceContext *hwctx = ctx->hwctx;
- VulkanDevicePriv *p = ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
- VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
- int flush_count = 0;
-
- if (flush) {
- for (int i = 0; i < nb_buffers; i++) {
- ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
- const VkMappedMemoryRange flush_buf = {
- .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = vkbuf->mem,
- .size = VK_WHOLE_SIZE,
- };
-
- if (vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
- continue;
-
- flush_ctx[flush_count++] = flush_buf;
- }
- }
-
- if (flush_count) {
- ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
- if (ret != VK_SUCCESS) {
- av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
- vk_ret2str(ret));
- err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
- }
- }
-
- for (int i = 0; i < nb_buffers; i++) {
- ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
- if (vkbuf->mapped_mem)
- continue;
-
- vk->UnmapMemory(hwctx->act_dev, vkbuf->mem);
- }
-
- return err;
-}
-
static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
AVBufferRef **bufs, size_t *buf_offsets,
const int *buf_stride, int w,
@@ -3786,86 +3219,46 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
{
int err;
AVVkFrame *frame = (AVVkFrame *)f->data[0];
- AVVulkanFramesContext *vkfc = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
-
- int bar_num = 0;
- VkPipelineStageFlagBits sem_wait_dst[AV_NUM_DATA_POINTERS];
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
+ VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
+ int nb_img_bar = 0;
const int nb_images = ff_vk_count_images(frame);
int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt);
-
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
- VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
- VulkanExecCtx *ectx = to_buf ? &fp->download_ctx : &fp->upload_ctx;
- VkCommandBuffer cmd_buf = get_buf_exec_ctx(hwfc, ectx);
-
- uint64_t sem_signal_values[AV_NUM_DATA_POINTERS];
+ VkCommandBuffer cmd_buf;
+ FFVkExecContext *exec = ff_vk_exec_get(to_buf ? &fp->download_exec :
+ &fp->upload_exec);
+ cmd_buf = exec->buf;
+ ff_vk_exec_start(&p->vkctx, exec);
- VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
- .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
- .pWaitSemaphoreValues = frame->sem_value,
- .pSignalSemaphoreValues = sem_signal_values,
- .waitSemaphoreValueCount = nb_images,
- .signalSemaphoreValueCount = nb_images,
- };
-
- VkSubmitInfo s_info = {
- .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = &s_timeline_sem_info,
- .pSignalSemaphores = frame->sem,
- .pWaitSemaphores = frame->sem,
- .pWaitDstStageMask = sem_wait_dst,
- .signalSemaphoreCount = nb_images,
- .waitSemaphoreCount = nb_images,
- };
-
- vkfc->lock_frame(hwfc, frame);
-
- if ((err = wait_start_exec_ctx(hwfc, ectx)))
- goto end;
-
- for (int i = 0; i < nb_images; i++)
- sem_signal_values[i] = frame->sem_value[i] + 1;
-
- /* Change the image layout to something more optimal for transfers */
- for (int i = 0; i < nb_images; i++) {
- VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
- VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
- VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
- VK_ACCESS_TRANSFER_WRITE_BIT;
-
- sem_wait_dst[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, pixfmt_planes, 1);
+ if (err < 0)
+ return err;
- /* If the layout matches and we have read access skip the barrier */
- if ((frame->layout[i] == new_layout) && (frame->access[i] & new_access))
- continue;
+ err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, f,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_TRANSFER_BIT);
+ if (err < 0)
+ return err;
- img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
- img_bar[bar_num].srcAccessMask = 0x0;
- img_bar[bar_num].dstAccessMask = new_access;
- img_bar[bar_num].oldLayout = frame->layout[i];
- img_bar[bar_num].newLayout = new_layout;
- img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
- img_bar[bar_num].image = frame->img[i];
- img_bar[bar_num].subresourceRange.levelCount = 1;
- img_bar[bar_num].subresourceRange.layerCount = 1;
- img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-
- frame->layout[i] = img_bar[bar_num].newLayout;
- frame->access[i] = img_bar[bar_num].dstAccessMask;
-
- bar_num++;
- }
+ ff_vk_frame_barrier(&p->vkctx, exec, f, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
+ to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
- if (bar_num)
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
- 0, NULL, 0, NULL, bar_num, img_bar);
+ vk->CmdPipelineBarrier2KHR(cmd_buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = nb_img_bar,
+ });
/* Schedule a copy for each plane */
for (int i = 0; i < pixfmt_planes; i++) {
@@ -3875,7 +3268,7 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
VK_IMAGE_ASPECT_PLANE_1_BIT,
VK_IMAGE_ASPECT_PLANE_2_BIT, };
- ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
+ FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[i]->data;
VkBufferImageCopy buf_reg = {
.bufferOffset = buf_offsets[i],
.bufferRowLength = buf_stride[i] / desc->comp[i].step,
@@ -3885,44 +3278,32 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
.imageOffset = { 0, 0, 0, },
};
- int p_w, p_h;
+ uint32_t p_w, p_h;
get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i);
buf_reg.bufferImageHeight = p_h;
buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
if (to_buf)
- vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], frame->layout[idx],
- vkbuf->buf, 1, &buf_reg);
+ vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx],
+ img_bar[0].newLayout,
+ vkbuf->buf,
+ 1, &buf_reg);
else
vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx],
- frame->layout[idx], 1, &buf_reg);
+ img_bar[0].newLayout,
+ 1, &buf_reg);
}
- /* When uploading, do this asynchronously if the source is refcounted by
- * keeping the buffers as a submission dependency.
- * The hwcontext is guaranteed to not be freed until all frames are freed
- * in the frames_unint function.
- * When downloading to buffer, do this synchronously and wait for the
- * queue submission to finish executing */
- if (!to_buf) {
- int ref;
- for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) {
- if (!f->buf[ref])
- break;
- if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
- goto end;
- }
- if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, pixfmt_planes)))
- goto end;
- err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
- } else {
- err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
- }
+ err = ff_vk_exec_submit(&p->vkctx, exec);
+ if (err < 0)
+ return err;
-end:
- vkfc->unlock_frame(hwfc, frame);
- return err;
+ /* Wait for the operation to complete when downloading */
+ if (to_buf)
+ ff_vk_exec_wait(&p->vkctx, exec);
+
+ return 0;
}
static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
@@ -3930,22 +3311,21 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
{
int err = 0;
VkResult ret;
- AVVkFrame *f = (AVVkFrame *)vkf->data[0];
AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
- AVVulkanFramesContext *fc = hwfc->hwctx;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
- FFVulkanFunctions *vk = &p->vkfn;
+ FFVulkanFunctions *vk = &p->vkctx.vkfn;
AVFrame tmp;
+ FFVkBuffer *vkbufs[AV_NUM_DATA_POINTERS];
AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 };
- int p_w, p_h;
+ uint32_t p_w, p_h;
const int planes = av_pix_fmt_count_planes(swf->format);
int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
- const int map_host = !!(p->extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY);
+ const int map_host = !!(p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY);
if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
@@ -3992,8 +3372,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
import_desc.handleType,
import_desc.pHostPointer,
&p_props);
-
- if (ret == VK_SUCCESS) {
+ if (ret == VK_SUCCESS && p_props.memoryTypeBits) {
host_mapped[i] = 1;
buf_offsets[i] = offs;
}
@@ -4002,20 +3381,23 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
if (!host_mapped[i])
req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h);
- err = create_buf(dev_ctx, &bufs[i],
- from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
- VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
- req_size, p_props.memoryTypeBits, host_mapped[i],
- host_mapped[i] ? &create_desc : NULL,
- host_mapped[i] ? &import_desc : NULL);
- if (err)
+ err = ff_vk_create_avbuf(&p->vkctx, &bufs[i], req_size,
+ host_mapped[i] ? &create_desc : NULL,
+ host_mapped[i] ? &import_desc : NULL,
+ from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ (host_mapped[i] ?
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT : 0x0));
+ if (err < 0)
goto end;
+
+ vkbufs[i] = (FFVkBuffer *)bufs[i]->data;
}
if (!from) {
/* Map, copy image TO buffer (which then goes to the VkImage), unmap */
- if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
+ if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0)))
goto end;
for (int i = 0; i < planes; i++) {
@@ -4030,7 +3412,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
p_h);
}
- if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
+ if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1)))
goto end;
}
@@ -4041,7 +3423,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
if (from) {
/* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
- if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
+ if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0)))
goto end;
for (int i = 0; i < planes; i++) {
@@ -4056,7 +3438,7 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
p_h);
}
- if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
+ if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1)))
goto end;
}
@@ -4076,11 +3458,11 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
#if CONFIG_CUDA
case AV_PIX_FMT_CUDA:
#ifdef _WIN32
- if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
- (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
+ if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
+ (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
- if ((p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
- (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
+ if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
+ (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
return vulkan_transfer_data_from_cuda(hwfc, dst, src);
#endif
@@ -4114,7 +3496,7 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
dst_f = (AVVkFrame *)src->data[0];
- err = prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_EXPORT);
+ err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT);
if (err < 0)
return err;
@@ -4174,7 +3556,7 @@ static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst,
av_log(hwfc, AV_LOG_VERBOSE, "Transfered Vulkan image to CUDA!\n");
- return prepare_frame(hwfc, &fp->upload_ctx, dst_f, PREP_MODE_EXTERNAL_IMPORT);
+ return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT);
fail:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@@ -4194,11 +3576,11 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
#if CONFIG_CUDA
case AV_PIX_FMT_CUDA:
#ifdef _WIN32
- if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
- (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
+ if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
+ (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
#else
- if ((p->extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
- (p->extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
+ if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) &&
+ (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM))
#endif
return vulkan_transfer_data_to_cuda(hwfc, dst, src);
#endif
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 4de233b0e5..f654d676e5 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -266,6 +266,19 @@ static inline int ff_vk_count_images(AVVkFrame *f)
return cnt;
}
+static inline const void *ff_vk_find_struct(const void *chain, VkStructureType stype)
+{
+ const VkBaseInStructure *in = chain;
+ while (in) {
+ if (in->sType == stype)
+ return in;
+
+ in = in->pNext;
+ }
+
+ return NULL;
+}
+
/* Identity mapping - r = r, b = b, g = g, a = a */
extern const VkComponentMapping ff_comp_identity_map;
--
2.40.0
[-- Attachment #44: 0063-hwcontext_vulkan-move-VulkanFramesPriv-to-hwcontext_.patch --]
[-- Type: text/x-diff, Size: 4430 bytes --]
From acaaee034a43a7bb1461152477a5553a299435fd Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Mon, 6 Mar 2023 02:04:53 +0100
Subject: [PATCH 63/97] hwcontext_vulkan: move VulkanFramesPriv to
hwcontext_vulkan_internal
The issue is that during calls to avcodec_get_hw_frames_parameters,
avctx->internal->hwaccel_priv_data isn't guaranteed to exist.
However, we need to attach some structures to create_pnext, in order
to create frames we can decode into.
We have nowhere to put them, and they have to be valid for the entire lifetime.
Hence, we put them into the hwcontext's frames priv structure.
---
libavutil/Makefile | 1 +
libavutil/hwcontext_vulkan.c | 15 ++-------
libavutil/hwcontext_vulkan_internal.h | 46 +++++++++++++++++++++++++++
3 files changed, 50 insertions(+), 12 deletions(-)
create mode 100644 libavutil/hwcontext_vulkan_internal.h
diff --git a/libavutil/Makefile b/libavutil/Makefile
index bd9c6f9e32..fbcb2d2a88 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -216,6 +216,7 @@ SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h
SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h
SKIPHEADERS-$(CONFIG_VULKAN) += hwcontext_vulkan.h vulkan.h \
+ hwcontext_vulkan_internal.h \
vulkan_functions.h \
vulkan_loader.h
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 85ac48f307..2910208df1 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -40,6 +40,7 @@
#include "avassert.h"
#include "hwcontext_internal.h"
#include "hwcontext_vulkan.h"
+#include "hwcontext_vulkan_internal.h"
#include "vulkan.h"
#include "vulkan_loader.h"
@@ -119,18 +120,6 @@ typedef struct VulkanDevicePriv {
int dev_is_nvidia;
} VulkanDevicePriv;
-typedef struct VulkanFramesPriv {
- /* Image conversions */
- FFVkExecPool compute_exec;
-
- /* Image transfers */
- FFVkExecPool upload_exec;
- FFVkExecPool download_exec;
-
- /* Modifier info list to free at uninit */
- VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
-} VulkanFramesPriv;
-
typedef struct AVVkFrameInternal {
pthread_mutex_t update_mutex;
@@ -2163,6 +2152,8 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec);
ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec);
ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec);
+
+ av_freep(&fp->video_profile_data);
}
static int vulkan_frames_init(AVHWFramesContext *hwfc)
diff --git a/libavutil/hwcontext_vulkan_internal.h b/libavutil/hwcontext_vulkan_internal.h
new file mode 100644
index 0000000000..1fdf1663f5
--- /dev/null
+++ b/libavutil/hwcontext_vulkan_internal.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HWCONTEXT_VULKAN_INTERNAL_H
+#define AVUTIL_HWCONTEXT_VULKAN_INTERNAL_H
+
+/**
+ * @file
+ * FFmpeg internal API for Vulkan.
+ */
+
+#include "vulkan.h"
+
+typedef struct VulkanFramesPriv {
+ /* Image conversions */
+ FFVkExecPool compute_exec;
+
+ /* Image transfers */
+ FFVkExecPool upload_exec;
+ FFVkExecPool download_exec;
+
+ /* Modifier info list to free at uninit */
+ VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
+
+ /* Used by the decoder in case there's no context */
+ void *video_profile_data;
+} VulkanFramesPriv;
+
+#endif /* AVUTIL_HWCONTEXT_VULKAN_INTERNAL_H */
--
2.40.0
[-- Attachment #45: 0064-hwcontext_vulkan-enable-additional-device-properties.patch --]
[-- Type: text/x-diff, Size: 2579 bytes --]
From d2ecb0a83fa770866ba396a41a3b0c05e9c9f418 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 14 Mar 2023 21:38:55 +0100
Subject: [PATCH 64/97] hwcontext_vulkan: enable additional device properties
---
libavutil/hwcontext_vulkan.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 2910208df1..f66aa43c11 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1234,6 +1234,8 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
COPY_FEATURE(hwctx->device_features, fragmentStoresAndAtomics)
COPY_FEATURE(hwctx->device_features, vertexPipelineStoresAndAtomics)
COPY_FEATURE(hwctx->device_features, shaderInt64)
+ COPY_FEATURE(hwctx->device_features, shaderInt16)
+ COPY_FEATURE(hwctx->device_features, shaderFloat64)
#undef COPY_FEATURE
/* We require timeline semaphores */
@@ -1242,10 +1244,23 @@ static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
err = AVERROR(ENOSYS);
goto end;
}
+
+ p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
+ p->device_features_1_1.storagePushConstant16 = dev_features_1_1.storagePushConstant16;
+
p->device_features_1_2.timelineSemaphore = 1;
p->device_features_1_2.bufferDeviceAddress = dev_features_1_2.bufferDeviceAddress;
- p->device_features_1_1.samplerYcbcrConversion = dev_features_1_1.samplerYcbcrConversion;
+ p->device_features_1_2.storagePushConstant8 = dev_features_1_2.storagePushConstant8;
+ p->device_features_1_2.shaderInt8 = dev_features_1_2.shaderInt8;
+ p->device_features_1_2.storageBuffer8BitAccess = dev_features_1_2.storageBuffer8BitAccess;
+ p->device_features_1_2.uniformAndStorageBuffer8BitAccess = dev_features_1_2.uniformAndStorageBuffer8BitAccess;
+ p->device_features_1_2.shaderFloat16 = dev_features_1_2.shaderFloat16;
+ p->device_features_1_2.shaderSharedInt64Atomics = dev_features_1_2.shaderSharedInt64Atomics;
+
p->device_features_1_3.synchronization2 = dev_features_1_3.synchronization2;
+ p->device_features_1_3.computeFullSubgroups = dev_features_1_3.computeFullSubgroups;
+ p->device_features_1_3.shaderZeroInitializeWorkgroupMemory = dev_features_1_3.shaderZeroInitializeWorkgroupMemory;
+
p->desc_buf_features.descriptorBuffer = desc_buf_features.descriptorBuffer;
p->desc_buf_features.descriptorBufferPushDescriptors = desc_buf_features.descriptorBufferPushDescriptors;
--
2.40.0
[-- Attachment #46: 0065-lavfi-add-lavfi-only-Vulkan-infrastructure.patch --]
[-- Type: text/x-diff, Size: 32252 bytes --]
From 3e0838ce03f8dcbd137cc24234035cef93ea1b06 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:10:58 +0100
Subject: [PATCH 65/97] lavfi: add lavfi-only Vulkan infrastructure
---
libavfilter/Makefile | 6 +
libavfilter/vulkan_filter.c | 480 +++++++++++++++-----
libavfilter/vulkan_filter.h | 39 +-
{libavutil => libavfilter}/vulkan_glslang.c | 19 +-
{libavutil => libavfilter}/vulkan_shaderc.c | 8 +-
libavfilter/vulkan_spirv.h | 45 ++
6 files changed, 473 insertions(+), 124 deletions(-)
rename {libavutil => libavfilter}/vulkan_glslang.c (95%)
rename {libavutil => libavfilter}/vulkan_shaderc.c (96%)
create mode 100644 libavfilter/vulkan_spirv.h
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 71e198bbf9..4c386bc158 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -618,6 +618,10 @@ OBJS-$(CONFIG_AVSYNCTEST_FILTER) += src_avsynctest.o
OBJS-$(CONFIG_AMOVIE_FILTER) += src_movie.o
OBJS-$(CONFIG_MOVIE_FILTER) += src_movie.o
+# vulkan libs
+OBJS-$(CONFIG_LIBGLSLANG) += vulkan_glslang.o
+OBJS-$(CONFIG_LIBSHADERC) += vulkan_shaderc.o
+
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o
@@ -631,6 +635,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h stack_internal.h
SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h
SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_filter.h
+SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h
+SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h
TOOLS = graph2dot
TESTPROGS = drawutils filtfmts formats integral
diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c
index e22541bd23..a88b72c7e6 100644
--- a/libavfilter/vulkan_filter.c
+++ b/libavfilter/vulkan_filter.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -18,107 +20,186 @@
#include "vulkan_filter.h"
-static int vulkan_filter_set_device(AVFilterContext *avctx,
- AVBufferRef *device)
+int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s,
+ AVBufferRef *frames_ref,
+ int width, int height, enum AVPixelFormat sw_format)
{
- FFVulkanContext *s = avctx->priv;
+ int err;
+ AVHWFramesContext *frames_ctx;
+ AVHWDeviceContext *device_ctx;
+ AVVulkanFramesContext *vk_frames;
+ AVVulkanDeviceContext *vk_dev;
+ AVBufferRef *device_ref = avctx->hw_device_ctx;
+
+ /* Check if context is reusable as-is */
+ if (frames_ref) {
+ int no_storage = 0;
+ FFVulkanFunctions *vk;
+ const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format);
+
+ frames_ctx = (AVHWFramesContext *)frames_ref->data;
+ device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
+ vk_frames = frames_ctx->hwctx;
+ vk_dev = device_ctx->hwctx;
+
+ /* Basic format validation */
+ if (width != frames_ctx->width ||
+ height != frames_ctx->height ||
+ sw_format != frames_ctx->sw_format ||
+ (vk_frames->tiling != VK_IMAGE_TILING_LINEAR &&
+ vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL) ||
+ !(vk_frames->usage & VK_IMAGE_USAGE_SAMPLED_BIT)) {
+ goto skip;
+ }
- av_buffer_unref(&s->device_ref);
+ if (vk_frames->usage & VK_IMAGE_USAGE_STORAGE_BIT)
+ goto accept;
- s->device_ref = av_buffer_ref(device);
- if (!s->device_ref)
- return AVERROR(ENOMEM);
+ s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
+ vk_dev->nb_enabled_dev_extensions);
+ err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
+ if (err < 0)
+ return err;
+ vk = &s->vkfn;
+
+ /* Check if the subformats can do storage */
+ for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) {
+ VkFormatProperties2 prop = {
+ .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+ };
+ vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i],
+ &prop);
+
+ if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) {
+ no_storage |= !(prop.formatProperties.linearTilingFeatures &
+ VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
+ } else {
+ no_storage |= !(prop.formatProperties.optimalTilingFeatures &
+ VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
+ }
+ }
- s->device = (AVHWDeviceContext*)s->device_ref->data;
- s->hwctx = s->device->hwctx;
+ /* Check if it's usable */
+ if (no_storage) {
+skip:
+ device_ref = frames_ctx->device_ref;
+ frames_ref = NULL;
+ } else {
+accept:
+ frames_ref = av_buffer_ref(frames_ref);
+ if (!frames_ref)
+ return AVERROR(ENOMEM);
+ }
+ }
- return 0;
-}
+ if (!frames_ref) {
+ if (!device_ref) {
+ av_log(avctx, AV_LOG_ERROR,
+ "Vulkan filtering requires a device context!\n");
+ return AVERROR(EINVAL);
+ }
-static int vulkan_filter_set_frames(AVFilterContext *avctx,
- AVBufferRef *frames)
-{
- FFVulkanContext *s = avctx->priv;
+ frames_ref = av_hwframe_ctx_alloc(device_ref);
- av_buffer_unref(&s->frames_ref);
+ frames_ctx = (AVHWFramesContext *)frames_ref->data;
+ frames_ctx->format = AV_PIX_FMT_VULKAN;
+ frames_ctx->sw_format = sw_format;
+ frames_ctx->width = width;
+ frames_ctx->height = height;
- s->frames_ref = av_buffer_ref(frames);
- if (!s->frames_ref)
- return AVERROR(ENOMEM);
+ vk_frames = frames_ctx->hwctx;
+ vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
+ vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT;
- return 0;
+ err = av_hwframe_ctx_init(frames_ref);
+ if (err < 0) {
+ av_buffer_unref(&frames_ref);
+ return err;
+ }
+
+ device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
+ vk_dev = device_ctx->hwctx;
+ }
+
+ s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
+ vk_dev->nb_enabled_dev_extensions);
+
+ /**
+ * libplacebo does not use descriptor buffers.
+ */
+ if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) &&
+ strcmp(avctx->filter->name, "libplacebo")) {
+ av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that "
+ "the %s extension is supported!\n",
+ VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME);
+ av_buffer_unref(&frames_ref);
+ return AVERROR(EINVAL);
+ }
+
+ err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
+ if (err < 0) {
+ av_buffer_unref(&frames_ref);
+ return err;
+ }
+
+ s->frames_ref = frames_ref;
+ s->frames = frames_ctx;
+ s->hwfc = vk_frames;
+ s->device = device_ctx;
+ s->hwctx = device_ctx->hwctx;
+
+ err = ff_vk_load_props(s);
+ if (err < 0)
+ av_buffer_unref(&s->frames_ref);
+
+ return err;
}
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
- int err;
- AVFilterContext *avctx = inlink->dst;
- FFVulkanContext *s = avctx->priv;
- FFVulkanFunctions *vk = &s->vkfn;
AVHWFramesContext *input_frames;
+ AVFilterContext *avctx = inlink->dst;
+ FFVulkanContext *s = inlink->dst->priv;
if (!inlink->hw_frames_ctx) {
- av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+ av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a "
"hardware frames context on the input.\n");
return AVERROR(EINVAL);
}
- /* Extract the device and default output format from the first input. */
- if (avctx->inputs[0] != inlink)
- return 0;
-
input_frames = (AVHWFramesContext *)inlink->hw_frames_ctx->data;
if (input_frames->format != AV_PIX_FMT_VULKAN)
return AVERROR(EINVAL);
- err = vulkan_filter_set_device(avctx, input_frames->device_ref);
- if (err < 0)
- return err;
- err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx);
- if (err < 0)
- return err;
-
- s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions,
- s->hwctx->nb_enabled_dev_extensions);
-
- err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1);
- if (err < 0)
- return err;
+ /* Extract the device and default output format from the first input. */
+ if (avctx->inputs[0] != inlink)
+ return 0;
- vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props);
- vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
+ /* Save the ref, without reffing it */
+ s->input_frames_ref = inlink->hw_frames_ctx;
- /* Default output parameters match input parameters. */
- s->input_format = input_frames->sw_format;
- if (s->output_format == AV_PIX_FMT_NONE)
- s->output_format = input_frames->sw_format;
- if (!s->output_width)
- s->output_width = inlink->w;
- if (!s->output_height)
- s->output_height = inlink->h;
+ /* Defaults */
+ s->output_format = input_frames->sw_format;
+ s->output_width = inlink->w;
+ s->output_height = inlink->h;
return 0;
}
-int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
+int ff_vk_filter_config_output(AVFilterLink *outlink)
{
int err;
- AVFilterContext *avctx = outlink->src;
- FFVulkanContext *s = avctx->priv;
+ FFVulkanContext *s = outlink->src->priv;
av_buffer_unref(&outlink->hw_frames_ctx);
- if (!s->device_ref) {
- if (!avctx->hw_device_ctx) {
- av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
- "Vulkan device.\n");
- return AVERROR(EINVAL);
- }
-
- err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
- if (err < 0)
- return err;
- }
+ err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref,
+ s->output_width, s->output_height,
+ s->output_format);
+ if (err < 0)
+ return err;
outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref);
if (!outlink->hw_frames_ctx)
@@ -127,65 +208,246 @@ int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
outlink->w = s->output_width;
outlink->h = s->output_height;
- return 0;
+ return err;
}
-int ff_vk_filter_config_output(AVFilterLink *outlink)
+int ff_vk_filter_init(AVFilterContext *avctx)
{
- int err;
- AVFilterContext *avctx = outlink->src;
FFVulkanContext *s = avctx->priv;
- AVBufferRef *output_frames_ref;
- AVHWFramesContext *output_frames;
-
- av_buffer_unref(&outlink->hw_frames_ctx);
- if (!s->device_ref) {
- if (!avctx->hw_device_ctx) {
- av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
- "Vulkan device.\n");
- return AVERROR(EINVAL);
- }
+ s->output_format = AV_PIX_FMT_NONE;
- err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
- if (err < 0)
- return err;
- }
+ return 0;
+}
- output_frames_ref = av_hwframe_ctx_alloc(s->device_ref);
- if (!output_frames_ref) {
- err = AVERROR(ENOMEM);
- goto fail;
+int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
+ VkSampler sampler, void *push_src, size_t push_size)
+{
+ int err = 0;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+ VkImageView in_views[AV_NUM_DATA_POINTERS];
+ VkImageView out_views[AV_NUM_DATA_POINTERS];
+ VkImageMemoryBarrier2 img_bar[37];
+ int nb_img_bar = 0;
+
+ /* Update descriptors and init the exec context */
+ FFVkExecContext *exec = ff_vk_exec_get(e);
+ ff_vk_exec_start(vkctx, exec);
+
+ ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+
+ if (push_src)
+ ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, push_size, push_src);
+
+ if (in_f) {
+ RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+ RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f));
+ ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ sampler);
+ ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_READ_BIT,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
}
- output_frames = (AVHWFramesContext*)output_frames_ref->data;
- output_frames->format = AV_PIX_FMT_VULKAN;
- output_frames->sw_format = s->output_format;
- output_frames->width = s->output_width;
- output_frames->height = s->output_height;
+ RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+ RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f));
+ ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
+ ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = nb_img_bar,
+ });
+
+ vk->CmdDispatch(exec->buf,
+ FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0],
+ FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+ pl->wg_size[2]);
+
+ return ff_vk_exec_submit(vkctx, exec);
+fail:
+ ff_vk_exec_discard_deps(vkctx, exec);
+ return err;
+}
- err = av_hwframe_ctx_init(output_frames_ref);
- if (err < 0) {
- av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
- "frames: %d.\n", err);
- goto fail;
+int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pls[2],
+ AVFrame *out, AVFrame *tmp, AVFrame *in,
+ VkSampler sampler, void *push_src, size_t push_size)
+{
+ int err = 0;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+ VkImageView in_views[AV_NUM_DATA_POINTERS];
+ VkImageView tmp_views[AV_NUM_DATA_POINTERS];
+ VkImageView out_views[AV_NUM_DATA_POINTERS];
+ VkImageMemoryBarrier2 img_bar[37];
+ int nb_img_bar = 0;
+
+ /* Update descriptors and init the exec context */
+ FFVkExecContext *exec = ff_vk_exec_get(e);
+ ff_vk_exec_start(vkctx, exec);
+
+ RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+ RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+ RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+ RET(ff_vk_create_imageviews(vkctx, exec, in_views, in));
+ RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp));
+ RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
+
+ ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_READ_BIT,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
+ ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+ ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = nb_img_bar,
+ });
+
+ for (int i = 0; i < 2; i++) {
+ FFVulkanPipeline *pl = pls[i];
+ AVFrame *src_f = !i ? in : tmp;
+ AVFrame *dst_f = !i ? tmp : out;
+ VkImageView *src_views = !i ? in_views : tmp_views;
+ VkImageView *dst_views = !i ? tmp_views : out_views;
+
+ ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+
+ if (push_src)
+ ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, push_size, push_src);
+
+ ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0,
+ !i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
+ VK_IMAGE_LAYOUT_GENERAL,
+ sampler);
+ ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
+
+ vk->CmdDispatch(exec->buf,
+ FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0],
+ FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+ pl->wg_size[2]);
}
- outlink->hw_frames_ctx = output_frames_ref;
- outlink->w = s->output_width;
- outlink->h = s->output_height;
-
- return 0;
+ return ff_vk_exec_submit(vkctx, exec);
fail:
- av_buffer_unref(&output_frames_ref);
+ ff_vk_exec_discard_deps(vkctx, exec);
return err;
}
-int ff_vk_filter_init(AVFilterContext *avctx)
+int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pl,
+ AVFrame *out, AVFrame *in[], int nb_in,
+ VkSampler sampler, void *push_src, size_t push_size)
{
- FFVulkanContext *s = avctx->priv;
-
- s->output_format = AV_PIX_FMT_NONE;
+ int err = 0;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+ VkImageView in_views[16][AV_NUM_DATA_POINTERS];
+ VkImageView out_views[AV_NUM_DATA_POINTERS];
+ VkImageMemoryBarrier2 img_bar[128];
+ int nb_img_bar = 0;
+
+ /* Update descriptors and init the exec context */
+ FFVkExecContext *exec = ff_vk_exec_get(e);
+ ff_vk_exec_start(vkctx, exec);
+
+ /* Inputs */
+ for (int i = 0; i < nb_in; i++) {
+ RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i],
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+ RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i]));
+
+ ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_READ_BIT,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ VK_QUEUE_FAMILY_IGNORED);
+ }
- return 0;
+ /* Output */
+ RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+ RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
+ ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ vk->CmdPipelineBarrier2KHR(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = nb_img_bar,
+ });
+
+ ff_vk_exec_bind_pipeline(vkctx, exec, pl);
+
+ if (push_src)
+ ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, push_size, push_src);
+
+ for (int i = 0; i < nb_in; i++)
+ ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ sampler);
+
+ ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
+
+ vk->CmdDispatch(exec->buf,
+ FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0],
+ FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
+ pl->wg_size[2]);
+
+ return ff_vk_exec_submit(vkctx, exec);
+fail:
+ ff_vk_exec_discard_deps(vkctx, exec);
+ return err;
}
diff --git a/libavfilter/vulkan_filter.h b/libavfilter/vulkan_filter.h
index bfdb9b2d7d..d2c14601d9 100644
--- a/libavfilter/vulkan_filter.h
+++ b/libavfilter/vulkan_filter.h
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -26,9 +28,38 @@
/**
* General lavfi IO functions
*/
-int ff_vk_filter_init (AVFilterContext *avctx);
-int ff_vk_filter_config_input (AVFilterLink *inlink);
-int ff_vk_filter_config_output (AVFilterLink *outlink);
-int ff_vk_filter_config_output_inplace(AVFilterLink *outlink);
+int ff_vk_filter_init (AVFilterContext *avctx);
+int ff_vk_filter_config_input (AVFilterLink *inlink);
+int ff_vk_filter_config_output(AVFilterLink *outlink);
+
+/**
+ * Can be called manually, if not using ff_vk_filter_config_output.
+ */
+int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s,
+ AVBufferRef *frames_ref,
+ int width, int height, enum AVPixelFormat sw_format);
+
+/**
+ * Submit a compute shader with zero or one input and a single output for execution.
+ */
+int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
+ VkSampler sampler, void *push_src, size_t push_size);
+
+/**
+ * Submit two chained compute shaders (in -> tmp, then tmp -> out) with a single input and a single output.
+ */
+int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pls[2],
+ AVFrame *out, AVFrame *tmp, AVFrame *in,
+ VkSampler sampler, void *push_src, size_t push_size);
+
+/**
+ * Submit a compute shader with up to 16 inputs and a single output for execution.
+ */
+int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e,
+ FFVulkanPipeline *pl,
+ AVFrame *out, AVFrame *in[], int nb_in,
+ VkSampler sampler, void *push_src, size_t push_size);
#endif /* AVFILTER_VULKAN_FILTER_H */
diff --git a/libavutil/vulkan_glslang.c b/libavfilter/vulkan_glslang.c
similarity index 95%
rename from libavutil/vulkan_glslang.c
rename to libavfilter/vulkan_glslang.c
index e7785f6d40..845a530ee0 100644
--- a/libavutil/vulkan_glslang.c
+++ b/libavfilter/vulkan_glslang.c
@@ -21,8 +21,9 @@
#include <glslang/build_info.h>
#include <glslang/Include/glslang_c_interface.h>
-#include "mem.h"
-#include "avassert.h"
+#include "vulkan_spirv.h"
+#include "libavutil/mem.h"
+#include "libavutil/avassert.h"
static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int glslc_refcount = 0;
@@ -176,11 +177,13 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
av_assert0(glslc_refcount);
+ *opaque = NULL;
+
if (!(glslc_shader = glslang_shader_create(&glslc_input)))
return AVERROR(ENOMEM);
if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n",
glslang_shader_get_info_log(glslc_shader),
glslang_shader_get_info_debug_log(glslc_shader));
@@ -189,7 +192,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
}
if (!glslang_shader_parse(glslc_shader, &glslc_input)) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n",
glslang_shader_get_info_log(glslc_shader),
glslang_shader_get_info_debug_log(glslc_shader));
@@ -206,7 +209,7 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT |
GLSLANG_MSG_VULKAN_RULES_BIT)) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n",
glslang_program_get_info_log(glslc_program),
glslang_program_get_info_debug_log(glslc_program));
@@ -219,10 +222,10 @@ static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
messages = glslang_program_SPIRV_get_messages(glslc_program);
if (messages) {
- ff_vk_print_shader(avctx, shd, AV_LOG_WARNING);
+ ff_vk_shader_print(avctx, shd, AV_LOG_WARNING);
av_log(avctx, AV_LOG_WARNING, "%s\n", messages);
} else {
- ff_vk_print_shader(avctx, shd, AV_LOG_VERBOSE);
+ ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE);
}
glslang_shader_delete(glslc_shader);
@@ -257,7 +260,7 @@ static void glslc_uninit(FFVkSPIRVCompiler **ctx)
av_freep(ctx);
}
-static FFVkSPIRVCompiler *ff_vk_glslang_init(void)
+FFVkSPIRVCompiler *ff_vk_glslang_init(void)
{
FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret));
if (!ret)
diff --git a/libavutil/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c
similarity index 96%
rename from libavutil/vulkan_shaderc.c
rename to libavfilter/vulkan_shaderc.c
index bd40edf187..38be1030ad 100644
--- a/libavutil/vulkan_shaderc.c
+++ b/libavfilter/vulkan_shaderc.c
@@ -18,7 +18,8 @@
#include <shaderc/shaderc.h>
-#include "mem.h"
+#include "libavutil/mem.h"
+#include "vulkan_spirv.h"
static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
FFVkSPIRVShader *shd, uint8_t **data,
@@ -43,6 +44,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
};
shaderc_compile_options_t opts = shaderc_compile_options_initialize();
+ *opaque = NULL;
if (!opts)
return AVERROR(ENOMEM);
@@ -65,7 +67,7 @@ static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx,
loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE;
- ff_vk_print_shader(avctx, shd, loglevel);
+ ff_vk_shader_print(avctx, shd, loglevel);
if (message && (err || warn))
av_log(avctx, loglevel, "%s\n", message);
status = ret < FF_ARRAY_ELEMS(shdc_result) ? shdc_result[ret] : "unknown";
@@ -104,7 +106,7 @@ static void shdc_uninit(FFVkSPIRVCompiler **ctx)
av_freep(ctx);
}
-static FFVkSPIRVCompiler *ff_vk_shaderc_init(void)
+FFVkSPIRVCompiler *ff_vk_shaderc_init(void)
{
FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret));
if (!ret)
diff --git a/libavfilter/vulkan_spirv.h b/libavfilter/vulkan_spirv.h
new file mode 100644
index 0000000000..5638cd9696
--- /dev/null
+++ b/libavfilter/vulkan_spirv.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VULKAN_SPIRV_H
+#define AVFILTER_VULKAN_SPIRV_H
+
+#include "libavutil/vulkan.h"
+
+#include "vulkan.h"
+#include "config.h"
+
+/**
+ * Interface to a GLSL to SPIR-V compiler backend (glslang or shaderc).
+ * Obtain one with ff_vk_spirv_init() and release it through uninit().
+ */
+typedef struct FFVkSPIRVCompiler {
+ void *priv;
+ /**
+ * Compile shd for the given entrypoint. Both backends reset *opaque to
+ * NULL on entry; callers pass the resulting *data and *size on to
+ * ff_vk_shader_create() and hand *opaque back to free_shader().
+ */
+ int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx,
+ struct FFVkSPIRVShader *shd, uint8_t **data,
+ size_t *size, const char *entrypoint, void **opaque);
+ /** Release the per-shader state that compile_shader() returned in *opaque. */
+ void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque);
+ /** Destroy the compiler; the backends finish by calling av_freep(ctx). */
+ void (*uninit)(struct FFVkSPIRVCompiler **ctx);
+} FFVkSPIRVCompiler;
+
+/* ff_vk_spirv_init resolves to the init function of the enabled backend. */
+#if CONFIG_LIBGLSLANG
+FFVkSPIRVCompiler *ff_vk_glslang_init(void);
+#define ff_vk_spirv_init ff_vk_glslang_init
+#endif
+#if CONFIG_LIBSHADERC
+FFVkSPIRVCompiler *ff_vk_shaderc_init(void);
+#define ff_vk_spirv_init ff_vk_shaderc_init
+#endif
+
+#endif /* AVFILTER_VULKAN_SPIRV_H */
--
2.40.0
[-- Attachment #47: 0066-avgblur_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 22012 bytes --]
From 94413e67fab8083f134cfc90c7dd0efe0b23ff23 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:19 +0100
Subject: [PATCH 66/97] avgblur_vulkan: port for the rewrite
---
libavfilter/vf_avgblur_vulkan.c | 365 +++++++++---------------------
libavfilter/vf_chromaber_vulkan.c | 2 +-
2 files changed, 102 insertions(+), 265 deletions(-)
diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
index 6a54d158ce..4873824c70 100644
--- a/libavfilter/vf_avgblur_vulkan.c
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,23 +21,24 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
-#define CGS 32
-
typedef struct AvgBlurVulkanContext {
FFVulkanContext vkctx;
int initialized;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl_hor;
- FFVulkanPipeline *pl_ver;
+ VkSampler sampler;
+ FFVulkanPipeline pl;
+ FFVkSPIRVShader shd;
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo tmp_images[3];
- VkDescriptorImageInfo output_images[3];
+ /* Push constants / options */
+ struct {
+ float filter_norm[4];
+ int32_t filter_len[2];
+ } opts;
int size_x;
int size_y;
@@ -43,46 +46,53 @@ typedef struct AvgBlurVulkanContext {
} AvgBlurVulkanContext;
static const char blur_kernel[] = {
- C(0, shared vec4 cache[DIR(gl_WorkGroupSize) + FILTER_RADIUS*2 + 1]; )
- C(0, )
- C(0, void distort(const ivec2 pos, const int idx) )
- C(0, { )
- C(1, const uint cp = DIR(gl_LocalInvocationID) + FILTER_RADIUS; )
- C(0, )
- C(1, cache[cp] = texture(input_img[idx], pos); )
- C(0, )
- C(1, const ivec2 loc_l = pos - INC(FILTER_RADIUS); )
- C(1, cache[cp - FILTER_RADIUS] = texture(input_img[idx], loc_l); )
- C(0, )
- C(1, const ivec2 loc_h = pos + INC(DIR(gl_WorkGroupSize)); )
- C(1, cache[cp + DIR(gl_WorkGroupSize)] = texture(input_img[idx], loc_h); )
- C(0, )
- C(1, barrier(); )
- C(0, )
- C(1, vec4 sum = vec4(0); )
- C(1, for (int p = -FILTER_RADIUS; p <= FILTER_RADIUS; p++) )
- C(2, sum += cache[cp + p]; )
- C(0, )
- C(1, sum /= vec4(FILTER_RADIUS*2 + 1); )
- C(1, imageStore(output_img[idx], pos, sum); )
- C(0, } )
+ C(0, void distort(const ivec2 pos, const int idx) )
+ C(0, { )
+ C(1, vec4 sum = vec4(0); )
+ C(1, for (int y = -filter_len.y; y <= filter_len.y; y++) )
+ C(1, for (int x = -filter_len.x; x <= filter_len.x; x++) )
+ C(2, sum += texture(input_img[idx], pos + ivec2(x, y)); )
+ C(0, )
+ C(1, imageStore(output_img[idx], pos, sum * filter_norm); )
+ C(0, } )
};
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
- FFVkSPIRVShader *shd;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* read at fail:, which is reachable before compile_shader() assigns it */
AvgBlurVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
- FFVulkanDescriptorSetBinding desc_i[2] = {
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "avgblur_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT, 0));
+ shd = &s->shd;
+
+ ff_vk_shader_set_compute_sizes(shd, 32, 1, 1);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_img",
@@ -95,244 +105,68 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
},
};
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
- desc_i[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!desc_i[0].sampler)
- return AVERROR_EXTERNAL;
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, vec4 filter_norm; );
+ GLSLC(1, ivec2 filter_len; );
+ GLSLC(0, }; );
+ GLSLC(0, );
- { /* Create shader for the horizontal pass */
- desc_i[0].updater = s->input_images;
- desc_i[1].updater = s->tmp_images;
-
- s->pl_hor = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl_hor)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl_hor, "avgblur_compute_hor",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl_hor, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0));
-
- GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_x - 1);
- GLSLC(0, #define INC(x) (ivec2(x, 0)) );
- GLSLC(0, #define DIR(var) (var.x) );
- GLSLD( blur_kernel );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->planes & (1 << i)) {
- GLSLF(2, distort(pos, %i); ,i);
- } else {
- GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- }
- GLSLC(1, } );
- }
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
-
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl_hor));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl_hor));
- }
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
- { /* Create shader for the vertical pass */
- desc_i[0].updater = s->tmp_images;
- desc_i[1].updater = s->output_images;
-
- s->pl_ver = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl_ver)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl_ver, "avgblur_compute_ver",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 });
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl_ver, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0));
-
- GLSLF(0, #define FILTER_RADIUS (%i) ,s->size_y - 1);
- GLSLC(0, #define INC(x) (ivec2(0, x)) );
- GLSLC(0, #define DIR(var) (var.y) );
- GLSLD( blur_kernel );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->planes & (1 << i)) {
- GLSLF(2, distort(pos, %i); ,i);
- } else {
- GLSLF(2, vec4 res = texture(input_img[%i], pos); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- }
- GLSLC(1, } );
+ GLSLD( blur_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ for (int i = 0; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (!IS_WITHIN(pos, size)) );
+ GLSLC(2, return; );
+ if (s->planes & (1 << i)) {
+ GLSLF(1, distort(pos, %i); ,i);
+ } else {
+ GLSLF(1, vec4 res = texture(input_img[%i], pos); ,i);
+ GLSLF(1, imageStore(output_img[%i], pos, res); ,i);
}
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
-
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl_ver));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl_ver));
}
+ GLSLC(0, } );
+
+ RET(spv->compile_shader(spv, ctx, &s->shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, &s->shd, spv_data, spv_len, "main"));
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, &s->shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
s->initialized = 1;
+ s->opts.filter_len[0] = s->size_x - 1;
+ s->opts.filter_len[1] = s->size_y - 1;
+ /* The shader sums (2*len_x + 1)*(2*len_y + 1) samples, so normalize by that area */
+ s->opts.filter_norm[0] = s->opts.filter_len[0]*2 + 1;
+ s->opts.filter_norm[0] = 1.0/(s->opts.filter_norm[0]*(s->opts.filter_len[1]*2 + 1));
+ s->opts.filter_norm[1] = s->opts.filter_norm[0];
+ s->opts.filter_norm[2] = s->opts.filter_norm[0];
+ s->opts.filter_norm[3] = s->opts.filter_norm[0];
- return 0;
+ err = 0; /* fall through to the cleanup below so the SPIR-V compiler is released on success too */
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f, AVFrame *in_f)
-{
- int err;
- VkCommandBuffer cmd_buf;
- AvgBlurVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- AVVkFrame *in = (AVVkFrame *)in_f->data[0];
- AVVkFrame *tmp = (AVVkFrame *)tmp_f->data[0];
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
-
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->tmp_images[i].imageView, tmp->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl_hor, 0);
- ff_vk_update_descriptor_set(vkctx, s->pl_ver, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = tmp->layout[i],
- .newLayout = s->tmp_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = tmp->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
- in->layout[i] = bar[0].newLayout;
- in->access[i] = bar[0].dstAccessMask;
-
- tmp->layout[i] = bar[1].newLayout;
- tmp->access[i] = bar[1].dstAccessMask;
-
- out->layout[i] = bar[2].newLayout;
- out->access[i] = bar[2].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_hor);
-
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl_ver);
-
- vk->CmdDispatch(cmd_buf, s->vkctx.output_width,
- FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx,s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
}
static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
{
int err;
- AVFrame *tmp = NULL, *out = NULL;
+ AVFrame *out = NULL;
AVFilterContext *ctx = link->dst;
AvgBlurVulkanContext *s = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
@@ -343,29 +177,22 @@ static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
goto fail;
}
- tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h);
- if (!tmp) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
-
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, tmp, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl,
+ out, in, s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, in);
if (err < 0)
goto fail;
av_frame_free(&in);
- av_frame_free(&tmp);
return ff_filter_frame(outlink, out);
fail:
av_frame_free(&in);
- av_frame_free(&tmp);
av_frame_free(&out);
return err;
}
@@ -373,6 +200,16 @@ fail:
static void avgblur_vulkan_uninit(AVFilterContext *avctx)
{
AvgBlurVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
ff_vk_uninit(&s->vkctx);
@@ -382,9 +219,9 @@ static void avgblur_vulkan_uninit(AVFilterContext *avctx)
#define OFFSET(x) offsetof(AvgBlurVulkanContext, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
static const AVOption avgblur_vulkan_options[] = {
- { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, {.i64 = 3}, 1, 32, .flags = FLAGS },
+ { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS },
+ { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS },
{ "planes", "Set planes to filter (bitmask)", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS },
- { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, {.i64 = 3}, 1, 32, .flags = FLAGS },
{ NULL },
};
diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c
index 62b99cc4d9..5ebdaf4e43 100644
--- a/libavfilter/vf_chromaber_vulkan.c
+++ b/libavfilter/vf_chromaber_vulkan.c
@@ -59,7 +59,7 @@ static const char distort_chroma_kernel[] = {
C(0, { )
C(1, vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; )
C(1, float d = sqrt(p.x*p.x + p.y*p.y); )
- C(1, p *= d / (d* dist); )
+ C(1, p *= d / (d*dist); )
C(1, vec4 res = texture(input_img[idx], (p/2.0f) + 0.5f); )
C(1, imageStore(output_img[idx], pos, res); )
C(0, } )
--
2.40.0
[-- Attachment #48: 0067-blend_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 16663 bytes --]
From ec666de153067ab2acec3772126d355eb35e7e81 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:43 +0100
Subject: [PATCH 67/97] blend_vulkan: port for the rewrite
---
libavfilter/vf_blend_vulkan.c | 316 +++++++++++-----------------------
1 file changed, 103 insertions(+), 213 deletions(-)
diff --git a/libavfilter/vf_blend_vulkan.c b/libavfilter/vf_blend_vulkan.c
index 4cee688a22..170992c3ef 100644
--- a/libavfilter/vf_blend_vulkan.c
+++ b/libavfilter/vf_blend_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* The blend modes are based on the blend.c.
*
* This file is part of FFmpeg.
@@ -22,12 +24,11 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#include "framesync.h"
#include "blend.h"
-#define CGS 32
-
#define IN_TOP 0
#define IN_BOTTOM 1
@@ -40,20 +41,18 @@ typedef struct FilterParamsVulkan {
typedef struct BlendVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
FFFrameSync fs;
- VkDescriptorImageInfo top_images[3];
- VkDescriptorImageInfo bottom_images[3];
- VkDescriptorImageInfo output_images[3];
+ int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
FilterParamsVulkan params[4];
double all_opacity;
enum BlendMode all_mode;
-
- int initialized;
} BlendVulkanContext;
#define DEFINE_BLEND_MODE(MODE, EXPR) \
@@ -125,223 +124,103 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
static av_cold int init_filter(AVFilterContext *avctx)
{
int err = 0;
- FFVkSampler *sampler;
- FFVkSPIRVShader *shd;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* must start NULL: fail: tests it even when an earlier RET() jumps before compile_shader() */
BlendVulkanContext *s = avctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!sampler)
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
-
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- {
- FFVulkanDescriptorSetBinding image_descs[] = {
- {
- .name = "top_images",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->top_images,
- .sampler = sampler,
- },
- {
- .name = "bottom_images",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->bottom_images,
- .sampler = sampler,
- },
- {
- .name = "output_images",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- shd = ff_vk_init_shader(s->pl, "blend_compute", image_descs[0].stages);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, CGS, 1 });
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
- for (int i = 0, j = 0; i < planes; i++) {
- for (j = 0; j < i; j++)
- if (s->params[i].blend_func == s->params[j].blend_func)
- break;
- /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */
- if (j == i) {
- GLSLD(s->params[i].blend_func);
- }
- }
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_images[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i);
- GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i);
- GLSLF(2, const float opacity = %f; ,s->params[i].opacity);
- GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend);
- GLSLC(0, );
- GLSLF(2, imageStore(output_images[%i], pos, dst); ,i);
- GLSLC(1, } );
- }
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
}
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
-
- s->initialized = 1;
-
-fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_frame, AVFrame *top_frame, AVFrame *bottom_frame)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- BlendVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *out = (AVVkFrame *)out_frame->data[0];
- AVVkFrame *top = (AVVkFrame *)top_frame->data[0];
- AVVkFrame *bottom = (AVVkFrame *)bottom_frame->data[0];
-
- AVHWFramesContext *top_fc = (AVHWFramesContext*)top_frame->hw_frames_ctx->data;
- AVHWFramesContext *bottom_fc = (AVHWFramesContext*)bottom_frame->hw_frames_ctx->data;
-
- const VkFormat *top_formats = av_vkfmt_from_pixfmt(top_fc->sw_format);
- const VkFormat *bottom_formats = av_vkfmt_from_pixfmt(bottom_fc->sw_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->top_images[i].imageView, top->img[i],
- top_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->bottom_images[i].imageView, bottom->img[i],
- bottom_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->top_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->bottom_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "blend_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT, 0));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "top_images",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "bottom_images",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_images",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0));
+
+ for (int i = 0, j = 0; i < planes; i++) {
+ for (j = 0; j < i; j++)
+ if (s->params[i].blend_func == s->params[j].blend_func)
+ break;
+ /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */
+ if (j == i) {
+ GLSLD(s->params[i].blend_func);
+ }
}
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = top->layout[i],
- .newLayout = s->top_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = top->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = bottom->layout[i],
- .newLayout = s->bottom_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = bottom->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- top->layout[i] = barriers[0].newLayout;
- top->access[i] = barriers[0].dstAccessMask;
-
- bottom->layout[i] = barriers[1].newLayout;
- bottom->access[i] = barriers[1].dstAccessMask;
-
- out->layout[i] = barriers[2].newLayout;
- out->access[i] = barriers[2].dstAccessMask;
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_images[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ GLSLF(2, const vec4 top = texture(top_images[%i], pos); ,i);
+ GLSLF(2, const vec4 bottom = texture(bottom_images[%i], pos); ,i);
+ GLSLF(2, const float opacity = %f; ,s->params[i].opacity);
+ GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend);
+ GLSLC(0, );
+ GLSLF(2, imageStore(output_images[%i], pos, dst); ,i);
+ GLSLC(1, } );
}
+ GLSLC(0, } );
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS) / CGS,
- FFALIGN(s->vkctx.output_height, CGS) / CGS, 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, top_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, bottom_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_frame, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+ RET(spv->compile_shader(spv, avctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
- ff_vk_qf_rotate(&s->qf);
-
- return 0;
+ s->initialized = 1;
fail:
- ff_vk_discard_exec_deps(s->exec);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
@@ -375,7 +254,9 @@ static int blend_frame(FFFrameSync *fs)
RET(init_filter(avctx));
}
- RET(process_frames(avctx, out, top, bottom));
+ RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl,
+ out, (AVFrame *[]){ top, bottom }, 2,
+ s->sampler, NULL, 0));
return ff_filter_frame(outlink, out);
@@ -396,10 +277,19 @@ static av_cold int init(AVFilterContext *avctx)
static av_cold void uninit(AVFilterContext *avctx)
{
BlendVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
- ff_framesync_uninit(&s->fs);
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
ff_vk_uninit(&s->vkctx);
+ ff_framesync_uninit(&s->fs);
s->initialized = 0;
}
--
2.40.0
[-- Attachment #49: 0068-chromaber_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 14623 bytes --]
From 793a7da2ed31615844f820284d7bd52937b99a9c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:11:53 +0100
Subject: [PATCH 68/97] chromaber_vulkan: port for the rewrite
---
libavfilter/vf_chromaber_vulkan.c | 285 ++++++++++--------------------
1 file changed, 96 insertions(+), 189 deletions(-)
diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c
index 5ebdaf4e43..dcce64304e 100644
--- a/libavfilter/vf_chromaber_vulkan.c
+++ b/libavfilter/vf_chromaber_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,21 +21,18 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
typedef struct ChromaticAberrationVulkanContext {
FFVulkanContext vkctx;
int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
-
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
/* Push constants / options */
struct {
@@ -68,205 +67,101 @@ static const char distort_chroma_kernel[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
- FFVkSampler *sampler;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* must start NULL: fail: tests it even when an earlier RET() jumps before compile_shader() */
ChromaticAberrationVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- /* Create a sampler */
- sampler = ff_vk_init_sampler(vkctx, 0, VK_FILTER_LINEAR);
- if (!sampler)
- return AVERROR_EXTERNAL;
-
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
/* Normalize options */
s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f;
s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f;
- { /* Create the shader */
- FFVulkanDescriptorSetBinding desc_i[2] = {
- {
- .name = "input_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
- .sampler = sampler,
- },
- {
- .name = "output_img",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "chromaber_compute",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
- GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
- GLSLC(1, vec2 dist; );
- GLSLC(0, }; );
- GLSLC(0, );
-
- ff_vk_add_push_constant(s->pl, 0, sizeof(s->opts),
- VK_SHADER_STAGE_COMPUTE_BIT);
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
-
- GLSLD( distort_chroma_kernel );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- if (planes == 1) {
- GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); );
- } else {
- GLSLC(1, ivec2 size = imageSize(output_img[0]); );
- GLSLC(1, vec2 npos = vec2(pos)/vec2(size); );
- GLSLC(1, vec4 res = texture(input_img[0], npos); );
- GLSLC(1, imageStore(output_img[0], pos, res); );
- for (int i = 1; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- GLSLF(2, distort_chroma(%i, size, pos); ,i);
- GLSLC(1, } else { );
- GLSLC(2, npos = vec2(pos)/vec2(size); );
- GLSLF(2, res = texture(input_img[%i], npos); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- GLSLC(1, } );
- }
- }
- GLSLC(0, } );
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "chromaber_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT, 0));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, vec2 dist; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
+
+ GLSLD( distort_chroma_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ if (planes == 1) {
+ GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); );
+ } else {
+ GLSLC(1, ivec2 size = imageSize(output_img[0]); );
+ GLSLC(1, vec2 npos = vec2(pos)/vec2(size); );
+ GLSLC(1, vec4 res = texture(input_img[0], npos); );
+ GLSLC(1, imageStore(output_img[0], pos, res); );
+ for (int i = 1; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (!IS_WITHIN(pos, size)) );
+ GLSLC(2, return; );
+ GLSLF(1, distort_chroma(%i, size, pos); ,i);
+ }
}
+ GLSLC(0, } );
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
s->initialized = 1;
- return 0;
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- ChromaticAberrationVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- AVVkFrame *in = (AVVkFrame *)in_f->data[0];
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *ouput_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- ouput_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar[2] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
- in->layout[i] = bar[0].newLayout;
- in->access[i] = bar[0].dstAccessMask;
-
- out->layout[i] = bar[1].newLayout;
- out->access[i] = bar[1].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
- ff_vk_update_push_exec(vkctx, s->exec, VK_SHADER_STAGE_COMPUTE_BIT,
- 0, sizeof(s->opts), &s->opts);
-
- vk->CmdDispatch(cmd_buf,
- FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0],
- FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
- ff_vk_qf_rotate(&s->qf);
-
- return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
return err;
}
@@ -286,7 +182,8 @@ static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, in);
if (err < 0)
@@ -305,6 +202,16 @@ fail:
static void chromaber_vulkan_uninit(AVFilterContext *avctx)
{
ChromaticAberrationVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
ff_vk_uninit(&s->vkctx);
--
2.40.0
[-- Attachment #50: 0069-flip_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 13105 bytes --]
From 34b0f32524321e386242677524142aa5274b466a Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:12:42 +0100
Subject: [PATCH 69/97] flip_vulkan: port for the rewrite
---
libavfilter/vf_flip_vulkan.c | 230 ++++++++++++-----------------------
1 file changed, 79 insertions(+), 151 deletions(-)
diff --git a/libavfilter/vf_flip_vulkan.c b/libavfilter/vf_flip_vulkan.c
index 6868e39ee6..4279dd2123 100644
--- a/libavfilter/vf_flip_vulkan.c
+++ b/libavfilter/vf_flip_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -20,10 +22,9 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
-#define CGS 32
-
enum FlipType {
FLIP_VERTICAL,
FLIP_HORIZONTAL,
@@ -32,32 +33,50 @@ enum FlipType {
typedef struct FlipVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
-
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
} FlipVulkanContext;
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType type)
{
int err = 0;
- FFVkSPIRVShader *shd;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* must start NULL: fail: tests it even when an earlier RET() jumps before compile_shader() */
FlipVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "flip_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT, 0));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
- FFVulkanDescriptorSetBinding image_descs[] = {
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_image",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_image",
@@ -67,167 +86,75 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
},
};
- image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- {
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl, "flip_compute", image_descs[0].stages);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_image[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- switch (type)
- {
- case FLIP_HORIZONTAL:
- GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i);
- break;
- case FLIP_VERTICAL:
- GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i);
- break;
- case FLIP_BOTH:
- GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i);
- break;
- default:
- GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i);
- break;
- }
- GLSLF(2, imageStore(output_image[%i], pos, res); ,i);
- GLSLC(1, } );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ for (int i = 0; i < planes; i++) {
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_image[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ switch (type)
+ {
+ case FLIP_HORIZONTAL:
+ GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i);
+ break;
+ case FLIP_VERTICAL:
+ GLSLF(2, vec4 res = texture(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i);
+ break;
+ case FLIP_BOTH:
+ GLSLF(2, vec4 res = texture(input_image[%i], ivec2(size.xy - pos.xy));, i);
+ break;
+ default:
+ GLSLF(2, vec4 res = texture(input_image[%i], pos); ,i);
+ break;
}
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+ GLSLF(2, imageStore(output_image[%i], pos, res); ,i);
+ GLSLC(1, } );
}
+ GLSLC(0, } );
+
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
s->initialized = 1;
fail:
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
static av_cold void flip_vulkan_uninit(AVFilterContext *avctx)
{
FlipVulkanContext *s = avctx->priv;
- ff_vk_uninit(&s->vkctx);
- s->initialized = 0;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- FlipVulkanContext *s = avctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *out = (AVVkFrame *)outframe->data[0];
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
+ FFVulkanFunctions *vk = &vkctx->vkfn;
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- in->layout[i] = barriers[0].newLayout;
- in->access[i] = barriers[0].dstAccessMask;
-
- out->layout[i] = barriers[1].newLayout;
- out->access[i] = barriers[1].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_qf_rotate(&s->qf);
+ ff_vk_uninit(&s->vkctx);
- return 0;
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
+ s->initialized = 0;
}
static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type)
@@ -247,7 +174,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type)
if (!s->initialized)
RET(init_filter(ctx, in, type));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, NULL, 0));
RET(av_frame_copy_props(out, in));
--
2.40.0
[-- Attachment #51: 0070-gblur_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18936 bytes --]
From 7b752cd30dedd98aeb26dad29693fc4107479b22 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:12:55 +0100
Subject: [PATCH 70/97] gblur_vulkan: port for the rewrite
---
libavfilter/vf_gblur_vulkan.c | 330 +++++++++++-----------------------
1 file changed, 103 insertions(+), 227 deletions(-)
diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c
index 80d1dc61c0..0f0f5dff43 100644
--- a/libavfilter/vf_gblur_vulkan.c
+++ b/libavfilter/vf_gblur_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -20,6 +22,7 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#define CGS 32
@@ -27,26 +30,23 @@
typedef struct GBlurVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl_hor;
- FFVulkanPipeline *pl_ver;
- FFVkBuffer params_buf_hor;
- FFVkBuffer params_buf_ver;
-
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo tmp_images[3];
- VkDescriptorImageInfo output_images[3];
- VkDescriptorBufferInfo params_desc_hor;
- VkDescriptorBufferInfo params_desc_ver;
int initialized;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ VkSampler sampler;
+ FFVulkanPipeline pl_hor;
+ FFVkSPIRVShader shd_hor;
+ FFVkBuffer params_hor;
+ FFVulkanPipeline pl_ver;
+ FFVkSPIRVShader shd_ver;
+ FFVkBuffer params_ver;
+
int size;
int sizeV;
int planes;
float sigma;
float sigmaV;
- AVFrame *tmpframe;
} GBlurVulkanContext;
static const char gblur_func[] = {
@@ -118,16 +118,17 @@ static av_cold void init_gaussian_params(GBlurVulkanContext *s)
s->sizeV = s->size;
else
init_kernel_size(s, &s->sizeV);
-
- s->tmpframe = NULL;
}
-static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVkSPIRVShader *shd,
- FFVkBuffer *params_buf, VkDescriptorBufferInfo *params_desc,
- int ksize, float sigma)
+static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl,
+ FFVkSPIRVShader *shd, FFVkBuffer *params_buf,
+ int ksize, float sigma, FFVkSPIRVCompiler *spv)
{
int err = 0;
uint8_t *kernel_mapped;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
@@ -137,7 +138,6 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
.mem_quali = "readonly",
.mem_layout = "std430",
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = NULL,
.buf_content = NULL,
};
@@ -145,10 +145,9 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
if (!kernel_def)
return AVERROR(ENOMEM);
- buf_desc.updater = params_desc;
buf_desc.buf_content = kernel_def;
- RET(ff_vk_add_descriptor_set(&s->vkctx, pl, shd, &buf_desc, 1, 0));
+ RET(ff_vk_pipeline_descriptor_set_add(&s->vkctx, pl, shd, &buf_desc, 1, 1, 0));
GLSLD( gblur_func );
GLSLC(0, void main() );
@@ -157,38 +156,43 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, FFVk
GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
for (int i = 0; i < planes; i++) {
GLSLC(0, );
- GLSLF(1, size = imageSize(output_images[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ GLSLF(1, size = imageSize(output_images[%i]); ,i);
+ GLSLC(1, if (!IS_WITHIN(pos, size)) );
+ GLSLC(2, return; );
if (s->planes & (1 << i)) {
- GLSLF(2, gblur(pos, %i); ,i);
+ GLSLF(1, gblur(pos, %i); ,i);
} else {
- GLSLF(2, vec4 res = texture(input_images[%i], pos); ,i);
- GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
+ GLSLF(1, vec4 res = texture(input_images[%i], pos); ,i);
+ GLSLF(1, imageStore(output_images[%i], pos, res); ,i);
}
- GLSLC(1, } );
}
GLSLC(0, } );
- RET(ff_vk_compile_shader(&s->vkctx, shd, "main"));
+ RET(spv->compile_shader(spv, s, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(&s->vkctx, shd, spv_data, spv_len, "main"));
- RET(ff_vk_init_pipeline_layout(&s->vkctx, pl));
- RET(ff_vk_init_compute_pipeline(&s->vkctx, pl));
+ RET(ff_vk_init_compute_pipeline(&s->vkctx, pl, shd));
+ RET(ff_vk_exec_pipeline_register(&s->vkctx, &s->e, pl));
- RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
- RET(ff_vk_map_buffers(&s->vkctx, params_buf, &kernel_mapped, 1, 0));
+ RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL,
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+ RET(ff_vk_map_buffer(&s->vkctx, params_buf, &kernel_mapped, 0));
init_gaussian_kernel((float *)kernel_mapped, sigma, ksize);
- RET(ff_vk_unmap_buffers(&s->vkctx, params_buf, 1, 1));
-
- params_desc->buffer = params_buf->buf;
- params_desc->range = VK_WHOLE_SIZE;
+ RET(ff_vk_unmap_buffer(&s->vkctx, params_buf, 1));
- ff_vk_update_descriptor_set(&s->vkctx, pl, 1);
+ RET(ff_vk_set_descriptor_buffer(&s->vkctx, pl, NULL, 1, 0, 0,
+ params_buf->address, params_buf->size,
+ VK_FORMAT_UNDEFINED));
fail:
av_free(kernel_def);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
return err;
}
@@ -196,16 +200,35 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err = 0;
GBlurVulkanContext *s = ctx->priv;
- FFVkSPIRVShader *shd;
+ FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
- FFVulkanDescriptorSetBinding image_descs[] = {
+ FFVkSPIRVShader *shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl_hor, &s->shd_hor, "gblur_hor_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT, 0));
+ RET(ff_vk_shader_init(&s->pl_ver, &s->shd_ver, "gblur_ver_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT, 0));
+
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_images",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_images",
@@ -218,215 +241,64 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
},
};
- image_descs[0].sampler = ff_vk_init_sampler(&s->vkctx, 1, VK_FILTER_LINEAR);
- if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
-
init_gaussian_params(s);
- ff_vk_qf_init(&s->vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
{
- /* Create shader for the horizontal pass */
- image_descs[0].updater = s->input_images;
- image_descs[1].updater = s->tmp_images;
-
- s->pl_hor = ff_vk_create_pipeline(&s->vkctx, &s->qf);
- if (!s->pl_hor) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
-
- shd = ff_vk_init_shader(s->pl_hor, "gblur_compute_hor", image_descs[0].stages);
- if (!shd) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ shd = &s->shd_hor;
+ ff_vk_shader_set_compute_sizes(shd, 32, 1, 1);
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
- RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_hor, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_hor, shd, desc, 2, 0, 0));
GLSLC(0, #define OFFSET (vec2(i, 0.0)));
- RET(init_gblur_pipeline(s, s->pl_hor, shd, &s->params_buf_hor, &s->params_desc_hor,
- s->size, s->sigma));
+ RET(init_gblur_pipeline(s, &s->pl_hor, shd, &s->params_hor, s->size, s->sigma, spv));
}
{
- /* Create shader for the vertical pass */
- image_descs[0].updater = s->tmp_images;
- image_descs[1].updater = s->output_images;
-
- s->pl_ver = ff_vk_create_pipeline(&s->vkctx, &s->qf);
- if (!s->pl_ver) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ shd = &s->shd_ver;
+ ff_vk_shader_set_compute_sizes(shd, 1, 32, 1);
- shd = ff_vk_init_shader(s->pl_ver, "gblur_compute_ver", image_descs[0].stages);
- if (!shd) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ 1, CGS, 1 });
- RET(ff_vk_add_descriptor_set(&s->vkctx, s->pl_ver, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl_ver, shd, desc, 2, 0, 0));
GLSLC(0, #define OFFSET (vec2(0.0, i)));
- RET(init_gblur_pipeline(s, s->pl_ver, shd, &s->params_buf_ver, &s->params_desc_ver,
- s->sizeV, s->sigmaV));
+ RET(init_gblur_pipeline(s, &s->pl_ver, shd, &s->params_ver, s->sizeV, s->sigmaV, spv));
}
- RET(ff_vk_create_exec_ctx(&s->vkctx, &s->exec, &s->qf));
-
s->initialized = 1;
fail:
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
static av_cold void gblur_vulkan_uninit(AVFilterContext *avctx)
{
GBlurVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
- av_frame_free(&s->tmpframe);
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl_hor);
+ ff_vk_pipeline_free(vkctx, &s->pl_ver);
+ ff_vk_shader_free(vkctx, &s->shd_hor);
+ ff_vk_shader_free(vkctx, &s->shd_ver);
+ ff_vk_free_buf(vkctx, &s->params_hor);
+ ff_vk_free_buf(vkctx, &s->params_ver);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_free_buf(&s->vkctx, &s->params_buf_hor);
- ff_vk_free_buf(&s->vkctx, &s->params_buf_ver);
ff_vk_uninit(&s->vkctx);
s->initialized = 0;
}
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
- int err;
- VkCommandBuffer cmd_buf;
- GBlurVulkanContext *s = avctx->priv;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
-
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *out = (AVVkFrame *)outframe->data[0];
- AVVkFrame *tmp = (AVVkFrame *)s->tmpframe->data[0];
-
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(&s->vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->input_images[i].imageView,
- in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->tmp_images[i].imageView,
- tmp->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(&s->vkctx, s->exec, &s->output_images[i].imageView,
- out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->tmp_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(&s->vkctx, s->pl_hor, 0);
- ff_vk_update_descriptor_set(&s->vkctx, s->pl_ver, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = tmp->layout[i],
- .newLayout = s->tmp_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = tmp->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- in->layout[i] = barriers[0].newLayout;
- in->access[i] = barriers[0].dstAccessMask;
-
- tmp->layout[i] = barriers[1].newLayout;
- tmp->access[i] = barriers[1].dstAccessMask;
-
- out->layout[i] = barriers[2].newLayout;
- out->access[i] = barriers[2].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_hor);
-
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
-
- ff_vk_bind_pipeline_exec(&s->vkctx, s->exec, s->pl_ver);
-
- vk->CmdDispatch(cmd_buf,s->vkctx.output_width,
- FFALIGN(s->vkctx.output_height, CGS)/CGS, 1);
-
- ff_vk_add_exec_dep(&s->vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(&s->vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(&s->vkctx, s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
-
- return 0;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
-}
-
static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
{
int err;
- AVFrame *out = NULL;
+ AVFrame *tmp = NULL, *out = NULL;
AVFilterContext *ctx = link->dst;
GBlurVulkanContext *s = ctx->priv;
AVFilterLink *outlink = ctx->outputs[0];
@@ -437,28 +309,32 @@ static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
goto fail;
}
- if (!s->initialized) {
- RET(init_filter(ctx, in));
- s->tmpframe = ff_get_video_buffer(outlink, outlink->w, outlink->h);
- if (!s->tmpframe) {
- err = AVERROR(ENOMEM);
- goto fail;
- }
+ tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!tmp) {
+ err = AVERROR(ENOMEM);
+ goto fail;
}
- RET(process_frames(ctx, out, in));
+ if (!s->initialized)
+ RET(init_filter(ctx, in));
- RET(av_frame_copy_props(out, in));
+ RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e,
+ (FFVulkanPipeline *[2]){ &s->pl_hor, &s->pl_ver },
+ out, tmp, in, s->sampler, NULL, 0));
+
+ err = av_frame_copy_props(out, in);
+ if (err < 0)
+ goto fail;
av_frame_free(&in);
+ av_frame_free(&tmp);
return ff_filter_frame(outlink, out);
fail:
av_frame_free(&in);
+ av_frame_free(&tmp);
av_frame_free(&out);
- av_frame_free(&s->tmpframe);
-
return err;
}
--
2.40.0
[-- Attachment #52: 0071-overlay_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18848 bytes --]
From f8835424df70d441a9d41b82173560cc31ab7ad2 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:05 +0100
Subject: [PATCH 71/97] overlay_vulkan: port for the rewrite
---
libavfilter/vf_overlay_vulkan.c | 398 ++++++++++----------------------
1 file changed, 123 insertions(+), 275 deletions(-)
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
index 7a66cf12ad..a05d9155be 100644
--- a/libavfilter/vf_overlay_vulkan.c
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,26 +21,26 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#include "framesync.h"
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
typedef struct OverlayVulkanContext {
FFVulkanContext vkctx;
+ FFFrameSync fs;
int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
- FFFrameSync fs;
- FFVkBuffer params_buf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo main_images[3];
- VkDescriptorImageInfo overlay_images[3];
- VkDescriptorImageInfo output_images[3];
- VkDescriptorBufferInfo params_desc;
+ /* Push constants / options */
+ struct {
+ int32_t o_offset[2*3];
+ int32_t o_size[2*3];
+ } opts;
int overlay_x;
int overlay_y;
@@ -80,279 +82,114 @@ static const char overlay_alpha[] = {
static av_cold int init_filter(AVFilterContext *ctx)
{
int err;
- FFVkSampler *sampler;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* tested at fail:, which is reachable before compile_shader() sets it */
OverlayVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_NEAREST);
- if (!sampler)
+ const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
+ const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
-
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- { /* Create the shader */
- const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
-
- FFVulkanDescriptorSetBinding desc_i[3] = {
- {
- .name = "main_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->main_images,
- .sampler = sampler,
- },
- {
- .name = "overlay_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->overlay_images,
- .sampler = sampler,
- },
- {
- .name = "output_img",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- FFVulkanDescriptorSetBinding desc_b = {
- .name = "params",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .mem_quali = "readonly",
- .mem_layout = "std430",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = &s->params_desc,
- .buf_content = "ivec2 o_offset[3], o_size[3];",
- };
-
- FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "overlay_compute",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
-
- GLSLD( overlay_noalpha );
- GLSLD( overlay_alpha );
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- GLSLF(1, int planes = %i; ,planes);
- GLSLC(1, for (int i = 0; i < planes; i++) { );
- if (ialpha)
- GLSLC(2, overlay_alpha_opaque(i, pos); );
- else
- GLSLC(2, overlay_noalpha(i, pos); );
- GLSLC(1, } );
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- }
-
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
-
- { /* Create and update buffer */
- const AVPixFmtDescriptor *desc;
-
- /* NOTE: std430 requires the same identical struct layout, padding and
- * alignment as C, so we're allowed to do this, as this will map
- * exactly to what the shader recieves */
- struct {
- int32_t o_offset[2*3];
- int32_t o_size[2*3];
- } *par;
-
- err = ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par), NULL,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
- if (err)
- return err;
-
- err = ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0);
- if (err)
- return err;
-
- desc = av_pix_fmt_desc_get(s->vkctx.output_format);
-
- par->o_offset[0] = s->overlay_x;
- par->o_offset[1] = s->overlay_y;
- par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w;
- par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h;
- par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w;
- par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h;
-
- par->o_size[0] = s->overlay_w;
- par->o_size[1] = s->overlay_h;
- par->o_size[2] = par->o_size[0] >> desc->log2_chroma_w;
- par->o_size[3] = par->o_size[1] >> desc->log2_chroma_h;
- par->o_size[4] = par->o_size[0] >> desc->log2_chroma_w;
- par->o_size[5] = par->o_size[1] >> desc->log2_chroma_h;
-
- err = ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1);
- if (err)
- return err;
-
- s->params_desc.buffer = s->params_buf.buf;
- s->params_desc.range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 1);
}
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "overlay_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT, 0));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, ivec2 o_offset[3]; );
+ GLSLC(1, ivec2 o_size[3]; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "main_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "overlay_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 3, 0, 0));
+
+ GLSLD( overlay_noalpha );
+ GLSLD( overlay_alpha );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, int planes = %i; ,planes);
+ GLSLC(1, for (int i = 0; i < planes; i++) { );
+ if (ialpha)
+ GLSLC(2, overlay_alpha_opaque(i, pos); );
+ else
+ GLSLC(2, overlay_noalpha(i, pos); );
+ GLSLC(1, } );
+ GLSLC(0, } );
+
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
+
+ s->opts.o_offset[0] = s->overlay_x;
+ s->opts.o_offset[1] = s->overlay_y;
+ s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h;
+ s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h;
+
+ s->opts.o_size[0] = s->overlay_w;
+ s->opts.o_size[1] = s->overlay_h;
+ s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h;
+ s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w;
+ s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h;
s->initialized = 1;
- return 0;
-
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
- AVFrame *main_f, AVFrame *overlay_f)
-{
- int err;
- VkCommandBuffer cmd_buf;
- OverlayVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
- AVVkFrame *main = (AVVkFrame *)main_f->data[0];
- AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0];
-
- AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data;
- AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data;
-
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
- const VkFormat *main_sw_formats = av_vkfmt_from_pixfmt(main_fc->sw_format);
- const VkFormat *overlay_sw_formats = av_vkfmt_from_pixfmt(overlay_fc->sw_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->main_images[i].imageView, main->img[i],
- main_sw_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->overlay_images[i].imageView, overlay->img[i],
- overlay_sw_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->main_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar[3] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = main->layout[i],
- .newLayout = s->main_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = main->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = overlay->layout[i],
- .newLayout = s->overlay_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = overlay->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(bar), bar);
-
- main->layout[i] = bar[0].newLayout;
- main->access[i] = bar[0].dstAccessMask;
-
- overlay->layout[i] = bar[1].newLayout;
- overlay->access[i] = bar[1].dstAccessMask;
-
- out->layout[i] = bar[2].newLayout;
- out->access[i] = bar[2].dstAccessMask;
- }
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
- vk->CmdDispatch(cmd_buf,
- FFALIGN(s->vkctx.output_width, CGROUPS[0])/CGROUPS[0],
- FFALIGN(s->vkctx.output_height, CGROUPS[1])/CGROUPS[1], 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, main_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, overlay_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
- ff_vk_qf_rotate(&s->qf);
-
- return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
return err;
}
@@ -394,7 +231,9 @@ static int overlay_vulkan_blend(FFFrameSync *fs)
goto fail;
}
- RET(process_frames(ctx, out, input_main, input_overlay));
+ RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl,
+ out, (AVFrame *[]){ input_main, input_overlay }, 2,
+ s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, input_main);
if (err < 0)
@@ -443,8 +282,17 @@ static av_cold int overlay_vulkan_init(AVFilterContext *avctx)
static void overlay_vulkan_uninit(AVFilterContext *avctx)
{
OverlayVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_free_buf(&s->vkctx, &s->params_buf);
ff_vk_uninit(&s->vkctx);
ff_framesync_uninit(&s->fs);
--
2.40.0
[-- Attachment #53: 0072-scale_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 18981 bytes --]
From 28f14ad8e8cbd86daa431d29cb105497f91b4b28 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:32 +0100
Subject: [PATCH 72/97] scale_vulkan: port for the rewrite
---
libavfilter/vf_scale_vulkan.c | 366 ++++++++++++----------------------
1 file changed, 125 insertions(+), 241 deletions(-)
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index d14b32277d..64f5e79afb 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -1,4 +1,6 @@
/*
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -19,12 +21,11 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "scale_eval.h"
#include "internal.h"
#include "colorspace.h"
-#define CGROUPS (int [3]){ 32, 32, 1 }
-
enum ScalerFunc {
F_BILINEAR = 0,
F_NEAREST,
@@ -35,15 +36,17 @@ enum ScalerFunc {
typedef struct ScaleVulkanContext {
FFVulkanContext vkctx;
+ int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
- FFVkBuffer params_buf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
- /* Shader updators, must be in the main filter struct */
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
- VkDescriptorBufferInfo params_desc;
+ /* Push constants / options */
+ struct {
+ float yuv_matrix[4][4];
+ } opts;
char *out_format_string;
char *w_expr;
@@ -51,8 +54,6 @@ typedef struct ScaleVulkanContext {
enum ScalerFunc scaler;
enum AVColorRange out_range;
-
- int initialized;
} ScaleVulkanContext;
static const char scale_bilinear[] = {
@@ -110,10 +111,15 @@ static const char write_444[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
- FFVkSampler *sampler;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* NULL until compile_shader() sets it; the fail: path tests this pointer */
VkFilter sampler_mode;
ScaleVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
int crop_x = in->crop_left;
int crop_y = in->crop_top;
@@ -121,8 +127,6 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
int crop_h = in->height - (in->crop_top + in->crop_bottom);
int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
switch (s->scaler) {
case F_NEAREST:
sampler_mode = VK_FILTER_NEAREST;
@@ -132,264 +136,134 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
break;
};
- /* Create a sampler */
- sampler = ff_vk_init_sampler(vkctx, 0, sampler_mode);
- if (!sampler)
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
+ }
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- { /* Create the shader */
- FFVulkanDescriptorSetBinding desc_i[2] = {
- {
- .name = "input_img",
- .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .dimensions = 2,
- .elems = in_planes,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
- .sampler = sampler,
- },
- {
- .name = "output_img",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
- .mem_quali = "writeonly",
- .dimensions = 2,
- .elems = av_pix_fmt_count_planes(s->vkctx.output_format),
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
- },
- };
-
- FFVulkanDescriptorSetBinding desc_b = {
- .name = "params",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .mem_quali = "readonly",
- .mem_layout = "std430",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = &s->params_desc,
- .buf_content = "mat4 yuv_matrix;",
- };
-
- FFVkSPIRVShader *shd = ff_vk_init_shader(s->pl, "scale_compute",
- VK_SHADER_STAGE_COMPUTE_BIT);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, CGROUPS);
-
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, desc_i, FF_ARRAY_ELEMS(desc_i), 0)); /* set 0 */
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
-
- GLSLD( scale_bilinear );
-
- if (s->vkctx.output_format != s->vkctx.input_format) {
- GLSLD( rgb2yuv );
- }
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "scale_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT, 0));
+
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 32, 1);
+
+ GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+ GLSLC(1, mat4 yuv_matrix; );
+ GLSLC(0, }; );
+ GLSLC(0, );
+
+ ff_vk_add_push_constant(&s->pl, 0, sizeof(s->opts),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = in_planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .samplers = DUP_SAMPLER(s->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = av_pix_fmt_count_planes(s->vkctx.output_format),
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
- switch (s->vkctx.output_format) {
- case AV_PIX_FMT_NV12: GLSLD(write_nv12); break;
- case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
- case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
- default: break;
- }
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height);
- GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h);
- GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y);
- GLSLC(0, );
-
- if (s->vkctx.output_format == s->vkctx.input_format) {
- for (int i = 0; i < desc_i[1].elems; i++) {
- GLSLF(1, size = imageSize(output_img[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- switch (s->scaler) {
- case F_NEAREST:
- case F_BILINEAR:
- GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i);
- GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
- break;
- };
- GLSLC(1, } );
- }
- } else {
- GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); );
- GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG);
- switch (s->vkctx.output_format) {
- case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break;
- case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break;
- case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break;
- default: return AVERROR(EINVAL);
- }
- }
+ GLSLD( scale_bilinear );
+
+ if (s->vkctx.output_format != s->vkctx.input_format) {
+ GLSLD( rgb2yuv );
+ }
- GLSLC(0, } );
+ switch (s->vkctx.output_format) {
+ case AV_PIX_FMT_NV12: GLSLD(write_nv12); break;
+ case AV_PIX_FMT_YUV420P: GLSLD( write_420); break;
+ case AV_PIX_FMT_YUV444P: GLSLD( write_444); break;
+ default: break;
+ }
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height);
+ GLSLF(1, vec2 c_r = vec2(%i, %i) / in_d; ,crop_w, crop_h);
+ GLSLF(1, vec2 c_o = vec2(%i, %i) / in_d; ,crop_x,crop_y);
+ GLSLC(0, );
+
+ if (s->vkctx.output_format == s->vkctx.input_format) {
+ for (int i = 0; i < desc[1].elems; i++) { /* desc[1] is output_img: one pass per output plane */
+ GLSLF(1, size = imageSize(output_img[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ switch (s->scaler) {
+ case F_NEAREST:
+ case F_BILINEAR:
+ GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i);
+ GLSLF(2, imageStore(output_img[%i], pos, res); ,i);
+ break;
+ };
+ GLSLC(1, } );
+ }
+ } else {
+ GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); );
+ GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG);
+ switch (s->vkctx.output_format) {
+ case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break;
+ case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break;
+ case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break;
+ default: spv->uninit(&spv); return AVERROR(EINVAL); /* release the SPIR-V compiler before bailing out */
+ }
}
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
+ GLSLC(0, } );
if (s->vkctx.output_format != s->vkctx.input_format) {
const AVLumaCoefficients *lcoeffs;
double tmp_mat[3][3];
- struct {
- float yuv_matrix[4][4];
- } *par;
-
lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace);
if (!lcoeffs) {
av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n");
return AVERROR(EINVAL);
}
- RET(ff_vk_create_buf(vkctx, &s->params_buf,
- sizeof(*par), NULL,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
-
- RET(ff_vk_map_buffers(vkctx, &s->params_buf, (uint8_t **)&par, 1, 0));
-
ff_fill_rgb2yuv_table(lcoeffs, tmp_mat);
- memset(par, 0, sizeof(*par));
-
for (int y = 0; y < 3; y++)
for (int x = 0; x < 3; x++)
- par->yuv_matrix[x][y] = tmp_mat[x][y];
-
- par->yuv_matrix[3][3] = 1.0;
-
- RET(ff_vk_unmap_buffers(vkctx, &s->params_buf, 1, 1));
-
- s->params_desc.buffer = s->params_buf.buf;
- s->params_desc.range = VK_WHOLE_SIZE;
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 1);
+ s->opts.yuv_matrix[x][y] = tmp_mat[x][y];
+ s->opts.yuv_matrix[3][3] = 1.0;
}
- /* Execution context */
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
s->initialized = 1;
- return 0;
+ err = 0; /* fall through to fail: so the SPIR-V blob and compiler are released on success too */
fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- ScaleVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &vkctx->vkfn;
- AVVkFrame *in = (AVVkFrame *)in_f->data[0];
- AVVkFrame *out = (AVVkFrame *)out_f->data[0];
- VkImageMemoryBarrier barriers[AV_NUM_DATA_POINTERS*2];
- int barrier_count = 0;
- const int planes = av_pix_fmt_count_planes(s->vkctx.input_format);
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- /* Update descriptors and init the exec context */
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
-
- for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier bar = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- };
-
- memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier));
-
- in->layout[i] = bar.newLayout;
- in->access[i] = bar.dstAccessMask;
- }
-
- for (int i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) {
- VkImageMemoryBarrier bar = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- };
-
- memcpy(&barriers[barrier_count++], &bar, sizeof(VkImageMemoryBarrier));
-
- out->layout[i] = bar.newLayout;
- out->access[i] = bar.dstAccessMask;
- }
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, barrier_count, barriers);
-
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
-
- vk->CmdDispatch(cmd_buf,
- FFALIGN(vkctx->output_width, CGROUPS[0])/CGROUPS[0],
- FFALIGN(vkctx->output_height, CGROUPS[1])/CGROUPS[1], 1);
-
- ff_vk_add_exec_dep(vkctx, s->exec, in_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, out_f, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
-
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
return err;
-
-fail:
- ff_vk_discard_exec_deps(s->exec);
- return err;
}
static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
@@ -408,7 +282,8 @@ static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, &s->opts, sizeof(s->opts)));
err = av_frame_copy_props(out, in);
if (err < 0)
@@ -475,8 +350,17 @@ static int scale_vulkan_config_output(AVFilterLink *outlink)
static void scale_vulkan_uninit(AVFilterContext *avctx)
{
ScaleVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
- ff_vk_free_buf(&s->vkctx, &s->params_buf);
ff_vk_uninit(&s->vkctx);
s->initialized = 0;
--
2.40.0
[-- Attachment #54: 0073-transpose_vulkan-port-for-the-rewrite.patch --]
[-- Type: text/x-diff, Size: 12421 bytes --]
From 57daac8d1273dac519a8be567ed4c6e8954d95d6 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 17 Feb 2023 03:13:43 +0100
Subject: [PATCH 73/97] transpose_vulkan: port for the rewrite
---
libavfilter/vf_transpose_vulkan.c | 224 ++++++++++--------------------
1 file changed, 76 insertions(+), 148 deletions(-)
diff --git a/libavfilter/vf_transpose_vulkan.c b/libavfilter/vf_transpose_vulkan.c
index 3b2ce4fb69..f9c0dd928d 100644
--- a/libavfilter/vf_transpose_vulkan.c
+++ b/libavfilter/vf_transpose_vulkan.c
@@ -1,5 +1,7 @@
/*
* copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com>
+ * Copyright (c) Lynne
+ *
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
@@ -20,41 +22,60 @@
#include "libavutil/random_seed.h"
#include "libavutil/opt.h"
#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
#include "internal.h"
#include "transpose.h"
-#define CGS 32
-
typedef struct TransposeVulkanContext {
FFVulkanContext vkctx;
- FFVkQueueFamilyCtx qf;
- FFVkExecContext *exec;
- FFVulkanPipeline *pl;
- VkDescriptorImageInfo input_images[3];
- VkDescriptorImageInfo output_images[3];
+ int initialized;
+ FFVulkanPipeline pl;
+ FFVkExecPool e;
+ FFVkQueueFamilyCtx qf;
+ FFVkSPIRVShader shd;
+ VkSampler sampler;
int dir;
int passthrough;
- int initialized;
} TransposeVulkanContext;
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
- int err = 0;
- FFVkSPIRVShader *shd;
+ int err;
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL; /* NULL until compile_shader() sets it; the fail: path tests this pointer */
TransposeVulkanContext *s = ctx->priv;
FFVulkanContext *vkctx = &s->vkctx;
+
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+ FFVkSPIRVShader *shd = &s->shd;
+ FFVkSPIRVCompiler *spv;
+ FFVulkanDescriptorSetBinding *desc;
+
+ spv = ff_vk_spirv_init();
+ if (!spv) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+ RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+ RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_LINEAR));
+ RET(ff_vk_shader_init(&s->pl, &s->shd, "transpose_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT, 0));
- FFVulkanDescriptorSetBinding image_descs[] = {
+ ff_vk_shader_set_compute_sizes(&s->shd, 32, 1, 1);
+
+ desc = (FFVulkanDescriptorSetBinding []) {
{
.name = "input_images",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->input_images,
+ .samplers = DUP_SAMPLER(s->sampler),
},
{
.name = "output_images",
@@ -64,154 +85,49 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .updater = s->output_images,
},
};
- image_descs[0].sampler = ff_vk_init_sampler(vkctx, 1, VK_FILTER_LINEAR);
- if (!image_descs[0].sampler)
- return AVERROR_EXTERNAL;
-
- ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
-
- {
- s->pl = ff_vk_create_pipeline(vkctx, &s->qf);
- if (!s->pl)
- return AVERROR(ENOMEM);
-
- shd = ff_vk_init_shader(s->pl, "transpose_compute", image_descs[0].stages);
- if (!shd)
- return AVERROR(ENOMEM);
-
- ff_vk_set_compute_shader_sizes(shd, (int [3]){ CGS, 1, 1 });
- RET(ff_vk_add_descriptor_set(vkctx, s->pl, shd, image_descs, FF_ARRAY_ELEMS(image_descs), 0));
-
- GLSLC(0, void main() );
- GLSLC(0, { );
- GLSLC(1, ivec2 size; );
- GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
- for (int i = 0; i < planes; i++) {
- GLSLC(0, );
- GLSLF(1, size = imageSize(output_images[%i]); ,i);
- GLSLC(1, if (IS_WITHIN(pos, size)) { );
- if (s->dir == TRANSPOSE_CCLOCK)
- GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
- else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
- GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i);
- if (s->dir == TRANSPOSE_CLOCK)
- GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); );
- } else
- GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i);
- GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
- GLSLC(1, } );
- }
- GLSLC(0, } );
-
- RET(ff_vk_compile_shader(vkctx, shd, "main"));
- RET(ff_vk_init_pipeline_layout(vkctx, s->pl));
- RET(ff_vk_init_compute_pipeline(vkctx, s->pl));
- }
-
- RET(ff_vk_create_exec_ctx(vkctx, &s->exec, &s->qf));
- s->initialized = 1;
-
-fail:
- return err;
-}
-
-static int process_frames(AVFilterContext *avctx, AVFrame *outframe, AVFrame *inframe)
-{
- int err = 0;
- VkCommandBuffer cmd_buf;
- TransposeVulkanContext *s = avctx->priv;
- FFVulkanContext *vkctx = &s->vkctx;
- FFVulkanFunctions *vk = &s->vkctx.vkfn;
- const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
-
- AVVkFrame *in = (AVVkFrame *)inframe->data[0];
- AVVkFrame *out = (AVVkFrame *)outframe->data[0];
-
- const VkFormat *input_formats = av_vkfmt_from_pixfmt(s->vkctx.input_format);
- const VkFormat *output_formats = av_vkfmt_from_pixfmt(s->vkctx.output_format);
-
- ff_vk_start_exec_recording(vkctx, s->exec);
- cmd_buf = ff_vk_get_exec_buf(s->exec);
-
- for (int i = 0; i < planes; i++) {
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->input_images[i].imageView, in->img[i],
- input_formats[i],
- ff_comp_identity_map));
-
- RET(ff_vk_create_imageview(vkctx, s->exec,
- &s->output_images[i].imageView, out->img[i],
- output_formats[i],
- ff_comp_identity_map));
-
- s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
- s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
- }
-
- ff_vk_update_descriptor_set(vkctx, s->pl, 0);
+ RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 2, 0, 0));
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 size; );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
for (int i = 0; i < planes; i++) {
- VkImageMemoryBarrier barriers[] = {
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
- .oldLayout = in->layout[i],
- .newLayout = s->input_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = in->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- {
- .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
- .oldLayout = out->layout[i],
- .newLayout = s->output_images[i].imageLayout,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .image = out->img[i],
- .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .subresourceRange.levelCount = 1,
- .subresourceRange.layerCount = 1,
- },
- };
-
- vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
- 0, NULL, 0, NULL, FF_ARRAY_ELEMS(barriers), barriers);
-
- in->layout[i] = barriers[0].newLayout;
- in->access[i] = barriers[0].dstAccessMask;
-
- out->layout[i] = barriers[1].newLayout;
- out->access[i] = barriers[1].dstAccessMask;
+ GLSLC(0, );
+ GLSLF(1, size = imageSize(output_images[%i]); ,i);
+ GLSLC(1, if (IS_WITHIN(pos, size)) { );
+ if (s->dir == TRANSPOSE_CCLOCK)
+ GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i);
+ else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) {
+ GLSLF(2, vec4 res = texture(input_images[%i], ivec2(size.yx - pos.yx)); ,i);
+ if (s->dir == TRANSPOSE_CLOCK)
+ GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); );
+ } else
+ GLSLF(2, vec4 res = texture(input_images[%i], pos.yx); ,i);
+ GLSLF(2, imageStore(output_images[%i], pos, res); ,i);
+ GLSLC(1, } );
}
+ GLSLC(0, } );
- ff_vk_bind_pipeline_exec(vkctx, s->exec, s->pl);
- vk->CmdDispatch(cmd_buf, FFALIGN(s->vkctx.output_width, CGS)/CGS,
- s->vkctx.output_height, 1);
+ RET(spv->compile_shader(spv, ctx, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main"));
- ff_vk_add_exec_dep(vkctx, s->exec, inframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
- ff_vk_add_exec_dep(vkctx, s->exec, outframe, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
+ RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, shd));
+ RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
- err = ff_vk_submit_exec_queue(vkctx, s->exec);
- if (err)
- return err;
-
- ff_vk_qf_rotate(&s->qf);
+ s->initialized = 1;
- return 0;
+ err = 0; /* fall through to fail: so the SPIR-V blob and compiler are released on success too */
fail:
- ff_vk_discard_exec_deps(s->exec);
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+ if (spv)
+ spv->uninit(&spv);
+
return err;
}
@@ -235,7 +151,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
if (!s->initialized)
RET(init_filter(ctx, in));
- RET(process_frames(ctx, out, in));
+ RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->pl, out, in,
+ s->sampler, NULL, 0));
RET(av_frame_copy_props(out, in));
@@ -259,6 +176,17 @@ fail:
static av_cold void transpose_vulkan_uninit(AVFilterContext *avctx)
{
TransposeVulkanContext *s = avctx->priv;
+ FFVulkanContext *vkctx = &s->vkctx;
+ FFVulkanFunctions *vk = &vkctx->vkfn;
+
+ ff_vk_exec_pool_free(vkctx, &s->e);
+ ff_vk_pipeline_free(vkctx, &s->pl);
+ ff_vk_shader_free(vkctx, &s->shd);
+
+ if (s->sampler)
+ vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler,
+ vkctx->hwctx->alloc);
+
ff_vk_uninit(&s->vkctx);
s->initialized = 0;
--
2.40.0
[-- Attachment #55: 0074-lavfi-add-bwdif_vulkan.patch --]
[-- Type: text/x-diff, Size: 22884 bytes --]
From 84bf2b5eebccf05c1ecc9f50a488a8add63c6095 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 26 Feb 2023 22:36:51 +0100
Subject: [PATCH 74/97] lavfi: add bwdif_vulkan
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_bwdif_vulkan.c | 416 ++++++++++++++++++++++++++++++++++
4 files changed, 419 insertions(+)
create mode 100644 libavfilter/vf_bwdif_vulkan.c
diff --git a/configure b/configure
index b3732dabe9..9a75b20438 100755
--- a/configure
+++ b/configure
@@ -3637,6 +3637,7 @@ blend_vulkan_filter_deps="vulkan spirv_compiler"
boxblur_filter_deps="gpl"
boxblur_opencl_filter_deps="opencl gpl"
bs2b_filter_deps="libbs2b"
+bwdif_vulkan_filter_deps="vulkan spirv_compiler"
chromaber_vulkan_filter_deps="vulkan spirv_compiler"
colorkey_opencl_filter_deps="opencl"
colormatrix_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 4c386bc158..a92611ec70 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -210,6 +210,7 @@ OBJS-$(CONFIG_BOXBLUR_FILTER) += vf_boxblur.o boxblur.o
OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \
opencl/avgblur.o boxblur.o
OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o yadif_common.o
+OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vf_bwdif_vulkan.o yadif_common.o vulkan.o vulkan_filter.o
OBJS-$(CONFIG_CAS_FILTER) += vf_cas.o
OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vf_chromaber_vulkan.o vulkan.o vulkan_filter.o
OBJS-$(CONFIG_CHROMAHOLD_FILTER) += vf_chromakey.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index d7db46c2af..1193be140c 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -195,6 +195,7 @@ extern const AVFilter ff_vf_bm3d;
extern const AVFilter ff_vf_boxblur;
extern const AVFilter ff_vf_boxblur_opencl;
extern const AVFilter ff_vf_bwdif;
+extern const AVFilter ff_vf_bwdif_vulkan;
extern const AVFilter ff_vf_cas;
extern const AVFilter ff_vf_chromaber_vulkan;
extern const AVFilter ff_vf_chromahold;
diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c
new file mode 100644
index 0000000000..126e852e96
--- /dev/null
+++ b/libavfilter/vf_bwdif_vulkan.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (c) Lynne
+ * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
+ * Copyright (C) 2016 Thomas Mundt <loudmax@yahoo.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/random_seed.h"
+#include "libavutil/opt.h"
+#include "vulkan_filter.h"
+#include "vulkan_spirv.h"
+#include "yadif.h"
+#include "internal.h"
+
+/* Private context of the bwdif_vulkan filter. */
+typedef struct BWDIFVulkanContext {
+ YADIFContext yadif; /* shared yadif-family state: prev/cur/next frames, mode, current_field, filter callback */
+ FFVulkanContext vkctx; /* Vulkan filter state: function table, device context, frames refs, output format/size */
+
+ int initialized; /* set to 1 at the end of init_filter(), reset to 0 in bwdif_vulkan_uninit() */
+ FFVkExecPool e; /* execution pool created in init_filter() and handed to ff_vk_filter_process_Nin() */
+ FFVkQueueFamilyCtx qf; /* queue family context initialised with VK_QUEUE_COMPUTE_BIT */
+ VkSampler sampler; /* VK_FILTER_NEAREST sampler attached to the prev/cur/next bindings */
+ FFVulkanPipeline pl; /* compute pipeline built in init_filter() */
+ FFVkSPIRVShader shd; /* the "bwdif_compute" shader */
+} BWDIFVulkanContext;
+
+/* Push-constant payload; the fields mirror the pushConstants block emitted by init_filter(). */
+typedef struct BWDIFParameters {
+ int parity; /* parity argument of the filter callback */
+ int tff; /* tff argument of the filter callback */
+ int current_field; /* copy of YADIFContext.current_field; the shader compares it against YADIF_FIELD_END */
+} BWDIFParameters;
+
+/*
+ * GLSL helpers prepended to the generated compute shader:
+ *  - process_intra(): interpolates a sample from four lines of the current
+ *    frame only (coef_sp taps);
+ *  - process_line(): interpolates a sample from the previous, current and
+ *    next frames, falling back to the temporal average fd where the
+ *    temporal difference is zero.
+ */
+static const char filter_fn[] = {
+    "const vec4 coef_lf[2] = { vec4(4309), vec4(213), };\n"
+    "const vec4 coef_hf[3] = { vec4(5570), vec4(3801), vec4(1016) };\n"
+    "const vec4 coef_sp[2] = { vec4(5077), vec4(981), };\n"
+    C(0, )
+    C(0, vec4 process_intra(vec4 cur[4]) )
+    C(0, { )
+    C(1, return (coef_sp[0]*(cur[1] + cur[2]) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13); )
+    C(0, } )
+    C(0, )
+    C(0, vec4 process_line(vec4 prev2[5], vec4 prev1[2], vec4 cur[4], vec4 next1[2], vec4 next2[5]) )
+    C(0, { )
+    C(1, vec4 fc = cur[1]; )
+    C(1, vec4 fe = cur[2]; )
+    C(1, vec4 fs = prev2[2] + next2[2]; )
+    C(1, vec4 fd = fs / 2; )
+    C(0, )
+    C(1, vec4 temp_diff[3]; )
+    C(1, temp_diff[0] = abs(prev2[2] - next2[2]); )
+    C(1, temp_diff[1] = (abs(prev1[0] - fc) + abs(prev1[1] - fe)) / 2; )
+    /* Fixed: this used to assign temp_diff[1] a second time, discarding the
+     * prev1 term and leaving temp_diff[2] uninitialized for the max() below. */
+    C(1, temp_diff[2] = (abs(next1[0] - fc) + abs(next1[1] - fe)) / 2; )
+    C(1, vec4 diff = max(temp_diff[0] / 2, max(temp_diff[1], temp_diff[2])); )
+    C(1, bvec4 diff_mask = equal(diff, vec4(0)); )
+    C(0, )
+    C(1, vec4 fbs = prev2[1] + next2[1]; )
+    C(1, vec4 ffs = prev2[3] + next2[3]; )
+    C(1, vec4 fb = (fbs / 2) - fc; )
+    C(1, vec4 ff = (ffs / 2) - fe; )
+    C(1, vec4 dc = fd - fc; )
+    C(1, vec4 de = fd - fe; )
+    C(1, vec4 mmax = max(de, max(dc, min(fb, ff))); )
+    C(1, vec4 mmin = min(de, min(dc, max(fb, ff))); )
+    /* NOTE(review): `diff` is never read after this refinement; presumably the
+     * result was meant to be clamped to fd +/- diff -- confirm. */
+    C(1, diff = max(diff, max(mmin, -mmax)); )
+    C(0, )
+" vec4 interpolate_all = (((coef_hf[0]*(fs) - coef_hf[1]*(fbs + ffs) +\n"
+" coef_hf[2]*(prev2[0] + next2[0] + prev2[4] + next2[4])) / 4) +\n"
+" coef_lf[0]*(fc + fe) - coef_lf[1]*(cur[0] + cur[3])) / (1 << 13);\n"
+" vec4 interpolate_cur = (coef_sp[0]*(fc + fe) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13);\n"
+    C(0, )
+    C(1, bvec4 interpolate_cnd1 = greaterThan(abs(fc - fe), temp_diff[0]); )
+    C(1, vec4 dst = mix(interpolate_cur, interpolate_all, interpolate_cnd1); )
+    C(1, return mix(dst, fd, diff_mask); )
+    C(0, } )
+};
+
+/*
+ * Builds everything needed to run the filter on the GPU: execution pool,
+ * sampler, descriptor set layout, push constants, the generated GLSL compute
+ * shader and the compute pipeline.
+ *
+ * The generated main() handles every plane of the output format. Rows where
+ * ((pos.y ^ parity) & 1) == 1 are interpolated (process_intra() when
+ * current_field == YADIF_FIELD_END, process_line() otherwise); the remaining
+ * rows are copied from the current frame.
+ *
+ * Returns 0 on success or a negative error code. The SPIR-V compiler and its
+ * output are released on both the success and the failure path.
+ */
+static av_cold int init_filter(AVFilterContext *ctx)
+{
+    int err;
+    uint8_t *spv_data;
+    size_t spv_len;
+    /* Must start out NULL: the fail path tests it, and that path can be
+     * reached before compile_shader() has had a chance to set it. */
+    void *spv_opaque = NULL;
+    BWDIFVulkanContext *s = ctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+    FFVkSPIRVShader *shd;
+    FFVkSPIRVCompiler *spv;
+    FFVulkanDescriptorSetBinding *desc;
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    ff_vk_qf_init(vkctx, &s->qf, VK_QUEUE_COMPUTE_BIT);
+    RET(ff_vk_exec_pool_init(vkctx, &s->qf, &s->e, s->qf.nb_queues*4, 0, 0, 0, NULL));
+    RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST));
+    RET(ff_vk_shader_init(&s->pl, &s->shd, "bwdif_compute",
+                          VK_SHADER_STAGE_COMPUTE_BIT, 0));
+    shd = &s->shd;
+
+    ff_vk_shader_set_compute_sizes(shd, 1, 64, 1);
+
+    /* Three sampled inputs (prev, cur, next) and one write-only storage
+     * image (dst), each an array with one element per plane. */
+    desc = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name = "prev",
+            .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .dimensions = 2,
+            .elems = planes,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers = DUP_SAMPLER(s->sampler),
+        },
+        {
+            .name = "cur",
+            .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .dimensions = 2,
+            .elems = planes,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers = DUP_SAMPLER(s->sampler),
+        },
+        {
+            .name = "next",
+            .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .dimensions = 2,
+            .elems = planes,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .samplers = DUP_SAMPLER(s->sampler),
+        },
+        {
+            .name = "dst",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+            .mem_quali = "writeonly",
+            .dimensions = 2,
+            .elems = planes,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+
+    RET(ff_vk_pipeline_descriptor_set_add(vkctx, &s->pl, shd, desc, 4, 0, 0));
+
+    /* Keep this block in sync with BWDIFParameters. */
+    GLSLC(0, layout(push_constant, std430) uniform pushConstants { );
+    GLSLC(1, int parity; );
+    GLSLC(1, int tff; );
+    GLSLC(1, int current_field; );
+    GLSLC(0, }; );
+
+    ff_vk_add_push_constant(&s->pl, 0, sizeof(BWDIFParameters),
+                            VK_SHADER_STAGE_COMPUTE_BIT);
+
+    GLSLD( filter_fn );
+    GLSLC(0, void main() );
+    GLSLC(0, { );
+    GLSLC(1, vec4 res; );
+    GLSLC(1, ivec2 size; );
+    GLSLC(1, vec4 dcur[4]; );
+    GLSLC(1, vec4 prev1[2]; );
+    GLSLC(1, vec4 next1[2]; );
+    GLSLC(1, vec4 prev2[5]; );
+    GLSLC(1, vec4 next2[5]; );
+    GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+    GLSLC(1, bool filter_field = ((pos.y ^ parity) & 1) == 1; );
+    GLSLF(1, bool is_intra = filter_field && (current_field == %i); ,YADIF_FIELD_END);
+    GLSLC(1, bool field_parity = (parity ^ tff) != 0; );
+    GLSLC(0, );
+
+    /* Emit one copy of the per-pixel logic for each plane. */
+    for (int i = 0; i < planes; i++) {
+        GLSLC(0, );
+        GLSLF(1, size = imageSize(dst[%i]); ,i);
+        GLSLC(1, if (!IS_WITHIN(pos, size)) { );
+        GLSLC(2, return; );
+        GLSLC(1, } else if (is_intra) { );
+        GLSLF(2, dcur[0] = texture(cur[%i], pos - ivec2(0, 3)); ,i);
+        GLSLF(2, dcur[1] = texture(cur[%i], pos - ivec2(0, 1)); ,i);
+        GLSLF(2, dcur[2] = texture(cur[%i], pos + ivec2(0, 1)); ,i);
+        GLSLF(2, dcur[3] = texture(cur[%i], pos + ivec2(0, 3)); ,i);
+        GLSLC(0, );
+        GLSLC(2, res = process_intra(dcur); );
+        GLSLF(2, imageStore(dst[%i], pos, res); ,i);
+        GLSLC(1, } else if (filter_field) { );
+        GLSLF(2, dcur[0] = texture(cur[%i], pos - ivec2(0, 3)); ,i);
+        GLSLF(2, dcur[1] = texture(cur[%i], pos - ivec2(0, 1)); ,i);
+        GLSLF(2, dcur[2] = texture(cur[%i], pos + ivec2(0, 1)); ,i);
+        GLSLF(2, dcur[3] = texture(cur[%i], pos + ivec2(0, 3)); ,i);
+        GLSLC(0, );
+        GLSLF(2, prev1[0] = texture(prev[%i], pos - ivec2(0, 1)); ,i);
+        GLSLF(2, prev1[1] = texture(prev[%i], pos + ivec2(0, 1)); ,i);
+        GLSLC(0, );
+        GLSLF(2, next1[0] = texture(next[%i], pos - ivec2(0, 1)); ,i);
+        GLSLF(2, next1[1] = texture(next[%i], pos + ivec2(0, 1)); ,i);
+        GLSLC(0, );
+        /* field_parity selects which pair of frames (prev/cur or cur/next)
+         * supplies the prev2/next2 taps. */
+        GLSLC(2, if (field_parity) { );
+        GLSLF(3, prev2[0] = texture(prev[%i], pos - ivec2(0, 4)); ,i);
+        GLSLF(3, prev2[1] = texture(prev[%i], pos - ivec2(0, 2)); ,i);
+        GLSLF(3, prev2[2] = texture(prev[%i], pos); ,i);
+        GLSLF(3, prev2[3] = texture(prev[%i], pos + ivec2(0, 2)); ,i);
+        GLSLF(3, prev2[4] = texture(prev[%i], pos + ivec2(0, 4)); ,i);
+        GLSLC(0, );
+        GLSLF(3, next2[0] = texture(cur[%i], pos - ivec2(0, 4)); ,i);
+        GLSLF(3, next2[1] = texture(cur[%i], pos - ivec2(0, 2)); ,i);
+        GLSLF(3, next2[2] = texture(cur[%i], pos); ,i);
+        GLSLF(3, next2[3] = texture(cur[%i], pos + ivec2(0, 2)); ,i);
+        GLSLF(3, next2[4] = texture(cur[%i], pos + ivec2(0, 4)); ,i);
+        GLSLC(2, } else { );
+        GLSLF(3, prev2[0] = texture(cur[%i], pos - ivec2(0, 4)); ,i);
+        GLSLF(3, prev2[1] = texture(cur[%i], pos - ivec2(0, 2)); ,i);
+        GLSLF(3, prev2[2] = texture(cur[%i], pos); ,i);
+        GLSLF(3, prev2[3] = texture(cur[%i], pos + ivec2(0, 2)); ,i);
+        GLSLF(3, prev2[4] = texture(cur[%i], pos + ivec2(0, 4)); ,i);
+        GLSLC(0, );
+        GLSLF(3, next2[0] = texture(next[%i], pos - ivec2(0, 4)); ,i);
+        GLSLF(3, next2[1] = texture(next[%i], pos - ivec2(0, 2)); ,i);
+        GLSLF(3, next2[2] = texture(next[%i], pos); ,i);
+        GLSLF(3, next2[3] = texture(next[%i], pos + ivec2(0, 2)); ,i);
+        GLSLF(3, next2[4] = texture(next[%i], pos + ivec2(0, 4)); ,i);
+        GLSLC(2, } );
+        GLSLC(0, );
+        GLSLC(2, res = process_line(prev2, prev1, dcur, next1, next2); );
+        GLSLF(2, imageStore(dst[%i], pos, res); ,i);
+        GLSLC(1, } else { );
+        GLSLF(2, res = texture(cur[%i], pos); ,i);
+        GLSLF(2, imageStore(dst[%i], pos, res); ,i);
+        GLSLC(1, } );
+    }
+
+    GLSLC(0, } );
+
+    RET(spv->compile_shader(spv, ctx, &s->shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_create(vkctx, &s->shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_init_compute_pipeline(vkctx, &s->pl, &s->shd));
+    RET(ff_vk_exec_pipeline_register(vkctx, &s->e, &s->pl));
+
+    s->initialized = 1;
+
+    /* Success falls through the cleanup as well: once the shader has been
+     * created, the compiler output and the compiler itself are no longer
+     * needed, and returning early here used to leak both. */
+    err = 0;
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+    if (spv)
+        spv->uninit(&spv);
+
+    return err;
+}
+
+/*
+ * Filter callback installed into YADIFContext.filter: runs the compute
+ * pipeline over the previous, current and next frames and writes the result
+ * into dst. parity, tff and the current field state are passed to the shader
+ * as push constants.
+ */
+static void bwdif_vulkan_filter_frame(AVFilterContext *ctx, AVFrame *dst,
+                                      int parity, int tff)
+{
+    BWDIFVulkanContext *s = ctx->priv;
+    YADIFContext *y = &s->yadif;
+    BWDIFParameters params = {
+        .parity = parity,
+        .tff = tff,
+        .current_field = y->current_field,
+    };
+    int err;
+
+    err = ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->pl, dst,
+                                   (AVFrame *[]){ y->prev, y->cur, y->next }, 3,
+                                   s->sampler, &params, sizeof(params));
+    /* This callback cannot return a status, so at least report the failure
+     * instead of dropping it silently. */
+    if (err < 0)
+        av_log(ctx, AV_LOG_ERROR, "Vulkan bwdif processing failed: %d\n", err);
+
+    /* The end-of-stream field state applies to a single call only. */
+    if (y->current_field == YADIF_FIELD_END)
+        y->current_field = YADIF_FIELD_NORMAL;
+}
+
+/*
+ * Filter uninit callback: releases the execution pool, pipeline, shader and
+ * sampler, then the Vulkan filter context itself.
+ */
+static void bwdif_vulkan_uninit(AVFilterContext *avctx)
+{
+    BWDIFVulkanContext *priv = avctx->priv;
+    FFVulkanContext *vkctx = &priv->vkctx;
+
+    ff_vk_exec_pool_free(vkctx, &priv->e);
+    ff_vk_pipeline_free(vkctx, &priv->pl);
+    ff_vk_shader_free(vkctx, &priv->shd);
+
+    /* Only destroy the sampler if one was actually created. */
+    if (priv->sampler) {
+        vkctx->vkfn.DestroySampler(vkctx->hwctx->act_dev, priv->sampler,
+                                   vkctx->hwctx->alloc);
+    }
+
+    ff_vk_uninit(vkctx);
+
+    priv->initialized = 0;
+}
+
+/*
+ * Input link configuration: checks that the link carries Vulkan hardware
+ * frames and, for the first input, records the frames context together with
+ * the default output format and dimensions.
+ *
+ * Returns 0 on success, AVERROR(EINVAL) if the link has no hardware frames
+ * context or its format is not AV_PIX_FMT_VULKAN.
+ */
+static int bwdif_vulkan_config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *avctx = inlink->dst;
+    BWDIFVulkanContext *priv = avctx->priv;
+    FFVulkanContext *vkctx = &priv->vkctx;
+    AVHWFramesContext *hwfc;
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+               "hardware frames context on the input.\n");
+        return AVERROR(EINVAL);
+    }
+
+    hwfc = (AVHWFramesContext *)inlink->hw_frames_ctx->data;
+    if (hwfc->format != AV_PIX_FMT_VULKAN)
+        return AVERROR(EINVAL);
+
+    /* Only the first input supplies the device and the output defaults. */
+    if (inlink == avctx->inputs[0]) {
+        /* Borrowed: the buffer reference is stored without taking a new ref. */
+        vkctx->input_frames_ref = inlink->hw_frames_ctx;
+
+        vkctx->output_format = hwfc->sw_format;
+        vkctx->output_width  = hwfc->width;
+        vkctx->output_height = hwfc->height;
+    }
+
+    return 0;
+}
+
+/*
+ * Output link configuration: sets up the output frames context, derives the
+ * link's time base, size and frame rate from the first input, installs the
+ * Vulkan filter callback and finally builds the shader via init_filter().
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int bwdif_vulkan_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    AVFilterLink *inlink = avctx->inputs[0];
+    BWDIFVulkanContext *priv = avctx->priv;
+    FFVulkanContext *vkctx = &priv->vkctx;
+    YADIFContext *yadif = &priv->yadif;
+    int ret;
+
+    av_buffer_unref(&outlink->hw_frames_ctx);
+
+    ret = ff_vk_filter_init_context(avctx, vkctx, vkctx->input_frames_ref,
+                                    vkctx->output_width, vkctx->output_height,
+                                    vkctx->output_format);
+    if (ret < 0)
+        return ret;
+
+    /* Lets log messages issued through vkctx carry the filter's class. */
+    vkctx->class = yadif->class;
+
+    outlink->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref);
+    if (!outlink->hw_frames_ctx)
+        return AVERROR(ENOMEM);
+
+    outlink->time_base = av_mul_q(inlink->time_base, (AVRational){1, 2});
+    outlink->w         = vkctx->output_width;
+    outlink->h         = vkctx->output_height;
+
+    /* Modes with bit 0 set double the output frame rate. */
+    if (yadif->mode & 1)
+        outlink->frame_rate = av_mul_q(inlink->frame_rate, (AVRational){2, 1});
+
+    if (outlink->w < 4 || outlink->h < 4) {
+        av_log(avctx, AV_LOG_ERROR, "Video of less than 4 columns or lines is not "
+               "supported\n");
+        return AVERROR(EINVAL);
+    }
+
+    yadif->csp    = av_pix_fmt_desc_get(vkctx->frames->sw_format);
+    yadif->filter = bwdif_vulkan_filter_frame;
+
+    return init_filter(avctx);
+}
+
+/* AVClass of the filter; its options come from the shared ff_yadif_options table. */
+static const AVClass bwdif_vulkan_class = {
+    .class_name = "bwdif_vulkan",
+    .category   = AV_CLASS_CATEGORY_FILTER,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .item_name  = av_default_item_name,
+    .option     = ff_yadif_options,
+};
+
+/* Single video input pad; incoming frames go to the shared yadif frame handler. */
+static const AVFilterPad bwdif_vulkan_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = bwdif_vulkan_config_input,
+        .filter_frame = ff_yadif_filter_frame,
+    },
+};
+
+/* Single video output pad; frame requests go to the shared yadif request handler. */
+static const AVFilterPad bwdif_vulkan_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = bwdif_vulkan_config_output,
+        .request_frame = ff_yadif_request_frame,
+    },
+};
+
+/* Public definition of the bwdif_vulkan filter (declared extern in allfilters.c). */
+const AVFilter ff_vf_bwdif_vulkan = {
+    .name           = "bwdif_vulkan",
+    .description    = NULL_IF_CONFIG_SMALL("Deinterlace Vulkan frames via bwdif"),
+    .priv_class     = &bwdif_vulkan_class,
+    .priv_size      = sizeof(BWDIFVulkanContext),
+    .init           = ff_vk_filter_init,
+    .uninit         = bwdif_vulkan_uninit,
+    .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+    FILTER_INPUTS(bwdif_vulkan_inputs),
+    FILTER_OUTPUTS(bwdif_vulkan_outputs),
+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN),
+};
--
2.40.0
[-- Attachment #56: 0075-avfilter-vf_libplacebo-forward-queue-locking-primiti.patch --]
[-- Type: text/x-diff, Size: 1102 bytes --]
From 317ef8322e0d9109e1109a4b06f38470176ec12a Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 30 Jan 2023 14:18:34 +0100
Subject: [PATCH 75/97] avfilter/vf_libplacebo: forward queue locking
primitives
For thread safety.
---
libavfilter/vf_libplacebo.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c
index ba852de08d..d6afcdab0a 100644
--- a/libavfilter/vf_libplacebo.c
+++ b/libavfilter/vf_libplacebo.c
@@ -289,6 +289,11 @@ static int init_vulkan(AVFilterContext *avctx)
.extensions = hwctx->enabled_dev_extensions,
.num_extensions = hwctx->nb_enabled_dev_extensions,
.features = &hwctx->device_features,
+#if PL_API_VER >= 201
+ .lock_queue = (void (*)(void *, int, int)) hwctx->lock_queue,
+ .unlock_queue = (void (*)(void *, int, int)) hwctx->unlock_queue,
+ .queue_ctx = (void *) avhwctx,
+#endif
.queue_graphics = {
.index = hwctx->queue_family_index,
.count = hwctx->nb_graphics_queues,
--
2.40.0
[-- Attachment #57: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
@ 2023-04-28 13:28 ` Niklas Haas
[not found] ` <NTnyn9K--3-9@lynne.ee-NTnytIf----9>
` (14 subsequent siblings)
15 siblings, 0 replies; 49+ messages in thread
From: Niklas Haas @ 2023-04-28 13:28 UTC (permalink / raw)
To: Ffmpeg Devel
On Mon, 24 Apr 2023 17:56:38 +0200 Lynne <dev@lynne.ee> wrote:
> This is part two of the vulkan patchset, which contains all the
> hwcontext and vulkan.c rewrites, and filtering changes.
>
> 55 patches attached.
The new Vulkan code looks like a clear improvement over the status quo
to me. I tested it locally and everything works as expected. The new
synchronization code LGTM overall.
As noted over IRC I think it specifies unnecessarily strict
execution/memory dependencies, though I think it's better to merge a
working state first than to play the microoptimization game at this
stage. (It's also worth noting that this code is still significantly
slower than vf_libplacebo's native hwupload/hwdownload primitives, for
as-of-yet-to-be-determined reasons)
That said, I'm still in favor of this subset of your patchset being
merged soon, since it fixes major issues that prevent the current Vulkan
code on master from working properly with mpv/libplacebo.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
[not found] ` <NTnyn9K--3-9@lynne.ee-NTnytIf----9>
@ 2023-05-10 19:10 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-10 19:10 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 369 bytes --]
Apr 24, 2023, 17:57 by dev@lynne.ee:
> This is part two of the vulkan patchset, which contains all the
> hwcontext and vulkan.c rewrites, and filtering changes.
>
> 55 patches attached.
>
Final patchset pushed here https://github.com/cyanreg/FFmpeg/tree/vulkan
and attached as a single .tar.gz file, as it's a few Kb larger than the maximum
attachment size of 1MiB.
[-- Attachment #2: vulkan_patchset.tar.gz --]
[-- Type: application/gzip, Size: 212484 bytes --]
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
2023-04-28 13:28 ` Niklas Haas
[not found] ` <NTnyn9K--3-9@lynne.ee-NTnytIf----9>
@ 2023-05-11 15:36 ` Anton Khirnov
2023-05-11 16:32 ` Lynne
2023-05-11 16:03 ` Anton Khirnov
` (12 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 15:36 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From 1de5bf4281b19847fc45556431850d772180269e Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 15:15:04 +0100
> Subject: [PATCH 23/97] hwcontext_vulkan: initialize and require instance
> version 1.3
Some comments on why this is needed and what it implies would be nice.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (2 preceding siblings ...)
2023-05-11 15:36 ` Anton Khirnov
@ 2023-05-11 16:03 ` Anton Khirnov
2023-05-11 18:55 ` Lynne
2023-05-11 16:05 ` Anton Khirnov
` (11 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:03 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Tue, 15 Mar 2022 23:00:32 +0100
> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
> operations
>
> ---
> libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
> libavutil/hwcontext_vulkan.h | 40 +++++++-
> 2 files changed, 167 insertions(+), 49 deletions(-)
>
> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
> index 894b4b83f3..b0db59b2d8 100644
> --- a/libavutil/hwcontext_vulkan.c
> +++ b/libavutil/hwcontext_vulkan.c
> @@ -27,6 +27,7 @@
> #include <dlfcn.h>
> #endif
>
> +#include <pthread.h>
> #include <unistd.h>
>
> #include "config.h"
> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
> VkPhysicalDeviceVulkan13Features device_features_1_3;
>
> /* Queues */
> - uint32_t qfs[5];
> - int num_qfs;
> + pthread_mutex_t **qf_mutex;
> + int nb_tot_qfs;
> + uint32_t img_qfs[5];
> + int nb_img_qfs;
This patch would be so much more readable without random renamings.
> /* Debug callback */
> VkDebugUtilsMessengerEXT debug_ctx;
> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
> } VulkanFramesPriv;
>
> typedef struct AVVkFrameInternal {
> + pthread_mutex_t update_mutex;
As far as I can see, none of the mutexes you're adding here are
ever destroyed.
> +
> #if CONFIG_CUDA
> /* Importing external memory into cuda is really expensive so we keep the
> * memory imported all the time */
> @@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
> if (p->libvulkan)
> dlclose(p->libvulkan);
>
> + for (int i = 0; i < p->nb_tot_qfs; i++)
> + av_freep(&p->qf_mutex[i]);
> + av_freep(&p->qf_mutex);
> +
> RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
> RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
> }
> @@ -1436,13 +1445,26 @@ end:
> return err;
> }
>
> +static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
It'd be nice to be consistent with types.
These are uint32 in vulkan, no?
> +{
> + VulkanDevicePriv *p = ctx->internal->priv;
> + pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
> +}
> +
> +static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
> +{
> + VulkanDevicePriv *p = ctx->internal->priv;
> + pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
> +}
> +
> static int vulkan_device_init(AVHWDeviceContext *ctx)
> {
> int err;
> - uint32_t queue_num;
> + uint32_t qf_num;
> AVVulkanDeviceContext *hwctx = ctx->hwctx;
> VulkanDevicePriv *p = ctx->internal->priv;
> FFVulkanFunctions *vk = &p->vkfn;
> + VkQueueFamilyProperties *qf;
> int graph_index, comp_index, tx_index, enc_index, dec_index;
>
> /* Set device extension flags */
> @@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
> p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
> p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
>
> - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
> - if (!queue_num) {
> + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
> + if (!qf_num) {
> av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
> return AVERROR_EXTERNAL;
> }
>
> + qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
> + if (!qf)
> + return AVERROR(ENOMEM);
> +
> + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
> +
> + p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
av_calloc()
> + if (!p->qf_mutex)
> + return AVERROR(ENOMEM);
> + p->nb_tot_qfs = qf_num;
> +
> + for (int i = 0; i < qf_num; i++) {
> + p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
av_calloc()
> + if (!p->qf_mutex[i])
> + return AVERROR(ENOMEM);
> + for (int j = 0; j < qf[i].queueCount; j++)
> + pthread_mutex_init(&p->qf_mutex[i][j], NULL);
Should be checked.
> + }
> +
> graph_index = hwctx->queue_family_index;
> comp_index = hwctx->queue_family_comp_index;
> tx_index = hwctx->queue_family_tx_index;
> @@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
> return AVERROR(EINVAL); \
> } else if (fidx < 0 || ctx_qf < 0) { \
> break; \
> - } else if (ctx_qf >= queue_num) { \
> + } else if (ctx_qf >= qf_num) { \
> av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
> - type, ctx_qf, queue_num); \
> + type, ctx_qf, qf_num); \
> return AVERROR(EINVAL); \
> } \
> \
> @@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
> tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
> enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
> dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
> - p->qfs[p->num_qfs++] = ctx_qf; \
> + p->img_qfs[p->nb_img_qfs++] = ctx_qf; \
> } while (0)
>
> CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
> @@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>
> #undef CHECK_QUEUE
>
> + if (!hwctx->lock_queue)
> + hwctx->lock_queue = lock_queue;
> + if (!hwctx->unlock_queue)
> + hwctx->unlock_queue = unlock_queue;
> +
> /* Get device capabilities */
> vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
>
> @@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
> {
> AVVkFrameInternal *internal = f->internal;
>
> - if (!internal)
> - return;
> -
> #if CONFIG_CUDA
> if (internal->cuda_fc_ref) {
> AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
> @@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
> uint32_t src_qf, dst_qf;
> VkImageLayout new_layout;
> VkAccessFlags new_access;
> + AVVulkanFramesContext *vkfc = hwfc->hwctx;
> const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
> VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
> FFVulkanFunctions *vk = &p->vkfn;
> + AVFrame tmp = { .data[0] = (uint8_t *)frame };
???
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (3 preceding siblings ...)
2023-05-11 16:03 ` Anton Khirnov
@ 2023-05-11 16:05 ` Anton Khirnov
2023-05-11 16:40 ` Lynne
2023-05-11 16:06 ` Anton Khirnov
` (10 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:05 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From c50347a552f5c7c2e3fcf20ef9a1ad4f1a419918 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 20:32:49 +0100
> Subject: [PATCH 27/97] hwcontext_vulkan: remove contiguous memory hack
>
This needs a lot more context.
What was the hack, why was it added, why is it being removed and what
will be affected by that.
> diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
> index 406d8709c3..e89fa52927 100644
> --- a/libavutil/hwcontext_vulkan.h
> +++ b/libavutil/hwcontext_vulkan.h
> @@ -160,9 +160,7 @@ typedef enum AVVkFrameFlags {
> * device and tiling during av_hwframe_ctx_init(). */
> AV_VK_FRAME_FLAG_NONE = (1ULL << 0),
>
> - /* Image planes will be allocated in a single VkDeviceMemory, rather
> - * than as per-plane VkDeviceMemory allocations. Required for exporting
> - * to VAAPI on Intel devices. */
> + /* DEPRECATED: does nothing. */
> AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1),
If it's deprecated then it should actually be deprecated.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (4 preceding siblings ...)
2023-05-11 16:05 ` Anton Khirnov
@ 2023-05-11 16:06 ` Anton Khirnov
2023-05-11 16:45 ` Lynne
2023-05-11 16:14 ` Anton Khirnov
` (9 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:06 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From 287ec5138511a4760f2c66e94bd80f794cd9f7a3 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 20:35:51 +0100
> Subject: [PATCH 28/97] hwcontext_vulkan: rename vk_pixfmt_map to
> vk_pixfmt_planar_map
>
> ---
> libavutil/hwcontext_vulkan.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
> index 67b4357dd1..9eacbb4d2e 100644
> --- a/libavutil/hwcontext_vulkan.c
> +++ b/libavutil/hwcontext_vulkan.c
> @@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal {
>
> static const struct {
> enum AVPixelFormat pixfmt;
> - const VkFormat vkfmts[4];
> -} vk_pixfmt_map[] = {
> + const VkFormat vkfmts[5];
???
This is not a rename.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (5 preceding siblings ...)
2023-05-11 16:06 ` Anton Khirnov
@ 2023-05-11 16:14 ` Anton Khirnov
2023-05-11 16:47 ` Lynne
2023-05-11 16:15 ` Anton Khirnov
` (8 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:14 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From 956f043e9f233675856336e028cc8ee7e35c71f5 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 14:04:28 +0100
> Subject: [PATCH 38/97] vulkan: lock queues before submitting operations
>
> ---
> libavutil/vulkan.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> index 6bf2c214b7..ad13b8f3cb 100644
> --- a/libavutil/vulkan.c
> +++ b/libavutil/vulkan.c
> @@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
> return AVERROR_EXTERNAL;
> }
>
> + s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
> + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
> +
> ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
> +
> + s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
> + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
> +
Should this patch be right after the one that adds these functions?
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (6 preceding siblings ...)
2023-05-11 16:14 ` Anton Khirnov
@ 2023-05-11 16:15 ` Anton Khirnov
2023-05-11 16:50 ` Lynne
2023-05-11 16:21 ` Anton Khirnov
` (7 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:15 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From d81aa7b001995a8cf65590934a7b75a51a63b192 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 14:04:48 +0100
> Subject: [PATCH 39/97] vulkan: define VK_NO_PROTOTYPES
Some context on what this does and why it is needed would be nice.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (7 preceding siblings ...)
2023-05-11 16:15 ` Anton Khirnov
@ 2023-05-11 16:21 ` Anton Khirnov
2023-05-11 18:58 ` Lynne
2023-05-11 16:29 ` Anton Khirnov
` (6 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:21 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From e20962a956444224b34d82f9a5936fae7e43bdf6 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Thu, 15 Dec 2022 17:43:27 +0100
> Subject: [PATCH 47/97] vulkan: allow alloc pNext in ff_vk_create_buf
>
> ---
> libavutil/vulkan.c | 5 +++--
> libavutil/vulkan.h | 3 ++-
> 2 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> index b1553c6537..0bb5b1eebf 100644
> --- a/libavutil/vulkan.c
> +++ b/libavutil/vulkan.c
> @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
> return 0;
> }
>
> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
> + void *pNext, void *alloc_pNext,
> VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
> {
> int err;
> @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
> };
> VkMemoryDedicatedAllocateInfo ded_alloc = {
> .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
> - .pNext = NULL,
> + .pNext = alloc_pNext,
> };
> VkMemoryDedicatedRequirements ded_req = {
> .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> index 85836a7807..d75be26977 100644
> --- a/libavutil/vulkan.h
> +++ b/libavutil/vulkan.h
> @@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
> /**
> * Create a VkBuffer with the specified parameters.
> */
> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
> + void *pNext, void *alloc_pNext,
> VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
Shouldn't you be updating all the callers of this function here?
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (8 preceding siblings ...)
2023-05-11 16:21 ` Anton Khirnov
@ 2023-05-11 16:29 ` Anton Khirnov
2023-05-11 18:13 ` Lynne
2023-05-11 16:34 ` Anton Khirnov
` (5 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:29 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From 786a7d08bc90a88f77057fc31d0943dcb91e4558 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Thu, 22 Dec 2022 17:37:51 +0100
> Subject: [PATCH 53/97] vulkan: add support for retrieving queue, query and
> video properties
>
> ---
> libavutil/vulkan.c | 87 ++++++++++++++++++++++++++++++------
> libavutil/vulkan.h | 14 ++++--
> libavutil/vulkan_functions.h | 1 +
> 3 files changed, 85 insertions(+), 17 deletions(-)
>
> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> index de0c300c0e..d045ff83c1 100644
> --- a/libavutil/vulkan.c
> +++ b/libavutil/vulkan.c
> @@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res)
> #undef CASE
> }
>
> -void ff_vk_load_props(FFVulkanContext *s)
> +int ff_vk_load_props(FFVulkanContext *s)
> {
> + uint32_t qc = 0;
> FFVulkanFunctions *vk = &s->vkfn;
>
> s->driver_props = (VkPhysicalDeviceDriverProperties) {
> @@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s)
> .pNext = &s->driver_props,
> };
>
> +
> vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
> vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
> + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
> +
> + if (s->qf_props)
> + return 0;
> +
> + s->qf_props = av_mallocz(sizeof(*s->qf_props)*qc);
av_calloc()
Also, wouldn't it be better to allocate a single array of
{ qf_props; query_props; video_props; }
> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> index 4bd1c9fc00..4c38dbc2e6 100644
> --- a/libavutil/vulkan.h
> +++ b/libavutil/vulkan.h
> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
> VkPhysicalDeviceProperties2 props;
> VkPhysicalDeviceDriverProperties driver_props;
> VkPhysicalDeviceMemoryProperties mprops;
> + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
> + VkQueueFamilyVideoPropertiesKHR *video_props;
> + VkQueueFamilyProperties2 *qf_props;
How does the user of these know how many elements are in each array?
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 15:36 ` Anton Khirnov
@ 2023-05-11 16:32 ` Lynne
2023-05-11 16:59 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-11 16:32 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 17:36 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 1de5bf4281b19847fc45556431850d772180269e Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 15:15:04 +0100
>> Subject: [PATCH 23/97] hwcontext_vulkan: initialize and require instance
>> version 1.3
>>
>
> Some comments on why is this needed and what it implies would be nice.
>
This just bumps the required loader library version (libvulkan).
All device-related features, such as video decoding, atomics, etc.
are still optional and the code deals with their loss on a local level
(e.g. the decoder or filter checks for the features it needs, not
the hwcontext).
Bumping the required version essentially packs in all maintenance
extensions which correct the spec, rather than requiring us to enable
them individually.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (9 preceding siblings ...)
2023-05-11 16:29 ` Anton Khirnov
@ 2023-05-11 16:34 ` Anton Khirnov
2023-05-11 17:12 ` Lynne
2023-05-11 16:34 ` Anton Khirnov
` (4 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:34 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From 6b5301aa29b63b90d04505c9386822b2e207a038 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Thu, 29 Dec 2022 21:16:21 +0100
> Subject: [PATCH 55/97] vulkan: rewrite to support all necessary features
>
> ---
> libavutil/vulkan.c | 2145 ++++++++++++++++++----------------
> libavutil/vulkan.h | 515 ++++----
> libavutil/vulkan_functions.h | 1 +
> 3 files changed, 1344 insertions(+), 1317 deletions(-)
lol
We stopped doing development like this 15 years ago.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (10 preceding siblings ...)
2023-05-11 16:34 ` Anton Khirnov
@ 2023-05-11 16:34 ` Anton Khirnov
2023-05-11 17:16 ` Lynne
2023-05-11 16:40 ` Anton Khirnov
` (3 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:34 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> From 89e47afc304aaf01c9c25a328ddfde37873e1f89 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 11 Jan 2023 09:37:35 +0100
> Subject: [PATCH 59/97] hwcontext_vulkan: rewrite to support multiplane
> surfaces
>
> ---
> libavutil/hwcontext_vulkan.c | 791 +++++++++++++++++++----------------
> libavutil/hwcontext_vulkan.h | 73 ++--
> 2 files changed, 474 insertions(+), 390 deletions(-)
Again, lol. Not to mention an ABI break.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (11 preceding siblings ...)
2023-05-11 16:34 ` Anton Khirnov
@ 2023-05-11 16:40 ` Anton Khirnov
2023-05-11 17:20 ` Lynne
2023-05-18 8:34 ` [FFmpeg-devel] libavcodec: add Vulkan common video code Anton Khirnov
` (2 subsequent siblings)
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:40 UTC (permalink / raw)
To: Ffmpeg Devel
Quoting Lynne (2023-04-24 17:56:38)
> @@ -3685,8 +3547,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
> #endif
> #endif
> default:
> - return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
> + break;
This seems like it's also removing the ability to map to memory at all.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:05 ` Anton Khirnov
@ 2023-05-11 16:40 ` Lynne
2023-05-11 17:00 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-11 16:40 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:05 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From c50347a552f5c7c2e3fcf20ef9a1ad4f1a419918 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 20:32:49 +0100
>> Subject: [PATCH 27/97] hwcontext_vulkan: remove contiguous memory hack
>>
>
> This needs a lot more context.
>
> What was the hack, why was it added, why is it being removed and what
> will be affected by that.
>
The hack was added to enable exporting of vulkan images to DRM.
On Intel hardware, specifically for DRM images, all planes must be
allocated next to each other, due to hardware limitation, so the hack
used a single large allocation and suballocated all planes from it.
By natively supporting multiplane images, the driver is what decides
the layout, so exporting just works.
It's a hack because it conflicted heavily with image allocation, and
with the whole ecosystem in general, before multiplane images were
supported, which just made it redundant.
This is also the commit which broke the hwcontext hardest and prompted
the entire rewrite in the first place.
>> diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
>> index 406d8709c3..e89fa52927 100644
>> --- a/libavutil/hwcontext_vulkan.h
>> +++ b/libavutil/hwcontext_vulkan.h
>> @@ -160,9 +160,7 @@ typedef enum AVVkFrameFlags {
>> * device and tiling during av_hwframe_ctx_init(). */
>> AV_VK_FRAME_FLAG_NONE = (1ULL << 0),
>>
>> - /* Image planes will be allocated in a single VkDeviceMemory, rather
>> - * than as per-plane VkDeviceMemory allocations. Required for exporting
>> - * to VAAPI on Intel devices. */
>> + /* DEPRECATED: does nothing. */
>> AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1),
>>
>
> If it's deprecated then it should actually be deprecated.
>
Done.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:06 ` Anton Khirnov
@ 2023-05-11 16:45 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-11 16:45 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:06 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 287ec5138511a4760f2c66e94bd80f794cd9f7a3 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 20:35:51 +0100
>> Subject: [PATCH 28/97] hwcontext_vulkan: rename vk_pixfmt_map to
>> vk_pixfmt_planar_map
>>
>> ---
>> libavutil/hwcontext_vulkan.c | 10 +++++-----
>> 1 file changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
>> index 67b4357dd1..9eacbb4d2e 100644
>> --- a/libavutil/hwcontext_vulkan.c
>> +++ b/libavutil/hwcontext_vulkan.c
>> @@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal {
>>
>> static const struct {
>> enum AVPixelFormat pixfmt;
>> - const VkFormat vkfmts[4];
>> -} vk_pixfmt_map[] = {
>> + const VkFormat vkfmts[5];
>>
>
> ???
> This is not a rename.
>
fixed
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:14 ` Anton Khirnov
@ 2023-05-11 16:47 ` Lynne
2023-05-11 17:13 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-11 16:47 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:14 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 956f043e9f233675856336e028cc8ee7e35c71f5 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 14:04:28 +0100
>> Subject: [PATCH 38/97] vulkan: lock queues before submitting operations
>>
>> ---
>> libavutil/vulkan.c | 7 +++++++
>> 1 file changed, 7 insertions(+)
>>
>> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> index 6bf2c214b7..ad13b8f3cb 100644
>> --- a/libavutil/vulkan.c
>> +++ b/libavutil/vulkan.c
>> @@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
>> return AVERROR_EXTERNAL;
>> }
>>
>> + s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
>> + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
>> +
>> ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
>> +
>> + s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
>> + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
>> +
>>
>
> Should this patch be right after the one that adds these functions?
>
Yes. The patch before added support for them to the hwcontext.
This patch uses them in vulkan.c, which, at the given position in
the patchset is still an independent component only used for lavfi.
I can squash them, but I'd prefer not to, though no strong feelings
about it.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:15 ` Anton Khirnov
@ 2023-05-11 16:50 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-11 16:50 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:15 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From d81aa7b001995a8cf65590934a7b75a51a63b192 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 14:04:48 +0100
>> Subject: [PATCH 39/97] vulkan: define VK_NO_PROTOTYPES
>>
>
> Some context on what this does and why is it needed would be nice.
>
This just disables the vulkan headers from defining any symbols
like vkCmdPipelineBarrier2(). Instead, all functions must be loaded
via the loader and used as function pointers as vk->CmdPipelineBarrier2.
Mostly just forces developers to write correct code, as using the
symbols can be undesirable in case API users define their own
function wrappers via the loader API.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:32 ` Lynne
@ 2023-05-11 16:59 ` Anton Khirnov
0 siblings, 0 replies; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 16:59 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-11 18:32:48)
> May 11, 2023, 17:36 by anton@khirnov.net:
>
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From 1de5bf4281b19847fc45556431850d772180269e Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Wed, 23 Nov 2022 15:15:04 +0100
> >> Subject: [PATCH 23/97] hwcontext_vulkan: initialize and require instance
> >> version 1.3
> >>
> >
> > Some comments on why is this needed and what it implies would be nice.
> >
>
> This just bumps the required loader library version (libvulkan).
> All device-related features, such as video decoding, atomics, etc.
> are still optional and the code deals with their loss on a local level
> (e.g. the decoder or filter checks for the features it needs, not
> the hwcontext).
> Bumping the required version essentially packs all maintenance
> extensions which correct the spec rather than requiring to enable
> them individually.
So just write in the commit message a short comment on what this gives
us and why it is worth forcing the users to update their loader.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:40 ` Lynne
@ 2023-05-11 17:00 ` Anton Khirnov
0 siblings, 0 replies; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 17:00 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-11 18:40:54)
> May 11, 2023, 18:05 by anton@khirnov.net:
>
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From c50347a552f5c7c2e3fcf20ef9a1ad4f1a419918 Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Wed, 23 Nov 2022 20:32:49 +0100
> >> Subject: [PATCH 27/97] hwcontext_vulkan: remove contiguous memory hack
> >>
> >
> > This needs a lot more context.
> >
> > What was the hack, why was it added, why is it being removed and what
> > will be affected by that.
> >
>
> The hack was added to enable exporting of vulkan images to DRM.
> On Intel hardware, specifically for DRM images, all planes must be
> allocated next to each other, due to hardware limitation, so the hack
> used a single large allocation and suballocated all planes from it.
>
> By natively supporting multiplane images, the driver is what decides
> the layout, so exporting just works.
>
> It's a hack because it conflicted heavily with image allocation, and
> with the whole ecosystem in general, before multiplane images were
> supported, which just made it redundant.
>
> This is also the commit which broke the hwcontext hardest and prompted
> the entire rewrite in the first place.
So take this paragraph and put it in the commit message.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:34 ` Anton Khirnov
@ 2023-05-11 17:12 ` Lynne
2023-05-11 17:19 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-11 17:12 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:34 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 6b5301aa29b63b90d04505c9386822b2e207a038 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Thu, 29 Dec 2022 21:16:21 +0100
>> Subject: [PATCH 55/97] vulkan: rewrite to support all necessary features
>>
>> ---
>> libavutil/vulkan.c | 2145 ++++++++++++++++++----------------
>> libavutil/vulkan.h | 515 ++++----
>> libavutil/vulkan_functions.h | 1 +
>> 3 files changed, 1344 insertions(+), 1317 deletions(-)
>>
>
> lol
>
> We stopped doing development like this 15 years ago.
>
First, I'm criticized for having too many patches. Now, I'm criticized for
having too few. There's no winning.
This touches EVERYTHING, and so it's titled appropriately. If you dislike
the word in the commit message, I can change it to something with
more marketing impact.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:47 ` Lynne
@ 2023-05-11 17:13 ` Anton Khirnov
0 siblings, 0 replies; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 17:13 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-11 18:47:46)
> May 11, 2023, 18:14 by anton@khirnov.net:
>
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From 956f043e9f233675856336e028cc8ee7e35c71f5 Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Wed, 23 Nov 2022 14:04:28 +0100
> >> Subject: [PATCH 38/97] vulkan: lock queues before submitting operations
> >>
> >> ---
> >> libavutil/vulkan.c | 7 +++++++
> >> 1 file changed, 7 insertions(+)
> >>
> >> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> >> index 6bf2c214b7..ad13b8f3cb 100644
> >> --- a/libavutil/vulkan.c
> >> +++ b/libavutil/vulkan.c
> >> @@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
> >> return AVERROR_EXTERNAL;
> >> }
> >>
> >> + s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
> >> + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
> >> +
> >> ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
> >> +
> >> + s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
> >> + e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
> >> +
> >>
> >
> > Should this patch be right after the one that adds these functions?
> >
>
> Yes. The patch before added support for them to the hwcontext.
> This patch uses them in vulkan.c, which, at the given position in
> the patchset is still an independent component only used for lavfi.
> I can squash them, but I'd prefer not to, though no strong feelings
> about it.
I'm not asking you to squash it, I'm asking you to reorder it so that it
immediately follows the patch that adds the locking functions, and isn't
ordered 12 patches later for no apparent reason.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:34 ` Anton Khirnov
@ 2023-05-11 17:16 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-11 17:16 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:34 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 89e47afc304aaf01c9c25a328ddfde37873e1f89 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 11 Jan 2023 09:37:35 +0100
>> Subject: [PATCH 59/97] hwcontext_vulkan: rewrite to support multiplane
>> surfaces
>>
>> ---
>> libavutil/hwcontext_vulkan.c | 791 +++++++++++++++++++----------------
>> libavutil/hwcontext_vulkan.h | 73 ++--
>> 2 files changed, 474 insertions(+), 390 deletions(-)
>>
>
> Again, lol. Not to mention an ABI break.
>
I admit, it's an ABI break as it adds fields introduced before
a few commits prior in the same patch series.
But given that the patchset as a whole does not break the
ABI, I'd like to keep this, as the order is more appropriate,
and swapping the order of the two commits is impossible
without squashing literally every single commit in between
into a giant patch whose commit message starts with a re-
word.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 17:12 ` Lynne
@ 2023-05-11 17:19 ` Anton Khirnov
0 siblings, 0 replies; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 17:19 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-11 19:12:56)
> May 11, 2023, 18:34 by anton@khirnov.net:
>
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From 6b5301aa29b63b90d04505c9386822b2e207a038 Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Thu, 29 Dec 2022 21:16:21 +0100
> >> Subject: [PATCH 55/97] vulkan: rewrite to support all necessary features
> >>
> >> ---
> >> libavutil/vulkan.c | 2145 ++++++++++++++++++----------------
> >> libavutil/vulkan.h | 515 ++++----
> >> libavutil/vulkan_functions.h | 1 +
> >> 3 files changed, 1344 insertions(+), 1317 deletions(-)
> >>
> >
> > lol
> >
> > We stopped doing development like this 15 years ago.
> >
>
> First, I'm criticized for having too many patches. Now, I'm criticized for
> having too few. There's no winning.
I have no issue with too many patches - my work on ffmpeg CLI is over
200 commits this year alone. Small patches are reviewable, this
horrorshow is not.
> This touches EVERYTHING, and so it's titled appropriately. If you dislike
> the word in the commit message, I can change it to something with
> more marketing impact.
No, I dislike the fact that there's a giant uberpatch that rewrites
everything with zero explanation or justification.
Ideally, it should be a series of small individually reviewable changes.
If that is for some reason not feasible, the commit message should
contain
* good justification for why it is not feasible
* detailed explanation on what exactly is being done and why
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:40 ` Anton Khirnov
@ 2023-05-11 17:20 ` Lynne
2023-05-11 17:27 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-11 17:20 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:40 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> @@ -3685,8 +3547,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
>> #endif
>> #endif
>> default:
>> - return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
>> + break;
>>
>
> This seems like it's also removing the ability to map to memory at all.
>
It is. Due to the driver deciding the layout of multiplane images
(which are used by default), it's not spec-valid to map the memory
used. Rather than keeping complicated code which receives no
use at all, I decided to remove it.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 17:20 ` Lynne
@ 2023-05-11 17:27 ` Anton Khirnov
2023-05-11 19:11 ` Lynne
0 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-11 17:27 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-11 19:20:45)
> May 11, 2023, 18:40 by anton@khirnov.net:
>
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> @@ -3685,8 +3547,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
> >> #endif
> >> #endif
> >> default:
> >> - return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
> >> + break;
> >>
> >
> > This seems like it's also removing the ability to map to memory at all.
> >
>
> It is. Due to the driver deciding the layout of multiplane images
> (which are used by default), it's not spec-valid to map the memory
> used. Rather than keeping complicated code which receives no
> use at all, I decided to remove it.
That should be stated more clearly in the commit message then, along
with the reason for dropping it.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:29 ` Anton Khirnov
@ 2023-05-11 18:13 ` Lynne
2023-05-16 13:40 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-11 18:13 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:30 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 786a7d08bc90a88f77057fc31d0943dcb91e4558 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Thu, 22 Dec 2022 17:37:51 +0100
>> Subject: [PATCH 53/97] vulkan: add support for retrieving queue, query and
>> video properties
>>
>> ---
>> libavutil/vulkan.c | 87 ++++++++++++++++++++++++++++++------
>> libavutil/vulkan.h | 14 ++++--
>> libavutil/vulkan_functions.h | 1 +
>> 3 files changed, 85 insertions(+), 17 deletions(-)
>>
>> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> index de0c300c0e..d045ff83c1 100644
>> --- a/libavutil/vulkan.c
>> +++ b/libavutil/vulkan.c
>> @@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res)
>> #undef CASE
>> }
>>
>> -void ff_vk_load_props(FFVulkanContext *s)
>> +int ff_vk_load_props(FFVulkanContext *s)
>> {
>> + uint32_t qc = 0;
>> FFVulkanFunctions *vk = &s->vkfn;
>>
>> s->driver_props = (VkPhysicalDeviceDriverProperties) {
>> @@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s)
>> .pNext = &s->driver_props,
>> };
>>
>> +
>> vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
>> vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
>> + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
>> +
>> + if (s->qf_props)
>> + return 0;
>> +
>> + s->qf_props = av_mallocz(sizeof(*s->qf_props)*qc);
>>
>
> av_calloc()
>
Fixed (for all 3).
>
> Also, wouldn't it be better to allocate a single array of
> { qf_props; query_props; video_props; }
>
No, the way they're read requires the array to be contiguous.
>> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> index 4bd1c9fc00..4c38dbc2e6 100644
>> --- a/libavutil/vulkan.h
>> +++ b/libavutil/vulkan.h
>> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
>> VkPhysicalDeviceProperties2 props;
>> VkPhysicalDeviceDriverProperties driver_props;
>> VkPhysicalDeviceMemoryProperties mprops;
>> + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
>> + VkQueueFamilyVideoPropertiesKHR *video_props;
>> + VkQueueFamilyProperties2 *qf_props;
>>
>
> How does the user of these know how many elements are in each array?
>
They don't have to, we read the total number of queues available
for the device, so all indices are always available.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:03 ` Anton Khirnov
@ 2023-05-11 18:55 ` Lynne
2023-05-16 13:31 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-11 18:55 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:04 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Tue, 15 Mar 2022 23:00:32 +0100
>> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
>> operations
>>
>> ---
>> libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
>> libavutil/hwcontext_vulkan.h | 40 +++++++-
>> 2 files changed, 167 insertions(+), 49 deletions(-)
>>
>> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
>> index 894b4b83f3..b0db59b2d8 100644
>> --- a/libavutil/hwcontext_vulkan.c
>> +++ b/libavutil/hwcontext_vulkan.c
>> @@ -27,6 +27,7 @@
>> #include <dlfcn.h>
>> #endif
>>
>> +#include <pthread.h>
>> #include <unistd.h>
>>
>> #include "config.h"
>> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
>> VkPhysicalDeviceVulkan13Features device_features_1_3;
>>
>> /* Queues */
>> - uint32_t qfs[5];
>> - int num_qfs;
>> + pthread_mutex_t **qf_mutex;
>> + int nb_tot_qfs;
>> + uint32_t img_qfs[5];
>> + int nb_img_qfs;
>>
>
> This patch would be so much more readable without random renamings.
>
They're not random, the meaning of each variable is different
to what they meant before.
nb_img_qfs is the total number of enabled queue families.
nb_tot_qfs is the total number of queue families listed by the driver.
>> /* Debug callback */
>> VkDebugUtilsMessengerEXT debug_ctx;
>> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
>> } VulkanFramesPriv;
>>
>> typedef struct AVVkFrameInternal {
>> + pthread_mutex_t update_mutex;
>>
>
> As far as I can see, none of the mutices you're adding here are
> ever destroyed.
>
Fixed.
>> +
>> #if CONFIG_CUDA
>> /* Importing external memory into cuda is really expensive so we keep the
>> * memory imported all the time */
>> @@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
>> if (p->libvulkan)
>> dlclose(p->libvulkan);
>>
>> + for (int i = 0; i < p->nb_tot_qfs; i++)
>> + av_freep(&p->qf_mutex[i]);
>> + av_freep(&p->qf_mutex);
>> +
>> RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
>> RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
>> }
>> @@ -1436,13 +1445,26 @@ end:
>> return err;
>> }
>>
>> +static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
>>
>
> It'd be nice to be consistent with types.
> These are uint32 in vulkan, no?
>
Fixed. Though, they're more closely related to the
number of queue families given in the hwcontext, which
are 32-bit ints.
>> +{
>> + VulkanDevicePriv *p = ctx->internal->priv;
>> + pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
>> +}
>> +
>> +static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
>> +{
>> + VulkanDevicePriv *p = ctx->internal->priv;
>> + pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
>> +}
>> +
>> static int vulkan_device_init(AVHWDeviceContext *ctx)
>> {
>> int err;
>> - uint32_t queue_num;
>> + uint32_t qf_num;
>> AVVulkanDeviceContext *hwctx = ctx->hwctx;
>> VulkanDevicePriv *p = ctx->internal->priv;
>> FFVulkanFunctions *vk = &p->vkfn;
>> + VkQueueFamilyProperties *qf;
>> int graph_index, comp_index, tx_index, enc_index, dec_index;
>>
>> /* Set device extension flags */
>> @@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>> p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
>> p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
>>
>> - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
>> - if (!queue_num) {
>> + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
>> + if (!qf_num) {
>> av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
>> return AVERROR_EXTERNAL;
>> }
>>
>> + qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
>> + if (!qf)
>> + return AVERROR(ENOMEM);
>> +
>> + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
>> +
>> + p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
>>
>
> av_calloc()
>
>> + if (!p->qf_mutex)
>> + return AVERROR(ENOMEM);
>> + p->nb_tot_qfs = qf_num;
>> +
>> + for (int i = 0; i < qf_num; i++) {
>> + p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
>>
>
> av_calloc()
>
>> + if (!p->qf_mutex[i])
>> + return AVERROR(ENOMEM);
>> + for (int j = 0; j < qf[i].queueCount; j++)
>> + pthread_mutex_init(&p->qf_mutex[i][j], NULL);
>>
>
> Should be checked.
>
Fixed all three.
>> + }
>> +
>> graph_index = hwctx->queue_family_index;
>> comp_index = hwctx->queue_family_comp_index;
>> tx_index = hwctx->queue_family_tx_index;
>> @@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>> return AVERROR(EINVAL); \
>> } else if (fidx < 0 || ctx_qf < 0) { \
>> break; \
>> - } else if (ctx_qf >= queue_num) { \
>> + } else if (ctx_qf >= qf_num) { \
>> av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
>> - type, ctx_qf, queue_num); \
>> + type, ctx_qf, qf_num); \
>> return AVERROR(EINVAL); \
>> } \
>> \
>> @@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>> tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
>> enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
>> dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
>> - p->qfs[p->num_qfs++] = ctx_qf; \
>> + p->img_qfs[p->nb_img_qfs++] = ctx_qf; \
>> } while (0)
>>
>> CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
>> @@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>
>> #undef CHECK_QUEUE
>>
>> + if (!hwctx->lock_queue)
>> + hwctx->lock_queue = lock_queue;
>> + if (!hwctx->unlock_queue)
>> + hwctx->unlock_queue = unlock_queue;
>> +
>> /* Get device capabilities */
>> vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
>>
>> @@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
>> {
>> AVVkFrameInternal *internal = f->internal;
>>
>> - if (!internal)
>> - return;
>> -
>> #if CONFIG_CUDA
>> if (internal->cuda_fc_ref) {
>> AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
>> @@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
>> uint32_t src_qf, dst_qf;
>> VkImageLayout new_layout;
>> VkAccessFlags new_access;
>> + AVVulkanFramesContext *vkfc = hwfc->hwctx;
>> const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
>> VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
>> FFVulkanFunctions *vk = &p->vkfn;
>> + AVFrame tmp = { .data[0] = (uint8_t *)frame };
>>
>
> ???
>
This enables us to use the common dependency/dispatch code.
The prepare_frame function is used for both frame initialization
and frame import/export queue family transfer operations.
In the former case, no AVFrame exists yet, so, as this is purely
libavutil code, we create a temporary frame on stack. Otherwise,
we'd need to allocate multiple frames somewhere, one for each
possible command buffer dispatch.
Comment added to commit message.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 16:21 ` Anton Khirnov
@ 2023-05-11 18:58 ` Lynne
2023-05-16 13:33 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-11 18:58 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 18:22 by anton@khirnov.net:
> Quoting Lynne (2023-04-24 17:56:38)
>
>> From e20962a956444224b34d82f9a5936fae7e43bdf6 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Thu, 15 Dec 2022 17:43:27 +0100
>> Subject: [PATCH 47/97] vulkan: allow alloc pNext in ff_vk_create_buf
>>
>> ---
>> libavutil/vulkan.c | 5 +++--
>> libavutil/vulkan.h | 3 ++-
>> 2 files changed, 5 insertions(+), 3 deletions(-)
>>
>> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> index b1553c6537..0bb5b1eebf 100644
>> --- a/libavutil/vulkan.c
>> +++ b/libavutil/vulkan.c
>> @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
>> return 0;
>> }
>>
>> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
>> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
>> + void *pNext, void *alloc_pNext,
>> VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
>> {
>> int err;
>> @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
>> };
>> VkMemoryDedicatedAllocateInfo ded_alloc = {
>> .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
>> - .pNext = NULL,
>> + .pNext = alloc_pNext,
>> };
>> VkMemoryDedicatedRequirements ded_req = {
>> .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
>> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> index 85836a7807..d75be26977 100644
>> --- a/libavutil/vulkan.h
>> +++ b/libavutil/vulkan.h
>> @@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
>> /**
>> * Create a VkBuffer with the specified parameters.
>> */
>> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
>> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
>> + void *pNext, void *alloc_pNext,
>> VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
>>
>
> Shouldn't you be updating all the callers of this function here?
>
All callers of the function were in filters in libavfilter, and I wanted to keep all
filter commits in libavfilter separate.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 17:27 ` Anton Khirnov
@ 2023-05-11 19:11 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-11 19:11 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 11, 2023, 19:27 by anton@khirnov.net:
> Quoting Lynne (2023-05-11 19:20:45)
>
>> May 11, 2023, 18:40 by anton@khirnov.net:
>>
>> > Quoting Lynne (2023-04-24 17:56:38)
>> >
>> >> @@ -3685,8 +3547,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
>> >> #endif
>> >> #endif
>> >> default:
>> >> - return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
>> >> + break;
>> >>
>> >
>> > This seems like it's also removing the ability to map to memory at all.
>> >
>>
>> It is. Due to the driver deciding the layout of multiplane images
>> (which are used by default), it's not spec-valid to map the memory
>> used. Rather than keeping complicated code which receives no
>> use at all, I decided to remove it.
>>
>
> That should be stated more clearly in the commit message then, along
> with the reason for dropping it.
>
Branch updated with all feedback.
The top unsquashed commits remain as-is because I need
to know whether "lavc/decode: allow to allocate hwaccel_priv_data early"
is okay, as the commits are cleanups which would make reverting
their squashing bug-prone and time-consuming.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 18:55 ` Lynne
@ 2023-05-16 13:31 ` Anton Khirnov
2023-05-16 14:47 ` Lynne
[not found] ` <NV_0sN0--3-9@lynne.ee-NV_0vMs----9>
0 siblings, 2 replies; 49+ messages in thread
From: Anton Khirnov @ 2023-05-16 13:31 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-11 20:55:40)
> May 11, 2023, 18:04 by anton@khirnov.net:
>
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Tue, 15 Mar 2022 23:00:32 +0100
> >> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
> >> operations
> >>
> >> ---
> >> libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
> >> libavutil/hwcontext_vulkan.h | 40 +++++++-
> >> 2 files changed, 167 insertions(+), 49 deletions(-)
> >>
> >> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
> >> index 894b4b83f3..b0db59b2d8 100644
> >> --- a/libavutil/hwcontext_vulkan.c
> >> +++ b/libavutil/hwcontext_vulkan.c
> >> @@ -27,6 +27,7 @@
> >> #include <dlfcn.h>
> >> #endif
> >>
> >> +#include <pthread.h>
> >> #include <unistd.h>
> >>
> >> #include "config.h"
> >> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
> >> VkPhysicalDeviceVulkan13Features device_features_1_3;
> >>
> >> /* Queues */
> >> - uint32_t qfs[5];
> >> - int num_qfs;
> >> + pthread_mutex_t **qf_mutex;
> >> + int nb_tot_qfs;
> >> + uint32_t img_qfs[5];
> >> + int nb_img_qfs;
> >>
> >
> > This patch would be so much more readable without random renamings.
> >
>
> They're not random, the meaning of each variable is different
> to what they meant before.
> nb_img_qfs is the total number of enabled queue families.
> nb_tot_qfs is the total number of queue families listed by the driver.
>
>
> >> /* Debug callback */
> >> VkDebugUtilsMessengerEXT debug_ctx;
> >> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
> >> } VulkanFramesPriv;
> >>
> >> typedef struct AVVkFrameInternal {
> >> + pthread_mutex_t update_mutex;
> >>
> >
> > As far as I can see, none of the mutices you're adding here are
> > ever destroyed.
> >
>
> Fixed.
In your current tree you're only destroying update_mutex, not qf_mutexes.
And not checking the creation of update_mutex.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 18:58 ` Lynne
@ 2023-05-16 13:33 ` Anton Khirnov
2023-05-16 14:41 ` Lynne
0 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-16 13:33 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-11 20:58:38)
> May 11, 2023, 18:22 by anton@khirnov.net:
>
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From e20962a956444224b34d82f9a5936fae7e43bdf6 Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Thu, 15 Dec 2022 17:43:27 +0100
> >> Subject: [PATCH 47/97] vulkan: allow alloc pNext in ff_vk_create_buf
> >>
> >> ---
> >> libavutil/vulkan.c | 5 +++--
> >> libavutil/vulkan.h | 3 ++-
> >> 2 files changed, 5 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> >> index b1553c6537..0bb5b1eebf 100644
> >> --- a/libavutil/vulkan.c
> >> +++ b/libavutil/vulkan.c
> >> @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
> >> return 0;
> >> }
> >>
> >> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
> >> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
> >> + void *pNext, void *alloc_pNext,
> >> VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
> >> {
> >> int err;
> >> @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
> >> };
> >> VkMemoryDedicatedAllocateInfo ded_alloc = {
> >> .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
> >> - .pNext = NULL,
> >> + .pNext = alloc_pNext,
> >> };
> >> VkMemoryDedicatedRequirements ded_req = {
> >> .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> >> index 85836a7807..d75be26977 100644
> >> --- a/libavutil/vulkan.h
> >> +++ b/libavutil/vulkan.h
> >> @@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
> >> /**
> >> * Create a VkBuffer with the specified parameters.
> >> */
> >> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
> >> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
> >> + void *pNext, void *alloc_pNext,
> >> VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
> >>
> >
> > Shouldn't you be updating all the callers of this function here?
> >
>
> All callers of the function were in filters in libavfilter, and I wanted to keep all
> filter commits in libavfilter separate.
Every commit must be buildable on its own.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-11 18:13 ` Lynne
@ 2023-05-16 13:40 ` Anton Khirnov
2023-05-16 14:46 ` Lynne
0 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-16 13:40 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-11 20:13:29)
> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> >> index 4bd1c9fc00..4c38dbc2e6 100644
> >> --- a/libavutil/vulkan.h
> >> +++ b/libavutil/vulkan.h
> >> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
> >> VkPhysicalDeviceProperties2 props;
> >> VkPhysicalDeviceDriverProperties driver_props;
> >> VkPhysicalDeviceMemoryProperties mprops;
> >> + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
> >> + VkQueueFamilyVideoPropertiesKHR *video_props;
> >> + VkQueueFamilyProperties2 *qf_props;
> >>
> >
> > How does the user of these know how many elements are in each array?
> >
>
> They don't have to, we read the total number of queues available
> for the device, so all indices are always available.
"all indices"?
The allocated size of these arrays is purely local to
ff_vk_load_props(), so there is no safe way for any outside caller to
know it.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-16 13:33 ` Anton Khirnov
@ 2023-05-16 14:41 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-16 14:41 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 16, 2023, 15:33 by anton@khirnov.net:
> Quoting Lynne (2023-05-11 20:58:38)
>
>> May 11, 2023, 18:22 by anton@khirnov.net:
>>
>> > Quoting Lynne (2023-04-24 17:56:38)
>> >
>> >> From e20962a956444224b34d82f9a5936fae7e43bdf6 Mon Sep 17 00:00:00 2001
>> >> From: Lynne <dev@lynne.ee>
>> >> Date: Thu, 15 Dec 2022 17:43:27 +0100
>> >> Subject: [PATCH 47/97] vulkan: allow alloc pNext in ff_vk_create_buf
>> >>
>> >> ---
>> >> libavutil/vulkan.c | 5 +++--
>> >> libavutil/vulkan.h | 3 ++-
>> >> 2 files changed, 5 insertions(+), 3 deletions(-)
>> >>
>> >> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> >> index b1553c6537..0bb5b1eebf 100644
>> >> --- a/libavutil/vulkan.c
>> >> +++ b/libavutil/vulkan.c
>> >> @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
>> >> return 0;
>> >> }
>> >>
>> >> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
>> >> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
>> >> + void *pNext, void *alloc_pNext,
>> >> VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
>> >> {
>> >> int err;
>> >> @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
>> >> };
>> >> VkMemoryDedicatedAllocateInfo ded_alloc = {
>> >> .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
>> >> - .pNext = NULL,
>> >> + .pNext = alloc_pNext,
>> >> };
>> >> VkMemoryDedicatedRequirements ded_req = {
>> >> .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
>> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> >> index 85836a7807..d75be26977 100644
>> >> --- a/libavutil/vulkan.h
>> >> +++ b/libavutil/vulkan.h
>> >> @@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
>> >> /**
>> >> * Create a VkBuffer with the specified parameters.
>> >> */
>> >> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
>> >> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
>> >> + void *pNext, void *alloc_pNext,
>> >> VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
>> >>
>> >
>> > Shouldn't you be updating all the callers of this function here?
>> >
>>
>> All callers of the function were in filters in libavfilter, and I wanted to keep all
>> filter commits in libavfilter separate.
>>
>
> Every commit must be buildable on its own.
>
fixed
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-16 13:40 ` Anton Khirnov
@ 2023-05-16 14:46 ` Lynne
2023-05-18 8:29 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-16 14:46 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 16, 2023, 15:41 by anton@khirnov.net:
> Quoting Lynne (2023-05-11 20:13:29)
>
>> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> >> index 4bd1c9fc00..4c38dbc2e6 100644
>> >> --- a/libavutil/vulkan.h
>> >> +++ b/libavutil/vulkan.h
>> >> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
>> >> VkPhysicalDeviceProperties2 props;
>> >> VkPhysicalDeviceDriverProperties driver_props;
>> >> VkPhysicalDeviceMemoryProperties mprops;
>> >> + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
>> >> + VkQueueFamilyVideoPropertiesKHR *video_props;
>> >> + VkQueueFamilyProperties2 *qf_props;
>> >>
>> >
>> > How does the user of these know how many elements are in each array?
>> >
>>
>> They don't have to, we read the total number of queues available
>> for the device, so all indices are always available.
>>
>
> "all indices"?
>
> The allocated size of these arrays is purely local to
> ff_vk_load_props(), so there is no safe way for any outside caller to
> know it.
>
The init function queries the driver for the total number of queue family indices,
allocates an array of that amount for each structure, and reads the properties
into the array.
API users then index into the array based on the queue family index.
API users cannot index outside of the array ever, as the queue family index
they receive is always AVVulkanDeviceContext.queue_family_index (or the
transfer, compute, encode, or decode queue family index member of that structure).
The queue family index members of that structure are checked upon initialization
to not be larger than what the driver returns.
Hence, there's no need for them to know how large the array is, as
it is allocated for all possible indices.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-16 13:31 ` Anton Khirnov
@ 2023-05-16 14:47 ` Lynne
[not found] ` <NV_0sN0--3-9@lynne.ee-NV_0vMs----9>
1 sibling, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-16 14:47 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 16, 2023, 15:32 by anton@khirnov.net:
> Quoting Lynne (2023-05-11 20:55:40)
>
>> May 11, 2023, 18:04 by anton@khirnov.net:
>>
>> > Quoting Lynne (2023-04-24 17:56:38)
>> >
>> >> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
>> >> From: Lynne <dev@lynne.ee>
>> >> Date: Tue, 15 Mar 2022 23:00:32 +0100
>> >> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
>> >> operations
>> >>
>> >> ---
>> >> libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
>> >> libavutil/hwcontext_vulkan.h | 40 +++++++-
>> >> 2 files changed, 167 insertions(+), 49 deletions(-)
>> >>
>> >> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
>> >> index 894b4b83f3..b0db59b2d8 100644
>> >> --- a/libavutil/hwcontext_vulkan.c
>> >> +++ b/libavutil/hwcontext_vulkan.c
>> >> @@ -27,6 +27,7 @@
>> >> #include <dlfcn.h>
>> >> #endif
>> >>
>> >> +#include <pthread.h>
>> >> #include <unistd.h>
>> >>
>> >> #include "config.h"
>> >> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
>> >> VkPhysicalDeviceVulkan13Features device_features_1_3;
>> >>
>> >> /* Queues */
>> >> - uint32_t qfs[5];
>> >> - int num_qfs;
>> >> + pthread_mutex_t **qf_mutex;
>> >> + int nb_tot_qfs;
>> >> + uint32_t img_qfs[5];
>> >> + int nb_img_qfs;
>> >>
>> >
>> > This patch would be so much more readable without random renamings.
>> >
>>
>> They're not random, the meaning of each variable is different
>> to what they meant before.
>> nb_img_qfs is the total number of enabled queue families.
>> nb_tot_qfs is the total number of queue families listed by the driver.
>>
>>
>> >> /* Debug callback */
>> >> VkDebugUtilsMessengerEXT debug_ctx;
>> >> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
>> >> } VulkanFramesPriv;
>> >>
>> >> typedef struct AVVkFrameInternal {
>> >> + pthread_mutex_t update_mutex;
>> >>
>> >
>> > As far as I can see, none of the mutices you're adding here are
>> > ever destroyed.
>> >
>>
>> Fixed.
>>
>
> In your current tree you're only destroying update_mutex, not qf_mutexes.
>
> And not checking the creation of update_mutex.
>
fixed both
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-16 14:46 ` Lynne
@ 2023-05-18 8:29 ` Anton Khirnov
2023-05-18 12:28 ` Lynne
0 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-18 8:29 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-16 16:46:45)
> May 16, 2023, 15:41 by anton@khirnov.net:
>
> > Quoting Lynne (2023-05-11 20:13:29)
> >
> >> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> >> >> index 4bd1c9fc00..4c38dbc2e6 100644
> >> >> --- a/libavutil/vulkan.h
> >> >> +++ b/libavutil/vulkan.h
> >> >> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
> >> >> VkPhysicalDeviceProperties2 props;
> >> >> VkPhysicalDeviceDriverProperties driver_props;
> >> >> VkPhysicalDeviceMemoryProperties mprops;
> >> >> + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
> >> >> + VkQueueFamilyVideoPropertiesKHR *video_props;
> >> >> + VkQueueFamilyProperties2 *qf_props;
> >> >>
> >> >
> >> > How does the user of these know how many elements are in each array?
> >> >
> >>
> >> They don't have to, we read the total number of queues available
> >> for the device, so all indices are always available.
> >>
> >
> > "all indices"?
> >
> > The allocated size of these arrays is purely local to
> > ff_vk_load_props(), so there is no safe way for any outside caller to
> > know it.
> >
>
> The init function queries the driver for the total number of queue family indices,
> allocates an array of that amount for each structure, and reads the properties
> into the array.
> API users then index into the array based on the queue family index.
> API users cannot index outside of the array ever, as the queue family index
> they receive is always AVVulkanDeviceContext.queue_family_index (or the
> transfer, compute, encode, or decode queue family index member of that structure).
> The queue family index members of that structure are checked upon initialization
> to not be larger than what the driver returns.
>
> Hence, there's no need for them to know how large the array is, as
> it is allocated for all possible indices.
That's way too much indirection and way too much code that has to make
the exact same unstated assumption on what the actual size is. In my
experience, it is almost always a good idea to be explicit: store the
exact array size right next to the array itself.
If nothing else, it will be very helpful for the person debugging the
inevitable invalid accesses.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] libavcodec: add Vulkan common video code
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (12 preceding siblings ...)
2023-05-11 16:40 ` Anton Khirnov
@ 2023-05-18 8:34 ` Anton Khirnov
2023-05-18 11:07 ` Lynne
2023-05-18 8:54 ` [FFmpeg-devel] libavcodec: add Vulkan common video decoding code Anton Khirnov
2023-05-19 12:11 ` [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Leo Izen
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-18 8:34 UTC (permalink / raw)
To: Ffmpeg Devel
> commit 3257feba101053b0b3689147c1a8850f68448f62
> Author: Lynne <dev@lynne.ee>
> Date: Sun Dec 18 08:31:03 2022 +0100
>
> libavcodec: add Vulkan common video code
>
> +static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
> +{
> + uint8_t *buf = av_mallocz(size);
> + if (!buf)
> + return NULL;
> +
> + return av_buffer_create(buf, size, free_data_buf, opaque, 0);
leaks buf on av_buffer_create() failure.
> +av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
> + FFVkVideoCommon *common,
> + VkVideoSessionCreateInfoKHR *session_create)
> +{
> + int err;
> + VkResult ret;
> + FFVulkanFunctions *vk = &s->vkfn;
> + VkMemoryRequirements2 *mem_req = NULL;
> + VkVideoSessionMemoryRequirementsKHR *mem = NULL;
> + VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL;
> +
> + /* Create session */
> + ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create,
> + s->hwctx->alloc, &common->session);
> + if (ret != VK_SUCCESS)
> + return AVERROR_EXTERNAL;
> +
> + /* Get memory requirements */
> + ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
> + common->session,
> + &common->nb_mem,
> + NULL);
> + if (ret != VK_SUCCESS) {
> + err = AVERROR_EXTERNAL;
> + goto fail;
> + }
> +
> + /* Allocate all memory needed to actually allocate memory */
> + common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem);
av_calloc(), same below
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] libavcodec: add Vulkan common video decoding code
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (13 preceding siblings ...)
2023-05-18 8:34 ` [FFmpeg-devel] libavcodec: add Vulkan common video code Anton Khirnov
@ 2023-05-18 8:54 ` Anton Khirnov
2023-05-18 12:27 ` Lynne
2023-05-19 12:11 ` [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Leo Izen
15 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-18 8:54 UTC (permalink / raw)
To: Ffmpeg Devel
> commit adb671b921d006255597ac126f85adb05f9d6677
> Author: Lynne <dev@lynne.ee>
> Date: Mon Jan 16 07:23:27 2023 +0100
>
> libavcodec: add Vulkan common video decoding code
>
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index d99f7bd25a..362ea31e3e 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1289,7 +1289,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
> SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
> SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
> SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
> -SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h
> +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_decode.h
> SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
> SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h
>
> diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
> new file mode 100644
> index 0000000000..d07b9aa3eb
> --- /dev/null
> +++ b/libavcodec/vulkan_decode.c
> @@ -0,0 +1,1182 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "vulkan_video.h"
> +#include "vulkan_decode.h"
> +#include "config_components.h"
> +#include "libavutil/hwcontext_internal.h"
what for?
> +static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx)
> +{
> + AVFrame *avf = av_frame_alloc();
> + AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
> + if (!avf)
> + return NULL;
> +
> + avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref);
> + if (!avf->hw_frames_ctx)
> + av_frame_free(&avf);
> + avf->buf[0] = av_buffer_pool_get(dpb_frames->pool);
> + if (!avf->buf[0])
> + av_frame_free(&avf);
> + avf->data[0] = avf->buf[0]->data;
Why is this not av_hwframe_get_buffer()?
> +void ff_vk_decode_free_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vp)
> +{
> + FFVulkanFunctions *vk;
> + VkSemaphoreWaitInfo sem_wait;
> + FFVulkanDecodeShared *ctx;
> +
> + // TODO: investigate why this happens
> + if (!dec || !dec->shared_ref) {
My guess is that this is called from a different thread than the one
whose hwaccel_priv_data you gave to ff_hwaccel_frame_priv_alloc().
You have to attach everything you need to hwaccel_priv_buf itself.
> +/* Since to even get decoder capabilities, we have to initialize quite a lot,
> + * this function does initialization and saves it to hwaccel_priv_data if
> + * available. */
> +static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_ref,
> + VulkanVideoProfile *profile_data,
> + int *width_align, int *height_align,
> + enum AVPixelFormat *pix_fmt, VkFormat *vk_fmt,
> + int *dpb_dedicate)
> +{
> + VkResult ret;
> + int err, max_level;
> + const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id];
> + AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
> + AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data;
> + AVVulkanDeviceContext *hwctx = device->hwctx;
> + enum AVPixelFormat source_format;
> + enum AVPixelFormat best_format;
> + VkFormat best_vkfmt;
> + int base_profile, cur_profile = avctx->profile;
> +
> + int dedicated_dpb;
> + int layered_dpb;
> +
> + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
> + FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
> +
> + FFVulkanExtensions local_extensions = 0x0;
> + FFVulkanExtensions *extensions = ctx ? &ctx->s.extensions : &local_extensions;
> + FFVulkanFunctions local_vk = { 0 };
> + FFVulkanFunctions *vk = ctx ? &ctx->s.vkfn : &local_vk;
> + VkVideoCapabilitiesKHR local_caps = { 0 };
> + VkVideoCapabilitiesKHR *caps = ctx ? &ctx->common.caps : &local_caps;
> + VkVideoDecodeCapabilitiesKHR local_dec_caps = { 0 };
> + VkVideoDecodeCapabilitiesKHR *dec_caps = ctx ? &ctx->dec_caps : &local_dec_caps;
> +
> + VkVideoDecodeH264ProfileInfoKHR local_h264_profile = { 0 };
> + VkVideoDecodeH264ProfileInfoKHR *h264_profile = profile_data ?
> + &profile_data->h264_profile :
> + &local_h264_profile;
> +
> + VkVideoDecodeH264ProfileInfoKHR local_h265_profile = { 0 };
> + VkVideoDecodeH264ProfileInfoKHR *h265_profile = profile_data ?
> + &profile_data->h265_profile :
> + &local_h265_profile;
> +
> + VkVideoDecodeUsageInfoKHR local_usage = { 0 };
> + VkVideoDecodeUsageInfoKHR *usage = profile_data ?
> + &profile_data->usage : &local_usage;
> + VkVideoProfileInfoKHR local_profile = { 0 };
> + VkVideoProfileInfoKHR *profile = profile_data ?
> + &profile_data->profile : &local_profile;
> + VkVideoProfileListInfoKHR local_profile_list = { 0 };
> + VkVideoProfileListInfoKHR *profile_list = profile_data ?
> + &profile_data->profile_list :
> + &local_profile_list;
> +
> + VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
> + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
> + .pNext = profile_list,
> + };
> + VkVideoDecodeH264CapabilitiesKHR h264_caps = {
> + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR,
> + };
> + VkVideoDecodeH265CapabilitiesKHR h265_caps = {
> + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR,
> + };
> + VkVideoFormatPropertiesKHR *ret_info;
> + uint32_t nb_out_fmts = 0;
> +
> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> + if (!desc)
> + return AVERROR(EINVAL);
> +
> + if (ctx && ctx->init)
> + return 0;
> +
> + if (!vk_codec->decode_op)
> + return AVERROR(EINVAL);
> +
> + *extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions,
> + hwctx->nb_enabled_dev_extensions);
> +
> + if (!(*extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
> + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
> + VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
> + return AVERROR(ENOSYS);
> + } else if (!vk_codec->decode_extension) {
> + av_log(avctx, AV_LOG_ERROR, "Unsupported codec for Vulkan decoding: %s!\n",
> + avcodec_get_name(avctx->codec_id));
> + return AVERROR(ENOSYS);
> + } else if (!(vk_codec->decode_extension & *extensions)) {
> + av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n",
> + avcodec_get_name(avctx->codec_id));
> + return AVERROR(ENOSYS);
> + }
> +
> + err = ff_vk_load_functions(device, vk, *extensions, 1, 1);
> + if (err < 0)
> + return err;
> +
> +repeat:
> + if (avctx->codec_id == AV_CODEC_ID_H264) {
> + base_profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;
> + dec_caps->pNext = &h264_caps;
> + usage->pNext = h264_profile;
> + h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR;
> + h264_profile->stdProfileIdc = cur_profile;
> + h264_profile->pictureLayout = avctx->field_order == AV_FIELD_UNKNOWN ||
> + avctx->field_order == AV_FIELD_PROGRESSIVE ?
> + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR :
> + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR;
> + } else if (avctx->codec_id == AV_CODEC_ID_H265) {
> + base_profile = FF_PROFILE_HEVC_MAIN;
> + dec_caps->pNext = &h265_caps;
> + usage->pNext = h265_profile;
> + h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR;
> + h265_profile->stdProfileIdc = cur_profile;
> + }
> +
> + usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR;
> + usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR;
> +
> + profile->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
> + profile->pNext = usage;
> + profile->videoCodecOperation = vk_codec->decode_op;
> + profile->chromaSubsampling = ff_vk_subsampling_from_av_desc(desc);
> + profile->lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth);
> + profile->chromaBitDepth = profile->lumaBitDepth;
> +
> + profile_list->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
> + profile_list->profileCount = 1;
> + profile_list->pProfiles = profile;
> +
> + /* Get the capabilities of the decoder for the given profile */
> + caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
> + caps->pNext = dec_caps;
> + dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR;
> + /* dec_caps->pNext already filled in */
> +
> + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile,
> + caps);
> + if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR &&
> + avctx->flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH &&
> + cur_profile != base_profile) {
> + cur_profile = base_profile;
> + av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting "
> + "again with profile %s\n",
> + avcodec_get_name(avctx->codec_id),
> + avcodec_profile_name(avctx->codec_id, avctx->profile),
> + avcodec_profile_name(avctx->codec_id, base_profile));
> + goto repeat;
This function is long and ugly enough even without backward gotos. What
would Dijkstra say?
> +#endif /* AVCODEC_VULKAN_DECODE_H */
> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> index 0ab90c8f3c..db47956198 100644
> --- a/libavutil/vulkan.c
> +++ b/libavutil/vulkan.c
> @@ -510,8 +510,8 @@ void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
> AVVkFrame *vkf = (AVVkFrame *)f->data[0];
> vkfc->unlock_frame(hwfc, vkf);
> e->frame_locked[j] = 0;
> - e->frame_update[j] = 0;
> }
> + e->frame_update[j] = 0;
unrelated?
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] libavcodec: add Vulkan common video code
2023-05-18 8:34 ` [FFmpeg-devel] libavcodec: add Vulkan common video code Anton Khirnov
@ 2023-05-18 11:07 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-18 11:07 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 18, 2023, 10:34 by anton@khirnov.net:
>> commit 3257feba101053b0b3689147c1a8850f68448f62
>> Author: Lynne <dev@lynne.ee>
>> Date: Sun Dec 18 08:31:03 2022 +0100
>>
>> libavcodec: add Vulkan common video code
>>
>> +static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
>> +{
>> + uint8_t *buf = av_mallocz(size);
>> + if (!buf)
>> + return NULL;
>> +
>> + return av_buffer_create(buf, size, free_data_buf, opaque, 0);
>>
>
> leaks buf on av_buffer_create() failure.
>
fixed
>> +av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
>> + FFVkVideoCommon *common,
>> + VkVideoSessionCreateInfoKHR *session_create)
>> +{
>> + int err;
>> + VkResult ret;
>> + FFVulkanFunctions *vk = &s->vkfn;
>> + VkMemoryRequirements2 *mem_req = NULL;
>> + VkVideoSessionMemoryRequirementsKHR *mem = NULL;
>> + VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL;
>> +
>> + /* Create session */
>> + ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create,
>> + s->hwctx->alloc, &common->session);
>> + if (ret != VK_SUCCESS)
>> + return AVERROR_EXTERNAL;
>> +
>> + /* Get memory requirements */
>> + ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
>> + common->session,
>> + &common->nb_mem,
>> + NULL);
>> + if (ret != VK_SUCCESS) {
>> + err = AVERROR_EXTERNAL;
>> + goto fail;
>> + }
>> +
>> + /* Allocate all memory needed to actually allocate memory */
>> + common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem);
>>
>
> av_calloc(), same below
>
Fixed all av_mallocz usage in the file
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] libavcodec: add Vulkan common video decoding code
2023-05-18 8:54 ` [FFmpeg-devel] libavcodec: add Vulkan common video decoding code Anton Khirnov
@ 2023-05-18 12:27 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-18 12:27 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 18, 2023, 10:54 by anton@khirnov.net:
>> commit adb671b921d006255597ac126f85adb05f9d6677
>> Author: Lynne <dev@lynne.ee>
>> Date: Mon Jan 16 07:23:27 2023 +0100
>>
>> libavcodec: add Vulkan common video decoding code
>>
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index d99f7bd25a..362ea31e3e 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -1289,7 +1289,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h
>> SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
>> SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h
>> SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
>> -SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h
>> +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_decode.h
>> SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
>> SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h
>>
>> diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
>> new file mode 100644
>> index 0000000000..d07b9aa3eb
>> --- /dev/null
>> +++ b/libavcodec/vulkan_decode.c
>> @@ -0,0 +1,1182 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "vulkan_video.h"
>> +#include "vulkan_decode.h"
>> +#include "config_components.h"
>> +#include "libavutil/hwcontext_internal.h"
>>
>
> what for?
>
>> +static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx)
>> +{
>> + AVFrame *avf = av_frame_alloc();
>> + AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
>> + if (!avf)
>> + return NULL;
>> +
>> + avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref);
>> + if (!avf->hw_frames_ctx)
>> + av_frame_free(&avf);
>> + avf->buf[0] = av_buffer_pool_get(dpb_frames->pool);
>> + if (!avf->buf[0])
>> + av_frame_free(&avf);
>> + avf->data[0] = avf->buf[0]->data;
>>
>
> Why is this not av_hwframe_get_buffer()?
>
Didn't occur to me. Fixed.
>> +void ff_vk_decode_free_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vp)
>> +{
>> + FFVulkanFunctions *vk;
>> + VkSemaphoreWaitInfo sem_wait;
>> + FFVulkanDecodeShared *ctx;
>> +
>> + // TODO: investigate why this happens
>> + if (!dec || !dec->shared_ref) {
>>
>
> My guess is that this is called from a different thread than the one
> whose hwaccel_priv_data you gave to ff_hwaccel_frame_priv_alloc().
> You have to attach everything you need to hwaccel_priv_buf itself.
>
This was an old todo which I fixed previously. Removed.
>> + avcodec_get_name(avctx->codec_id),
>> + avcodec_profile_name(avctx->codec_id, avctx->profile),
>> + avcodec_profile_name(avctx->codec_id, base_profile));
>> + goto repeat;
>>
>
> This function is long and ugly enough even without backward gotos. What
> would Dijkstra say?
>
I tried to do it with a function, but the result was more spaghetti
and code duplication, due to needing to handle the return code.
I've commented the goto parts better.
>> +#endif /* AVCODEC_VULKAN_DECODE_H */
>> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> index 0ab90c8f3c..db47956198 100644
>> --- a/libavutil/vulkan.c
>> +++ b/libavutil/vulkan.c
>> @@ -510,8 +510,8 @@ void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
>> AVVkFrame *vkf = (AVVkFrame *)f->data[0];
>> vkfc->unlock_frame(hwfc, vkf);
>> e->frame_locked[j] = 0;
>> - e->frame_update[j] = 0;
>> }
>> + e->frame_update[j] = 0;
>>
>
> unrelated?
>
Fixed.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-18 8:29 ` Anton Khirnov
@ 2023-05-18 12:28 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-18 12:28 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 18, 2023, 10:30 by anton@khirnov.net:
> Quoting Lynne (2023-05-16 16:46:45)
>
>> May 16, 2023, 15:41 by anton@khirnov.net:
>>
>> > Quoting Lynne (2023-05-11 20:13:29)
>> >
>> >> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> >> >> index 4bd1c9fc00..4c38dbc2e6 100644
>> >> >> --- a/libavutil/vulkan.h
>> >> >> +++ b/libavutil/vulkan.h
>> >> >> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
>> >> >> VkPhysicalDeviceProperties2 props;
>> >> >> VkPhysicalDeviceDriverProperties driver_props;
>> >> >> VkPhysicalDeviceMemoryProperties mprops;
>> >> >> + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
>> >> >> + VkQueueFamilyVideoPropertiesKHR *video_props;
>> >> >> + VkQueueFamilyProperties2 *qf_props;
>> >> >>
>> >> >
>> >> > How does the user of these know how many elements are in each array?
>> >> >
>> >>
>> >> They don't have to, we read the total number of queues available
>> >> for the device, so all indices are always available.
>> >>
>> >
>> > "all indices"?
>> >
>> > The allocated size of these arrays is purely local to
>> > ff_vk_load_props(), so there is no safe way for any outside caller to
>> > know it.
>> >
>>
>> The init function queries the driver for the total number of queue family indices,
>> allocates an array of that amount for each structure, and reads the properties
>> into the array.
>> API users then index into the array based on the queue family index.
>> API users cannot index outside of the array ever, as the queue family index
>> they receive is always AVVulkanDeviceContext.queue_family_index (or the
>> transfer, compute, encode, or decode queue family index member of that structure).
>> The queue family index members of that structure are checked upon initialization
>> to not be larger than what the driver returns.
>>
>> Hence, there's no need for them to know how large the array is, as
>> it is allocated for all possible indices.
>>
>
> That's way too much indirection and way too much code that has to make
> the exact same unstated assumption on what the actual size is. In my
> experience, it is almost always a good idea to be explicit: store the
> exact array size right next to the array itself.
>
> If nothing else, it will be very helpful for the person debugging the
> inevitable invalid accesses.
>
added a counter
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
` (14 preceding siblings ...)
2023-05-18 8:54 ` [FFmpeg-devel] libavcodec: add Vulkan common video decoding code Anton Khirnov
@ 2023-05-19 12:11 ` Leo Izen
15 siblings, 0 replies; 49+ messages in thread
From: Leo Izen @ 2023-05-19 12:11 UTC (permalink / raw)
To: ffmpeg-devel
On 4/24/23 11:56, Lynne wrote:
> This is part two of the vulkan patchset, which contains all the
> hwcontext and vulkan.c rewrites, and filtering changes.
>
> 55 patches attached.
>
>
[PATCH 21/97] lavu: add 12-bit 2-plane 422 and 444 pixel formats
IIRC, new pixel formats need an APIChanges entry, a lavu micro bump, or
both. I'm not really sure what the policy is, but I remember being told
something like that when I added an NE macro a few months ago.
[various]
> static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
> ...
> p->device_features_1_3.foo = dev_features_1_3.foo;
There are a lot of these in various patches. Why do these need to be done
manually?
[PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
> p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
av_calloc
+ for (int i = 0; i < qf_num; i++) {
+ p->qf_mutex[i] =
av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
+ if (!p->qf_mutex[i])
+ return AVERROR(ENOMEM);
+ for (int j = 0; j < qf[i].queueCount; j++)
+ pthread_mutex_init(&p->qf_mutex[i][j], NULL);
+ }
If the allocation fails for i > 0, you end up with some initialized
mutexes. Is this ever going to be an issue?
@@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
{
AVVkFrameInternal *internal = f->internal;
- if (!internal)
- return;
-
#if CONFIG_CUDA
if (internal->cuda_fc_ref) {
AVHWFramesContext *cuda_fc = (AVHWFramesContext
*)internal->cuda_fc_ref->data;
What happens if internal is NULL and CONFIG_CUDA is enabled? Do we just segfault?
- Leo Izen
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
[not found] ` <NV_0sN0--3-9@lynne.ee-NV_0vMs----9>
@ 2023-05-22 8:26 ` Lynne
[not found] ` <NVyq4UQ--F-9@lynne.ee-NW1ZGRp----9>
1 sibling, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-22 8:26 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
All known issues have been fixed, and if there are more issues, they can be found as users test it.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
[not found] ` <NVyq4UQ--F-9@lynne.ee-NW1ZGRp----9>
@ 2023-05-25 0:31 ` Lynne
[not found] ` <NWFJK4e--3-9@lynne.ee-NWFJONn----9>
1 sibling, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-25 0:31 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 22, 2023, 10:26 by dev@lynne.ee:
> Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
> All known issues have been fixed, and if there are more issues, they can be found as users test it.
>
Added APIchanges and bumped minor for lavu and lavc.
Planning to push this in 2 days unless there are more comments.
All known issues have been addressed.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
[not found] ` <NWFJK4e--3-9@lynne.ee-NWFJONn----9>
@ 2023-05-26 17:52 ` Lynne
2023-05-26 19:19 ` Anton Khirnov
0 siblings, 1 reply; 49+ messages in thread
From: Lynne @ 2023-05-26 17:52 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 25, 2023, 02:31 by dev@lynne.ee:
> May 22, 2023, 10:26 by dev@lynne.ee:
>
>> Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
>> All known issues have been fixed, and if there are more issues, they can be found as users test it.
>>
>
> Added APIchanges and bumped minor for lavu and lavc.
> Planning to push this in 2 days unless there are more comments.
> All known issues have been addressed.
>
Planning to push this tomorrow morning.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-26 17:52 ` Lynne
@ 2023-05-26 19:19 ` Anton Khirnov
2023-05-26 20:50 ` Lynne
0 siblings, 1 reply; 49+ messages in thread
From: Anton Khirnov @ 2023-05-26 19:19 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Quoting Lynne (2023-05-26 19:52:24)
> May 25, 2023, 02:31 by dev@lynne.ee:
>
> > May 22, 2023, 10:26 by dev@lynne.ee:
> >
> >> Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
> >> All known issues have been fixed, and if there are more issues, they can be found as users test it.
> >>
> >
> > Added APIchanges and bumped minor for lavu and lavc.
> > Planning to push this in 2 days unless there are more comments.
> > All known issues have been addressed.
> >
>
> Planning to push this tomorrow morning.
I did not approve your internal_ref hack, and I strongly object to it.
Not to mention you never even sent it to the ML.
--
Anton Khirnov
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
* Re: [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering
2023-05-26 19:19 ` Anton Khirnov
@ 2023-05-26 20:50 ` Lynne
0 siblings, 0 replies; 49+ messages in thread
From: Lynne @ 2023-05-26 20:50 UTC (permalink / raw)
To: FFmpeg development discussions and patches
May 26, 2023, 21:19 by anton@khirnov.net:
> Quoting Lynne (2023-05-26 19:52:24)
>
>> May 25, 2023, 02:31 by dev@lynne.ee:
>>
>> > May 22, 2023, 10:26 by dev@lynne.ee:
>> >
>> >> Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
>> >> All known issues have been fixed, and if there are more issues, they can be found as users test it.
>> >>
>> >
>> > Added APIchanges and bumped minor for lavu and lavc.
>> > Planning to push this in 2 days unless there are more comments.
>> > All known issues have been addressed.
>> >
>>
>> Planning to push this tomorrow morning.
>>
>
> I did not approve your internal_ref hack, and I strongly object to it.
>
Glad to hear it. You didn't say anything, and I was starting to get worried.
> Not to mention you never even sent it to the ML.
>
You discussed this with me on IRC.
Everyone on the ML knows where my tree is and that this
is what is up for review.
You'll have to find a better reason than this.
I could've played unfair, and pushed the patches without
bumping this. But I didn't.
You refuse to talk to me, despite the vast majority
of issues with my patchset being solvable in no more than 20 minutes
of talking about better ways to fix a problem. This is what is unfair.
You didn't even specify *why* you object to it, let alone suggest a way
to fix this, despite knowing about it since Monday, discussing it,
and not responding when I pinged you. This is not acceptable,
and is easily confused with trolling.
Either propose a better way, or I will look for the opinion of others
without taking yours into consideration.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 49+ messages in thread
end of thread, other threads:[~2023-05-26 20:50 UTC | newest]
Thread overview: 49+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-24 15:56 [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Lynne
2023-04-28 13:28 ` Niklas Haas
[not found] ` <NTnyn9K--3-9@lynne.ee-NTnytIf----9>
2023-05-10 19:10 ` Lynne
2023-05-11 15:36 ` Anton Khirnov
2023-05-11 16:32 ` Lynne
2023-05-11 16:59 ` Anton Khirnov
2023-05-11 16:03 ` Anton Khirnov
2023-05-11 18:55 ` Lynne
2023-05-16 13:31 ` Anton Khirnov
2023-05-16 14:47 ` Lynne
[not found] ` <NV_0sN0--3-9@lynne.ee-NV_0vMs----9>
2023-05-22 8:26 ` Lynne
[not found] ` <NVyq4UQ--F-9@lynne.ee-NW1ZGRp----9>
2023-05-25 0:31 ` Lynne
[not found] ` <NWFJK4e--3-9@lynne.ee-NWFJONn----9>
2023-05-26 17:52 ` Lynne
2023-05-26 19:19 ` Anton Khirnov
2023-05-26 20:50 ` Lynne
2023-05-11 16:05 ` Anton Khirnov
2023-05-11 16:40 ` Lynne
2023-05-11 17:00 ` Anton Khirnov
2023-05-11 16:06 ` Anton Khirnov
2023-05-11 16:45 ` Lynne
2023-05-11 16:14 ` Anton Khirnov
2023-05-11 16:47 ` Lynne
2023-05-11 17:13 ` Anton Khirnov
2023-05-11 16:15 ` Anton Khirnov
2023-05-11 16:50 ` Lynne
2023-05-11 16:21 ` Anton Khirnov
2023-05-11 18:58 ` Lynne
2023-05-16 13:33 ` Anton Khirnov
2023-05-16 14:41 ` Lynne
2023-05-11 16:29 ` Anton Khirnov
2023-05-11 18:13 ` Lynne
2023-05-16 13:40 ` Anton Khirnov
2023-05-16 14:46 ` Lynne
2023-05-18 8:29 ` Anton Khirnov
2023-05-18 12:28 ` Lynne
2023-05-11 16:34 ` Anton Khirnov
2023-05-11 17:12 ` Lynne
2023-05-11 17:19 ` Anton Khirnov
2023-05-11 16:34 ` Anton Khirnov
2023-05-11 17:16 ` Lynne
2023-05-11 16:40 ` Anton Khirnov
2023-05-11 17:20 ` Lynne
2023-05-11 17:27 ` Anton Khirnov
2023-05-11 19:11 ` Lynne
2023-05-18 8:34 ` [FFmpeg-devel] libavcodec: add Vulkan common video code Anton Khirnov
2023-05-18 11:07 ` Lynne
2023-05-18 8:54 ` [FFmpeg-devel] libavcodec: add Vulkan common video decoding code Anton Khirnov
2023-05-18 12:27 ` Lynne
2023-05-19 12:11 ` [FFmpeg-devel] [PATCH 55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering Leo Izen
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git