* [FFmpeg-devel] [PATCH] Small Vulkan sync cleanups (PR #21319)
@ 2025-12-30 11:21 Lynne via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: Lynne via ffmpeg-devel @ 2025-12-30 11:21 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
PR #21319 opened by Lynne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21319
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21319.patch
A macro and a few other changes.
>From 08c31b1d55395f5d23eb555689ee41156b08c6cf Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 28 Dec 2025 19:04:27 +0100
Subject: [PATCH 01/10] hwcontext_vulkan: enable subgroup extended types
Enable the shaderSubgroupExtendedTypes feature, since we do want to be
able to use extended types such as int16_t in subgroup operations.
---
libavutil/hwcontext_vulkan.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index aa5f72e7f2..bb767f6c96 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -305,6 +305,7 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF
COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope);
COPY_VAL(vulkan_1_2.uniformBufferStandardLayout);
COPY_VAL(vulkan_1_2.runtimeDescriptorArray);
+ COPY_VAL(vulkan_1_2.shaderSubgroupExtendedTypes);
COPY_VAL(vulkan_1_3.dynamicRendering);
COPY_VAL(vulkan_1_3.maintenance4);
--
2.49.1
>From 99cba5a342406d84be78269d3d5eda2d3ad1997c Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 19 Dec 2025 23:49:43 +0000
Subject: [PATCH 02/10] vulkan: use HOST_CACHED memory flag only if such a heap
exists
NVK does not offer any HOST_CACHED memory type, so our code, which always requested that flag, failed to allocate memory.
---
libavcodec/ffv1enc_vulkan.c | 5 ++---
libavcodec/vulkan_encode.c | 2 +-
libavutil/hwcontext_vulkan.c | 2 +-
libavutil/vulkan.c | 4 ++++
libavutil/vulkan.h | 2 ++
5 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 86521af6c5..1dc6aa8e90 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -365,9 +365,8 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
NULL, maxsize,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
(maxsize < fv->max_heap_size ?
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0) |
- (!(fv->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) ?
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0x0)));
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT :
+ fv->s.host_cached_flag)));
out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c
index 7b534ffa30..5b84ad9db7 100644
--- a/libavcodec/vulkan_encode.c
+++ b/libavcodec/vulkan_encode.c
@@ -182,7 +182,7 @@ static int vulkan_encode_issue(AVCodecContext *avctx,
VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR,
&ctx->profile_list, max_pkt_size,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+ ctx->s.host_cached_flag);
if (err < 0)
return err;
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index bb767f6c96..313359a4af 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -4338,7 +4338,7 @@ static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst,
err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst, buf_usage,
NULL, buf_offset,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+ p->vkctx.host_cached_flag);
if (err < 0)
return err;
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7858e002ed..d4ac1544d1 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -212,6 +212,10 @@ int ff_vk_load_props(FFVulkanContext *s)
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats);
+ for (int i = 0; i < s->mprops.memoryTypeCount; i++)
+ s->host_cached_flag |= s->mprops.memoryTypes[i].propertyFlags &
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+
load_enabled_qfs(s);
if (s->qf_props)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 29116bcb2c..d42bf514fe 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -301,6 +301,8 @@ typedef struct FFVulkanContext {
VkPhysicalDeviceVulkan12Features feats_12;
VkPhysicalDeviceFeatures2 feats;
+ VkMemoryPropertyFlagBits host_cached_flag;
+
AVBufferRef *device_ref;
AVHWDeviceContext *device;
AVVulkanDeviceContext *hwctx;
--
2.49.1
>From c99bfc4ee6e98b608d090be48fc34e314b589590 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 25 Dec 2025 00:18:13 +0100
Subject: [PATCH 03/10] vulkan_functions: add vkCmdDispatchBase
It's useful for multi-stage operations.
---
libavutil/vulkan_functions.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index d2e3c77bb8..9aed48aab3 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -115,6 +115,7 @@ typedef uint64_t FFVulkanExtensions;
MACRO(1, 1, FF_VK_EXT_NO_FLAG, EndCommandBuffer) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, FreeCommandBuffers) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdDispatch) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdDispatchBase) \
\
/* Queue */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetDeviceQueue) \
--
2.49.1
>From a48f37083a19ee7bacfe81f2273643b27d5d01a7 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 23 Dec 2025 19:03:45 +0100
Subject: [PATCH 04/10] vulkan: add ff_vk_buf_barrier()
This is a shorthand way of writing buffer barrier structures.
---
libavutil/vulkan.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index d42bf514fe..115e9fc940 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -507,6 +507,25 @@ int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
VkImageView views[AV_NUM_DATA_POINTERS],
AVFrame *f, enum FFVkShaderRepFormat rep_fmt);
+#define ff_vk_buf_barrier(dst, vkb, s_stage, s_access, s_access2, \
+ d_stage, d_access, d_access2, offs, bsz) \
+ do { /* fills dst; stage/access args are pasted onto the VK_PIPELINE_STAGE_2_ and VK_ACCESS_2_ prefixes */ \
+ (dst) = (VkBufferMemoryBarrier2) { \
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, \
+ .srcStageMask = VK_PIPELINE_STAGE_2_ ##s_stage, \
+ .srcAccessMask = VK_ACCESS_2_ ##s_access | \
+ VK_ACCESS_2_ ##s_access2, \
+ .dstStageMask = VK_PIPELINE_STAGE_2_ ##d_stage, \
+ .dstAccessMask = VK_ACCESS_2_ ##d_access | \
+ VK_ACCESS_2_ ##d_access2, \
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, \
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, \
+ .buffer = (vkb)->buf, /* parenthesized so vkb may be any pointer expression */ \
+ .offset = (offs), \
+ .size = (bsz) \
+ }; \
+ } while(0)
+
void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e,
AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar,
VkPipelineStageFlags2 src_stage,
--
2.49.1
>From 2dbf1e1f7e1ad8923ddcba8cb40d8b54f6191909 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 23 Dec 2025 19:04:37 +0100
Subject: [PATCH 05/10] vulkan_ffv1: use ff_vk_buf_barrier()
---
libavcodec/vulkan_ffv1.c | 90 +++++++++++++++++++---------------------
1 file changed, 42 insertions(+), 48 deletions(-)
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index 168871d5d9..7766d67511 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -366,21 +366,20 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0));
fp->slice_offset_buf = NULL;
- /* Entry barrier for the slice state */
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = slice_state->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = slice_state->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
- VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = slice_state->buf,
- .offset = 0,
- .size = fp->slice_data_size*f->slice_count,
- };
-
+ /* Entry barrier for the slice state (not preserved between frames) */
+ if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY))
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
+ ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ 0, fp->slice_data_size*f->slice_count);
+ else
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ 0, fp->slice_data_size*f->slice_count);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
@@ -388,8 +387,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
});
- slice_state->stage = buf_bar[0].dstStageMask;
- slice_state->access = buf_bar[0].dstAccessMask;
nb_buf_bar = 0;
nb_img_bar = 0;
@@ -496,18 +493,23 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
0, sizeof(pd_reset), &pd_reset);
/* Sync between setup and reset shaders */
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = slice_state->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = slice_state->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = slice_state->buf,
- .offset = 0,
- .size = fp->slice_data_size*f->slice_count,
- };
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
+ 0, fp->slice_data_size*f->slice_count);
+ /* Probability data barrier */
+ if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY))
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
+ ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
+ fp->slice_data_size*f->slice_count, VK_WHOLE_SIZE);
+ else
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
+ fp->slice_data_size*f->slice_count, VK_WHOLE_SIZE);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
@@ -515,8 +517,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
});
- slice_state->stage = buf_bar[0].dstStageMask;
- slice_state->access = buf_bar[0].dstAccessMask;
nb_buf_bar = 0;
nb_img_bar = 0;
@@ -552,21 +552,17 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
- /* Sync between reset and decode shaders */
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = slice_state->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = slice_state->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
- VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = slice_state->buf,
- .offset = fp->slice_data_size*f->slice_count,
- .size = f->slice_count*(fp->slice_state_size - fp->slice_data_size),
- };
-
+ /* Sync probabilities between reset and decode shaders */
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ 0, fp->slice_data_size*f->slice_count);
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ fp->slice_data_size*f->slice_count, VK_WHOLE_SIZE);
/* Input frame barrier */
ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
@@ -590,8 +586,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
});
- slice_state->stage = buf_bar[0].dstStageMask;
- slice_state->access = buf_bar[0].dstAccessMask;
nb_img_bar = 0;
nb_buf_bar = 0;
--
2.49.1
>From dfe7656dbeeb246e0bb5de90b98c740dcde9cd41 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 23 Dec 2025 19:05:14 +0100
Subject: [PATCH 06/10] nlmeans_vulkan: use ff_vk_buf_barrier()
---
libavfilter/vf_nlmeans_vulkan.c | 181 ++++++++++++--------------------
1 file changed, 67 insertions(+), 114 deletions(-)
diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c
index b69e8ac0a2..7a765d9f31 100644
--- a/libavfilter/vf_nlmeans_vulkan.c
+++ b/libavfilter/vf_nlmeans_vulkan.c
@@ -740,8 +740,6 @@ static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
{
FFVulkanContext *vkctx = &s->vkctx;
FFVulkanFunctions *vk = &vkctx->vkfn;
- VkBufferMemoryBarrier2 buf_bar[2];
- int nb_buf_bar = 0;
DenoisePushData pd = {
{ comp_offs[0], comp_offs[1], comp_offs[2], comp_offs[3] },
@@ -761,26 +759,17 @@ static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = ws_vk->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = ws_vk->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = ws_vk->buf,
- .size = ws_vk->size,
- .offset = 0,
- };
-
+ VkBufferMemoryBarrier2 buf_bar;
+ ff_vk_buf_barrier(buf_bar, ws_vk,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
+ 0, VK_WHOLE_SIZE);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
- .pBufferMemoryBarriers = buf_bar,
- .bufferMemoryBarrierCount = nb_buf_bar,
+ .pBufferMemoryBarriers = &buf_bar,
+ .bufferMemoryBarrierCount = 1,
});
- ws_vk->stage = buf_bar[0].dstStageMask;
- ws_vk->access = buf_bar[0].dstAccessMask;
/* End of denoise pass */
vk->CmdDispatch(exec->buf,
@@ -924,20 +913,14 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
- nb_buf_bar = 0;
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = ws_vk->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
- .srcAccessMask = ws_vk->access,
- .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = ws_vk->buf,
- .size = ws_vk->size,
- .offset = 0,
- };
-
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], ws_vk,
+ ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
+ TRANSFER_BIT, TRANSFER_WRITE_BIT, NONE_KHR,
+ 0, VK_WHOLE_SIZE);
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
+ ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
+ 0, VK_WHOLE_SIZE);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
@@ -945,8 +928,8 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
});
- ws_vk->stage = buf_bar[0].dstStageMask;
- ws_vk->access = buf_bar[0].dstAccessMask;
+ nb_buf_bar = 0;
+ nb_img_bar = 0;
/* Buffer zeroing */
vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0);
@@ -976,10 +959,10 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
ws_vk, ws_size * s-> opts.t, ws_size * s-> opts.t,
VK_FORMAT_UNDEFINED));
+ VkPipelineStageFlagBits2 ws_stage = VK_PIPELINE_STAGE_2_TRANSFER_BIT;
+ VkAccessFlagBits2 ws_access = VK_ACCESS_2_TRANSFER_WRITE_BIT;
do {
int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
-
- /* Integral pipeline */
IntegralPushData pd = {
{ plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
{ plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
@@ -993,55 +976,68 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
desc->nb_components,
};
- ff_vk_exec_bind_shader(vkctx, exec, &s->shd_vertical);
- ff_vk_shader_update_push_const(vkctx, exec, &s->shd_vertical,
- VK_SHADER_STAGE_COMPUTE_BIT,
- 0, sizeof(pd), &pd);
-
- nb_buf_bar = 0;
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = integral_vk->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = integral_vk->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = integral_vk->buf,
- .size = integral_vk->size,
- .offset = 0,
- };
+ /* Vertical pass */
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
+ 0, VK_WHOLE_SIZE);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
});
- integral_vk->stage = buf_bar[0].dstStageMask;
- integral_vk->access = buf_bar[0].dstAccessMask;
+ nb_buf_bar = 0;
- /* End of vertical pass */
+ ff_vk_exec_bind_shader(vkctx, exec, &s->shd_vertical);
+ ff_vk_shader_update_push_const(vkctx, exec, &s->shd_vertical,
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(pd), &pd);
vk->CmdDispatch(exec->buf,
- FFALIGN(vkctx->output_width, s->shd_vertical.lg_size[0])/s->shd_vertical.lg_size[0],
+ FFALIGN(vkctx->output_width, s->shd_vertical.lg_size[0]) /
+ s->shd_vertical.lg_size[0],
desc->nb_components,
wg_invoc);
+ /* Horizontal pass */
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ 0, VK_WHOLE_SIZE);
+ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pBufferMemoryBarriers = buf_bar,
+ .bufferMemoryBarrierCount = nb_buf_bar,
+ });
+ nb_buf_bar = 0;
+
ff_vk_exec_bind_shader(vkctx, exec, &s->shd_horizontal);
ff_vk_shader_update_push_const(vkctx, exec, &s->shd_horizontal,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
+ vk->CmdDispatch(exec->buf,
+ FFALIGN(vkctx->output_height, s->shd_horizontal.lg_size[0]) /
+ s->shd_horizontal.lg_size[0],
+ desc->nb_components,
+ wg_invoc);
- nb_buf_bar = 0;
+ /* Weights pass */
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], integral_vk,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
+ SHADER_STORAGE_WRITE_BIT,
+ COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
+ 0, VK_WHOLE_SIZE);
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = integral_vk->stage,
+ .srcStageMask = ws_stage,
.dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = integral_vk->access,
+ .srcAccessMask = ws_access,
.dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = integral_vk->buf,
- .size = integral_vk->size,
+ .buffer = ws_vk->buf,
+ .size = ws_vk->size,
.offset = 0,
};
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
@@ -1049,16 +1045,10 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
});
- integral_vk->stage = buf_bar[0].dstStageMask;
- integral_vk->access = buf_bar[0].dstAccessMask;
+ nb_buf_bar = 0;
+ ws_stage = buf_bar[1].dstStageMask;
+ ws_access = buf_bar[1].dstAccessMask;
- /* End of horizontal pass */
- vk->CmdDispatch(exec->buf,
- FFALIGN(vkctx->output_height, s->shd_horizontal.lg_size[0])/s->shd_horizontal.lg_size[0],
- desc->nb_components,
- wg_invoc);
-
- /* Weights pipeline */
WeightsPushData wpd = {
{ plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] },
{ plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] },
@@ -1075,52 +1065,15 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
ws_count,
desc->nb_components,
};
-
ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights);
ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(wpd), &wpd);
-
- nb_buf_bar = 0;
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = integral_vk->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = integral_vk->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = integral_vk->buf,
- .size = integral_vk->size,
- .offset = 0,
- };
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = ws_vk->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = ws_vk->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
- VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = ws_vk->buf,
- .size = ws_vk->size,
- .offset = 0,
- };
- vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
- .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
- .pBufferMemoryBarriers = buf_bar,
- .bufferMemoryBarrierCount = nb_buf_bar,
- });
- integral_vk->stage = buf_bar[0].dstStageMask;
- integral_vk->access = buf_bar[0].dstAccessMask;
- ws_vk->stage = buf_bar[1].dstStageMask;
- ws_vk->access = buf_bar[1].dstAccessMask;
-
- /* End of weights pass */
vk->CmdDispatch(exec->buf,
- FFALIGN(vkctx->output_width, s->shd_weights.lg_size[0])/s->shd_weights.lg_size[0],
- FFALIGN(vkctx->output_height, s->shd_weights.lg_size[1])/s->shd_weights.lg_size[1],
+ FFALIGN(vkctx->output_width, s->shd_weights.lg_size[0]) /
+ s->shd_weights.lg_size[0],
+ FFALIGN(vkctx->output_height, s->shd_weights.lg_size[1]) /
+ s->shd_weights.lg_size[1],
wg_invoc * desc->nb_components);
offsets_dispatched += wg_invoc * TYPE_ELEMS;
--
2.49.1
>From e6f09619ec4d35384d3035faf59ca2f2f660ea79 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 24 Dec 2025 01:08:53 +0100
Subject: [PATCH 07/10] ffv1enc_vulkan: use ff_vk_buf_barrier()
---
libavcodec/ffv1enc_vulkan.c | 220 +++++++++++++++---------------------
1 file changed, 93 insertions(+), 127 deletions(-)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 1dc6aa8e90..3f3da6bbae 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -414,41 +414,16 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
VK_NULL_HANDLE);
/* Add a buffer barrier between previous and current frame */
- if (!f->key_frame) {
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = slice_data_buf->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = slice_data_buf->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
- VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = slice_data_buf->buf,
- .size = VK_WHOLE_SIZE,
- .offset = 0,
- };
- }
-
- if (fv->optimize_rct) {
- RET(run_rct_search(avctx, exec,
- src, src_views,
- slice_data_buf, slice_data_size));
-
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = slice_data_buf->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = slice_data_buf->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = slice_data_buf->buf,
- .size = slice_data_size*f->slice_count,
- .offset = 0,
- };
- }
-
+ if (!f->key_frame)
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf,
+ ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
+ 0, slice_data_size*f->slice_count);
+ else
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
+ 0, slice_data_size*f->slice_count);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
@@ -457,9 +432,23 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.bufferMemoryBarrierCount = nb_buf_bar,
});
nb_img_bar = 0;
- if (nb_buf_bar) {
- slice_data_buf->stage = buf_bar[0].dstStageMask;
- slice_data_buf->access = buf_bar[0].dstAccessMask;
+ nb_buf_bar = 0;
+
+ if (fv->optimize_rct) {
+ RET(run_rct_search(avctx, exec,
+ src, src_views,
+ slice_data_buf, slice_data_size));
+
+ /* Make sure the writes are visible to the setup shader */
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
+ 0, slice_data_size*f->slice_count);
+ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pBufferMemoryBarriers = buf_bar,
+ .bufferMemoryBarrierCount = nb_buf_bar,
+ });
nb_buf_bar = 0;
}
@@ -526,87 +515,78 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
}));
}
- /* Setup shader modified the slice data buffer */
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = slice_data_buf->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = slice_data_buf->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
- VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = slice_data_buf->buf,
- .size = slice_data_size*f->slice_count,
- .offset = 0,
+ /* Sync between setup and reset shaders */
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, NONE_KHR,
+ 0, slice_data_size*f->slice_count);
+ /* Prepare the probabilities */
+ if (!f->key_frame)
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf,
+ ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
+ slice_data_size*f->slice_count, VK_WHOLE_SIZE);
+ else
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
+ COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
+ slice_data_size*f->slice_count, VK_WHOLE_SIZE);
+ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pBufferMemoryBarriers = buf_bar,
+ .bufferMemoryBarrierCount = nb_buf_bar,
+ });
+ nb_buf_bar = 0;
+
+ /* Run reset shader */
+ FFv1VkResetParameters pd_reset;
+ ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->reset,
+ 1, 0, 0,
+ slice_data_buf,
+ 0, slice_data_size*f->slice_count,
+ VK_FORMAT_UNDEFINED);
+ ff_vk_exec_bind_shader(&fv->s, exec, &fv->reset);
+ pd_reset = (FFv1VkResetParameters) {
+ .slice_state = slice_data_buf->address + f->slice_count*256,
+ .plane_state_size = plane_state_size,
+ .codec_planes = f->plane_count,
+ .key_frame = f->key_frame,
};
+ for (int i = 0; i < f->quant_table_count; i++)
+ pd_reset.context_count[i] = f->context_count[i];
- if (f->key_frame || f->version > 3) {
- FFv1VkResetParameters pd_reset;
+ ff_vk_shader_update_push_const(&fv->s, exec, &fv->reset,
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(pd_reset), &pd_reset);
+ vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices,
+ f->plane_count);
- ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->reset,
- 1, 0, 0,
- slice_data_buf,
- 0, slice_data_size*f->slice_count,
- VK_FORMAT_UNDEFINED);
+ /* Sync between reset and encode shaders */
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
+ 0, slice_data_size*f->slice_count);
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_data_buf,
+ COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, SHADER_WRITE_BIT,
+ slice_data_size*f->slice_count, VK_WHOLE_SIZE);
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], results_data_buf,
+ ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
+ 0, VK_WHOLE_SIZE);
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], out_data_buf,
+ ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
+ 0, VK_WHOLE_SIZE);
- /* Run setup shader */
- ff_vk_exec_bind_shader(&fv->s, exec, &fv->reset);
- pd_reset = (FFv1VkResetParameters) {
- .slice_state = slice_data_buf->address + f->slice_count*256,
- .plane_state_size = plane_state_size,
- .codec_planes = f->plane_count,
- .key_frame = f->key_frame,
- };
- for (int i = 0; i < f->quant_table_count; i++)
- pd_reset.context_count[i] = f->context_count[i];
-
- ff_vk_shader_update_push_const(&fv->s, exec, &fv->reset,
- VK_SHADER_STAGE_COMPUTE_BIT,
- 0, sizeof(pd_reset), &pd_reset);
-
- /* Sync between setup and reset shaders */
- vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
- .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
- .pBufferMemoryBarriers = buf_bar,
- .bufferMemoryBarrierCount = nb_buf_bar,
- });
- slice_data_buf->stage = buf_bar[0].dstStageMask;
- slice_data_buf->access = buf_bar[0].dstAccessMask;
- nb_buf_bar = 0;
-
- vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices,
- f->plane_count);
- }
-
- /* If the reset shader ran, insert a barrier now. */
- if (f->key_frame || f->version > 3) {
- /* Reset shader modified the slice data buffer */
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = slice_data_buf->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = slice_data_buf->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
- VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = slice_data_buf->buf,
- .size = slice_data_buf->size - slice_data_size*f->slice_count,
- .offset = slice_data_size*f->slice_count,
- };
- }
-
- if (fv->is_rgb) {
+ if (fv->is_rgb)
ff_vk_frame_barrier(&fv->s, exec, tmp, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
- }
- /* Final barrier before encoding */
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
@@ -615,11 +595,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.bufferMemoryBarrierCount = nb_buf_bar,
});
nb_img_bar = 0;
- if (nb_buf_bar) {
- slice_data_buf->stage = buf_bar[0].dstStageMask;
- slice_data_buf->access = buf_bar[0].dstAccessMask;
- nb_buf_bar = 0;
- }
+ nb_buf_bar = 0;
/* Main encode shader */
ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->enc,
@@ -705,25 +681,15 @@ static int transfer_slices(AVCodecContext *avctx,
mapped_ref = NULL; /* Ownership passed */
/* Ensure the output buffer is finished */
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = out_data_buf->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
- .srcAccessMask = out_data_buf->access,
- .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = out_data_buf->buf,
- .size = VK_WHOLE_SIZE,
- .offset = 0,
- };
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], out_data_buf,
+ COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
+ TRANSFER_BIT, TRANSFER_READ_BIT, NONE_KHR,
+ 0, VK_WHOLE_SIZE);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
.bufferMemoryBarrierCount = nb_buf_bar,
});
- out_data_buf->stage = buf_bar[0].dstStageMask;
- out_data_buf->access = buf_bar[0].dstAccessMask;
nb_buf_bar = 0;
for (int i = 0; i < nb_regions; i++)
--
2.49.1
>From 2226b5d0386c3ca7239220cb1e9afbf0c305d625 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 24 Dec 2025 01:27:59 +0100
Subject: [PATCH 08/10] vulkan_prores: use ff_vk_buf_barrier()
---
libavcodec/vulkan_prores.c | 44 +++++++++++---------------------------
1 file changed, 12 insertions(+), 32 deletions(-)
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index afea8857e8..7e7c2ace9c 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -250,27 +250,17 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
/* Input barrier, or synchronization between clear and vld shader */
ff_vk_frame_barrier(&ctx->s, exec, f, img_bar, &nb_img_bar,
- pr->first_field ? VK_PIPELINE_STAGE_2_CLEAR_BIT : VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ pr->first_field ? VK_PIPELINE_STAGE_2_CLEAR_BIT :
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = metadata->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = metadata->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = metadata->buf,
- .offset = pp->slice_offsets_sz,
- .size = pp->mb_params_sz,
- };
- metadata->stage = buf_bar[0].dstStageMask;
- metadata->access = buf_bar[0].dstAccessMask;
-
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], metadata,
+ ALL_COMMANDS_BIT, NONE_KHR, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
+ pp->slice_offsets_sz, pp->mb_params_sz);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
@@ -302,7 +292,8 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
- vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->slice_count / pr->mb_height, 3), AV_CEIL_RSHIFT(pr->mb_height, 3),
+ vk->CmdDispatch(exec->buf, AV_CEIL_RSHIFT(pr->slice_count / pr->mb_height, 3),
+ AV_CEIL_RSHIFT(pr->mb_height, 3),
3 + !!pr->alpha_info);
/* Synchronize vld and idct shaders */
@@ -313,21 +304,10 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
- buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
- .srcStageMask = metadata->stage,
- .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- .srcAccessMask = metadata->access,
- .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
- .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
- .buffer = metadata->buf,
- .offset = pp->slice_offsets_sz,
- .size = pp->mb_params_sz,
- };
- metadata->stage = buf_bar[0].dstStageMask;
- metadata->access = buf_bar[0].dstAccessMask;
-
+ ff_vk_buf_barrier(buf_bar[nb_buf_bar++], metadata,
+ COMPUTE_SHADER_BIT, SHADER_WRITE_BIT, NONE_KHR,
+ COMPUTE_SHADER_BIT, SHADER_READ_BIT, NONE_KHR,
+ pp->slice_offsets_sz, pp->mb_params_sz);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
--
2.49.1
>From 16e217541b4fec616b52b95e082f77513433be15 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Tue, 23 Dec 2025 19:08:04 +0100
Subject: [PATCH 09/10] vulkan: remove FFVkBuffer.stage and access
Keeping global state for every buffer is unnecessary and possibly
suboptimal.
---
libavutil/vulkan.c | 2 --
libavutil/vulkan.h | 4 ----
2 files changed, 6 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index d4ac1544d1..33d7e8aace 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1309,8 +1309,6 @@ int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
return AVERROR(ENOMEM);
data = (FFVkBuffer *)ref->data;
- data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
- data->access = VK_ACCESS_2_NONE;
if (data->size >= size)
return 0;
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 115e9fc940..cde2876e46 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -91,10 +91,6 @@ typedef struct FFVkBuffer {
size_t size;
VkDeviceAddress address;
- /* Local use only */
- VkPipelineStageFlags2 stage;
- VkAccessFlags2 access;
-
/* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE or
* via ff_vk_host_map_buffer */
uint8_t *mapped_mem;
--
2.49.1
>From 5a7e16ce2df5b9bcf6bde0fedbec39cbcf7f1f36 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Wed, 24 Dec 2025 04:10:39 +0100
Subject: [PATCH 10/10] prores_raw_idct: use the same prores_idct method for
copying coeffs
This saves 2 barriers.
---
libavcodec/vulkan/prores_raw_idct.comp | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/libavcodec/vulkan/prores_raw_idct.comp b/libavcodec/vulkan/prores_raw_idct.comp
index ffd71d1d73..c9850d17d7 100644
--- a/libavcodec/vulkan/prores_raw_idct.comp
+++ b/libavcodec/vulkan/prores_raw_idct.comp
@@ -63,30 +63,32 @@ void main(void)
uint8_t qmat_buf[64] = qmat;
[[unroll]]
- for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x) {
- int v = int(imageLoad(dst, offs + 2*ivec2(BLOCK_ID*8, 0) + scan[i])[0]);
+ for (uint y = 0; y < 8; y++) {
+ uint block_off = y*8 + ROW_ID;
+ int v = int(imageLoad(dst, offs + 2*ivec2(BLOCK_ID*8, 0) + scan[block_off])[0]);
float vf = float(sign_extend(v, 16)) / 32768.0;
- vf *= qmat_buf[i] * qscale;
- blocks[BLOCK_ID][COMP_ID*64 + i] = (vf / (64*4.56)) *
- idct_scale[i];
+ vf *= qmat_buf[block_off] * qscale;
+ blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID] = (vf / (64*4.56)) *
+ idct_scale[block_off];
}
+ /* Column-wise iDCT */
+ idct8(BLOCK_ID, COMP_ID*72 + ROW_ID, 9);
barrier();
- idct8(BLOCK_ID, COMP_ID*64 + ROW_ID*8, 1);
- blocks[BLOCK_ID][COMP_ID*64 + ROW_ID] += 0.5;
+ blocks[BLOCK_ID][COMP_ID*72 + ROW_ID * 9] += 0.5f;
+ /* Row-wise iDCT */
+ idct8(BLOCK_ID, COMP_ID*72 + ROW_ID * 9, 1);
barrier();
- idct8(BLOCK_ID, COMP_ID*64 + ROW_ID, 8);
- barrier();
[[unroll]]
- for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x) {
- int v = int(round(blocks[BLOCK_ID][COMP_ID*64 + i]*4095.0));
+ for (uint y = 0; y < 8; y++) {
+ int v = int(round(blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID]*4095.0));
v = clamp(v, 0, 4095);
v <<= 4;
imageStore(dst,
- offs + 2*ivec2(BLOCK_ID*8 + (i & 7), i >> 3),
+ offs + 2*ivec2(BLOCK_ID*8 + ROW_ID, y),
ivec4(v));
}
}
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-12-30 11:22 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-12-30 11:21 [FFmpeg-devel] [PATCH] Small Vulkan sync cleanups (PR #21319) Lynne via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git