[FFmpeg-devel] [PATCH] vulkan/prores: forward quantization parameter to the IDCT shader (PR #20870)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

* [FFmpeg-devel] [PATCH] vulkan/prores: forward quantization parameter to the IDCT shader (PR #20870)
@ 2025-11-08 20:10 averne via ffmpeg-devel
  0 siblings, 0 replies; only message in thread
From: averne via ffmpeg-devel @ 2025-11-08 20:10 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: averne

PR #20870 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20870
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20870.patch

The qScale value derived from the quantization index syntax element can reach 512, so multiplying the coefficients by it in the VLD shader could overflow the 16-bit store in extreme cases.
This fixes that edge case by forwarding the quantization index in a storage buffer, and applying the inverse quantization entirely in the IDCT shader.


From 71204cc83765f4715b06a345627b320c5d5e0e70 Mon Sep 17 00:00:00 2001
From: averne <averne381@gmail.com>
Date: Sat, 8 Nov 2025 19:57:37 +0100
Subject: [PATCH] vulkan/prores: forward quantization parameter to the IDCT
 shader

The qScale value derived from the quantization index syntax element can reach 512, so multiplying the coefficients by it in the VLD shader could overflow the 16-bit store in extreme cases.
This fixes that edge case by forwarding the quantization index in a storage buffer, and applying the inverse quantization fully in the IDCT shader.
---
 libavcodec/vulkan/prores_idct.comp |  14 ++--
 libavcodec/vulkan/prores_vld.comp  |  28 ++++----
 libavcodec/vulkan_prores.c         | 103 ++++++++++++++++++++++++-----
 3 files changed, 111 insertions(+), 34 deletions(-)

diff --git a/libavcodec/vulkan/prores_idct.comp b/libavcodec/vulkan/prores_idct.comp
index 645cb02979..f3469589e0 100644
--- a/libavcodec/vulkan/prores_idct.comp
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -87,17 +87,23 @@ void main(void)
     uint chroma_shift = comp != 0 ? log2_chroma_w : 0;
     bool act = gid.x < mb_width << (4 - chroma_shift);
 
-    /* Coalesced load of DCT coeffs in shared memory, second part of inverse quantization */
+    /* Coalesced load of DCT coeffs in shared memory, inverse quantization */
     if (act) {
+        MbParams p = mb_params[(gid.y >> 1) * mb_width + (gid.x >> 4)];
+
         /**
          * According to spec indexing an array in push constant memory with
          * a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
          * so copy the whole matrix locally.
          */
         uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
+
+        /* Table 15 */
+        int qscale = p.quant_idx > 128 ? (p.quant_idx - 96) << 2 : p.quant_idx;
+
         [[unroll]] for (uint i = 0; i < 8; ++i) {
-            int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | i))), 16);
-            blocks[block][i * 9 + idx] = float(v * int(qmat[(i << 3) + idx]));
+            int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) + i))), 16);
+            blocks[block][i * 9 + idx] = float(v * qscale * int(qmat[(i << 3) + idx]));
         }
     }
 
@@ -117,7 +123,7 @@ void main(void)
     if (act) {
         [[unroll]] for (uint i = 0; i < 8; ++i) {
             float v = blocks[block][i * 9 + idx] * fact + off;
-            put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, maxv));
+            put_px(comp, ivec2(gid.x, (gid.y << 3) + i), clamp(int(v), 0, maxv));
         }
     }
 }
diff --git a/libavcodec/vulkan/prores_vld.comp b/libavcodec/vulkan/prores_vld.comp
index 00e78e08ff..258604fb36 100644
--- a/libavcodec/vulkan/prores_vld.comp
+++ b/libavcodec/vulkan/prores_vld.comp
@@ -57,7 +57,7 @@ uint decode_codeword(inout GetBitContext gb, int codebook)
     }
 }
 
-void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
+void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
 {
     uvec3 gid = gl_GlobalInvocationID;
     uint is_luma = uint(gid.z == 0);
@@ -70,7 +70,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
     {
         /* First coeff */
         uint c = to_signed(decode_codeword(gb, 0x650));
-        put_px(gid.z, base_pos, c * qscale & 0xffff);
+        put_px(gid.z, base_pos, c & 0xffff);
 
         /**
          * Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8)
@@ -89,7 +89,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
             int s = int(prev_dc_diff) >> 31;
             c += prev_dc_diff = (to_signed(cw) ^ s) - s;
 
-            put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 0xffff);
+            put_px(gid.z, base_pos + pos_to_block(i, is_luma), c & 0xffff);
         }
     }
 
@@ -152,7 +152,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
             ivec2 bpos = ivec2(scan & 0xf, scan >> 4);
 
             uint c = ((level + 1) ^ -s) + s;
-            put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff);
+            put_px(gid.z, base_pos + spos + bpos, c & 0xffff);
         }
     }
 }
@@ -235,13 +235,8 @@ void main(void)
     u8buf bs = u8buf(slice_data + slice_off);
 
     /* Decode slice header */
-    uint hdr_size, y_size, u_size, v_size, a_size;
-    hdr_size = bs[0].v >> 3;
-
-    /* Table 15 */
-    uint qidx   = clamp(bs[1].v, 1, 224),
-         qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
-
+    uint hdr_size, qidx, y_size, u_size, v_size, a_size;
+    hdr_size = bs[0].v >> 3, qidx = clamp(bs[1].v, 1, 224);
     y_size = (uint(bs[2].v) << 8) | bs[3].v;
     u_size = (uint(bs[4].v) << 8) | bs[5].v;
 
@@ -308,10 +303,17 @@ void main(void)
     uint mb_count = 1 << log2_width;
 
     if (gid.z < 3) {
-        /* Color entropy decoding, inverse scanning, first part of inverse quantization */
-        decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale);
+        /* Color entropy decoding, inverse scanning */
+        decode_comp(gb, uvec2(mb_x, mb_y), mb_count);
     } else {
         /* Alpha entropy decoding */
         decode_alpha(gb, uvec2(mb_x, mb_y), mb_count);
     }
+
+    /* Forward the quantization index to the IDCT shader */
+    if (gid.z == 0) {
+        uint base = mb_y * mb_width + mb_x;
+        for (uint i = 0; i < mb_count; ++i)
+            mb_params[base + i].quant_idx = uint8_t(qidx);
+    }
 }
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index 2602be112b..2bcf9872b4 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -38,10 +38,11 @@ typedef struct ProresVulkanDecodePicture {
     FFVulkanDecodePicture vp;
 
     AVBufferRef *slice_offset_buf;
-    uint32_t slice_num;
+    AVBufferRef *mb_params_buf;
 
     uint32_t bitstream_start;
     uint32_t bitstream_size;
+    uint32_t slice_num;
 } ProresVulkanDecodePicture;
 
 typedef struct ProresVulkanDecodeContext {
@@ -52,6 +53,7 @@ typedef struct ProresVulkanDecodeContext {
     } shaders[2]; /* Progressive/interlaced */
 
     AVBufferPool *slice_offset_pool;
+    AVBufferPool *mb_params_pool;
 } ProresVulkanDecodeContext;
 
 typedef struct ProresVkParameters {
@@ -74,6 +76,10 @@ typedef struct ProresVkParameters {
     uint8_t  qmat_chroma[64];
 } ProresVkParameters;
 
+typedef struct ProresVkMbParameters {
+    uint8_t quant_idx;
+} ProresVkMbParameters;
+
 static int vk_prores_start_frame(AVCodecContext          *avctx,
                                  const AVBufferRef       *buffer_ref,
                                  av_unused const uint8_t *buffer,
@@ -98,12 +104,18 @@ static int vk_prores_start_frame(AVCodecContext          *avctx,
     /* Allocate slice offsets buffer */
     RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->slice_offset_pool,
                                 &pp->slice_offset_buf,
-                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
-                                VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                                 NULL, (pr->slice_count + 1) * sizeof(uint32_t),
                                 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
 
+    /* Allocate macroblock parameters buffer */
+    RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->mb_params_pool,
+                                &pp->mb_params_buf,
+                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+                                NULL, pr->mb_width * pr->mb_height * sizeof(ProresVkMbParameters),
+                                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
+
     /* Prepare frame to be used */
     RET(ff_vk_decode_prepare_frame_sdr(dec, pr->frame, vp, 1,
                                        FF_VK_REP_NATIVE, 0));
@@ -158,7 +170,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     FFVulkanDecodePicture     *vp = &pp->vp;
 
     ProresVkParameters pd;
-    FFVkBuffer *slice_data, *slice_offsets;
+    FFVkBuffer *slice_data, *slice_offsets, *mb_params;
     struct ProresVulkanShaderVariants *shaders;
     VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
     VkBufferMemoryBarrier2 buf_bar[2];
@@ -174,6 +186,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
 
     slice_data    = (FFVkBuffer *)vp->slices_buf->data;
     slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data;
+    mb_params     = (FFVkBuffer *)pp->mb_params_buf->data;
 
     shaders = &pv->shaders[pr->frame_type != 0];
 
@@ -209,11 +222,15 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                                     pr->frame));
 
     RET(ff_vk_exec_add_dep_buf(&ctx->s, exec,
-                               (AVBufferRef *[]){ vp->slices_buf, pp->slice_offset_buf },
-                               2, 0));
+                               (AVBufferRef *[]){
+                                   vp->slices_buf,
+                                   pp->slice_offset_buf,
+                                   pp->mb_params_buf,
+                               },
+                               3, 0));
 
     /* Transfer ownership to the exec context */
-    vp->slices_buf = pp->slice_offset_buf = NULL;
+    vp->slices_buf = pp->slice_offset_buf = pp->mb_params_buf = NULL;
 
     /* Input frame barrier */
     ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
@@ -270,9 +287,14 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                                     slice_offsets,
                                     0, (pp->slice_num + 1) * sizeof(uint32_t),
                                     VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
+                                    0, 1, 0,
+                                    mb_params,
+                                    0, pr->mb_width * pr->mb_height * sizeof(ProresVkMbParameters),
+                                    VK_FORMAT_UNDEFINED);
     ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld,
                                   pr->frame, vp->view.out,
-                                  0, 1,
+                                  0, 2,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_NULL_HANDLE);
 
@@ -286,7 +308,6 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                     3 + !!pr->alpha_info);
 
     /* Synchronize vld and idct shaders */
-    nb_img_bar = 0;
     ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
@@ -294,6 +315,21 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
                         VK_IMAGE_LAYOUT_GENERAL,
                         VK_QUEUE_FAMILY_IGNORED);
 
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType               = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask        = mb_params->stage,
+        .dstStageMask        = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask       = mb_params->access,
+        .dstAccessMask       = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer              = mb_params->buf,
+        .offset              = 0,
+        .size                = pr->mb_width * pr->mb_height * sizeof(ProresVkMbParameters),
+    };
+    mb_params->stage  = buf_bar[0].dstStageMask;
+    mb_params->access = buf_bar[0].dstAccessMask;
+
     vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
         .sType                    = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
         .pBufferMemoryBarriers    = buf_bar,
@@ -304,9 +340,14 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
     nb_img_bar = nb_buf_bar = 0;
 
     /* Inverse transform */
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->idct,
+                                    0, 0, 0,
+                                    mb_params,
+                                    0, pr->mb_width * pr->mb_height * sizeof(ProresVkMbParameters),
+                                    VK_FORMAT_UNDEFINED);
     ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct,
                                   pr->frame, vp->view.out,
-                                  0, 0,
+                                  0, 1,
                                   VK_IMAGE_LAYOUT_GENERAL,
                                   VK_NULL_HANDLE);
 
@@ -350,6 +391,14 @@ static int add_push_data(FFVulkanShader *shd)
                                        VK_SHADER_STAGE_COMPUTE_BIT);
 }
 
+static int add_struct_layouts(FFVulkanShader *shd) {
+    GLSLC(0, struct MbParams {                                     );
+    GLSLC(1,     uint8_t quant_idx;                                );
+    GLSLC(0, };                                                    );
+
+    return 0;
+}
+
 static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
                        FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
                        FFVulkanShader *shd, const char *name, const char *entrypoint,
@@ -374,6 +423,9 @@ static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
     /* Push constants layout */
     RET(add_push_data(shd));
 
+    /* Structs layout */
+    RET(add_struct_layouts(shd));
+
     RET(ff_vk_shader_add_descriptor_set(s, shd, descs, num_descs, 0, 0));
 
     if (interlaced)
@@ -407,22 +459,23 @@ static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx)
     }
 
     av_buffer_pool_uninit(&pv->slice_offset_pool);
+    av_buffer_pool_uninit(&pv->mb_params_pool);
 
     av_freep(&pv);
 }
 
 static int vk_decode_prores_init(AVCodecContext *avctx)
 {
-    FFVulkanDecodeContext        *dec = avctx->internal->hwaccel_priv_data;
-    FFVulkanDecodeShared         *ctx = NULL;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared  *ctx = NULL;
 
     AVHWFramesContext *out_frames_ctx;
     ProresVulkanDecodeContext *pv;
     FFVkSPIRVCompiler *spv;
     FFVulkanDescriptorSetBinding *desc_set;
-    int max_num_slices, i, err;
+    int max_num_mbs, i, err;
 
-    max_num_slices = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);
+    max_num_mbs = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);
 
     spv = ff_vk_spirv_init();
     if (!spv) {
@@ -471,7 +524,15 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
                 .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
                 .mem_quali   = "readonly",
                 .buf_content = "uint32_t slice_offsets",
-                .buf_elems   = max_num_slices + 1,
+                .buf_elems   = max_num_mbs + 1,
+            },
+            {
+                .name        = "mb_params_buf",
+                .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+                .mem_quali   = "writeonly",
+                .buf_content = "MbParams mb_params",
+                .buf_elems   = max_num_mbs,
             },
             {
                 .name       = "dst",
@@ -485,10 +546,18 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
             },
         };
         RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld,
-                        "prores_dec_vld", "main", desc_set, 2,
+                        "prores_dec_vld", "main", desc_set, 3,
                         ff_source_prores_vld_comp, 0x080801, i));
 
         desc_set = (FFVulkanDescriptorSetBinding []) {
+            {
+                .name        = "mb_params_buf",
+                .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+                .mem_quali   = "readonly",
+                .buf_content = "MbParams mb_params",
+                .buf_elems   = max_num_mbs,
+            },
             {
                 .name       = "dst",
                 .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
@@ -500,7 +569,7 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
             },
         };
         RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct,
-                        "prores_dec_idct", "main", desc_set, 1,
+                        "prores_dec_idct", "main", desc_set, 2,
                         ff_source_prores_idct_comp, 0x200201, i));
     }
 
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2025-11-08 20:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-08 20:10 [FFmpeg-devel] [PATCH] vulkan/prores: forward quantization parameter to the IDCT shader (PR #20870) averne via ffmpeg-devel

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git