* [FFmpeg-devel] [PATCH] vulkan/prores: forward quantization parameter to the IDCT shader (PR #20870)
@ 2025-11-08 20:10 averne via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: averne via ffmpeg-devel @ 2025-11-08 20:10 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: averne
PR #20870 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20870
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20870.patch
The qScale factor, derived from the slice's quantization index (Table 15), can be as large as 512, so a coefficient pre-multiplied by it in the VLD shader could overflow the 16-bit store in extreme cases.
This fixes that edge case by forwarding the quantization index in a storage buffer, and applying the inverse quantization entirely in the IDCT shader.
From 71204cc83765f4715b06a345627b320c5d5e0e70 Mon Sep 17 00:00:00 2001
From: averne <averne381@gmail.com>
Date: Sat, 8 Nov 2025 19:57:37 +0100
Subject: [PATCH] vulkan/prores: forward quantization parameter to the IDCT shader
The qScale factor, derived from the slice's quantization index (Table 15), can be as large as 512, so a coefficient pre-multiplied by it in the VLD shader could overflow the 16-bit store in extreme cases.
This fixes that edge case by forwarding the quantization index in a storage buffer, and applying the inverse quantization fully in the IDCT shader.
---
libavcodec/vulkan/prores_idct.comp | 14 ++--
libavcodec/vulkan/prores_vld.comp | 28 ++++----
libavcodec/vulkan_prores.c | 103 ++++++++++++++++++++++++-----
3 files changed, 111 insertions(+), 34 deletions(-)
diff --git a/libavcodec/vulkan/prores_idct.comp b/libavcodec/vulkan/prores_idct.comp
index 645cb02979..f3469589e0 100644
--- a/libavcodec/vulkan/prores_idct.comp
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -87,17 +87,23 @@ void main(void)
uint chroma_shift = comp != 0 ? log2_chroma_w : 0;
bool act = gid.x < mb_width << (4 - chroma_shift);
- /* Coalesced load of DCT coeffs in shared memory, second part of inverse quantization */
+ /* Coalesced load of DCT coeffs in shared memory, inverse quantization */
if (act) {
+ MbParams p = mb_params[(gid.y >> 1) * mb_width + (gid.x >> 4)];
+
/**
* According to spec indexing an array in push constant memory with
* a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
* so copy the whole matrix locally.
*/
uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
+
+ /* Table 15 */
+ int qscale = p.quant_idx > 128 ? (p.quant_idx - 96) << 2 : p.quant_idx;
+
[[unroll]] for (uint i = 0; i < 8; ++i) {
- int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | i))), 16);
- blocks[block][i * 9 + idx] = float(v * int(qmat[(i << 3) + idx]));
+ int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) + i))), 16);
+ blocks[block][i * 9 + idx] = float(v * qscale * int(qmat[(i << 3) + idx]));
}
}
@@ -117,7 +123,7 @@ void main(void)
if (act) {
[[unroll]] for (uint i = 0; i < 8; ++i) {
float v = blocks[block][i * 9 + idx] * fact + off;
- put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, maxv));
+ put_px(comp, ivec2(gid.x, (gid.y << 3) + i), clamp(int(v), 0, maxv));
}
}
}
diff --git a/libavcodec/vulkan/prores_vld.comp b/libavcodec/vulkan/prores_vld.comp
index 00e78e08ff..258604fb36 100644
--- a/libavcodec/vulkan/prores_vld.comp
+++ b/libavcodec/vulkan/prores_vld.comp
@@ -57,7 +57,7 @@ uint decode_codeword(inout GetBitContext gb, int codebook)
}
}
-void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
+void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count)
{
uvec3 gid = gl_GlobalInvocationID;
uint is_luma = uint(gid.z == 0);
@@ -70,7 +70,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
{
/* First coeff */
uint c = to_signed(decode_codeword(gb, 0x650));
- put_px(gid.z, base_pos, c * qscale & 0xffff);
+ put_px(gid.z, base_pos, c & 0xffff);
/**
* Table 9, encoded as (last_rice_q << 0) | (krice or kexp << 4) | ((kexp or kexp + 1) << 8)
@@ -89,7 +89,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
int s = int(prev_dc_diff) >> 31;
c += prev_dc_diff = (to_signed(cw) ^ s) - s;
- put_px(gid.z, base_pos + pos_to_block(i, is_luma), c * qscale & 0xffff);
+ put_px(gid.z, base_pos + pos_to_block(i, is_luma), c & 0xffff);
}
}
@@ -152,7 +152,7 @@ void decode_comp(in GetBitContext gb, uvec2 mb_pos, uint mb_count, uint qscale)
ivec2 bpos = ivec2(scan & 0xf, scan >> 4);
uint c = ((level + 1) ^ -s) + s;
- put_px(gid.z, base_pos + spos + bpos, c * qscale & 0xffff);
+ put_px(gid.z, base_pos + spos + bpos, c & 0xffff);
}
}
}
@@ -235,13 +235,8 @@ void main(void)
u8buf bs = u8buf(slice_data + slice_off);
/* Decode slice header */
- uint hdr_size, y_size, u_size, v_size, a_size;
- hdr_size = bs[0].v >> 3;
-
- /* Table 15 */
- uint qidx = clamp(bs[1].v, 1, 224),
- qscale = qidx > 128 ? (qidx - 96) << 2 : qidx;
-
+ uint hdr_size, qidx, y_size, u_size, v_size, a_size;
+ hdr_size = bs[0].v >> 3, qidx = clamp(bs[1].v, 1, 224);
y_size = (uint(bs[2].v) << 8) | bs[3].v;
u_size = (uint(bs[4].v) << 8) | bs[5].v;
@@ -308,10 +303,17 @@ void main(void)
uint mb_count = 1 << log2_width;
if (gid.z < 3) {
- /* Color entropy decoding, inverse scanning, first part of inverse quantization */
- decode_comp(gb, uvec2(mb_x, mb_y), mb_count, qscale);
+ /* Color entropy decoding, inverse scanning */
+ decode_comp(gb, uvec2(mb_x, mb_y), mb_count);
} else {
/* Alpha entropy decoding */
decode_alpha(gb, uvec2(mb_x, mb_y), mb_count);
}
+
+ /* Forward the quantization index to the IDCT shader */
+ if (gid.z == 0) {
+ uint base = mb_y * mb_width + mb_x;
+ for (uint i = 0; i < mb_count; ++i)
+ mb_params[base + i].quant_idx = uint8_t(qidx);
+ }
}
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index 2602be112b..2bcf9872b4 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -38,10 +38,11 @@ typedef struct ProresVulkanDecodePicture {
FFVulkanDecodePicture vp;
AVBufferRef *slice_offset_buf;
- uint32_t slice_num;
+ AVBufferRef *mb_params_buf;
uint32_t bitstream_start;
uint32_t bitstream_size;
+ uint32_t slice_num;
} ProresVulkanDecodePicture;
typedef struct ProresVulkanDecodeContext {
@@ -52,6 +53,7 @@ typedef struct ProresVulkanDecodeContext {
} shaders[2]; /* Progressive/interlaced */
AVBufferPool *slice_offset_pool;
+ AVBufferPool *mb_params_pool;
} ProresVulkanDecodeContext;
typedef struct ProresVkParameters {
@@ -74,6 +76,10 @@ typedef struct ProresVkParameters {
uint8_t qmat_chroma[64];
} ProresVkParameters;
+typedef struct ProresVkMbParameters {
+ uint8_t quant_idx;
+} ProresVkMbParameters;
+
static int vk_prores_start_frame(AVCodecContext *avctx,
const AVBufferRef *buffer_ref,
av_unused const uint8_t *buffer,
@@ -98,12 +104,18 @@ static int vk_prores_start_frame(AVCodecContext *avctx,
/* Allocate slice offsets buffer */
RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->slice_offset_pool,
&pp->slice_offset_buf,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
NULL, (pr->slice_count + 1) * sizeof(uint32_t),
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+ /* Allocate macroblock parameters buffer */
+ RET(ff_vk_get_pooled_buffer(&ctx->s, &pv->mb_params_pool,
+ &pp->mb_params_buf,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ NULL, pr->mb_width * pr->mb_height * sizeof(ProresVkMbParameters),
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
+
/* Prepare frame to be used */
RET(ff_vk_decode_prepare_frame_sdr(dec, pr->frame, vp, 1,
FF_VK_REP_NATIVE, 0));
@@ -158,7 +170,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
FFVulkanDecodePicture *vp = &pp->vp;
ProresVkParameters pd;
- FFVkBuffer *slice_data, *slice_offsets;
+ FFVkBuffer *slice_data, *slice_offsets, *mb_params;
struct ProresVulkanShaderVariants *shaders;
VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
VkBufferMemoryBarrier2 buf_bar[2];
@@ -174,6 +186,7 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
slice_data = (FFVkBuffer *)vp->slices_buf->data;
slice_offsets = (FFVkBuffer *)pp->slice_offset_buf->data;
+ mb_params = (FFVkBuffer *)pp->mb_params_buf->data;
shaders = &pv->shaders[pr->frame_type != 0];
@@ -209,11 +222,15 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
pr->frame));
RET(ff_vk_exec_add_dep_buf(&ctx->s, exec,
- (AVBufferRef *[]){ vp->slices_buf, pp->slice_offset_buf },
- 2, 0));
+ (AVBufferRef *[]){
+ vp->slices_buf,
+ pp->slice_offset_buf,
+ pp->mb_params_buf,
+ },
+ 3, 0));
/* Transfer ownership to the exec context */
- vp->slices_buf = pp->slice_offset_buf = NULL;
+ vp->slices_buf = pp->slice_offset_buf = pp->mb_params_buf = NULL;
/* Input frame barrier */
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
@@ -270,9 +287,14 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
slice_offsets,
0, (pp->slice_num + 1) * sizeof(uint32_t),
VK_FORMAT_UNDEFINED);
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->vld,
+ 0, 1, 0,
+ mb_params,
+ 0, pr->mb_width * pr->mb_height * sizeof(ProresVkMbParameters),
+ VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->vld,
pr->frame, vp->view.out,
- 0, 1,
+ 0, 2,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
@@ -286,7 +308,6 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
3 + !!pr->alpha_info);
/* Synchronize vld and idct shaders */
- nb_img_bar = 0;
ff_vk_frame_barrier(&ctx->s, exec, pr->frame, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
@@ -294,6 +315,21 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
+ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+ .srcStageMask = mb_params->stage,
+ .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ .srcAccessMask = mb_params->access,
+ .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = mb_params->buf,
+ .offset = 0,
+ .size = pr->mb_width * pr->mb_height * sizeof(ProresVkMbParameters),
+ };
+ mb_params->stage = buf_bar[0].dstStageMask;
+ mb_params->access = buf_bar[0].dstAccessMask;
+
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pBufferMemoryBarriers = buf_bar,
@@ -304,9 +340,14 @@ static int vk_prores_end_frame(AVCodecContext *avctx)
nb_img_bar = nb_buf_bar = 0;
/* Inverse transform */
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, &shaders->idct,
+ 0, 0, 0,
+ mb_params,
+ 0, pr->mb_width * pr->mb_height * sizeof(ProresVkMbParameters),
+ VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&ctx->s, exec, &shaders->idct,
pr->frame, vp->view.out,
- 0, 0,
+ 0, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
@@ -350,6 +391,14 @@ static int add_push_data(FFVulkanShader *shd)
VK_SHADER_STAGE_COMPUTE_BIT);
}
+static int add_struct_layouts(FFVulkanShader *shd) {
+ GLSLC(0, struct MbParams { );
+ GLSLC(1, uint8_t quant_idx; );
+ GLSLC(0, }; );
+
+ return 0;
+}
+
static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
FFVulkanShader *shd, const char *name, const char *entrypoint,
@@ -374,6 +423,9 @@ static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
/* Push constants layout */
RET(add_push_data(shd));
+ /* Structs layout */
+ RET(add_struct_layouts(shd));
+
RET(ff_vk_shader_add_descriptor_set(s, shd, descs, num_descs, 0, 0));
if (interlaced)
@@ -407,22 +459,23 @@ static void vk_decode_prores_uninit(FFVulkanDecodeShared *ctx)
}
av_buffer_pool_uninit(&pv->slice_offset_pool);
+ av_buffer_pool_uninit(&pv->mb_params_pool);
av_freep(&pv);
}
static int vk_decode_prores_init(AVCodecContext *avctx)
{
- FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
- FFVulkanDecodeShared *ctx = NULL;
+ FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ FFVulkanDecodeShared *ctx = NULL;
AVHWFramesContext *out_frames_ctx;
ProresVulkanDecodeContext *pv;
FFVkSPIRVCompiler *spv;
FFVulkanDescriptorSetBinding *desc_set;
- int max_num_slices, i, err;
+ int max_num_mbs, i, err;
- max_num_slices = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);
+ max_num_mbs = (avctx->coded_width >> 4) * (avctx->coded_height >> 4);
spv = ff_vk_spirv_init();
if (!spv) {
@@ -471,7 +524,15 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_quali = "readonly",
.buf_content = "uint32_t slice_offsets",
- .buf_elems = max_num_slices + 1,
+ .buf_elems = max_num_mbs + 1,
+ },
+ {
+ .name = "mb_params_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_quali = "writeonly",
+ .buf_content = "MbParams mb_params",
+ .buf_elems = max_num_mbs,
},
{
.name = "dst",
@@ -485,10 +546,18 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
},
};
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->vld,
- "prores_dec_vld", "main", desc_set, 2,
+ "prores_dec_vld", "main", desc_set, 3,
ff_source_prores_vld_comp, 0x080801, i));
desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "mb_params_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_quali = "readonly",
+ .buf_content = "MbParams mb_params",
+ .buf_elems = max_num_mbs,
+ },
{
.name = "dst",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
@@ -500,7 +569,7 @@ static int vk_decode_prores_init(AVCodecContext *avctx)
},
};
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool, spv, &shaders->idct,
- "prores_dec_idct", "main", desc_set, 1,
+ "prores_dec_idct", "main", desc_set, 2,
ff_source_prores_idct_comp, 0x200201, i));
}
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-11-08 20:10 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-08 20:10 [FFmpeg-devel] [PATCH] vulkan/prores: forward quantization parameter to the IDCT shader (PR #20870) averne via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git