Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Lynne <dev@lynne.ee>
To: ffmpeg-devel@ffmpeg.org
Cc: Lynne <dev@lynne.ee>
Subject: [FFmpeg-devel] [PATCH 15/18] vulkan_ffv1: remove need for scratch data during setup
Date: Sat, 12 Apr 2025 09:22:46 +0200
Message-ID: <20250412072256.77815-15-dev@lynne.ee> (raw)
In-Reply-To: <20250412072256.77815-1-dev@lynne.ee>

This saves some VRAM, but mainly allows for a more unified code path.
---
 libavcodec/vulkan/ffv1_dec_setup.comp | 55 ++++++++++++++-------------
 libavcodec/vulkan/rangecoder.comp     | 17 +++++++++
 libavcodec/vulkan_ffv1.c              | 23 +----------
 3 files changed, 46 insertions(+), 49 deletions(-)

diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp
index 5da63be56d..a27a878927 100644
--- a/libavcodec/vulkan/ffv1_dec_setup.comp
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp
@@ -20,13 +20,15 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-uint get_usymbol(inout RangeCoder c, uint64_t state)
+uint8_t setup_state[CONTEXT_SIZE];
+
+uint get_usymbol(inout RangeCoder c)
 {
-    if (get_rac(c, state + 0))
+    if (get_rac_direct(c, setup_state[0]))
         return 0;
 
     int e = 0;
-    while (get_rac(c, state + 1 + min(e, 9))) { // 1..10
+    while (get_rac_direct(c, setup_state[1 + min(e, 9)])) { // 1..10
         e++;
         if (e > 31) {
             corrupt = true;
@@ -35,24 +37,24 @@ uint get_usymbol(inout RangeCoder c, uint64_t state)
     }
 
     uint a = 1;
-    for (int i = e - 1; i >= 0; i--)
-        a += a + uint(get_rac(c, state + 22 + min(i, 9)));  // 22..31
+    for (int i = e - 1; i >= 0; i--) {
+        a <<= 1;
+        a |= uint(get_rac_direct(c, setup_state[22 + min(i, 9)]));  // 22..31
+    }
 
     return a;
 }
 
-bool decode_slice_header(inout SliceContext sc, uint64_t state)
+bool decode_slice_header(inout SliceContext sc)
 {
-    u8buf sb = u8buf(state);
-
     [[unroll]]
     for (int i = 0; i < CONTEXT_SIZE; i++)
-        sb[i].v = uint8_t(128);
+        setup_state[i] = uint8_t(128);
 
-    uint sx = get_usymbol(sc.c, state);
-    uint sy = get_usymbol(sc.c, state);
-    uint sw = get_usymbol(sc.c, state) + 1;
-    uint sh = get_usymbol(sc.c, state) + 1;
+    uint sx = get_usymbol(sc.c);
+    uint sy = get_usymbol(sc.c);
+    uint sw = get_usymbol(sc.c) + 1;
+    uint sh = get_usymbol(sc.c) + 1;
 
     if (sx < 0 || sy < 0 || sw <= 0 || sh <= 0 ||
         sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) ||
@@ -72,22 +74,22 @@ bool decode_slice_header(inout SliceContext sc, uint64_t state)
     sc.slice_coding_mode = int(0);
 
     for (uint i = 0; i < codec_planes; i++) {
-        uint idx = get_usymbol(sc.c, state);
+        uint idx = get_usymbol(sc.c);
         if (idx >= quant_table_count)
             return true;
         sc.quant_table_idx[i] = uint8_t(idx);
     }
 
-    get_usymbol(sc.c, state);
-    get_usymbol(sc.c, state);
-    get_usymbol(sc.c, state);
+    get_usymbol(sc.c);
+    get_usymbol(sc.c);
+    get_usymbol(sc.c);
 
     if (version >= 4) {
-        sc.slice_reset_contexts = get_rac(sc.c, state);
-        sc.slice_coding_mode = get_usymbol(sc.c, state);
+        sc.slice_reset_contexts = get_rac_direct(sc.c, setup_state[0]);
+        sc.slice_coding_mode = get_usymbol(sc.c);
         if (sc.slice_coding_mode != 1 && colorspace == 1) {
-            sc.slice_rct_coef.x = int(get_usymbol(sc.c, state));
-            sc.slice_rct_coef.y = int(get_usymbol(sc.c, state));
+            sc.slice_rct_coef.x = int(get_usymbol(sc.c));
+            sc.slice_rct_coef.y = int(get_usymbol(sc.c));
             if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4)
                 return true;
         }
@@ -96,11 +98,11 @@ bool decode_slice_header(inout SliceContext sc, uint64_t state)
     return false;
 }
 
-void golomb_init(inout SliceContext sc, uint64_t state)
+void golomb_init(inout SliceContext sc)
 {
     if (version == 3 && micro_version > 1 || version > 3) {
-        u8buf(state).v = uint8_t(129);
-        get_rac(sc.c, state);
+        setup_state[0] = uint8_t(129);
+        get_rac_direct(sc.c, setup_state[0]);
     }
 
     uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;
@@ -111,7 +113,6 @@ void golomb_init(inout SliceContext sc, uint64_t state)
 void main(void)
 {
     const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
-    uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE;
 
     u8buf bs = u8buf(slice_data + slice_offsets[2*slice_idx + 0]);
     uint32_t slice_size = slice_offsets[2*slice_idx + 1];
@@ -122,10 +123,10 @@ void main(void)
     if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1))
         get_rac_equi(slice_ctx[slice_idx].c);
 
-    decode_slice_header(slice_ctx[slice_idx], scratch_state);
+    decode_slice_header(slice_ctx[slice_idx]);
 
     if (golomb == 1)
-        golomb_init(slice_ctx[slice_idx], scratch_state);
+        golomb_init(slice_ctx[slice_idx]);
 
     if (ec != 0 && check_crc != 0) {
         uint32_t crc = crcref;
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index e332bce8a5..ff0432511d 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -245,6 +245,23 @@ bool get_rac(inout RangeCoder c, uint64_t state)
     return bit;
 }
 
+bool get_rac_direct(inout RangeCoder c, inout uint8_t state)
+{
+    int range1 = -int(c.range * state >> 8);
+    int ranged = c.range + range1;
+
+    bool bit = c.low >= ranged;
+    state = zero_one_state[state + (bit ? 256 : 0)];
+
+    c.low = c.low - (bit ? ranged : 0);
+    c.range = (bit ? 0 : ranged) - (bit ? range1 : 0);
+
+    if (c.range < 0x100)
+        refill(c);
+
+    return bit;
+}
+
 bool get_rac_equi(inout RangeCoder c)
 {
     int range1 = c.range >> 1;
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index 72cacb1678..c1875711bc 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -43,8 +43,6 @@ const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
 typedef struct FFv1VulkanDecodePicture {
     FFVulkanDecodePicture vp;
 
-    AVBufferRef *tmp_data;
-
     AVBufferRef *slice_state;
     uint32_t plane_state_size;
     uint32_t slice_state_size;
@@ -70,7 +68,6 @@ typedef struct FFv1VulkanDecodeContext {
     FFVkBuffer crc_tab_buf;
 
     AVBufferPool *slice_state_pool;
-    AVBufferPool *tmp_data_pool;
     AVBufferPool *slice_offset_pool;
     AVBufferPool *slice_status_pool;
 } FFv1VulkanDecodeContext;
@@ -78,7 +75,6 @@ typedef struct FFv1VulkanDecodeContext {
 typedef struct FFv1VkParameters {
     VkDeviceAddress slice_data;
     VkDeviceAddress slice_state;
-    VkDeviceAddress scratch_data;
 
     int fmt_lut[4];
     uint32_t img_size[2];
@@ -111,7 +107,6 @@ static void add_push_data(FFVulkanShader *shd)
     GLSLC(0, layout(push_constant, scalar) uniform pushConstants {  );
     GLSLC(1,    u8buf slice_data;                                   );
     GLSLC(1,    u8buf slice_state;                                  );
-    GLSLC(1,    u8buf scratch_data;                                 );
     GLSLC(0,                                                        );
     GLSLC(1,    ivec4 fmt_lut;                                      );
     GLSLC(1,    uvec2 img_size;                                     );
@@ -208,16 +203,6 @@ static int vk_ffv1_start_frame(AVCodecContext          *avctx,
             return AVERROR(ENOMEM);
     }
 
-    /* Allocate temporary data buffer */
-    err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool,
-                                  &fp->tmp_data,
-                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
-                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
-                                  NULL, f->slice_count*CONTEXT_SIZE,
-                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
-    if (err < 0)
-        return err;
-
     /* Allocate slice offsets buffer */
     err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool,
                                   &fp->slice_offset_buf,
@@ -327,7 +312,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
     FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
     FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data;
 
-    FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data;
     VkImageView rct_image_views[AV_NUM_DATA_POINTERS];
 
     AVFrame *decode_dst = is_rgb ? vp->dpb_frame : f->picture.f;
@@ -380,8 +364,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
     vp->slices_buf = NULL;
     RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0));
     fp->slice_offset_buf = NULL;
-    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0));
-    fp->tmp_data = NULL;
 
     /* Entry barrier for the slice state */
     buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
@@ -430,8 +412,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
     ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
     pd = (FFv1VkParameters) {
         .slice_data = slices_buf->address,
-        .slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
-        .scratch_data = tmp_data->address,
+        .slice_state  = slice_state->address + f->slice_count*fp->slice_data_size,
 
         .img_size[0] = f->picture.f->width,
         .img_size[1] = f->picture.f->height,
@@ -990,7 +971,6 @@ static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
     ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
     ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf);
 
-    av_buffer_pool_uninit(&fv->tmp_data_pool);
     av_buffer_pool_uninit(&fv->slice_state_pool);
     av_buffer_pool_uninit(&fv->slice_offset_pool);
     av_buffer_pool_uninit(&fv->slice_status_pool);
@@ -1148,7 +1128,6 @@ static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
     av_buffer_unref(&fp->slice_state);
     av_buffer_unref(&fp->slice_offset_buf);
     av_buffer_unref(&fp->slice_status_buf);
-    av_buffer_unref(&fp->tmp_data);
 }
 
 const FFHWAccel ff_ffv1_vulkan_hwaccel = {
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2025-04-12  7:25 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-12  7:22 [FFmpeg-devel] [PATCH 01/18] hwcontext_vulkan: disable descriptor buffer extension on Intel Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 02/18] vulkan_ffv1: enable acceleration " Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 03/18] vulkan_ffv1: remove unused define Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 04/18] vulkan_ffv1: slightly optimize the range decoder Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 05/18] vulkan_ffv1: optimize symbol reader Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 06/18] vulkan_ffv1: allocate just as much memory for slice state as needed Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 07/18] vulkan_ffv1: init overread/corrupt fields Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 08/18] vulkan_ffv1: fallback to upload if mapping packet fails, fix fallback Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 09/18] vulkan_ffv1: fix reset shader dependencies Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 10/18] vulkan_ffv1: improve buffer barrier correctness for slice state Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 11/18] vulkan_ffv1: fix left-2 sample addressing Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 12/18] vulkan_ffv1: cache only 2 lines when decoding RGB Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 13/18] ffv1/vulkan: redo context count tracking and quant_table_idx management Lynne
2025-04-13 20:39   ` Jerome Martinez
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 14/18] vulkan_ffv1: externalize extended lookup check Lynne
2025-04-12  7:22 ` Lynne [this message]
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 16/18] vulkan_ffv1: shortcut +-1 coeffs in symbol reading Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 17/18] vulkan: add support for expect/assume Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 18/18] vulkan_ffv1: add cached symbol reader for AMD Lynne
2025-04-13 13:38 ` [FFmpeg-devel] [PATCH 01/18] hwcontext_vulkan: disable descriptor buffer extension on Intel Jerome Martinez

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250412072256.77815-15-dev@lynne.ee \
    --to=dev@lynne.ee \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git