Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Lynne <dev@lynne.ee>
To: ffmpeg-devel@ffmpeg.org
Cc: Lynne <dev@lynne.ee>
Subject: [FFmpeg-devel] [PATCH 03/16] ffv1enc_vulkan: get rid of temporary data for the setup shader
Date: Wed, 14 May 2025 21:02:32 +0200
Message-ID: <20250514190253.162819-3-dev@lynne.ee> (raw)
In-Reply-To: <20250514190253.162819-1-dev@lynne.ee>

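The setup shader now keeps its range coder contexts in a shader-global
state[CONTEXT_SIZE] array instead of a per-slice scratch buffer accessed
through a buffer device address. The temporary buffer pool is therefore
no longer allocated, and no scratch_data address is passed to the shader.

rangecoder.comp gains put_rac_direct(), which takes the state by value,
and put_rac(), which takes it as an inout uint8_t and renormalizes.
---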
 libavcodec/ffv1enc_vulkan.c           | 21 ---------
 libavcodec/vulkan/ffv1_enc_setup.comp | 65 +++++++++++----------------
 libavcodec/vulkan/rangecoder.comp     | 28 +++++++-----
 3 files changed, 42 insertions(+), 72 deletions(-)

diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index f4b54b8375..d78ba3aca8 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -88,9 +88,6 @@ typedef struct VulkanEncodeFFv1Context {
     AVBufferPool *out_data_pool;
     AVBufferPool *pkt_data_pool;
 
-    /* Temporary data buffer */
-    AVBufferPool *tmp_data_pool;
-
     /* Slice results buffer */
     AVBufferPool *results_data_pool;
 
@@ -303,11 +300,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
 
     AVFrame *intermediate_frame = NULL;
 
-    /* Temporary data */
-    size_t tmp_data_size;
-    AVBufferRef *tmp_data_ref;
-    FFVkBuffer *tmp_data_buf;
-
     /* Slice data */
     AVBufferRef *slice_data_ref;
     FFVkBuffer *slice_data_buf;
@@ -352,17 +344,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
 
     f->slice_count = f->max_slice_count;
 
-    /* Allocate temporary data buffer */
-    tmp_data_size = f->slice_count*CONTEXT_SIZE;
-    RET(ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
-                                &tmp_data_ref,
-                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
-                                VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
-                                NULL, tmp_data_size,
-                                VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
-    tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data;
-    ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
-
     /* Allocate slice buffer data */
     if (f->ac == AC_GOLOMB_RICE)
         plane_state_size = 8;
@@ -481,7 +462,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
     ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup);
     pd = (FFv1VkParameters) {
         .slice_state = slice_data_buf->address + f->slice_count*256,
-        .scratch_data = tmp_data_buf->address,
         .out_data = out_data_buf->address,
         .bits_per_raw_sample = f->bits_per_raw_sample,
         .sar[0] = pict->sample_aspect_ratio.num,
@@ -1698,7 +1678,6 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
 
     av_buffer_pool_uninit(&fv->out_data_pool);
     av_buffer_pool_uninit(&fv->pkt_data_pool);
-    av_buffer_pool_uninit(&fv->tmp_data_pool);
 
     av_buffer_unref(&fv->keyframe_slice_data_ref);
     av_buffer_pool_uninit(&fv->slice_data_pool);
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp
index 44c13404d8..d395770ba8 100644
--- a/libavcodec/vulkan/ffv1_enc_setup.comp
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp
@@ -20,6 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+uint8_t state[CONTEXT_SIZE];
+
 void init_slice(out SliceContext sc, const uint slice_idx)
 {
     /* Set coordinates */
@@ -45,67 +47,54 @@ void init_slice(out SliceContext sc, const uint slice_idx)
              slice_size_max);
 }
 
-void put_rac_full(inout RangeCoder c, uint64_t state, bool bit)
-{
-    put_rac_norenorm(c, state, bit);
-    if (c.range < 0x100)
-        renorm_encoder_full(c);
-}
-
-void put_symbol_unsigned(inout RangeCoder c, uint64_t state, uint v)
+void put_usymbol(inout RangeCoder c, uint v)
 {
     bool is_nil = (v == 0);
-    put_rac_full(c, state, is_nil);
+    put_rac(c, state[0], is_nil);
     if (is_nil)
         return;
 
     const int e = findMSB(v);
 
-    state += 1;
     for (int i = 0; i < e; i++)
-        put_rac_full(c, state + min(i, 9), true);
-    put_rac_full(c, state + min(e, 9), false);
+        put_rac(c, state[1 + min(i, 9)], true);
+    put_rac(c, state[1 + min(e, 9)], false);
 
-    state += 21;
     for (int i = e - 1; i >= 0; i--)
-        put_rac_full(c, state + min(i, 9), bool(bitfieldExtract(v, i, 1)));
+        put_rac(c, state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1)));
 }
 
-void write_slice_header(inout SliceContext sc, uint64_t state)
+void write_slice_header(inout SliceContext sc)
 {
-    u8buf sb = u8buf(state);
-
     [[unroll]]
     for (int i = 0; i < CONTEXT_SIZE; i++)
-        sb[i].v = uint8_t(128);
+        state[i] = uint8_t(128);
 
-    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.x);
-    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.y);
-    put_symbol_unsigned(sc.c, state, 0);
-    put_symbol_unsigned(sc.c, state, 0);
+    put_usymbol(sc.c, gl_WorkGroupID.x);
+    put_usymbol(sc.c, gl_WorkGroupID.y);
+    put_usymbol(sc.c, 0);
+    put_usymbol(sc.c, 0);
 
     for (int i = 0; i < codec_planes; i++)
-        put_symbol_unsigned(sc.c, state, sc.quant_table_idx[i]);
+        put_usymbol(sc.c, sc.quant_table_idx[i]);
 
-    put_symbol_unsigned(sc.c, state, pic_mode);
-    put_symbol_unsigned(sc.c, state, sar.x);
-    put_symbol_unsigned(sc.c, state, sar.y);
+    put_usymbol(sc.c, pic_mode);
+    put_usymbol(sc.c, sar.x);
+    put_usymbol(sc.c, sar.y);
 
     if (version >= 4) {
-        put_rac_full(sc.c, state, sc.slice_reset_contexts);
-        put_symbol_unsigned(sc.c, state, sc.slice_coding_mode);
+        put_rac(sc.c, state[0], sc.slice_reset_contexts);
+        put_usymbol(sc.c, sc.slice_coding_mode);
         if (sc.slice_coding_mode != 1 && colorspace == 1) {
-            put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y);
-            put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.x);
+            put_usymbol(sc.c, sc.slice_rct_coef.y);
+            put_usymbol(sc.c, sc.slice_rct_coef.x);
         }
     }
 }
 
-void write_frame_header(inout SliceContext sc, uint64_t state)
+void write_frame_header(inout SliceContext sc)
 {
-    u8buf sb = u8buf(state);
-    sb.v = uint8_t(128);
-    put_rac_full(sc.c, state, bool(key_frame));
+    put_rac_equi(sc.c, bool(key_frame));
 }
 
 #ifdef GOLOMB
@@ -122,16 +111,12 @@ void main(void)
 {
     const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
 
-    /* Write slice data */
-    uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE;
-    u8buf sb = u8buf(scratch_state);
-
     init_slice(slice_ctx[slice_idx], slice_idx);
 
     if (slice_idx == 0)
-        write_frame_header(slice_ctx[slice_idx], scratch_state);
+        write_frame_header(slice_ctx[slice_idx]);
 
-    write_slice_header(slice_ctx[slice_idx], scratch_state);
+    write_slice_header(slice_ctx[slice_idx]);
 
 #ifdef GOLOMB
     init_golomb(slice_ctx[slice_idx]);
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 256b5f0e79..1db42e1dc9 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -91,15 +91,13 @@ void renorm_encoder(inout RangeCoder c)
         bs[i].v = fill;
 }
 
-void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
+void put_rac_direct(inout RangeCoder c, uint8_t state, bool bit)
 {
-    u8buf sb = u8buf(state);
-    uint val = uint(sb.v);
-    int range1 = uint16_t((c.range * val) >> 8);
+    int range1 = uint16_t((c.range * state) >> 8);
 
 #ifdef DEBUG
-    if (val == 0)
-        debugPrintfEXT("Error: state is zero (addr: 0x%lx)", uint64_t(sb));
+    if (state == 0)
+        debugPrintfEXT("Error: state is zero");
     if (range1 >= c.range)
         debugPrintfEXT("Error: range1 >= c.range");
     if (range1 <= 0)
@@ -113,13 +111,21 @@ void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
     } else {
         c.range  = diff;
     }
+}
 
-    sb.v = zero_one_state[(uint(bit) << 8) + val];
+void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
+{
+    put_rac_direct(c, u8buf(state).v, bit);
 
-#ifdef DEBUG
-    if (sb.v == 0)
-        debugPrintfEXT("Error: inserted zero state from tab %i idx %i", bit, val);
-#endif
+    u8buf(state).v = zero_one_state[(uint(bit) << 8) + u8buf(state).v];
+}
+
+void put_rac(inout RangeCoder c, inout uint8_t state, bool bit)
+{
+    put_rac_direct(c, state, bit);
+    if (c.range < 0x100)
+        renorm_encoder_full(c);
+    state = zero_one_state[(uint(bit) << 8) + state];
 }
 
 /* Equiprobable bit */
-- 
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2025-05-14 19:03 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 02/16] vulkan/ffv1: synchronize get_pred implementations between encoder and decoder Lynne
2025-05-14 19:02 ` Lynne [this message]
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 04/16] ffv1enc_vulkan: unify EC code between setup and encode Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 05/16] ffv1enc_vulkan: minor EC optimizations Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 06/16] ffv1enc_vulkan: switch to 2-line cache, unify prediction code Lynne
2025-05-23 14:38   ` [FFmpeg-devel] [PATCH] ffv1enc_vulkan: fix array overflow Jerome Martinez
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 07/16] ffv1_common: minor RGB optimization Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 08/16] ffv1enc_vulkan: use ff_get_encode_buffer Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 09/16] vulkan_ffv1: fix PCM + cached symbol reader Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 10/16] ffv1enc_vulkan: implement the cached EC writer from the decoder Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 11/16] ffv1enc_vulkan: implement RCT search for level >= 4 Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 12/16] vulkan/ffv1: unify encode and decode get/put primitives Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 13/16] vulkan_ffv1: pipe through slice decoding status Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 14/16] vulkan: enable VK_KHR_shader_subgroup_rotate Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 15/16] hwcontext_vulkan: correct image transfer usage flags Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 16/16] hwcontext_vulkan: only try exporting DMABUF memory on !WIN32 and only for DMABUF tiling Lynne

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250514190253.162819-3-dev@lynne.ee \
    --to=dev@lynne.ee \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git