From: Lynne <dev@lynne.ee> To: ffmpeg-devel@ffmpeg.org Cc: Lynne <dev@lynne.ee> Subject: [FFmpeg-devel] [PATCH 02/16] vulkan/ffv1: synchronize get_pred implementations between encoder and decoder Date: Wed, 14 May 2025 21:02:31 +0200 Message-ID: <20250514190253.162819-2-dev@lynne.ee> (raw) In-Reply-To: <20250514190253.162819-1-dev@lynne.ee> --- libavcodec/vulkan/ffv1_dec.comp | 32 ++++++------- libavcodec/vulkan/ffv1_enc.comp | 85 ++++++++++++++++++++------------- 2 files changed, 68 insertions(+), 49 deletions(-) diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp index fc0175c715..1c313b3168 100644 --- a/libavcodec/vulkan/ffv1_dec.comp +++ b/libavcodec/vulkan/ffv1_dec.comp @@ -29,19 +29,19 @@ #endif #ifdef RGB -ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) +ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx) { const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0); /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */ - VTYPE3 top = VTYPE3(TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[0]), - TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(0, -1)))[0]), - TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[0])); + VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[comp]), + TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]), + TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp])); /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. 
previous * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */ - TYPE cur = TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[0]); + TYPE cur = TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[comp]); int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + @@ -51,12 +51,12 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) TYPE cur2 = TYPE(0); if (expectEXT(off.x > 0, true)) { const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0); - cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + yoff_border2))[0]); + cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]); } base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; /* top-2 became current upon swap */ - TYPE top2 = TYPE(imageLoad(dec[p], sp + LADDR(off))[0]); + TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off))[comp]); base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; } @@ -64,7 +64,7 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) return ivec2(base, predict(cur, VTYPE2(top))); } #else -ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) +ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx) { const ivec2 yoff_border1 = off.x == 0 ? 
ivec2(1, -1) : ivec2(0, 0); sp += off; @@ -73,15 +73,15 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) TYPE(0), TYPE(0)); if (off.y > 0 && off != ivec2(0, 1)) - top[0] = TYPE(imageLoad(dec[p], sp + ivec2(-1, -1) + yoff_border1)[0]); + top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]); if (off.y > 0) { - top[1] = TYPE(imageLoad(dec[p], sp + ivec2(0, -1))[0]); - top[2] = TYPE(imageLoad(dec[p], sp + ivec2(min(1, sw - off.x - 1), -1))[0]); + top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]); + top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]); } TYPE cur = TYPE(0); if (off != ivec2(0, 0)) - cur = TYPE(imageLoad(dec[p], sp + ivec2(-1, 0) + yoff_border1)[0]); + cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]); int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + @@ -92,13 +92,13 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) TYPE cur2 = TYPE(0); if (off.x > 0 && off != ivec2(1, 0)) { const ivec2 yoff_border2 = off.x == 1 ? 
ivec2(1, -1) : ivec2(0, 0); - cur2 = TYPE(imageLoad(dec[p], sp + ivec2(-2, 0) + yoff_border2)[0]); + cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]); } base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; TYPE top2 = TYPE(0); if (off.y > 1) - top2 = TYPE(imageLoad(dec[p], sp + ivec2(0, -2))[0]); + top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]); base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; } @@ -171,7 +171,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, #endif for (int x = 0; x < w; x++) { - ivec2 pr = get_pred(sp, ivec2(x, y), p, w, + ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, quant_table_idx); uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]); @@ -216,7 +216,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w, for (int x = 0; x < w; x++) { ivec2 pos = sp + ivec2(x, y); int diff; - ivec2 pr = get_pred(sp, ivec2(x, y), p, w, + ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, quant_table_idx); VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*abs(pr[0])); diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp index 9854ecad51..7f8c831efa 100644 --- a/libavcodec/vulkan/ffv1_enc.comp +++ b/libavcodec/vulkan/ffv1_enc.comp @@ -20,43 +20,46 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits) +ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx) { const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); - const ivec2 yoff_border2 = off.x == 1 ? 
ivec2(1, -1) : ivec2(0, 0); - - TYPE top2 = TYPE(0); - if (off.y > 1) - top2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]); + sp += off; VTYPE3 top = VTYPE3(TYPE(0), TYPE(0), TYPE(0)); if (off.y > 0 && off != ivec2(0, 1)) - top[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + yoff_border1)[comp]); + top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]); if (off.y > 0) { - top[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]); - top[2] = TYPE(imageLoad(src[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]); + top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]); + top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]); } - VTYPE3 cur = VTYPE3(TYPE(0), - TYPE(0), - imageLoad(src[p], pos)[comp]); - if (off.x > 0 && off != ivec2(1, 0)) - cur[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + yoff_border2)[comp]); + TYPE cur = TYPE(0); if (off != ivec2(0, 0)) - cur[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + yoff_border1)[comp]); - - /* context, diff */ - ivec2 d = ivec2(get_context(VTYPE2(cur), top, top2, context_model), - cur[2] - predict(cur[1], VTYPE2(top))); - - if (d[0] < 0) - d = -d; + cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]); + + int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + + quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; + + if ((quant_table[quant_table_idx][3][127] != 0) || + (quant_table[quant_table_idx][4][127] != 0)) { + TYPE cur2 = TYPE(0); + if (off.x > 0 && off != ivec2(1, 0)) { + const ivec2 yoff_border2 = off.x == 1 ? 
ivec2(1, -1) : ivec2(0, 0); + cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]); + } + base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; - d[1] = fold(d[1], bits); + TYPE top2 = TYPE(0); + if (off.y > 1) + top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]); + base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; + } - return d; + /* context, prediction */ + return ivec2(base, predict(cur, VTYPE2(top))); } #ifndef GOLOMB @@ -108,7 +111,8 @@ void encode_line_pcm(inout SliceContext sc, int y, int p, int comp, } void encode_line(inout SliceContext sc, uint64_t state, - int y, int p, int comp, int bits, const int run_index) + int y, int p, int comp, int bits, + uint8_t quant_table_idx, const int run_index) { ivec2 sp = sc.slice_pos; @@ -119,7 +123,14 @@ void encode_line(inout SliceContext sc, uint64_t state, } for (int x = 0; x < w; x++) { - const ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits); + ivec2 d = get_pred(src[p], sp, ivec2(x, y), comp, w, quant_table_idx); + d[1] = int(imageLoad(src[p], sp + ivec2(x, y))[comp]) - d[1]; + + if (d[0] < 0) + d = -d; + + d[1] = fold(d[1], bits); + put_symbol(sc.c, state + CONTEXT_SIZE*d[0], d[1]); } } @@ -127,7 +138,8 @@ void encode_line(inout SliceContext sc, uint64_t state, #else /* GOLOMB */ void encode_line(inout SliceContext sc, uint64_t state, - int y, int p, int comp, int bits, inout int run_index) + int y, int p, int comp, int bits, + uint8_t quant_table_idx, inout int run_index) { ivec2 sp = sc.slice_pos; @@ -141,7 +153,13 @@ void encode_line(inout SliceContext sc, uint64_t state, bool run_mode = false; for (int x = 0; x < w; x++) { - ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits); + ivec2 d = get_pred(src[p], sp, ivec2(x, y), comp, w, quant_table_idx); + d[1] = int(imageLoad(src[p], sp + ivec2(x, y))[comp]) - d[1]; + + if (d[0] < 0) + d = -d; + + d[1] = fold(d[1], bits); if (d[0] == 0) run_mode = true; 
@@ -225,6 +243,7 @@ void encode_slice(inout SliceContext sc, const uint slice_idx) } else #endif { + u8vec4 quant_table_idx = sc.quant_table_idx.xyyz; uint64_t slice_state_off = uint64_t(slice_state) + slice_idx*plane_state_size*codec_planes; @@ -240,7 +259,7 @@ void encode_slice(inout SliceContext sc, const uint slice_idx) int comp = c - p; for (int y = 0; y < h; y++) - encode_line(sc, slice_state_off, y, p, comp, bits, run_index); + encode_line(sc, slice_state_off, y, p, comp, bits, quant_table_idx[c], run_index); /* For the second chroma plane, reuse the first plane's state */ if (c != 1) @@ -250,14 +269,14 @@ void encode_slice(inout SliceContext sc, const uint slice_idx) int run_index = 0; for (int y = 0; y < sc.slice_dim.y; y++) { encode_line(sc, slice_state_off + plane_state_size*0, - y, 0, 1, bits, run_index); + y, 0, 1, bits, quant_table_idx[0], run_index); encode_line(sc, slice_state_off + plane_state_size*1, - y, 0, 2, bits, run_index); + y, 0, 2, bits, quant_table_idx[1], run_index); encode_line(sc, slice_state_off + plane_state_size*1, - y, 0, 0, bits, run_index); + y, 0, 0, bits, quant_table_idx[2], run_index); if (transparency == 1) encode_line(sc, slice_state_off + plane_state_size*2, - y, 0, 3, bits, run_index); + y, 0, 3, bits, quant_table_idx[3], run_index); } #endif } -- 2.49.0.395.g12beb8f557c _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2025-05-14 19:03 UTC|newest] Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top 2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne 2025-05-14 19:02 ` Lynne [this message] 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 03/16] ffv1enc_vulkan: get rid of temporary data for the setup shader Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 04/16] ffv1enc_vulkan: unify EC code between setup and encode Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 05/16] ffv1enc_vulkan: minor EC optimizations Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 06/16] ffv1enc_vulkan: switch to 2-line cache, unify prediction code Lynne 2025-05-23 14:38 ` [FFmpeg-devel] [PATCH] ffv1enc_vulkan: fix array overflow Jerome Martinez 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 07/16] ffv1_common: minor RGB optimization Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 08/16] ffv1enc_vulkan: use ff_get_encode_buffer Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 09/16] vulkan_ffv1: fix PCM + cached symbol reader Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 10/16] ffv1enc_vulkan: implement the cached EC writer from the decoder Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 11/16] ffv1enc_vulkan: implement RCT search for level >= 4 Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 12/16] vulkan/ffv1: unify encode and decode get/put primitives Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 13/16] vulkan_ffv1: pipe through slice decoding status Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 14/16] vulkan: enable VK_KHR_shader_subgroup_rotate Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 15/16] hwcontext_vulkan: correct image transfer usage flags Lynne 2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 16/16] hwcontext_vulkan: only try exporting DMABUF memory on !WIN32 and only for DMABUF tiling Lynne
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250514190253.162819-2-dev@lynne.ee \
    --to=dev@lynne.ee \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git