* [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file
@ 2025-05-14 19:02 Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 02/16] vulkan/ffv1: synchronize get_pred implementations between encoder and decoder Lynne
` (14 more replies)
0 siblings, 15 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
Merging the encoder shader variants into a single file makes them easier to work with, despite the heavy ifdeffery.
---
libavcodec/ffv1enc_vulkan.c | 21 +--
libavcodec/vulkan/Makefile | 4 +-
libavcodec/vulkan/ffv1_enc.comp | 240 ++++++++++++++++++++++++-
libavcodec/vulkan/ffv1_enc_ac.comp | 83 ---------
libavcodec/vulkan/ffv1_enc_common.comp | 101 -----------
libavcodec/vulkan/ffv1_enc_rgb.comp | 83 ---------
libavcodec/vulkan/ffv1_enc_vlc.comp | 112 ------------
7 files changed, 244 insertions(+), 400 deletions(-)
delete mode 100644 libavcodec/vulkan/ffv1_enc_ac.comp
delete mode 100644 libavcodec/vulkan/ffv1_enc_common.comp
delete mode 100644 libavcodec/vulkan/ffv1_enc_rgb.comp
delete mode 100644 libavcodec/vulkan/ffv1_enc_vlc.comp
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 42a98a5efa..f4b54b8375 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -114,13 +114,9 @@ extern const char *ff_source_rangecoder_comp;
extern const char *ff_source_ffv1_vlc_comp;
extern const char *ff_source_ffv1_common_comp;
extern const char *ff_source_ffv1_reset_comp;
-extern const char *ff_source_ffv1_enc_common_comp;
extern const char *ff_source_ffv1_enc_rct_comp;
-extern const char *ff_source_ffv1_enc_vlc_comp;
-extern const char *ff_source_ffv1_enc_ac_comp;
extern const char *ff_source_ffv1_enc_setup_comp;
extern const char *ff_source_ffv1_enc_comp;
-extern const char *ff_source_ffv1_enc_rgb_comp;
typedef struct FFv1VkParameters {
VkDeviceAddress slice_state;
@@ -961,6 +957,9 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
av_bprintf(&shd->src, "#define GOLOMB\n" );
}
+ if (fv->is_rgb)
+ av_bprintf(&shd->src, "#define RGB\n");
+
GLSLF(0, #define TYPE int%i_t ,smp_bits);
GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits);
GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits);
@@ -1260,7 +1259,6 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
- FFV1Context *f = &fv->ctx;
FFVulkanShader *shd = &fv->enc;
FFVulkanDescriptorSetBinding *desc_set;
@@ -1344,18 +1342,7 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
};
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
- /* Assemble the shader body */
- GLSLD(ff_source_ffv1_enc_common_comp);
-
- if (f->ac == AC_GOLOMB_RICE)
- GLSLD(ff_source_ffv1_enc_vlc_comp);
- else
- GLSLD(ff_source_ffv1_enc_ac_comp);
-
- if (fv->is_rgb)
- GLSLD(ff_source_ffv1_enc_rgb_comp);
- else
- GLSLD(ff_source_ffv1_enc_comp);
+ GLSLD(ff_source_ffv1_enc_comp);
RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
&spv_opaque));
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index feb5d2ea51..4bbcb38c6a 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -6,10 +6,8 @@ clean::
OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
vulkan/rangecoder.o vulkan/ffv1_vlc.o \
vulkan/ffv1_common.o vulkan/ffv1_reset.o \
- vulkan/ffv1_enc_common.o \
vulkan/ffv1_enc_rct.o vulkan/ffv1_enc_setup.o \
- vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \
- vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o
+ vulkan/ffv1_enc.o
OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/rangecoder.o vulkan/ffv1_vlc.o \
diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp
index 4b851fd711..9854ecad51 100644
--- a/libavcodec/vulkan/ffv1_enc.comp
+++ b/libavcodec/vulkan/ffv1_enc.comp
@@ -20,12 +20,186 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits)
+{
+ const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
+ const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
+
+ TYPE top2 = TYPE(0);
+ if (off.y > 1)
+ top2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]);
+
+ VTYPE3 top = VTYPE3(TYPE(0),
+ TYPE(0),
+ TYPE(0));
+ if (off.y > 0 && off != ivec2(0, 1))
+ top[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
+ if (off.y > 0) {
+ top[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]);
+ top[2] = TYPE(imageLoad(src[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
+ }
+
+ VTYPE3 cur = VTYPE3(TYPE(0),
+ TYPE(0),
+ imageLoad(src[p], pos)[comp]);
+ if (off.x > 0 && off != ivec2(1, 0))
+ cur[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + yoff_border2)[comp]);
+ if (off != ivec2(0, 0))
+ cur[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + yoff_border1)[comp]);
+
+ /* context, diff */
+ ivec2 d = ivec2(get_context(VTYPE2(cur), top, top2, context_model),
+ cur[2] - predict(cur[1], VTYPE2(top)));
+
+ if (d[0] < 0)
+ d = -d;
+
+ d[1] = fold(d[1], bits);
+
+ return d;
+}
+
+#ifndef GOLOMB
+void put_rac(inout RangeCoder c, uint64_t state, bool bit)
+{
+ put_rac_norenorm(c, state, bit);
+ if (c.range < 0x100)
+ renorm_encoder(c);
+}
+
+/* Note - only handles signed values */
+void put_symbol(inout RangeCoder c, uint64_t state, int v)
+{
+ bool is_nil = (v == 0);
+ put_rac(c, state, is_nil);
+ if (is_nil)
+ return;
+
+ const int a = abs(v);
+ const int e = findMSB(a);
+
+ state += 1;
+ for (int i = 0; i < e; i++)
+ put_rac(c, state + min(i, 9), true);
+ put_rac(c, state + min(e, 9), false);
+
+ state += 21;
+ for (int i = e - 1; i >= 0; i--)
+ put_rac(c, state + min(i, 9), bool(bitfieldExtract(a, i, 1)));
+
+ put_rac(c, state - 11 + min(e, 10), v < 0);
+}
+
+void encode_line_pcm(inout SliceContext sc, int y, int p, int comp,
+ int bits)
+{
+ ivec2 sp = sc.slice_pos;
+ int w = sc.slice_dim.x;
+ if (p > 0 && p < 3) {
+ w >>= chroma_shift.x;
+ sp >>= chroma_shift;
+ }
+
+ for (int x = 0; x < w; x++) {
+ uint v = imageLoad(src[p], (sp + ivec2(x, y)))[comp];
+ for (int i = (bits - 1); i >= 0; i--)
+ put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1)));
+ }
+}
+
+void encode_line(inout SliceContext sc, uint64_t state,
+ int y, int p, int comp, int bits, const int run_index)
+{
+ ivec2 sp = sc.slice_pos;
+
+ int w = sc.slice_dim.x;
+ if (p > 0 && p < 3) {
+ w >>= chroma_shift.x;
+ sp >>= chroma_shift;
+ }
+
+ for (int x = 0; x < w; x++) {
+ const ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits);
+ put_symbol(sc.c, state + CONTEXT_SIZE*d[0], d[1]);
+ }
+}
+
+#else /* GOLOMB */
+
+void encode_line(inout SliceContext sc, uint64_t state,
+ int y, int p, int comp, int bits, inout int run_index)
+{
+ ivec2 sp = sc.slice_pos;
+
+ int w = sc.slice_dim.x;
+ if (p > 0 && p < 3) {
+ w >>= chroma_shift.x;
+ sp >>= chroma_shift;
+ }
+
+ int run_count = 0;
+ bool run_mode = false;
+
+ for (int x = 0; x < w; x++) {
+ ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits);
+
+ if (d[0] == 0)
+ run_mode = true;
+
+ if (run_mode) {
+ if (d[1] != 0) {
+ /* A very unlikely loop */
+ while (run_count >= 1 << log2_run[run_index]) {
+ run_count -= 1 << log2_run[run_index];
+ run_index++;
+ put_bits(sc.pb, 1, 1);
+ }
+
+ put_bits(sc.pb, 1 + log2_run[run_index], run_count);
+ if (run_index != 0)
+ run_index--;
+ run_count = 0;
+ run_mode = false;
+ if (d[1] > 0)
+ d[1]--;
+ } else {
+ run_count++;
+ }
+ }
+
+ if (!run_mode) {
+ VlcState sb = VlcState(state + VLC_STATE_SIZE*d[0]);
+ Symbol sym = get_vlc_symbol(sb, d[1], bits);
+ put_bits(sc.pb, sym.bits, sym.val);
+ }
+ }
+
+ if (run_mode) {
+ while (run_count >= (1 << log2_run[run_index])) {
+ run_count -= 1 << log2_run[run_index];
+ run_index++;
+ put_bits(sc.pb, 1, 1);
+ }
+
+ if (run_count > 0)
+ put_bits(sc.pb, 1, 1);
+ }
+}
+#endif
+
void encode_slice(inout SliceContext sc, const uint slice_idx)
{
+#ifndef RGB
int bits = bits_per_raw_sample;
+#else
+ int bits = 9;
+ if (bits != 8 || sc.slice_coding_mode != 0)
+ bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
+#endif
#ifndef GOLOMB
if (sc.slice_coding_mode == 1) {
+#ifndef RGB
for (int c = 0; c < components; c++) {
int h = sc.slice_dim.y;
@@ -39,12 +213,22 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
for (int y = 0; y < h; y++)
encode_line_pcm(sc, y, p, comp, bits);
}
+#else
+ for (int y = 0; y < sc.slice_dim.y; y++) {
+ encode_line_pcm(sc, y, 0, 1, bits);
+ encode_line_pcm(sc, y, 0, 2, bits);
+ encode_line_pcm(sc, y, 0, 0, bits);
+ if (transparency == 1)
+ encode_line_pcm(sc, y, 0, 3, bits);
+ }
+#endif
} else
#endif
{
uint64_t slice_state_off = uint64_t(slice_state) +
slice_idx*plane_state_size*codec_planes;
+#ifndef RGB
for (int c = 0; c < components; c++) {
int run_index = 0;
@@ -62,13 +246,67 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
if (c != 1)
slice_state_off += plane_state_size;
}
+#else
+ int run_index = 0;
+ for (int y = 0; y < sc.slice_dim.y; y++) {
+ encode_line(sc, slice_state_off + plane_state_size*0,
+ y, 0, 1, bits, run_index);
+ encode_line(sc, slice_state_off + plane_state_size*1,
+ y, 0, 2, bits, run_index);
+ encode_line(sc, slice_state_off + plane_state_size*1,
+ y, 0, 0, bits, run_index);
+ if (transparency == 1)
+ encode_line(sc, slice_state_off + plane_state_size*2,
+ y, 0, 3, bits, run_index);
+ }
+#endif
+ }
+}
+
+void finalize_slice(inout SliceContext sc, const uint slice_idx)
+{
+#ifdef GOLOMB
+ uint32_t enc_len = sc.hdr_len + flush_put_bits(sc.pb);
+#else
+ uint32_t enc_len = rac_terminate(sc.c);
+#endif
+
+ u8buf bs = u8buf(sc.c.bytestream_start);
+
+ /* Append slice length */
+ u8vec4 enc_len_p = unpack8(enc_len);
+ bs[enc_len + 0].v = enc_len_p.z;
+ bs[enc_len + 1].v = enc_len_p.y;
+ bs[enc_len + 2].v = enc_len_p.x;
+ enc_len += 3;
+
+ /* Calculate and write CRC */
+ if (ec != 0) {
+ bs[enc_len].v = uint8_t(0);
+ enc_len++;
+
+ uint32_t crc = crcref;
+ for (int i = 0; i < enc_len; i++)
+ crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);
+
+ if (crcref != 0x00000000)
+ crc ^= 0x8CD88196;
+
+ u8vec4 crc_p = unpack8(crc);
+ bs[enc_len + 0].v = crc_p.x;
+ bs[enc_len + 1].v = crc_p.y;
+ bs[enc_len + 2].v = crc_p.z;
+ bs[enc_len + 3].v = crc_p.w;
+ enc_len += 4;
}
- finalize_slice(sc, slice_idx);
+ slice_results[slice_idx*2 + 0] = enc_len;
+ slice_results[slice_idx*2 + 1] = uint64_t(bs) - uint64_t(out_data);
}
void main(void)
{
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
encode_slice(slice_ctx[slice_idx], slice_idx);
+ finalize_slice(slice_ctx[slice_idx], slice_idx);
}
diff --git a/libavcodec/vulkan/ffv1_enc_ac.comp b/libavcodec/vulkan/ffv1_enc_ac.comp
deleted file mode 100644
index 0bbf58c5dd..0000000000
--- a/libavcodec/vulkan/ffv1_enc_ac.comp
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2024 Lynne <dev@lynne.ee>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-void put_rac(inout RangeCoder c, uint64_t state, bool bit)
-{
- put_rac_norenorm(c, state, bit);
- if (c.range < 0x100)
- renorm_encoder(c);
-}
-
-/* Note - only handles signed values */
-void put_symbol(inout RangeCoder c, uint64_t state, int v)
-{
- bool is_nil = (v == 0);
- put_rac(c, state, is_nil);
- if (is_nil)
- return;
-
- const int a = abs(v);
- const int e = findMSB(a);
-
- state += 1;
- for (int i = 0; i < e; i++)
- put_rac(c, state + min(i, 9), true);
- put_rac(c, state + min(e, 9), false);
-
- state += 21;
- for (int i = e - 1; i >= 0; i--)
- put_rac(c, state + min(i, 9), bool(bitfieldExtract(a, i, 1)));
-
- put_rac(c, state - 11 + min(e, 10), v < 0);
-}
-
-void encode_line_pcm(inout SliceContext sc, int y, int p, int comp,
- int bits)
-{
- ivec2 sp = sc.slice_pos;
- int w = sc.slice_dim.x;
- if (p > 0 && p < 3) {
- w >>= chroma_shift.x;
- sp >>= chroma_shift;
- }
-
- for (int x = 0; x < w; x++) {
- uint v = imageLoad(src[p], (sp + ivec2(x, y)))[comp];
- for (int i = (bits - 1); i >= 0; i--)
- put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1)));
- }
-}
-
-void encode_line(inout SliceContext sc, uint64_t state,
- int y, int p, int comp, int bits, const int run_index)
-{
- ivec2 sp = sc.slice_pos;
-
- int w = sc.slice_dim.x;
- if (p > 0 && p < 3) {
- w >>= chroma_shift.x;
- sp >>= chroma_shift;
- }
-
- for (int x = 0; x < w; x++) {
- const ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits);
- put_symbol(sc.c, state + CONTEXT_SIZE*d[0], d[1]);
- }
-}
diff --git a/libavcodec/vulkan/ffv1_enc_common.comp b/libavcodec/vulkan/ffv1_enc_common.comp
deleted file mode 100644
index 62c0624b0e..0000000000
--- a/libavcodec/vulkan/ffv1_enc_common.comp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * FFv1 codec
- *
- * Copyright (c) 2024 Lynne <dev@lynne.ee>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits)
-{
- const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
- const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
-
- TYPE top2 = TYPE(0);
- if (off.y > 1)
- top2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]);
-
- VTYPE3 top = VTYPE3(TYPE(0),
- TYPE(0),
- TYPE(0));
- if (off.y > 0 && off != ivec2(0, 1))
- top[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
- if (off.y > 0) {
- top[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]);
- top[2] = TYPE(imageLoad(src[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
- }
-
- VTYPE3 cur = VTYPE3(TYPE(0),
- TYPE(0),
- imageLoad(src[p], pos)[comp]);
- if (off.x > 0 && off != ivec2(1, 0))
- cur[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + yoff_border2)[comp]);
- if (off != ivec2(0, 0))
- cur[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + yoff_border1)[comp]);
-
- /* context, diff */
- ivec2 d = ivec2(get_context(VTYPE2(cur), top, top2, context_model),
- cur[2] - predict(cur[1], VTYPE2(top)));
-
- if (d[0] < 0)
- d = -d;
-
- d[1] = fold(d[1], bits);
-
- return d;
-}
-
-void finalize_slice(inout SliceContext sc, const uint slice_idx)
-{
-#ifdef GOLOMB
- uint32_t enc_len = sc.hdr_len + flush_put_bits(sc.pb);
-#else
- uint32_t enc_len = rac_terminate(sc.c);
-#endif
-
- u8buf bs = u8buf(sc.c.bytestream_start);
-
- /* Append slice length */
- u8vec4 enc_len_p = unpack8(enc_len);
- bs[enc_len + 0].v = enc_len_p.z;
- bs[enc_len + 1].v = enc_len_p.y;
- bs[enc_len + 2].v = enc_len_p.x;
- enc_len += 3;
-
- /* Calculate and write CRC */
- if (ec != 0) {
- bs[enc_len].v = uint8_t(0);
- enc_len++;
-
- uint32_t crc = crcref;
- for (int i = 0; i < enc_len; i++)
- crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);
-
- if (crcref != 0x00000000)
- crc ^= 0x8CD88196;
-
- u8vec4 crc_p = unpack8(crc);
- bs[enc_len + 0].v = crc_p.x;
- bs[enc_len + 1].v = crc_p.y;
- bs[enc_len + 2].v = crc_p.z;
- bs[enc_len + 3].v = crc_p.w;
- enc_len += 4;
- }
-
- slice_results[slice_idx*2 + 0] = enc_len;
- slice_results[slice_idx*2 + 1] = uint64_t(bs) - uint64_t(out_data);
-}
diff --git a/libavcodec/vulkan/ffv1_enc_rgb.comp b/libavcodec/vulkan/ffv1_enc_rgb.comp
deleted file mode 100644
index c176d94e8b..0000000000
--- a/libavcodec/vulkan/ffv1_enc_rgb.comp
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * FFv1 codec
- *
- * Copyright (c) 2024 Lynne <dev@lynne.ee>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-void encode_slice_rgb(inout SliceContext sc, const uint slice_idx)
-{
- int bits = 9;
- if (bits != 8 || sc.slice_coding_mode != 0)
- bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
-
- int run_index = 0;
-
-#ifndef GOLOMB
- if (sc.slice_coding_mode == 1) {
- if (transparency == 1) {
- for (int y = 0; y < sc.slice_dim.y; y++) {
- encode_line_pcm(sc, y, 0, 1, bits);
- encode_line_pcm(sc, y, 0, 2, bits);
- encode_line_pcm(sc, y, 0, 0, bits);
- encode_line_pcm(sc, y, 0, 3, bits);
- }
- } else {
- for (int y = 0; y < sc.slice_dim.y; y++) {
- encode_line_pcm(sc, y, 0, 1, bits);
- encode_line_pcm(sc, y, 0, 2, bits);
- encode_line_pcm(sc, y, 0, 0, bits);
- }
- }
- } else
-#endif
- {
- uint64_t slice_state_off = uint64_t(slice_state) +
- slice_idx*plane_state_size*codec_planes;
-
- if (transparency == 1) {
- for (int y = 0; y < sc.slice_dim.y; y++) {
- encode_line(sc, slice_state_off + plane_state_size*0,
- y, 0, 1, bits, run_index);
- encode_line(sc, slice_state_off + plane_state_size*1,
- y, 0, 2, bits, run_index);
- encode_line(sc, slice_state_off + plane_state_size*1,
- y, 0, 0, bits, run_index);
- encode_line(sc, slice_state_off + plane_state_size*2,
- y, 0, 3, bits, run_index);
- }
- } else {
- for (int y = 0; y < sc.slice_dim.y; y++) {
- encode_line(sc, slice_state_off + plane_state_size*0,
- y, 0, 1, bits, run_index);
- encode_line(sc, slice_state_off + plane_state_size*1,
- y, 0, 2, bits, run_index);
- encode_line(sc, slice_state_off + plane_state_size*1,
- y, 0, 0, bits, run_index);
- }
- }
- }
-
- finalize_slice(sc, slice_idx);
-}
-
-void main(void)
-{
- const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
- encode_slice_rgb(slice_ctx[slice_idx], slice_idx);
-}
diff --git a/libavcodec/vulkan/ffv1_enc_vlc.comp b/libavcodec/vulkan/ffv1_enc_vlc.comp
deleted file mode 100644
index 7a4d39e307..0000000000
--- a/libavcodec/vulkan/ffv1_enc_vlc.comp
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * FFv1 codec
- *
- * Copyright (c) 2024 Lynne <dev@lynne.ee>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-struct RLEState {
- int count;
- int diff;
- int index;
- bool mode;
-};
-
-void calc_new_state(inout RLEState state, int context)
-{
- if (context == 0)
- state.mode = false;
-
- if (!state.mode)
- return;
-
- if (state.diff > 0) {
- while (state.count >= (1 << log2_run[state.index])) {
- state.count -= 1 << log2_run[state.index];
- state.index++;
- }
- if (state.index > 0)
- state.index--;
- state.count = 0;
- state.mode = false;
- if (state.diff > 0)
- state.diff--;
- } else {
- state.count++;
- }
-}
-
-void encode_line(inout SliceContext sc, uint64_t state,
- int y, int p, int comp, int bits, inout int run_index)
-{
- ivec2 sp = sc.slice_pos;
-
- int w = sc.slice_dim.x;
- if (p > 0 && p < 3) {
- w >>= chroma_shift.x;
- sp >>= chroma_shift;
- }
-
- int run_count = 0;
- bool run_mode = false;
-
- for (int x = 0; x < w; x++) {
- ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits);
-
- if (d[0] == 0)
- run_mode = true;
-
- if (run_mode) {
- if (d[1] != 0) {
- /* A very unlikely loop */
- while (run_count >= 1 << log2_run[run_index]) {
- run_count -= 1 << log2_run[run_index];
- run_index++;
- put_bits(sc.pb, 1, 1);
- }
-
- put_bits(sc.pb, 1 + log2_run[run_index], run_count);
- if (run_index != 0)
- run_index--;
- run_count = 0;
- run_mode = false;
- if (d[1] > 0)
- d[1]--;
- } else {
- run_count++;
- }
- }
-
- if (!run_mode) {
- VlcState sb = VlcState(state + VLC_STATE_SIZE*d[0]);
- Symbol sym = get_vlc_symbol(sb, d[1], bits);
- put_bits(sc.pb, sym.bits, sym.val);
- }
- }
-
- if (run_mode) {
- while (run_count >= (1 << log2_run[run_index])) {
- run_count -= 1 << log2_run[run_index];
- run_index++;
- put_bits(sc.pb, 1, 1);
- }
-
- if (run_count > 0)
- put_bits(sc.pb, 1, 1);
- }
-}
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 02/16] vulkan/ffv1: synchronize get_pred implementations between encoder and decoder
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 03/16] ffv1enc_vulkan: get rid of temporary data for the setup shader Lynne
` (13 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/vulkan/ffv1_dec.comp | 32 ++++++-------
libavcodec/vulkan/ffv1_enc.comp | 85 ++++++++++++++++++++-------------
2 files changed, 68 insertions(+), 49 deletions(-)
diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
index fc0175c715..1c313b3168 100644
--- a/libavcodec/vulkan/ffv1_dec.comp
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -29,19 +29,19 @@
#endif
#ifdef RGB
-ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
+ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx)
{
const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0);
/* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */
- VTYPE3 top = VTYPE3(TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[0]),
- TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(0, -1)))[0]),
- TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[0]));
+ VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[comp]),
+ TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]),
+ TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp]));
/* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must
* return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous
* row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */
- TYPE cur = TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[0]);
+ TYPE cur = TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[comp]);
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
@@ -51,12 +51,12 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
TYPE cur2 = TYPE(0);
if (expectEXT(off.x > 0, true)) {
const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);
- cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + yoff_border2))[0]);
+ cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]);
}
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
/* top-2 became current upon swap */
- TYPE top2 = TYPE(imageLoad(dec[p], sp + LADDR(off))[0]);
+ TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off))[comp]);
base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
}
@@ -64,7 +64,7 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
return ivec2(base, predict(cur, VTYPE2(top)));
}
#else
-ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
+ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx)
{
const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
sp += off;
@@ -73,15 +73,15 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
TYPE(0),
TYPE(0));
if (off.y > 0 && off != ivec2(0, 1))
- top[0] = TYPE(imageLoad(dec[p], sp + ivec2(-1, -1) + yoff_border1)[0]);
+ top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]);
if (off.y > 0) {
- top[1] = TYPE(imageLoad(dec[p], sp + ivec2(0, -1))[0]);
- top[2] = TYPE(imageLoad(dec[p], sp + ivec2(min(1, sw - off.x - 1), -1))[0]);
+ top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]);
+ top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]);
}
TYPE cur = TYPE(0);
if (off != ivec2(0, 0))
- cur = TYPE(imageLoad(dec[p], sp + ivec2(-1, 0) + yoff_border1)[0]);
+ cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]);
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
@@ -92,13 +92,13 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
TYPE cur2 = TYPE(0);
if (off.x > 0 && off != ivec2(1, 0)) {
const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
- cur2 = TYPE(imageLoad(dec[p], sp + ivec2(-2, 0) + yoff_border2)[0]);
+ cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]);
}
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
TYPE top2 = TYPE(0);
if (off.y > 1)
- top2 = TYPE(imageLoad(dec[p], sp + ivec2(0, -2))[0]);
+ top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]);
base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
}
@@ -171,7 +171,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
#endif
for (int x = 0; x < w; x++) {
- ivec2 pr = get_pred(sp, ivec2(x, y), p, w,
+ ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
quant_table_idx);
uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]);
@@ -216,7 +216,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
for (int x = 0; x < w; x++) {
ivec2 pos = sp + ivec2(x, y);
int diff;
- ivec2 pr = get_pred(sp, ivec2(x, y), p, w,
+ ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
quant_table_idx);
VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*abs(pr[0]));
diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp
index 9854ecad51..7f8c831efa 100644
--- a/libavcodec/vulkan/ffv1_enc.comp
+++ b/libavcodec/vulkan/ffv1_enc.comp
@@ -20,43 +20,46 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits)
+ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx)
{
const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
- const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
-
- TYPE top2 = TYPE(0);
- if (off.y > 1)
- top2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]);
+ sp += off;
VTYPE3 top = VTYPE3(TYPE(0),
TYPE(0),
TYPE(0));
if (off.y > 0 && off != ivec2(0, 1))
- top[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
+ top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]);
if (off.y > 0) {
- top[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]);
- top[2] = TYPE(imageLoad(src[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
+ top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]);
+ top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]);
}
- VTYPE3 cur = VTYPE3(TYPE(0),
- TYPE(0),
- imageLoad(src[p], pos)[comp]);
- if (off.x > 0 && off != ivec2(1, 0))
- cur[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + yoff_border2)[comp]);
+ TYPE cur = TYPE(0);
if (off != ivec2(0, 0))
- cur[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + yoff_border1)[comp]);
-
- /* context, diff */
- ivec2 d = ivec2(get_context(VTYPE2(cur), top, top2, context_model),
- cur[2] - predict(cur[1], VTYPE2(top)));
-
- if (d[0] < 0)
- d = -d;
+ cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]);
+
+ int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
+ quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
+ quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
+
+ if ((quant_table[quant_table_idx][3][127] != 0) ||
+ (quant_table[quant_table_idx][4][127] != 0)) {
+ TYPE cur2 = TYPE(0);
+ if (off.x > 0 && off != ivec2(1, 0)) {
+ const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
+ cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]);
+ }
+ base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
- d[1] = fold(d[1], bits);
+ TYPE top2 = TYPE(0);
+ if (off.y > 1)
+ top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]);
+ base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
+ }
- return d;
+ /* context, prediction */
+ return ivec2(base, predict(cur, VTYPE2(top)));
}
#ifndef GOLOMB
@@ -108,7 +111,8 @@ void encode_line_pcm(inout SliceContext sc, int y, int p, int comp,
}
void encode_line(inout SliceContext sc, uint64_t state,
- int y, int p, int comp, int bits, const int run_index)
+ int y, int p, int comp, int bits,
+ uint8_t quant_table_idx, const int run_index)
{
ivec2 sp = sc.slice_pos;
@@ -119,7 +123,14 @@ void encode_line(inout SliceContext sc, uint64_t state,
}
for (int x = 0; x < w; x++) {
- const ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits);
+ ivec2 d = get_pred(src[p], sp, ivec2(x, y), comp, w, quant_table_idx);
+ d[1] = int(imageLoad(src[p], sp + ivec2(x, y))[comp]) - d[1];
+
+ if (d[0] < 0)
+ d = -d;
+
+ d[1] = fold(d[1], bits);
+
put_symbol(sc.c, state + CONTEXT_SIZE*d[0], d[1]);
}
}
@@ -127,7 +138,8 @@ void encode_line(inout SliceContext sc, uint64_t state,
#else /* GOLOMB */
void encode_line(inout SliceContext sc, uint64_t state,
- int y, int p, int comp, int bits, inout int run_index)
+ int y, int p, int comp, int bits,
+ uint8_t quant_table_idx, inout int run_index)
{
ivec2 sp = sc.slice_pos;
@@ -141,7 +153,13 @@ void encode_line(inout SliceContext sc, uint64_t state,
bool run_mode = false;
for (int x = 0; x < w; x++) {
- ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits);
+ ivec2 d = get_pred(src[p], sp, ivec2(x, y), comp, w, quant_table_idx);
+ d[1] = int(imageLoad(src[p], sp + ivec2(x, y))[comp]) - d[1];
+
+ if (d[0] < 0)
+ d = -d;
+
+ d[1] = fold(d[1], bits);
if (d[0] == 0)
run_mode = true;
@@ -225,6 +243,7 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
} else
#endif
{
+ u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
uint64_t slice_state_off = uint64_t(slice_state) +
slice_idx*plane_state_size*codec_planes;
@@ -240,7 +259,7 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
int comp = c - p;
for (int y = 0; y < h; y++)
- encode_line(sc, slice_state_off, y, p, comp, bits, run_index);
+ encode_line(sc, slice_state_off, y, p, comp, bits, quant_table_idx[c], run_index);
/* For the second chroma plane, reuse the first plane's state */
if (c != 1)
@@ -250,14 +269,14 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
int run_index = 0;
for (int y = 0; y < sc.slice_dim.y; y++) {
encode_line(sc, slice_state_off + plane_state_size*0,
- y, 0, 1, bits, run_index);
+ y, 0, 1, bits, quant_table_idx[0], run_index);
encode_line(sc, slice_state_off + plane_state_size*1,
- y, 0, 2, bits, run_index);
+ y, 0, 2, bits, quant_table_idx[1], run_index);
encode_line(sc, slice_state_off + plane_state_size*1,
- y, 0, 0, bits, run_index);
+ y, 0, 0, bits, quant_table_idx[2], run_index);
if (transparency == 1)
encode_line(sc, slice_state_off + plane_state_size*2,
- y, 0, 3, bits, run_index);
+ y, 0, 3, bits, quant_table_idx[3], run_index);
}
#endif
}
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 03/16] ffv1enc_vulkan: get rid of temporary data for the setup shader
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 02/16] vulkan/ffv1: synchronize get_pred implementations between encoder and decoder Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 04/16] ffv1enc_vulkan: unify EC code between setup and encode Lynne
` (12 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/ffv1enc_vulkan.c | 21 ---------
libavcodec/vulkan/ffv1_enc_setup.comp | 65 +++++++++++----------------
libavcodec/vulkan/rangecoder.comp | 28 +++++++-----
3 files changed, 42 insertions(+), 72 deletions(-)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index f4b54b8375..d78ba3aca8 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -88,9 +88,6 @@ typedef struct VulkanEncodeFFv1Context {
AVBufferPool *out_data_pool;
AVBufferPool *pkt_data_pool;
- /* Temporary data buffer */
- AVBufferPool *tmp_data_pool;
-
/* Slice results buffer */
AVBufferPool *results_data_pool;
@@ -303,11 +300,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
AVFrame *intermediate_frame = NULL;
- /* Temporary data */
- size_t tmp_data_size;
- AVBufferRef *tmp_data_ref;
- FFVkBuffer *tmp_data_buf;
-
/* Slice data */
AVBufferRef *slice_data_ref;
FFVkBuffer *slice_data_buf;
@@ -352,17 +344,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
f->slice_count = f->max_slice_count;
- /* Allocate temporary data buffer */
- tmp_data_size = f->slice_count*CONTEXT_SIZE;
- RET(ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
- &tmp_data_ref,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
- NULL, tmp_data_size,
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
- tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data;
- ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
-
/* Allocate slice buffer data */
if (f->ac == AC_GOLOMB_RICE)
plane_state_size = 8;
@@ -481,7 +462,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup);
pd = (FFv1VkParameters) {
.slice_state = slice_data_buf->address + f->slice_count*256,
- .scratch_data = tmp_data_buf->address,
.out_data = out_data_buf->address,
.bits_per_raw_sample = f->bits_per_raw_sample,
.sar[0] = pict->sample_aspect_ratio.num,
@@ -1698,7 +1678,6 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
av_buffer_pool_uninit(&fv->out_data_pool);
av_buffer_pool_uninit(&fv->pkt_data_pool);
- av_buffer_pool_uninit(&fv->tmp_data_pool);
av_buffer_unref(&fv->keyframe_slice_data_ref);
av_buffer_pool_uninit(&fv->slice_data_pool);
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp
index 44c13404d8..d395770ba8 100644
--- a/libavcodec/vulkan/ffv1_enc_setup.comp
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp
@@ -20,6 +20,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+uint8_t state[CONTEXT_SIZE];
+
void init_slice(out SliceContext sc, const uint slice_idx)
{
/* Set coordinates */
@@ -45,67 +47,54 @@ void init_slice(out SliceContext sc, const uint slice_idx)
slice_size_max);
}
-void put_rac_full(inout RangeCoder c, uint64_t state, bool bit)
-{
- put_rac_norenorm(c, state, bit);
- if (c.range < 0x100)
- renorm_encoder_full(c);
-}
-
-void put_symbol_unsigned(inout RangeCoder c, uint64_t state, uint v)
+void put_usymbol(inout RangeCoder c, uint v)
{
bool is_nil = (v == 0);
- put_rac_full(c, state, is_nil);
+ put_rac(c, state[0], is_nil);
if (is_nil)
return;
const int e = findMSB(v);
- state += 1;
for (int i = 0; i < e; i++)
- put_rac_full(c, state + min(i, 9), true);
- put_rac_full(c, state + min(e, 9), false);
+ put_rac(c, state[1 + min(i, 9)], true);
+ put_rac(c, state[1 + min(e, 9)], false);
- state += 21;
for (int i = e - 1; i >= 0; i--)
- put_rac_full(c, state + min(i, 9), bool(bitfieldExtract(v, i, 1)));
+ put_rac(c, state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1)));
}
-void write_slice_header(inout SliceContext sc, uint64_t state)
+void write_slice_header(inout SliceContext sc)
{
- u8buf sb = u8buf(state);
-
[[unroll]]
for (int i = 0; i < CONTEXT_SIZE; i++)
- sb[i].v = uint8_t(128);
+ state[i] = uint8_t(128);
- put_symbol_unsigned(sc.c, state, gl_WorkGroupID.x);
- put_symbol_unsigned(sc.c, state, gl_WorkGroupID.y);
- put_symbol_unsigned(sc.c, state, 0);
- put_symbol_unsigned(sc.c, state, 0);
+ put_usymbol(sc.c, gl_WorkGroupID.x);
+ put_usymbol(sc.c, gl_WorkGroupID.y);
+ put_usymbol(sc.c, 0);
+ put_usymbol(sc.c, 0);
for (int i = 0; i < codec_planes; i++)
- put_symbol_unsigned(sc.c, state, sc.quant_table_idx[i]);
+ put_usymbol(sc.c, sc.quant_table_idx[i]);
- put_symbol_unsigned(sc.c, state, pic_mode);
- put_symbol_unsigned(sc.c, state, sar.x);
- put_symbol_unsigned(sc.c, state, sar.y);
+ put_usymbol(sc.c, pic_mode);
+ put_usymbol(sc.c, sar.x);
+ put_usymbol(sc.c, sar.y);
if (version >= 4) {
- put_rac_full(sc.c, state, sc.slice_reset_contexts);
- put_symbol_unsigned(sc.c, state, sc.slice_coding_mode);
+ put_rac(sc.c, state[0], sc.slice_reset_contexts);
+ put_usymbol(sc.c, sc.slice_coding_mode);
if (sc.slice_coding_mode != 1 && colorspace == 1) {
- put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y);
- put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.x);
+ put_usymbol(sc.c, sc.slice_rct_coef.y);
+ put_usymbol(sc.c, sc.slice_rct_coef.x);
}
}
}
-void write_frame_header(inout SliceContext sc, uint64_t state)
+void write_frame_header(inout SliceContext sc)
{
- u8buf sb = u8buf(state);
- sb.v = uint8_t(128);
- put_rac_full(sc.c, state, bool(key_frame));
+ put_rac_equi(sc.c, bool(key_frame));
}
#ifdef GOLOMB
@@ -122,16 +111,12 @@ void main(void)
{
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
- /* Write slice data */
- uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE;
- u8buf sb = u8buf(scratch_state);
-
init_slice(slice_ctx[slice_idx], slice_idx);
if (slice_idx == 0)
- write_frame_header(slice_ctx[slice_idx], scratch_state);
+ write_frame_header(slice_ctx[slice_idx]);
- write_slice_header(slice_ctx[slice_idx], scratch_state);
+ write_slice_header(slice_ctx[slice_idx]);
#ifdef GOLOMB
init_golomb(slice_ctx[slice_idx]);
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 256b5f0e79..1db42e1dc9 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -91,15 +91,13 @@ void renorm_encoder(inout RangeCoder c)
bs[i].v = fill;
}
-void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
+void put_rac_direct(inout RangeCoder c, uint8_t state, bool bit)
{
- u8buf sb = u8buf(state);
- uint val = uint(sb.v);
- int range1 = uint16_t((c.range * val) >> 8);
+ int range1 = uint16_t((c.range * state) >> 8);
#ifdef DEBUG
- if (val == 0)
- debugPrintfEXT("Error: state is zero (addr: 0x%lx)", uint64_t(sb));
+ if (state == 0)
+ debugPrintfEXT("Error: state is zero");
if (range1 >= c.range)
debugPrintfEXT("Error: range1 >= c.range");
if (range1 <= 0)
@@ -113,13 +111,21 @@ void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
} else {
c.range = diff;
}
+}
- sb.v = zero_one_state[(uint(bit) << 8) + val];
+void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
+{
+ put_rac_direct(c, u8buf(state).v, bit);
-#ifdef DEBUG
- if (sb.v == 0)
- debugPrintfEXT("Error: inserted zero state from tab %i idx %i", bit, val);
-#endif
+ u8buf(state).v = zero_one_state[(uint(bit) << 8) + u8buf(state).v];
+}
+
+void put_rac(inout RangeCoder c, inout uint8_t state, bool bit)
+{
+ put_rac_direct(c, state, bit);
+ if (c.range < 0x100)
+ renorm_encoder_full(c);
+ state = zero_one_state[(uint(bit) << 8) + state];
}
/* Equiprobable bit */
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 04/16] ffv1enc_vulkan: unify EC code between setup and encode
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 02/16] vulkan/ffv1: synchronize get_pred implementations between encoder and decoder Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 03/16] ffv1enc_vulkan: get rid of temporary data for the setup shader Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 05/16] ffv1enc_vulkan: minor EC optimizations Lynne
` (11 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/ffv1enc_vulkan.c | 1 +
libavcodec/vulkan/ffv1_enc.comp | 7 -------
libavcodec/vulkan/ffv1_enc_setup.comp | 10 +++++-----
libavcodec/vulkan/rangecoder.comp | 23 +++++++++++------------
4 files changed, 17 insertions(+), 24 deletions(-)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index d78ba3aca8..956463e932 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -976,6 +976,7 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+ av_bprintf(&shd->src, "#define FULL_RENORM\n");
desc_set = (FFVulkanDescriptorSetBinding []) {
{
diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp
index 7f8c831efa..a3c22f7459 100644
--- a/libavcodec/vulkan/ffv1_enc.comp
+++ b/libavcodec/vulkan/ffv1_enc.comp
@@ -63,13 +63,6 @@ ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, ui
}
#ifndef GOLOMB
-void put_rac(inout RangeCoder c, uint64_t state, bool bit)
-{
- put_rac_norenorm(c, state, bit);
- if (c.range < 0x100)
- renorm_encoder(c);
-}
-
/* Note - only handles signed values */
void put_symbol(inout RangeCoder c, uint64_t state, int v)
{
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp
index d395770ba8..6f21e47523 100644
--- a/libavcodec/vulkan/ffv1_enc_setup.comp
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp
@@ -50,18 +50,18 @@ void init_slice(out SliceContext sc, const uint slice_idx)
void put_usymbol(inout RangeCoder c, uint v)
{
bool is_nil = (v == 0);
- put_rac(c, state[0], is_nil);
+ put_rac_direct(c, state[0], is_nil);
if (is_nil)
return;
const int e = findMSB(v);
for (int i = 0; i < e; i++)
- put_rac(c, state[1 + min(i, 9)], true);
- put_rac(c, state[1 + min(e, 9)], false);
+ put_rac_direct(c, state[1 + min(i, 9)], true);
+ put_rac_direct(c, state[1 + min(e, 9)], false);
for (int i = e - 1; i >= 0; i--)
- put_rac(c, state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1)));
+ put_rac_direct(c, state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1)));
}
void write_slice_header(inout SliceContext sc)
@@ -83,7 +83,7 @@ void write_slice_header(inout SliceContext sc)
put_usymbol(sc.c, sar.y);
if (version >= 4) {
- put_rac(sc.c, state[0], sc.slice_reset_contexts);
+ put_rac_direct(sc.c, state[0], sc.slice_reset_contexts);
put_usymbol(sc.c, sc.slice_coding_mode);
if (sc.slice_coding_mode != 1 && colorspace == 1) {
put_usymbol(sc.c, sc.slice_rct_coef.y);
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 1db42e1dc9..badc65293f 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -31,8 +31,9 @@ struct RangeCoder {
uint8_t outstanding_byte;
};
+#ifdef FULL_RENORM
/* Full renorm version that can handle outstanding_byte == 0xFF */
-void renorm_encoder_full(inout RangeCoder c)
+void renorm_encoder(inout RangeCoder c)
{
int bs_cnt = 0;
u8buf bytestream = u8buf(c.bytestream);
@@ -62,6 +63,8 @@ void renorm_encoder_full(inout RangeCoder c)
c.low = bitfieldInsert(0, c.low, 8, 8);
}
+#else
+
/* Cannot deal with outstanding_byte == -1 in the name of speed */
void renorm_encoder(inout RangeCoder c)
{
@@ -90,8 +93,9 @@ void renorm_encoder(inout RangeCoder c)
for (int i = 1; i < oc; i++)
bs[i].v = fill;
}
+#endif
-void put_rac_direct(inout RangeCoder c, uint8_t state, bool bit)
+void put_rac_direct(inout RangeCoder c, inout uint8_t state, bool bit)
{
int range1 = uint16_t((c.range * state) >> 8);
@@ -111,21 +115,16 @@ void put_rac_direct(inout RangeCoder c, uint8_t state, bool bit)
} else {
c.range = diff;
}
-}
-void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
-{
- put_rac_direct(c, u8buf(state).v, bit);
+ if (c.range < 0x100)
+ renorm_encoder(c);
- u8buf(state).v = zero_one_state[(uint(bit) << 8) + u8buf(state).v];
+ state = zero_one_state[(uint(bit) << 8) + state];
}
-void put_rac(inout RangeCoder c, inout uint8_t state, bool bit)
+void put_rac(inout RangeCoder c, uint64_t state, bool bit)
{
- put_rac_direct(c, state, bit);
- if (c.range < 0x100)
- renorm_encoder_full(c);
- state = zero_one_state[(uint(bit) << 8) + state];
+ put_rac_direct(c, u8buf(state).v, bit);
}
/* Equiprobable bit */
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 05/16] ffv1enc_vulkan: minor EC optimizations
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (2 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 04/16] ffv1enc_vulkan: unify EC code between setup and encode Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 06/16] ffv1enc_vulkan: switch to 2-line cache, unify prediction code Lynne
` (10 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/vulkan/rangecoder.comp | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index badc65293f..9e2c5fbecf 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -109,14 +109,10 @@ void put_rac_direct(inout RangeCoder c, inout uint8_t state, bool bit)
#endif
int diff = c.range - range1;
- if (bit) {
- c.low += diff;
- c.range = range1;
- } else {
- c.range = diff;
- }
+ c.low += bit ? diff : 0;
+ c.range = bit ? range1 : diff;
- if (c.range < 0x100)
+ if (expectEXT(c.range < 0x100, false))
renorm_encoder(c);
state = zero_one_state[(uint(bit) << 8) + state];
@@ -139,12 +135,9 @@ void put_rac_equi(inout RangeCoder c, bool bit)
debugPrintfEXT("Error: range1 <= 0");
#endif
- if (bit) {
- c.low += c.range - range1;
- c.range = range1;
- } else {
- c.range -= range1;
- }
+ int diff = c.range - range1;
+ c.low += bit ? diff : 0;
+ c.range = bit ? range1 : diff;
if (expectEXT(c.range < 0x100, false))
renorm_encoder(c);
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 06/16] ffv1enc_vulkan: switch to 2-line cache, unify prediction code
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (3 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 05/16] ffv1enc_vulkan: minor EC optimizations Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-23 14:38 ` [FFmpeg-devel] [PATCH] ffv1enc_vulkan: fix array overflow Jerome Martinez
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 07/16] ffv1_common: minor RGB optimization Lynne
` (9 subsequent siblings)
14 siblings, 1 reply; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/ffv1enc_vulkan.c | 379 +++++++++--------------------
libavcodec/vulkan/ffv1_common.comp | 87 +++++++
libavcodec/vulkan/ffv1_dec.comp | 91 +------
libavcodec/vulkan/ffv1_enc.comp | 155 ++++++------
libavcodec/vulkan_ffv1.c | 5 +-
5 files changed, 288 insertions(+), 429 deletions(-)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 956463e932..bab9bb640b 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -37,6 +37,9 @@
#define LG_ALIGN_W 32
#define LG_ALIGN_H 32
+/* Unlike the decoder, we need 4 lines (but really only 3) */
+#define RGB_LINECACHE 4
+
typedef struct VulkanEncodeFFv1FrameData {
/* Output data */
AVBufferRef *out_data_ref;
@@ -72,7 +75,6 @@ typedef struct VulkanEncodeFFv1Context {
FFVulkanShader setup;
FFVulkanShader reset;
- FFVulkanShader rct;
FFVulkanShader enc;
/* Constant read-only buffers */
@@ -111,7 +113,6 @@ extern const char *ff_source_rangecoder_comp;
extern const char *ff_source_ffv1_vlc_comp;
extern const char *ff_source_ffv1_common_comp;
extern const char *ff_source_ffv1_reset_comp;
-extern const char *ff_source_ffv1_enc_rct_comp;
extern const char *ff_source_ffv1_enc_setup_comp;
extern const char *ff_source_ffv1_enc_comp;
@@ -120,6 +121,7 @@ typedef struct FFv1VkParameters {
VkDeviceAddress scratch_data;
VkDeviceAddress out_data;
+ int32_t fmt_lut[4];
int32_t sar[2];
uint32_t chroma_shift[2];
@@ -127,7 +129,9 @@ typedef struct FFv1VkParameters {
uint32_t context_count;
uint32_t crcref;
uint32_t slice_size_max;
+ int rct_offset;
+ uint8_t extend_lookup[8];
uint8_t bits_per_raw_sample;
uint8_t context_model;
uint8_t version;
@@ -137,13 +141,14 @@ typedef struct FFv1VkParameters {
uint8_t components;
uint8_t planes;
uint8_t codec_planes;
+ uint8_t planar_rgb;
uint8_t transparency;
uint8_t colorspace;
uint8_t pic_mode;
uint8_t ec;
uint8_t ppi;
uint8_t chunks;
- uint8_t padding[1];
+ uint8_t padding[4];
} FFv1VkParameters;
static void add_push_data(FFVulkanShader *shd)
@@ -153,6 +158,7 @@ static void add_push_data(FFVulkanShader *shd)
GLSLC(1, u8buf scratch_data; );
GLSLC(1, u8buf out_data; );
GLSLC(0, );
+ GLSLC(1, ivec4 fmt_lut; );
GLSLC(1, ivec2 sar; );
GLSLC(1, uvec2 chroma_shift; );
GLSLC(0, );
@@ -160,7 +166,9 @@ static void add_push_data(FFVulkanShader *shd)
GLSLC(1, uint context_count; );
GLSLC(1, uint32_t crcref; );
GLSLC(1, uint32_t slice_size_max; );
+ GLSLC(1, int rct_offset; );
GLSLC(0, );
+ GLSLC(1, uint8_t extend_lookup[8]; );
GLSLC(1, uint8_t bits_per_raw_sample; );
GLSLC(1, uint8_t context_model; );
GLSLC(1, uint8_t version; );
@@ -170,122 +178,19 @@ static void add_push_data(FFVulkanShader *shd)
GLSLC(1, uint8_t components; );
GLSLC(1, uint8_t planes; );
GLSLC(1, uint8_t codec_planes; );
+ GLSLC(1, uint8_t planar_rgb; );
GLSLC(1, uint8_t transparency; );
GLSLC(1, uint8_t colorspace; );
GLSLC(1, uint8_t pic_mode; );
GLSLC(1, uint8_t ec; );
GLSLC(1, uint8_t ppi; );
GLSLC(1, uint8_t chunks; );
- GLSLC(1, uint8_t padding[1]; );
+ GLSLC(1, uint8_t padding[4]; );
GLSLC(0, }; );
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
}
-static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec,
- AVFrame *enc_in, VkImageView *enc_in_views,
- AVFrame **intermediate_frame, VkImageView *intermediate_views,
- VkImageMemoryBarrier2 *img_bar, int *nb_img_bar,
- VkBufferMemoryBarrier2 *buf_bar, int *nb_buf_bar,
- FFVkBuffer *slice_data_buf, uint32_t slice_data_size)
-{
- int err;
- VulkanEncodeFFv1Context *fv = avctx->priv_data;
- FFV1Context *f = &fv->ctx;
- FFVulkanFunctions *vk = &fv->s.vkfn;
- AVHWFramesContext *src_hwfc = (AVHWFramesContext *)enc_in->hw_frames_ctx->data;
- FFv1VkRCTParameters pd;
-
- /* Create a temporaty frame */
- *intermediate_frame = av_frame_alloc();
- if (!(*intermediate_frame))
- return AVERROR(ENOMEM);
-
- RET(av_hwframe_get_buffer(fv->intermediate_frames_ref,
- *intermediate_frame, 0));
-
- RET(ff_vk_exec_add_dep_frame(&fv->s, exec, *intermediate_frame,
- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
- RET(ff_vk_create_imageviews(&fv->s, exec, intermediate_views,
- *intermediate_frame,
- fv->rep_fmt));
-
- /* Update descriptors */
- ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->rct,
- 1, 0, 0,
- slice_data_buf,
- 0, slice_data_size*f->slice_count,
- VK_FORMAT_UNDEFINED);
- ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct,
- enc_in, enc_in_views,
- 1, 1,
- VK_IMAGE_LAYOUT_GENERAL,
- VK_NULL_HANDLE);
- ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct,
- *intermediate_frame, intermediate_views,
- 1, 2,
- VK_IMAGE_LAYOUT_GENERAL,
- VK_NULL_HANDLE);
-
- ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar,
- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- VK_ACCESS_SHADER_WRITE_BIT,
- VK_IMAGE_LAYOUT_GENERAL,
- VK_QUEUE_FAMILY_IGNORED);
-
- /* Prep the input/output images */
- vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
- .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
- .pImageMemoryBarriers = img_bar,
- .imageMemoryBarrierCount = *nb_img_bar,
- .pBufferMemoryBarriers = buf_bar,
- .bufferMemoryBarrierCount = *nb_buf_bar,
- });
- *nb_img_bar = 0;
- if (*nb_buf_bar) {
- slice_data_buf->stage = buf_bar[0].dstStageMask;
- slice_data_buf->access = buf_bar[0].dstAccessMask;
- *nb_buf_bar = 0;
- }
-
- /* Run the shader */
- ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct);
- pd = (FFv1VkRCTParameters) {
- .offset = 1 << f->bits_per_raw_sample,
- .bits = f->bits_per_raw_sample,
- .planar_rgb = ff_vk_mt_is_np_rgb(src_hwfc->sw_format) &&
- (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1),
- .transparency = f->transparency,
- };
-
- /* For some reason the C FFv1 encoder/decoder treats these differently */
- if (src_hwfc->sw_format == AV_PIX_FMT_GBRP10 ||
- src_hwfc->sw_format == AV_PIX_FMT_GBRP12 ||
- src_hwfc->sw_format == AV_PIX_FMT_GBRP14)
- memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int));
- else
- ff_vk_set_perm(src_hwfc->sw_format, pd.fmt_lut, 1);
-
- ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct,
- VK_SHADER_STAGE_COMPUTE_BIT,
- 0, sizeof(pd), &pd);
-
- vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
-
- /* Add a post-dispatch barrier before encoding */
- ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar,
- VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
- VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
- VK_ACCESS_SHADER_READ_BIT,
- VK_IMAGE_LAYOUT_GENERAL,
- VK_QUEUE_FAMILY_IGNORED);
-
-fail:
- return err;
-}
-
static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
FFVkExecContext *exec,
const AVFrame *pict)
@@ -298,8 +203,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
VulkanEncodeFFv1FrameData *fd = exec->opaque;
FFv1VkParameters pd;
- AVFrame *intermediate_frame = NULL;
-
/* Slice data */
AVBufferRef *slice_data_ref;
FFVkBuffer *slice_data_buf;
@@ -318,11 +221,11 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
uint32_t context_count = f->context_count[f->context_model];
const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
- VkImageView in_views[AV_NUM_DATA_POINTERS];
- VkImageView intermediate_views[AV_NUM_DATA_POINTERS];
+ AVFrame *src = (AVFrame *)pict;
+ VkImageView src_views[AV_NUM_DATA_POINTERS];
- AVFrame *enc_in = (AVFrame *)pict;
- VkImageView *enc_in_views = in_views;
+ AVFrame *tmp = NULL;
+ VkImageView tmp_views[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier2 img_bar[37];
int nb_img_bar = 0;
@@ -402,27 +305,44 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
/* Prepare input frame */
- RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in,
+ RET(ff_vk_exec_add_dep_frame(&fv->s, exec, src,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
- RET(ff_vk_create_imageviews(&fv->s, exec, enc_in_views, enc_in,
+ RET(ff_vk_create_imageviews(&fv->s, exec, src_views, src,
fv->rep_fmt));
- ff_vk_frame_barrier(&fv->s, exec, enc_in, img_bar, &nb_img_bar,
+ ff_vk_frame_barrier(&fv->s, exec, src, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
- /* Setup shader needs the original input */
+ if (fv->is_rgb) {
+ /* Create a temporary frame */
+ tmp = av_frame_alloc();
+ if (!(tmp))
+ return AVERROR(ENOMEM);
+
+ RET(av_hwframe_get_buffer(fv->intermediate_frames_ref,
+ tmp, 0));
+
+ RET(ff_vk_exec_add_dep_frame(&fv->s, exec, tmp,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+ RET(ff_vk_create_imageviews(&fv->s, exec, tmp_views,
+ tmp,
+ fv->rep_fmt));
+ }
+
+ /* Setup shader */
ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->setup,
1, 0, 0,
slice_data_buf,
0, slice_data_size*f->slice_count,
VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup,
- enc_in, enc_in_views,
+ src, src_views,
1, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
@@ -471,6 +391,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.plane_state_size = plane_state_size,
.context_count = context_count,
.crcref = f->crcref,
+ .rct_offset = 1 << f->bits_per_raw_sample,
.slice_size_max = out_data_buf->size / f->slice_count,
.context_model = fv->ctx.context_model,
.version = f->version,
@@ -480,6 +401,8 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.components = fmt_desc->nb_components,
.planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt),
.codec_planes = f->plane_count,
+ .planar_rgb = ff_vk_mt_is_np_rgb(avctx->sw_pix_fmt) &&
+ (ff_vk_count_images((AVVkFrame *)src->data[0]) > 1),
.transparency = f->transparency,
.colorspace = f->colorspace,
.pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 :
@@ -488,11 +411,35 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.ppi = fv->ppi,
.chunks = fv->chunks,
};
+
+ /* For some reason the C FFv1 encoder/decoder treats these differently */
+ if (avctx->sw_pix_fmt == AV_PIX_FMT_GBRP10 ||
+ avctx->sw_pix_fmt == AV_PIX_FMT_GBRP12 ||
+ avctx->sw_pix_fmt == AV_PIX_FMT_GBRP14)
+ memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int));
+ else
+ ff_vk_set_perm(avctx->sw_pix_fmt, pd.fmt_lut, 1);
+
+ for (int i = 0; i < f->quant_table_count; i++)
+ pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) ||
+ (f->quant_tables[i][4][127] != 0);
ff_vk_shader_update_push_const(&fv->s, exec, &fv->setup,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
+ /* Clean up temporary image */
+ if (fv->is_rgb) {
+ AVVkFrame *vkf = (AVVkFrame *)tmp->data[0];
+ vk->CmdClearColorImage(exec->buf, vkf->img[0], VK_IMAGE_LAYOUT_GENERAL,
+ &((VkClearColorValue) { 0 }),
+ 1, &((VkImageSubresourceRange) {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .levelCount = 1,
+ .layerCount = 1,
+ }));
+ }
+
/* Setup shader modified the slice data buffer */
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
@@ -546,19 +493,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
f->plane_count);
}
- /* Run RCT shader */
- if (fv->is_rgb) {
- RET(run_rct(avctx, exec,
- enc_in, enc_in_views,
- &intermediate_frame, intermediate_views,
- img_bar, &nb_img_bar, buf_bar, &nb_buf_bar,
- slice_data_buf, slice_data_size));
-
- /* Use the new frame */
- enc_in = intermediate_frame;
- enc_in_views = intermediate_views;
- }
-
/* If the reset shader ran, insert a barrier now. */
if (f->key_frame || f->version > 3) {
/* Reset shader modified the slice data buffer */
@@ -577,6 +511,15 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
};
}
+ if (fv->is_rgb) {
+ ff_vk_frame_barrier(&fv->s, exec, tmp, img_bar, &nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+ }
+
/* Final barrier before encoding */
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
@@ -599,7 +542,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
0, slice_data_size*f->slice_count,
VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
- enc_in, enc_in_views,
+ src, src_views,
1, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
@@ -608,6 +551,12 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
results_data_buf,
0, results_data_buf->size,
VK_FORMAT_UNDEFINED);
+ if (fv->is_rgb)
+ ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
+ tmp, tmp_views,
+ 1, 3,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc);
ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc,
@@ -624,11 +573,11 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
/* This, if needed, was referenced by the execution context
* as it was declared as a dependency. */
- av_frame_free(&intermediate_frame);
+ av_frame_free(&tmp);
return 0;
fail:
- av_frame_free(&intermediate_frame);
+ av_frame_free(&tmp);
ff_vk_exec_discard_deps(&fv->s, exec);
return err;
@@ -846,6 +795,7 @@ static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFV1Context *f = &fv->ctx;
AVHWFramesContext *frames_ctx;
AVVulkanFramesContext *vk_frames;
@@ -856,12 +806,13 @@ static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format)
frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
frames_ctx->format = AV_PIX_FMT_VULKAN;
frames_ctx->sw_format = sw_format;
- frames_ctx->width = FFALIGN(fv->s.frames->width, 32);
- frames_ctx->height = FFALIGN(fv->s.frames->height, 32);
+ frames_ctx->width = fv->s.frames->width;
+ frames_ctx->height = f->num_v_slices*RGB_LINECACHE;
vk_frames = frames_ctx->hwctx;
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
- vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT;
+ vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT |
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT;
vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
err = av_hwframe_ctx_init(fv->intermediate_frames_ref);
@@ -929,6 +880,7 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
FFV1Context *f = &fv->ctx;
int smp_bits = fv->ctx.use32bit ? 32 : 16;
+ av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE);
av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
@@ -1120,122 +1072,6 @@ fail:
return err;
}
-static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
-{
- int err;
- VulkanEncodeFFv1Context *fv = avctx->priv_data;
- FFVulkanShader *shd = &fv->rct;
- FFVulkanDescriptorSetBinding *desc_set;
-
- uint8_t *spv_data;
- size_t spv_len;
- void *spv_opaque = NULL;
- int wg_count = sqrt(fv->s.props.properties.limits.maxComputeWorkGroupInvocations);
-
- enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx);
- if (intermediate_fmt == AV_PIX_FMT_NONE) {
- av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible "
- "pixel format for RCT buffer!\n");
- return AVERROR(ENOTSUP);
- }
-
- RET(init_indirect(avctx, intermediate_fmt));
-
- RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct",
- VK_SHADER_STAGE_COMPUTE_BIT,
- (const char *[]) { "GL_EXT_buffer_reference",
- "GL_EXT_buffer_reference2" }, 2,
- wg_count, wg_count, 1,
- 0));
-
- /* Common codec header */
- GLSLD(ff_source_common_comp);
-
- GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
- GLSLC(1, ivec4 fmt_lut; );
- GLSLC(1, int offset; );
- GLSLC(1, uint8_t bits; );
- GLSLC(1, uint8_t planar_rgb; );
- GLSLC(1, uint8_t color_planes; );
- GLSLC(1, uint8_t transparency; );
- GLSLC(1, uint8_t version; );
- GLSLC(1, uint8_t micro_version; );
- GLSLC(1, uint8_t padding[2]; );
- GLSLC(0, }; );
- ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
- VK_SHADER_STAGE_COMPUTE_BIT);
-
- av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
- av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
- av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
-
- desc_set = (FFVulkanDescriptorSetBinding []) {
- {
- .name = "rangecoder_static_buf",
- .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .mem_layout = "scalar",
- .buf_content = "uint8_t zero_one_state[512];",
- },
- {
- .name = "quant_buf",
- .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .mem_layout = "scalar",
- .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
- "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
- },
- };
- RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0));
-
- define_shared_code(avctx, shd);
-
- desc_set = (FFVulkanDescriptorSetBinding []) {
- {
- .name = "slice_data_buf",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .mem_quali = "readonly",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- .buf_content = "SliceContext slice_ctx[1024];",
- },
- {
- .name = "src",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .dimensions = 2,
- .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
- fv->rep_fmt),
- .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format),
- .mem_quali = "readonly",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- },
- {
- .name = "dst",
- .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .dimensions = 2,
- .mem_layout = ff_vk_shader_rep_fmt(intermediate_fmt,
- fv->rep_fmt),
- .elems = av_pix_fmt_count_planes(intermediate_fmt),
- .mem_quali = "writeonly",
- .stages = VK_SHADER_STAGE_COMPUTE_BIT,
- },
- };
- RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
-
- GLSLD(ff_source_ffv1_enc_rct_comp);
-
- RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
- &spv_opaque));
- RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
-
- RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
-
-fail:
- if (spv_opaque)
- spv->free_shader(spv, &spv_opaque);
-
- return err;
-}
-
static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
{
int err;
@@ -1243,10 +1079,6 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
FFVulkanShader *shd = &fv->enc;
FFVulkanDescriptorSetBinding *desc_set;
- AVHWFramesContext *frames_ctx = fv->intermediate_frames_ref ?
- (AVHWFramesContext *)fv->intermediate_frames_ref->data :
- fv->s.frames;
-
uint8_t *spv_data;
size_t spv_len;
void *spv_opaque = NULL;
@@ -1307,9 +1139,9 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
.name = "src",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.dimensions = 2,
- .mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
+ .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
fv->rep_fmt),
- .elems = av_pix_fmt_count_planes(frames_ctx->sw_format),
+ .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format),
.mem_quali = "readonly",
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
@@ -1321,7 +1153,19 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
.buf_content = "uint64_t slice_results[2048];",
},
};
- RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
+ if (fv->is_rgb) {
+ AVHWFramesContext *intermediate_frames_ctx;
+ intermediate_frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
+ desc_set[3] = (FFVulkanDescriptorSetBinding) {
+ .name = "tmp",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(intermediate_frames_ctx->sw_format,
+ FF_VK_REP_NATIVE),
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ };
+ }
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3 + fv->is_rgb, 0, 0));
GLSLD(ff_source_ffv1_enc_comp);
@@ -1566,13 +1410,15 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
return err;
}
- /* Init RCT shader */
if (fv->is_rgb) {
- err = init_rct_shader(avctx, spv);
- if (err < 0) {
- spv->uninit(&spv);
- return err;
+ enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx);
+ if (intermediate_fmt == AV_PIX_FMT_NONE) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible "
+ "pixel format for RCT buffer!\n");
+ return AVERROR(ENOTSUP);
}
+
+ RET(init_indirect(avctx, intermediate_fmt));
}
/* Encode shader */
@@ -1659,7 +1505,6 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
ff_vk_exec_pool_free(&fv->s, &fv->transfer_exec_pool);
ff_vk_shader_free(&fv->s, &fv->enc);
- ff_vk_shader_free(&fv->s, &fv->rct);
ff_vk_shader_free(&fv->s, &fv->reset);
ff_vk_shader_free(&fv->s, &fv->setup);
diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp
index 64c1c2ce80..1f222bdc42 100644
--- a/libavcodec/vulkan/ffv1_common.comp
+++ b/libavcodec/vulkan/ffv1_common.comp
@@ -92,3 +92,90 @@ uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
return sx;
}
+
+#ifdef RGB
+#define RGB_LBUF (RGB_LINECACHE - 1)
+#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF)))
+
+ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,
+ int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)
+{
+ const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0);
+
+ /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */
+ VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[comp]),
+ TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]),
+ TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp]));
+
+ /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must
+ * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous
+ * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */
+ TYPE cur = TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[comp]);
+
+ int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
+ quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
+ quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
+
+ if (expectEXT(extend_lookup, false)) {
+ TYPE cur2 = TYPE(0);
+ if (expectEXT(off.x > 0, true)) {
+ const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);
+ cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]);
+ }
+ base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
+
+ /* top-2 became current upon swap */
+ TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off))[comp]);
+ base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
+ }
+
+ /* context, prediction */
+ return ivec2(base, predict(cur, VTYPE2(top)));
+}
+
+#else /* RGB */
+
+#define LADDR(p) (p)
+
+ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,
+ int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)
+{
+ const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
+ sp += off;
+
+ VTYPE3 top = VTYPE3(TYPE(0),
+ TYPE(0),
+ TYPE(0));
+ if (off.y > 0 && off != ivec2(0, 1))
+ top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]);
+ if (off.y > 0) {
+ top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]);
+ top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]);
+ }
+
+ TYPE cur = TYPE(0);
+ if (off != ivec2(0, 0))
+ cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]);
+
+ int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
+ quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
+ quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
+
+ if (expectEXT(extend_lookup, false)) {
+ TYPE cur2 = TYPE(0);
+ if (off.x > 0 && off != ivec2(1, 0)) {
+ const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
+ cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]);
+ }
+ base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
+
+ TYPE top2 = TYPE(0);
+ if (off.y > 1)
+ top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]);
+ base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
+ }
+
+ /* context, prediction */
+ return ivec2(base, predict(cur, VTYPE2(top)));
+}
+#endif
diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
index 1c313b3168..c74af4bf6a 100644
--- a/libavcodec/vulkan/ffv1_dec.comp
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -20,93 +20,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#ifndef RGB
-#define LADDR(p) (p)
-#else
-#define RGB_LINECACHE 2
-#define RGB_LBUF (RGB_LINECACHE - 1)
-#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF)))
-#endif
-
-#ifdef RGB
-ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx)
-{
- const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0);
-
- /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */
- VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[comp]),
- TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]),
- TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp]));
-
- /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must
- * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous
- * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */
- TYPE cur = TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[comp]);
-
- int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
- quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
- quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
-
- if (expectEXT(extend_lookup[quant_table_idx] > 0, false)) {
- TYPE cur2 = TYPE(0);
- if (expectEXT(off.x > 0, true)) {
- const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);
- cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]);
- }
- base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
-
- /* top-2 became current upon swap */
- TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off))[comp]);
- base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
- }
-
- /* context, prediction */
- return ivec2(base, predict(cur, VTYPE2(top)));
-}
-#else
-ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx)
-{
- const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
- sp += off;
-
- VTYPE3 top = VTYPE3(TYPE(0),
- TYPE(0),
- TYPE(0));
- if (off.y > 0 && off != ivec2(0, 1))
- top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]);
- if (off.y > 0) {
- top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]);
- top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]);
- }
-
- TYPE cur = TYPE(0);
- if (off != ivec2(0, 0))
- cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]);
-
- int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
- quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
- quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
-
- if ((quant_table[quant_table_idx][3][127] != 0) ||
- (quant_table[quant_table_idx][4][127] != 0)) {
- TYPE cur2 = TYPE(0);
- if (off.x > 0 && off != ivec2(1, 0)) {
- const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
- cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]);
- }
- base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
-
- TYPE top2 = TYPE(0);
- if (off.y > 1)
- top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]);
- base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
- }
-
- /* context, prediction */
- return ivec2(base, predict(cur, VTYPE2(top)));
-}
-#endif
-
#ifndef GOLOMB
#ifdef CACHED_SYMBOL_READER
shared uint8_t state[CONTEXT_SIZE];
@@ -172,7 +85,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
for (int x = 0; x < w; x++) {
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
- quant_table_idx);
+ quant_table_idx, extend_lookup[quant_table_idx] > 0);
uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]);
#ifdef CACHED_SYMBOL_READER
@@ -217,7 +130,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
ivec2 pos = sp + ivec2(x, y);
int diff;
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
- quant_table_idx);
+ quant_table_idx, extend_lookup[quant_table_idx] > 0);
VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*abs(pr[0]));
diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp
index a3c22f7459..db33c414e1 100644
--- a/libavcodec/vulkan/ffv1_enc.comp
+++ b/libavcodec/vulkan/ffv1_enc.comp
@@ -20,48 +20,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, int comp, int sw, uint8_t quant_table_idx)
-{
- const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
- sp += off;
-
- VTYPE3 top = VTYPE3(TYPE(0),
- TYPE(0),
- TYPE(0));
- if (off.y > 0 && off != ivec2(0, 1))
- top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]);
- if (off.y > 0) {
- top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]);
- top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]);
- }
-
- TYPE cur = TYPE(0);
- if (off != ivec2(0, 0))
- cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]);
-
- int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
- quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
- quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
-
- if ((quant_table[quant_table_idx][3][127] != 0) ||
- (quant_table[quant_table_idx][4][127] != 0)) {
- TYPE cur2 = TYPE(0);
- if (off.x > 0 && off != ivec2(1, 0)) {
- const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
- cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]);
- }
- base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
-
- TYPE top2 = TYPE(0);
- if (off.y > 1)
- top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]);
- base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
- }
-
- /* context, prediction */
- return ivec2(base, predict(cur, VTYPE2(top)));
-}
-
#ifndef GOLOMB
/* Note - only handles signed values */
void put_symbol(inout RangeCoder c, uint64_t state, int v)
@@ -86,38 +44,42 @@ void put_symbol(inout RangeCoder c, uint64_t state, int v)
put_rac(c, state - 11 + min(e, 10), v < 0);
}
-void encode_line_pcm(inout SliceContext sc, int y, int p, int comp,
- int bits)
+void encode_line_pcm(inout SliceContext sc, readonly uimage2D img,
+ ivec2 sp, int y, int p, int comp, int bits)
{
- ivec2 sp = sc.slice_pos;
int w = sc.slice_dim.x;
+
+#ifndef RGB
if (p > 0 && p < 3) {
w >>= chroma_shift.x;
sp >>= chroma_shift;
}
+#endif
for (int x = 0; x < w; x++) {
- uint v = imageLoad(src[p], (sp + ivec2(x, y)))[comp];
+ uint v = imageLoad(img, sp + LADDR(ivec2(x, y)))[comp];
for (int i = (bits - 1); i >= 0; i--)
put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1)));
}
}
-void encode_line(inout SliceContext sc, uint64_t state,
- int y, int p, int comp, int bits,
+void encode_line(inout SliceContext sc, readonly uimage2D img, uint64_t state,
+ ivec2 sp, int y, int p, int comp, int bits,
uint8_t quant_table_idx, const int run_index)
{
- ivec2 sp = sc.slice_pos;
-
int w = sc.slice_dim.x;
+
+#ifndef RGB
if (p > 0 && p < 3) {
w >>= chroma_shift.x;
sp >>= chroma_shift;
}
+#endif
for (int x = 0; x < w; x++) {
- ivec2 d = get_pred(src[p], sp, ivec2(x, y), comp, w, quant_table_idx);
- d[1] = int(imageLoad(src[p], sp + ivec2(x, y))[comp]) - d[1];
+ ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w,
+ quant_table_idx, extend_lookup[quant_table_idx] > 0);
+ d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1];
if (d[0] < 0)
d = -d;
@@ -130,24 +92,26 @@ void encode_line(inout SliceContext sc, uint64_t state,
#else /* GOLOMB */
-void encode_line(inout SliceContext sc, uint64_t state,
- int y, int p, int comp, int bits,
+void encode_line(inout SliceContext sc, readonly uimage2D img, uint64_t state,
+ ivec2 sp, int y, int p, int comp, int bits,
uint8_t quant_table_idx, inout int run_index)
{
- ivec2 sp = sc.slice_pos;
-
int w = sc.slice_dim.x;
+
+#ifndef RGB
if (p > 0 && p < 3) {
w >>= chroma_shift.x;
sp >>= chroma_shift;
}
+#endif
int run_count = 0;
bool run_mode = false;
for (int x = 0; x < w; x++) {
- ivec2 d = get_pred(src[p], sp, ivec2(x, y), comp, w, quant_table_idx);
- d[1] = int(imageLoad(src[p], sp + ivec2(x, y))[comp]) - d[1];
+ ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w,
+ quant_table_idx, extend_lookup[quant_table_idx] > 0);
+ d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1];
if (d[0] < 0)
d = -d;
@@ -198,14 +162,56 @@ void encode_line(inout SliceContext sc, uint64_t state,
}
#endif
+#ifdef RGB
+ivec4 load_components(ivec2 pos)
+{
+ ivec4 pix = ivec4(imageLoad(src[0], pos));
+ if (planar_rgb != 0) {
+ for (int i = 1; i < (3 + transparency); i++)
+ pix[i] = int(imageLoad(src[i], pos)[0]);
+ }
+
+ return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
+ pix[fmt_lut[2]], pix[fmt_lut[3]]);
+}
+
+void transform_sample(inout ivec4 pix, ivec2 rct_coef)
+{
+ pix.b -= pix.g;
+ pix.r -= pix.g;
+ pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;
+ pix.b += rct_offset;
+ pix.r += rct_offset;
+}
+
+void preload_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
+{
+ for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) {
+ ivec2 lpos = sp + LADDR(ivec2(x, y));
+ ivec2 pos = sc.slice_pos + ivec2(x, y);
+
+ ivec4 pix = load_components(pos);
+
+ if (expectEXT(apply_rct, true))
+ transform_sample(pix, sc.slice_rct_coef);
+
+ imageStore(tmp, lpos, pix);
+ }
+}
+#endif
+
void encode_slice(inout SliceContext sc, const uint slice_idx)
{
+ ivec2 sp = sc.slice_pos;
+
#ifndef RGB
int bits = bits_per_raw_sample;
#else
int bits = 9;
if (bits != 8 || sc.slice_coding_mode != 0)
bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
+
+ sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;
#endif
#ifndef GOLOMB
@@ -222,15 +228,17 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
int comp = c - p;
for (int y = 0; y < h; y++)
- encode_line_pcm(sc, y, p, comp, bits);
+ encode_line_pcm(sc, src[p], sp, y, p, comp, bits);
}
#else
for (int y = 0; y < sc.slice_dim.y; y++) {
- encode_line_pcm(sc, y, 0, 1, bits);
- encode_line_pcm(sc, y, 0, 2, bits);
- encode_line_pcm(sc, y, 0, 0, bits);
+ preload_rgb(sc, sp, sc.slice_dim.x, y, false);
+
+ encode_line_pcm(sc, tmp, sp, y, 0, 1, bits);
+ encode_line_pcm(sc, tmp, sp, y, 0, 2, bits);
+ encode_line_pcm(sc, tmp, sp, y, 0, 0, bits);
if (transparency == 1)
- encode_line_pcm(sc, y, 0, 3, bits);
+ encode_line_pcm(sc, tmp, sp, y, 0, 3, bits);
}
#endif
} else
@@ -252,7 +260,8 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
int comp = c - p;
for (int y = 0; y < h; y++)
- encode_line(sc, slice_state_off, y, p, comp, bits, quant_table_idx[c], run_index);
+ encode_line(sc, src[p], slice_state_off, sp, y, p,
+ comp, bits, quant_table_idx[c], run_index);
/* For the second chroma plane, reuse the first plane's state */
if (c != 1)
@@ -261,15 +270,17 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
#else
int run_index = 0;
for (int y = 0; y < sc.slice_dim.y; y++) {
- encode_line(sc, slice_state_off + plane_state_size*0,
- y, 0, 1, bits, quant_table_idx[0], run_index);
- encode_line(sc, slice_state_off + plane_state_size*1,
- y, 0, 2, bits, quant_table_idx[1], run_index);
- encode_line(sc, slice_state_off + plane_state_size*1,
- y, 0, 0, bits, quant_table_idx[2], run_index);
+ preload_rgb(sc, sp, sc.slice_dim.x, y, true);
+
+ encode_line(sc, tmp, slice_state_off + plane_state_size*0,
+ sp, y, 0, 1, bits, quant_table_idx[0], run_index);
+ encode_line(sc, tmp, slice_state_off + plane_state_size*1,
+ sp, y, 0, 2, bits, quant_table_idx[1], run_index);
+ encode_line(sc, tmp, slice_state_off + plane_state_size*1,
+ sp, y, 0, 0, bits, quant_table_idx[2], run_index);
if (transparency == 1)
- encode_line(sc, slice_state_off + plane_state_size*2,
- y, 0, 3, bits, quant_table_idx[3], run_index);
+ encode_line(sc, tmp, slice_state_off + plane_state_size*2,
+ sp, y, 0, 3, bits, quant_table_idx[3], run_index);
}
#endif
}
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index cbde2f319a..efbf5fa953 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -26,6 +26,8 @@
#include "libavutil/vulkan_spirv.h"
#include "libavutil/mem.h"
+#define RGB_LINECACHE 2
+
extern const char *ff_source_common_comp;
extern const char *ff_source_rangecoder_comp;
extern const char *ff_source_ffv1_vlc_comp;
@@ -610,6 +612,7 @@ static void define_shared_code(FFVulkanShader *shd, int use32bit)
GLSLC(0, #define DECODE );
+ av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE);
av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
@@ -936,7 +939,7 @@ static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
frames_ctx->format = AV_PIX_FMT_VULKAN;
frames_ctx->sw_format = sw_format;
frames_ctx->width = s->frames->width;
- frames_ctx->height = f->num_v_slices*2;
+ frames_ctx->height = f->num_v_slices*RGB_LINECACHE;
vk_frames = frames_ctx->hwctx;
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 07/16] ffv1_common: minor RGB optimization
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (4 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 06/16] ffv1enc_vulkan: switch to 2-line cache, unify prediction code Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 08/16] ffv1enc_vulkan: use ff_get_encode_buffer Lynne
` (8 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/vulkan/ffv1_common.comp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp
index 1f222bdc42..3d40592739 100644
--- a/libavcodec/vulkan/ffv1_common.comp
+++ b/libavcodec/vulkan/ffv1_common.comp
@@ -100,17 +100,17 @@ uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,
int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)
{
- const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0);
+ const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? off + ivec2(1, -1) : off;
/* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */
- VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[comp]),
+ VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, -1)))[comp]),
TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]),
TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp]));
/* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must
* return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous
* row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */
- TYPE cur = TYPE(imageLoad(pred, sp + LADDR(off + ivec2(-1, 0) + yoff_border1))[comp]);
+ TYPE cur = TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, 0)))[comp]);
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 08/16] ffv1enc_vulkan: use ff_get_encode_buffer
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (5 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 07/16] ffv1_common: minor RGB optimization Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 09/16] vulkan_ffv1: fix PCM + cached symbol reader Lynne
` (7 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
We used to create our own packet buffer, but still set the DR1 flag,
which is not how it's supposed to work.
Instead, allocate the packet with ff_get_encode_buffer. Then either
host-map that buffer and copy each slice into it via GPU transfers, or,
if host-mapping fails or is unavailable, just copy each slice manually.
---
libavcodec/ffv1enc_vulkan.c | 98 +++++++++++++++++++++----------------
1 file changed, 57 insertions(+), 41 deletions(-)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index bab9bb640b..c2eb73ca53 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -88,7 +88,6 @@ typedef struct VulkanEncodeFFv1Context {
/* Output data buffer */
AVBufferPool *out_data_pool;
- AVBufferPool *pkt_data_pool;
/* Slice results buffer */
AVBufferPool *results_data_pool;
@@ -299,8 +298,11 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
NULL, maxsize,
- maxsize < fv->max_heap_size ?
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0));
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ (maxsize < fv->max_heap_size ?
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0) |
+ (!(fv->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) ?
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0x0)));
out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
@@ -583,10 +585,10 @@ fail:
return err;
}
-static int download_slices(AVCodecContext *avctx,
+static int transfer_slices(AVCodecContext *avctx,
VkBufferCopy *buf_regions, int nb_regions,
VulkanEncodeFFv1FrameData *fd,
- AVBufferRef *pkt_data_ref)
+ uint8_t *dst, AVBufferRef *dst_ref)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
@@ -594,11 +596,20 @@ static int download_slices(AVCodecContext *avctx,
FFVkExecContext *exec;
FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
- FFVkBuffer *pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data;
+
+ AVBufferRef *mapped_ref;
+ FFVkBuffer *mapped_buf;
VkBufferMemoryBarrier2 buf_bar[8];
int nb_buf_bar = 0;
+ err = ff_vk_host_map_buffer(&fv->s, &mapped_ref, dst, dst_ref,
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+ if (err < 0)
+ return err;
+
+ mapped_buf = (FFVkBuffer *)mapped_ref->data;
+
/* Transfer the slices */
exec = ff_vk_exec_get(&fv->s, &fv->transfer_exec_pool);
ff_vk_exec_start(&fv->s, exec);
@@ -606,7 +617,8 @@ static int download_slices(AVCodecContext *avctx,
ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 0);
fd->out_data_ref = NULL; /* Ownership passed */
- ff_vk_exec_add_dep_buf(&fv->s, exec, &pkt_data_ref, 1, 1);
+ ff_vk_exec_add_dep_buf(&fv->s, exec, &mapped_ref, 1, 0);
+ mapped_ref = NULL; /* Ownership passed */
/* Ensure the output buffer is finished */
buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
@@ -630,8 +642,11 @@ static int download_slices(AVCodecContext *avctx,
out_data_buf->access = buf_bar[0].dstAccessMask;
nb_buf_bar = 0;
+ for (int i = 0; i < nb_regions; i++)
+ buf_regions[i].dstOffset += mapped_buf->virtual_offset;
+
vk->CmdCopyBuffer(exec->buf,
- out_data_buf->buf, pkt_data_buf->buf,
+ out_data_buf->buf, mapped_buf->buf,
nb_regions, buf_regions);
/* Submit */
@@ -642,18 +657,6 @@ static int download_slices(AVCodecContext *avctx,
/* We need the encoded data immediately */
ff_vk_exec_wait(&fv->s, exec);
- /* Invalidate slice/output data if needed */
- if (!(pkt_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
- VkMappedMemoryRange invalidate_data = {
- .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = pkt_data_buf->mem,
- .offset = 0,
- .size = VK_WHOLE_SIZE,
- };
- vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
- 1, &invalidate_data);
- }
-
return 0;
}
@@ -664,13 +667,9 @@ static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec,
VulkanEncodeFFv1Context *fv = avctx->priv_data;
FFV1Context *f = &fv->ctx;
FFVulkanFunctions *vk = &fv->s.vkfn;
-
- /* Packet data */
- AVBufferRef *pkt_data_ref;
- FFVkBuffer *pkt_data_buf;
-
VulkanEncodeFFv1FrameData *fd = exec->opaque;
+ FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
FFVkBuffer *results_data_buf = (FFVkBuffer *)fd->results_data_ref->data;
uint64_t *sc;
@@ -707,20 +706,9 @@ static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec,
av_log(avctx, AV_LOG_VERBOSE, "Encoded data: %iMiB\n", pkt->size / (1024*1024));
av_buffer_unref(&fd->results_data_ref); /* No need for this buffer anymore */
- /* Allocate packet buffer */
- err = ff_vk_get_pooled_buffer(&fv->s, &fv->pkt_data_pool,
- &pkt_data_ref,
- VK_BUFFER_USAGE_TRANSFER_DST_BIT,
- NULL, pkt->size,
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
- if (err < 0)
+ /* Allocate packet */
+ if ((err = ff_get_encode_buffer(avctx, pkt, pkt->size, 0)) < 0)
return err;
- pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data;
-
- /* Setup packet data */
- pkt->data = pkt_data_buf->mapped_mem;
- pkt->buf = pkt_data_ref;
pkt->pts = fd->pts;
pkt->dts = fd->pts;
@@ -733,8 +721,37 @@ static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec,
fd->frame_opaque_ref = NULL;
}
- return download_slices(avctx, fv->buf_regions, f->slice_count, fd,
- pkt_data_ref);
+ /* Try using host mapped memory transfers first */
+ if (fv->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
+ err = transfer_slices(avctx, fv->buf_regions, f->slice_count, fd,
+ pkt->data, pkt->buf);
+ if (err >= 0)
+ return err;
+ }
+
+ /* Invalidate slice/output data if needed */
+ if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkMappedMemoryRange invalidate_data = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = out_data_buf->mem,
+ .offset = 0,
+ .size = VK_WHOLE_SIZE,
+ };
+ vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
+ 1, &invalidate_data);
+ }
+
+ /* Copy each slice */
+ for (int i = 0; i < f->slice_count; i++) {
+ VkBufferCopy *region = &fv->buf_regions[i];
+ memcpy(pkt->data + region->dstOffset,
+ out_data_buf->mapped_mem + region->srcOffset,
+ region->size);
+ }
+
+ av_buffer_unref(&fd->out_data_ref);
+
+ return 0;
}
static int vulkan_encode_ffv1_receive_packet(AVCodecContext *avctx,
@@ -1523,7 +1540,6 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
av_buffer_pool_uninit(&fv->results_data_pool);
av_buffer_pool_uninit(&fv->out_data_pool);
- av_buffer_pool_uninit(&fv->pkt_data_pool);
av_buffer_unref(&fv->keyframe_slice_data_ref);
av_buffer_pool_uninit(&fv->slice_data_pool);
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 09/16] vulkan_ffv1: fix PCM + cached symbol reader
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (6 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 08/16] ffv1enc_vulkan: use ff_get_encode_buffer Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 10/16] ffv1enc_vulkan: implement the cached EC writer from the decoder Lynne
` (6 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
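writeout_rgb requires that all subgroups are active, so the early return
for invocations other than the first is moved out of decode_slice and into
decode_line_pcm (when the cached symbol reader is enabled).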
---
libavcodec/vulkan/ffv1_dec.comp | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
index c74af4bf6a..e73b3f1dc0 100644
--- a/libavcodec/vulkan/ffv1_dec.comp
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -56,6 +56,11 @@ int get_isymbol(inout RangeCoder c, uint state_off)
void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits)
{
+#ifdef CACHED_SYMBOL_READER
+ if (gl_LocalInvocationID.x > 0)
+ return;
+#endif
+
#ifndef RGB
if (p > 0 && p < 3) {
w >>= chroma_shift.x;
@@ -235,8 +240,6 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
/* PCM coding */
#ifndef GOLOMB
if (sc.slice_coding_mode == 1) {
- if (gl_LocalInvocationID.x > 0)
- return;
#ifndef RGB
for (int p = 0; p < planes; p++) {
int h = sc.slice_dim.y;
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 10/16] ffv1enc_vulkan: implement the cached EC writer from the decoder
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (7 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 09/16] vulkan_ffv1: fix PCM + cached symbol reader Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 11/16] ffv1enc_vulkan: implement RCT search for level >= 4 Lynne
` (5 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This gives a 35% speedup on AMD and 50% on Nvidia.
---
libavcodec/ffv1enc_vulkan.c | 6 ++-
libavcodec/vulkan/ffv1_enc.comp | 68 ++++++++++++++++++++++-----------
2 files changed, 50 insertions(+), 24 deletions(-)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index c2eb73ca53..5de16d5b02 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -1099,12 +1099,13 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
uint8_t *spv_data;
size_t spv_len;
void *spv_opaque = NULL;
+ int use_cached_reader = fv->ctx.ac != AC_GOLOMB_RICE;
RET(ff_vk_shader_init(&fv->s, shd, "ffv1_enc",
VK_SHADER_STAGE_COMPUTE_BIT,
(const char *[]) { "GL_EXT_buffer_reference",
"GL_EXT_buffer_reference2" }, 2,
- 1, 1, 1,
+ use_cached_reader ? CONTEXT_SIZE : 1, 1, 1,
0));
/* Common codec header */
@@ -1116,6 +1117,9 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+ if (use_cached_reader)
+ av_bprintf(&shd->src, "#define CACHED_SYMBOL_READER 1\n");
+
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "rangecoder_static_buf",
diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp
index db33c414e1..65a7df1359 100644
--- a/libavcodec/vulkan/ffv1_enc.comp
+++ b/libavcodec/vulkan/ffv1_enc.comp
@@ -21,27 +21,32 @@
*/
#ifndef GOLOMB
+#ifdef CACHED_SYMBOL_READER
+shared uint8_t state[CONTEXT_SIZE];
+#define WRITE(c, off, val) put_rac_direct(c, state[off], val)
+#else
+#define WRITE(c, off, val) put_rac(c, uint64_t(slice_state) + (state_off + off), val)
+#endif
+
/* Note - only handles signed values */
-void put_symbol(inout RangeCoder c, uint64_t state, int v)
+void put_symbol(inout RangeCoder c, uint state_off, int v)
{
bool is_nil = (v == 0);
- put_rac(c, state, is_nil);
+ WRITE(c, 0, is_nil);
if (is_nil)
return;
const int a = abs(v);
const int e = findMSB(a);
- state += 1;
for (int i = 0; i < e; i++)
- put_rac(c, state + min(i, 9), true);
- put_rac(c, state + min(e, 9), false);
+ WRITE(c, 1 + min(i, 9), true);
+ WRITE(c, 1 + min(e, 9), false);
- state += 21;
for (int i = e - 1; i >= 0; i--)
- put_rac(c, state + min(i, 9), bool(bitfieldExtract(a, i, 1)));
+ WRITE(c, 22 + min(i, 9), bool(bitfieldExtract(a, i, 1)));
- put_rac(c, state - 11 + min(e, 10), v < 0);
+ WRITE(c, 22 - 11 + min(e, 10), v < 0);
}
void encode_line_pcm(inout SliceContext sc, readonly uimage2D img,
@@ -49,6 +54,11 @@ void encode_line_pcm(inout SliceContext sc, readonly uimage2D img,
{
int w = sc.slice_dim.x;
+#ifdef CACHED_SYMBOL_READER
+ if (gl_LocalInvocationID.x > 0)
+ return;
+#endif
+
#ifndef RGB
if (p > 0 && p < 3) {
w >>= chroma_shift.x;
@@ -63,7 +73,7 @@ void encode_line_pcm(inout SliceContext sc, readonly uimage2D img,
}
}
-void encode_line(inout SliceContext sc, readonly uimage2D img, uint64_t state,
+void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off,
ivec2 sp, int y, int p, int comp, int bits,
uint8_t quant_table_idx, const int run_index)
{
@@ -86,13 +96,25 @@ void encode_line(inout SliceContext sc, readonly uimage2D img, uint64_t state,
d[1] = fold(d[1], bits);
- put_symbol(sc.c, state + CONTEXT_SIZE*d[0], d[1]);
+ uint context_off = state_off + CONTEXT_SIZE*d[0];
+#ifdef CACHED_SYMBOL_READER
+ u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x);
+ state[gl_LocalInvocationID.x] = sb.v;
+ barrier();
+ if (gl_LocalInvocationID.x == 0)
+#endif
+
+ put_symbol(sc.c, context_off, d[1]);
+
+#ifdef CACHED_SYMBOL_READER
+ sb.v = state[gl_LocalInvocationID.x];
+#endif
}
}
#else /* GOLOMB */
-void encode_line(inout SliceContext sc, readonly uimage2D img, uint64_t state,
+void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off,
ivec2 sp, int y, int p, int comp, int bits,
uint8_t quant_table_idx, inout int run_index)
{
@@ -143,7 +165,7 @@ void encode_line(inout SliceContext sc, readonly uimage2D img, uint64_t state,
}
if (!run_mode) {
- VlcState sb = VlcState(state + VLC_STATE_SIZE*d[0]);
+ VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*d[0]);
Symbol sym = get_vlc_symbol(sb, d[1], bits);
put_bits(sc.pb, sym.bits, sym.val);
}
@@ -245,8 +267,7 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
#endif
{
u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
- uint64_t slice_state_off = uint64_t(slice_state) +
- slice_idx*plane_state_size*codec_planes;
+ u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size;
#ifndef RGB
for (int c = 0; c < components; c++) {
@@ -260,26 +281,22 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
int comp = c - p;
for (int y = 0; y < h; y++)
- encode_line(sc, src[p], slice_state_off, sp, y, p,
+ encode_line(sc, src[p], slice_state_off[c], sp, y, p,
comp, bits, quant_table_idx[c], run_index);
-
- /* For the second chroma plane, reuse the first plane's state */
- if (c != 1)
- slice_state_off += plane_state_size;
}
#else
int run_index = 0;
for (int y = 0; y < sc.slice_dim.y; y++) {
preload_rgb(sc, sp, sc.slice_dim.x, y, true);
- encode_line(sc, tmp, slice_state_off + plane_state_size*0,
+ encode_line(sc, tmp, slice_state_off[0],
sp, y, 0, 1, bits, quant_table_idx[0], run_index);
- encode_line(sc, tmp, slice_state_off + plane_state_size*1,
+ encode_line(sc, tmp, slice_state_off[1],
sp, y, 0, 2, bits, quant_table_idx[1], run_index);
- encode_line(sc, tmp, slice_state_off + plane_state_size*1,
+ encode_line(sc, tmp, slice_state_off[2],
sp, y, 0, 0, bits, quant_table_idx[2], run_index);
if (transparency == 1)
- encode_line(sc, tmp, slice_state_off + plane_state_size*2,
+ encode_line(sc, tmp, slice_state_off[3],
sp, y, 0, 3, bits, quant_table_idx[3], run_index);
}
#endif
@@ -288,6 +305,11 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
void finalize_slice(inout SliceContext sc, const uint slice_idx)
{
+#ifdef CACHED_SYMBOL_READER
+ if (gl_LocalInvocationID.x > 0)
+ return;
+#endif
+
#ifdef GOLOMB
uint32_t enc_len = sc.hdr_len + flush_put_bits(sc.pb);
#else
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 11/16] ffv1enc_vulkan: implement RCT search for level >= 4
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (8 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 10/16] ffv1enc_vulkan: implement the cached EC writer from the decoder Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 12/16] vulkan/ffv1: unify encode and decode get/put primitives Lynne
` (4 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/ffv1enc_vulkan.c | 204 ++++++++++++++++++++++++-
libavcodec/vulkan/Makefile | 2 +-
libavcodec/vulkan/ffv1_enc_setup.comp | 6 +-
libavcodec/vulkan/ffv1_rct_search.comp | 139 +++++++++++++++++
4 files changed, 346 insertions(+), 5 deletions(-)
create mode 100644 libavcodec/vulkan/ffv1_rct_search.comp
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 5de16d5b02..d9e12f5fae 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -74,6 +74,7 @@ typedef struct VulkanEncodeFFv1Context {
size_t max_heap_size;
FFVulkanShader setup;
+ FFVulkanShader rct_search;
FFVulkanShader reset;
FFVulkanShader enc;
@@ -101,6 +102,7 @@ typedef struct VulkanEncodeFFv1Context {
int num_h_slices;
int num_v_slices;
int force_pcm;
+ int optimize_rct;
int is_rgb;
int ppi;
@@ -112,6 +114,7 @@ extern const char *ff_source_rangecoder_comp;
extern const char *ff_source_ffv1_vlc_comp;
extern const char *ff_source_ffv1_common_comp;
extern const char *ff_source_ffv1_reset_comp;
+extern const char *ff_source_ffv1_rct_search_comp;
extern const char *ff_source_ffv1_enc_setup_comp;
extern const char *ff_source_ffv1_enc_comp;
@@ -147,7 +150,8 @@ typedef struct FFv1VkParameters {
uint8_t ec;
uint8_t ppi;
uint8_t chunks;
- uint8_t padding[4];
+ uint8_t rct_search;
+ uint8_t padding[3];
} FFv1VkParameters;
static void add_push_data(FFVulkanShader *shd)
@@ -184,12 +188,76 @@ static void add_push_data(FFVulkanShader *shd)
GLSLC(1, uint8_t ec; );
GLSLC(1, uint8_t ppi; );
GLSLC(1, uint8_t chunks; );
- GLSLC(1, uint8_t padding[4]; );
+ GLSLC(1, uint8_t rct_search; );
+ GLSLC(1, uint8_t padding[3]; );
GLSLC(0, }; );
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
}
+typedef struct FFv1VkRCTSearchParameters { /* Push constants uploaded by run_rct_search() */
+ int fmt_lut[4]; /* Component permutation; the shader indexes loaded pixels through it */
+ int rct_offset; /* 1 << bits_per_raw_sample; added to the transformed B and R samples */
+ uint8_t planar_rgb; /* Non-zero: the shader loads components 1 and 2 from src[1] and src[2] */
+ uint8_t transparency;
+ uint8_t key_frame;
+ uint8_t force_pcm; /* The search shader returns immediately when this is 1 */
+ uint8_t version;
+ uint8_t micro_version;
+ uint8_t padding[2]; /* Explicit tail padding */
+} FFv1VkRCTSearchParameters;
+
+static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec, /* Records the RCT search dispatch into exec; always returns 0 */
+ AVFrame *enc_in, VkImageView *enc_in_views,
+ FFVkBuffer *slice_data_buf, uint32_t slice_data_size)
+{
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFV1Context *f = &fv->ctx;
+ FFVulkanFunctions *vk = &fv->s.vkfn;
+ AVHWFramesContext *src_hwfc = (AVHWFramesContext *)enc_in->hw_frames_ctx->data;
+ FFv1VkRCTSearchParameters pd;
+
+ /* Update descriptors: the slice context buffer, then the source image array */
+ ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->rct_search,
+ 0, 0, 0,
+ slice_data_buf,
+ 0, slice_data_size*f->slice_count, /* covers the contexts of every slice */
+ VK_FORMAT_UNDEFINED);
+ ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct_search,
+ enc_in, enc_in_views,
+ 0, 1,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
+
+ ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct_search);
+
+ pd = (FFv1VkRCTSearchParameters) { /* fmt_lut is filled in separately below */
+ .rct_offset = 1 << f->bits_per_raw_sample,
+ .planar_rgb = ff_vk_mt_is_np_rgb(src_hwfc->sw_format) &&
+ (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1),
+ .transparency = f->transparency,
+ .key_frame = f->key_frame,
+ .force_pcm = fv->force_pcm,
+ .version = f->version,
+ .micro_version = f->micro_version,
+ };
+
+ if (avctx->sw_pix_fmt == AV_PIX_FMT_GBRP10 ||
+ avctx->sw_pix_fmt == AV_PIX_FMT_GBRP12 ||
+ avctx->sw_pix_fmt == AV_PIX_FMT_GBRP14)
+ memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int)); /* fixed order for these three formats */
+ else
+ ff_vk_set_perm(avctx->sw_pix_fmt, pd.fmt_lut, 1); /* every other format: derive the order from the pixel format */
+
+ ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct_search,
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(pd), &pd);
+
+ vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); /* one workgroup per slice */
+
+ return 0;
+}
+
static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
FFVkExecContext *exec,
const AVFrame *pict)
@@ -366,6 +434,25 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
};
}
+ if (fv->optimize_rct) {
+ RET(run_rct_search(avctx, exec,
+ src, src_views,
+ slice_data_buf, slice_data_size));
+
+ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+ .srcStageMask = slice_data_buf->stage,
+ .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ .srcAccessMask = slice_data_buf->access,
+ .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = slice_data_buf->buf,
+ .size = slice_data_size*f->slice_count,
+ .offset = 0,
+ };
+ }
+
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
@@ -412,6 +499,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.ec = f->ec,
.ppi = fv->ppi,
.chunks = fv->chunks,
+ .rct_search = fv->optimize_rct,
};
/* For some reason the C FFv1 encoder/decoder treats these differently */
@@ -920,6 +1008,103 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
GLSLD(ff_source_ffv1_common_comp);
}
+static int init_rct_search_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) /* Builds, compiles and registers fv->rct_search; returns <0 on failure */
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFVulkanShader *shd = &fv->rct_search;
+ FFVulkanDescriptorSetBinding *desc_set;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL;
+
+ RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct_search",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2",
+ "GL_EXT_null_initializer" }, 3,
+ 32, 32, 1,
+ 0));
+
+ /* Common codec header */
+ GLSLD(ff_source_common_comp);
+
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); /* must mirror FFv1VkRCTSearchParameters field for field */
+ GLSLC(1, ivec4 fmt_lut; );
+ GLSLC(1, int rct_offset; );
+ GLSLC(1, uint8_t planar_rgb; );
+ GLSLC(1, uint8_t transparency; );
+ GLSLC(1, uint8_t key_frame; );
+ GLSLC(1, uint8_t force_pcm; );
+ GLSLC(1, uint8_t version; );
+ GLSLC(1, uint8_t micro_version; );
+ GLSLC(1, uint8_t padding[2]; );
+ GLSLC(0, }; );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTSearchParameters), /* same struct run_rct_search() uploads */
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+ av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+ /* Never used; NOTE(review): presumably declared only so the shared code added below compiles - confirm */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "rangecoder_static_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t zero_one_state[512];",
+ },
+ {
+ .name = "quant_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+ "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 1));
+
+ define_shared_code(avctx, shd);
+
+ desc_set = (FFVulkanDescriptorSetBinding []) { /* the two bindings run_rct_search() updates */
+ {
+ .name = "slice_data_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .buf_content = "SliceContext slice_ctx[1024];",
+ },
+ {
+ .name = "src",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
+ fv->rep_fmt),
+ .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format),
+ .mem_quali = "readonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0));
+
+ GLSLD(ff_source_ffv1_rct_search_comp);
+
+ RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+
+fail: /* reached on success too: the compiler's temporary output is released either way */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
{
int err;
@@ -1417,6 +1602,17 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
if (!fv->is_rgb && f->bits_per_raw_sample > 8)
fv->rep_fmt = FF_VK_REP_INT;
+ /* Init rct search shader */
+ fv->optimize_rct = fv->is_rgb && f->version >= 4 &&
+ !fv->force_pcm && fv->optimize_rct;
+ if (fv->optimize_rct) {
+ err = init_rct_search_shader(avctx, spv);
+ if (err < 0) {
+ spv->uninit(&spv);
+ return err;
+ }
+ }
+
/* Init setup shader */
err = init_setup_shader(avctx, spv);
if (err < 0) {
@@ -1528,6 +1724,7 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
ff_vk_shader_free(&fv->s, &fv->enc);
ff_vk_shader_free(&fv->s, &fv->reset);
ff_vk_shader_free(&fv->s, &fv->setup);
+ ff_vk_shader_free(&fv->s, &fv->rct_search);
if (fv->exec_ctx_info) {
for (int i = 0; i < fv->async_depth; i++) {
@@ -1591,6 +1788,9 @@ static const AVOption vulkan_encode_ffv1_options[] = {
{ "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL,
{ .i64 = 0 }, 0, 1, VE },
+ { "rct_search", "Run a search for RCT parameters (level 4 only)", OFFSET(optimize_rct), AV_OPT_TYPE_BOOL,
+ { .i64 = 1 }, 0, 1, VE },
+
{ "async_depth", "Internal parallelization depth", OFFSET(async_depth), AV_OPT_TYPE_INT,
{ .i64 = 1 }, 1, INT_MAX, VE },
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 4bbcb38c6a..729cb4f15c 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -7,7 +7,7 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
vulkan/rangecoder.o vulkan/ffv1_vlc.o \
vulkan/ffv1_common.o vulkan/ffv1_reset.o \
vulkan/ffv1_enc_rct.o vulkan/ffv1_enc_setup.o \
- vulkan/ffv1_enc.o
+ vulkan/ffv1_rct_search.o vulkan/ffv1_enc.o
OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/rangecoder.o vulkan/ffv1_vlc.o \
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp
index 6f21e47523..5f8e6704b0 100644
--- a/libavcodec/vulkan/ffv1_enc_setup.comp
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp
@@ -22,7 +22,7 @@
uint8_t state[CONTEXT_SIZE];
-void init_slice(out SliceContext sc, const uint slice_idx)
+void init_slice(inout SliceContext sc, const uint slice_idx)
{
/* Set coordinates */
uvec2 img_size = imageSize(src[0]);
@@ -37,11 +37,13 @@ void init_slice(out SliceContext sc, const uint slice_idx)
sc.slice_pos = ivec2(sxs, sys);
sc.slice_dim = ivec2(sxe - sxs, sye - sys);
- sc.slice_rct_coef = ivec2(1, 1);
sc.slice_coding_mode = int(force_pcm == 1);
sc.slice_reset_contexts = sc.slice_coding_mode == 1;
sc.quant_table_idx = u8vec3(context_model);
+ if ((rct_search == 0) || (sc.slice_coding_mode == 1))
+ sc.slice_rct_coef = ivec2(1, 1);
+
rac_init(sc.c,
OFFBUF(u8buf, out_data, slice_idx * slice_size_max),
slice_size_max);
diff --git a/libavcodec/vulkan/ffv1_rct_search.comp b/libavcodec/vulkan/ffv1_rct_search.comp
new file mode 100644
index 0000000000..055bde46c4
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_rct_search.comp
@@ -0,0 +1,139 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ivec3 load_components(ivec2 pos) /* Loads the three colour components at pos, reordered through fmt_lut */
+{
+ ivec3 pix = ivec3(imageLoad(src[0], pos));
+ if (planar_rgb != 0) { /* separate images: take components 1 and 2 from src[1] and src[2] */
+ for (int i = 1; i < 3; i++)
+ pix[i] = int(imageLoad(src[i], pos)[0]);
+ }
+
+ return ivec3(pix[fmt_lut[0]], pix[fmt_lut[1]], pix[fmt_lut[2]]);
+}
+
+#define NUM_CHECKS 15 /* number of candidate coefficient pairs scored per slice */
+const ivec2 rct_y_coeff[NUM_CHECKS] = { /* (x, y) as used by transform_sample(): G += ((R - G)*x + (B - G)*y) >> 2 */
+ ivec2(0, 0), // 4G
+
+ ivec2(0, 1), // 3G + B
+ ivec2(1, 0), // R + 3G
+ ivec2(1, 1), // R + 2G + B
+
+ ivec2(0, 2), // 2G + 2B
+ ivec2(2, 0), // 2R + 2G
+ ivec2(2, 2), // 2R + 2B
+
+ ivec2(0, 3), // 1G + 3B
+ ivec2(3, 0), // 3R + 1G
+
+ ivec2(0, 4), // 4B
+ ivec2(4, 0), // 4R
+
+ ivec2(1, 2), // R + G + 2B
+ ivec2(2, 1), // 2R + G + B
+
+ ivec2(3, 1), // 3R + B
+ ivec2(1, 3), // R + 3B
+};
+
+shared ivec3 pix_buf[gl_WorkGroupSize.x + 1][gl_WorkGroupSize.y + 1] = { }; /* invocation (x, y) stores at [x + 1][y + 1]; index 0 in either dimension is never written */
+
+ivec3 transform_sample(ivec3 pix, ivec2 rct_coef) /* Applies the colour transform with the given coefficient pair to one pixel */
+{
+ pix.b -= pix.g; /* B and R become differences against G */
+ pix.r -= pix.g;
+ pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2; /* uses the differences computed above */
+ pix.b += rct_offset;
+ pix.r += rct_offset;
+ return pix;
+}
+
+uint get_dist(ivec3 cur) /* Scores cur against a prediction built from three neighbouring pix_buf entries */
+{
+ ivec3 LL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 1]; /* slot of invocation (x - 1, y) */
+ ivec3 TL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 0]; /* slot of invocation (x - 1, y - 1) */
+ ivec3 TT = pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 0]; /* slot of invocation (x, y - 1) */
+
+ ivec3 pred = ivec3(predict(LL.r, ivec2(TL.r, TT.r)),
+ predict(LL.g, ivec2(TL.g, TT.g)),
+ predict(LL.b, ivec2(TL.b, TT.b)));
+
+ uvec3 c = abs(pred - cur); /* per-channel absolute prediction error */
+ return mid_pred(c.r, c.g, c.b); /* presumably the median of the three errors - confirm against mid_pred() */
+}
+
+shared uint score_cols[gl_WorkGroupSize.y] = { }; /* NOTE(review): not referenced anywhere in this file */
+shared uint score_mode[16] = { }; /* accumulated distance per rct_y_coeff entry; only the first NUM_CHECKS are used */
+
+void process(ivec2 pos) /* Adds this pixel's prediction error to score_mode[] for every candidate transform */
+{
+ ivec3 pix = load_components(pos);
+
+ for (int i = 0; i < NUM_CHECKS; i++) {
+ ivec3 tx_pix = transform_sample(pix, rct_y_coeff[i]);
+ pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 1] = tx_pix; /* publish for the neighbours' get_dist() */
+ memoryBarrierShared(); /* NOTE(review): orders memory only, does not wait for other invocations; neighbour slots may hold older values - confirm this is acceptable for the heuristic */
+
+ uint dist = get_dist(tx_pix);
+ atomicAdd(score_mode[i], dist);
+ }
+}
+
+void coeff_search(inout SliceContext sc) /* Scores every candidate over this workgroup's slice and stores the best pair in sc.slice_rct_coef */
+{
+ uvec2 img_size = imageSize(src[0]);
+ uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,
+ gl_NumWorkGroups.x, 0);
+ uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,
+ gl_NumWorkGroups.x, 0);
+ uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,
+ gl_NumWorkGroups.y, 0);
+ uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,
+ gl_NumWorkGroups.y, 0);
+
+ for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += gl_WorkGroupSize.y) { /* each invocation strides over the slice by the workgroup size */
+ for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += gl_WorkGroupSize.x) {
+ process(ivec2(x, y));
+ }
+ }
+ barrier(); /* every invocation's atomicAdd() into score_mode must be complete before invocation (0, 0) reads it */
+ if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) { /* single invocation picks the winner */
+ uint min_score = 0xFFFFFFFF;
+ uint min_idx = 3; /* fallback: rct_y_coeff[3] == (1, 1) */
+ for (int i = 0; i < NUM_CHECKS; i++) {
+ if (score_mode[i] < min_score) { /* strict compare: the earliest entry wins ties */
+ min_score = score_mode[i];
+ min_idx = i;
+ }
+ }
+ sc.slice_rct_coef = rct_y_coeff[min_idx];
+ }
+}
+
+void main(void) /* Entry point: one workgroup searches the coefficients of one slice */
+{
+ if (force_pcm == 1) /* nothing to search when PCM coding is forced */
+ return;
+ const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; /* row-major slice index from the workgroup ID */
+ coeff_search(slice_ctx[slice_idx]);
+}
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 12/16] vulkan/ffv1: unify encode and decode get/put primitives
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (9 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 11/16] ffv1enc_vulkan: implement RCT search for level >= 4 Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 13/16] vulkan_ffv1: pipe through slice decoding status Lynne
` (3 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This factors the shared range coder arithmetic into get_rac_internal and
put_rac_internal, which the state-driven and equiprobable variants now reuse.
---
libavcodec/vulkan/rangecoder.comp | 57 +++++++++----------------------
1 file changed, 17 insertions(+), 40 deletions(-)
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 9e2c5fbecf..8687b8bc3c 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -95,26 +95,26 @@ void renorm_encoder(inout RangeCoder c)
}
#endif
-void put_rac_direct(inout RangeCoder c, inout uint8_t state, bool bit)
+void put_rac_internal(inout RangeCoder c, const int range1, bool bit)
{
- int range1 = uint16_t((c.range * state) >> 8);
-
#ifdef DEBUG
- if (state == 0)
- debugPrintfEXT("Error: state is zero");
if (range1 >= c.range)
debugPrintfEXT("Error: range1 >= c.range");
if (range1 <= 0)
debugPrintfEXT("Error: range1 <= 0");
#endif
- int diff = c.range - range1;
- c.low += bit ? diff : 0;
- c.range = bit ? range1 : diff;
+ int ranged = c.range - range1;
+ c.low += bit ? ranged : 0;
+ c.range = bit ? range1 : ranged;
if (expectEXT(c.range < 0x100, false))
renorm_encoder(c);
+}
+void put_rac_direct(inout RangeCoder c, inout uint8_t state, bool bit)
+{
+ put_rac_internal(c, (c.range * state) >> 8, bit);
state = zero_one_state[(uint(bit) << 8) + state];
}
@@ -126,21 +126,7 @@ void put_rac(inout RangeCoder c, uint64_t state, bool bit)
/* Equiprobable bit */
void put_rac_equi(inout RangeCoder c, bool bit)
{
- int range1 = c.range >> 1;
-
-#ifdef DEBUG
- if (range1 >= c.range)
- debugPrintfEXT("Error: range1 >= c.range");
- if (range1 <= 0)
- debugPrintfEXT("Error: range1 <= 0");
-#endif
-
- int diff = c.range - range1;
- c.low += bit ? diff : 0;
- c.range = bit ? range1 : diff;
-
- if (expectEXT(c.range < 0x100, false))
- renorm_encoder(c);
+ put_rac_internal(c, c.range >> 1, bit);
}
void put_rac_terminate(inout RangeCoder c)
@@ -224,11 +210,9 @@ void refill(inout RangeCoder c)
}
}
-bool get_rac_direct(inout RangeCoder c, inout uint8_t state)
+bool get_rac_internal(inout RangeCoder c, const int range1)
{
- int range1 = c.range * state >> 8;
int ranged = c.range - range1;
-
bool bit = c.low >= ranged;
c.low -= bit ? ranged : 0;
c.range = (bit ? 0 : ranged) + (bit ? range1 : 0);
@@ -236,6 +220,12 @@ bool get_rac_direct(inout RangeCoder c, inout uint8_t state)
if (expectEXT(c.range < 0x100, false))
refill(c);
+ return bit;
+}
+
+bool get_rac_direct(inout RangeCoder c, inout uint8_t state)
+{
+ bool bit = get_rac_internal(c, c.range * state >> 8);
state = zero_one_state[state + (bit ? 256 : 0)];
return bit;
}
@@ -247,18 +237,5 @@ bool get_rac(inout RangeCoder c, uint64_t state)
bool get_rac_equi(inout RangeCoder c)
{
- int range1 = c.range >> 1;
-
- c.range -= range1;
-
- bool bit = c.low >= c.range;
- if (bit) {
- c.low -= c.range;
- c.range = range1;
- }
-
- if (expectEXT(c.range < 0x100, false))
- refill(c);
-
- return bit;
+ return get_rac_internal(c, c.range >> 1);
}
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 13/16] vulkan_ffv1: pipe through slice decoding status
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (10 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 12/16] vulkan/ffv1: unify encode and decode get/put primitives Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 14/16] vulkan: enable VK_KHR_shader_subgroup_rotate Lynne
` (2 subsequent siblings)
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/vulkan/ffv1_dec.comp | 4 ++
libavcodec/vulkan/ffv1_dec_setup.comp | 4 +-
libavcodec/vulkan_decode.c | 1 +
libavcodec/vulkan_decode.h | 1 +
libavcodec/vulkan_ffv1.c | 60 +++++++++++++++++++--------
5 files changed, 52 insertions(+), 18 deletions(-)
diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
index e73b3f1dc0..1d33b32c6b 100644
--- a/libavcodec/vulkan/ffv1_dec.comp
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -291,4 +291,8 @@ void main(void)
{
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
decode_slice(slice_ctx[slice_idx], slice_idx);
+
+ uint32_t status = corrupt ? uint32_t(corrupt) : overread;
+ if (status != 0)
+ slice_status[2*slice_idx + 1] = status;
}
diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp
index a27a878927..671f28e7e7 100644
--- a/libavcodec/vulkan/ffv1_dec_setup.comp
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp
@@ -133,6 +133,8 @@ void main(void)
for (int i = 0; i < slice_size; i++)
crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);
- slice_crc_mismatch[slice_idx] = crc;
+ slice_status[2*slice_idx + 0] = crc;
}
+
+ slice_status[2*slice_idx + 1] = corrupt ? uint32_t(corrupt) : overread;
}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index f1313c8409..7310ba1547 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -142,6 +142,7 @@ static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic)
vkpic->destroy_image_view = vk->DestroyImageView;
vkpic->wait_semaphores = vk->WaitSemaphores;
+ vkpic->invalidate_memory_ranges = vk->InvalidateMappedMemoryRanges;
}
int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index cbd22b3591..bf6506f280 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -114,6 +114,7 @@ typedef struct FFVulkanDecodePicture {
/* Vulkan functions needed for destruction, as no other context is guaranteed to exist */
PFN_vkWaitSemaphores wait_semaphores;
PFN_vkDestroyImageView destroy_image_view;
+ PFN_vkInvalidateMappedMemoryRanges invalidate_memory_ranges;
} FFVulkanDecodePicture;
/**
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index efbf5fa953..c839f4c387 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -221,7 +221,7 @@ static int vk_ffv1_start_frame(AVCodecContext *avctx,
&fp->slice_status_buf,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
- NULL, f->slice_count*sizeof(uint32_t),
+ NULL, 2*f->slice_count*sizeof(uint32_t),
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
if (err < 0)
@@ -408,7 +408,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
1, 2, 0,
slice_status,
- 0, f->slice_count*sizeof(uint32_t),
+ 0, 2*f->slice_count*sizeof(uint32_t),
VK_FORMAT_UNDEFINED);
ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
@@ -538,10 +538,15 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
1, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
+ ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
+ 1, 2, 0,
+ slice_status,
+ 0, 2*f->slice_count*sizeof(uint32_t),
+ VK_FORMAT_UNDEFINED);
if (is_rgb)
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
f->picture.f, vp->view.out,
- 1, 2,
+ 1, 3,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
@@ -700,8 +705,8 @@ static int init_setup_shader(FFV1Context *f, FFVulkanContext *s,
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_quali = "writeonly",
- .buf_content = "uint32_t slice_crc_mismatch",
- .buf_elems = f->max_slice_count,
+ .buf_content = "uint32_t slice_status",
+ .buf_elems = 2*f->max_slice_count,
},
};
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0));
@@ -895,6 +900,14 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
.elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
+ {
+ .name = "slice_status_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_quali = "writeonly",
+ .buf_content = "uint32_t slice_status",
+ .buf_elems = 2*f->max_slice_count,
+ },
{
.name = "dst",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
@@ -906,7 +919,7 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
- RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2 + rgb, 0, 0));
+ RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3 + rgb, 0, 0));
GLSLD(ff_source_ffv1_dec_comp);
@@ -1114,22 +1127,35 @@ fail:
static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
{
- AVHWDeviceContext *hwctx = _hwctx.nc;
+ AVHWDeviceContext *dev_ctx = _hwctx.nc;
+ AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
FFv1VulkanDecodePicture *fp = data;
FFVulkanDecodePicture *vp = &fp->vp;
+ FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data;
- ff_vk_decode_free_frame(hwctx, vp);
+ ff_vk_decode_free_frame(dev_ctx, vp);
+
+ /* Invalidate slice/output data if needed */
+ if (!(slice_status->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkMappedMemoryRange invalidate_data = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = slice_status->mem,
+ .offset = 0,
+ .size = 2*fp->slice_num*sizeof(uint32_t),
+ };
+ vp->invalidate_memory_ranges(hwctx->act_dev,
+ 1, &invalidate_data);
+ }
- if (fp->crc_checked) {
- FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data;
- for (int i = 0; i < fp->slice_num; i++) {
- uint32_t crc_res;
- crc_res = AV_RN32(slice_status->mapped_mem + i*sizeof(uint32_t));
- if (crc_res != 0)
- av_log(hwctx, AV_LOG_ERROR, "CRC mismatch in slice %i, res: 0x%x\n",
- i, crc_res);
- }
+ for (int i = 0; i < fp->slice_num; i++) {
+ uint32_t crc_res = 0;
+ if (fp->crc_checked)
+ crc_res = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 0);
+ uint32_t status = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 4);
+ if (status || crc_res)
+ av_log(dev_ctx, AV_LOG_ERROR, "Slice %i status: 0x%x, CRC 0x%x\n",
+ i, status, crc_res);
}
av_buffer_unref(&vp->slices_buf);
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 14/16] vulkan: enable VK_KHR_shader_subgroup_rotate
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (11 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 13/16] vulkan_ffv1: pipe through slice decoding status Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 15/16] hwcontext_vulkan: correct image transfer usage flags Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 16/16] hwcontext_vulkan: only try exporting DMABUF memory on !WIN32 and only for DMABUF tiling Lynne
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
Yet another thing that should always have been present.
---
libavutil/hwcontext_vulkan.c | 5 +++++
libavutil/vulkan_functions.h | 1 +
libavutil/vulkan_loader.h | 1 +
3 files changed, 7 insertions(+)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 978d7e29d3..eded36bc01 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -79,6 +79,7 @@ typedef struct VulkanDeviceFeatures {
VkPhysicalDeviceVulkan12Features vulkan_1_2;
VkPhysicalDeviceVulkan13Features vulkan_1_3;
VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore;
+ VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR subgroup_rotate;
#ifdef VK_KHR_shader_expect_assume
VkPhysicalDeviceShaderExpectAssumeFeaturesKHR expect_assume;
@@ -205,6 +206,8 @@ static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *f
FF_VK_STRUCT_EXT(s, &feats->device, &feats->timeline_semaphore, FF_VK_EXT_PORTABILITY_SUBSET,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES);
+ FF_VK_STRUCT_EXT(s, &feats->device, &feats->subgroup_rotate, FF_VK_EXT_SUBGROUP_ROTATE,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_ROTATE_FEATURES);
#ifdef VK_KHR_shader_expect_assume
FF_VK_STRUCT_EXT(s, &feats->device, &feats->expect_assume, FF_VK_EXT_EXPECT_ASSUME,
@@ -283,6 +286,7 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF
COPY_VAL(vulkan_1_3.dynamicRendering);
COPY_VAL(timeline_semaphore.timelineSemaphore);
+ COPY_VAL(subgroup_rotate.shaderSubgroupRotate);
COPY_VAL(video_maintenance_1.videoMaintenance1);
#ifdef VK_KHR_video_maintenance2
@@ -588,6 +592,7 @@ static const VulkanOptExtension optional_device_exts[] = {
{ VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX },
{ VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW },
{ VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT },
+ { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE },
#ifdef VK_KHR_shader_expect_assume
{ VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME },
#endif
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index cd61d71577..8b413013e6 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -48,6 +48,7 @@ typedef uint64_t FFVulkanExtensions;
#define FF_VK_EXT_PUSH_DESCRIPTOR (1ULL << 14) /* VK_KHR_push_descriptor */
#define FF_VK_EXT_RELAXED_EXTENDED_INSTR (1ULL << 15) /* VK_KHR_shader_relaxed_extended_instruction */
#define FF_VK_EXT_EXPECT_ASSUME (1ULL << 16) /* VK_KHR_shader_expect_assume */
+#define FF_VK_EXT_SUBGROUP_ROTATE (1ULL << 17) /* VK_KHR_shader_subgroup_rotate */
/* Video extensions */
#define FF_VK_EXT_VIDEO_QUEUE (1ULL << 36) /* VK_KHR_video_queue */
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index eaf6e2e6bb..a7976fe560 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -58,6 +58,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
{ VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX },
{ VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW },
{ VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT },
+ { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE },
{ VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 },
#ifdef VK_KHR_video_maintenance2
{ VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_2 },
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 15/16] hwcontext_vulkan: correct image transfer usage flags
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (12 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 14/16] vulkan: enable VK_KHR_shader_subgroup_rotate Lynne
@ 2025-05-14 19:02 ` Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 16/16] hwcontext_vulkan: only try exporting DMABUF memory on !WIN32 and only for DMABUF tiling Lynne
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
By pure coincidence, BUFFER and IMAGE flags were equal for those
two usage types.
---
libavutil/hwcontext_vulkan.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index eded36bc01..9f9df91e5d 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2784,8 +2784,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
/* Image usage flags */
if (!hwctx->usage) {
- hwctx->usage = supported_usage & (VK_BUFFER_USAGE_TRANSFER_DST_BIT |
- VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ hwctx->usage = supported_usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT);
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH 16/16] hwcontext_vulkan: only try exporting DMABUF memory on !WIN32 and only for DMABUF tiling
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
` (13 preceding siblings ...)
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 15/16] hwcontext_vulkan: correct image transfer usage flags Lynne
@ 2025-05-14 19:02 ` Lynne
14 siblings, 0 replies; 17+ messages in thread
From: Lynne @ 2025-05-14 19:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/hwcontext_vulkan.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 9f9df91e5d..4f205137eb 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -2643,11 +2643,12 @@ static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size)
if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY)
try_export_flags(hwfc, &eiinfo.handleTypes, &e,
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
-#endif
- if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_DMABUF_MEMORY)
+ if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_DMABUF_MEMORY &&
+ hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
try_export_flags(hwfc, &eiinfo.handleTypes, &e,
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+#endif
for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
--
2.49.0.395.g12beb8f557c
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
* [FFmpeg-devel] [PATCH] ffv1enc_vulkan: fix array overflow
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 06/16] ffv1enc_vulkan: switch to 2-line cache, unify prediction code Lynne
@ 2025-05-23 14:38 ` Jerome Martinez
0 siblings, 0 replies; 17+ messages in thread
From: Jerome Martinez @ 2025-05-23 14:38 UTC (permalink / raw)
To: ffmpeg-devel
[-- Attachment #1: Type: text/plain, Size: 206 bytes --]
Fix a crash (with some GCC builds) or a silent exit (with the Microsoft
compiler) introduced by "[PATCH 06/16] ffv1enc_vulkan: switch to 2-line
cache, unify prediction code".
https://ffmpeg.org/pipermail/ffmpeg-devel/2025-May/343502.html
[-- Attachment #2: 0001-vulkan-ffv1-fix-array-overflow.patch --]
[-- Type: text/plain, Size: 739 bytes --]
From: Maxime Gervais <maxime@mediaarea.net>
Date: Fri, 23 May 2025 16:20:41 +0200
Subject: [PATCH] ffv1enc_vulkan: fix array overflow
---
libavcodec/ffv1enc_vulkan.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index d9e12f5fae..15aaddac98 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -1358,6 +1358,8 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
.mem_quali = "writeonly",
.buf_content = "uint64_t slice_results[2048];",
},
+ { /* place holder for desc_set[3] */
+ },
};
if (fv->is_rgb) {
AVHWFramesContext *intermediate_frames_ctx;
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2025-05-23 14:39 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-05-14 19:02 [FFmpeg-devel] [PATCH 01/16] ffv1enc_vulkan: merge all encoder variants into one file Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 02/16] vulkan/ffv1: synchronize get_pred implementations between encoder and decoder Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 03/16] ffv1enc_vulkan: get rid of temporary data for the setup shader Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 04/16] ffv1enc_vulkan: unify EC code between setup and encode Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 05/16] ffv1enc_vulkan: minor EC optimizations Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 06/16] ffv1enc_vulkan: switch to 2-line cache, unify prediction code Lynne
2025-05-23 14:38 ` [FFmpeg-devel] [PATCH] ffv1enc_vulkan: fix array overflow Jerome Martinez
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 07/16] ffv1_common: minor RGB optimization Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 08/16] ffv1enc_vulkan: use ff_get_encode_buffer Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 09/16] vulkan_ffv1: fix PCM + cached symbol reader Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 10/16] ffv1enc_vulkan: implement the cached EC writer from the decoder Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 11/16] ffv1enc_vulkan: implement RCT search for level >= 4 Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 12/16] vulkan/ffv1: unify encode and decode get/put primitives Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 13/16] vulkan_ffv1: pipe through slice decoding status Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 14/16] vulkan: enable VK_KHR_shader_subgroup_rotate Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 15/16] hwcontext_vulkan: correct image transfer usage flags Lynne
2025-05-14 19:02 ` [FFmpeg-devel] [PATCH 16/16] hwcontext_vulkan: only try exporting DMABUF memory on !WIN32 and only for DMABUF tiling Lynne
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git