Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Lynne <dev@lynne.ee>
To: ffmpeg-devel@ffmpeg.org
Cc: Lynne <dev@lynne.ee>
Subject: [FFmpeg-devel] [PATCH 13/13] ffv1: add a Vulkan-based decoder
Date: Mon, 10 Mar 2025 04:08:59 +0100
Message-ID: <20250310030912.60902-3-dev@lynne.ee> (raw)
In-Reply-To: <20250310030912.60902-1-dev@lynne.ee>

This patch adds a fully-featured level 3 and 4 decoder for FFv1,
supporting Golomb and all Range coding variants, all pixel formats,
and all features, except for the newly added floating-point formats.

On a 6000 Ada GPU, it decodes 3840x2160 bgr0 content at 50Mbps (standard
desktop recording) at 400fps.
For comparison, the software decoder on an Alder Lake with 24 threads can barely do 100fps.
---
 configure                             |    2 +
 libavcodec/Makefile                   |    1 +
 libavcodec/ffv1dec.c                  |    6 +
 libavcodec/hwaccels.h                 |    1 +
 libavcodec/vulkan/Makefile            |    6 +
 libavcodec/vulkan/common.comp         |   95 ++
 libavcodec/vulkan/ffv1_common.comp    |    5 +
 libavcodec/vulkan/ffv1_dec.comp       |  303 ++++++
 libavcodec/vulkan/ffv1_dec_rct.comp   |   72 ++
 libavcodec/vulkan/ffv1_dec_setup.comp |  138 +++
 libavcodec/vulkan/ffv1_rct.comp       |   90 ++
 libavcodec/vulkan/ffv1_vlc.comp       |   37 +
 libavcodec/vulkan/rangecoder.comp     |   74 ++
 libavcodec/vulkan_decode.c            |   17 +
 libavcodec/vulkan_ffv1.c              | 1292 +++++++++++++++++++++++++
 15 files changed, 2139 insertions(+)
 create mode 100644 libavcodec/vulkan/ffv1_dec.comp
 create mode 100644 libavcodec/vulkan/ffv1_dec_rct.comp
 create mode 100644 libavcodec/vulkan/ffv1_dec_setup.comp
 create mode 100644 libavcodec/vulkan/ffv1_rct.comp
 create mode 100644 libavcodec/vulkan_ffv1.c

diff --git a/configure b/configure
index 04b83a8868..fbee82f920 100755
--- a/configure
+++ b/configure
@@ -3195,6 +3195,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox"
 av1_videotoolbox_hwaccel_select="av1_decoder"
 av1_vulkan_hwaccel_deps="vulkan"
 av1_vulkan_hwaccel_select="av1_decoder"
+ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler"
+ffv1_vulkan_hwaccel_select="ffv1_decoder"
 h263_vaapi_hwaccel_deps="vaapi"
 h263_vaapi_hwaccel_select="h263_decoder"
 h263_videotoolbox_hwaccel_deps="videotoolbox"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 74de7737f9..eb91cbb5ce 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1017,6 +1017,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL)          += vaapi_av1.o
 OBJS-$(CONFIG_AV1_VDPAU_HWACCEL)          += vdpau_av1.o
 OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL)   += videotoolbox_av1.o
 OBJS-$(CONFIG_AV1_VULKAN_HWACCEL)         += vulkan_decode.o vulkan_av1.o
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL)        += vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o
 OBJS-$(CONFIG_H263_VAAPI_HWACCEL)         += vaapi_mpeg4.o
 OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
 OBJS-$(CONFIG_H264_D3D11VA_HWACCEL)       += dxva2_h264.o
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 6396f22f79..7b0740ad37 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -349,6 +349,9 @@ static int decode_slice(AVCodecContext *c, void *arg)
 static enum AVPixelFormat get_pixel_format(FFV1Context *f)
 {
     enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_FFV1_VULKAN_HWACCEL
+        AV_PIX_FMT_VULKAN,
+#endif
         f->pix_fmt,
         AV_PIX_FMT_NONE,
     };
@@ -862,6 +865,9 @@ const FFCodec ff_ffv1_decoder = {
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_USES_PROGRESSFRAMES,
     .hw_configs     = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_FFV1_VULKAN_HWACCEL
+        HWACCEL_VULKAN(ffv1),
+#endif
         NULL
     },
 };
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 910a024032..0b2c725247 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel;
 extern const struct FFHWAccel ff_av1_vdpau_hwaccel;
 extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_av1_vulkan_hwaccel;
+extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel;
 extern const struct FFHWAccel ff_h263_vaapi_hwaccel;
 extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_h264_d3d11va_hwaccel;
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 351332ee44..e6bad486bd 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -11,6 +11,12 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER)  +=  vulkan/common.o \
 					vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \
 					vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o
 
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL)  +=  vulkan/common.o \
+					vulkan/rangecoder.o vulkan/ffv1_vlc.o \
+					vulkan/ffv1_common.o vulkan/ffv1_reset.o \
+					vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o \
+					vulkan/ffv1_dec_rct.o
+
 VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
 .SECONDARY: $(VULKAN:.comp=.c)
 libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
index e4e983b3e2..b0adf8590e 100644
--- a/libavcodec/vulkan/common.comp
+++ b/libavcodec/vulkan/common.comp
@@ -26,6 +26,10 @@ layout(buffer_reference, buffer_reference_align = 1) buffer u8vec2buf {
     u8vec2 v;
 };
 
+layout(buffer_reference, buffer_reference_align = 1) buffer u8vec4buf {
+    u8vec4 v; /* four consecutive bytes; align = 1 permits any byte address */
+};
+
 layout(buffer_reference, buffer_reference_align = 2) buffer u16buf {
     uint16_t v;
 };
@@ -182,3 +186,94 @@ uint32_t put_bytes_count(in PutBitContext pb)
     uint64_t num_bytes = (pb.buf - pb.buf_start) + ((BUF_BITS - pb.bit_left) >> 3);
     return uint32_t(num_bytes);
 }
+
+struct GetBitContext {
+    uint64_t buf_start; /* address of the first byte of the bitstream */
+    uint64_t buf; /* address of the next byte to load into the cache */
+    uint64_t buf_end; /* buf_start + length in bytes */
+
+    uint64_t bits; /* bit cache; the next unread bit is the MSB */
+    uint bits_valid; /* number of unread bits currently held in the cache */
+    uint size_in_bits; /* total bitstream length in bits */
+};
+
+#define LOAD64() /* overwrite the cache of a variable named gb with the next 8 bytes */ \
+    {                                                  \
+        u8vec4buf ptr = u8vec4buf(gb.buf);             \
+        uint32_t rf1 = pack32((ptr[0].v).wzyx); /* .wzyx: first byte in memory becomes the MSB */ \
+        uint32_t rf2 = pack32((ptr[1].v).wzyx);        \
+        gb.buf += 8; /* no check against gb.buf_end is made */ \
+        gb.bits = uint64_t(rf1) << 32 | uint64_t(rf2); \
+        gb.bits_valid = 64;                            \
+    }
+
+#define RELOAD32() /* append the next 4 bytes below the bits already cached in gb */ \
+    {                                                       \
+        u8vec4buf ptr = u8vec4buf(gb.buf);                  \
+        uint32_t rf = pack32((ptr[0].v).wzyx);              \
+        gb.buf += 4;                                        \
+        gb.bits = uint64_t(rf) << (32 - gb.bits_valid) | gb.bits; /* shift is only valid while bits_valid <= 32 */ \
+        gb.bits_valid += 32;                                \
+    }
+
+void init_get_bits(inout GetBitContext gb, u8buf data, uint64_t len)
+{ /* Point gb at len bytes starting at data and fill the bit cache */
+    gb.buf = gb.buf_start = uint64_t(data);
+    gb.buf_end = uint64_t(data) + len;
+    gb.size_in_bits = uint(len) * 8;
+
+    /* Preload the first 8 bytes; this happens regardless of len */
+    LOAD64()
+}
+
+bool get_bit(inout GetBitContext gb)
+{
+    /* Cache fully drained: pull in the next 8 bytes before reading */
+    if (gb.bits_valid == 0)
+        LOAD64()
+    const bool msb = bool(gb.bits >> 63);
+    gb.bits_valid -= 1;
+    gb.bits <<= 1;
+    return msb;
+}
+
+uint get_bits(inout GetBitContext gb, uint n)
+{ /* Read and consume the next n bits, most significant first */
+    if (n == 0)
+        return 0;
+    /* With n == 0 handled above, the shift below is always by less than 64 */
+    if (n > gb.bits_valid)
+        RELOAD32()
+
+    uint val = uint(gb.bits >> (64 - n));
+    gb.bits <<= n;
+    gb.bits_valid -= n;
+    return val;
+}
+
+uint show_bits(inout GetBitContext gb, uint n)
+{ /* Peek at the next n bits without consuming them; n == 0 yields 0 */
+    if (n > gb.bits_valid)
+        RELOAD32()
+    /* Same n == 0 guard as get_bits(): a 64-bit shift by 64 is undefined */
+    return n == 0 ? 0u : uint(gb.bits >> (64 - n));
+}
+
+void skip_bits(inout GetBitContext gb, uint n)
+{ /* Discard the next n bits */
+    if (n > gb.bits_valid)
+        RELOAD32()
+
+    gb.bits <<= n;
+    gb.bits_valid -= n;
+}
+
+uint tell_bits(in GetBitContext gb)
+{ /* Bits consumed so far: bits loaded from memory minus bits still cached */
+    return uint(gb.buf - gb.buf_start) * 8 - gb.bits_valid;
+}
+
+uint left_bits(in GetBitContext gb)
+{ /* Bits not yet consumed: total size minus bits loaded, plus bits still cached */
+    return gb.size_in_bits - uint(gb.buf - gb.buf_start) * 8 + gb.bits_valid;
+}
diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp
index 604d03b2de..d2bd7e736e 100644
--- a/libavcodec/vulkan/ffv1_common.comp
+++ b/libavcodec/vulkan/ffv1_common.comp
@@ -22,7 +22,12 @@
 
 struct SliceContext {
     RangeCoder c;
+
+#if !defined(DECODE)
     PutBitContext pb; /* 8*8 bytes */
+#else
+    GetBitContext gb;
+#endif
 
     ivec2 slice_dim;
     ivec2 slice_pos;
diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
new file mode 100644
index 0000000000..a9feb9d318
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -0,0 +1,303 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ivec2 get_pred(ivec2 pos, ivec2 off, int p, int comp, int sw,
+               uint8_t context_model)
+{ /* pos: image coordinate, off: coordinate within the slice, sw: slice width */
+    const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); /* column 0: shift "left" loads up-right by one */
+    const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0); /* column 1: shift the "two left" load likewise */
+
+    TYPE top2 = TYPE(0); /* sample two rows up; stays 0 in the slice's first two rows */
+    if (off.y > 1)
+        top2 = TYPE(imageLoad(dst[p], pos + ivec2(0, -2))[comp]);
+
+    VTYPE3 top  = VTYPE3(TYPE(0),
+                         TYPE(0),
+                         TYPE(0));
+    if (off.y > 0 && off != ivec2(0, 1)) /* up-left; at column 0 this reads pos + (0, -2) */
+        top[0] = TYPE(imageLoad(dst[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
+    if (off.y > 0) {
+        top[1] = TYPE(imageLoad(dst[p], pos + ivec2(0, -1))[comp]); /* up */
+        top[2] = TYPE(imageLoad(dst[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]); /* up-right, clamped to the last column */
+    }
+
+    VTYPE2 cur = VTYPE2(TYPE(0),
+                        TYPE(0));
+    if (off.x > 0 && off != ivec2(1, 0)) /* two to the left; at column 1 this reads pos + (-1, -1) */
+        cur[0] = TYPE(imageLoad(dst[p], pos + ivec2(-2,  0) + yoff_border2)[comp]);
+    if (off != ivec2(0, 0)) /* left; at column 0 this reads pos + (0, -1) */
+        cur[1] = TYPE(imageLoad(dst[p], pos + ivec2(-1,  0) + yoff_border1)[comp]);
+
+    /* Returned as ivec2(context, prediction); neighbours are read back from dst */
+    return ivec2(get_context(cur, top, top2, context_model),
+                 predict(cur[1], VTYPE2(top)));
+}
+
+void store_comp(ivec2 pos, int p, int comp, uint v)
+{ /* Write sample v for component comp at pos in image dst[p] */
+#ifdef RGB
+    uvec4 pix = imageLoad(dst[p], pos); /* read-modify-write keeps the other components */
+    pix[comp] = v;
+    imageStore(dst[p], pos, pix);
+#else
+    imageStore(dst[p], pos, uvec4(v)); /* v is replicated to all four components; comp is unused here */
+#endif
+}
+
+#ifndef GOLOMB
+int get_isymbol(inout RangeCoder c, uint64_t state)
+{ /* Decode one signed symbol using the state bytes starting at address state */
+    if (get_rac(c, state)) /* state byte 0: "symbol is zero" flag */
+        return 0;
+
+    state += 1;
+    /* Unary-coded exponent e; more than 31 iterations marks the stream corrupt */
+    int e = 0;
+    while (get_rac(c, state + min(e, 9))) { // state bytes 1..10
+        e++;
+        if (e > 31) {
+            corrupt = true;
+            return 0;
+        }
+    }
+
+    state += 21; /* state now addresses byte 22 */
+    /* Magnitude: an implicit leading 1 followed by e bits, MSB first */
+    int a = 1;
+    for (int i = e - 1; i >= 0; i--)
+        a += a + int(get_rac(c, state + min(i, 9)));  // state bytes 22..31
+
+    e = -int(get_rac(c, state - 11 + min(e, 10))); // state bytes 11..21: sign, e becomes 0 or -1
+    return (a ^ e) - e; /* negates a when e == -1 */
+}
+
+void decode_line_pcm(inout SliceContext sc, int y, int p, int comp,
+                     int bits)
+{ /* Decode row y of the slice as raw samples of the given bit width */
+    ivec2 sp = sc.slice_pos;
+    int w = sc.slice_dim.x;
+
+#ifndef RGB
+    if (p > 0 && p < 3) { /* planes 1 and 2 use chroma-subsampled geometry */
+        w >>= chroma_shift.x;
+        sp >>= chroma_shift;
+    }
+#endif
+
+    for (int x = 0; x < w; x++) {
+        uint v = 0;
+        for (int i = (bits - 1); i >= 0; i--) /* one equiprobable bit at a time, MSB first */
+            v |= uint(get_rac_equi(sc.c)) << i;
+
+        store_comp(sp + ivec2(x, y), p, comp, v);
+    }
+}
+
+void decode_line(inout SliceContext sc, uint64_t state,
+                 int y, int p, int comp, int bits, const int run_index)
+{ /* Range-coder variant: decode row y of the slice; run_index is unused here */
+    ivec2 sp = sc.slice_pos;
+    int w = sc.slice_dim.x;
+
+#ifndef RGB
+    if (p > 0 && p < 3) { /* planes 1 and 2 use chroma-subsampled geometry */
+        w >>= chroma_shift.x;
+        sp >>= chroma_shift;
+    }
+#endif
+
+    for (int x = 0; x < w; x++) {
+        ivec2 pr = get_pred(sp + ivec2(x, y), ivec2(x, y), p, comp, w,
+                            sc.quant_table_idx[p]); /* pr[0] = context, pr[1] = prediction */
+
+        int diff = get_isymbol(sc.c, state + CONTEXT_SIZE*abs(pr[0])); /* one CONTEXT_SIZE block of state per |context| */
+        if (pr[0] < 0) /* a negative context flips the sign of the residual */
+            diff = -diff;
+
+        uint v = zero_extend(pr[1] + diff, bits);
+        store_comp(sp + ivec2(x, y), p, comp, v);
+    }
+}
+
+#else /* GOLOMB */
+
+void decode_line(inout SliceContext sc, uint64_t state,
+                 int y, int p, int comp, int bits, inout int run_index)
+{ /* Golomb variant: decode row y of the slice; run_index carries over between calls */
+    ivec2 sp = sc.slice_pos;
+    int w = sc.slice_dim.x;
+
+#ifndef RGB
+    if (p > 0 && p < 3) { /* planes 1 and 2 use chroma-subsampled geometry */
+        w >>= chroma_shift.x;
+        sp >>= chroma_shift;
+    }
+#endif
+
+    int run_count = 0;
+    int run_mode  = 0; /* 0: no run, 1: inside a run, 2: final partial run */
+
+    for (int x = 0; x < w; x++) {
+        ivec2 pos = sp + ivec2(x, y);
+        int diff;
+        ivec2 pr = get_pred(pos, ivec2(x, y), p, comp, w,
+                            sc.quant_table_idx[p]); /* pr[0] = context, pr[1] = prediction */
+
+        VlcState sb = VlcState(state + VLC_STATE_SIZE*abs(pr[0]));
+
+        if (pr[0] == 0 && run_mode == 0) /* context 0 starts a run */
+            run_mode = 1;
+
+        if (run_mode != 0) {
+            if (run_count == 0 && run_mode == 1) {
+                int tmp_idx = int(log2_run[run_index]);
+                if (get_bit(sc.gb)) { /* 1: a full run of 1 << tmp_idx samples */
+                    run_count = 1 << tmp_idx;
+                    if (x + run_count <= w)
+                        run_index++;
+                } else { /* 0: the remaining run length follows in tmp_idx bits */
+                    if (tmp_idx != 0) {
+                        run_count = int(get_bits(sc.gb, tmp_idx));
+                    } else
+                        run_count = 0;
+
+                    if (run_index != 0)
+                        run_index--;
+                    run_mode = 2;
+                }
+            }
+
+            run_count--;
+            if (run_count < 0) { /* run ended: the next symbol is an explicit residual */
+                run_mode  = 0;
+                run_count = 0;
+                diff = read_vlc_symbol(sc.gb, sb, bits);
+                if (diff >= 0) /* non-negative values are shifted up by one after a run */
+                    diff++;
+            } else {
+                diff = 0;
+            }
+        } else {
+            diff = read_vlc_symbol(sc.gb, sb, bits);
+        }
+
+        if (pr[0] < 0) /* a negative context flips the sign of the residual */
+            diff = -diff;
+
+        uint v = zero_extend(pr[1] + diff, bits);
+        store_comp(pos, p, comp, v);
+    }
+}
+#endif
+
+void decode_slice(inout SliceContext sc, const uint slice_idx)
+{ /* Decode every plane/component of one slice, row by row, into dst */
+    int run_index = 0;
+
+#ifndef RGB
+    int bits = bits_per_raw_sample;
+#else
+    /* The former "bits = 9; if (bits != 8 || ...)" test was always true, so
+     * this assignment always ran: one extra bit unless the slice is PCM-coded */
+    int bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
+#endif
+
+    /* PCM coding */
+#ifndef GOLOMB
+    if (sc.slice_coding_mode == 1) {
+#ifndef RGB
+        for (int p = 0; p < planes; p++) {
+            int h = sc.slice_dim.y;
+            if (p > 0 && p < 3)
+                h >>= chroma_shift.y;
+
+            for (int y = 0; y < h; y++)
+                decode_line_pcm(sc, y, p, 0, bits);
+        }
+#else
+        if (transparency == 1) { /* components are decoded in the order 1, 2, 0, 3 */
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line_pcm(sc, y, 0, 1, bits);
+                decode_line_pcm(sc, y, 0, 2, bits);
+                decode_line_pcm(sc, y, 0, 0, bits);
+                decode_line_pcm(sc, y, 0, 3, bits);
+            }
+        } else {
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line_pcm(sc, y, 0, 1, bits);
+                decode_line_pcm(sc, y, 0, 2, bits);
+                decode_line_pcm(sc, y, 0, 0, bits);
+            }
+        }
+#endif
+    } else
+
+    /* Arithmetic coding */
+#endif
+    {
+        uint64_t slice_state_off = uint64_t(slice_state) +
+                                   slice_idx*plane_state_size*codec_planes; /* this slice's region of the state buffer */
+
+#ifndef RGB
+        for (int p = 0; p < planes; p++) {
+            int h = sc.slice_dim.y;
+            if (p > 0 && p < 3)
+                h >>= chroma_shift.y;
+
+            for (int y = 0; y < h; y++)
+                decode_line(sc, slice_state_off, y, p, 0, bits, run_index);
+
+            /* For the second chroma plane, reuse the first plane's state */
+            if (p != 1)
+                slice_state_off += plane_state_size;
+        }
+#else
+        if (transparency == 1) { /* components 2 and 0 share state set 1; component 3 uses set 2 */
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line(sc, slice_state_off + plane_state_size*0,
+                            y, 0, 1, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 2, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 0, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*2,
+                            y, 0, 3, bits, run_index);
+            }
+        } else {
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line(sc, slice_state_off + plane_state_size*0,
+                            y, 0, 1, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 2, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 0, bits, run_index);
+            }
+        }
+#endif
+    }
+}
+
+void main(void)
+{ /* One workgroup per slice; the slice index is the row-major workgroup index */
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    decode_slice(slice_ctx[slice_idx], slice_idx);
+}
diff --git a/libavcodec/vulkan/ffv1_dec_rct.comp b/libavcodec/vulkan/ffv1_dec_rct.comp
new file mode 100644
index 0000000000..0305dc3295
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec_rct.comp
@@ -0,0 +1,72 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+void bypass_block(in SliceContext sc)
+{ /* Copy the slice's pixels from src[0] to dst[0] unchanged */
+    ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos;
+    ivec2 end = sc.slice_pos + sc.slice_dim;
+    /* Each invocation starts at its local ID and steps by the workgroup size */
+    for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y)
+        for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x)
+            imageStore(dst[0], ivec2(x, y), ivec4(imageLoad(src[0], ivec2(x, y))));
+}
+
+void transform_sample(ivec2 pos, ivec2 rct_coef)
+{ /* Undo the colour transform for the pixel at pos and write the result */
+    ivec4 pix = ivec4(imageLoad(src[0], pos));
+    /* b and r are stored biased by offset and relative to g */
+    pix.b -= offset;
+    pix.r -= offset;
+    pix.g -= (pix.b*rct_coef.y + pix.r*rct_coef.x) >> 2;
+    pix.b += pix.g;
+    pix.r += pix.g;
+    /* Reorder the components through fmt_lut before storing */
+    pix = ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
+                pix[fmt_lut[2]], pix[fmt_lut[3]]);
+
+    imageStore(dst[0], pos, pix);
+    if (planar_rgb != 0) { /* planar output: component i also goes to its own image dst[i] */
+        for (int i = 1; i < (3 + transparency); i++)
+            imageStore(dst[i], pos, ivec4(pix[i]));
+    }
+}
+
+void transform_block(in SliceContext sc)
+{ /* Apply transform_sample() to every pixel of the slice */
+    const ivec2 rct_coef = sc.slice_rct_coef;
+    const ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos;
+    const ivec2 end = sc.slice_pos + sc.slice_dim;
+    /* Each invocation starts at its local ID and steps by the workgroup size */
+    for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y)
+        for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x)
+            transform_sample(ivec2(x, y), rct_coef);
+}
+
+void main()
+{ /* One workgroup per slice; the slice index is the row-major workgroup index */
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    /* Slices with coding mode 1 are copied as-is, all others are transformed */
+    if (slice_ctx[slice_idx].slice_coding_mode == 1)
+        bypass_block(slice_ctx[slice_idx]);
+    else
+        transform_block(slice_ctx[slice_idx]);
+}
diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp
new file mode 100644
index 0000000000..a10163a8d6
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp
@@ -0,0 +1,138 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+uint get_usymbol(inout RangeCoder c, uint64_t state)
+{ /* Decode one unsigned symbol using the state bytes starting at address state */
+    if (get_rac(c, state + 0)) /* state byte 0: "symbol is zero" flag */
+        return 0;
+
+    int e = 0; /* unary-coded exponent */
+    while (get_rac(c, state + 1 + min(e, 9))) { // state bytes 1..10
+        e++;
+        if (e > 31) { /* too long an exponent marks the stream corrupt */
+            corrupt = true;
+            return 0;
+        }
+    }
+
+    uint a = 1; /* implicit leading 1, followed by e bits MSB first */
+    for (int i = e - 1; i >= 0; i--)
+        a += a + uint(get_rac(c, state + 22 + min(i, 9)));  // state bytes 22..31
+
+    return a;
+}
+
+bool decode_slice_header(inout SliceContext sc, uint64_t state)
+{ /* Parse one slice header into sc; returns true when the header is invalid */
+    u8buf sb = u8buf(state);
+    /* Reset all CONTEXT_SIZE scratch state bytes to 128 before parsing */
+    [[unroll]]
+    for (int i = 0; i < CONTEXT_SIZE; i++)
+        sb[i].v = uint8_t(128);
+    /* Slice position and size, in units of the slice grid */
+    uint sx = get_usymbol(sc.c, state);
+    uint sy = get_usymbol(sc.c, state);
+    uint sw = get_usymbol(sc.c, state) + 1;
+    uint sh = get_usymbol(sc.c, state) + 1;
+    /* All operands are unsigned: reject sw/sh beyond the grid before subtracting */
+    if (sw == 0 || sh == 0 ||
+        sw > gl_NumWorkGroups.x || sx > (gl_NumWorkGroups.x - sw) ||
+        sh > gl_NumWorkGroups.y || sy > (gl_NumWorkGroups.y - sh) || corrupt) {
+        return true;
+    }
+
+    /* Set coordinates */
+    uint sxs = slice_coord(img_size.x, sx     , gl_NumWorkGroups.x, chroma_shift.x);
+    uint sxe = slice_coord(img_size.x, sx + sw, gl_NumWorkGroups.x, chroma_shift.x);
+    uint sys = slice_coord(img_size.y, sy     , gl_NumWorkGroups.y, chroma_shift.y);
+    uint sye = slice_coord(img_size.y, sy + sh, gl_NumWorkGroups.y, chroma_shift.y);
+
+    sc.slice_pos = ivec2(sxs, sys);
+    sc.slice_dim = ivec2(sxe - sxs, sye - sys);
+    sc.slice_rct_coef = ivec2(1, 1); /* defaults, overridden below for version >= 4 */
+    sc.slice_coding_mode = int(0);
+    /* One quantization table index per coded plane */
+    for (uint i = 0; i < codec_planes; i++) {
+        uint idx = get_usymbol(sc.c, state);
+        if (idx >= quant_table_count)
+            return true;
+        sc.quant_table_idx[i] = uint8_t(idx);
+        sc.context_count = context_count[idx];
+    }
+    /* Three further header symbols are read and discarded */
+    get_usymbol(sc.c, state);
+    get_usymbol(sc.c, state);
+    get_usymbol(sc.c, state);
+
+    if (version >= 4) {
+        sc.slice_reset_contexts = get_rac(sc.c, state);
+        sc.slice_coding_mode = get_usymbol(sc.c, state);
+        if (sc.slice_coding_mode != 1 && colorspace == 1) {
+            sc.slice_rct_coef.x = int(get_usymbol(sc.c, state));
+            sc.slice_rct_coef.y = int(get_usymbol(sc.c, state));
+            if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4) /* coefficient sum is capped at 4 */
+                return true;
+        }
+    }
+
+    return false;
+}
+
+void golomb_init(inout SliceContext sc, uint64_t state)
+{ /* Hand the rest of the slice data over from the range coder to the bit reader */
+    if (version == 3 && micro_version > 1 || version > 3) {
+        u8buf(state).v = uint8_t(129); /* one extra range-coded bit is read with state 129 */
+        get_rac(sc.c, state);
+    }
+    /* The bit reader starts one byte before the range coder's current read position */
+    uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;
+    init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count),
+                  sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count);
+}
+
+void main(void)
+{ /* Per-slice setup: init the range coder, parse the header, optionally compute the CRC */
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; /* CONTEXT_SIZE scratch bytes per slice */
+
+    u8buf bs = u8buf(slice_data + slice_offsets[2*slice_idx + 0]); /* slice_offsets holds (offset, size) pairs */
+    uint32_t slice_size = slice_offsets[2*slice_idx + 1];
+
+    rac_init_dec(slice_ctx[slice_idx].c,
+                 bs, slice_size);
+    /* The last slice consumes one extra equiprobable bit before its header */
+    if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1))
+        get_rac_equi(slice_ctx[slice_idx].c);
+    /* NOTE(review): the "invalid header" result is discarded here - confirm this is intended */
+    decode_slice_header(slice_ctx[slice_idx], scratch_state);
+
+    if (golomb == 1)
+        golomb_init(slice_ctx[slice_idx], scratch_state);
+
+    if (ec != 0 && check_crc != 0) {
+        uint32_t crc = crcref;
+        for (int i = 0; i < slice_size; i++) /* table-driven CRC over every byte of the slice */
+            crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);
+        /* The final CRC value is stored; presumably non-zero means mismatch - verify against the host code */
+        slice_crc_mismatch[slice_idx] = crc;
+    }
+}
diff --git a/libavcodec/vulkan/ffv1_rct.comp b/libavcodec/vulkan/ffv1_rct.comp
new file mode 100644
index 0000000000..b10bb47132
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_rct.comp
@@ -0,0 +1,90 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ivec4 load_components(ivec2 pos)
+{ /* Load the pixel at pos from src and return it reordered through fmt_lut */
+    ivec4 pix = ivec4(imageLoad(src[0], pos));
+    if (planar_rgb != 0) { /* planar input: components 1.. come from their own images */
+        for (int i = 1; i < (3 + transparency); i++)
+            pix[i] = int(imageLoad(src[i], pos)[0]);
+    }
+
+    return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
+                 pix[fmt_lut[2]], pix[fmt_lut[3]]);
+}
+
+void bypass_sample(ivec2 pos)
+{ /* Copy one pixel to dst[0] with only the fmt_lut reordering applied */
+    imageStore(dst[0], pos, load_components(pos));
+}
+
+void bypass_block(in SliceContext sc)
+{ /* Apply bypass_sample() to every pixel of the slice */
+    ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos;
+    ivec2 end = sc.slice_pos + sc.slice_dim;
+    for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y) /* step by the workgroup size */
+        for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x)
+            bypass_sample(ivec2(x, y));
+}
+
+#ifdef DECODE /* inverse transform; assumes DECODE marks decoder builds, as in ffv1_common.comp - confirm */
+void transform_sample(ivec2 pos, ivec2 rct_coef)
+{
+    ivec4 pix = load_components(pos);
+    pix.b -= offset;
+    pix.r -= offset;
+    pix.g -= (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;
+    pix.rb += pix.g;
+    imageStore(dst[0], pos, pix);
+}
+#else /* forward transform; the two bodies had the same signature and could not both be compiled */
+void transform_sample(ivec2 pos, ivec2 rct_coef)
+{
+    ivec4 pix = load_components(pos);
+    pix.rb -= pix.g;
+    pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;
+    pix.b += offset;
+    pix.r += offset;
+    imageStore(dst[0], pos, pix);
+}
+#endif
+
+void transform_block(in SliceContext sc)
+{ /* Apply transform_sample() to every pixel of the slice */
+    const ivec2 rct_coef = sc.slice_rct_coef;
+    const ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos;
+    const ivec2 end = sc.slice_pos + sc.slice_dim;
+    /* Each invocation starts at its local ID and steps by the workgroup size */
+    for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y)
+        for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x)
+            transform_sample(ivec2(x, y), rct_coef);
+}
+
+void main()
+{ /* One workgroup per slice; the slice index is the row-major workgroup index */
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    /* Slices with coding mode 1 skip the transform */
+    if (slice_ctx[slice_idx].slice_coding_mode == 1)
+        bypass_block(slice_ctx[slice_idx]);
+    else
+        transform_block(slice_ctx[slice_idx]);
+}
diff --git a/libavcodec/vulkan/ffv1_vlc.comp b/libavcodec/vulkan/ffv1_vlc.comp
index 0a53e035b5..d374e5a069 100644
--- a/libavcodec/vulkan/ffv1_vlc.comp
+++ b/libavcodec/vulkan/ffv1_vlc.comp
@@ -120,3 +120,40 @@ Symbol get_vlc_symbol(inout VlcState state, int v, int bits)
 
     return set_sr_golomb(code, k, 12, bits);
 }
+
+uint get_ur_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+{ /* Unsigned Golomb-Rice code: unary prefix of up to limit zeros, then k suffix bits */
+    for (uint i = 0; i < uint(limit); i++)
+        if (get_bit(gb))
+            return get_bits(gb, k) + (i << k);
+    /* Escape after limit zero bits: a raw esc_len-bit value offset by limit - 1 */
+    return get_bits(gb, esc_len) + uint(limit - 1);
+}
+
+int get_sr_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+{ /* Signed variant: the unsigned code's lowest bit selects bitwise inversion of the rest */
+    const int code = int(get_ur_golomb(gb, k, limit, esc_len));
+    return -(code & 1) ^ (code >> 1);
+}
+
+int read_vlc_symbol(inout GetBitContext gb, inout VlcState state, int bits)
+{ /* Read one adaptive Golomb-coded residual and update its VLC state */
+    int k, i, v, ret;
+
+    i = state.count;
+    k = 0; /* smallest k with (count << k) >= error_sum */
+    while (i < state.error_sum) { // FIXME: optimize
+        k++;
+        i += i;
+    }
+
+    v = get_sr_golomb(gb, k, 12, bits);
+    /* Bitwise-invert v when 2*drift + count is negative (the shift yields 0 or -1) */
+    v ^= ((2 * state.drift + state.count) >> 31);
+
+    ret = fold(v + state.bias, bits);
+    /* The state is updated with v, not with the bias-corrected result */
+    update_vlc_state(state, v);
+
+    return ret;
+}
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 6e3b9c1238..8c8d0d9d9c 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -191,3 +191,77 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size)
     r.outstanding_count = uint16_t(0);
     r.outstanding_byte = uint8_t(0xFF);
 }
+
+/* Decoder state shared by the functions below; both are reset by rac_init_dec() */
+uint overread; /* incremented by refill() each time it is called with no input left */
+bool corrupt; /* set by symbol readers when they see an over-long exponent */
+
+void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size)
+{ /* Initialize r for decoding buf_size bytes at data; assumes buf_size >= 2 - TODO confirm */
+    overread = 0;
+    corrupt = false;
+
+    /* Skip priming bytes */
+    rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2);
+
+    u8vec2 prime = u8vec2buf(data).v;
+    /* Switch endianness of the priming bytes */
+    r.low = pack16(prime.yx);
+
+    if (r.low >= 0xFF00) { /* clamp, and end the stream right after the priming bytes */
+        r.low = 0xFF00;
+        r.bytestream_end = uint64_t(data) + 2;
+    }
+}
+
+void refill(inout RangeCoder c)
+{ /* Renormalize by one byte; past the end of input only overread is counted */
+    c.low   <<= 8;
+    c.range <<= 8;
+    if (c.bytestream >= c.bytestream_end) {
+        overread++;
+    } else {
+        c.low += u8buf(c.bytestream).v;
+        c.bytestream++;
+    }
+}
+
+bool get_rac(inout RangeCoder c, uint64_t state)
+{ /* Decode one bit with the adaptive state byte at address state, and update that byte */
+    u8buf sb = u8buf(state);
+    uint val = uint(sb.v);
+    uint16_t range1 = uint16_t((uint(c.range) * val) >> 8); /* share of the range given to a 1 bit */
+
+    c.range -= range1;
+
+    bool bit = c.low >= c.range;
+    sb.v = zero_one_state[(uint(bit) << 8) + val]; /* next state: table indexed by (bit, old state) */
+
+    if (bit) {
+        c.low -= c.range;
+        c.range = range1;
+    }
+
+    if (c.range < 0x100) /* range dropped below 8 bits: pull in another byte */
+        refill(c);
+
+    return bit;
+}
+
+bool get_rac_equi(inout RangeCoder c)
+{ /* Decode one bit with a fixed half/half split; no state byte is read or written */
+    uint16_t range1 = c.range >> 1;
+
+    c.range -= range1;
+
+    bool bit = c.low >= c.range;
+    if (bit) {
+        c.low -= c.range;
+        c.range = range1;
+    }
+
+    if (c.range < 0x100) /* range dropped below 8 bits: pull in another byte */
+        refill(c);
+
+    return bit;
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index cd77e10e12..bc850a7333 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -36,6 +36,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc;
 #if CONFIG_AV1_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc;
 #endif
+#if CONFIG_FFV1_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
+#endif
 
 static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_H264_VULKAN_HWACCEL
@@ -47,6 +50,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_AV1_VULKAN_HWACCEL
     &ff_vk_dec_av1_desc,
 #endif
+#if CONFIG_FFV1_VULKAN_HWACCEL
+    &ff_vk_dec_ffv1_desc,
+#endif
 };
 
 static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id)
@@ -1035,6 +1041,17 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
         frames_ctx->free        = free_profile_data;
 
         hwfc->create_pnext = &prof->profile_list;
+    } else {
+        switch (frames_ctx->sw_format) {
+        case AV_PIX_FMT_GBRAP16:
+            frames_ctx->sw_format = AV_PIX_FMT_RGBA64;
+            break;
+        case AV_PIX_FMT_BGR0:
+            frames_ctx->sw_format = AV_PIX_FMT_RGB0;
+            break;
+        default:
+            break;
+        }
     }
 
     frames_ctx->width  = avctx->coded_width;
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
new file mode 100644
index 0000000000..276514a228
--- /dev/null
+++ b/libavcodec/vulkan_ffv1.c
@@ -0,0 +1,1292 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "ffv1.h"
+#include "ffv1_vulkan.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+
+/* GLSL source strings of the compute shaders, defined outside this file;
+ * they are appended to the shader source buffers via GLSLD(). */
+extern const char *ff_source_common_comp;
+extern const char *ff_source_rangecoder_comp;
+extern const char *ff_source_ffv1_vlc_comp;
+extern const char *ff_source_ffv1_common_comp;
+extern const char *ff_source_ffv1_dec_setup_comp;
+extern const char *ff_source_ffv1_reset_comp;
+extern const char *ff_source_ffv1_dec_comp;
+extern const char *ff_source_ffv1_dec_rct_comp;
+
+/* Decoder descriptor referenced from the dec_descs[] table in
+ * vulkan_decode.c: FFv1 is decoded with compute shaders, so a compute
+ * queue and the push descriptor extension are requested. */
+const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
+    .codec_id         = AV_CODEC_ID_FFV1,
+    .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+    .queue_flags      = VK_QUEUE_COMPUTE_BIT,
+};
+
+/* NOTE(review): no use of HOST_MAP is visible in this part of the file;
+ * host mapping below is gated on FF_VK_EXT_EXTERNAL_HOST_MEMORY at runtime.
+ * Confirm whether this define is still needed. */
+#define HOST_MAP
+
+/* Per-picture hwaccel state for the FFv1 Vulkan decoder. */
+typedef struct FFv1VulkanDecodePicture {
+    FFVulkanDecodePicture vp;
+
+    /* Scratch buffer, passed to the shaders as scratch_data */
+    AVBufferRef *tmp_data;
+
+    /* Slice contexts followed by the per-slice plane states; newly allocated
+     * on keyframes, otherwise a reference to the previous picture's buffer */
+    AVBufferRef *slice_state;
+    uint32_t plane_state_size;  /* Bytes of state for a single plane */
+    uint32_t slice_state_size;  /* Bytes of state + context for a single slice */
+    uint32_t slice_data_size;   /* Bytes reserved for one SliceContext struct */
+    uint32_t max_context_count; /* Largest context_count over all quant tables */
+
+    /* Pairs of (offset, size) uint32_t values, one pair per slice */
+    AVBufferRef *slice_offset_buf;
+    /* Offsets filled in by ff_vk_decode_add_slice() (non-host-mapped path) */
+    uint32_t    *slice_offset;
+    int          slice_num;     /* Number of slices submitted so far */
+
+    /* One uint32_t per slice, bound to the setup shader as slice_status_buf */
+    AVBufferRef *slice_status_buf;
+    /* Nonzero if error correction is enabled and AV_EF_CRCCHECK is set */
+    int crc_checked;
+} FFv1VulkanDecodePicture;
+
+/* Decoder-wide state: compiled shaders, static tables and buffer pools.
+ * Shader and frame-context arrays are indexed by f->use32bit,
+ * (f->ac == AC_GOLOMB_RICE) and is_rgb, in the order given below. */
+typedef struct FFv1VulkanDecodeContext {
+    AVBufferRef *intermediate_frames_ref[2]; /* 16/32 bit */
+
+    FFVulkanShader setup;
+    FFVulkanShader reset[2]; /* AC/Golomb */
+    FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */
+    FFVulkanShader rct[2]; /* 16/32 bit */
+
+    /* Static lookup tables */
+    FFVkBuffer rangecoder_static_buf;
+    FFVkBuffer quant_buf;
+    FFVkBuffer crc_tab_buf;
+
+    /* Pools backing the per-picture buffers of FFv1VulkanDecodePicture */
+    AVBufferPool *slice_state_pool;
+    AVBufferPool *tmp_data_pool;
+    AVBufferPool *slice_offset_pool;
+    AVBufferPool *slice_status_pool;
+} FFv1VulkanDecodeContext;
+
+/* Push constants shared by the setup and decode shaders.
+ * The field order and types must stay in sync with the GLSL block emitted
+ * by add_push_data(), which registers sizeof(FFv1VkParameters) bytes. */
+typedef struct FFv1VkParameters {
+    uint32_t context_count[MAX_QUANT_TABLES];
+
+    /* Device addresses of the input, state and scratch buffers */
+    VkDeviceAddress slice_data;
+    VkDeviceAddress slice_state;
+    VkDeviceAddress scratch_data;
+
+    uint32_t img_size[2];     /* width, height */
+    uint32_t chroma_shift[2]; /* horizontal, vertical */
+
+    uint32_t plane_state_size;
+    uint32_t crcref;
+
+    uint8_t bits_per_raw_sample;
+    uint8_t quant_table_count;
+    uint8_t version;
+    uint8_t micro_version;
+    uint8_t key_frame;
+    uint8_t planes;       /* Planes of the output pixel format */
+    uint8_t codec_planes; /* Planes as coded in the bitstream */
+    uint8_t transparency;
+    uint8_t colorspace;
+    uint8_t ec;
+    uint8_t golomb;
+    uint8_t check_crc;
+} FFv1VkParameters;
+
+/**
+ * Append the GLSL push-constant block to the shader source and register
+ * a matching push-constant range for the compute stage.
+ *
+ * The emitted members must stay in the same order as the fields of
+ * FFv1VkParameters, whose size is used for the range.
+ */
+static void add_push_data(FFVulkanShader *shd)
+{
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {  );
+    GLSLF(1,    uint context_count[%i];                             ,MAX_QUANT_TABLES);
+    GLSLC(0,                                                        );
+    GLSLC(1,    u8buf slice_data;                                   );
+    GLSLC(1,    u8buf slice_state;                                  );
+    GLSLC(1,    u8buf scratch_data;                                 );
+    GLSLC(0,                                                        );
+    GLSLC(1,    uvec2 img_size;                                     );
+    GLSLC(1,    uvec2 chroma_shift;                                 );
+    GLSLC(0,                                                        );
+    GLSLC(1,    uint plane_state_size;                              );
+    GLSLC(1,    uint32_t crcref;                                    );
+    GLSLC(0,                                                        );
+    GLSLC(1,    uint8_t bits_per_raw_sample;                        );
+    GLSLC(1,    uint8_t quant_table_count;                          );
+    GLSLC(1,    uint8_t version;                                    );
+    GLSLC(1,    uint8_t micro_version;                              );
+    GLSLC(1,    uint8_t key_frame;                                  );
+    GLSLC(1,    uint8_t planes;                                     );
+    GLSLC(1,    uint8_t codec_planes;                               );
+    GLSLC(1,    uint8_t transparency;                               );
+    GLSLC(1,    uint8_t colorspace;                                 );
+    GLSLC(1,    uint8_t ec;                                         );
+    GLSLC(1,    uint8_t golomb;                                     );
+    GLSLC(1,    uint8_t check_crc;                                  );
+    GLSLC(0, };                                                     );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+/**
+ * Per-frame setup: sizes the slice state, maps or allocates all buffers
+ * needed to decode the current picture, and prepares the output frame
+ * (plus an intermediate frame when the content is RGB).
+ *
+ * @param avctx  codec context; buffer and size are unused
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_ffv1_start_frame(AVCodecContext          *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t       size)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+    FFV1Context *f = avctx->priv_data;
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &fp->vp;
+
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+    const enum AVPixelFormat sw_format = hwfc->sw_format;
+
+    /* RGB means: a non-zero colorspace on anything but YA8 */
+    const int is_rgb = f->colorspace != 0 && sw_format != AV_PIX_FMT_YA8;
+
+    /* Every buffer here is a storage buffer accessed by device address */
+    const VkBufferUsageFlags buf_usage =
+        VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+        VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+
+    int ret;
+
+    fp->slice_num = 0;
+
+    for (int i = 0; i < f->quant_table_count; i++)
+        fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count);
+
+    /* Size of the state of one plane, then of one whole slice */
+    fp->plane_state_size  = f->ac == AC_GOLOMB_RICE ? 8 : CONTEXT_SIZE;
+    fp->plane_state_size *= fp->max_context_count;
+
+    fp->slice_data_size  = 256; /* Overestimation for the SliceContext struct */
+    fp->slice_state_size = fp->plane_state_size*f->plane_count;
+    fp->slice_state_size = FFALIGN(fp->slice_state_size + fp->slice_data_size, 8);
+
+    fp->crc_checked = f->ec && (avctx->err_recognition & AV_EF_CRCCHECK);
+
+    /* Host map the input slices data if supported */
+    if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
+        ret = ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, f->pkt_ref->data,
+                                    f->pkt_ref->buf, buf_usage);
+        if (ret < 0)
+            return ret;
+    }
+
+    /* Slice state: fresh buffer on keyframes, otherwise carried over
+     * from the previous picture */
+    if (f->picture.f->flags & AV_FRAME_FLAG_KEY) {
+        ret = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool,
+                                      &fp->slice_state, buf_usage,
+                                      NULL, f->max_slice_count*fp->slice_state_size,
+                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+        if (ret < 0)
+            return ret;
+    } else {
+        FFv1VulkanDecodePicture *prev = f->hwaccel_last_picture_private;
+        fp->slice_state = av_buffer_ref(prev->slice_state);
+        if (!fp->slice_state)
+            return AVERROR(ENOMEM);
+    }
+
+    /* Temporary data buffer */
+    ret = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool,
+                                  &fp->tmp_data, buf_usage,
+                                  NULL, f->max_slice_count*CONTEXT_SIZE,
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    if (ret < 0)
+        return ret;
+
+    /* Slice offsets buffer, written from the host */
+    ret = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool,
+                                  &fp->slice_offset_buf, buf_usage,
+                                  NULL, 2*f->max_slice_count*sizeof(uint32_t),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (ret < 0)
+        return ret;
+
+    /* Slice status buffer */
+    ret = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_status_pool,
+                                  &fp->slice_status_buf, buf_usage,
+                                  NULL, f->max_slice_count*sizeof(uint32_t),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (ret < 0)
+        return ret;
+
+    /* Prepare frame to be used */
+    ret = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1,
+                                         FF_VK_REP_NATIVE, 0);
+    if (ret < 0)
+        return ret;
+
+    if (!is_rgb)
+        return 0;
+
+    /* Create a temporary frame for RGB */
+    {
+        AVBufferRef *frames_ref = fv->intermediate_frames_ref[f->use32bit];
+        AVHWFramesContext *dpb_hwfc = (AVHWFramesContext *)frames_ref->data;
+        const int nb_planes = av_pix_fmt_count_planes(dpb_hwfc->sw_format);
+
+        vp->dpb_frame = av_frame_alloc();
+        if (!vp->dpb_frame)
+            return AVERROR(ENOMEM);
+
+        ret = av_hwframe_get_buffer(frames_ref, vp->dpb_frame, 0);
+        if (ret < 0)
+            return ret;
+
+        ret = ff_vk_decode_prepare_frame_sdr(dec, vp->dpb_frame, vp, 1,
+                                             FF_VK_REP_NATIVE, 0);
+        if (ret < 0)
+            return ret;
+
+        /* One image view per plane of the intermediate format */
+        for (int i = 0; i < nb_planes; i++) {
+            ret = ff_vk_create_imageview(&ctx->s,
+                                         &vp->view.dst[i], &vp->view.aspect_ref[i],
+                                         vp->dpb_frame,
+                                         i, FF_VK_REP_NATIVE);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Register one slice of the current packet.
+ *
+ * Writes the slice's (offset, size) pair into the host-visible slice offset
+ * buffer. With host-mapped input the offset is relative to the start of the
+ * packet; otherwise the slice is copied by ff_vk_decode_add_slice() and the
+ * offset it reports is used.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_ffv1_decode_slice(AVCodecContext *avctx,
+                                const uint8_t  *data,
+                                uint32_t        size)
+{
+    FFV1Context *f = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+
+    uint32_t offset;
+    int idx;
+
+    if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
+        /* The packet itself is mapped, so only bookkeeping is needed */
+        offset = data - f->pkt_ref->data;
+        idx    = fp->slice_num++;
+    } else {
+        int err = ff_vk_decode_add_slice(avctx, &fp->vp, data, size, 0,
+                                         &fp->slice_num,
+                                         (const uint32_t **)&fp->slice_offset);
+        if (err < 0)
+            return err;
+
+        /* slice_num was already advanced by the call above */
+        idx    = fp->slice_num - 1;
+        offset = fp->slice_offset[idx];
+    }
+
+    AV_WN32(slice_offset->mapped_mem + (2*idx + 0)*sizeof(uint32_t), offset);
+    AV_WN32(slice_offset->mapped_mem + (2*idx + 1)*sizeof(uint32_t), size);
+
+    return 0;
+}
+
+/**
+ * Record and submit all compute work for the current frame.
+ *
+ * The command buffer runs, in order: the setup shader, the reset shader,
+ * the decode shader and, for RGB content, the RCT shader which writes the
+ * final output frame from the intermediate one. Buffer and image barriers
+ * are inserted between the stages.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_ffv1_end_frame(AVCodecContext *avctx)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    FFV1Context *f = avctx->priv_data;
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+    FFv1VkParameters pd;
+    FFv1VkResetParameters pd_reset;
+
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+    enum AVPixelFormat sw_format = hwfc->sw_format;
+
+    int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8;
+    int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
+                 !(sw_format == AV_PIX_FMT_YA8);
+
+    FFVulkanShader *reset_shader;
+    FFVulkanShader *decode_shader;
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &fp->vp;
+
+    /* Raw buffer pointers are taken before the references are handed
+     * over to the execution context below */
+    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+    FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data;
+    FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+    FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data;
+
+    FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data;
+
+    /* RGB is decoded into the intermediate frame, everything else
+     * directly into the output frame */
+    AVFrame *decode_dst = is_rgb ? vp->dpb_frame : f->picture.f;
+    VkImageView *decode_dst_view = is_rgb ? vp->view.dst : vp->view.out;
+
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+    VkBufferMemoryBarrier2 buf_bar[8];
+    int nb_buf_bar = 0;
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    ff_vk_exec_start(&ctx->s, exec);
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                    f->picture.f));
+
+    if (is_rgb)
+        RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
+                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
+        FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private;
+        FFVulkanDecodePicture *vpl = &fpl->vp;
+
+        /* Wait on the previous frame */
+        RET(ff_vk_exec_add_dep_wait_sem(&ctx->s, exec, vpl->sem, vpl->sem_value,
+                                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
+    }
+
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1));
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_status_buf, 1, 1));
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+    vp->slices_buf = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0));
+    fp->slice_offset_buf = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0));
+    fp->tmp_data = NULL;
+
+    /* Entry barrier for the slice state */
+    if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
+        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = slice_state->stage,
+            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+            .srcAccessMask = slice_state->access,
+            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = slice_state->buf,
+            .offset = 0,
+            .size = VK_WHOLE_SIZE,
+        };
+    }
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    if (nb_buf_bar) {
+        /* Only buf_bar[0] has been filled in at this point */
+        slice_state->stage = buf_bar[0].dstStageMask;
+        slice_state->access = buf_bar[0].dstAccessMask;
+        nb_buf_bar = 0;
+    }
+
+    /* Setup shader */
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 1, 0,
+                                    slice_offset,
+                                    0, 2*f->slice_count*sizeof(uint32_t),
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 2, 0,
+                                    slice_status,
+                                    0, f->slice_count*sizeof(uint32_t),
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
+    pd = (FFv1VkParameters) {
+        /* context_count */
+
+        .slice_data = slices_buf->address,
+        /* The plane states start right after the slice contexts */
+        .slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
+        .scratch_data = tmp_data->address,
+
+        .img_size[0] = f->picture.f->width,
+        .img_size[1] = f->picture.f->height,
+        .chroma_shift[0] = f->chroma_h_shift,
+        .chroma_shift[1] = f->chroma_v_shift,
+
+        .plane_state_size = fp->plane_state_size,
+        .crcref = f->crcref,
+
+        .bits_per_raw_sample = bits,
+        .quant_table_count = f->quant_table_count,
+        .version = f->version,
+        .micro_version = f->micro_version,
+        .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
+        .planes = av_pix_fmt_count_planes(sw_format),
+        .codec_planes = f->plane_count,
+        .transparency = f->transparency,
+        .colorspace = f->colorspace,
+        .ec = f->ec,
+        .golomb = f->ac == AC_GOLOMB_RICE,
+        .check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
+    };
+    for (int i = 0; i < MAX_QUANT_TABLES; i++)
+        pd.context_count[i] = f->context_count[i];
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    /* Reset shader */
+    reset_shader = &fv->reset[f->ac == AC_GOLOMB_RICE];
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader);
+
+    pd_reset = (FFv1VkResetParameters) {
+        .slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
+        .plane_state_size = fp->plane_state_size,
+        .context_count = fp->max_context_count,
+        .codec_planes = f->plane_count,
+        .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
+        .version = f->version,
+        .micro_version = f->micro_version,
+    };
+    ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd_reset), &pd_reset);
+
+    /* Sync between setup and reset shaders */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_state->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_state->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_state->buf,
+        .offset = 0,
+        .size = fp->slice_data_size*f->slice_count,
+    };
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    slice_state->stage = buf_bar[0].dstStageMask;
+    slice_state->access = buf_bar[0].dstAccessMask;
+    nb_buf_bar = 0;
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices,
+                    f->plane_count);
+
+    /* Decode */
+    decode_shader = &fv->decode[f->use32bit][f->ac == AC_GOLOMB_RICE][is_rgb];
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
+                                  decode_dst, decode_dst_view,
+                                  1, 1,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
+    ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    /* Sync between reset and decode shaders */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_state->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_state->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_state->buf,
+        .offset = fp->slice_data_size*f->slice_count,
+        .size = slice_state->size - fp->slice_data_size*f->slice_count,
+    };
+
+    /* Input frame barrier */
+    ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    slice_state->stage = buf_bar[0].dstStageMask;
+    slice_state->access = buf_bar[0].dstAccessMask;
+    nb_img_bar = 0;
+    nb_buf_bar = 0;
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    /* RCT */
+    if (is_rgb) {
+        FFVulkanShader *rct_shader = &fv->rct[f->use32bit];
+        FFv1VkRCTParameters pd_rct;
+
+        ff_vk_shader_update_desc_buffer(&ctx->s, exec, rct_shader,
+                                        1, 0, 0,
+                                        slice_state,
+                                        0, fp->slice_data_size*f->slice_count,
+                                        VK_FORMAT_UNDEFINED);
+        ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
+                                      decode_dst, decode_dst_view,
+                                      1, 1,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      VK_NULL_HANDLE);
+        ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
+                                      f->picture.f, vp->view.out,
+                                      1, 2,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      VK_NULL_HANDLE);
+
+        ff_vk_exec_bind_shader(&ctx->s, exec, rct_shader);
+
+        pd_rct = (FFv1VkRCTParameters) {
+            .offset = 1 << bits,
+            .bits = bits,
+            .planar_rgb = ff_vk_mt_is_np_rgb(sw_format) &&
+                          (ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1),
+            .transparency = f->transparency,
+        };
+        ff_vk_set_perm(sw_format, pd_rct.fmt_lut);
+
+        ff_vk_shader_update_push_const(&ctx->s, exec, rct_shader,
+                                       VK_SHADER_STAGE_COMPUTE_BIT,
+                                       0, sizeof(pd_rct), &pd_rct);
+
+        /* The intermediate frame is read, the output frame is written */
+        ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
+                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                            VK_ACCESS_SHADER_READ_BIT,
+                            VK_IMAGE_LAYOUT_GENERAL,
+                            VK_QUEUE_FAMILY_IGNORED);
+        ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
+                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                            VK_ACCESS_SHADER_WRITE_BIT,
+                            VK_IMAGE_LAYOUT_GENERAL,
+                            VK_QUEUE_FAMILY_IGNORED);
+
+        vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+        nb_img_bar = 0;
+
+        vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+    }
+
+    err = ff_vk_exec_submit(&ctx->s, exec);
+    if (err < 0)
+        return err;
+
+    return 0;
+
+fail:
+    /* Nothing was submitted: drop the dependencies added so far and
+     * report the failure instead of claiming success */
+    ff_vk_exec_discard_deps(&ctx->s, exec);
+    return err;
+}
+
+/**
+ * Append the definitions common to the decoder shaders to the shader
+ * source: the DECODE define, CONTEXT_SIZE and MAX_QUANT_TABLE_MASK, the
+ * sample types, and the range coder and FFv1 common GLSL sources.
+ *
+ * @param use32bit selects 32-bit (nonzero) or 16-bit (zero) integer sample
+ *                 types for the TYPE/VTYPE2/VTYPE3 defines
+ */
+static void define_shared_code(FFVulkanShader *shd, int use32bit)
+{
+    int smp_bits = use32bit ? 32 : 16;
+
+    GLSLC(0, #define DECODE                                              );
+
+    av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n"                    ,CONTEXT_SIZE);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n"          ,MAX_QUANT_TABLE_MASK);
+
+    GLSLF(0, #define TYPE int%i_t                                        ,smp_bits);
+    GLSLF(0, #define VTYPE2 i%ivec2                                      ,smp_bits);
+    GLSLF(0, #define VTYPE3 i%ivec3                                      ,smp_bits);
+    GLSLD(ff_source_rangecoder_comp);
+    GLSLD(ff_source_ffv1_common_comp);
+}
+
+/**
+ * Build, compile and register the "ffv1_dec_setup" compute shader.
+ *
+ * The source is assembled in order: common header, push constants, table
+ * size defines, the descriptor set with the static tables, the shared
+ * decoder code, the descriptor set with the per-frame slice buffers, and
+ * finally the setup shader body.
+ *
+ * @return 0 or a positive value on success, a negative AVERROR code on
+ *         failure; the compiler's opaque shader data is freed either way
+ */
+static int init_setup_shader(FFV1Context *f, FFVulkanContext *s,
+                             FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                             FFVulkanShader *shd)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    /* Array dimensions used by the quant_table declaration below */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    /* First descriptor set: static lookup tables */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "crc_ieee_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint32_t crc_ieee[256];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0));
+
+    define_shared_code(shd, 0 /* Irrelevant */);
+
+    /* Second descriptor set: per-frame slice buffers, updated in
+     * vk_ffv1_end_frame() */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+        {
+            .name        = "slice_offsets_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali   = "readonly",
+            .buf_content = "uint32_t slice_offsets",
+            .buf_elems   = 2*f->max_slice_count,
+        },
+        {
+            /* NOTE(review): declared with 2*max_slice_count elements, while
+             * vk_ffv1_start_frame() allocates max_slice_count uint32_t for
+             * the status buffer - confirm which count is intended. */
+            .name        = "slice_status_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali   = "writeonly",
+            .buf_content = "uint32_t slice_crc_mismatch",
+            .buf_elems   = 2*f->max_slice_count,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_setup_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* Reached on success too: err holds the result of the last RET() */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+/* Assemble, compile and register the "ffv1_dec_reset" compute shader in shd. */
+static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
+                             FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                             FFVulkanShader *shd, int ac)
+{
+    int err; /* no direct assignment here; presumably set by RET() */
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL; /* handed to compile_shader, released at fail: */
+    int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
+    /* wg_dim (a device limit capped at 1024) is the first of three size arguments */
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          wg_dim, 1, 1,
+                          0));
+    /* Golomb-Rice coding is signalled to the GLSL through a preprocessor define */
+    if (ac == AC_GOLOMB_RICE)
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+    /* Push constants; the size registered below is sizeof(FFv1VkResetParameters) */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
+    GLSLC(1,    u8buf slice_state;                                             );
+    GLSLC(1,    uint plane_state_size;                                         );
+    GLSLC(1,    uint context_count;                                            );
+    GLSLC(1,    uint8_t codec_planes;                                          );
+    GLSLC(1,    uint8_t key_frame;                                             );
+    GLSLC(1,    uint8_t version;                                               );
+    GLSLC(1,    uint8_t micro_version;                                         );
+    GLSLC(1,    uint8_t padding[1];                                            );
+    GLSLC(0, };                                                                );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+    /* Array bounds referenced by the quant_table declaration further down */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
+
+    define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */);
+    if (ac == AC_GOLOMB_RICE)
+        GLSLD(ff_source_ffv1_vlc_comp);
+    /* Slice contexts, declared read-only, sized from f->max_slice_count */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0));
+
+    GLSLD(ff_source_ffv1_reset_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail: /* also reached on success, by falling through */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+/* Assemble, compile and register one variant of the "ffv1_dec" compute shader. */
+static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
+                              FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                              FFVulkanShader *shd, AVHWFramesContext *frames_ctx,
+                              int use32bit, int ac, int rgb)
+{
+    int err; /* no direct assignment here; presumably set by RET() */
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL; /* handed to compile_shader, released at fail: */
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+    /* The ac and rgb variants are signalled to the GLSL through defines */
+    if (ac == AC_GOLOMB_RICE)
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+
+    if (rgb)
+        av_bprintf(&shd->src, "#define RGB\n");
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+    /* Array bounds referenced by the quant_table declaration further down */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
+
+    define_shared_code(shd, use32bit);
+    if (ac == AC_GOLOMB_RICE)
+        GLSLD(ff_source_ffv1_vlc_comp);
+    /* Slice contexts plus the dst images, one per plane of frames_ctx->sw_format */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+        {
+            .name       = "dst",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .elems      = av_pix_fmt_count_planes(frames_ctx->sw_format),
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail: /* also reached on success, by falling through */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+/* Assemble, compile and register the "ffv1_rct" compute shader (src to dst). */
+static int init_rct_shader(FFV1Context *f, FFVulkanContext *s,
+                           FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                           FFVulkanShader *shd, int use32bit,
+                           AVHWFramesContext *src_ctx, AVHWFramesContext *dst_ctx)
+{
+    int err; /* no direct assignment here; presumably set by RET() */
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL; /* handed to compile_shader, released at fail: */
+    int wg_count = sqrt(s->props.properties.limits.maxComputeWorkGroupInvocations);
+    /* wg_count is passed twice below; it is the truncated root of the device limit */
+    RET(ff_vk_shader_init(s, shd, "ffv1_rct",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          wg_count, wg_count, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+    /* Push constants; the size registered below is sizeof(FFv1VkRCTParameters) */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
+    GLSLC(1,    ivec4 fmt_lut;                                                 );
+    GLSLC(1,    int offset;                                                    );
+    GLSLC(1,    uint8_t bits;                                                  );
+    GLSLC(1,    uint8_t planar_rgb;                                            );
+    GLSLC(1,    uint8_t transparency;                                          );
+    GLSLC(1,    uint8_t version;                                               );
+    GLSLC(1,    uint8_t micro_version;                                         );
+    GLSLC(1,    uint8_t padding[3];                                            );
+    GLSLC(0, };                                                                );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+    /* Array bounds referenced by the quant_table declaration further down */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
+
+    define_shared_code(shd, use32bit);
+    /* Slice contexts, the read-only src images and the write-only dst images */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+        {
+            .name       = "src",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_layout = ff_vk_shader_rep_fmt(src_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .mem_quali  = "readonly",
+            .elems      = av_pix_fmt_count_planes(src_ctx->sw_format),
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+        {
+            .name       = "dst",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_layout = ff_vk_shader_rep_fmt(dst_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .mem_quali  = "writeonly",
+            .elems      = av_pix_fmt_count_planes(dst_ctx->sw_format),
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_rct_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail: /* also reached on success, by falling through */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/* Creates in *dst a Vulkan frame pool of sw_format; *dst is unset on failure. */
+static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
+                         AVBufferRef **dst, enum AVPixelFormat sw_format)
+{
+    AVVulkanFramesContext *vk_pool;
+    AVHWFramesContext *pool;
+    int ret;
+
+    *dst = av_hwframe_ctx_alloc(s->device_ref);
+    if (*dst == NULL)
+        return AVERROR(ENOMEM);
+
+    /* Same size as the decoder's frames, rounded up to multiples of 32 */
+    pool            = (AVHWFramesContext *)(*dst)->data;
+    pool->width     = FFALIGN(s->frames->width, 32);
+    pool->height    = FFALIGN(s->frames->height, 32);
+    pool->sw_format = sw_format;
+    pool->format    = AV_PIX_FMT_VULKAN;
+
+    vk_pool            = pool->hwctx;
+    vk_pool->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+    vk_pool->usage     = VK_IMAGE_USAGE_STORAGE_BIT;
+    vk_pool->tiling    = VK_IMAGE_TILING_OPTIMAL;
+
+    ret = av_hwframe_ctx_init(*dst);
+    if (ret >= 0)
+        return 0;
+    av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n",
+           av_get_pix_fmt_name(sw_format), av_err2str(ret));
+    av_buffer_unref(dst);
+    return ret;
+}
+
+/* Releases the shaders, frame pools, buffers and buffer pools of ctx->sd_ctx. */
+static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
+{
+    FFv1VulkanDecodeContext *priv = ctx->sd_ctx;
+    FFVulkanContext *vk = &ctx->s;
+    int depth, coder, rgb;
+
+    ff_vk_shader_free(vk, &priv->setup);
+    av_buffer_unref(&priv->intermediate_frames_ref[0]); /* 16 bit */
+    av_buffer_unref(&priv->intermediate_frames_ref[1]); /* 32 bit */
+
+    for (coder = 0; coder < 2; coder++) /* AC/Golomb */
+        ff_vk_shader_free(vk, &priv->reset[coder]);
+    for (depth = 0; depth < 2; depth++) /* 16/32 bit */
+        for (coder = 0; coder < 2; coder++) /* AC/Golomb */
+            for (rgb = 0; rgb < 2; rgb++) /* Normal/RGB */
+                ff_vk_shader_free(vk, &priv->decode[depth][coder][rgb]);
+    for (depth = 0; depth < 2; depth++) /* 16/32 bit */
+        ff_vk_shader_free(vk, &priv->rct[depth]);
+
+    ff_vk_free_buf(vk, &priv->quant_buf);
+    ff_vk_free_buf(vk, &priv->rangecoder_static_buf);
+    ff_vk_free_buf(vk, &priv->crc_tab_buf);
+
+    av_buffer_pool_uninit(&priv->tmp_data_pool);
+    av_buffer_pool_uninit(&priv->slice_state_pool);
+    av_buffer_pool_uninit(&priv->slice_offset_pool);
+    av_buffer_pool_uninit(&priv->slice_status_pool);
+}
+
+/**
+ * Hwaccel init callback: creates the shared Vulkan decode state, then builds
+ * every FFv1 compute shader variant and the static lookup buffers.
+ *
+ * The SPIR-V compiler is only used while the shaders are built, so it is
+ * released before returning on every path once it has been created.
+ *
+ * @param avctx codec context; priv_data is read as the FFV1Context
+ * @return the result of the last initialization step (>= 0) on success,
+ *         AVERROR(ENOTSUP) for bitstream versions below 3, AVERROR_EXTERNAL
+ *         if no SPIR-V compiler is available, or the failing step's error
+ */
+static int vk_decode_ffv1_init(AVCodecContext *avctx)
+{
+    int err;
+    FFV1Context *f = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = NULL;
+    FFv1VulkanDecodeContext *fv;
+    FFVkSPIRVCompiler *spv;
+
+    if (f->version < 3)
+        return AVERROR(ENOTSUP);
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* From here on, leave only through fail: so that spv is released */
+    RET(ff_vk_decode_init(avctx));
+    ctx = dec->shared_ctx;
+
+    fv = ctx->sd_ctx = av_mallocz(sizeof(*fv));
+    if (!fv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ctx->sd_ctx_free = &vk_decode_ffv1_uninit;
+
+    /* Intermediate frame pool for RCT */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        RET(init_indirect(avctx, &ctx->s, &fv->intermediate_frames_ref[i],
+                          i ? AV_PIX_FMT_RGBA128 : AV_PIX_FMT_RGBA64));
+    }
+
+    /* Setup shader */
+    RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup));
+
+    /* Reset shaders */
+    for (int i = 0; i < 2; i++) { /* AC/Golomb */
+        RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool,
+                              spv, &fv->reset[i],
+                              !i ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE));
+    }
+
+    /* Decode shaders */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        for (int j = 0; j < 2; j++) { /* AC/Golomb */
+            for (int k = 0; k < 2; k++) { /* Normal/RGB */
+                /* RGB variants take their dst layout from the intermediate pool */
+                AVHWFramesContext *frames_ctx;
+                frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data :
+                                 (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+                RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool,
+                                       spv, &fv->decode[i][j][k],
+                                       frames_ctx,
+                                       i,
+                                       !j ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE,
+                                       k));
+            }
+        }
+    }
+
+    /* RCT shaders */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        RET(init_rct_shader(f, &ctx->s, &ctx->exec_pool,
+                            spv, &fv->rct[i], i,
+                            (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data,
+                            (AVHWFramesContext *)avctx->hw_frames_ctx->data));
+    }
+
+    /* Range coder data */
+    RET(ff_ffv1_vk_init_state_transition_data(&ctx->s,
+                                              &fv->rangecoder_static_buf,
+                                              f));
+
+    /* Quantization table data */
+    RET(ff_ffv1_vk_init_quant_table_data(&ctx->s,
+                                         &fv->quant_buf,
+                                         f));
+
+    /* CRC table buffer */
+    RET(ff_ffv1_vk_init_crc_table_data(&ctx->s,
+                                       &fv->crc_tab_buf,
+                                       f));
+
+    /* Update setup global descriptors */
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 1, 0,
+                                        &fv->crc_tab_buf,
+                                        0, fv->crc_tab_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+    /* Update decode global descriptors */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        for (int j = 0; j < 2; j++) { /* AC/Golomb */
+            for (int k = 0; k < 2; k++) { /* Normal/RGB */
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 0, 0,
+                                                    &fv->rangecoder_static_buf,
+                                                    0, fv->rangecoder_static_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 1, 0,
+                                                    &fv->quant_buf,
+                                                    0, fv->quant_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+            }
+        }
+    }
+
+fail:
+    /* Shared exit: the compiler is not used after init, free it either way */
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/* Frees a frame's decode picture, logs CRC mismatches, drops its buffers. */
+static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    FFv1VulkanDecodePicture *pic = data;
+    AVHWDeviceContext *dev = _hwctx.nc;
+
+    ff_vk_decode_free_frame(dev, &pic->vp);
+
+    if (pic->crc_checked) {
+        FFVkBuffer *status = (FFVkBuffer *)pic->slice_status_buf->data;
+        /* One 32-bit word per slice; anything non-zero gets reported */
+        for (int n = 0; n < pic->slice_num; n++) {
+            uint32_t word = AV_RN32(status->mapped_mem + n*sizeof(uint32_t));
+            if (!word)
+                continue;
+            av_log(dev, AV_LOG_ERROR, "CRC mismatch in slice %i, res: 0x%x\n",
+                   n, word);
+        }
+    }
+
+    av_buffer_unref(&pic->vp.slices_buf);
+    av_buffer_unref(&pic->slice_state);
+    av_buffer_unref(&pic->slice_offset_buf);
+    av_buffer_unref(&pic->slice_status_buf);
+    av_buffer_unref(&pic->tmp_data);
+}
+/* Hwaccel descriptor registering "ffv1_vulkan" for AV_CODEC_ID_FFV1. */
+const FFHWAccel ff_ffv1_vulkan_hwaccel = {
+    .p.name                = "ffv1_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_FFV1,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+    .start_frame           = &vk_ffv1_start_frame,
+    .decode_slice          = &vk_ffv1_decode_slice,
+    .end_frame             = &vk_ffv1_end_frame,
+    .free_frame_priv       = &vk_ffv1_free_frame_priv,
+    .frame_priv_data_size  = sizeof(FFv1VulkanDecodePicture), /* type free_frame_priv casts to */
+    .init                  = &vk_decode_ffv1_init,
+    .update_thread_context = &ff_vk_update_thread_context,
+    .decode_params         = &ff_vk_params_invalidate,
+    .flush                 = &ff_vk_decode_flush,
+    .uninit                = &ff_vk_decode_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .priv_data_size        = sizeof(FFVulkanDecodeContext), /* hwaccel_priv_data, as read by init */
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2025-03-10  3:11 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 03/13] vulkan: copy host-mapping buffer code from hwcontext Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 04/13] vulkan: workaround BGR storage image undefined behaviour Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 05/13] vulkan_decode: support software-defined decoders Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 06/13] vulkan_decode: support multiple image views Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 07/13] vulkan_decode: adjust number of async contexts created Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 08/13] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 09/13] vulkan: unify handling of BGR and simplify ffv1_rct Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 10/13] ffv1dec: add support for hwaccels Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
2025-03-10  3:08   ` [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem() Lynne
2025-03-10  3:08   ` Lynne [this message]
2025-03-10  3:14   ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Andreas Rheinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250310030912.60902-3-dev@lynne.ee \
    --to=dev@lynne.ee \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git