Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img
@ 2025-03-10  3:08 Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview Lynne
                   ` (9 more replies)
  0 siblings, 10 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavutil/vulkan.c | 34 +++++++++++++++++-----------------
 libavutil/vulkan.h |  8 ++++----
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 2ae619967a..51372965e0 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -2390,10 +2390,10 @@ static inline void update_set_pool_write(FFVulkanContext *s, FFVkExecContext *e,
     }
 }
 
-int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanShader *shd,
-                               FFVkExecContext *e, int set, int bind, int offs,
-                               VkImageView view, VkImageLayout layout,
-                               VkSampler sampler)
+int ff_vk_shader_update_img(FFVulkanContext *s, FFVkExecContext *e,
+                            FFVulkanShader *shd, int set, int bind, int offs,
+                            VkImageView view, VkImageLayout layout,
+                            VkSampler sampler)
 {
     FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];
 
@@ -2455,6 +2455,19 @@ int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanShader *shd,
     return 0;
 }
 
+void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e,
+                                   FFVulkanShader *shd, AVFrame *f,
+                                   VkImageView *views, int set, int binding,
+                                   VkImageLayout layout, VkSampler sampler)
+{
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+    const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+    for (int i = 0; i < nb_planes; i++)
+        ff_vk_shader_update_img(s, e, shd, set, binding, i,
+                                views[i], layout, sampler);
+}
+
 int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
                                     FFVulkanShader *shd,
                                     int set, int bind, int elem,
@@ -2521,19 +2534,6 @@ int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
     return 0;
 }
 
-void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e,
-                                   FFVulkanShader *shd, AVFrame *f,
-                                   VkImageView *views, int set, int binding,
-                                   VkImageLayout layout, VkSampler sampler)
-{
-    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
-    const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
-
-    for (int i = 0; i < nb_planes; i++)
-        ff_vk_set_descriptor_image(s, shd, e, set, binding, i,
-                                   views[i], layout, sampler);
-}
-
 void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e,
                                     FFVulkanShader *shd,
                                     VkShaderStageFlagBits stage,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 617df952c4..41071b245e 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -593,10 +593,10 @@ int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
 /**
  * Sets an image descriptor for specified shader and binding.
  */
-int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanShader *shd,
-                               FFVkExecContext *e, int set, int bind, int offs,
-                               VkImageView view, VkImageLayout layout,
-                               VkSampler sampler);
+int ff_vk_shader_update_img(FFVulkanContext *s, FFVkExecContext *e,
+                            FFVulkanShader *shd, int set, int bind, int offs,
+                            VkImageView view, VkImageLayout layout,
+                            VkSampler sampler);
 
 /**
  * Update a descriptor in a buffer with an image array..
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 03/13] vulkan: copy host-mapping buffer code from hwcontext Lynne
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavutil/vulkan.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 libavutil/vulkan.h |  7 +++++++
 2 files changed, 58 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 51372965e0..4bfa877278 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1595,6 +1595,57 @@ static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt)
     return VK_FORMAT_UNDEFINED;
 }
 
+int ff_vk_create_imageview(FFVulkanContext *s,
+                           VkImageView *img_view, VkImageAspectFlags *aspect,
+                           AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+    AVVulkanFramesContext *vkfc = hwfc->hwctx;
+    const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
+    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+    const int nb_images = ff_vk_count_images(vkf);
+
+    VkImageViewUsageCreateInfo view_usage_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
+        .usage = vkfc->usage &
+                 (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR |
+                    VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)),
+    };
+    VkImageViewCreateInfo view_create_info = {
+        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext      = &view_usage_info,
+        .image      = vkf->img[FFMIN(plane, nb_images - 1)],
+        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
+        .format     = map_fmt_to_rep(rep_fmts[plane], rep_fmt),
+        .components = ff_comp_identity_map,
+        .subresourceRange = {
+            .aspectMask = ff_vk_aspect_flag(f, plane),
+            .levelCount = 1,
+            .layerCount = 1,
+        },
+    };
+    if (view_create_info.format == VK_FORMAT_UNDEFINED) {
+        av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation "
+                                "of format %i and mode %i\n",
+               rep_fmts[plane], rep_fmt);
+        return AVERROR(EINVAL);
+    }
+
+    ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
+                              s->hwctx->alloc, img_view);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    *aspect = view_create_info.subresourceRange.aspectMask;
+
+    return 0;
+}
+
 int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
                             VkImageView views[AV_NUM_DATA_POINTERS],
                             AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 41071b245e..73c4713166 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -457,6 +457,13 @@ int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
                                 AVFrame *f);
 void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e);
 
+/**
+ * Create a single imageview for a given plane.
+ */
+int ff_vk_create_imageview(FFVulkanContext *s,
+                           VkImageView *img_view, VkImageAspectFlags *aspect,
+                           AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt);
+
 /**
  * Create an imageview and add it as a dependency to an execution.
  */
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 03/13] vulkan: copy host-mapping buffer code from hwcontext
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 04/13] vulkan: workaround BGR storage image undefined behaviour Lynne
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

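The host-mapping buffer code is useful outside of hwcontext, so copy it
into libavutil/vulkan.c and expose it as ff_vk_host_map_buffer().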
---
 libavutil/vulkan.c | 155 ++++++++++++++++++++++++++++++++++++++++++++-
 libavutil/vulkan.h |  18 +++++-
 2 files changed, 170 insertions(+), 3 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 4bfa877278..7f6c9cc039 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1123,6 +1123,8 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers,
                 .memory = buf[i]->mem,
                 .size   = VK_WHOLE_SIZE,
             };
+
+            av_assert0(!buf[i]->host_ref);
             if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                 continue;
             flush_list[flush_count++] = flush_buf;
@@ -1154,12 +1156,18 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
     if (!buf || !s->hwctx)
         return;
 
-    if (buf->mapped_mem)
+    if (buf->mapped_mem && !buf->host_ref)
         ff_vk_unmap_buffer(s, buf, 0);
     if (buf->buf != VK_NULL_HANDLE)
         vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
     if (buf->mem != VK_NULL_HANDLE)
         vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
+    if (buf->host_ref)
+        av_buffer_unref(&buf->host_ref);
+
+    buf->buf = VK_NULL_HANDLE;
+    buf->mem = VK_NULL_HANDLE;
+    buf->mapped_mem = NULL;
 }
 
 static void free_data_buf(void *opaque, uint8_t *data)
@@ -1236,6 +1244,149 @@ int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
     return 0;
 }
 
+static int create_mapped_buffer(FFVulkanContext *s,
+                                FFVkBuffer *vkb, VkBufferUsageFlags usage,
+                                size_t size,
+                                VkExternalMemoryBufferCreateInfo *create_desc,
+                                VkImportMemoryHostPointerInfoEXT *import_desc,
+                                VkMemoryHostPointerPropertiesEXT props)
+{
+    int err;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    VkBufferCreateInfo buf_spawn = {
+        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext       = create_desc,
+        .usage       = usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .size        = size,
+    };
+    VkMemoryRequirements req = {
+        .size           = size,
+        .alignment      = s->hprops.minImportedHostPointerAlignment,
+        .memoryTypeBits = props.memoryTypeBits,
+    };
+
+    err = ff_vk_alloc_mem(s, &req,
+                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+                          import_desc, &vkb->flags, &vkb->mem);
+    if (err < 0)
+        return err;
+
+    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &vkb->buf);
+    if (ret != VK_SUCCESS) {
+        vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc);
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = vk->BindBufferMemory(s->hwctx->act_dev, vkb->buf, vkb->mem, 0);
+    if (ret != VK_SUCCESS) {
+        vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc);
+        vk->DestroyBuffer(s->hwctx->act_dev, vkb->buf, s->hwctx->alloc);
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst,
+                          uint8_t *src_data, AVBufferRef *src_buf,
+                          VkBufferUsageFlags usage)
+{
+    int err;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    VkExternalMemoryBufferCreateInfo create_desc = {
+        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
+        .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+    };
+    VkMemoryAllocateFlagsInfo alloc_flags = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
+        .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
+    };
+    VkImportMemoryHostPointerInfoEXT import_desc = {
+        .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+        .pNext = usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? &alloc_flags : NULL,
+    };
+    VkMemoryHostPointerPropertiesEXT props;
+
+    AVBufferRef *ref;
+    FFVkBuffer *vkb;
+    size_t offs;
+    size_t buffer_size;
+
+    *dst = NULL;
+
+    /* Get the previous point at which mapping was possible and use it */
+    offs = (uintptr_t)src_data % s->hprops.minImportedHostPointerAlignment;
+    import_desc.pHostPointer = src_data - offs;
+
+    props = (VkMemoryHostPointerPropertiesEXT) {
+        VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
+    };
+    ret = vk->GetMemoryHostPointerPropertiesEXT(s->hwctx->act_dev,
+                                                import_desc.handleType,
+                                                import_desc.pHostPointer,
+                                                &props);
+    if (!(ret == VK_SUCCESS && props.memoryTypeBits))
+        return AVERROR(EINVAL);
+
+    /* Ref the source buffer */
+    ref = av_buffer_ref(src_buf);
+    if (!ref)
+        return AVERROR(ENOMEM);
+
+    /* Add the offset at the start, which gets ignored */
+    buffer_size = offs + src_buf->size;
+    buffer_size = FFALIGN(buffer_size, s->props.properties.limits.minMemoryMapAlignment);
+    buffer_size = FFALIGN(buffer_size, s->hprops.minImportedHostPointerAlignment);
+
+    /* Create a buffer struct */
+    vkb = av_mallocz(sizeof(*vkb));
+    if (!vkb) {
+        av_buffer_unref(&ref);
+        return AVERROR(ENOMEM);
+    }
+
+    err = create_mapped_buffer(s, vkb, usage,
+                               buffer_size, &create_desc, &import_desc,
+                               props);
+    if (err < 0) {
+        av_buffer_unref(&ref);
+        av_free(vkb);
+        return err;
+    }
+
+    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+        VkBufferDeviceAddressInfo address_info = {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+            .buffer = vkb->buf,
+        };
+        vkb->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
+    }
+
+    vkb->host_ref       = ref;
+    vkb->virtual_offset = offs;
+    vkb->address       += offs;
+    vkb->mapped_mem     = src_data;
+    vkb->size           = buffer_size - offs;
+    vkb->flags         |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+    /* Create a ref */
+    *dst = av_buffer_create((uint8_t *)vkb, sizeof(*vkb),
+                            destroy_avvkbuf, s, 0);
+    if (!(*dst)) {
+        destroy_avvkbuf(s, (uint8_t *)vkb);
+        *dst = NULL;
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
 int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size,
                                 VkShaderStageFlagBits stage)
 {
@@ -2568,7 +2719,7 @@ int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
     } else {
         VkDescriptorBufferInfo desc_pool_write_info_buf = {
             .buffer = buf->buf,
-            .offset = offset,
+            .offset = buf->virtual_offset + offset,
             .range = len,
         };
         VkWriteDescriptorSet desc_pool_write_info = {
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 73c4713166..89fc4eedc5 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -96,8 +96,17 @@ typedef struct FFVkBuffer {
     VkPipelineStageFlags2 stage;
     VkAccessFlags2 access;
 
-    /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE */
+    /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE or
+     * via ff_vk_host_map_buffer */
     uint8_t *mapped_mem;
+
+    /* Set by ff_vk_host_map_buffer. This is the offset at which the buffer data
+     * actually begins.
+     * The address and mapped_mem fields will be offset by this amount. */
+    size_t virtual_offset;
+
+    /* If host mapping, reference to the backing host memory buffer */
+    AVBufferRef *host_ref;
 } FFVkBuffer;
 
 typedef struct FFVkExecContext {
@@ -523,6 +532,13 @@ int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
                             void *create_pNext, size_t size,
                             VkMemoryPropertyFlagBits mem_props);
 
+/** Maps a system RAM buffer into a Vulkan buffer.
+ * References the source buffer.
+ */
+int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst,
+                          uint8_t *src_data, AVBufferRef *src_buf,
+                          VkBufferUsageFlags usage);
+
 /**
  * Create a sampler.
  */
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 04/13] vulkan: workaround BGR storage image undefined behaviour
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 03/13] vulkan: copy host-mapping buffer code from hwcontext Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 05/13] vulkan_decode: support software-defined decoders Lynne
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavutil/vulkan.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7f6c9cc039..7b0f77b076 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1746,6 +1746,29 @@ static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt)
     return VK_FORMAT_UNDEFINED;
 }
 
+static void bgr_workaround(AVVulkanFramesContext *vkfc,
+                           VkImageViewCreateInfo *ci)
+{
+    if (!(vkfc->usage & VK_IMAGE_USAGE_STORAGE_BIT))
+        return;
+    switch (ci->format) {
+#define REMAP(src, dst)   \
+    case src:             \
+        ci->format = dst; \
+        return;
+    REMAP(VK_FORMAT_B8G8R8A8_UNORM,           VK_FORMAT_R8G8B8A8_UNORM)
+    REMAP(VK_FORMAT_B8G8R8A8_SINT,            VK_FORMAT_R8G8B8A8_SINT)
+    REMAP(VK_FORMAT_B8G8R8A8_UINT,            VK_FORMAT_R8G8B8A8_UINT)
+    REMAP(VK_FORMAT_B8G8R8_UNORM,             VK_FORMAT_R8G8B8_UNORM)
+    REMAP(VK_FORMAT_B8G8R8_SINT,              VK_FORMAT_R8G8B8_SINT)
+    REMAP(VK_FORMAT_B8G8R8_UINT,              VK_FORMAT_R8G8B8_UINT)
+    REMAP(VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_FORMAT_A2R10G10B10_UNORM_PACK32)
+#undef REMAP
+    default:
+        return;
+    }
+}
+
 int ff_vk_create_imageview(FFVulkanContext *s,
                            VkImageView *img_view, VkImageAspectFlags *aspect,
                            AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt)
@@ -1777,6 +1800,7 @@ int ff_vk_create_imageview(FFVulkanContext *s,
             .layerCount = 1,
         },
     };
+    bgr_workaround(vkfc, &view_create_info);
     if (view_create_info.format == VK_FORMAT_UNDEFINED) {
         av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation "
                                 "of format %i and mode %i\n",
@@ -1838,6 +1862,7 @@ int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
                 .layerCount = 1,
             },
         };
+        bgr_workaround(vkfc, &view_create_info);
         if (view_create_info.format == VK_FORMAT_UNDEFINED) {
             av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation "
                                     "of format %i and mode %i\n",
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 05/13] vulkan_decode: support software-defined decoders
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
                   ` (2 preceding siblings ...)
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 04/13] vulkan: workaround BGR storage image undefined behaviour Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 06/13] vulkan_decode: support multiple image views Lynne
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavcodec/vulkan_decode.c | 194 +++++++++++++++++++++++++++----------
 libavcodec/vulkan_decode.h |  11 +++
 2 files changed, 154 insertions(+), 51 deletions(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index c57998108c..594764a904 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -24,6 +24,9 @@
 #include "libavutil/mem.h"
 #include "libavutil/vulkan_loader.h"
 
+#define DECODER_IS_SDR(codec_id) \
+    ((codec_id) == AV_CODEC_ID_FFV1)
+
 #if CONFIG_H264_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
 #endif
@@ -63,7 +66,9 @@ static const VkVideoProfileInfoKHR *get_video_profile(FFVulkanDecodeShared *ctx,
         codec_id == AV_CODEC_ID_H264 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR :
         codec_id == AV_CODEC_ID_HEVC ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR :
         codec_id == AV_CODEC_ID_AV1  ? VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR :
-        0;
+                                       VK_STRUCTURE_TYPE_MAX_ENUM;
+    if (profile_struct_type == VK_STRUCTURE_TYPE_MAX_ENUM)
+        return NULL;
 
     profile_list = ff_vk_find_struct(ctx->s.hwfc->create_pnext,
                                      VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
@@ -119,13 +124,26 @@ static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx)
     return avf;
 }
 
+static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic)
+{
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    vkpic->dpb_frame     = NULL;
+    vkpic->img_view_ref  = VK_NULL_HANDLE;
+    vkpic->img_view_out  = VK_NULL_HANDLE;
+    vkpic->img_view_dest = VK_NULL_HANDLE;
+
+    vkpic->destroy_image_view = vk->DestroyImageView;
+    vkpic->wait_semaphores = vk->WaitSemaphores;
+}
+
 int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
                                FFVulkanDecodePicture *vkpic, int is_current,
                                int alloc_dpb)
 {
     int err;
     FFVulkanDecodeShared *ctx = dec->shared_ctx;
-    FFVulkanFunctions *vk = &ctx->s.vkfn;
 
     vkpic->slices_size = 0;
 
@@ -134,13 +152,7 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
     if (vkpic->img_view_ref)
         return 0;
 
-    vkpic->dpb_frame     = NULL;
-    vkpic->img_view_ref  = VK_NULL_HANDLE;
-    vkpic->img_view_out  = VK_NULL_HANDLE;
-    vkpic->img_view_dest = VK_NULL_HANDLE;
-
-    vkpic->destroy_image_view = vk->DestroyImageView;
-    vkpic->wait_semaphores = vk->WaitSemaphores;
+    init_frame(dec, vkpic);
 
     if (ctx->common.layered_dpb && alloc_dpb) {
         vkpic->img_view_ref = ctx->common.layered_view;
@@ -183,6 +195,53 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
     return 0;
 }
 
+int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic,
+                                   FFVulkanDecodePicture *vkpic, int is_current,
+                                   enum FFVkShaderRepFormat rep_fmt, int alloc_dpb)
+{
+    int err;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+
+    vkpic->slices_size = 0;
+
+    if (vkpic->img_view_ref)
+        return 0;
+
+    init_frame(dec, vkpic);
+
+    if (ctx->common.layered_dpb && alloc_dpb) {
+        vkpic->img_view_ref = ctx->common.layered_view;
+        vkpic->img_aspect_ref = ctx->common.layered_aspect;
+    } else if (alloc_dpb) {
+        vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+        if (!vkpic->dpb_frame)
+            return AVERROR(ENOMEM);
+
+        err = ff_vk_create_imageview(&ctx->s,
+                                     &vkpic->img_view_ref, &vkpic->img_aspect_ref,
+                                     vkpic->dpb_frame, 0, rep_fmt);
+        if (err < 0)
+            return err;
+
+        vkpic->img_view_dest = vkpic->img_view_ref;
+    }
+
+    if (!alloc_dpb || is_current) {
+        err = ff_vk_create_imageview(&ctx->s,
+                                     &vkpic->img_view_out, &vkpic->img_aspect,
+                                     pic, 0, rep_fmt);
+        if (err < 0)
+            return err;
+
+        if (!alloc_dpb) {
+            vkpic->img_view_ref = vkpic->img_view_out;
+            vkpic->img_aspect_ref = vkpic->img_aspect;
+        }
+    }
+
+    return 0;
+}
+
 int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp,
                            const uint8_t *data, size_t size, int add_startcode,
                            uint32_t *nb_slices, const uint32_t **offsets)
@@ -223,9 +282,14 @@ int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp,
         buf_size = 2 << av_log2(buf_size);
 
         err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &new_ref,
+                                      DECODER_IS_SDR(avctx->codec_id) ?
+                                      (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                       VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) :
                                       VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
                                       ctx->s.hwfc->create_pnext, buf_size,
-                                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+                                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                      (DECODER_IS_SDR(avctx->codec_id) ?
+                                       VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0));
         if (err < 0)
             return err;
 
@@ -276,6 +340,10 @@ void ff_vk_decode_flush(AVCodecContext *avctx)
     VkCommandBuffer cmd_buf;
     FFVkExecContext *exec;
 
+    /* Non-video queues do not need to be reset */
+    if (!(get_codecdesc(avctx->codec_id)->decode_op))
+        return;
+
     exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
     ff_vk_exec_start(&ctx->s, exec);
     cmd_buf = exec->buf;
@@ -544,6 +612,9 @@ static void free_common(AVRefStructOpaque unused, void *obj)
 
     ff_vk_video_common_uninit(s, &ctx->common);
 
+    if (ctx->sd_ctx_free)
+        ctx->sd_ctx_free(ctx);
+
     ff_vk_uninit(s);
 }
 
@@ -551,6 +622,7 @@ static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_re
 {
     int err;
     FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    const FFVulkanDecodeDescriptor *vk_desc = get_codecdesc(avctx->codec_id);
     AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
     AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data;
     AVVulkanDeviceContext *hwctx = device->hwctx;
@@ -569,11 +641,13 @@ static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_re
     ctx->s.extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions,
                                                  hwctx->nb_enabled_dev_extensions);
 
-    if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
-        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
-               VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
-        av_refstruct_unref(&dec->shared_ctx);
-        return AVERROR(ENOSYS);
+    if (vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
+        if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
+            av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
+                   VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
+            av_refstruct_unref(&dec->shared_ctx);
+            return AVERROR(ENOSYS);
+        }
     }
 
     err = ff_vk_load_functions(device, &ctx->s.vkfn, ctx->s.extensions, 1, 1);
@@ -927,53 +1001,61 @@ static void free_profile_data(AVHWFramesContext *hwfc)
 
 int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
 {
-    VkFormat vkfmt;
+    VkFormat vkfmt = VK_FORMAT_UNDEFINED;
     int err, dedicated_dpb;
     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
     AVVulkanFramesContext *hwfc = frames_ctx->hwctx;
     FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
-    FFVulkanDecodeProfileData *prof;
-    FFVulkanDecodeShared *ctx;
-
-    frames_ctx->sw_format = AV_PIX_FMT_NONE;
+    FFVulkanDecodeProfileData *prof = NULL;
 
     err = vulkan_decode_bootstrap(avctx, hw_frames_ctx);
     if (err < 0)
         return err;
 
-    prof = av_mallocz(sizeof(FFVulkanDecodeProfileData));
-    if (!prof)
-        return AVERROR(ENOMEM);
+    frames_ctx->sw_format = avctx->sw_pix_fmt;
 
-    err = vulkan_decode_get_profile(avctx, hw_frames_ctx,
-                                    &frames_ctx->sw_format, &vkfmt,
-                                    prof, &dedicated_dpb);
-    if (err < 0) {
-        av_free(prof);
-        return err;
-    }
+    if (!DECODER_IS_SDR(avctx->codec_id)) {
+        prof = av_mallocz(sizeof(FFVulkanDecodeProfileData));
+        if (!prof)
+            return AVERROR(ENOMEM);
 
-    frames_ctx->user_opaque = prof;
-    frames_ctx->free        = free_profile_data;
+        err = vulkan_decode_get_profile(avctx, hw_frames_ctx,
+                                        &frames_ctx->sw_format, &vkfmt,
+                                        prof, &dedicated_dpb);
+        if (err < 0) {
+            av_free(prof);
+            return err;
+        }
+
+        frames_ctx->user_opaque = prof;
+        frames_ctx->free        = free_profile_data;
+
+        hwfc->create_pnext = &prof->profile_list;
+    }
 
     frames_ctx->width  = avctx->coded_width;
     frames_ctx->height = avctx->coded_height;
     frames_ctx->format = AV_PIX_FMT_VULKAN;
 
     hwfc->format[0]    = vkfmt;
-    hwfc->create_pnext = &prof->profile_list;
     hwfc->tiling       = VK_IMAGE_TILING_OPTIMAL;
     hwfc->usage        = VK_IMAGE_USAGE_TRANSFER_SRC_BIT         |
-                         VK_IMAGE_USAGE_SAMPLED_BIT              |
-                         VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR;
+                         VK_IMAGE_USAGE_SAMPLED_BIT;
 
-    if (!dec->dedicated_dpb)
-        hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+    if (prof) {
+        FFVulkanDecodeShared *ctx;
 
-    ctx = dec->shared_ctx;
-    if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
-                             FF_VK_EXT_VIDEO_MAINTENANCE_1))
-        hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
+        hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR;
+        if (!dec->dedicated_dpb)
+            hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+
+        ctx = dec->shared_ctx;
+        if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
+                                 FF_VK_EXT_VIDEO_MAINTENANCE_1))
+            hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
+    } else if (DECODER_IS_SDR(avctx->codec_id)) {
+        hwfc->usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+    }
 
     return err;
 }
@@ -1075,8 +1157,10 @@ int ff_vk_decode_init(AVCodecContext *avctx)
     if (err < 0)
         return err;
 
+    vk_desc = get_codecdesc(avctx->codec_id);
+
     profile = get_video_profile(ctx, avctx->codec_id);
-    if (!profile) {
+    if ((vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) && !profile) {
         av_log(avctx, AV_LOG_ERROR, "Video profile missing from frames context!");
         return AVERROR(EINVAL);
     }
@@ -1109,9 +1193,11 @@ int ff_vk_decode_init(AVCodecContext *avctx)
     if (err < 0)
         goto fail;
 
-    err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
-    if (err < 0)
-        goto fail;
+    if (profile) {
+        err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
+        if (err < 0)
+            goto fail;
+    }
 
     /* If doing an out-of-place decoding, create a DPB pool */
     if (dec->dedicated_dpb || avctx->codec_id == AV_CODEC_ID_AV1) {
@@ -1163,12 +1249,18 @@ int ff_vk_decode_init(AVCodecContext *avctx)
     }
 
     session_params_create.videoSession = ctx->common.session;
-    ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
-                                              s->hwctx->alloc, &ctx->empty_session_params);
-    if (ret != VK_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n",
-               ff_vk_ret2str(ret));
-        return AVERROR_EXTERNAL;
+    if (profile) {
+        ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
+                                                  s->hwctx->alloc, &ctx->empty_session_params);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    } else {
+        /* For SDR decoders, this alignment value will be 0. Since this will make
+         * add_slice() malfunction, set it to a sane default value. */
+        ctx->caps.minBitstreamBufferSizeAlignment = AV_INPUT_BUFFER_PADDING_SIZE;
     }
 
     driver_props = &dec->shared_ctx->s.driver_props;
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 1d89db323f..5c743e96d2 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -56,6 +56,10 @@ typedef struct FFVulkanDecodeShared {
     VkVideoDecodeCapabilitiesKHR dec_caps;
 
     VkVideoSessionParametersKHR empty_session_params;
+
+    /* Software-defined decoder context */
+    void *sd_ctx;
+    void (*sd_ctx_free)(struct FFVulkanDecodeShared *ctx);
 } FFVulkanDecodeShared;
 
 typedef struct FFVulkanDecodeContext {
@@ -141,6 +145,13 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
                                FFVulkanDecodePicture *vkpic, int is_current,
                                int alloc_dpb);
 
+/**
+ * Software-defined decoder version of ff_vk_decode_prepare_frame.
+ */
+int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic,
+                                   FFVulkanDecodePicture *vkpic, int is_current,
+                                   enum FFVkShaderRepFormat rep_fmt, int alloc_dpb);
+
 /**
  * Add slice data to frame.
  */
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 06/13] vulkan_decode: support multiple image views
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
                   ` (3 preceding siblings ...)
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 05/13] vulkan_decode: support software-defined decoders Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 07/13] vulkan_decode: adjust number of async contexts created Lynne
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

Enables non-monochrome video decoding using all our existing functions
in the context of an SDR decoder.
---
 libavcodec/vulkan_av1.c    |  4 +-
 libavcodec/vulkan_decode.c | 90 ++++++++++++++++++++------------------
 libavcodec/vulkan_decode.h | 12 ++---
 libavcodec/vulkan_h264.c   |  4 +-
 libavcodec/vulkan_hevc.c   |  4 +-
 5 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c
index 6659f9d812..7dd7b204d7 100644
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@@ -123,7 +123,7 @@ static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src,
         .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
         .baseArrayLayer = ((has_grain || dec->dedicated_dpb) && ctx->common.layered_dpb) ?
                           hp->frame_id : 0,
-        .imageViewBinding = vkpic->img_view_ref,
+        .imageViewBinding = vkpic->view.ref[0],
     };
 
     *ref_slot = (VkVideoReferenceSlotInfoKHR) {
@@ -346,7 +346,7 @@ static int vk_av1_start_frame(AVCodecContext          *avctx,
             .codedOffset = (VkOffset2D){ 0, 0 },
             .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
             .baseArrayLayer = 0,
-            .imageViewBinding = vp->img_view_out,
+            .imageViewBinding = vp->view.out[0],
         },
     };
 
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 594764a904..7f638d6fc6 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -130,9 +130,11 @@ static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic)
     FFVulkanFunctions *vk = &ctx->s.vkfn;
 
     vkpic->dpb_frame     = NULL;
-    vkpic->img_view_ref  = VK_NULL_HANDLE;
-    vkpic->img_view_out  = VK_NULL_HANDLE;
-    vkpic->img_view_dest = VK_NULL_HANDLE;
+    for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) {
+        vkpic->view.ref[i]  = VK_NULL_HANDLE;
+        vkpic->view.out[i]  = VK_NULL_HANDLE;
+        vkpic->view.dst[i]  = VK_NULL_HANDLE;
+    }
 
     vkpic->destroy_image_view = vk->DestroyImageView;
     vkpic->wait_semaphores = vk->WaitSemaphores;
@@ -149,14 +151,14 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
 
     /* If the decoder made a blank frame to make up for a missing ref, or the
      * frame is the current frame so it's missing one, create a re-representation */
-    if (vkpic->img_view_ref)
+    if (vkpic->view.ref[0])
         return 0;
 
     init_frame(dec, vkpic);
 
     if (ctx->common.layered_dpb && alloc_dpb) {
-        vkpic->img_view_ref = ctx->common.layered_view;
-        vkpic->img_aspect_ref = ctx->common.layered_aspect;
+        vkpic->view.ref[0] = ctx->common.layered_view;
+        vkpic->view.aspect_ref[0] = ctx->common.layered_aspect;
     } else if (alloc_dpb) {
         AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data;
         AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx;
@@ -166,13 +168,13 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
             return AVERROR(ENOMEM);
 
         err = ff_vk_create_view(&ctx->s, &ctx->common,
-                                &vkpic->img_view_ref, &vkpic->img_aspect_ref,
+                                &vkpic->view.ref[0], &vkpic->view.aspect_ref[0],
                                 (AVVkFrame *)vkpic->dpb_frame->data[0],
                                 dpb_hwfc->format[0], !is_current);
         if (err < 0)
             return err;
 
-        vkpic->img_view_dest = vkpic->img_view_ref;
+        vkpic->view.dst[0] = vkpic->view.ref[0];
     }
 
     if (!alloc_dpb || is_current) {
@@ -180,15 +182,15 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
         AVVulkanFramesContext *hwfc = frames->hwctx;
 
         err = ff_vk_create_view(&ctx->s, &ctx->common,
-                                &vkpic->img_view_out, &vkpic->img_aspect,
+                                &vkpic->view.out[0], &vkpic->view.aspect[0],
                                 (AVVkFrame *)pic->data[0],
                                 hwfc->format[0], !is_current);
         if (err < 0)
             return err;
 
         if (!alloc_dpb) {
-            vkpic->img_view_ref = vkpic->img_view_out;
-            vkpic->img_aspect_ref = vkpic->img_aspect;
+            vkpic->view.ref[0] = vkpic->view.out[0];
+            vkpic->view.aspect_ref[0] = vkpic->view.aspect[0];
         }
     }
 
@@ -201,41 +203,41 @@ int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic,
 {
     int err;
     FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data;
 
     vkpic->slices_size = 0;
 
-    if (vkpic->img_view_ref)
+    if (vkpic->view.ref[0])
         return 0;
 
     init_frame(dec, vkpic);
 
-    if (ctx->common.layered_dpb && alloc_dpb) {
-        vkpic->img_view_ref = ctx->common.layered_view;
-        vkpic->img_aspect_ref = ctx->common.layered_aspect;
-    } else if (alloc_dpb) {
-        vkpic->dpb_frame = vk_get_dpb_pool(ctx);
-        if (!vkpic->dpb_frame)
-            return AVERROR(ENOMEM);
+    for (int i = 0; i < av_pix_fmt_count_planes(frames->sw_format); i++) {
+        if (alloc_dpb) {
+            vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+            if (!vkpic->dpb_frame)
+                return AVERROR(ENOMEM);
 
-        err = ff_vk_create_imageview(&ctx->s,
-                                     &vkpic->img_view_ref, &vkpic->img_aspect_ref,
-                                     vkpic->dpb_frame, 0, rep_fmt);
-        if (err < 0)
-            return err;
+            err = ff_vk_create_imageview(&ctx->s,
+                                         &vkpic->view.ref[i], &vkpic->view.aspect_ref[i],
+                                         vkpic->dpb_frame, i, rep_fmt);
+            if (err < 0)
+                return err;
 
-        vkpic->img_view_dest = vkpic->img_view_ref;
-    }
+            vkpic->view.dst[i] = vkpic->view.ref[i];
+        }
 
-    if (!alloc_dpb || is_current) {
-        err = ff_vk_create_imageview(&ctx->s,
-                                     &vkpic->img_view_out, &vkpic->img_aspect,
-                                     pic, 0, rep_fmt);
-        if (err < 0)
-            return err;
+        if (!alloc_dpb || is_current) {
+            err = ff_vk_create_imageview(&ctx->s,
+                                         &vkpic->view.out[i], &vkpic->view.aspect[i],
+                                         pic, i, rep_fmt);
+            if (err < 0)
+                return err;
 
-        if (!alloc_dpb) {
-            vkpic->img_view_ref = vkpic->img_view_out;
-            vkpic->img_aspect_ref = vkpic->img_aspect;
+            if (!alloc_dpb) {
+                vkpic->view.ref[i] = vkpic->view.out[i];
+                vkpic->view.aspect_ref[i] = vkpic->view.aspect[i];
+            }
         }
     }
 
@@ -467,7 +469,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = vkf->img[0],
         .subresourceRange = (VkImageSubresourceRange) {
-            .aspectMask = vp->img_aspect,
+            .aspectMask = vp->view.aspect[0],
             .layerCount = 1,
             .levelCount = 1,
         },
@@ -523,7 +525,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
                     .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                     .image = rvkf->img[0],
                     .subresourceRange = (VkImageSubresourceRange) {
-                        .aspectMask = rvp->img_aspect_ref,
+                        .aspectMask = rvp->view.aspect_ref[0],
                         .layerCount = 1,
                         .levelCount = 1,
                     },
@@ -533,7 +535,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
             }
         }
     } else if (vp->decode_info.referenceSlotCount ||
-               vp->img_view_out != vp->img_view_ref) {
+               vp->view.out[0] != vp->view.ref[0]) {
         /* Single barrier for a single layered ref */
         err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
                                        VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
@@ -580,12 +582,14 @@ void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *
     av_buffer_unref(&vp->slices_buf);
 
     /* Destroy image view (out) */
-    if (vp->img_view_out && vp->img_view_out != vp->img_view_dest)
-        vp->destroy_image_view(hwctx->act_dev, vp->img_view_out, hwctx->alloc);
+    for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) {
+        if (vp->view.out[i] && vp->view.out[i] != vp->view.dst[i])
+            vp->destroy_image_view(hwctx->act_dev, vp->view.out[i], hwctx->alloc);
 
-    /* Destroy image view (ref, unlayered) */
-    if (vp->img_view_dest)
-        vp->destroy_image_view(hwctx->act_dev, vp->img_view_dest, hwctx->alloc);
+        /* Destroy image view (ref, unlayered) */
+        if (vp->view.dst[i])
+            vp->destroy_image_view(hwctx->act_dev, vp->view.dst[i], hwctx->alloc);
+    }
 
     av_frame_free(&vp->dpb_frame);
 }
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 5c743e96d2..cbd22b3591 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -85,11 +85,13 @@ typedef struct FFVulkanDecodeContext {
 typedef struct FFVulkanDecodePicture {
     AVFrame                        *dpb_frame;      /* Only used for out-of-place decoding. */
 
-    VkImageView                     img_view_ref;   /* Image representation view (reference) */
-    VkImageView                     img_view_out;   /* Image representation view (output-only) */
-    VkImageView                     img_view_dest;  /* Set to img_view_out if no layered refs are used */
-    VkImageAspectFlags              img_aspect;     /* Image plane mask bits */
-    VkImageAspectFlags              img_aspect_ref; /* Only used for out-of-place decoding */
+    struct {
+        VkImageView                     ref[AV_NUM_DATA_POINTERS];        /* Image representation view (reference) */
+        VkImageView                     out[AV_NUM_DATA_POINTERS];        /* Image representation view (output-only) */
+        VkImageView                     dst[AV_NUM_DATA_POINTERS];        /* Set to view.ref if a dedicated, non-layered DPB frame is used */
+        VkImageAspectFlags              aspect[AV_NUM_DATA_POINTERS];     /* Image plane mask bits */
+        VkImageAspectFlags              aspect_ref[AV_NUM_DATA_POINTERS]; /* Only used for out-of-place decoding */
+    } view;
 
     VkSemaphore                     sem;
     uint64_t                        sem_value;
diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c
index 1df8f0a208..71cf2c3ad7 100644
--- a/libavcodec/vulkan_h264.c
+++ b/libavcodec/vulkan_h264.c
@@ -98,7 +98,7 @@ static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
         .codedOffset = (VkOffset2D){ 0, 0 },
         .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
         .baseArrayLayer = ctx->common.layered_dpb ? dpb_slot_index : 0,
-        .imageViewBinding = vkpic->img_view_ref,
+        .imageViewBinding = vkpic->view.ref[0],
     };
 
     *ref_slot = (VkVideoReferenceSlotInfoKHR) {
@@ -471,7 +471,7 @@ static int vk_h264_start_frame(AVCodecContext          *avctx,
             .codedOffset = (VkOffset2D){ 0, 0 },
             .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
             .baseArrayLayer = 0,
-            .imageViewBinding = vp->img_view_out,
+            .imageViewBinding = vp->view.out[0],
         },
     };
 
diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
index 589c3de83d..a5bcd88e2d 100644
--- a/libavcodec/vulkan_hevc.c
+++ b/libavcodec/vulkan_hevc.c
@@ -164,7 +164,7 @@ static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
         .codedOffset = (VkOffset2D){ 0, 0 },
         .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
         .baseArrayLayer = ctx->common.layered_dpb ? pic_id : 0,
-        .imageViewBinding = vkpic->img_view_ref,
+        .imageViewBinding = vkpic->view.ref[0],
     };
 
     *ref_slot = (VkVideoReferenceSlotInfoKHR) {
@@ -823,7 +823,7 @@ static int vk_hevc_start_frame(AVCodecContext          *avctx,
             .codedOffset = (VkOffset2D){ 0, 0 },
             .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
             .baseArrayLayer = 0,
-            .imageViewBinding = vp->img_view_out,
+            .imageViewBinding = vp->view.out[0],
         },
     };
 
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 07/13] vulkan_decode: adjust number of async contexts created
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
                   ` (4 preceding siblings ...)
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 06/13] vulkan_decode: support multiple image views Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 08/13] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

This caps the number of contexts we create based on thread count.
This saves VRAM and filters out cases where more async is of lesser
benefit.
---
 libavcodec/vulkan_decode.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 7f638d6fc6..cd77e10e12 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -1122,6 +1122,7 @@ int ff_vk_decode_init(AVCodecContext *avctx)
     FFVulkanDecodeShared *ctx;
     FFVulkanContext *s;
     FFVulkanFunctions *vk;
+    int async_depth;
     const VkVideoProfileInfoKHR *profile;
     const FFVulkanDecodeDescriptor *vk_desc;
     const VkPhysicalDeviceDriverProperties *driver_props;
@@ -1191,9 +1192,14 @@ int ff_vk_decode_init(AVCodecContext *avctx)
     /* Create decode exec context for this specific main thread.
      * 2 async contexts per thread was experimentally determined to be optimal
      * for a majority of streams. */
+    async_depth = 2*ctx->qf->num;
+    /* We don't need more than 2 per thread context */
+    async_depth = FFMIN(async_depth, 2*avctx->thread_count);
+    /* Make sure there are enough async contexts for each thread */
+    async_depth = FFMAX(async_depth, avctx->thread_count);
+
     err = ff_vk_exec_pool_init(s, ctx->qf, &ctx->exec_pool,
-                               FFMAX(2*ctx->qf->num, avctx->thread_count),
-                               0, 0, 0, profile);
+                               async_depth, 0, 0, 0, profile);
     if (err < 0)
         goto fail;
 
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 08/13] ffv1enc_vulkan: refactor shaders slightly to support sharing
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
                   ` (5 preceding siblings ...)
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 07/13] vulkan_decode: adjust number of async contexts created Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 09/13] vulkan: unify handling of BGR and simplify ffv1_rct Lynne
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

The shaders were written to support sharing, but needed slight
tweaking.
---
 libavcodec/Makefile                   |   2 +-
 libavcodec/ffv1_vulkan.c              | 123 ++++++++++++++
 libavcodec/ffv1_vulkan.h              |  60 +++++++
 libavcodec/ffv1enc_vulkan.c           | 234 +++++++++-----------------
 libavcodec/vulkan/ffv1_common.comp    |  24 ++-
 libavcodec/vulkan/ffv1_enc_setup.comp |  18 +-
 libavcodec/vulkan/ffv1_reset.comp     |   3 +-
 libavcodec/vulkan/rangecoder.comp     |  27 +--
 8 files changed, 302 insertions(+), 189 deletions(-)
 create mode 100644 libavcodec/ffv1_vulkan.c
 create mode 100644 libavcodec/ffv1_vulkan.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index e3ccbf1838..74de7737f9 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -371,7 +371,7 @@ OBJS-$(CONFIG_EXR_ENCODER)             += exrenc.o float2half.o
 OBJS-$(CONFIG_FASTAUDIO_DECODER)       += fastaudio.o
 OBJS-$(CONFIG_FFV1_DECODER)            += ffv1dec.o ffv1_parse.o ffv1.o
 OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1enc.o ffv1_parse.o ffv1.o
-OBJS-$(CONFIG_FFV1_VULKAN_ENCODER)     += ffv1enc.o ffv1.o ffv1enc_vulkan.o
+OBJS-$(CONFIG_FFV1_VULKAN_ENCODER)     += ffv1enc.o ffv1.o ffv1_vulkan.o ffv1enc_vulkan.o
 OBJS-$(CONFIG_FFWAVESYNTH_DECODER)     += ffwavesynth.o
 OBJS-$(CONFIG_FIC_DECODER)             += fic.o
 OBJS-$(CONFIG_FITS_DECODER)            += fitsdec.o fits.o
diff --git a/libavcodec/ffv1_vulkan.c b/libavcodec/ffv1_vulkan.c
new file mode 100644
index 0000000000..6f49e2ebb1
--- /dev/null
+++ b/libavcodec/ffv1_vulkan.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "ffv1_vulkan.h"
+#include "libavutil/crc.h"
+
+int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s,
+                                            FFVkBuffer *vkb, FFV1Context *f)
+{
+    int err;
+    uint8_t *buf_mapped;
+    /* Writes f->state_transition into vkb through a CPU mapping of the buffer. */
+    RET(ff_vk_map_buffer(s, vkb, &buf_mapped, 0));
+    /* Bytes 257..511 get state_transition[i], bytes 255..1 get 256 - it; 0 and 256 are not written. */
+    for (int i = 1; i < 256; i++) {
+        buf_mapped[256 + i] = f->state_transition[i];
+        buf_mapped[256 - i] = 256 - (int)f->state_transition[i];
+    }
+
+    RET(ff_vk_unmap_buffer(s, vkb, 1));
+
+fail:
+    return err;
+}
+
+static int init_state_transition_data(FFVulkanContext *s,
+                                      FFVkBuffer *vkb, FFV1Context *f,
+                                      int (*write_data)(FFVulkanContext *s,
+                                                        FFVkBuffer *vkb, FFV1Context *f))
+{
+    int err;
+    size_t buf_len = 512*sizeof(uint8_t);
+    /* Create the 512-byte table buffer in vkb, then let write_data fill it from f. */
+    RET(ff_vk_create_buf(s, vkb,
+                         buf_len,
+                         NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+    /* The writer returns an error code; propagate it instead of reporting success. */
+    RET(write_data(s, vkb, f));
+
+fail:
+    return err;
+}
+
+int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s,
+                                          FFVkBuffer *vkb, FFV1Context *f)
+{ /* Creates vkb and fills it using ff_ffv1_vk_update_state_transition_data(). */
+    return init_state_transition_data(s, vkb, f,
+                                      ff_ffv1_vk_update_state_transition_data);
+}
+
+int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s,
+                                     FFVkBuffer *vkb, FFV1Context *f)
+{
+    int err;
+    /* Creates vkb and copies f->quant_tables into it through a CPU mapping. */
+    int16_t *buf_mapped;
+    size_t buf_len = MAX_QUANT_TABLES*
+                     MAX_CONTEXT_INPUTS*
+                     MAX_QUANT_TABLE_SIZE*sizeof(int16_t);
+
+    RET(ff_vk_create_buf(s, vkb,
+                         buf_len,
+                         NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+    RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0));
+    /* NOTE(review): copies sizeof(f->quant_tables) bytes; assumed equal to buf_len - confirm. */
+    memcpy(buf_mapped, f->quant_tables,
+           sizeof(f->quant_tables));
+
+    RET(ff_vk_unmap_buffer(s, vkb, 1));
+
+fail:
+    return err;
+}
+
+int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s,
+                                   FFVkBuffer *vkb, FFV1Context *f)
+{
+    int err;
+    /* Creates vkb and copies the start of the AV_CRC_32_IEEE table into it; f is not used. */
+    uint32_t *buf_mapped;
+    size_t buf_len = 256*sizeof(int32_t);
+
+    RET(ff_vk_create_buf(s, vkb,
+                         buf_len,
+                         NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+    RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0));
+    /* NOTE(review): av_crc_get_table() result is used unchecked - confirm it cannot be NULL. */
+    memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len);
+
+    RET(ff_vk_unmap_buffer(s, vkb, 1));
+
+fail:
+    return err;
+}
diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h
new file mode 100644
index 0000000000..0da6dc2d33
--- /dev/null
+++ b/libavcodec/ffv1_vulkan.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FFV1_VULKAN_H
+#define AVCODEC_FFV1_VULKAN_H
+
+#include "libavutil/vulkan.h"
+#include "ffv1.h"
+
+int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s,
+                                            FFVkBuffer *vkb, FFV1Context *f);
+
+int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s,
+                                          FFVkBuffer *vkb, FFV1Context *f);
+
+int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s,
+                                     FFVkBuffer *vkb, FFV1Context *f);
+
+int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s,
+                                   FFVkBuffer *vkb, FFV1Context *f);
+
+typedef struct FFv1VkRCTParameters { /* presumably RCT shader push constants - confirm */
+    int offset;
+    uint8_t bits;
+    uint8_t planar_rgb;
+    uint8_t transparency;
+    uint8_t version;
+    uint8_t micro_version;
+    uint8_t padding[3]; /* rounds the five uint8_t fields above up to 8 bytes */
+} FFv1VkRCTParameters;
+
+typedef struct FFv1VkResetParameters { /* presumably reset shader push constants - confirm */
+    VkDeviceAddress slice_state;
+    uint32_t plane_state_size;
+    uint32_t context_count;
+    uint8_t codec_planes;
+    uint8_t key_frame;
+    uint8_t version;
+    uint8_t micro_version;
+    uint8_t padding[1]; /* NOTE(review): 4 + 1 trailing bytes; check against the shader block */
+} FFv1VkResetParameters;
+
+#endif /* AVCODEC_FFV1_VULKAN_H */
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 2bbf310fce..17a93834f3 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -18,7 +18,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/crc.h"
 #include "libavutil/mem.h"
 #include "libavutil/vulkan.h"
 #include "libavutil/vulkan_spirv.h"
@@ -32,6 +31,7 @@
 
 #include "ffv1.h"
 #include "ffv1enc.h"
+#include "ffv1_vulkan.h"
 
 /* Parallel Golomb alignment */
 #define LG_ALIGN_W 32
@@ -122,28 +122,10 @@ extern const char *ff_source_ffv1_enc_setup_comp;
 extern const char *ff_source_ffv1_enc_comp;
 extern const char *ff_source_ffv1_enc_rgb_comp;
 
-typedef struct FFv1VkRCTParameters {
-    int offset;
-    uint8_t bits;
-    uint8_t planar_rgb;
-    uint8_t transparency;
-    uint8_t padding[1];
-} FFv1VkRCTParameters;
-
-typedef struct FFv1VkResetParameters {
-    VkDeviceAddress slice_state;
-    uint32_t plane_state_size;
-    uint32_t context_count;
-    uint8_t codec_planes;
-    uint8_t key_frame;
-    uint8_t padding[3];
-} FFv1VkResetParameters;
-
 typedef struct FFv1VkParameters {
     VkDeviceAddress slice_state;
     VkDeviceAddress scratch_data;
     VkDeviceAddress out_data;
-    uint64_t slice_size_max;
 
     int32_t sar[2];
     uint32_t chroma_shift[2];
@@ -151,6 +133,7 @@ typedef struct FFv1VkParameters {
     uint32_t plane_state_size;
     uint32_t context_count;
     uint32_t crcref;
+    uint32_t slice_size_max;
 
     uint8_t bits_per_raw_sample;
     uint8_t context_model;
@@ -175,7 +158,6 @@ static void add_push_data(FFVulkanShader *shd)
     GLSLC(1,    u8buf slice_state;                                            );
     GLSLC(1,    u8buf scratch_data;                                           );
     GLSLC(1,    u8buf out_data;                                               );
-    GLSLC(1,    uint64_t slice_size_max;                                      );
     GLSLC(0,                                                                  );
     GLSLC(1,    ivec2 sar;                                                    );
     GLSLC(1,    uvec2 chroma_shift;                                           );
@@ -183,6 +165,7 @@ static void add_push_data(FFVulkanShader *shd)
     GLSLC(1,    uint plane_state_size;                                        );
     GLSLC(1,    uint context_count;                                           );
     GLSLC(1,    uint32_t crcref;                                              );
+    GLSLC(1,    uint32_t slice_size_max;                                      );
     GLSLC(0,                                                                  );
     GLSLC(1,    uint8_t bits_per_raw_sample;                                  );
     GLSLC(1,    uint8_t context_model;                                        );
@@ -492,7 +475,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
         .slice_state = slice_data_buf->address + f->slice_count*256,
         .scratch_data = tmp_data_buf->address,
         .out_data = out_data_buf->address,
-        .slice_size_max = out_data_buf->size / f->slice_count,
         .bits_per_raw_sample = f->bits_per_raw_sample,
         .sar[0] = pict->sample_aspect_ratio.num,
         .sar[1] = pict->sample_aspect_ratio.den,
@@ -501,6 +483,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
         .plane_state_size = plane_state_size,
         .context_count = context_count,
         .crcref = f->crcref,
+        .slice_size_max = out_data_buf->size / f->slice_count,
         .context_model = fv->ctx.context_model,
         .version = f->version,
         .micro_version = f->micro_version,
@@ -966,7 +949,6 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
     GLSLF(0, #define TYPE int%i_t                                        ,smp_bits);
     GLSLF(0, #define VTYPE2 i%ivec2                                      ,smp_bits);
     GLSLF(0, #define VTYPE3 i%ivec3                                      ,smp_bits);
-    GLSLD(ff_source_common_comp);
     GLSLD(ff_source_rangecoder_comp);
 
     if (f->ac == AC_GOLOMB_RICE)
@@ -993,6 +975,10 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
                           1, 1, 1,
                           0));
 
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+    add_push_data(shd);
+
     av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
     av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
     av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1038,8 +1024,6 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
     };
     RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0));
 
-    add_push_data(shd);
-
     GLSLD(ff_source_ffv1_enc_setup_comp);
 
     RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
@@ -1074,6 +1058,22 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
                           wg_dim, 1, 1,
                           0));
 
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
+    GLSLC(1,    u8buf slice_state;                                             );
+    GLSLC(1,    uint plane_state_size;                                         );
+    GLSLC(1,    uint context_count;                                            );
+    GLSLC(1,    uint8_t codec_planes;                                          );
+    GLSLC(1,    uint8_t key_frame;                                             );
+    GLSLC(1,    uint8_t version;                                               );
+    GLSLC(1,    uint8_t micro_version;                                         );
+    GLSLC(1,    uint8_t padding[1];                                            );
+    GLSLC(0, };                                                                );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
     av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
     av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
     av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1110,17 +1110,6 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
     };
     RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0));
 
-    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
-    GLSLC(1,    u8buf slice_state;                                             );
-    GLSLC(1,    uint plane_state_size;                                         );
-    GLSLC(1,    uint context_count;                                            );
-    GLSLC(1,    uint8_t codec_planes;                                          );
-    GLSLC(1,    uint8_t key_frame;                                             );
-    GLSLC(1,    uint8_t padding[3];                                            );
-    GLSLC(0, };                                                                );
-    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
-                                VK_SHADER_STAGE_COMPUTE_BIT);
-
     GLSLD(ff_source_ffv1_reset_comp);
 
     RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
@@ -1164,6 +1153,21 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
                           wg_count, wg_count, 1,
                           0));
 
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
+    GLSLC(1,    int offset;                                                    );
+    GLSLC(1,    uint8_t bits;                                                  );
+    GLSLC(1,    uint8_t planar_rgb;                                            );
+    GLSLC(1,    uint8_t transparency;                                          );
+    GLSLC(1,    uint8_t version;                                               );
+    GLSLC(1,    uint8_t micro_version;                                         );
+    GLSLC(1,    uint8_t padding[3];                                            );
+    GLSLC(0, };                                                                );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
     av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
     av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
     av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1220,16 +1224,6 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
     };
     RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
 
-    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
-    GLSLC(1,    int offset;                                                    );
-    GLSLC(1,    uint8_t bits;                                                  );
-    GLSLC(1,    uint8_t planar_rgb;                                            );
-    GLSLC(1,    uint8_t transparency;                                          );
-    GLSLC(1,    uint8_t padding[1];                                            );
-    GLSLC(0, };                                                                );
-    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
-                                VK_SHADER_STAGE_COMPUTE_BIT);
-
     GLSLD(ff_source_ffv1_enc_rct_comp);
 
     RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
@@ -1268,6 +1262,11 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
                           1, 1, 1,
                           0));
 
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
     av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
     av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
     av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1328,8 +1327,6 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
     };
     RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
 
-    add_push_data(shd);
-
     /* Assemble the shader body */
     GLSLD(ff_source_ffv1_enc_common_comp);
 
@@ -1356,110 +1353,6 @@ fail:
     return err;
 }
 
-static int init_state_transition_data(AVCodecContext *avctx)
-{
-    int err;
-    VulkanEncodeFFv1Context *fv = avctx->priv_data;
-
-    uint8_t *buf_mapped;
-    size_t buf_len = 512*sizeof(uint8_t);
-
-    RET(ff_vk_create_buf(&fv->s, &fv->rangecoder_static_buf,
-                         buf_len,
-                         NULL, NULL,
-                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
-                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
-                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
-    RET(ff_vk_map_buffer(&fv->s, &fv->rangecoder_static_buf,
-                         &buf_mapped, 0));
-
-    for (int i = 1; i < 256; i++) {
-        buf_mapped[256 + i] = fv->ctx.state_transition[i];
-        buf_mapped[256 - i] = 256 - (int)fv->ctx.state_transition[i];
-    }
-
-    RET(ff_vk_unmap_buffer(&fv->s, &fv->rangecoder_static_buf, 1));
-
-    /* Update descriptors */
-    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
-                                        &fv->setup, 0, 0, 0,
-                                        &fv->rangecoder_static_buf,
-                                        0, fv->rangecoder_static_buf.size,
-                                        VK_FORMAT_UNDEFINED));
-    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
-                                        &fv->enc, 0, 0, 0,
-                                        &fv->rangecoder_static_buf,
-                                        0, fv->rangecoder_static_buf.size,
-                                        VK_FORMAT_UNDEFINED));
-
-fail:
-    return err;
-}
-
-static int init_quant_table_data(AVCodecContext *avctx)
-{
-    int err;
-    VulkanEncodeFFv1Context *fv = avctx->priv_data;
-
-    int16_t *buf_mapped;
-    size_t buf_len = MAX_QUANT_TABLES*
-                     MAX_CONTEXT_INPUTS*
-                     MAX_QUANT_TABLE_SIZE*sizeof(int16_t);
-
-    RET(ff_vk_create_buf(&fv->s, &fv->quant_buf,
-                         buf_len,
-                         NULL, NULL,
-                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
-                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
-                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
-    RET(ff_vk_map_buffer(&fv->s, &fv->quant_buf, (void *)&buf_mapped, 0));
-
-    memcpy(buf_mapped, fv->ctx.quant_tables,
-           sizeof(fv->ctx.quant_tables));
-
-    RET(ff_vk_unmap_buffer(&fv->s, &fv->quant_buf, 1));
-    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
-                                        &fv->enc, 0, 1, 0,
-                                        &fv->quant_buf,
-                                        0, fv->quant_buf.size,
-                                        VK_FORMAT_UNDEFINED));
-
-fail:
-    return err;
-}
-
-static int init_crc_table_data(AVCodecContext *avctx)
-{
-    int err;
-    VulkanEncodeFFv1Context *fv = avctx->priv_data;
-
-    uint32_t *buf_mapped;
-    size_t buf_len = 256*sizeof(int32_t);
-
-    RET(ff_vk_create_buf(&fv->s, &fv->crc_tab_buf,
-                         buf_len,
-                         NULL, NULL,
-                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
-                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
-                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
-    RET(ff_vk_map_buffer(&fv->s, &fv->crc_tab_buf, (void *)&buf_mapped, 0));
-
-    memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len);
-
-    RET(ff_vk_unmap_buffer(&fv->s, &fv->crc_tab_buf, 1));
-    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
-                                        &fv->enc, 0, 2, 0,
-                                        &fv->crc_tab_buf,
-                                        0, fv->crc_tab_buf.size,
-                                        VK_FORMAT_UNDEFINED));
-
-fail:
-    return err;
-}
-
 static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
 {
     int err;
@@ -1703,20 +1596,50 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
     spv->uninit(&spv);
 
     /* Range coder data */
-    err = init_state_transition_data(avctx);
+    err = ff_ffv1_vk_init_state_transition_data(&fv->s,
+                                                &fv->rangecoder_static_buf,
+                                                f);
     if (err < 0)
         return err;
 
     /* Quantization table data */
-    err = init_quant_table_data(avctx);
+    err = ff_ffv1_vk_init_quant_table_data(&fv->s,
+                                           &fv->quant_buf,
+                                           f);
     if (err < 0)
         return err;
 
     /* CRC table buffer */
-    err = init_crc_table_data(avctx);
+    err = ff_ffv1_vk_init_crc_table_data(&fv->s,
+                                         &fv->crc_tab_buf,
+                                         f);
     if (err < 0)
         return err;
 
+    /* Update setup global descriptors */
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->setup, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+    /* Update encode global descriptors */
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->enc, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->enc, 0, 1, 0,
+                                        &fv->quant_buf,
+                                        0, fv->quant_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->enc, 0, 2, 0,
+                                        &fv->crc_tab_buf,
+                                        0, fv->crc_tab_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
     /* Temporary frame */
     fv->frame = av_frame_alloc();
     if (!fv->frame)
@@ -1735,7 +1658,8 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
     if (!fv->buf_regions)
         return AVERROR(ENOMEM);
 
-    return 0;
+fail:
+    return err;
 }
 
 static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp
index 5b4a882367..604d03b2de 100644
--- a/libavcodec/vulkan/ffv1_common.comp
+++ b/libavcodec/vulkan/ffv1_common.comp
@@ -22,17 +22,18 @@
 
 struct SliceContext {
     RangeCoder c;
-
-#ifdef GOLOMB
     PutBitContext pb; /* 8*8 bytes */
-#endif
 
     ivec2 slice_dim;
     ivec2 slice_pos;
     ivec2 slice_rct_coef;
+    u8vec4 quant_table_idx;
+    uint context_count;
 
     uint hdr_len; // only used for golomb
-    int slice_coding_mode;
+
+    uint slice_coding_mode;
+    bool slice_reset_contexts;
 };
 
 /* -1, { -1, 0 } */
@@ -72,3 +73,18 @@ const uint32_t log2_run[41] = {
     16, 17, 18, 19, 20, 21, 22, 23,
     24,
 };
+
+uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
+{
+    uint mpw = 1 << chroma_shift;
+    uint awidth = align(width, mpw);
+
+    if ((version < 4) || ((version == 4) && (micro_version < 3)))
+        return width * sx / num_h_slices;
+
+    sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw;
+    if (sx == awidth)
+        sx = width;
+
+    return sx;
+}
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp
index b861e25f74..23f09b2af6 100644
--- a/libavcodec/vulkan/ffv1_enc_setup.comp
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp
@@ -20,21 +20,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
-{
-    uint mpw = 1 << chroma_shift;
-    uint awidth = align(width, mpw);
-
-    if ((version < 4) || ((version == 4) && (micro_version < 3)))
-        return width * sx / num_h_slices;
-
-    sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw;
-    if (sx == awidth)
-        sx = width;
-
-    return sx;
-}
-
 void init_slice(out SliceContext sc, const uint slice_idx)
 {
     /* Set coordinates */
@@ -52,6 +37,7 @@ void init_slice(out SliceContext sc, const uint slice_idx)
     sc.slice_dim = ivec2(sxe - sxs, sye - sys);
     sc.slice_rct_coef = ivec2(1, 1);
     sc.slice_coding_mode = int(force_pcm == 1);
+    sc.slice_reset_contexts = sc.slice_coding_mode == 1;
 
     rac_init(sc.c,
              OFFBUF(u8buf, out_data, slice_idx * slice_size_max),
@@ -105,7 +91,7 @@ void write_slice_header(inout SliceContext sc, uint64_t state)
     put_symbol_unsigned(sc.c, state, sar.y);
 
     if (version >= 4) {
-        put_rac_full(sc.c, state, sc.slice_coding_mode == 1);
+        put_rac_full(sc.c, state, sc.slice_reset_contexts);
         put_symbol_unsigned(sc.c, state, sc.slice_coding_mode);
         if (sc.slice_coding_mode != 1 && colorspace == 1) {
             put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y);
diff --git a/libavcodec/vulkan/ffv1_reset.comp b/libavcodec/vulkan/ffv1_reset.comp
index c7c7962850..1b87ca754e 100644
--- a/libavcodec/vulkan/ffv1_reset.comp
+++ b/libavcodec/vulkan/ffv1_reset.comp
@@ -24,7 +24,8 @@ void main(void)
 {
     const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
 
-    if (slice_ctx[slice_idx].slice_coding_mode == 0 && key_frame == 0)
+    if (key_frame == 0 &&
+        slice_ctx[slice_idx].slice_reset_contexts == false)
         return;
 
     uint64_t slice_state_off = uint64_t(slice_state) +
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 848a056fb1..6e3b9c1238 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -21,8 +21,9 @@
  */
 
 struct RangeCoder {
-    u8buf bytestream_start;
-    u8buf bytestream;
+    uint64_t bytestream_start;
+    uint64_t bytestream;
+    uint64_t bytestream_end;
 
     uint low;
     uint16_t range;
@@ -34,28 +35,29 @@ struct RangeCoder {
 void renorm_encoder_full(inout RangeCoder c)
 {
     int bs_cnt = 0;
+    u8buf bytestream = u8buf(c.bytestream);
 
     if (c.outstanding_byte == 0xFF) {
         c.outstanding_byte = uint8_t(c.low >> 8);
     } else if (c.low <= 0xFF00) {
-        c.bytestream[bs_cnt++].v = c.outstanding_byte;
+        bytestream[bs_cnt++].v = c.outstanding_byte;
         uint16_t cnt = c.outstanding_count;
         for (; cnt > 0; cnt--)
-            c.bytestream[bs_cnt++].v = uint8_t(0xFF);
+            bytestream[bs_cnt++].v = uint8_t(0xFF);
         c.outstanding_count = uint16_t(0);
         c.outstanding_byte = uint8_t(c.low >> 8);
     } else if (c.low >= 0x10000) {
-        c.bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1);
+        bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1);
         uint16_t cnt = c.outstanding_count;
         for (; cnt > 0; cnt--)
-            c.bytestream[bs_cnt++].v = uint8_t(0x00);
+            bytestream[bs_cnt++].v = uint8_t(0x00);
         c.outstanding_count = uint16_t(0);
         c.outstanding_byte = uint8_t(bitfieldExtract(c.low, 8, 8));
     } else {
         c.outstanding_count++;
     }
 
-    c.bytestream = OFFBUF(u8buf, c.bytestream, bs_cnt);
+    c.bytestream += bs_cnt;
     c.range <<= 8;
     c.low = bitfieldInsert(0, c.low, 8, 8);
 }
@@ -74,10 +76,10 @@ void renorm_encoder(inout RangeCoder c)
         return;
     }
 
-    u8buf bs = c.bytestream;
+    u8buf bs = u8buf(c.bytestream);
     uint8_t outstanding_byte = c.outstanding_byte;
 
-    c.bytestream        = OFFBUF(u8buf, bs, oc);
+    c.bytestream        = uint64_t(bs) + oc;
     c.outstanding_count = uint16_t(0);
     c.outstanding_byte  = uint8_t(low >> 8);
 
@@ -179,10 +181,11 @@ uint32_t rac_terminate(inout RangeCoder c)
     return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start));
 }
 
-void rac_init(out RangeCoder r, u8buf data, uint64_t buf_size)
+void rac_init(out RangeCoder r, u8buf data, uint buf_size)
 {
-    r.bytestream_start = data;
-    r.bytestream = data;
+    r.bytestream_start = uint64_t(data);
+    r.bytestream = uint64_t(data);
+    r.bytestream_end = uint64_t(data) + buf_size;
     r.low = 0;
     r.range = uint16_t(0xFF00);
     r.outstanding_count = uint16_t(0);
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 09/13] vulkan: unify handling of BGR and simplify ffv1_rct
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
                   ` (6 preceding siblings ...)
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 08/13] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 10/13] ffv1dec: add support for hwaccels Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
  9 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavcodec/ffv1_vulkan.h            |  1 +
 libavcodec/ffv1enc_vulkan.c         |  2 ++
 libavcodec/vulkan/ffv1_enc_rct.comp | 17 ++++++-------
 libavutil/vulkan.c                  | 38 +++++++++++++++++++++++++++++
 libavutil/vulkan.h                  |  6 +++++
 5 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h
index 0da6dc2d33..599afae66e 100644
--- a/libavcodec/ffv1_vulkan.h
+++ b/libavcodec/ffv1_vulkan.h
@@ -37,6 +37,7 @@ int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s,
                                    FFVkBuffer *vkb, FFV1Context *f);
 
 typedef struct FFv1VkRCTParameters {
+    int fmt_lut[4];
     int offset;
     uint8_t bits;
     uint8_t planar_rgb;
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 17a93834f3..f8fe3bec1a 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -264,6 +264,7 @@ static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec,
                       (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1),
         .transparency = f->transparency,
     };
+    ff_vk_set_perm(src_hwfc->sw_format, pd.fmt_lut);
     ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct,
                                    VK_SHADER_STAGE_COMPUTE_BIT,
                                    0, sizeof(pd), &pd);
@@ -1157,6 +1158,7 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
     GLSLD(ff_source_common_comp);
 
     GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
+    GLSLC(1,    ivec4 fmt_lut;                                                 );
     GLSLC(1,    int offset;                                                    );
     GLSLC(1,    uint8_t bits;                                                  );
     GLSLC(1,    uint8_t planar_rgb;                                            );
diff --git a/libavcodec/vulkan/ffv1_enc_rct.comp b/libavcodec/vulkan/ffv1_enc_rct.comp
index a615381c90..b611f4be98 100644
--- a/libavcodec/vulkan/ffv1_enc_rct.comp
+++ b/libavcodec/vulkan/ffv1_enc_rct.comp
@@ -22,17 +22,14 @@
 
 ivec4 load_components(ivec2 pos)
 {
-    if (planar_rgb == 0)
-        return ivec4(imageLoad(src[0], pos));
+    ivec4 pix = ivec4(imageLoad(src[0], pos));
+    if (planar_rgb != 0) {
+        for (int i = 1; i < (3 + transparency); i++)
+            pix[i] = int(imageLoad(src[i], pos)[0]);
+    }
 
-    ivec4 pix;
-    for (int i = 0; i < (3 + transparency); i++)
-        pix[i] = int(imageLoad(src[i], pos)[0]);
-
-    /* Swizzle out the difference */
-    if (bits > 8 && bits < 16 && transparency == 0)
-        return pix.bgra;
-    return pix.brga;
+    return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
+                 pix[fmt_lut[2]], pix[fmt_lut[3]]);
 }
 
 void bypass_sample(ivec2 pos)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7b0f77b076..24af8d5753 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1478,6 +1478,44 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
     return 0;
 }
 
+void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4])
+{
+    switch (pix_fmt) {
+    case AV_PIX_FMT_BGRA:
+    case AV_PIX_FMT_BGR0:
+    case AV_PIX_FMT_BGR565:
+    case AV_PIX_FMT_X2BGR10:
+        lut[0] = 2;
+        lut[1] = 1;
+        lut[2] = 0;
+        lut[3] = 3;
+        return;
+    case AV_PIX_FMT_GBRAP:
+    case AV_PIX_FMT_GBRP:
+    case AV_PIX_FMT_GBRAP10:
+    case AV_PIX_FMT_GBRAP12:
+    case AV_PIX_FMT_GBRAP14:
+    case AV_PIX_FMT_GBRAP16:
+    case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRP14:
+    case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GBRPF32:
+    case AV_PIX_FMT_GBRAPF32:
+        lut[0] = 1;
+        lut[1] = 0;
+        lut[2] = 2;
+        lut[3] = 3;
+        return;
+    default:
+        lut[0] = 0;
+        lut[1] = 1;
+        lut[2] = 2;
+        lut[3] = 3;
+        return;
+    }
+}
+
 const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt,
                                  enum FFVkShaderRepFormat rep_fmt)
 {
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 89fc4eedc5..2a2a5916a5 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -371,6 +371,12 @@ const char *ff_vk_ret2str(VkResult res);
  */
 int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
 
+/**
+ * Since storage images may not be swizzled, we have to do this in the
+ * shader itself. This fills in a lookup table to do it.
+ */
+void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4]);
+
 /**
  * Get the aspect flag for a plane from an image.
  */
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 10/13] ffv1dec: add support for hwaccels
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
                   ` (7 preceding siblings ...)
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 09/13] vulkan: unify handling of BGR and simplify ffv1_rct Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
  9 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

This commit adds support for hardware-accelerated decoding to
the decoder.
The previous commits already refactored the decoder; this commit
simply adds calls to the hwaccel hooks used for decoding.
---
 libavcodec/ffv1.h    |  2 ++
 libavcodec/ffv1dec.c | 81 +++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index c23d64d54a..8c0e71284d 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -125,8 +125,10 @@ typedef struct FFV1Context {
     int64_t picture_number;
     int key_frame;
     ProgressFrame picture, last_picture;
+    void *hwaccel_picture_private, *hwaccel_last_picture_private;
     uint32_t crcref;
     enum AVPixelFormat pix_fmt;
+    enum AVPixelFormat configured_pix_fmt;
 
     const AVFrame *cur_enc_frame;
     int plane_count;
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 5e9a765e38..eaa21eebdf 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -41,6 +41,9 @@
 #include "libavutil/refstruct.h"
 #include "thread.h"
 #include "decode.h"
+#include "hwconfig.h"
+#include "hwaccel_internal.h"
+#include "config_components.h"
 
 static inline int get_vlc_symbol(GetBitContext *gb, VlcState *const state,
                                  int bits)
@@ -365,9 +368,12 @@ static int read_header(FFV1Context *f, RangeCoder *c)
     if (ret < 0)
         return ret;
 
-    f->avctx->pix_fmt = get_pixel_format(f);
-    if (f->avctx->pix_fmt < 0)
-        return AVERROR(EINVAL);
+    if (f->configured_pix_fmt != f->pix_fmt) {
+        f->avctx->pix_fmt = get_pixel_format(f);
+        if (f->avctx->pix_fmt < 0)
+            return AVERROR(EINVAL);
+        f->configured_pix_fmt = f->pix_fmt;
+    }
 
     ff_dlog(f->avctx, "%d %d %d\n",
             f->chroma_h_shift, f->chroma_v_shift, f->pix_fmt);
@@ -460,6 +466,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
     FFV1Context *f = avctx->priv_data;
     int ret;
 
+    f->pix_fmt = AV_PIX_FMT_NONE;
+    f->configured_pix_fmt = AV_PIX_FMT_NONE;
+
     if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
         return ret;
 
@@ -644,13 +653,16 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
     FFV1Context *f      = avctx->priv_data;
     int ret;
     AVFrame *p;
+    const FFHWAccel *hwaccel = NULL;
 
     /* This is copied onto the first slice's range coder context */
     RangeCoder c;
 
     ff_progress_frame_unref(&f->last_picture);
-    FFSWAP(ProgressFrame, f->picture, f->last_picture);
+    av_refstruct_unref(&f->hwaccel_last_picture_private);
 
+    FFSWAP(ProgressFrame, f->picture, f->last_picture);
+    FFSWAP(void *, f->hwaccel_picture_private, f->hwaccel_last_picture_private);
 
     f->avctx = avctx;
     f->frame_damaged = 0;
@@ -659,11 +671,18 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
     if (ret < 0)
         return ret;
 
+    if (avctx->hwaccel)
+        hwaccel = ffhwaccel(avctx->hwaccel);
+
     ret = ff_progress_frame_get_buffer(avctx, &f->picture,
                                        AV_GET_BUFFER_FLAG_REF);
     if (ret < 0)
         return ret;
 
+    ret = ff_hwaccel_frame_priv_alloc(avctx, &f->hwaccel_picture_private);
+    if (ret < 0)
+        return ret;
+
     p = f->picture.f;
 
     p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. P
@@ -680,15 +699,53 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
         av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n",
                f->version, !!(p->flags & AV_FRAME_FLAG_KEY), f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample);
 
+    /* Start */
+    if (hwaccel) {
+        ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
+        if (ret < 0)
+            return ret;
+    }
+
     ff_thread_finish_setup(avctx);
 
-    ret = decode_slices(avctx, c, avpkt);
-    if (ret < 0)
-        return ret;
+    /* Decode slices */
+    if (hwaccel) {
+        uint8_t *buf_end = avpkt->data + avpkt->size;
+
+        if (!(p->flags & AV_FRAME_FLAG_KEY) && f->last_picture.f)
+            ff_progress_frame_await(&f->last_picture, f->slice_count - 1);
+
+        for (int i = f->slice_count - 1; i >= 0; i--) {
+            uint8_t *pos;
+            uint32_t len;
+            ret = find_next_slice(avctx, avpkt->data, buf_end, i,
+                                  &pos, &len);
+            if (ret < 0)
+                return ret;
+
+            buf_end -= len;
+
+            ret = hwaccel->decode_slice(avctx, pos, len);
+            if (ret < 0)
+                return ret;
+        }
+    } else {
+        ret = decode_slices(avctx, c, avpkt);
+        if (ret < 0)
+            return ret;
+    }
+
+    /* Finalize */
+    if (hwaccel) {
+        ret = hwaccel->end_frame(avctx);
+        if (ret < 0)
+            return ret;
+    }
 
     ff_progress_frame_report(&f->picture, INT_MAX);
 
     ff_progress_frame_unref(&f->last_picture);
+    av_refstruct_unref(&f->hwaccel_last_picture_private);
     if ((ret = av_frame_ref(rframe, f->picture.f)) < 0)
         return ret;
 
@@ -717,6 +774,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
     fdst->ac                  = fsrc->ac;
     fdst->colorspace          = fsrc->colorspace;
     fdst->pix_fmt             = fsrc->pix_fmt;
+    fdst->configured_pix_fmt  = fsrc->configured_pix_fmt;
 
     fdst->ec                  = fsrc->ec;
     fdst->intra               = fsrc->intra;
@@ -752,6 +810,8 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
     av_assert1(fdst->max_slice_count == fsrc->max_slice_count);
 
     ff_progress_frame_replace(&fdst->picture, &fsrc->picture);
+    av_refstruct_replace(&fdst->hwaccel_picture_private,
+                         fsrc->hwaccel_picture_private);
 
     return 0;
 }
@@ -762,8 +822,10 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx)
     FFV1Context *const s = avctx->priv_data;
 
     ff_progress_frame_unref(&s->picture);
+    av_refstruct_unref(&s->hwaccel_picture_private);
+
     ff_progress_frame_unref(&s->last_picture);
-    av_freep(&avctx->stats_out);
+    av_refstruct_unref(&s->hwaccel_last_picture_private);
 
     ff_ffv1_close(s);
 
@@ -784,4 +846,7 @@ const FFCodec ff_ffv1_decoder = {
                       AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_USES_PROGRESSFRAMES,
+    .hw_configs     = (const AVCodecHWConfigInternal *const []) {
+        NULL
+    },
 };
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
  2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
                   ` (8 preceding siblings ...)
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 10/13] ffv1dec: add support for hwaccels Lynne
@ 2025-03-10  3:08 ` Lynne
  2025-03-10  3:08   ` [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem() Lynne
                     ` (2 more replies)
  9 siblings, 3 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

---
 libavcodec/ffv1.h    |  3 +++
 libavcodec/ffv1dec.c | 19 +++++++++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 8c0e71284d..860a5c14b1 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -174,6 +174,9 @@ typedef struct FFV1Context {
      * NOT shared between frame threads.
      */
     uint8_t           frame_damaged;
+
+    /* Reference to the current packet */
+    AVPacket *pkt_ref;
 } FFV1Context;
 
 int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index eaa21eebdf..6396f22f79 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
     f->pix_fmt = AV_PIX_FMT_NONE;
     f->configured_pix_fmt = AV_PIX_FMT_NONE;
 
+    f->pkt_ref = av_packet_alloc();
+    if (!f->pkt_ref)
+        return AVERROR(ENOMEM);
+
     if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
         return ret;
 
@@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
 
     /* Start */
     if (hwaccel) {
+        ret = av_packet_ref(f->pkt_ref, avpkt);
+        if (ret < 0)
+            return ret;
+
         ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
         if (ret < 0)
             return ret;
@@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
             uint32_t len;
             ret = find_next_slice(avctx, avpkt->data, buf_end, i,
                                   &pos, &len);
-            if (ret < 0)
+            if (ret < 0) {
+                av_packet_unref(f->pkt_ref);
                 return ret;
+            }
 
             buf_end -= len;
 
             ret = hwaccel->decode_slice(avctx, pos, len);
-            if (ret < 0)
+            if (ret < 0) {
+                av_packet_unref(f->pkt_ref);
                 return ret;
+            }
         }
+
+        av_packet_unref(f->pkt_ref);
     } else {
         ret = decode_slices(avctx, c, avpkt);
         if (ret < 0)
@@ -827,6 +841,7 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx)
     ff_progress_frame_unref(&s->last_picture);
     av_refstruct_unref(&s->hwaccel_last_picture_private);
 
+    av_packet_free(&s->pkt_ref);
     ff_ffv1_close(s);
 
     return 0;
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem()
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
@ 2025-03-10  3:08   ` Lynne
  2025-03-10  3:08   ` [FFmpeg-devel] [PATCH 13/13] ffv1: add a Vulkan-based decoder Lynne
  2025-03-10  3:14   ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Andreas Rheinhardt
  2 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

This adds a function which adds a regular timeline semaphore
as a wait-only dependency.
---
 libavutil/vulkan.c | 28 ++++++++++++++++++++--------
 libavutil/vulkan.h |  3 +++
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 24af8d5753..085c8b6d4d 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -620,6 +620,23 @@ static void destroy_tmp_semaphores(void *opaque, uint8_t *data)
     av_free(ts);
 }
 
+int ff_vk_exec_add_dep_wait_sem(FFVulkanContext *s, FFVkExecContext *e,
+                                VkSemaphore sem, uint64_t val,
+                                VkPipelineStageFlagBits2 stage)
+{   /* Append a wait on semaphore `sem` (value `val`, stage mask `stage`) to e->sem_wait. */
+    VkSemaphoreSubmitInfo *sem_wait;
+    ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
+    /* NOTE(review): ARR_REALLOC is defined outside this hunk; presumably it grows e->sem_wait by one slot and bails out on failure - confirm. */
+    e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+        .semaphore = sem,
+        .value = val,
+        .stageMask = stage,
+    };
+    /* The statements above only ever write to the wait list of `e`. */
+    return 0;
+}
+
 int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e,
                                 VkSemaphore *sem, int nb,
                                 VkPipelineStageFlagBits2 stage,
@@ -672,14 +689,9 @@ int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e,
     }
 
     for (int i = 0; i < nb; i++) {
-        VkSemaphoreSubmitInfo *sem_wait;
-        ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
-
-        e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
-            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
-            .semaphore = sem[i],
-            .stageMask = stage,
-        };
+        err = ff_vk_exec_add_dep_wait_sem(s, e, sem[i], 0, stage);
+        if (err < 0)
+            return err;
     }
 
     return 0;
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 2a2a5916a5..de84d6e10a 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -456,6 +456,9 @@ void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
  */
 int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
                            AVBufferRef **deps, int nb_deps, int ref);
+int ff_vk_exec_add_dep_wait_sem(FFVulkanContext *s, FFVkExecContext *e,
+                                VkSemaphore sem, uint64_t val,
+                                VkPipelineStageFlagBits2 stage);
 int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e,
                                 VkSemaphore *sem, int nb,
                                 VkPipelineStageFlagBits2 stage,
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [FFmpeg-devel] [PATCH 13/13] ffv1: add a Vulkan-based decoder
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
  2025-03-10  3:08   ` [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem() Lynne
@ 2025-03-10  3:08   ` Lynne
  2025-03-10  3:14   ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Andreas Rheinhardt
  2 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-10  3:08 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Lynne

This patch adds a fully-featured level 3 and 4 decoder for FFv1,
supporting Golomb and all Range coding variants, all pixel formats,
and all features, except for the newly added floating-point formats.

On a 6000 Ada, for 3840x2160 bgr0 content at 50Mbps (standard desktop
recording), it is able to do 400fps.
An Alder Lake with 24 threads can barely do 100fps.
---
 configure                             |    2 +
 libavcodec/Makefile                   |    1 +
 libavcodec/ffv1dec.c                  |    6 +
 libavcodec/hwaccels.h                 |    1 +
 libavcodec/vulkan/Makefile            |    6 +
 libavcodec/vulkan/common.comp         |   95 ++
 libavcodec/vulkan/ffv1_common.comp    |    5 +
 libavcodec/vulkan/ffv1_dec.comp       |  303 ++++++
 libavcodec/vulkan/ffv1_dec_rct.comp   |   72 ++
 libavcodec/vulkan/ffv1_dec_setup.comp |  138 +++
 libavcodec/vulkan/ffv1_rct.comp       |   90 ++
 libavcodec/vulkan/ffv1_vlc.comp       |   37 +
 libavcodec/vulkan/rangecoder.comp     |   74 ++
 libavcodec/vulkan_decode.c            |   17 +
 libavcodec/vulkan_ffv1.c              | 1292 +++++++++++++++++++++++++
 15 files changed, 2139 insertions(+)
 create mode 100644 libavcodec/vulkan/ffv1_dec.comp
 create mode 100644 libavcodec/vulkan/ffv1_dec_rct.comp
 create mode 100644 libavcodec/vulkan/ffv1_dec_setup.comp
 create mode 100644 libavcodec/vulkan/ffv1_rct.comp
 create mode 100644 libavcodec/vulkan_ffv1.c

diff --git a/configure b/configure
index 04b83a8868..fbee82f920 100755
--- a/configure
+++ b/configure
@@ -3195,6 +3195,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox"
 av1_videotoolbox_hwaccel_select="av1_decoder"
 av1_vulkan_hwaccel_deps="vulkan"
 av1_vulkan_hwaccel_select="av1_decoder"
+ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler"
+ffv1_vulkan_hwaccel_select="ffv1_decoder"
 h263_vaapi_hwaccel_deps="vaapi"
 h263_vaapi_hwaccel_select="h263_decoder"
 h263_videotoolbox_hwaccel_deps="videotoolbox"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 74de7737f9..eb91cbb5ce 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1017,6 +1017,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL)          += vaapi_av1.o
 OBJS-$(CONFIG_AV1_VDPAU_HWACCEL)          += vdpau_av1.o
 OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL)   += videotoolbox_av1.o
 OBJS-$(CONFIG_AV1_VULKAN_HWACCEL)         += vulkan_decode.o vulkan_av1.o
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL)        += vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o
 OBJS-$(CONFIG_H263_VAAPI_HWACCEL)         += vaapi_mpeg4.o
 OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
 OBJS-$(CONFIG_H264_D3D11VA_HWACCEL)       += dxva2_h264.o
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 6396f22f79..7b0740ad37 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -349,6 +349,9 @@ static int decode_slice(AVCodecContext *c, void *arg)
 static enum AVPixelFormat get_pixel_format(FFV1Context *f)
 {
     enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_FFV1_VULKAN_HWACCEL
+        AV_PIX_FMT_VULKAN,
+#endif
         f->pix_fmt,
         AV_PIX_FMT_NONE,
     };
@@ -862,6 +865,9 @@ const FFCodec ff_ffv1_decoder = {
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_USES_PROGRESSFRAMES,
     .hw_configs     = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_FFV1_VULKAN_HWACCEL
+        HWACCEL_VULKAN(ffv1),
+#endif
         NULL
     },
 };
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 910a024032..0b2c725247 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel;
 extern const struct FFHWAccel ff_av1_vdpau_hwaccel;
 extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_av1_vulkan_hwaccel;
+extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel;
 extern const struct FFHWAccel ff_h263_vaapi_hwaccel;
 extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_h264_d3d11va_hwaccel;
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 351332ee44..e6bad486bd 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -11,6 +11,12 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER)  +=  vulkan/common.o \
 					vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \
 					vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o
 
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL)  +=  vulkan/common.o \
+					vulkan/rangecoder.o vulkan/ffv1_vlc.o \
+					vulkan/ffv1_common.o vulkan/ffv1_reset.o \
+					vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o \
+					vulkan/ffv1_dec_rct.o
+
 VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
 .SECONDARY: $(VULKAN:.comp=.c)
 libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
index e4e983b3e2..b0adf8590e 100644
--- a/libavcodec/vulkan/common.comp
+++ b/libavcodec/vulkan/common.comp
@@ -26,6 +26,10 @@ layout(buffer_reference, buffer_reference_align = 1) buffer u8vec2buf {
     u8vec2 v;
 };
 
+layout(buffer_reference, buffer_reference_align = 1) buffer u8vec4buf {
+    u8vec4 v;
+};
+
 layout(buffer_reference, buffer_reference_align = 2) buffer u16buf {
     uint16_t v;
 };
@@ -182,3 +186,94 @@ uint32_t put_bytes_count(in PutBitContext pb)
     uint64_t num_bytes = (pb.buf - pb.buf_start) + ((BUF_BITS - pb.bit_left) >> 3);
     return uint32_t(num_bytes);
 }
+
+struct GetBitContext {
+    uint64_t buf_start; /* address of the first byte of the bitstream */
+    uint64_t buf; /* address of the next byte to be loaded into the cache */
+    uint64_t buf_end; /* buf_start plus the length in bytes */
+
+    uint64_t bits; /* cached bits; readers take them from the MSB downwards */
+    uint bits_valid; /* number of unconsumed bits left in `bits` */
+    uint size_in_bits; /* bitstream length in bits */
+};
+
+#define LOAD64() /* replace the cache with the next 8 bytes at gb.buf */ \
+    {                                                  \
+        u8vec4buf ptr = u8vec4buf(gb.buf);             \
+        uint32_t rf1 = pack32((ptr[0].v).wzyx); /* bytes 0..3, order reversed by the swizzle */ \
+        uint32_t rf2 = pack32((ptr[1].v).wzyx); /* bytes 4..7, same treatment */ \
+        gb.buf += 8;                                   \
+        gb.bits = uint64_t(rf1) << 32 | uint64_t(rf2); /* overwrites any bits still cached */ \
+        gb.bits_valid = 64;                            \
+    }
+
+#define RELOAD32() /* append the next 4 bytes below the bits still cached */ \
+    {                                                       \
+        u8vec4buf ptr = u8vec4buf(gb.buf);                  \
+        uint32_t rf = pack32((ptr[0].v).wzyx);              \
+        gb.buf += 4;                                        \
+        gb.bits = uint64_t(rf) << (32 - gb.bits_valid) | gb.bits; /* unsigned shift count: needs bits_valid <= 32 */ \
+        gb.bits_valid += 32;                                \
+    }
+
+void init_get_bits(inout GetBitContext gb, u8buf data, uint64_t len)
+{
+    const uint64_t base = uint64_t(data); /* start address of the bitstream */
+    gb.buf_start = base;
+    gb.buf = base;
+    gb.buf_end = base + len;
+    gb.size_in_bits = 8 * uint(len);
+    LOAD64() /* prime the 64-bit cache with the first 8 bytes */
+}
+
+bool get_bit(inout GetBitContext gb)
+{   /* Read and consume a single bit. */
+    if (gb.bits_valid == 0) /* refill only once the cache is completely empty */
+        LOAD64()
+
+    bool val = bool(gb.bits >> (64 - 1)); /* the next bit is the MSB of the cache */
+    gb.bits <<= 1;
+    gb.bits_valid--;
+    return val;
+}
+
+uint get_bits(inout GetBitContext gb, uint n)
+{   /* Read and consume the next n bits, most significant bit first. */
+    if (n == 0) /* keeps the shift by (64 - n) below from becoming a shift by 64 */
+        return 0;
+
+    if (n > gb.bits_valid) /* tops up by 32 bits once; presumably n <= 32 - TODO confirm */
+        RELOAD32()
+
+    uint val = uint(gb.bits >> (64 - n));
+    gb.bits <<= n;
+    gb.bits_valid -= n;
+    return val;
+}
+
+uint show_bits(inout GetBitContext gb, uint n)
+{   /* Peek at the next n bits without consuming them; may top up the cache. */
+    if (n > gb.bits_valid)
+        RELOAD32()
+    /* n == 0 would shift a 64-bit value by 64, whose result GLSL leaves undefined */
+    return n == 0 ? 0u : uint(gb.bits >> (64 - n));
+}
+
+void skip_bits(inout GetBitContext gb, uint n)
+{   /* Discard the next n bits. */
+    if (n > gb.bits_valid) /* tops up by 32 bits at most once */
+        RELOAD32()
+
+    gb.bits <<= n;
+    gb.bits_valid -= n;
+}
+
+uint tell_bits(in GetBitContext gb)
+{   /* Read position in bits: everything loaded so far minus what is still cached. */
+    return 8 * uint(gb.buf - gb.buf_start) - gb.bits_valid;
+}
+
+uint left_bits(in GetBitContext gb)
+{   /* Bits not yet consumed: total size, minus bits loaded, plus bits still cached. */
+    return gb.size_in_bits + gb.bits_valid - 8 * uint(gb.buf - gb.buf_start);
+}
diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp
index 604d03b2de..d2bd7e736e 100644
--- a/libavcodec/vulkan/ffv1_common.comp
+++ b/libavcodec/vulkan/ffv1_common.comp
@@ -22,7 +22,12 @@
 
 struct SliceContext {
     RangeCoder c;
+
+#if !defined(DECODE)
     PutBitContext pb; /* 8*8 bytes */
+#else
+    GetBitContext gb;
+#endif
 
     ivec2 slice_dim;
     ivec2 slice_pos;
diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
new file mode 100644
index 0000000000..a9feb9d318
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -0,0 +1,303 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ivec2 get_pred(ivec2 pos, ivec2 off, int p, int comp, int sw,
+               uint8_t context_model)
+{   /* Gather the neighbours of the sample at image position `pos` and derive (context, prediction). */
+    const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); /* x == 0: move the tap one right and one up */
+    const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0); /* x == 1: same shift for the two-left tap */
+    /* `off` is compared against 0/1 for border handling; every neighbour that is not loaded below stays 0 */
+    TYPE top2 = TYPE(0);
+    if (off.y > 1)
+        top2 = TYPE(imageLoad(dst[p], pos + ivec2(0, -2))[comp]); /* two rows up */
+
+    VTYPE3 top  = VTYPE3(TYPE(0),
+                         TYPE(0),
+                         TYPE(0));
+    if (off.y > 0 && off != ivec2(0, 1))
+        top[0] = TYPE(imageLoad(dst[p], pos + ivec2(-1, -1) + yoff_border1)[comp]); /* top-left; at x == 0 this reads (0, -2) */
+    if (off.y > 0) {
+        top[1] = TYPE(imageLoad(dst[p], pos + ivec2(0, -1))[comp]); /* top */
+        top[2] = TYPE(imageLoad(dst[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]); /* top-right, falls back to top when off.x == sw - 1 */
+    }
+
+    VTYPE2 cur = VTYPE2(TYPE(0),
+                        TYPE(0));
+    if (off.x > 0 && off != ivec2(1, 0))
+        cur[0] = TYPE(imageLoad(dst[p], pos + ivec2(-2,  0) + yoff_border2)[comp]); /* two to the left; at x == 1 this reads (-1, -1) */
+    if (off != ivec2(0, 0))
+        cur[1] = TYPE(imageLoad(dst[p], pos + ivec2(-1,  0) + yoff_border1)[comp]); /* left; at x == 0 this reads (0, -1) */
+
+    /* context, prediction */
+    return ivec2(get_context(cur, top, top2, context_model),
+                 predict(cur[1], VTYPE2(top)));
+}
+
+void store_comp(ivec2 pos, int p, int comp, uint v)
+{   /* Write sample value v for component `comp` of image dst[p] at `pos`. */
+#ifdef RGB
+    uvec4 pix = imageLoad(dst[p], pos); /* read-modify-write: the other components are kept */
+    pix[comp] = v;
+    imageStore(dst[p], pos, pix);
+#else
+    imageStore(dst[p], pos, uvec4(v)); /* v is replicated to all four components; `comp` is not used here */
+#endif
+}
+
+#ifndef GOLOMB
+int get_isymbol(inout RangeCoder c, uint64_t state)
+{   /* Decode one signed symbol with the range coder, using the states starting at `state`. */
+    if (get_rac(c, state)) /* state 0: a set bit means the symbol is 0 */
+        return 0;
+
+    state += 1;
+
+    int e = 0; /* counts the set bits read below; it is the number of bits read into `a` */
+    while (get_rac(c, state + min(e, 9))) { // 1..10
+        e++;
+        if (e > 31) { /* more than 31 set bits in a row: flag the stream as corrupt */
+            corrupt = true;
+            return 0;
+        }
+    }
+
+    state += 21;
+
+    int a = 1; /* magnitude, built up one bit at a time from the top */
+    for (int i = e - 1; i >= 0; i--)
+        a += a + int(get_rac(c, state + min(i, 9)));  // 22..31
+
+    e = -int(get_rac(c, state - 11 + min(e, 10))); // 11..21 sign
+    return (a ^ e) - e; /* e is 0 or -1 here; -1 negates a */
+}
+
+void decode_line_pcm(inout SliceContext sc, int y, int p, int comp,
+                     int bits)
+{   /* Decode row y of one plane/component whose samples are stored as raw `bits`-bit values. */
+    ivec2 sp = sc.slice_pos;
+    int w = sc.slice_dim.x;
+
+#ifndef RGB
+    if (p > 0 && p < 3) { /* planes 1 and 2 use the chroma-shifted width and position */
+        w >>= chroma_shift.x;
+        sp >>= chroma_shift;
+    }
+#endif
+
+    for (int x = 0; x < w; x++) {
+        uint v = 0;
+        for (int i = (bits - 1); i >= 0; i--) /* most significant bit first */
+            v |= uint(get_rac_equi(sc.c)) << i;
+
+        store_comp(sp + ivec2(x, y), p, comp, v);
+    }
+}
+
+void decode_line(inout SliceContext sc, uint64_t state,
+                 int y, int p, int comp, int bits, const int run_index)
+{   /* Range-coder variant: decode row y of one plane/component; run_index is not used here. */
+    ivec2 sp = sc.slice_pos;
+    int w = sc.slice_dim.x;
+
+#ifndef RGB
+    if (p > 0 && p < 3) { /* planes 1 and 2 use the chroma-shifted width and position */
+        w >>= chroma_shift.x;
+        sp >>= chroma_shift;
+    }
+#endif
+
+    for (int x = 0; x < w; x++) {
+        ivec2 pr = get_pred(sp + ivec2(x, y), ivec2(x, y), p, comp, w,
+                            sc.quant_table_idx[p]); /* pr[0] = context, pr[1] = prediction */
+
+        int diff = get_isymbol(sc.c, state + CONTEXT_SIZE*abs(pr[0])); /* |context| selects the state block */
+        if (pr[0] < 0) /* a negative context flips the sign of the residual */
+            diff = -diff;
+
+        uint v = zero_extend(pr[1] + diff, bits);
+        store_comp(sp + ivec2(x, y), p, comp, v);
+    }
+}
+
+#else /* GOLOMB */
+
+void decode_line(inout SliceContext sc, uint64_t state,
+                 int y, int p, int comp, int bits, inout int run_index)
+{   /* Golomb variant: decode row y of one plane/component; run_index is updated for the caller. */
+    ivec2 sp = sc.slice_pos;
+    int w = sc.slice_dim.x;
+
+#ifndef RGB
+    if (p > 0 && p < 3) { /* planes 1 and 2 use the chroma-shifted width and position */
+        w >>= chroma_shift.x;
+        sp >>= chroma_shift;
+    }
+#endif
+
+    int run_count = 0; /* samples left in the current run */
+    int run_mode  = 0; /* 0: no run, 1: in a run, 2: explicit run length already read */
+
+    for (int x = 0; x < w; x++) {
+        ivec2 pos = sp + ivec2(x, y); /* image position of this sample */
+        int diff;
+        ivec2 pr = get_pred(pos, ivec2(x, y), p, comp, w,
+                            sc.quant_table_idx[p]); /* pr[0] = context, pr[1] = prediction */
+
+        VlcState sb = VlcState(state + VLC_STATE_SIZE*abs(pr[0])); /* |context| selects the VLC state */
+
+        if (pr[0] == 0 && run_mode == 0) /* a zero context starts a run */
+            run_mode = 1;
+
+        if (run_mode != 0) {
+            if (run_count == 0 && run_mode == 1) {
+                int tmp_idx = int(log2_run[run_index]);
+                if (get_bit(sc.gb)) { /* set bit: a full run of 1 << tmp_idx samples */
+                    run_count = 1 << tmp_idx;
+                    if (x + run_count <= w)
+                        run_index++;
+                } else { /* clear bit: the run length follows in tmp_idx bits */
+                    if (tmp_idx != 0) {
+                        run_count = int(get_bits(sc.gb, tmp_idx));
+                    } else
+                        run_count = 0;
+
+                    if (run_index != 0)
+                        run_index--;
+                    run_mode = 2;
+                }
+            }
+
+            run_count--;
+            if (run_count < 0) { /* run exhausted: the sample ending it carries a coded residual */
+                run_mode  = 0;
+                run_count = 0;
+                diff = read_vlc_symbol(sc.gb, sb, bits);
+                if (diff >= 0) /* non-negative values are stored reduced by one */
+                    diff++;
+            } else {
+                diff = 0; /* inside a run the residual is zero */
+            }
+        } else {
+            diff = read_vlc_symbol(sc.gb, sb, bits);
+        }
+
+        if (pr[0] < 0) /* a negative context flips the sign of the residual */
+            diff = -diff;
+
+        uint v = zero_extend(pr[1] + diff, bits);
+        store_comp(pos, p, comp, v);
+    }
+}
+#endif
+
+void decode_slice(inout SliceContext sc, const uint slice_idx)
+{   /* Decode every line of every plane/component of one slice into dst[]. */
+    int run_index = 0; /* one variable shared by all decode_line() calls of the slice */
+
+#ifndef RGB
+    int bits = bits_per_raw_sample;
+#else
+    /* One bit more than the raw depth unless slice_coding_mode == 1 (PCM). The */
+    /* former "bits = 9" shortcut was guarded by an always-true test: dead code. */
+    int bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
+#endif
+
+    /* PCM coding */
+#ifndef GOLOMB
+    if (sc.slice_coding_mode == 1) {
+#ifndef RGB
+        for (int p = 0; p < planes; p++) {
+            int h = sc.slice_dim.y;
+            if (p > 0 && p < 3) /* planes 1 and 2 use the chroma-shifted height */
+                h >>= chroma_shift.y;
+
+            for (int y = 0; y < h; y++)
+                decode_line_pcm(sc, y, p, 0, bits);
+        }
+#else
+        if (transparency == 1) { /* per row, components are read in the order 1, 2, 0, 3 */
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line_pcm(sc, y, 0, 1, bits);
+                decode_line_pcm(sc, y, 0, 2, bits);
+                decode_line_pcm(sc, y, 0, 0, bits);
+                decode_line_pcm(sc, y, 0, 3, bits);
+            }
+        } else { /* same order without the fourth component */
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line_pcm(sc, y, 0, 1, bits);
+                decode_line_pcm(sc, y, 0, 2, bits);
+                decode_line_pcm(sc, y, 0, 0, bits);
+            }
+        }
+#endif
+    } else
+
+    /* Arithmetic coding */
+#endif
+    {
+        uint64_t slice_state_off = uint64_t(slice_state) +
+                                   slice_idx*plane_state_size*codec_planes; /* this slice's state area */
+
+#ifndef RGB
+        for (int p = 0; p < planes; p++) {
+            int h = sc.slice_dim.y;
+            if (p > 0 && p < 3) /* planes 1 and 2 use the chroma-shifted height */
+                h >>= chroma_shift.y;
+
+            for (int y = 0; y < h; y++)
+                decode_line(sc, slice_state_off, y, p, 0, bits, run_index);
+
+            /* For the second chroma plane, reuse the first plane's state */
+            if (p != 1)
+                slice_state_off += plane_state_size;
+        }
+#else
+        if (transparency == 1) { /* state blocks: 0 for component 1, 1 for components 2 and 0, 2 for component 3 */
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line(sc, slice_state_off + plane_state_size*0,
+                            y, 0, 1, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 2, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 0, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*2,
+                            y, 0, 3, bits, run_index);
+            }
+        } else { /* same state assignment without the fourth component */
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line(sc, slice_state_off + plane_state_size*0,
+                            y, 0, 1, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 2, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 0, bits, run_index);
+            }
+        }
+#endif
+    }
+}
+
+void main(void)
+{   /* The slice index is derived from the workgroup ID alone, row-major over the dispatch grid. */
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    decode_slice(slice_ctx[slice_idx], slice_idx);
+}
diff --git a/libavcodec/vulkan/ffv1_dec_rct.comp b/libavcodec/vulkan/ffv1_dec_rct.comp
new file mode 100644
index 0000000000..0305dc3295
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec_rct.comp
@@ -0,0 +1,72 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+void bypass_block(in SliceContext sc)
+{   /* Copy the slice's pixels from src[0] to dst[0] unchanged. */
+    ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos; /* each invocation starts at its own local offset */
+    ivec2 end = sc.slice_pos + sc.slice_dim; /* exclusive bottom-right corner of the slice */
+
+    for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y) /* step by the workgroup size in y ... */
+        for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x) /* ... and in x */
+            imageStore(dst[0], ivec2(x, y), ivec4(imageLoad(src[0], ivec2(x, y))));
+}
+
+void transform_sample(ivec2 pos, ivec2 rct_coef)
+{   /* Apply the colour transform to the src[0] pixel at `pos` and store the result. */
+    ivec4 pix = ivec4(imageLoad(src[0], pos));
+    /* b and r lose `offset`, g is corrected from them, then g is added back to both */
+    pix.b -= offset;
+    pix.r -= offset;
+    pix.g -= (pix.b*rct_coef.y + pix.r*rct_coef.x) >> 2;
+    pix.b += pix.g;
+    pix.r += pix.g;
+
+    pix = ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
+                pix[fmt_lut[2]], pix[fmt_lut[3]]); /* reorder components through fmt_lut */
+
+    imageStore(dst[0], pos, pix); /* dst[0] always receives the whole reordered pixel */
+    if (planar_rgb != 0) {
+        for (int i = 1; i < (3 + transparency); i++)
+            imageStore(dst[i], pos, ivec4(pix[i])); /* component i replicated into image i */
+    }
+}
+
+void transform_block(in SliceContext sc)
+{   /* Run transform_sample() over the slice with the slice's own coefficients. */
+    const ivec2 rct_coef = sc.slice_rct_coef;
+    const ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos; /* each invocation starts at its own local offset */
+    const ivec2 end = sc.slice_pos + sc.slice_dim; /* exclusive bottom-right corner of the slice */
+
+    for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y) /* step by the workgroup size in y ... */
+        for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x) /* ... and in x */
+            transform_sample(ivec2(x, y), rct_coef);
+}
+
+void main()
+{   /* Pick the per-slice path: a plain copy for coding mode 1, the transform otherwise. */
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+
+    if (slice_ctx[slice_idx].slice_coding_mode == 1)
+        bypass_block(slice_ctx[slice_idx]);
+    else
+        transform_block(slice_ctx[slice_idx]);
+}
diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp
new file mode 100644
index 0000000000..a10163a8d6
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp
@@ -0,0 +1,138 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+uint get_usymbol(inout RangeCoder c, uint64_t state)
+{   /* Decode one unsigned symbol with the range coder, using the states starting at `state`. */
+    if (get_rac(c, state + 0)) /* state 0: a set bit means the symbol is 0 */
+        return 0;
+
+    int e = 0; /* counts the set bits read below; it is the number of bits read into `a` */
+    while (get_rac(c, state + 1 + min(e, 9))) { // 1..10
+        e++;
+        if (e > 31) { /* more than 31 set bits in a row: flag the stream as corrupt */
+            corrupt = true;
+            return 0;
+        }
+    }
+
+    uint a = 1; /* value, built up one bit at a time from the top */
+    for (int i = e - 1; i >= 0; i--)
+        a += a + uint(get_rac(c, state + 22 + min(i, 9)));  // 22..31
+
+    return a;
+}
+
+bool decode_slice_header(inout SliceContext sc, uint64_t state)
+{   /* Parse one slice header into `sc`; returns true when the header is invalid. */
+    u8buf sb = u8buf(state);
+
+    [[unroll]]
+    for (int i = 0; i < CONTEXT_SIZE; i++) /* every header state byte starts at 128 */
+        sb[i].v = uint8_t(128);
+
+    uint sx = get_usymbol(sc.c, state); /* slice position, in slice-grid units */
+    uint sy = get_usymbol(sc.c, state);
+    uint sw = get_usymbol(sc.c, state) + 1; /* slice extent, stored minus one */
+    uint sh = get_usymbol(sc.c, state) + 1;
+
+    /* All values are unsigned: reject oversized extents before subtracting */
+    if (corrupt || sw == 0 || sh == 0 ||
+        sw > gl_NumWorkGroups.x || sh > gl_NumWorkGroups.y ||
+        sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh))
+        return true;
+
+    /* Set coordinates */
+    uint sxs = slice_coord(img_size.x, sx     , gl_NumWorkGroups.x, chroma_shift.x);
+    uint sxe = slice_coord(img_size.x, sx + sw, gl_NumWorkGroups.x, chroma_shift.x);
+    uint sys = slice_coord(img_size.y, sy     , gl_NumWorkGroups.y, chroma_shift.y);
+    uint sye = slice_coord(img_size.y, sy + sh, gl_NumWorkGroups.y, chroma_shift.y);
+
+    sc.slice_pos = ivec2(sxs, sys);
+    sc.slice_dim = ivec2(sxe - sxs, sye - sys);
+    sc.slice_rct_coef = ivec2(1, 1); /* defaults, overridden below only for version >= 4 */
+    sc.slice_coding_mode = int(0);
+
+    for (uint i = 0; i < codec_planes; i++) {
+        uint idx = get_usymbol(sc.c, state);
+        if (idx >= quant_table_count)
+            return true;
+        sc.quant_table_idx[i] = uint8_t(idx);
+        sc.context_count = context_count[idx]; /* ends up holding the last plane's value */
+    }
+    /* NOTE(review): three symbols are read and dropped; presumably picture structure and aspect ratio - confirm against the FFV1 slice header. */
+    get_usymbol(sc.c, state);
+    get_usymbol(sc.c, state);
+    get_usymbol(sc.c, state);
+
+    if (version >= 4) {
+        sc.slice_reset_contexts = get_rac(sc.c, state);
+        sc.slice_coding_mode = get_usymbol(sc.c, state);
+        if (sc.slice_coding_mode != 1 && colorspace == 1) {
+            sc.slice_rct_coef.x = int(get_usymbol(sc.c, state));
+            sc.slice_rct_coef.y = int(get_usymbol(sc.c, state));
+            if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4) /* coefficient sum above 4 is rejected */
+                return true;
+        }
+    }
+
+    return false;
+}
+
+void golomb_init(inout SliceContext sc, uint64_t state)
+{   /* Hand the rest of the slice over from the range coder to the bit reader sc.gb. */
+    if (version == 3 && micro_version > 1 || version > 3) { /* i.e. version 3 with micro_version > 1, or any version above 3 */
+        u8buf(state).v = uint8_t(129); /* first state byte forced to 129 before one more range-coder bit is read */
+        get_rac(sc.c, state);
+    }
+
+    uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1; /* offset at which the bit reader starts */
+    init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count),
+                  sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count); /* length: what remains up to bytestream_end */
+}
+
+void main(void)
+{   /* Per-slice setup: start the range coder, parse the header, optionally set up Golomb reading and a CRC. */
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; /* CONTEXT_SIZE bytes of scratch per slice */
+
+    u8buf bs = u8buf(slice_data + slice_offsets[2*slice_idx + 0]); /* slice_offsets holds (offset, size) pairs */
+    uint32_t slice_size = slice_offsets[2*slice_idx + 1];
+
+    rac_init_dec(slice_ctx[slice_idx].c,
+                 bs, slice_size);
+
+    if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1)) /* only the last slice index reads one extra bit */
+        get_rac_equi(slice_ctx[slice_idx].c);
+    /* NOTE(review): the "invalid header" result returned by the call below is discarded - confirm this is intended. */
+    decode_slice_header(slice_ctx[slice_idx], scratch_state);
+
+    if (golomb == 1)
+        golomb_init(slice_ctx[slice_idx], scratch_state);
+
+    if (ec != 0 && check_crc != 0) {
+        uint32_t crc = crcref; /* starting value for the table-driven CRC */
+        for (int i = 0; i < slice_size; i++) /* runs over all slice_size bytes of the slice */
+            crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);
+
+        slice_crc_mismatch[slice_idx] = crc; /* presumably non-zero marks a mismatch - TODO confirm */
+    }
+}
diff --git a/libavcodec/vulkan/ffv1_rct.comp b/libavcodec/vulkan/ffv1_rct.comp
new file mode 100644
index 0000000000..b10bb47132
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_rct.comp
@@ -0,0 +1,90 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Fetch one pixel from the source image(s) and return its components
+ * reordered through fmt_lut.
+ *
+ * When planar_rgb is non-zero, components 1..(2 + transparency) are taken
+ * from the first channel of the corresponding src[] plane instead of from
+ * src[0].
+ */
+ivec4 load_components(ivec2 pos)
+{
+    ivec4 raw = ivec4(imageLoad(src[0], pos));
+
+    if (planar_rgb != 0) {
+        for (int plane = 1; plane < (3 + transparency); plane++)
+            raw[plane] = int(imageLoad(src[plane], pos)[0]);
+    }
+
+    ivec4 ordered;
+    ordered[0] = raw[fmt_lut[0]];
+    ordered[1] = raw[fmt_lut[1]];
+    ordered[2] = raw[fmt_lut[2]];
+    ordered[3] = raw[fmt_lut[3]];
+    return ordered;
+}
+
+/* Copy one pixel to dst[0] with only the fmt_lut component reordering applied. */
+void bypass_sample(ivec2 pos)
+{
+    ivec4 pix = load_components(pos);
+    imageStore(dst[0], pos, pix);
+}
+
+/*
+ * Run bypass_sample() over every pixel of the slice described by sc.
+ * Each invocation starts at its local ID inside the slice and advances by
+ * the workgroup size in both directions.
+ */
+void bypass_block(in SliceContext sc)
+{
+    const ivec2 first = sc.slice_pos + ivec2(gl_LocalInvocationID);
+    const ivec2 limit = sc.slice_pos + sc.slice_dim;
+
+    for (uint row = first.y; row < limit.y; row += gl_WorkGroupSize.y) {
+        for (uint col = first.x; col < limit.x; col += gl_WorkGroupSize.x)
+            bypass_sample(ivec2(col, row));
+    }
+}
+
+/*
+ * Apply the inverse reversible colour transform to one pixel and store the
+ * result in dst[0].
+ *
+ * pos:      pixel position
+ * rct_coef: the slice's two RCT coefficients (slice_rct_coef)
+ *
+ * The offset is removed from the two difference channels, the weighted
+ * difference term is subtracted from the base channel, and the base channel
+ * is then added back to both difference channels.
+ *
+ * This function used to be defined twice with the same signature; the
+ * second body was the forward (encoder-side) transform. GLSL does not allow
+ * a function to be defined more than once, and a decoder only needs the
+ * inverse, so only that definition is kept.
+ */
+void transform_sample(ivec2 pos, ivec2 rct_coef)
+{
+    ivec4 pix = load_components(pos);
+    pix.b -= offset;
+    pix.r -= offset;
+    pix.g -= (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;
+    pix.b += pix.g;
+    pix.r += pix.g;
+    imageStore(dst[0], pos, pix);
+}
+
+/*
+ * Run transform_sample() over every pixel of the slice described by sc,
+ * using the slice's own RCT coefficients. Each invocation starts at its
+ * local ID inside the slice and advances by the workgroup size.
+ */
+void transform_block(in SliceContext sc)
+{
+    const ivec2 first = sc.slice_pos + ivec2(gl_LocalInvocationID);
+    const ivec2 limit = sc.slice_pos + sc.slice_dim;
+
+    for (uint row = first.y; row < limit.y; row += gl_WorkGroupSize.y) {
+        for (uint col = first.x; col < limit.x; col += gl_WorkGroupSize.x)
+            transform_sample(ivec2(col, row), sc.slice_rct_coef);
+    }
+}
+
+/*
+ * RCT shader entry point: one workgroup per slice. Slices whose
+ * slice_coding_mode is 1 are copied through unchanged, all others go
+ * through the colour transform.
+ */
+void main()
+{
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    const bool bypass = slice_ctx[slice_idx].slice_coding_mode == 1;
+
+    if (!bypass)
+        transform_block(slice_ctx[slice_idx]);
+    else
+        bypass_block(slice_ctx[slice_idx]);
+}
diff --git a/libavcodec/vulkan/ffv1_vlc.comp b/libavcodec/vulkan/ffv1_vlc.comp
index 0a53e035b5..d374e5a069 100644
--- a/libavcodec/vulkan/ffv1_vlc.comp
+++ b/libavcodec/vulkan/ffv1_vlc.comp
@@ -120,3 +120,40 @@ Symbol get_vlc_symbol(inout VlcState state, int v, int bits)
 
     return set_sr_golomb(code, k, 12, bits);
 }
+
+/*
+ * Read an unsigned Golomb-Rice code with an escape.
+ *
+ * gb:      bit reader
+ * k:       number of suffix bits
+ * limit:   maximum number of prefix bits examined before escaping
+ * esc_len: number of bits in the escape value
+ *
+ * Up to `limit` prefix bits are read. If the i-th of them (counting from
+ * zero) is set, the result is (i << k) plus the next k bits. If none is set,
+ * the result is the next esc_len bits plus (limit - 1).
+ *
+ * The prefix length and escape offset used to be hard-coded to 12 and 11,
+ * silently ignoring `limit`; they are now derived from it, which yields the
+ * same values for the existing caller (limit == 12).
+ */
+uint get_ur_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+{
+    for (uint i = 0; i < uint(limit); i++)
+        if (get_bit(gb))
+            return get_bits(gb, k) + (i << k);
+
+    return get_bits(gb, esc_len) + uint(limit - 1);
+}
+
+/*
+ * Read a signed Golomb-Rice code: the unsigned code's lowest bit selects the
+ * sign (0 -> code >> 1, 1 -> ~(code >> 1)).
+ */
+int get_sr_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+{
+    const int code = int(get_ur_golomb(gb, k, limit, esc_len));
+    const int sign_mask = -(code & 1);
+
+    return (code >> 1) ^ sign_mask;
+}
+
+/*
+ * Decode one adaptive Golomb-Rice symbol and update its VLC state.
+ *
+ * gb:    bit reader
+ * state: per-context VLC state (count, error_sum, drift, bias)
+ * bits:  sample bit depth, used as the escape length and for folding
+ *
+ * Returns the decoded value after bias correction and folding.
+ */
+int read_vlc_symbol(inout GetBitContext gb, inout VlcState state, int bits)
+{
+    /* k is the number of doublings of count needed to reach error_sum */
+    int k = 0;
+    for (int i = state.count; i < state.error_sum; i += i) // FIXME: optimize
+        k++;
+
+    int v = get_sr_golomb(gb, k, 12, bits);
+
+    /* Flip all bits of v when (2*drift + count) is negative */
+    v ^= ((2 * state.drift + state.count) >> 31);
+
+    const int ret = fold(v + state.bias, bits);
+
+    update_vlc_state(state, v);
+
+    return ret;
+}
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 6e3b9c1238..8c8d0d9d9c 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -191,3 +191,77 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size)
     r.outstanding_count = uint16_t(0);
     r.outstanding_byte = uint8_t(0xFF);
 }
+
+/* Decoder */
+/* Number of times refill() wanted a byte but the bytestream was exhausted;
+ * reset to 0 by rac_init_dec(). */
+uint overread;
+/* Reset to false by rac_init_dec(); not written anywhere else in this part
+ * of the file. NOTE(review): presumably set by the decode shaders - confirm. */
+bool corrupt;
+
+/*
+ * Initialise a range coder for decoding.
+ *
+ * r:        range coder to set up
+ * data:     start of the slice bitstream
+ * buf_size: size of the slice bitstream in bytes
+ *
+ * Also resets the global overread counter and corrupt flag.
+ */
+void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size)
+{
+    corrupt = false;
+    overread = 0;
+
+    /* Common setup, with the stream starting after the two priming bytes */
+    rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2);
+
+    /* Swap the two priming bytes so that the first one ends up in the
+     * upper half of 'low' */
+    r.low = pack16(u8vec2buf(data).v.yx);
+
+    if (r.low < 0xFF00)
+        return;
+
+    /* Clamp 'low' and end the stream right after the priming bytes, so
+     * that every later refill is counted as an overread */
+    r.low = 0xFF00;
+    r.bytestream_end = uint64_t(data) + 2;
+}
+
+/*
+ * Shift range and low up by one byte and pull the next input byte into low.
+ * If the input is exhausted nothing is added and overread is incremented
+ * instead.
+ */
+void refill(inout RangeCoder c)
+{
+    const bool have_input = c.bytestream < c.bytestream_end;
+
+    c.range <<= 8;
+    c.low   <<= 8;
+
+    if (!have_input) {
+        overread++;
+        return;
+    }
+
+    c.low += u8buf(c.bytestream).v;
+    c.bytestream++;
+}
+
+/*
+ * Decode one adaptive binary symbol.
+ *
+ * c:     range coder
+ * state: address of the one-byte probability state; it is read, then
+ *        replaced by its successor from zero_one_state[] (entries 0..255
+ *        for a decoded 0, 256..511 for a decoded 1)
+ *
+ * Returns the decoded bit.
+ */
+bool get_rac(inout RangeCoder c, uint64_t state)
+{
+    u8buf sb = u8buf(state);
+    uint prob = uint(sb.v);
+    uint16_t split = uint16_t((uint(c.range) * prob) >> 8);
+
+    c.range -= split;
+
+    bool bit = c.low >= c.range;
+    sb.v = zero_one_state[(uint(bit) << 8) + prob];
+
+    if (bit) {
+        /* Upper sub-interval: rebase low and continue with the split part */
+        c.low -= c.range;
+        c.range = split;
+    }
+
+    /* Renormalise once the range has dropped below one byte */
+    if (c.range < 0x100)
+        refill(c);
+
+    return bit;
+}
+
+/*
+ * Decode one bit with a fixed half/half split of the range (no adaptive
+ * state involved). Returns the decoded bit.
+ */
+bool get_rac_equi(inout RangeCoder c)
+{
+    uint16_t half_range = c.range >> 1;
+
+    c.range -= half_range;
+
+    bool bit = c.low >= c.range;
+    if (bit) {
+        /* Upper sub-interval: rebase low and continue with the other half */
+        c.low -= c.range;
+        c.range = half_range;
+    }
+
+    /* Renormalise once the range has dropped below one byte */
+    if (c.range < 0x100)
+        refill(c);
+
+    return bit;
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index cd77e10e12..bc850a7333 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -36,6 +36,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc;
 #if CONFIG_AV1_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc;
 #endif
+#if CONFIG_FFV1_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
+#endif
 
 static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_H264_VULKAN_HWACCEL
@@ -47,6 +50,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_AV1_VULKAN_HWACCEL
     &ff_vk_dec_av1_desc,
 #endif
+#if CONFIG_FFV1_VULKAN_HWACCEL
+    &ff_vk_dec_ffv1_desc,
+#endif
 };
 
 static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id)
@@ -1035,6 +1041,17 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
         frames_ctx->free        = free_profile_data;
 
         hwfc->create_pnext = &prof->profile_list;
+    } else {
+        switch (frames_ctx->sw_format) {
+        case AV_PIX_FMT_GBRAP16:
+            frames_ctx->sw_format = AV_PIX_FMT_RGBA64;
+            break;
+        case AV_PIX_FMT_BGR0:
+            frames_ctx->sw_format = AV_PIX_FMT_RGB0;
+            break;
+        default:
+            break;
+        }
     }
 
     frames_ctx->width  = avctx->coded_width;
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
new file mode 100644
index 0000000000..276514a228
--- /dev/null
+++ b/libavcodec/vulkan_ffv1.c
@@ -0,0 +1,1292 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "ffv1.h"
+#include "ffv1_vulkan.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_rangecoder_comp;
+extern const char *ff_source_ffv1_vlc_comp;
+extern const char *ff_source_ffv1_common_comp;
+extern const char *ff_source_ffv1_dec_setup_comp;
+extern const char *ff_source_ffv1_reset_comp;
+extern const char *ff_source_ffv1_dec_comp;
+extern const char *ff_source_ffv1_dec_rct_comp;
+
+/* Decoder descriptor for FFv1: the codec is decoded with compute shaders,
+ * so a compute queue is requested and the push descriptor extension is
+ * named as the decode extension. */
+const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
+    .codec_id         = AV_CODEC_ID_FFV1,
+    .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+    .queue_flags      = VK_QUEUE_COMPUTE_BIT,
+};
+
+#define HOST_MAP
+
+/* Per-picture hwaccel state for the Vulkan FFv1 decoder. */
+typedef struct FFv1VulkanDecodePicture {
+    /* Generic Vulkan decode picture state (slice buffer, views, semaphore) */
+    FFVulkanDecodePicture vp;
+
+    /* Scratch buffer of max_slice_count*CONTEXT_SIZE bytes, handed to the
+     * shaders as scratch_data */
+    AVBufferRef *tmp_data;
+
+    /* Slice state buffer: freshly pooled on key frames, otherwise a new
+     * reference to the previous picture's buffer */
+    AVBufferRef *slice_state;
+    /* Bytes of state per plane: (8 for Golomb-Rice, CONTEXT_SIZE otherwise)
+     * times max_context_count */
+    uint32_t plane_state_size;
+    /* Bytes of state per slice: all planes plus slice_data_size, 8-aligned */
+    uint32_t slice_state_size;
+    /* Bytes reserved per slice for the shader-side SliceContext struct */
+    uint32_t slice_data_size;
+    /* Largest context_count over all quantization tables */
+    uint32_t max_context_count;
+
+    /* Host-visible buffer with two uint32_t per slice: offset, then size */
+    AVBufferRef *slice_offset_buf;
+    /* Offsets array maintained by ff_vk_decode_add_slice() (non host-mapped
+     * path only) */
+    uint32_t    *slice_offset;
+    /* Number of slices submitted so far for this picture */
+    int          slice_num;
+
+    /* Host-visible buffer with one uint32_t per slice, bound to the setup
+     * shader. NOTE(review): presumably receives the per-slice CRC result -
+     * confirm against the shader's descriptor layout. */
+    AVBufferRef *slice_status_buf;
+    /* Non-zero when the stream carries CRCs (ec) and AV_EF_CRCCHECK is set */
+    int crc_checked;
+} FFv1VulkanDecodePicture;
+
+/* Decoder-wide state shared by all pictures: intermediate frame pools,
+ * compiled shaders, static lookup buffers and per-picture buffer pools. */
+typedef struct FFv1VulkanDecodeContext {
+    /* Frame contexts for the intermediate RGB frame, indexed by use32bit */
+    AVBufferRef *intermediate_frames_ref[2]; /* 16/32 bit */
+
+    /* Slice header parsing / range coder setup shader */
+    FFVulkanShader setup;
+    /* State reset shaders, indexed by (ac == AC_GOLOMB_RICE) */
+    FFVulkanShader reset[2]; /* AC/Golomb */
+    /* Decode shaders, indexed by [use32bit][ac == AC_GOLOMB_RICE][is_rgb] */
+    FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */
+    /* Colour transform shaders, indexed by use32bit */
+    FFVulkanShader rct[2]; /* 16/32 bit */
+
+    FFVkBuffer rangecoder_static_buf;
+    FFVkBuffer quant_buf;
+    FFVkBuffer crc_tab_buf;
+
+    /* Pools backing the per-picture buffers of FFv1VulkanDecodePicture */
+    AVBufferPool *slice_state_pool;
+    AVBufferPool *tmp_data_pool;
+    AVBufferPool *slice_offset_pool;
+    AVBufferPool *slice_status_pool;
+} FFv1VulkanDecodeContext;
+
+/* Push constants shared by the setup and decode shaders.
+ *
+ * The field order and types mirror, one for one, the GLSL pushConstants
+ * block emitted by add_push_data(), which registers sizeof() of this struct
+ * as the push constant range. Any change here must be made there as well. */
+typedef struct FFv1VkParameters {
+    /* Number of contexts of each quantization table */
+    uint32_t context_count[MAX_QUANT_TABLES];
+
+    /* Device addresses of the bitstream, slice state and scratch buffers */
+    VkDeviceAddress slice_data;
+    VkDeviceAddress slice_state;
+    VkDeviceAddress scratch_data;
+
+    /* Frame width/height and horizontal/vertical chroma shift */
+    uint32_t img_size[2];
+    uint32_t chroma_shift[2];
+
+    uint32_t plane_state_size;
+    /* Initial value of the slice CRC computation */
+    uint32_t crcref;
+
+    uint8_t bits_per_raw_sample;
+    uint8_t quant_table_count;
+    uint8_t version;
+    uint8_t micro_version;
+    uint8_t key_frame;
+    /* Plane count of the output pixel format */
+    uint8_t planes;
+    /* Plane count as coded in the bitstream */
+    uint8_t codec_planes;
+    uint8_t transparency;
+    uint8_t colorspace;
+    uint8_t ec;
+    /* 1 when the stream uses Golomb-Rice coding, 0 for the range coder */
+    uint8_t golomb;
+    /* 1 when CRC checking was requested via AV_EF_CRCCHECK */
+    uint8_t check_crc;
+} FFv1VkParameters;
+
+/**
+ * Append the GLSL push constant block to a shader's source and register the
+ * matching push constant range for the compute stage.
+ *
+ * The emitted block uses the scalar layout and lists the same members, in
+ * the same order, as FFv1VkParameters; the registered range covers
+ * sizeof(FFv1VkParameters) bytes starting at offset 0.
+ *
+ * @param shd shader being built
+ */
+static void add_push_data(FFVulkanShader *shd)
+{
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {  );
+    GLSLF(1,    uint context_count[%i];                             ,MAX_QUANT_TABLES);
+    GLSLC(0,                                                        );
+    GLSLC(1,    u8buf slice_data;                                   );
+    GLSLC(1,    u8buf slice_state;                                  );
+    GLSLC(1,    u8buf scratch_data;                                 );
+    GLSLC(0,                                                        );
+    GLSLC(1,    uvec2 img_size;                                     );
+    GLSLC(1,    uvec2 chroma_shift;                                 );
+    GLSLC(0,                                                        );
+    GLSLC(1,    uint plane_state_size;                              );
+    GLSLC(1,    uint32_t crcref;                                    );
+    GLSLC(0,                                                        );
+    GLSLC(1,    uint8_t bits_per_raw_sample;                        );
+    GLSLC(1,    uint8_t quant_table_count;                          );
+    GLSLC(1,    uint8_t version;                                    );
+    GLSLC(1,    uint8_t micro_version;                              );
+    GLSLC(1,    uint8_t key_frame;                                  );
+    GLSLC(1,    uint8_t planes;                                     );
+    GLSLC(1,    uint8_t codec_planes;                               );
+    GLSLC(1,    uint8_t transparency;                               );
+    GLSLC(1,    uint8_t colorspace;                                 );
+    GLSLC(1,    uint8_t ec;                                         );
+    GLSLC(1,    uint8_t golomb;                                     );
+    GLSLC(1,    uint8_t check_crc;                                  );
+    GLSLC(0, };                                                     );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+/**
+ * Per-frame setup for the Vulkan FFv1 hwaccel.
+ *
+ * Computes the sizes of the per-slice state, obtains the buffers the shaders
+ * need for this picture (slice state, scratch data, slice offsets, slice
+ * status), prepares the output frame and, for RGB content, allocates an
+ * intermediate frame together with its image views.
+ *
+ * @param avctx  codec context
+ * @param buffer unused
+ * @param size   unused
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_ffv1_start_frame(AVCodecContext          *avctx,
+                               av_unused const uint8_t *buffer,
+                               av_unused uint32_t       size)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+    FFV1Context *f = avctx->priv_data;
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &fp->vp;
+
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+    enum AVPixelFormat sw_format = hwfc->sw_format;
+
+    /* True only when colorspace is non-zero and the format is not YA8 */
+    int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
+                 !(sw_format == AV_PIX_FMT_YA8);
+
+    fp->slice_num = 0;
+
+    /* The state of every plane is sized for the largest quantization table */
+    for (int i = 0; i < f->quant_table_count; i++)
+        fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count);
+
+    /* Allocate slice buffer data */
+    if (f->ac == AC_GOLOMB_RICE)
+        fp->plane_state_size = 8;
+    else
+        fp->plane_state_size = CONTEXT_SIZE;
+
+    fp->plane_state_size *= fp->max_context_count;
+    fp->slice_state_size = fp->plane_state_size*f->plane_count;
+
+    fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */
+    fp->slice_state_size += fp->slice_data_size;
+    fp->slice_state_size = FFALIGN(fp->slice_state_size, 8);
+
+    fp->crc_checked = f->ec && (avctx->err_recognition & AV_EF_CRCCHECK);
+
+    /* Host map the input slices data if supported */
+    if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
+        err = ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, f->pkt_ref->data,
+                                    f->pkt_ref->buf,
+                                    VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                    VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+        if (err < 0)
+            return err;
+    }
+
+    /* Allocate slice state data */
+    if (f->picture.f->flags & AV_FRAME_FLAG_KEY) {
+        err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool,
+                                      &fp->slice_state,
+                                      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                      VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                      NULL, f->max_slice_count*fp->slice_state_size,
+                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+        if (err < 0)
+            return err;
+    } else {
+        /* Non-key frames continue from the previous picture's state, so
+         * the same buffer is shared by reference */
+        FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private;
+        fp->slice_state = av_buffer_ref(fpl->slice_state);
+        if (!fp->slice_state)
+            return AVERROR(ENOMEM);
+    }
+
+    /* Allocate temporary data buffer */
+    err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool,
+                                  &fp->tmp_data,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, f->max_slice_count*CONTEXT_SIZE,
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    if (err < 0)
+        return err;
+
+    /* Allocate slice offsets buffer */
+    err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool,
+                                  &fp->slice_offset_buf,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, 2*f->max_slice_count*sizeof(uint32_t),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0)
+        return err;
+
+    /* Allocate slice status buffer */
+    err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_status_pool,
+                                  &fp->slice_status_buf,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, f->max_slice_count*sizeof(uint32_t),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0)
+        return err;
+
+    /* Prepare frame to be used */
+    err = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1,
+                                         FF_VK_REP_NATIVE, 0);
+    if (err < 0)
+        return err;
+
+    /* Create a temporary frame for RGB */
+    if (is_rgb) {
+        AVHWFramesContext *dpb_hwfc;
+        dpb_hwfc = (AVHWFramesContext *)fv->intermediate_frames_ref[f->use32bit]->data;
+
+        vp->dpb_frame = av_frame_alloc();
+        if (!vp->dpb_frame)
+            return AVERROR(ENOMEM);
+
+        err = av_hwframe_get_buffer(fv->intermediate_frames_ref[f->use32bit],
+                                    vp->dpb_frame, 0);
+        if (err < 0)
+            return err;
+
+        err = ff_vk_decode_prepare_frame_sdr(dec, vp->dpb_frame, vp, 1,
+                                             FF_VK_REP_NATIVE, 0);
+        if (err < 0)
+            return err;
+
+        /* One image view per plane of the intermediate frame */
+        for (int i = 0; i < av_pix_fmt_count_planes(dpb_hwfc->sw_format); i++) {
+            err = ff_vk_create_imageview(&ctx->s,
+                                         &vp->view.dst[i], &vp->view.aspect_ref[i],
+                                         vp->dpb_frame,
+                                         i, FF_VK_REP_NATIVE);
+            if (err < 0)
+                return err;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Register one slice of the current frame.
+ *
+ * Writes the slice's (offset, size) pair into the mapped slice offsets
+ * buffer. With host-mapped packet data the offset is relative to the start
+ * of the packet; otherwise the slice is first copied into the slices buffer
+ * by ff_vk_decode_add_slice() and the offset it reports is used.
+ *
+ * @param avctx codec context
+ * @param data  start of the slice data
+ * @param size  size of the slice data in bytes
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vk_ffv1_decode_slice(AVCodecContext *avctx,
+                                const uint8_t  *data,
+                                uint32_t        size)
+{
+    FFV1Context *f = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+
+    int entry;       /* index of the slice whose pair is written */
+    uint32_t offset; /* byte offset of the slice within the slice data */
+
+    if (!(ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)) {
+        int err = ff_vk_decode_add_slice(avctx, &fp->vp, data, size, 0,
+                                         &fp->slice_num,
+                                         (const uint32_t **)&fp->slice_offset);
+        if (err < 0)
+            return err;
+
+        /* slice_num has already been advanced past the slice just added */
+        entry  = fp->slice_num - 1;
+        offset = fp->slice_offset[entry];
+    } else {
+        entry  = fp->slice_num++;
+        offset = data - f->pkt_ref->data;
+    }
+
+    AV_WN32(slice_offset->mapped_mem + (2*entry + 0)*sizeof(uint32_t), offset);
+    AV_WN32(slice_offset->mapped_mem + (2*entry + 1)*sizeof(uint32_t), size);
+
+    return 0;
+}
+
+static int vk_ffv1_end_frame(AVCodecContext *avctx)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    FFV1Context *f = avctx->priv_data;
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+    FFv1VkParameters pd;
+    FFv1VkResetParameters pd_reset;
+
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+    enum AVPixelFormat sw_format = hwfc->sw_format;
+
+    int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8;
+    int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
+                 !(sw_format == AV_PIX_FMT_YA8);
+
+    FFVulkanShader *reset_shader;
+    FFVulkanShader *decode_shader;
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &fp->vp;
+
+    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+    FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data;
+    FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+    FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data;
+
+    FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data;
+
+    AVFrame *decode_dst = is_rgb ? vp->dpb_frame : f->picture.f;
+    VkImageView *decode_dst_view = is_rgb ? vp->view.dst : vp->view.out;
+
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+    VkBufferMemoryBarrier2 buf_bar[8];
+    int nb_buf_bar = 0;
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    ff_vk_exec_start(&ctx->s, exec);
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                      f->picture.f);
+    if (err < 0)
+        return err;
+
+    if (is_rgb)
+        RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
+                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
+        FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private;
+        FFVulkanDecodePicture *vpl = &fpl->vp;
+
+        /* Wait on the previous frame */
+        RET(ff_vk_exec_add_dep_wait_sem(&ctx->s, exec, vpl->sem, vpl->sem_value,
+                                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
+    }
+
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1));
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_status_buf, 1, 1));
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+    vp->slices_buf = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0));
+    fp->slice_offset_buf = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0));
+    fp->tmp_data = NULL;
+
+    /* Entry barrier for the slice state */
+    if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
+        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = slice_state->stage,
+            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+            .srcAccessMask = slice_state->access,
+            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = slice_state->buf,
+            .offset = 0,
+            .size = VK_WHOLE_SIZE,
+        };
+    }
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    if (nb_buf_bar) {
+        slice_state->stage = buf_bar[1].dstStageMask;
+        slice_state->access = buf_bar[1].dstAccessMask;
+        nb_buf_bar = 0;
+    }
+
+    /* Setup shader */
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 1, 0,
+                                    slice_offset,
+                                    0, 2*f->slice_count*sizeof(uint32_t),
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 2, 0,
+                                    slice_status,
+                                    0, f->slice_count*sizeof(uint32_t),
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
+    pd = (FFv1VkParameters) {
+        /* context_count */
+
+        .slice_data = slices_buf->address,
+        .slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
+        .scratch_data = tmp_data->address,
+
+        .img_size[0] = f->picture.f->width,
+        .img_size[1] = f->picture.f->height,
+        .chroma_shift[0] = f->chroma_h_shift,
+        .chroma_shift[1] = f->chroma_v_shift,
+
+        .plane_state_size = fp->plane_state_size,
+        .crcref = f->crcref,
+
+        .bits_per_raw_sample = bits,
+        .quant_table_count = f->quant_table_count,
+        .version = f->version,
+        .micro_version = f->micro_version,
+        .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
+        .planes = av_pix_fmt_count_planes(sw_format),
+        .codec_planes = f->plane_count,
+        .transparency = f->transparency,
+        .colorspace = f->colorspace,
+        .ec = f->ec,
+        .golomb = f->ac == AC_GOLOMB_RICE,
+        .check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
+    };
+    for (int i = 0; i < MAX_QUANT_TABLES; i++)
+        pd.context_count[i] = f->context_count[i];
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    /* Reset shader */
+    reset_shader = &fv->reset[f->ac == AC_GOLOMB_RICE];
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader);
+
+    pd_reset = (FFv1VkResetParameters) {
+        .slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
+        .plane_state_size = fp->plane_state_size,
+        .context_count = fp->max_context_count,
+        .codec_planes = f->plane_count,
+        .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
+        .version = f->version,
+        .micro_version = f->micro_version,
+    };
+    ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd_reset), &pd_reset);
+
+    /* Sync between setup and reset shaders */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_state->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_state->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_state->buf,
+        .offset = 0,
+        .size = fp->slice_data_size*f->slice_count,
+    };
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    slice_state->stage = buf_bar[0].dstStageMask;
+    slice_state->access = buf_bar[0].dstAccessMask;
+    nb_buf_bar = 0;
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices,
+                    f->plane_count);
+
+    /* Decode */
+    decode_shader = &fv->decode[f->use32bit][f->ac == AC_GOLOMB_RICE][is_rgb];
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
+                                  decode_dst, decode_dst_view,
+                                  1, 1,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
+    ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    /* Sync between reset and decode shaders */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_state->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_state->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_state->buf,
+        .offset = fp->slice_data_size*f->slice_count,
+        .size = slice_state->size - fp->slice_data_size*f->slice_count,
+    };
+
+    /* Input frame barrier */
+    ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    slice_state->stage = buf_bar[0].dstStageMask;
+    slice_state->access = buf_bar[0].dstAccessMask;
+    nb_img_bar = 0;
+    nb_buf_bar = 0;
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    /* RCT */
+    if (is_rgb) {
+        FFVulkanShader *rct_shader = &fv->rct[f->use32bit];
+        FFv1VkRCTParameters pd_rct;
+
+        ff_vk_shader_update_desc_buffer(&ctx->s, exec, rct_shader,
+                                        1, 0, 0,
+                                        slice_state,
+                                        0, fp->slice_data_size*f->slice_count,
+                                        VK_FORMAT_UNDEFINED);
+        ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
+                                      decode_dst, decode_dst_view,
+                                      1, 1,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      VK_NULL_HANDLE);
+        ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
+                                      f->picture.f, vp->view.out,
+                                      1, 2,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      VK_NULL_HANDLE);
+
+        ff_vk_exec_bind_shader(&ctx->s, exec, rct_shader);
+
+        pd_rct = (FFv1VkRCTParameters) {
+            .offset = 1 << bits,
+            .bits = bits,
+            .planar_rgb = ff_vk_mt_is_np_rgb(sw_format) &&
+                          (ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1),
+            .transparency = f->transparency,
+        };
+        ff_vk_set_perm(sw_format, pd_rct.fmt_lut);
+
+        ff_vk_shader_update_push_const(&ctx->s, exec, rct_shader,
+                                       VK_SHADER_STAGE_COMPUTE_BIT,
+                                       0, sizeof(pd_rct), &pd_rct);
+
+        ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
+                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                            VK_ACCESS_SHADER_READ_BIT,
+                            VK_IMAGE_LAYOUT_GENERAL,
+                            VK_QUEUE_FAMILY_IGNORED);
+        ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
+                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                            VK_ACCESS_SHADER_WRITE_BIT,
+                            VK_IMAGE_LAYOUT_GENERAL,
+                            VK_QUEUE_FAMILY_IGNORED);
+
+        vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+        nb_img_bar = 0;
+
+        vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+    }
+
+    err = ff_vk_exec_submit(&ctx->s, exec);
+    if (err < 0)
+        return err;
+
+fail:
+    return 0;
+}
+
+/**
+ * Append the preamble shared by the FFv1 decoder shaders to a shader source.
+ *
+ * Emits, in this order: the DECODE define, the CONTEXT_SIZE and
+ * MAX_QUANT_TABLE_MASK constants, the TYPE/VTYPE2/VTYPE3 sample type
+ * aliases, and finally ff_source_rangecoder_comp and
+ * ff_source_ffv1_common_comp through GLSLD.
+ *
+ * @param shd      shader whose source buffer (shd->src) is extended
+ * @param use32bit nonzero selects 32-bit sample types, zero selects 16-bit
+ */
+static void define_shared_code(FFVulkanShader *shd, int use32bit)
+{
+    int sample_width = 16;
+
+    if (use32bit)
+        sample_width = 32;
+
+    GLSLC(0, #define DECODE                                              );
+
+    av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n", CONTEXT_SIZE);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n",
+               MAX_QUANT_TABLE_MASK);
+
+    /* The %i in each alias is replaced by the sample width (16 or 32) */
+    GLSLF(0, #define TYPE int%i_t                                        ,sample_width);
+    GLSLF(0, #define VTYPE2 i%ivec2                                      ,sample_width);
+    GLSLF(0, #define VTYPE3 i%ivec3                                      ,sample_width);
+
+    GLSLD(ff_source_rangecoder_comp);
+    GLSLD(ff_source_ffv1_common_comp);
+}
+
+/**
+ * Assemble, compile, link and register the "ffv1_dec_setup" compute shader.
+ *
+ * The shader source is built in this order: common header, push data,
+ * quantization table size defines, the uniform-buffer table set, the shared
+ * decoder preamble, the per-slice storage-buffer set, and finally
+ * ff_source_ffv1_dec_setup_comp.
+ *
+ * @param f    decoder context; f->max_slice_count sizes the slice buffers
+ * @param s    Vulkan context
+ * @param pool execution pool the shader is registered with
+ * @param spv  SPIR-V compiler used on the assembled source
+ * @param shd  shader to initialize
+ * @return err as left by the last step that ran (set through RET)
+ */
+static int init_setup_shader(FFV1Context *f, FFVulkanContext *s,
+                             FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                             FFVulkanShader *shd)
+{
+    int err;
+    uint8_t *spirv;
+    size_t spirv_size;
+    void *spirv_handle = NULL;
+
+    /* Lookup tables, all bound as scalar-layout uniform buffers */
+    FFVulkanDescriptorSetBinding table_bindings[] = {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "crc_ieee_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint32_t crc_ieee[256];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+
+    /* Per-slice storage buffers: contexts, offsets (read) and status (write) */
+    FFVulkanDescriptorSetBinding slice_bindings[] = {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+        {
+            .name        = "slice_offsets_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali   = "readonly",
+            .buf_content = "uint32_t slice_offsets",
+            .buf_elems   = 2*f->max_slice_count,
+        },
+        {
+            .name        = "slice_status_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali   = "writeonly",
+            .buf_content = "uint32_t slice_crc_mismatch",
+            .buf_elems   = 2*f->max_slice_count,
+        },
+    };
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    /* The quant_buf declaration refers to these defines */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, table_bindings, 3, 1, 0));
+
+    define_shared_code(shd, 0 /* Irrelevant */);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, slice_bindings, 3, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_setup_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spirv, &spirv_size, "main",
+                            &spirv_handle));
+    RET(ff_vk_shader_link(s, shd, spirv, spirv_size, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* Release the compiler output on both the success and error paths */
+    if (spirv_handle)
+        spv->free_shader(spv, &spirv_handle);
+
+    return err;
+}
+
+/**
+ * Assemble, compile, link and register the "ffv1_dec_reset" compute shader.
+ *
+ * The workgroup X size is maxComputeWorkGroupSize[0] capped at 1024. When
+ * ac equals AC_GOLOMB_RICE, GOLOMB is defined at the top of the source and
+ * ff_source_ffv1_vlc_comp is added after the shared preamble.
+ *
+ * @param f    decoder context; f->max_slice_count sizes slice_data_buf
+ * @param s    Vulkan context; also supplies the device limits
+ * @param pool execution pool the shader is registered with
+ * @param spv  SPIR-V compiler used on the assembled source
+ * @param shd  shader to initialize
+ * @param ac   coder type; only compared against AC_GOLOMB_RICE here
+ * @return err as left by the last step that ran (set through RET)
+ */
+static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
+                             FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                             FFVulkanShader *shd, int ac)
+{
+    int err;
+    uint8_t *spirv;
+    size_t spirv_size;
+    void *spirv_handle = NULL;
+    int local_size_x = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
+
+    /* Lookup tables, bound as scalar-layout uniform buffers */
+    FFVulkanDescriptorSetBinding table_bindings[] = {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+
+    /* Slice contexts, declared read-only for this shader */
+    FFVulkanDescriptorSetBinding slice_bindings[] = {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+    };
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          local_size_x, 1, 1,
+                          0));
+
+    if (ac == AC_GOLOMB_RICE)
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    /* Push constant block; its size is registered as sizeof(FFv1VkResetParameters) */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
+    GLSLC(1,    u8buf slice_state;                                             );
+    GLSLC(1,    uint plane_state_size;                                         );
+    GLSLC(1,    uint context_count;                                            );
+    GLSLC(1,    uint8_t codec_planes;                                          );
+    GLSLC(1,    uint8_t key_frame;                                             );
+    GLSLC(1,    uint8_t version;                                               );
+    GLSLC(1,    uint8_t micro_version;                                         );
+    GLSLC(1,    uint8_t padding[1];                                            );
+    GLSLC(0, };                                                                );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    /* The quant_buf declaration refers to these defines */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, table_bindings, 2, 1, 0));
+
+    define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */);
+    if (ac == AC_GOLOMB_RICE)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, slice_bindings, 1, 0, 0));
+
+    GLSLD(ff_source_ffv1_reset_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spirv, &spirv_size, "main",
+                            &spirv_handle));
+    RET(ff_vk_shader_link(s, shd, spirv, spirv_size, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* Release the compiler output on both the success and error paths */
+    if (spirv_handle)
+        spv->free_shader(spv, &spirv_handle);
+
+    return err;
+}
+
+/**
+ * Assemble, compile, link and register one "ffv1_dec" compute shader variant.
+ *
+ * GOLOMB is defined when ac equals AC_GOLOMB_RICE (which also pulls in
+ * ff_source_ffv1_vlc_comp), and RGB is defined when rgb is nonzero. The
+ * "dst" storage image array takes its format string and plane count from
+ * frames_ctx->sw_format.
+ *
+ * @param f          decoder context; f->max_slice_count sizes slice_data_buf
+ * @param s          Vulkan context
+ * @param pool       execution pool the shader is registered with
+ * @param spv        SPIR-V compiler used on the assembled source
+ * @param shd        shader to initialize
+ * @param frames_ctx frames context describing the images written as "dst"
+ * @param use32bit   forwarded to define_shared_code() to pick sample types
+ * @param ac         coder type; only compared against AC_GOLOMB_RICE here
+ * @param rgb        nonzero to build the RGB variant
+ * @return err as left by the last step that ran (set through RET)
+ */
+static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
+                              FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                              FFVulkanShader *shd, AVHWFramesContext *frames_ctx,
+                              int use32bit, int ac, int rgb)
+{
+    int err;
+    uint8_t *spirv;
+    size_t spirv_size;
+    void *spirv_handle = NULL;
+
+    /* Lookup tables, bound as scalar-layout uniform buffers */
+    FFVulkanDescriptorSetBinding table_bindings[] = {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+
+    /* Slice contexts plus one storage image per plane of the target format */
+    FFVulkanDescriptorSetBinding frame_bindings[] = {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+        {
+            .name       = "dst",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .elems      = av_pix_fmt_count_planes(frames_ctx->sw_format),
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+
+    /* Variant selection defines go before any included source */
+    if (ac == AC_GOLOMB_RICE)
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+    if (rgb)
+        av_bprintf(&shd->src, "#define RGB\n");
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    /* The quant_buf declaration refers to these defines */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, table_bindings, 2, 1, 0));
+
+    define_shared_code(shd, use32bit);
+    if (ac == AC_GOLOMB_RICE)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, frame_bindings, 2, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spirv, &spirv_size, "main",
+                            &spirv_handle));
+    RET(ff_vk_shader_link(s, shd, spirv, spirv_size, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* Release the compiler output on both the success and error paths */
+    if (spirv_handle)
+        spv->free_shader(spv, &spirv_handle);
+
+    return err;
+}
+
+/**
+ * Assemble, compile, link and register one "ffv1_rct" compute shader.
+ *
+ * The workgroup is square: both X and Y sizes are the integer-truncated
+ * square root of maxComputeWorkGroupInvocations. The shader declares a
+ * read-only "src" image array described by src_ctx and a write-only "dst"
+ * image array described by dst_ctx.
+ *
+ * @param f        decoder context; f->max_slice_count sizes slice_data_buf
+ * @param s        Vulkan context; also supplies the device limits
+ * @param pool     execution pool the shader is registered with
+ * @param spv      SPIR-V compiler used on the assembled source
+ * @param shd      shader to initialize
+ * @param use32bit forwarded to define_shared_code() to pick sample types
+ * @param src_ctx  frames context of the images read as "src"
+ * @param dst_ctx  frames context of the images written as "dst"
+ * @return err as left by the last step that ran (set through RET)
+ */
+static int init_rct_shader(FFV1Context *f, FFVulkanContext *s,
+                           FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                           FFVulkanShader *shd, int use32bit,
+                           AVHWFramesContext *src_ctx, AVHWFramesContext *dst_ctx)
+{
+    int err;
+    uint8_t *spirv;
+    size_t spirv_size;
+    void *spirv_handle = NULL;
+    int wg_side = sqrt(s->props.properties.limits.maxComputeWorkGroupInvocations);
+
+    /* Lookup tables, bound as scalar-layout uniform buffers */
+    FFVulkanDescriptorSetBinding table_bindings[] = {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+
+    /* Read-only slice contexts, read-only source and write-only target images */
+    FFVulkanDescriptorSetBinding frame_bindings[] = {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+        {
+            .name       = "src",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_layout = ff_vk_shader_rep_fmt(src_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .mem_quali  = "readonly",
+            .elems      = av_pix_fmt_count_planes(src_ctx->sw_format),
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+        {
+            .name       = "dst",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_layout = ff_vk_shader_rep_fmt(dst_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .mem_quali  = "writeonly",
+            .elems      = av_pix_fmt_count_planes(dst_ctx->sw_format),
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_rct",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          wg_side, wg_side, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    /* Push constant block; its size is registered as sizeof(FFv1VkRCTParameters) */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
+    GLSLC(1,    ivec4 fmt_lut;                                                 );
+    GLSLC(1,    int offset;                                                    );
+    GLSLC(1,    uint8_t bits;                                                  );
+    GLSLC(1,    uint8_t planar_rgb;                                            );
+    GLSLC(1,    uint8_t transparency;                                          );
+    GLSLC(1,    uint8_t version;                                               );
+    GLSLC(1,    uint8_t micro_version;                                         );
+    GLSLC(1,    uint8_t padding[3];                                            );
+    GLSLC(0, };                                                                );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    /* The quant_buf declaration refers to these defines */
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, table_bindings, 2, 1, 0));
+
+    define_shared_code(shd, use32bit);
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, frame_bindings, 3, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_rct_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spirv, &spirv_size, "main",
+                            &spirv_handle));
+    RET(ff_vk_shader_link(s, shd, spirv, spirv_size, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* Release the compiler output on both the success and error paths */
+    if (spirv_handle)
+        spv->free_shader(spv, &spirv_handle);
+
+    return err;
+}
+
+/**
+ * Allocate and initialize a Vulkan frames context for intermediate images.
+ *
+ * The pool uses the dimensions of s->frames rounded up to a multiple of 32,
+ * optimal tiling, storage usage and the mutable-format creation flag.
+ *
+ * @param avctx     codec context, used for logging only
+ * @param s         Vulkan context supplying the device and frame dimensions
+ * @param dst       receives the new frames context reference; left NULL
+ *                  when allocation or initialization fails
+ * @param sw_format software pixel format of the pool
+ * @return 0 on success, AVERROR(ENOMEM) if allocation fails, otherwise the
+ *         negative error from av_hwframe_ctx_init()
+ */
+static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
+                         AVBufferRef **dst, enum AVPixelFormat sw_format)
+{
+    int ret;
+    AVHWFramesContext *pool_ctx;
+    AVVulkanFramesContext *pool_hwctx;
+
+    *dst = av_hwframe_ctx_alloc(s->device_ref);
+    if (*dst == NULL)
+        return AVERROR(ENOMEM);
+
+    /* Generic frame pool parameters */
+    pool_ctx            = (AVHWFramesContext *)((*dst)->data);
+    pool_ctx->format    = AV_PIX_FMT_VULKAN;
+    pool_ctx->sw_format = sw_format;
+    pool_ctx->width     = FFALIGN(s->frames->width, 32);
+    pool_ctx->height    = FFALIGN(s->frames->height, 32);
+
+    /* Vulkan-specific image parameters */
+    pool_hwctx            = pool_ctx->hwctx;
+    pool_hwctx->tiling    = VK_IMAGE_TILING_OPTIMAL;
+    pool_hwctx->usage     = VK_IMAGE_USAGE_STORAGE_BIT;
+    pool_hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+
+    ret = av_hwframe_ctx_init(*dst);
+    if (ret >= 0)
+        return 0;
+
+    av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n",
+           av_get_pix_fmt_name(sw_format), av_err2str(ret));
+    av_buffer_unref(dst);
+
+    return ret;
+}
+
+/**
+ * Release everything held by the FFv1 decoder state in ctx->sd_ctx:
+ * the shaders, the intermediate frame pools, the table buffers and the
+ * per-frame buffer pools. The state struct itself is not freed here.
+ *
+ * @param ctx shared Vulkan decode context owning the FFv1 state
+ */
+static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
+{
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+    FFVulkanContext *vkctx = &ctx->s;
+    int depth, coder, rgb;
+
+    ff_vk_shader_free(vkctx, &fv->setup);
+
+    /* One intermediate pool per bit depth (16/32 bit) */
+    for (depth = 0; depth < 2; depth++)
+        av_buffer_unref(&fv->intermediate_frames_ref[depth]);
+
+    /* One reset shader per coder type (AC/Golomb) */
+    for (coder = 0; coder < 2; coder++)
+        ff_vk_shader_free(vkctx, &fv->reset[coder]);
+
+    /* Decode shaders: bit depth x coder type x Normal/RGB */
+    for (depth = 0; depth < 2; depth++) {
+        for (coder = 0; coder < 2; coder++) {
+            for (rgb = 0; rgb < 2; rgb++)
+                ff_vk_shader_free(vkctx, &fv->decode[depth][coder][rgb]);
+        }
+    }
+
+    /* One RCT shader per bit depth (16/32 bit) */
+    for (depth = 0; depth < 2; depth++)
+        ff_vk_shader_free(vkctx, &fv->rct[depth]);
+
+    ff_vk_free_buf(vkctx, &fv->quant_buf);
+    ff_vk_free_buf(vkctx, &fv->rangecoder_static_buf);
+    ff_vk_free_buf(vkctx, &fv->crc_tab_buf);
+
+    av_buffer_pool_uninit(&fv->tmp_data_pool);
+    av_buffer_pool_uninit(&fv->slice_state_pool);
+    av_buffer_pool_uninit(&fv->slice_offset_pool);
+    av_buffer_pool_uninit(&fv->slice_status_pool);
+}
+
+/**
+ * Hwaccel init callback for the Vulkan FFv1 decoder.
+ *
+ * Creates the shared decode context, the intermediate RCT frame pools,
+ * every shader variant (setup, reset, decode, RCT) and the constant table
+ * buffers, then writes the table buffers into the setup and decode shader
+ * descriptors.
+ *
+ * The SPIR-V compiler is only needed while the shaders are being built, so
+ * it is released before returning on both the success and the error path.
+ * Every failure after it has been created goes through the fail label so
+ * the compiler is not leaked.
+ *
+ * @param avctx codec context; priv_data must be an FFV1Context
+ * @return AVERROR(ENOTSUP) for stream versions below 3, AVERROR_EXTERNAL if
+ *         the SPIR-V compiler cannot be created, a negative error code from
+ *         the first failing step otherwise, and the (non-negative) result
+ *         of the last step on success
+ */
+static int vk_decode_ffv1_init(AVCodecContext *avctx)
+{
+    int err;
+    FFV1Context *f = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = NULL;
+    FFv1VulkanDecodeContext *fv;
+    FFVkSPIRVCompiler *spv;
+
+    if (f->version < 3)
+        return AVERROR(ENOTSUP);
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    RET(ff_vk_decode_init(avctx));
+    ctx = dec->shared_ctx;
+
+    fv = ctx->sd_ctx = av_mallocz(sizeof(*fv));
+    if (!fv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ctx->sd_ctx_free = &vk_decode_ffv1_uninit;
+
+    /* Intermediate frame pool for RCT */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        RET(init_indirect(avctx, &ctx->s, &fv->intermediate_frames_ref[i],
+                          i ? AV_PIX_FMT_RGBA128 : AV_PIX_FMT_RGBA64));
+    }
+
+    /* Setup shader */
+    RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup));
+
+    /* Reset shaders */
+    for (int i = 0; i < 2; i++) { /* AC/Golomb */
+        RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool,
+                              spv, &fv->reset[i],
+                              !i ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE));
+    }
+
+    /* Decode shaders */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        for (int j = 0; j < 2; j++) { /* AC/Golomb */
+            for (int k = 0; k < 2; k++) { /* Normal/RGB */
+                /* RGB variants write to the intermediate pool instead of
+                 * the output frames */
+                AVHWFramesContext *frames_ctx;
+                frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data :
+                                 (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+                RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool,
+                                       spv, &fv->decode[i][j][k],
+                                       frames_ctx,
+                                       i,
+                                       !j ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE,
+                                       k));
+            }
+        }
+    }
+
+    /* RCT shaders */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        RET(init_rct_shader(f, &ctx->s, &ctx->exec_pool,
+                            spv, &fv->rct[i], i,
+                            (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data,
+                            (AVHWFramesContext *)avctx->hw_frames_ctx->data));
+    }
+
+    /* Range coder data */
+    RET(ff_ffv1_vk_init_state_transition_data(&ctx->s,
+                                              &fv->rangecoder_static_buf,
+                                              f));
+
+    /* Quantization table data */
+    RET(ff_ffv1_vk_init_quant_table_data(&ctx->s,
+                                         &fv->quant_buf,
+                                         f));
+
+    /* CRC table buffer */
+    RET(ff_ffv1_vk_init_crc_table_data(&ctx->s,
+                                       &fv->crc_tab_buf,
+                                       f));
+
+    /* Update setup global descriptors */
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 1, 0,
+                                        &fv->crc_tab_buf,
+                                        0, fv->crc_tab_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    /* NOTE(review): the setup shader also declares quant_buf in the same
+     * descriptor set, but no buffer is written for it here - confirm this
+     * is intentional. */
+
+    /* Update decode global descriptors */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        for (int j = 0; j < 2; j++) { /* AC/Golomb */
+            for (int k = 0; k < 2; k++) { /* Normal/RGB */
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 0, 0,
+                                                    &fv->rangecoder_static_buf,
+                                                    0, fv->rangecoder_static_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 1, 0,
+                                                    &fv->quant_buf,
+                                                    0, fv->quant_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+            }
+        }
+    }
+
+fail:
+    /* spv is always valid here: both earlier returns happen before any
+     * jump to this label */
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/**
+ * Free the per-frame private data of the Vulkan FFv1 hwaccel.
+ *
+ * After the generic decode picture state has been freed, and if CRC
+ * checking was flagged for this frame, one 32-bit result per slice is read
+ * from the mapped slice status buffer and every nonzero result is logged
+ * as a mismatch. All per-frame buffer references are then dropped.
+ *
+ * @param _hwctx opaque holding the AVHWDeviceContext (also the log context)
+ * @param data   the FFv1VulkanDecodePicture to clean up
+ */
+static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    AVHWDeviceContext *dev = _hwctx.nc;
+    FFv1VulkanDecodePicture *pic = data;
+
+    ff_vk_decode_free_frame(dev, &pic->vp);
+
+    if (pic->crc_checked) {
+        FFVkBuffer *status_buf = (FFVkBuffer *)pic->slice_status_buf->data;
+        int slice = 0;
+
+        while (slice < pic->slice_num) {
+            uint32_t mismatch = AV_RN32(status_buf->mapped_mem + slice*sizeof(uint32_t));
+
+            if (mismatch)
+                av_log(dev, AV_LOG_ERROR, "CRC mismatch in slice %i, res: 0x%x\n",
+                       slice, mismatch);
+            slice++;
+        }
+    }
+
+    av_buffer_unref(&pic->vp.slices_buf);
+    av_buffer_unref(&pic->slice_state);
+    av_buffer_unref(&pic->slice_offset_buf);
+    av_buffer_unref(&pic->slice_status_buf);
+    av_buffer_unref(&pic->tmp_data);
+}
+
+/* Hwaccel descriptor wiring the FFv1 decoder to the Vulkan implementation */
+const FFHWAccel ff_ffv1_vulkan_hwaccel = {
+    /* Identification */
+    .p.name                = "ffv1_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_FFV1,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+
+    /* Private data sizes */
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .frame_priv_data_size  = sizeof(FFv1VulkanDecodePicture),
+
+    /* Lifetime callbacks: FFv1-specific init, generic Vulkan decode for the rest */
+    .init                  = &vk_decode_ffv1_init,
+    .uninit                = &ff_vk_decode_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .update_thread_context = &ff_vk_update_thread_context,
+    .decode_params         = &ff_vk_params_invalidate,
+    .flush                 = &ff_vk_decode_flush,
+
+    /* Per-frame callbacks */
+    .start_frame           = &vk_ffv1_start_frame,
+    .decode_slice          = &vk_ffv1_decode_slice,
+    .end_frame             = &vk_ffv1_end_frame,
+    .free_frame_priv       = &vk_ffv1_free_frame_priv,
+
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
  2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
  2025-03-10  3:08   ` [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem() Lynne
  2025-03-10  3:08   ` [FFmpeg-devel] [PATCH 13/13] ffv1: add a Vulkan-based decoder Lynne
@ 2025-03-10  3:14   ` Andreas Rheinhardt
  2025-03-10 17:42     ` Lynne
  2 siblings, 1 reply; 20+ messages in thread
From: Andreas Rheinhardt @ 2025-03-10  3:14 UTC (permalink / raw)
  To: ffmpeg-devel

Lynne:
> ---
>  libavcodec/ffv1.h    |  3 +++
>  libavcodec/ffv1dec.c | 19 +++++++++++++++++--
>  2 files changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
> index 8c0e71284d..860a5c14b1 100644
> --- a/libavcodec/ffv1.h
> +++ b/libavcodec/ffv1.h
> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
>       * NOT shared between frame threads.
>       */
>      uint8_t           frame_damaged;
> +
> +    /* Reference to the current packet */
> +    AVPacket *pkt_ref;
>  } FFV1Context;
>  
>  int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
> index eaa21eebdf..6396f22f79 100644
> --- a/libavcodec/ffv1dec.c
> +++ b/libavcodec/ffv1dec.c
> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
>      f->pix_fmt = AV_PIX_FMT_NONE;
>      f->configured_pix_fmt = AV_PIX_FMT_NONE;
>  
> +    f->pkt_ref = av_packet_alloc();
> +    if (!f->pkt_ref)
> +        return AVERROR(ENOMEM);
> +
>      if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
>          return ret;
>  
> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
>  
>      /* Start */
>      if (hwaccel) {
> +        ret = av_packet_ref(f->pkt_ref, avpkt);
> +        if (ret < 0)
> +            return ret;
> +
>          ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
>          if (ret < 0)
>              return ret;
> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
>              uint32_t len;
>              ret = find_next_slice(avctx, avpkt->data, buf_end, i,
>                                    &pos, &len);
> -            if (ret < 0)
> +            if (ret < 0) {
> +                av_packet_unref(f->pkt_ref);
>                  return ret;
> +            }
>  
>              buf_end -= len;
>  
>              ret = hwaccel->decode_slice(avctx, pos, len);
> -            if (ret < 0)
> +            if (ret < 0) {
> +                av_packet_unref(f->pkt_ref);
>                  return ret;
> +            }
>          }
> +
> +        av_packet_unref(f->pkt_ref);
>      } else {
>          ret = decode_slices(avctx, c, avpkt);
>          if (ret < 0)
> @@ -827,6 +841,7 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx)
>      ff_progress_frame_unref(&s->last_picture);
>      av_refstruct_unref(&s->hwaccel_last_picture_private);
>  
> +    av_packet_free(&s->pkt_ref);
>      ff_ffv1_close(s);
>  
>      return 0;

Why not simply use a const AVPacket*?

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
  2025-03-10  3:14   ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Andreas Rheinhardt
@ 2025-03-10 17:42     ` Lynne
  2025-03-13  0:30       ` Lynne
  0 siblings, 1 reply; 20+ messages in thread
From: Lynne @ 2025-03-10 17:42 UTC (permalink / raw)
  To: ffmpeg-devel

On 10/03/2025 04:14, Andreas Rheinhardt wrote:
> Lynne:
>> ---
>>   libavcodec/ffv1.h    |  3 +++
>>   libavcodec/ffv1dec.c | 19 +++++++++++++++++--
>>   2 files changed, 20 insertions(+), 2 deletions(-)
>>
>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>> index 8c0e71284d..860a5c14b1 100644
>> --- a/libavcodec/ffv1.h
>> +++ b/libavcodec/ffv1.h
>> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
>>        * NOT shared between frame threads.
>>        */
>>       uint8_t           frame_damaged;
>> +
>> +    /* Reference to the current packet */
>> +    AVPacket *pkt_ref;
>>   } FFV1Context;
>>   
>>   int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
>> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
>> index eaa21eebdf..6396f22f79 100644
>> --- a/libavcodec/ffv1dec.c
>> +++ b/libavcodec/ffv1dec.c
>> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
>>       f->pix_fmt = AV_PIX_FMT_NONE;
>>       f->configured_pix_fmt = AV_PIX_FMT_NONE;
>>   
>> +    f->pkt_ref = av_packet_alloc();
>> +    if (!f->pkt_ref)
>> +        return AVERROR(ENOMEM);
>> +
>>       if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
>>           return ret;
>>   
>> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
>>   
>>       /* Start */
>>       if (hwaccel) {
>> +        ret = av_packet_ref(f->pkt_ref, avpkt);
>> +        if (ret < 0)
>> +            return ret;
>> +
>>           ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
>>           if (ret < 0)
>>               return ret;
>> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
>>               uint32_t len;
>>               ret = find_next_slice(avctx, avpkt->data, buf_end, i,
>>                                     &pos, &len);
>> -            if (ret < 0)
>> +            if (ret < 0) {
>> +                av_packet_unref(f->pkt_ref);
>>                   return ret;
>> +            }
>>   
>>               buf_end -= len;
>>   
>>               ret = hwaccel->decode_slice(avctx, pos, len);
>> -            if (ret < 0)
>> +            if (ret < 0) {
>> +                av_packet_unref(f->pkt_ref);
>>                   return ret;
>> +            }
>>           }
>> +
>> +        av_packet_unref(f->pkt_ref);
>>       } else {
>>           ret = decode_slices(avctx, c, avpkt);
>>           if (ret < 0)
>> @@ -827,6 +841,7 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx)
>>       ff_progress_frame_unref(&s->last_picture);
>>       av_refstruct_unref(&s->hwaccel_last_picture_private);
>>   
>> +    av_packet_free(&s->pkt_ref);
>>       ff_ffv1_close(s);
>>   
>>       return 0;
> 
> Why not simply use a const AVPacket*?

No reason. Fixed locally.
Thanks.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
  2025-03-10 17:42     ` Lynne
@ 2025-03-13  0:30       ` Lynne
  2025-03-13  1:24         ` Andreas Rheinhardt
  0 siblings, 1 reply; 20+ messages in thread
From: Lynne @ 2025-03-13  0:30 UTC (permalink / raw)
  To: ffmpeg-devel

On 10/03/2025 18:42, Lynne wrote:
> On 10/03/2025 04:14, Andreas Rheinhardt wrote:
>> Lynne:
>>> ---
>>>   libavcodec/ffv1.h    |  3 +++
>>>   libavcodec/ffv1dec.c | 19 +++++++++++++++++--
>>>   2 files changed, 20 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>>> index 8c0e71284d..860a5c14b1 100644
>>> --- a/libavcodec/ffv1.h
>>> +++ b/libavcodec/ffv1.h
>>> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
>>>        * NOT shared between frame threads.
>>>        */
>>>       uint8_t           frame_damaged;
>>> +
>>> +    /* Reference to the current packet */
>>> +    AVPacket *pkt_ref;
>>>   } FFV1Context;
>>>   int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
>>> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
>>> index eaa21eebdf..6396f22f79 100644
>>> --- a/libavcodec/ffv1dec.c
>>> +++ b/libavcodec/ffv1dec.c
>>> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext 
>>> *avctx)
>>>       f->pix_fmt = AV_PIX_FMT_NONE;
>>>       f->configured_pix_fmt = AV_PIX_FMT_NONE;
>>> +    f->pkt_ref = av_packet_alloc();
>>> +    if (!f->pkt_ref)
>>> +        return AVERROR(ENOMEM);
>>> +
>>>       if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
>>>           return ret;
>>> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, 
>>> AVFrame *rframe,
>>>       /* Start */
>>>       if (hwaccel) {
>>> +        ret = av_packet_ref(f->pkt_ref, avpkt);
>>> +        if (ret < 0)
>>> +            return ret;
>>> +
>>>           ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
>>>           if (ret < 0)
>>>               return ret;
>>> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, 
>>> AVFrame *rframe,
>>>               uint32_t len;
>>>               ret = find_next_slice(avctx, avpkt->data, buf_end, i,
>>>                                     &pos, &len);
>>> -            if (ret < 0)
>>> +            if (ret < 0) {
>>> +                av_packet_unref(f->pkt_ref);
>>>                   return ret;
>>> +            }
>>>               buf_end -= len;
>>>               ret = hwaccel->decode_slice(avctx, pos, len);
>>> -            if (ret < 0)
>>> +            if (ret < 0) {
>>> +                av_packet_unref(f->pkt_ref);
>>>                   return ret;
>>> +            }
>>>           }
>>> +
>>> +        av_packet_unref(f->pkt_ref);
>>>       } else {
>>>           ret = decode_slices(avctx, c, avpkt);
>>>           if (ret < 0)
>>> @@ -827,6 +841,7 @@ static av_cold int 
>>> ffv1_decode_close(AVCodecContext *avctx)
>>>       ff_progress_frame_unref(&s->last_picture);
>>>       av_refstruct_unref(&s->hwaccel_last_picture_private);
>>> +    av_packet_free(&s->pkt_ref);
>>>       ff_ffv1_close(s);
>>>       return 0;
>>
>> Why not simply use a const AVPacket*?
> 
> No reason. Fixed locally.
> Thanks.

*reverted this change.
We need to ref the packet, since we map its memory and let the GPU use
it directly without copying the contents. 6K 16-bit content at 24 fps is
typically around 2 Gbps when compressed, so avoiding copies is important.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
  2025-03-13  0:30       ` Lynne
@ 2025-03-13  1:24         ` Andreas Rheinhardt
  2025-03-13  1:56           ` Lynne
  0 siblings, 1 reply; 20+ messages in thread
From: Andreas Rheinhardt @ 2025-03-13  1:24 UTC (permalink / raw)
  To: ffmpeg-devel

Lynne:
> On 10/03/2025 18:42, Lynne wrote:
>> On 10/03/2025 04:14, Andreas Rheinhardt wrote:
>>> Lynne:
>>>> ---
>>>>   libavcodec/ffv1.h    |  3 +++
>>>>   libavcodec/ffv1dec.c | 19 +++++++++++++++++--
>>>>   2 files changed, 20 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>>>> index 8c0e71284d..860a5c14b1 100644
>>>> --- a/libavcodec/ffv1.h
>>>> +++ b/libavcodec/ffv1.h
>>>> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
>>>>        * NOT shared between frame threads.
>>>>        */
>>>>       uint8_t           frame_damaged;
>>>> +
>>>> +    /* Reference to the current packet */
>>>> +    AVPacket *pkt_ref;
>>>>   } FFV1Context;
>>>>   int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
>>>> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
>>>> index eaa21eebdf..6396f22f79 100644
>>>> --- a/libavcodec/ffv1dec.c
>>>> +++ b/libavcodec/ffv1dec.c
>>>> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext
>>>> *avctx)
>>>>       f->pix_fmt = AV_PIX_FMT_NONE;
>>>>       f->configured_pix_fmt = AV_PIX_FMT_NONE;
>>>> +    f->pkt_ref = av_packet_alloc();
>>>> +    if (!f->pkt_ref)
>>>> +        return AVERROR(ENOMEM);
>>>> +
>>>>       if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
>>>>           return ret;
>>>> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx,
>>>> AVFrame *rframe,
>>>>       /* Start */
>>>>       if (hwaccel) {
>>>> +        ret = av_packet_ref(f->pkt_ref, avpkt);
>>>> +        if (ret < 0)
>>>> +            return ret;
>>>> +
>>>>           ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
>>>>           if (ret < 0)
>>>>               return ret;
>>>> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx,
>>>> AVFrame *rframe,
>>>>               uint32_t len;
>>>>               ret = find_next_slice(avctx, avpkt->data, buf_end, i,
>>>>                                     &pos, &len);
>>>> -            if (ret < 0)
>>>> +            if (ret < 0) {
>>>> +                av_packet_unref(f->pkt_ref);
>>>>                   return ret;
>>>> +            }
>>>>               buf_end -= len;
>>>>               ret = hwaccel->decode_slice(avctx, pos, len);
>>>> -            if (ret < 0)
>>>> +            if (ret < 0) {
>>>> +                av_packet_unref(f->pkt_ref);
>>>>                   return ret;
>>>> +            }
>>>>           }
>>>> +
>>>> +        av_packet_unref(f->pkt_ref);
>>>>       } else {
>>>>           ret = decode_slices(avctx, c, avpkt);
>>>>           if (ret < 0)
>>>> @@ -827,6 +841,7 @@ static av_cold int
>>>> ffv1_decode_close(AVCodecContext *avctx)
>>>>       ff_progress_frame_unref(&s->last_picture);
>>>>       av_refstruct_unref(&s->hwaccel_last_picture_private);
>>>> +    av_packet_free(&s->pkt_ref);
>>>>       ff_ffv1_close(s);
>>>>       return 0;
>>>
>>> Why not simply use a const AVPacket*?
>>
>> No reason. Fixed locally.
>> Thanks.
> 
> *reverted this change.
> We need to ref the packet, since we map its memory and let the GPU use
> it directly without copying the contents. 6k16bit content at 24fps is
> typically around 2Gbps when compressed, so avoiding copies is important.

How long does the hwaccel need this data?

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
  2025-03-13  1:24         ` Andreas Rheinhardt
@ 2025-03-13  1:56           ` Lynne
  2025-03-13  4:57             ` Andreas Rheinhardt
  0 siblings, 1 reply; 20+ messages in thread
From: Lynne @ 2025-03-13  1:56 UTC (permalink / raw)
  To: ffmpeg-devel



On 13/03/2025 02:24, Andreas Rheinhardt wrote:
> Lynne:
>> On 10/03/2025 18:42, Lynne wrote:
>>> On 10/03/2025 04:14, Andreas Rheinhardt wrote:
>>>> Lynne:
>>>>> ---
>>>>>    libavcodec/ffv1.h    |  3 +++
>>>>>    libavcodec/ffv1dec.c | 19 +++++++++++++++++--
>>>>>    2 files changed, 20 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>>>>> index 8c0e71284d..860a5c14b1 100644
>>>>> --- a/libavcodec/ffv1.h
>>>>> +++ b/libavcodec/ffv1.h
>>>>> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
>>>>>         * NOT shared between frame threads.
>>>>>         */
>>>>>        uint8_t           frame_damaged;
>>>>> +
>>>>> +    /* Reference to the current packet */
>>>>> +    AVPacket *pkt_ref;
>>>>>    } FFV1Context;
>>>>>    int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
>>>>> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
>>>>> index eaa21eebdf..6396f22f79 100644
>>>>> --- a/libavcodec/ffv1dec.c
>>>>> +++ b/libavcodec/ffv1dec.c
>>>>> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext
>>>>> *avctx)
>>>>>        f->pix_fmt = AV_PIX_FMT_NONE;
>>>>>        f->configured_pix_fmt = AV_PIX_FMT_NONE;
>>>>> +    f->pkt_ref = av_packet_alloc();
>>>>> +    if (!f->pkt_ref)
>>>>> +        return AVERROR(ENOMEM);
>>>>> +
>>>>>        if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
>>>>>            return ret;
>>>>> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx,
>>>>> AVFrame *rframe,
>>>>>        /* Start */
>>>>>        if (hwaccel) {
>>>>> +        ret = av_packet_ref(f->pkt_ref, avpkt);
>>>>> +        if (ret < 0)
>>>>> +            return ret;
>>>>> +
>>>>>            ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
>>>>>            if (ret < 0)
>>>>>                return ret;
>>>>> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx,
>>>>> AVFrame *rframe,
>>>>>                uint32_t len;
>>>>>                ret = find_next_slice(avctx, avpkt->data, buf_end, i,
>>>>>                                      &pos, &len);
>>>>> -            if (ret < 0)
>>>>> +            if (ret < 0) {
>>>>> +                av_packet_unref(f->pkt_ref);
>>>>>                    return ret;
>>>>> +            }
>>>>>                buf_end -= len;
>>>>>                ret = hwaccel->decode_slice(avctx, pos, len);
>>>>> -            if (ret < 0)
>>>>> +            if (ret < 0) {
>>>>> +                av_packet_unref(f->pkt_ref);
>>>>>                    return ret;
>>>>> +            }
>>>>>            }
>>>>> +
>>>>> +        av_packet_unref(f->pkt_ref);
>>>>>        } else {
>>>>>            ret = decode_slices(avctx, c, avpkt);
>>>>>            if (ret < 0)
>>>>> @@ -827,6 +841,7 @@ static av_cold int
>>>>> ffv1_decode_close(AVCodecContext *avctx)
>>>>>        ff_progress_frame_unref(&s->last_picture);
>>>>>        av_refstruct_unref(&s->hwaccel_last_picture_private);
>>>>> +    av_packet_free(&s->pkt_ref);
>>>>>        ff_ffv1_close(s);
>>>>>        return 0;
>>>>
>>>> Why not simply use a const AVPacket*?
>>>
>>> No reason. Fixed locally.
>>> Thanks.
>>
>> *reverted this change.
>> We need to ref the packet, since we map its memory and let the GPU use
>> it directly without copying the contents. 6k16bit content at 24fps is
>> typically around 2Gbps when compressed, so avoiding copies is important.
> 
> How long does the hwaccel need this data?

Until the frame has been asynchronously decoded. We give an output frame 
with a semaphore that receivers need to wait on to determine when that is.

On the decoder side, the hardware has a fixed number of queues to which
submissions can be sent asynchronously. We treat them as a ring buffer
and keep a reference to all resources on our side for each submission
until we need to reuse the slot, at which point we wait for the frame
decoding to complete (which it usually has), and we release all
resources used.

Output frames also have a bit of state that has to be freed once the
frame is marked (unreferenced) by the decoder as no longer being needed
as a reference; this is done in the FFHWAccel.free_frame_priv callback.
There, we have to wait for the last internal use of the frame to be
finished (done via the vp->wait_semaphores() call in vulkan_decode.c).

This is valid for both ASIC hardware decoders and a compute shader based 
implementation, since the two share the same code, except for decode 
submissions.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
  2025-03-13  1:56           ` Lynne
@ 2025-03-13  4:57             ` Andreas Rheinhardt
  2025-03-13 12:51               ` Lynne
  0 siblings, 1 reply; 20+ messages in thread
From: Andreas Rheinhardt @ 2025-03-13  4:57 UTC (permalink / raw)
  To: ffmpeg-devel

Lynne:
> 
> 
> On 13/03/2025 02:24, Andreas Rheinhardt wrote:
>> Lynne:
>>> On 10/03/2025 18:42, Lynne wrote:
>>>> On 10/03/2025 04:14, Andreas Rheinhardt wrote:
>>>>> Lynne:
>>>>>> ---
>>>>>>    libavcodec/ffv1.h    |  3 +++
>>>>>>    libavcodec/ffv1dec.c | 19 +++++++++++++++++--
>>>>>>    2 files changed, 20 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>>>>>> index 8c0e71284d..860a5c14b1 100644
>>>>>> --- a/libavcodec/ffv1.h
>>>>>> +++ b/libavcodec/ffv1.h
>>>>>> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
>>>>>>         * NOT shared between frame threads.
>>>>>>         */
>>>>>>        uint8_t           frame_damaged;
>>>>>> +
>>>>>> +    /* Reference to the current packet */
>>>>>> +    AVPacket *pkt_ref;
>>>>>>    } FFV1Context;
>>>>>>    int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
>>>>>> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
>>>>>> index eaa21eebdf..6396f22f79 100644
>>>>>> --- a/libavcodec/ffv1dec.c
>>>>>> +++ b/libavcodec/ffv1dec.c
>>>>>> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext
>>>>>> *avctx)
>>>>>>        f->pix_fmt = AV_PIX_FMT_NONE;
>>>>>>        f->configured_pix_fmt = AV_PIX_FMT_NONE;
>>>>>> +    f->pkt_ref = av_packet_alloc();
>>>>>> +    if (!f->pkt_ref)
>>>>>> +        return AVERROR(ENOMEM);
>>>>>> +
>>>>>>        if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
>>>>>>            return ret;
>>>>>> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx,
>>>>>> AVFrame *rframe,
>>>>>>        /* Start */
>>>>>>        if (hwaccel) {
>>>>>> +        ret = av_packet_ref(f->pkt_ref, avpkt);
>>>>>> +        if (ret < 0)
>>>>>> +            return ret;
>>>>>> +
>>>>>>            ret = hwaccel->start_frame(avctx, avpkt->data, avpkt-
>>>>>> >size);
>>>>>>            if (ret < 0)
>>>>>>                return ret;
>>>>>> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx,
>>>>>> AVFrame *rframe,
>>>>>>                uint32_t len;
>>>>>>                ret = find_next_slice(avctx, avpkt->data, buf_end, i,
>>>>>>                                      &pos, &len);
>>>>>> -            if (ret < 0)
>>>>>> +            if (ret < 0) {
>>>>>> +                av_packet_unref(f->pkt_ref);
>>>>>>                    return ret;
>>>>>> +            }
>>>>>>                buf_end -= len;
>>>>>>                ret = hwaccel->decode_slice(avctx, pos, len);
>>>>>> -            if (ret < 0)
>>>>>> +            if (ret < 0) {
>>>>>> +                av_packet_unref(f->pkt_ref);
>>>>>>                    return ret;
>>>>>> +            }
>>>>>>            }
>>>>>> +
>>>>>> +        av_packet_unref(f->pkt_ref);
>>>>>>        } else {
>>>>>>            ret = decode_slices(avctx, c, avpkt);
>>>>>>            if (ret < 0)
>>>>>> @@ -827,6 +841,7 @@ static av_cold int
>>>>>> ffv1_decode_close(AVCodecContext *avctx)
>>>>>>        ff_progress_frame_unref(&s->last_picture);
>>>>>>        av_refstruct_unref(&s->hwaccel_last_picture_private);
>>>>>> +    av_packet_free(&s->pkt_ref);
>>>>>>        ff_ffv1_close(s);
>>>>>>        return 0;
>>>>>
>>>>> Why not simply use a const AVPacket*?
>>>>
>>>> No reason. Fixed locally.
>>>> Thanks.
>>>
>>> *reverted this change.
>>> We need to ref the packet, since we map its memory and let the GPU use
>>> it directly without copying the contents. 6k16bit content at 24fps is
>>> typically around 2Gbps when compressed, so avoiding copies is important.
>>
>> How long does the hwaccel need this data?
> 
> Until the frame has been asynchronously decoded. We give an output frame
> with a semaphore that receivers need to wait on to determine when that is.
> 
> On the decoder-side, the hardware has a fixed number of queues where
> submissions can be sent to asynchronously. We treat it as a ring buffer
> and keep a reference to all resources our side for each submission,
> until we need to reuse the slot, at which point we wait on the frame
> decoding to complete (which it usually has), and we release all
> resources used.
> 
> Output frames also have a bit of state that has to be freed once the
> frame is marked (unreferenced) by the decoder as no longer being needed
> as a reference, this is done in the FFHWAccel.free_frame_priv callback.
> There, we have to wait for the last internal use of the frame to be
> finished (done via the vp->wait_semaphores() call in vulkan_decode.c).
> 
> This is valid for both ASIC hardware decoders and a compute shader based
> implementation, since the two share the same code, except for decode
> submissions.

1. If you need a reference to the packet's data, then reference
AVPacket.buf, not the whole AVPacket. This avoids allocating a spare
AVPacket as well as copying side data.
2. It sounds very wrong and fragile that the decoder has to keep a
reference because the hwaccel might need it. There may be future
hwaccels that don't need such a reference etc. It seems better to extend
e.g. the start_frame callback and pass a reference to the input data (no
need to change this for all other start_frame calls; they can pass NULL
until needed).
3. If the user closes the decoder (which is allowed at any time, even
without draining the decoder), ff_codec_close() uninitializes the
hwaccel after calling the decoder's close function; the latter
unreferences the reference to the packet. Is this really safe?

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
  2025-03-13  4:57             ` Andreas Rheinhardt
@ 2025-03-13 12:51               ` Lynne
  0 siblings, 0 replies; 20+ messages in thread
From: Lynne @ 2025-03-13 12:51 UTC (permalink / raw)
  To: ffmpeg-devel

On 13/03/2025 05:57, Andreas Rheinhardt wrote:
> Lynne:
>>
>>
>> On 13/03/2025 02:24, Andreas Rheinhardt wrote:
>>> Lynne:
>>>> On 10/03/2025 18:42, Lynne wrote:
>>>>> On 10/03/2025 04:14, Andreas Rheinhardt wrote:
>>>>>> Lynne:
>>>>>>> ---
>>>>>>>     libavcodec/ffv1.h    |  3 +++
>>>>>>>     libavcodec/ffv1dec.c | 19 +++++++++++++++++--
>>>>>>>     2 files changed, 20 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>>>>>>> index 8c0e71284d..860a5c14b1 100644
>>>>>>> --- a/libavcodec/ffv1.h
>>>>>>> +++ b/libavcodec/ffv1.h
>>>>>>> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
>>>>>>>          * NOT shared between frame threads.
>>>>>>>          */
>>>>>>>         uint8_t           frame_damaged;
>>>>>>> +
>>>>>>> +    /* Reference to the current packet */
>>>>>>> +    AVPacket *pkt_ref;
>>>>>>>     } FFV1Context;
>>>>>>>     int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
>>>>>>> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
>>>>>>> index eaa21eebdf..6396f22f79 100644
>>>>>>> --- a/libavcodec/ffv1dec.c
>>>>>>> +++ b/libavcodec/ffv1dec.c
>>>>>>> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext
>>>>>>> *avctx)
>>>>>>>         f->pix_fmt = AV_PIX_FMT_NONE;
>>>>>>>         f->configured_pix_fmt = AV_PIX_FMT_NONE;
>>>>>>> +    f->pkt_ref = av_packet_alloc();
>>>>>>> +    if (!f->pkt_ref)
>>>>>>> +        return AVERROR(ENOMEM);
>>>>>>> +
>>>>>>>         if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
>>>>>>>             return ret;
>>>>>>> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx,
>>>>>>> AVFrame *rframe,
>>>>>>>         /* Start */
>>>>>>>         if (hwaccel) {
>>>>>>> +        ret = av_packet_ref(f->pkt_ref, avpkt);
>>>>>>> +        if (ret < 0)
>>>>>>> +            return ret;
>>>>>>> +
>>>>>>>             ret = hwaccel->start_frame(avctx, avpkt->data, avpkt-
>>>>>>>> size);
>>>>>>>             if (ret < 0)
>>>>>>>                 return ret;
>>>>>>> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx,
>>>>>>> AVFrame *rframe,
>>>>>>>                 uint32_t len;
>>>>>>>                 ret = find_next_slice(avctx, avpkt->data, buf_end, i,
>>>>>>>                                       &pos, &len);
>>>>>>> -            if (ret < 0)
>>>>>>> +            if (ret < 0) {
>>>>>>> +                av_packet_unref(f->pkt_ref);
>>>>>>>                     return ret;
>>>>>>> +            }
>>>>>>>                 buf_end -= len;
>>>>>>>                 ret = hwaccel->decode_slice(avctx, pos, len);
>>>>>>> -            if (ret < 0)
>>>>>>> +            if (ret < 0) {
>>>>>>> +                av_packet_unref(f->pkt_ref);
>>>>>>>                     return ret;
>>>>>>> +            }
>>>>>>>             }
>>>>>>> +
>>>>>>> +        av_packet_unref(f->pkt_ref);
>>>>>>>         } else {
>>>>>>>             ret = decode_slices(avctx, c, avpkt);
>>>>>>>             if (ret < 0)
>>>>>>> @@ -827,6 +841,7 @@ static av_cold int
>>>>>>> ffv1_decode_close(AVCodecContext *avctx)
>>>>>>>         ff_progress_frame_unref(&s->last_picture);
>>>>>>>         av_refstruct_unref(&s->hwaccel_last_picture_private);
>>>>>>> +    av_packet_free(&s->pkt_ref);
>>>>>>>         ff_ffv1_close(s);
>>>>>>>         return 0;
>>>>>>
>>>>>> Why not simply use a const AVPacket*?
>>>>>
>>>>> No reason. Fixed locally.
>>>>> Thanks.
>>>>
>>>> *reverted this change.
>>>> We need to ref the packet, since we map its memory and let the GPU use
>>>> it directly without copying the contents. 6k16bit content at 24fps is
>>>> typically around 2Gbps when compressed, so avoiding copies is important.
>>>
>>> How long does the hwaccel need this data?
>>
>> Until the frame has been asynchronously decoded. We give an output frame
>> with a semaphore that receivers need to wait on to determine when that is.
>>
>> On the decoder-side, the hardware has a fixed number of queues where
>> submissions can be sent to asynchronously. We treat it as a ring buffer
>> and keep a reference to all resources our side for each submission,
>> until we need to reuse the slot, at which point we wait on the frame
>> decoding to complete (which it usually has), and we release all
>> resources used.
>>
>> Output frames also have a bit of state that has to be freed once the
>> frame is marked (unreferenced) by the decoder as no longer being needed
>> as a reference, this is done in the FFHWAccel.free_frame_priv callback.
>> There, we have to wait for the last internal use of the frame to be
>> finished (done via the vp->wait_semaphores() call in vulkan_decode.c).
>>
>> This is valid for both ASIC hardware decoders and a compute shader based
>> implementation, since the two share the same code, except for decode
>> submissions.
> 
> 1. If you need a reference to the packet's data, then reference
> AVPacket.buf, not the whole AVPacket. This avoids allocating a spare
> AVPacket as well as copying side data.
> 2. It sounds very wrong and fragile that the decoder has to keep a
> reference because the hwaccel might need it. There may be future
> hwaccels that don't need such a reference etc. It seems better to extend
> e.g. the start_frame callback and pass a reference to the input data (no
> need to change this for all other start_frame calls; they can pass NULL
> until needed).
> 3. If the user closes the decoder (which is allowed at any time, even
> without draining the decoder), ff_codec_close() uninitializes the
> hwaccel after calling the decoder's close function; the latter
> unreferences the reference to the packet. Is this really safe?

I wanted to avoid having to change the FFHWAccel API for a single 
hwaccel, so I opted to simply add a field to the codec private context.
I'll add an argument to start_frame and resubmit.

I don't see why it wouldn't be safe: upon uninit, all submissions are
waited on to complete before the resources they held are unreferenced.
The output frames themselves have no references to any resources used
during decoding, so all resources are guaranteed to be freed at uninit.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2025-03-13 12:51 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-10  3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 03/13] vulkan: copy host-mapping buffer code from hwcontext Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 04/13] vulkan: workaround BGR storage image undefined behaviour Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 05/13] vulkan_decode: support software-defined decoders Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 06/13] vulkan_decode: support multiple image views Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 07/13] vulkan_decode: adjust number of async contexts created Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 08/13] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 09/13] vulkan: unify handling of BGR and simplify ffv1_rct Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 10/13] ffv1dec: add support for hwaccels Lynne
2025-03-10  3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
2025-03-10  3:08   ` [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem() Lynne
2025-03-10  3:08   ` [FFmpeg-devel] [PATCH 13/13] ffv1: add a Vulkan-based decoder Lynne
2025-03-10  3:14   ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Andreas Rheinhardt
2025-03-10 17:42     ` Lynne
2025-03-13  0:30       ` Lynne
2025-03-13  1:24         ` Andreas Rheinhardt
2025-03-13  1:56           ` Lynne
2025-03-13  4:57             ` Andreas Rheinhardt
2025-03-13 12:51               ` Lynne

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git