* [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img
@ 2025-03-10 3:08 Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview Lynne
` (9 more replies)
0 siblings, 10 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/vulkan.c | 34 +++++++++++++++++-----------------
libavutil/vulkan.h | 8 ++++----
2 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 2ae619967a..51372965e0 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -2390,10 +2390,10 @@ static inline void update_set_pool_write(FFVulkanContext *s, FFVkExecContext *e,
}
}
-int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanShader *shd,
- FFVkExecContext *e, int set, int bind, int offs,
- VkImageView view, VkImageLayout layout,
- VkSampler sampler)
+int ff_vk_shader_update_img(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanShader *shd, int set, int bind, int offs,
+ VkImageView view, VkImageLayout layout,
+ VkSampler sampler)
{
FFVulkanDescriptorSet *desc_set = &shd->desc_set[set];
@@ -2455,6 +2455,19 @@ int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanShader *shd,
return 0;
}
+void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanShader *shd, AVFrame *f,
+ VkImageView *views, int set, int binding,
+ VkImageLayout layout, VkSampler sampler)
+{
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+ for (int i = 0; i < nb_planes; i++)
+ ff_vk_shader_update_img(s, e, shd, set, binding, i,
+ views[i], layout, sampler);
+}
+
int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
FFVulkanShader *shd,
int set, int bind, int elem,
@@ -2521,19 +2534,6 @@ int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
return 0;
}
-void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e,
- FFVulkanShader *shd, AVFrame *f,
- VkImageView *views, int set, int binding,
- VkImageLayout layout, VkSampler sampler)
-{
- AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
- const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
-
- for (int i = 0; i < nb_planes; i++)
- ff_vk_set_descriptor_image(s, shd, e, set, binding, i,
- views[i], layout, sampler);
-}
-
void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e,
FFVulkanShader *shd,
VkShaderStageFlagBits stage,
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 617df952c4..41071b245e 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -593,10 +593,10 @@ int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
/**
* Sets an image descriptor for specified shader and binding.
*/
-int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanShader *shd,
- FFVkExecContext *e, int set, int bind, int offs,
- VkImageView view, VkImageLayout layout,
- VkSampler sampler);
+int ff_vk_shader_update_img(FFVulkanContext *s, FFVkExecContext *e,
+ FFVulkanShader *shd, int set, int bind, int offs,
+ VkImageView view, VkImageLayout layout,
+ VkSampler sampler);
/**
* Update a descriptor in a buffer with an image array..
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 03/13] vulkan: copy host-mapping buffer code from hwcontext Lynne
` (8 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/vulkan.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++
libavutil/vulkan.h | 7 +++++++
2 files changed, 58 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 51372965e0..4bfa877278 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1595,6 +1595,57 @@ static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt)
return VK_FORMAT_UNDEFINED;
}
+int ff_vk_create_imageview(FFVulkanContext *s,
+ VkImageView *img_view, VkImageAspectFlags *aspect,
+ AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt)
+{
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
+ const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format);
+ AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+ const int nb_images = ff_vk_count_images(vkf);
+
+ VkImageViewUsageCreateInfo view_usage_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
+ .usage = vkfc->usage &
+ (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR |
+ VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)),
+ };
+ VkImageViewCreateInfo view_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = &view_usage_info,
+ .image = vkf->img[FFMIN(plane, nb_images - 1)],
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = map_fmt_to_rep(rep_fmts[plane], rep_fmt),
+ .components = ff_comp_identity_map,
+ .subresourceRange = {
+ .aspectMask = ff_vk_aspect_flag(f, plane),
+ .levelCount = 1,
+ .layerCount = 1,
+ },
+ };
+ if (view_create_info.format == VK_FORMAT_UNDEFINED) {
+ av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation "
+ "of format %i and mode %i\n",
+ rep_fmts[plane], rep_fmt);
+ return AVERROR(EINVAL);
+ }
+
+ ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info,
+ s->hwctx->alloc, img_view);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ *aspect = view_create_info.subresourceRange.aspectMask;
+
+ return 0;
+}
+
int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
VkImageView views[AV_NUM_DATA_POINTERS],
AVFrame *f, enum FFVkShaderRepFormat rep_fmt)
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 41071b245e..73c4713166 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -457,6 +457,13 @@ int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e,
AVFrame *f);
void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e);
+/**
+ * Create a single imageview for a given plane.
+ */
+int ff_vk_create_imageview(FFVulkanContext *s,
+ VkImageView *img_view, VkImageAspectFlags *aspect,
+ AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt);
+
/**
* Create an imageview and add it as a dependency to an execution.
*/
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 03/13] vulkan: copy host-mapping buffer code from hwcontext
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 04/13] vulkan: workaround BGR storage image undefined behaviour Lynne
` (7 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
The host-mapping code is useful outside of hwcontext, so copy it into the
shared Vulkan helpers as ff_vk_host_map_buffer().
---
libavutil/vulkan.c | 155 ++++++++++++++++++++++++++++++++++++++++++++-
libavutil/vulkan.h | 18 +++++-
2 files changed, 170 insertions(+), 3 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 4bfa877278..7f6c9cc039 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1123,6 +1123,8 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers,
.memory = buf[i]->mem,
.size = VK_WHOLE_SIZE,
};
+
+ av_assert0(!buf[i]->host_ref);
if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
continue;
flush_list[flush_count++] = flush_buf;
@@ -1154,12 +1156,18 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
if (!buf || !s->hwctx)
return;
- if (buf->mapped_mem)
+ if (buf->mapped_mem && !buf->host_ref)
ff_vk_unmap_buffer(s, buf, 0);
if (buf->buf != VK_NULL_HANDLE)
vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
if (buf->mem != VK_NULL_HANDLE)
vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
+ if (buf->host_ref)
+ av_buffer_unref(&buf->host_ref);
+
+ buf->buf = VK_NULL_HANDLE;
+ buf->mem = VK_NULL_HANDLE;
+ buf->mapped_mem = NULL;
}
static void free_data_buf(void *opaque, uint8_t *data)
@@ -1236,6 +1244,149 @@ int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
return 0;
}
+static int create_mapped_buffer(FFVulkanContext *s,
+ FFVkBuffer *vkb, VkBufferUsageFlags usage,
+ size_t size,
+ VkExternalMemoryBufferCreateInfo *create_desc,
+ VkImportMemoryHostPointerInfoEXT *import_desc,
+ VkMemoryHostPointerPropertiesEXT props)
+{
+ int err;
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ VkBufferCreateInfo buf_spawn = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = create_desc,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .size = size,
+ };
+ VkMemoryRequirements req = {
+ .size = size,
+ .alignment = s->hprops.minImportedHostPointerAlignment,
+ .memoryTypeBits = props.memoryTypeBits,
+ };
+
+ err = ff_vk_alloc_mem(s, &req,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+ import_desc, &vkb->flags, &vkb->mem);
+ if (err < 0)
+ return err;
+
+ ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &vkb->buf);
+ if (ret != VK_SUCCESS) {
+ vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc);
+ return AVERROR_EXTERNAL;
+ }
+
+ ret = vk->BindBufferMemory(s->hwctx->act_dev, vkb->buf, vkb->mem, 0);
+ if (ret != VK_SUCCESS) {
+ vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc);
+ vk->DestroyBuffer(s->hwctx->act_dev, vkb->buf, s->hwctx->alloc);
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst,
+ uint8_t *src_data, AVBufferRef *src_buf,
+ VkBufferUsageFlags usage)
+{
+ int err;
+ VkResult ret;
+ FFVulkanFunctions *vk = &s->vkfn;
+
+ VkExternalMemoryBufferCreateInfo create_desc = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+ };
+ VkMemoryAllocateFlagsInfo alloc_flags = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
+ .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
+ };
+ VkImportMemoryHostPointerInfoEXT import_desc = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+ .pNext = usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? &alloc_flags : NULL,
+ };
+ VkMemoryHostPointerPropertiesEXT props;
+
+ AVBufferRef *ref;
+ FFVkBuffer *vkb;
+ size_t offs;
+ size_t buffer_size;
+
+ *dst = NULL;
+
+ /* Round the pointer down to the previous aligned address at which mapping is possible */
+ offs = (uintptr_t)src_data % s->hprops.minImportedHostPointerAlignment;
+ import_desc.pHostPointer = src_data - offs;
+
+ props = (VkMemoryHostPointerPropertiesEXT) {
+ VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
+ };
+ ret = vk->GetMemoryHostPointerPropertiesEXT(s->hwctx->act_dev,
+ import_desc.handleType,
+ import_desc.pHostPointer,
+ &props);
+ if (!(ret == VK_SUCCESS && props.memoryTypeBits))
+ return AVERROR(EINVAL);
+
+ /* Ref the source buffer */
+ ref = av_buffer_ref(src_buf);
+ if (!ref)
+ return AVERROR(ENOMEM);
+
+ /* Add the offset at the start, which gets ignored */
+ buffer_size = offs + src_buf->size;
+ buffer_size = FFALIGN(buffer_size, s->props.properties.limits.minMemoryMapAlignment);
+ buffer_size = FFALIGN(buffer_size, s->hprops.minImportedHostPointerAlignment);
+
+ /* Create a buffer struct */
+ vkb = av_mallocz(sizeof(*vkb));
+ if (!vkb) {
+ av_buffer_unref(&ref);
+ return AVERROR(ENOMEM);
+ }
+
+ err = create_mapped_buffer(s, vkb, usage,
+ buffer_size, &create_desc, &import_desc,
+ props);
+ if (err < 0) {
+ av_buffer_unref(&ref);
+ av_free(vkb);
+ return err;
+ }
+
+ if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+ VkBufferDeviceAddressInfo address_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+ .buffer = vkb->buf,
+ };
+ vkb->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
+ }
+
+ vkb->host_ref = ref;
+ vkb->virtual_offset = offs;
+ vkb->address += offs;
+ vkb->mapped_mem = src_data;
+ vkb->size = buffer_size - offs;
+ vkb->flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+ /* Create a ref */
+ *dst = av_buffer_create((uint8_t *)vkb, sizeof(*vkb),
+ destroy_avvkbuf, s, 0);
+ if (!(*dst)) {
+ destroy_avvkbuf(s, (uint8_t *)vkb);
+ *dst = NULL;
+ return AVERROR(ENOMEM);
+ }
+
+ return 0;
+}
+
int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size,
VkShaderStageFlagBits stage)
{
@@ -2568,7 +2719,7 @@ int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e,
} else {
VkDescriptorBufferInfo desc_pool_write_info_buf = {
.buffer = buf->buf,
- .offset = offset,
+ .offset = buf->virtual_offset + offset,
.range = len,
};
VkWriteDescriptorSet desc_pool_write_info = {
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 73c4713166..89fc4eedc5 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -96,8 +96,17 @@ typedef struct FFVkBuffer {
VkPipelineStageFlags2 stage;
VkAccessFlags2 access;
- /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE */
+ /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE or
+ * via ff_vk_host_map_buffer */
uint8_t *mapped_mem;
+
+ /* Set by ff_vk_host_map_buffer. This is the offset at which the buffer data
+ * actually begins.
+ * The address and mapped_mem fields will be offset by this amount. */
+ size_t virtual_offset;
+
+ /* If host mapping, reference to the backing host memory buffer */
+ AVBufferRef *host_ref;
} FFVkBuffer;
typedef struct FFVkExecContext {
@@ -523,6 +532,13 @@ int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
void *create_pNext, size_t size,
VkMemoryPropertyFlagBits mem_props);
+/** Maps a system RAM buffer into a Vulkan buffer.
+ * References the source buffer.
+ */
+int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst,
+ uint8_t *src_data, AVBufferRef *src_buf,
+ VkBufferUsageFlags usage);
+
/**
* Create a sampler.
*/
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 04/13] vulkan: workaround BGR storage image undefined behaviour
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 03/13] vulkan: copy host-mapping buffer code from hwcontext Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 05/13] vulkan_decode: support software-defined decoders Lynne
` (6 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavutil/vulkan.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7f6c9cc039..7b0f77b076 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1746,6 +1746,29 @@ static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt)
return VK_FORMAT_UNDEFINED;
}
+static void bgr_workaround(AVVulkanFramesContext *vkfc,
+ VkImageViewCreateInfo *ci)
+{
+ if (!(vkfc->usage & VK_IMAGE_USAGE_STORAGE_BIT))
+ return;
+ switch (ci->format) {
+#define REMAP(src, dst) \
+ case src: \
+ ci->format = dst; \
+ return;
+ REMAP(VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R8G8B8A8_UNORM)
+ REMAP(VK_FORMAT_B8G8R8A8_SINT, VK_FORMAT_R8G8B8A8_SINT)
+ REMAP(VK_FORMAT_B8G8R8A8_UINT, VK_FORMAT_R8G8B8A8_UINT)
+ REMAP(VK_FORMAT_B8G8R8_UNORM, VK_FORMAT_R8G8B8_UNORM)
+ REMAP(VK_FORMAT_B8G8R8_SINT, VK_FORMAT_R8G8B8_SINT)
+ REMAP(VK_FORMAT_B8G8R8_UINT, VK_FORMAT_R8G8B8_UINT)
+ REMAP(VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_FORMAT_A2R10G10B10_UNORM_PACK32)
+#undef REMAP
+ default:
+ return;
+ }
+}
+
int ff_vk_create_imageview(FFVulkanContext *s,
VkImageView *img_view, VkImageAspectFlags *aspect,
AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt)
@@ -1777,6 +1800,7 @@ int ff_vk_create_imageview(FFVulkanContext *s,
.layerCount = 1,
},
};
+ bgr_workaround(vkfc, &view_create_info);
if (view_create_info.format == VK_FORMAT_UNDEFINED) {
av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation "
"of format %i and mode %i\n",
@@ -1838,6 +1862,7 @@ int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e,
.layerCount = 1,
},
};
+ bgr_workaround(vkfc, &view_create_info);
if (view_create_info.format == VK_FORMAT_UNDEFINED) {
av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation "
"of format %i and mode %i\n",
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 05/13] vulkan_decode: support software-defined decoders
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
` (2 preceding siblings ...)
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 04/13] vulkan: workaround BGR storage image undefined behaviour Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 06/13] vulkan_decode: support multiple image views Lynne
` (5 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/vulkan_decode.c | 194 +++++++++++++++++++++++++++----------
libavcodec/vulkan_decode.h | 11 +++
2 files changed, 154 insertions(+), 51 deletions(-)
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index c57998108c..594764a904 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -24,6 +24,9 @@
#include "libavutil/mem.h"
#include "libavutil/vulkan_loader.h"
+#define DECODER_IS_SDR(codec_id) \
+ ((codec_id) == AV_CODEC_ID_FFV1)
+
#if CONFIG_H264_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc;
#endif
@@ -63,7 +66,9 @@ static const VkVideoProfileInfoKHR *get_video_profile(FFVulkanDecodeShared *ctx,
codec_id == AV_CODEC_ID_H264 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR :
codec_id == AV_CODEC_ID_HEVC ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR :
codec_id == AV_CODEC_ID_AV1 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR :
- 0;
+ VK_STRUCTURE_TYPE_MAX_ENUM;
+ if (profile_struct_type == VK_STRUCTURE_TYPE_MAX_ENUM)
+ return NULL;
profile_list = ff_vk_find_struct(ctx->s.hwfc->create_pnext,
VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR);
@@ -119,13 +124,26 @@ static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx)
return avf;
}
+static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic)
+{
+ FFVulkanDecodeShared *ctx = dec->shared_ctx;
+ FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+ vkpic->dpb_frame = NULL;
+ vkpic->img_view_ref = VK_NULL_HANDLE;
+ vkpic->img_view_out = VK_NULL_HANDLE;
+ vkpic->img_view_dest = VK_NULL_HANDLE;
+
+ vkpic->destroy_image_view = vk->DestroyImageView;
+ vkpic->wait_semaphores = vk->WaitSemaphores;
+}
+
int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
FFVulkanDecodePicture *vkpic, int is_current,
int alloc_dpb)
{
int err;
FFVulkanDecodeShared *ctx = dec->shared_ctx;
- FFVulkanFunctions *vk = &ctx->s.vkfn;
vkpic->slices_size = 0;
@@ -134,13 +152,7 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
if (vkpic->img_view_ref)
return 0;
- vkpic->dpb_frame = NULL;
- vkpic->img_view_ref = VK_NULL_HANDLE;
- vkpic->img_view_out = VK_NULL_HANDLE;
- vkpic->img_view_dest = VK_NULL_HANDLE;
-
- vkpic->destroy_image_view = vk->DestroyImageView;
- vkpic->wait_semaphores = vk->WaitSemaphores;
+ init_frame(dec, vkpic);
if (ctx->common.layered_dpb && alloc_dpb) {
vkpic->img_view_ref = ctx->common.layered_view;
@@ -183,6 +195,53 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
return 0;
}
+int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic,
+ FFVulkanDecodePicture *vkpic, int is_current,
+ enum FFVkShaderRepFormat rep_fmt, int alloc_dpb)
+{
+ int err;
+ FFVulkanDecodeShared *ctx = dec->shared_ctx;
+
+ vkpic->slices_size = 0;
+
+ if (vkpic->img_view_ref)
+ return 0;
+
+ init_frame(dec, vkpic);
+
+ if (ctx->common.layered_dpb && alloc_dpb) {
+ vkpic->img_view_ref = ctx->common.layered_view;
+ vkpic->img_aspect_ref = ctx->common.layered_aspect;
+ } else if (alloc_dpb) {
+ vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+ if (!vkpic->dpb_frame)
+ return AVERROR(ENOMEM);
+
+ err = ff_vk_create_imageview(&ctx->s,
+ &vkpic->img_view_ref, &vkpic->img_aspect_ref,
+ vkpic->dpb_frame, 0, rep_fmt);
+ if (err < 0)
+ return err;
+
+ vkpic->img_view_dest = vkpic->img_view_ref;
+ }
+
+ if (!alloc_dpb || is_current) {
+ err = ff_vk_create_imageview(&ctx->s,
+ &vkpic->img_view_out, &vkpic->img_aspect,
+ pic, 0, rep_fmt);
+ if (err < 0)
+ return err;
+
+ if (!alloc_dpb) {
+ vkpic->img_view_ref = vkpic->img_view_out;
+ vkpic->img_aspect_ref = vkpic->img_aspect;
+ }
+ }
+
+ return 0;
+}
+
int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp,
const uint8_t *data, size_t size, int add_startcode,
uint32_t *nb_slices, const uint32_t **offsets)
@@ -223,9 +282,14 @@ int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp,
buf_size = 2 << av_log2(buf_size);
err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &new_ref,
+ DECODER_IS_SDR(avctx->codec_id) ?
+ (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) :
VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR,
ctx->s.hwfc->create_pnext, buf_size,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ (DECODER_IS_SDR(avctx->codec_id) ?
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0));
if (err < 0)
return err;
@@ -276,6 +340,10 @@ void ff_vk_decode_flush(AVCodecContext *avctx)
VkCommandBuffer cmd_buf;
FFVkExecContext *exec;
+ /* Non-video queues do not need to be reset */
+ if (!(get_codecdesc(avctx->codec_id)->decode_op))
+ return;
+
exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
ff_vk_exec_start(&ctx->s, exec);
cmd_buf = exec->buf;
@@ -544,6 +612,9 @@ static void free_common(AVRefStructOpaque unused, void *obj)
ff_vk_video_common_uninit(s, &ctx->common);
+ if (ctx->sd_ctx_free)
+ ctx->sd_ctx_free(ctx);
+
ff_vk_uninit(s);
}
@@ -551,6 +622,7 @@ static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_re
{
int err;
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ const FFVulkanDecodeDescriptor *vk_desc = get_codecdesc(avctx->codec_id);
AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data;
AVVulkanDeviceContext *hwctx = device->hwctx;
@@ -569,11 +641,13 @@ static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_re
ctx->s.extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions,
hwctx->nb_enabled_dev_extensions);
- if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
- av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
- VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
- av_refstruct_unref(&dec->shared_ctx);
- return AVERROR(ENOSYS);
+ if (vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
+ if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
+ av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
+ VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
+ av_refstruct_unref(&dec->shared_ctx);
+ return AVERROR(ENOSYS);
+ }
}
err = ff_vk_load_functions(device, &ctx->s.vkfn, ctx->s.extensions, 1, 1);
@@ -927,53 +1001,61 @@ static void free_profile_data(AVHWFramesContext *hwfc)
int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
{
- VkFormat vkfmt;
+ VkFormat vkfmt = VK_FORMAT_UNDEFINED;
int err, dedicated_dpb;
AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
AVVulkanFramesContext *hwfc = frames_ctx->hwctx;
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
- FFVulkanDecodeProfileData *prof;
- FFVulkanDecodeShared *ctx;
-
- frames_ctx->sw_format = AV_PIX_FMT_NONE;
+ FFVulkanDecodeProfileData *prof = NULL;
err = vulkan_decode_bootstrap(avctx, hw_frames_ctx);
if (err < 0)
return err;
- prof = av_mallocz(sizeof(FFVulkanDecodeProfileData));
- if (!prof)
- return AVERROR(ENOMEM);
+ frames_ctx->sw_format = avctx->sw_pix_fmt;
- err = vulkan_decode_get_profile(avctx, hw_frames_ctx,
- &frames_ctx->sw_format, &vkfmt,
- prof, &dedicated_dpb);
- if (err < 0) {
- av_free(prof);
- return err;
- }
+ if (!DECODER_IS_SDR(avctx->codec_id)) {
+ prof = av_mallocz(sizeof(FFVulkanDecodeProfileData));
+ if (!prof)
+ return AVERROR(ENOMEM);
- frames_ctx->user_opaque = prof;
- frames_ctx->free = free_profile_data;
+ err = vulkan_decode_get_profile(avctx, hw_frames_ctx,
+ &frames_ctx->sw_format, &vkfmt,
+ prof, &dedicated_dpb);
+ if (err < 0) {
+ av_free(prof);
+ return err;
+ }
+
+ frames_ctx->user_opaque = prof;
+ frames_ctx->free = free_profile_data;
+
+ hwfc->create_pnext = &prof->profile_list;
+ }
frames_ctx->width = avctx->coded_width;
frames_ctx->height = avctx->coded_height;
frames_ctx->format = AV_PIX_FMT_VULKAN;
hwfc->format[0] = vkfmt;
- hwfc->create_pnext = &prof->profile_list;
hwfc->tiling = VK_IMAGE_TILING_OPTIMAL;
hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR;
+ VK_IMAGE_USAGE_SAMPLED_BIT;
- if (!dec->dedicated_dpb)
- hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+ if (prof) {
+ FFVulkanDecodeShared *ctx;
- ctx = dec->shared_ctx;
- if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
- FF_VK_EXT_VIDEO_MAINTENANCE_1))
- hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
+ hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR;
+ if (!dec->dedicated_dpb)
+ hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
+
+ ctx = dec->shared_ctx;
+ if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
+ FF_VK_EXT_VIDEO_MAINTENANCE_1))
+ hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
+ } else if (DECODER_IS_SDR(avctx->codec_id)) {
+ hwfc->usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+ }
return err;
}
@@ -1075,8 +1157,10 @@ int ff_vk_decode_init(AVCodecContext *avctx)
if (err < 0)
return err;
+ vk_desc = get_codecdesc(avctx->codec_id);
+
profile = get_video_profile(ctx, avctx->codec_id);
- if (!profile) {
+ if ((vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) && !profile) {
av_log(avctx, AV_LOG_ERROR, "Video profile missing from frames context!");
return AVERROR(EINVAL);
}
@@ -1109,9 +1193,11 @@ int ff_vk_decode_init(AVCodecContext *avctx)
if (err < 0)
goto fail;
- err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
- if (err < 0)
- goto fail;
+ if (profile) {
+ err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
+ if (err < 0)
+ goto fail;
+ }
/* If doing an out-of-place decoding, create a DPB pool */
if (dec->dedicated_dpb || avctx->codec_id == AV_CODEC_ID_AV1) {
@@ -1163,12 +1249,18 @@ int ff_vk_decode_init(AVCodecContext *avctx)
}
session_params_create.videoSession = ctx->common.session;
- ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
- s->hwctx->alloc, &ctx->empty_session_params);
- if (ret != VK_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n",
- ff_vk_ret2str(ret));
- return AVERROR_EXTERNAL;
+ if (profile) {
+ ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create,
+ s->hwctx->alloc, &ctx->empty_session_params);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ } else {
+ /* For SDR decoders, this alignment value will be 0. Since this will make
+ * add_slice() malfunction, set it to a sane default value. */
+ ctx->caps.minBitstreamBufferSizeAlignment = AV_INPUT_BUFFER_PADDING_SIZE;
}
driver_props = &dec->shared_ctx->s.driver_props;
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 1d89db323f..5c743e96d2 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -56,6 +56,10 @@ typedef struct FFVulkanDecodeShared {
VkVideoDecodeCapabilitiesKHR dec_caps;
VkVideoSessionParametersKHR empty_session_params;
+
+ /* Software-defined decoder context */
+ void *sd_ctx;
+ void (*sd_ctx_free)(struct FFVulkanDecodeShared *ctx);
} FFVulkanDecodeShared;
typedef struct FFVulkanDecodeContext {
@@ -141,6 +145,13 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
FFVulkanDecodePicture *vkpic, int is_current,
int alloc_dpb);
+/**
+ * Software-defined decoder version of ff_vk_decode_prepare_frame.
+ */
+int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic,
+ FFVulkanDecodePicture *vkpic, int is_current,
+ enum FFVkShaderRepFormat rep_fmt, int alloc_dpb);
+
/**
* Add slice data to frame.
*/
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 06/13] vulkan_decode: support multiple image views
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
` (3 preceding siblings ...)
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 05/13] vulkan_decode: support software-defined decoders Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 07/13] vulkan_decode: adjust number of async contexts created Lynne
` (4 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
Enables non-monochrome video decoding using all our existing functions
in the context of a software-defined (SDR) decoder.
---
libavcodec/vulkan_av1.c | 4 +-
libavcodec/vulkan_decode.c | 90 ++++++++++++++++++++------------------
libavcodec/vulkan_decode.h | 12 ++---
libavcodec/vulkan_h264.c | 4 +-
libavcodec/vulkan_hevc.c | 4 +-
5 files changed, 60 insertions(+), 54 deletions(-)
diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c
index 6659f9d812..7dd7b204d7 100644
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@@ -123,7 +123,7 @@ static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src,
.codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
.baseArrayLayer = ((has_grain || dec->dedicated_dpb) && ctx->common.layered_dpb) ?
hp->frame_id : 0,
- .imageViewBinding = vkpic->img_view_ref,
+ .imageViewBinding = vkpic->view.ref[0],
};
*ref_slot = (VkVideoReferenceSlotInfoKHR) {
@@ -346,7 +346,7 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
.codedOffset = (VkOffset2D){ 0, 0 },
.codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
.baseArrayLayer = 0,
- .imageViewBinding = vp->img_view_out,
+ .imageViewBinding = vp->view.out[0],
},
};
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 594764a904..7f638d6fc6 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -130,9 +130,11 @@ static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic)
FFVulkanFunctions *vk = &ctx->s.vkfn;
vkpic->dpb_frame = NULL;
- vkpic->img_view_ref = VK_NULL_HANDLE;
- vkpic->img_view_out = VK_NULL_HANDLE;
- vkpic->img_view_dest = VK_NULL_HANDLE;
+ for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) {
+ vkpic->view.ref[i] = VK_NULL_HANDLE;
+ vkpic->view.out[i] = VK_NULL_HANDLE;
+ vkpic->view.dst[i] = VK_NULL_HANDLE;
+ }
vkpic->destroy_image_view = vk->DestroyImageView;
vkpic->wait_semaphores = vk->WaitSemaphores;
@@ -149,14 +151,14 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
/* If the decoder made a blank frame to make up for a missing ref, or the
* frame is the current frame so it's missing one, create a re-representation */
- if (vkpic->img_view_ref)
+ if (vkpic->view.ref[0])
return 0;
init_frame(dec, vkpic);
if (ctx->common.layered_dpb && alloc_dpb) {
- vkpic->img_view_ref = ctx->common.layered_view;
- vkpic->img_aspect_ref = ctx->common.layered_aspect;
+ vkpic->view.ref[0] = ctx->common.layered_view;
+ vkpic->view.aspect_ref[0] = ctx->common.layered_aspect;
} else if (alloc_dpb) {
AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data;
AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx;
@@ -166,13 +168,13 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
return AVERROR(ENOMEM);
err = ff_vk_create_view(&ctx->s, &ctx->common,
- &vkpic->img_view_ref, &vkpic->img_aspect_ref,
+ &vkpic->view.ref[0], &vkpic->view.aspect_ref[0],
(AVVkFrame *)vkpic->dpb_frame->data[0],
dpb_hwfc->format[0], !is_current);
if (err < 0)
return err;
- vkpic->img_view_dest = vkpic->img_view_ref;
+ vkpic->view.dst[0] = vkpic->view.ref[0];
}
if (!alloc_dpb || is_current) {
@@ -180,15 +182,15 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
AVVulkanFramesContext *hwfc = frames->hwctx;
err = ff_vk_create_view(&ctx->s, &ctx->common,
- &vkpic->img_view_out, &vkpic->img_aspect,
+ &vkpic->view.out[0], &vkpic->view.aspect[0],
(AVVkFrame *)pic->data[0],
hwfc->format[0], !is_current);
if (err < 0)
return err;
if (!alloc_dpb) {
- vkpic->img_view_ref = vkpic->img_view_out;
- vkpic->img_aspect_ref = vkpic->img_aspect;
+ vkpic->view.ref[0] = vkpic->view.out[0];
+ vkpic->view.aspect_ref[0] = vkpic->view.aspect[0];
}
}
@@ -201,41 +203,41 @@ int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic,
{
int err;
FFVulkanDecodeShared *ctx = dec->shared_ctx;
+ AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data;
vkpic->slices_size = 0;
- if (vkpic->img_view_ref)
+ if (vkpic->view.ref[0])
return 0;
init_frame(dec, vkpic);
- if (ctx->common.layered_dpb && alloc_dpb) {
- vkpic->img_view_ref = ctx->common.layered_view;
- vkpic->img_aspect_ref = ctx->common.layered_aspect;
- } else if (alloc_dpb) {
- vkpic->dpb_frame = vk_get_dpb_pool(ctx);
- if (!vkpic->dpb_frame)
- return AVERROR(ENOMEM);
+ for (int i = 0; i < av_pix_fmt_count_planes(frames->sw_format); i++) {
+ if (alloc_dpb) {
+ vkpic->dpb_frame = vk_get_dpb_pool(ctx);
+ if (!vkpic->dpb_frame)
+ return AVERROR(ENOMEM);
- err = ff_vk_create_imageview(&ctx->s,
- &vkpic->img_view_ref, &vkpic->img_aspect_ref,
- vkpic->dpb_frame, 0, rep_fmt);
- if (err < 0)
- return err;
+ err = ff_vk_create_imageview(&ctx->s,
+ &vkpic->view.ref[i], &vkpic->view.aspect_ref[i],
+ vkpic->dpb_frame, i, rep_fmt);
+ if (err < 0)
+ return err;
- vkpic->img_view_dest = vkpic->img_view_ref;
- }
+ vkpic->view.dst[i] = vkpic->view.ref[i];
+ }
- if (!alloc_dpb || is_current) {
- err = ff_vk_create_imageview(&ctx->s,
- &vkpic->img_view_out, &vkpic->img_aspect,
- pic, 0, rep_fmt);
- if (err < 0)
- return err;
+ if (!alloc_dpb || is_current) {
+ err = ff_vk_create_imageview(&ctx->s,
+ &vkpic->view.out[i], &vkpic->view.aspect[i],
+ pic, i, rep_fmt);
+ if (err < 0)
+ return err;
- if (!alloc_dpb) {
- vkpic->img_view_ref = vkpic->img_view_out;
- vkpic->img_aspect_ref = vkpic->img_aspect;
+ if (!alloc_dpb) {
+ vkpic->view.ref[i] = vkpic->view.out[i];
+ vkpic->view.aspect_ref[i] = vkpic->view.aspect[i];
+ }
}
}
@@ -467,7 +469,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = vkf->img[0],
.subresourceRange = (VkImageSubresourceRange) {
- .aspectMask = vp->img_aspect,
+ .aspectMask = vp->view.aspect[0],
.layerCount = 1,
.levelCount = 1,
},
@@ -523,7 +525,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = rvkf->img[0],
.subresourceRange = (VkImageSubresourceRange) {
- .aspectMask = rvp->img_aspect_ref,
+ .aspectMask = rvp->view.aspect_ref[0],
.layerCount = 1,
.levelCount = 1,
},
@@ -533,7 +535,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
}
}
} else if (vp->decode_info.referenceSlotCount ||
- vp->img_view_out != vp->img_view_ref) {
+ vp->view.out[0] != vp->view.ref[0]) {
/* Single barrier for a single layered ref */
err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
@@ -580,12 +582,14 @@ void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *
av_buffer_unref(&vp->slices_buf);
/* Destroy image view (out) */
- if (vp->img_view_out && vp->img_view_out != vp->img_view_dest)
- vp->destroy_image_view(hwctx->act_dev, vp->img_view_out, hwctx->alloc);
+ for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) {
+ if (vp->view.out[i] && vp->view.out[i] != vp->view.dst[i])
+ vp->destroy_image_view(hwctx->act_dev, vp->view.out[i], hwctx->alloc);
- /* Destroy image view (ref, unlayered) */
- if (vp->img_view_dest)
- vp->destroy_image_view(hwctx->act_dev, vp->img_view_dest, hwctx->alloc);
+ /* Destroy image view (ref, unlayered) */
+ if (vp->view.dst[i])
+ vp->destroy_image_view(hwctx->act_dev, vp->view.dst[i], hwctx->alloc);
+ }
av_frame_free(&vp->dpb_frame);
}
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 5c743e96d2..cbd22b3591 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -85,11 +85,13 @@ typedef struct FFVulkanDecodeContext {
typedef struct FFVulkanDecodePicture {
AVFrame *dpb_frame; /* Only used for out-of-place decoding. */
- VkImageView img_view_ref; /* Image representation view (reference) */
- VkImageView img_view_out; /* Image representation view (output-only) */
- VkImageView img_view_dest; /* Set to img_view_out if no layered refs are used */
- VkImageAspectFlags img_aspect; /* Image plane mask bits */
- VkImageAspectFlags img_aspect_ref; /* Only used for out-of-place decoding */
+ struct {
+ VkImageView ref[AV_NUM_DATA_POINTERS]; /* Image representation view (reference) */
+ VkImageView out[AV_NUM_DATA_POINTERS]; /* Image representation view (output-only) */
+ VkImageView dst[AV_NUM_DATA_POINTERS]; /* Set to view.ref if a non-layered DPB frame is allocated */
+ VkImageAspectFlags aspect[AV_NUM_DATA_POINTERS]; /* Image plane mask bits */
+ VkImageAspectFlags aspect_ref[AV_NUM_DATA_POINTERS]; /* Only used for out-of-place decoding */
+ } view;
VkSemaphore sem;
uint64_t sem_value;
diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c
index 1df8f0a208..71cf2c3ad7 100644
--- a/libavcodec/vulkan_h264.c
+++ b/libavcodec/vulkan_h264.c
@@ -98,7 +98,7 @@ static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
.codedOffset = (VkOffset2D){ 0, 0 },
.codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
.baseArrayLayer = ctx->common.layered_dpb ? dpb_slot_index : 0,
- .imageViewBinding = vkpic->img_view_ref,
+ .imageViewBinding = vkpic->view.ref[0],
};
*ref_slot = (VkVideoReferenceSlotInfoKHR) {
@@ -471,7 +471,7 @@ static int vk_h264_start_frame(AVCodecContext *avctx,
.codedOffset = (VkOffset2D){ 0, 0 },
.codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
.baseArrayLayer = 0,
- .imageViewBinding = vp->img_view_out,
+ .imageViewBinding = vp->view.out[0],
},
};
diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
index 589c3de83d..a5bcd88e2d 100644
--- a/libavcodec/vulkan_hevc.c
+++ b/libavcodec/vulkan_hevc.c
@@ -164,7 +164,7 @@ static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
.codedOffset = (VkOffset2D){ 0, 0 },
.codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
.baseArrayLayer = ctx->common.layered_dpb ? pic_id : 0,
- .imageViewBinding = vkpic->img_view_ref,
+ .imageViewBinding = vkpic->view.ref[0],
};
*ref_slot = (VkVideoReferenceSlotInfoKHR) {
@@ -823,7 +823,7 @@ static int vk_hevc_start_frame(AVCodecContext *avctx,
.codedOffset = (VkOffset2D){ 0, 0 },
.codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
.baseArrayLayer = 0,
- .imageViewBinding = vp->img_view_out,
+ .imageViewBinding = vp->view.out[0],
},
};
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 07/13] vulkan_decode: adjust number of async contexts created
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
` (4 preceding siblings ...)
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 06/13] vulkan_decode: support multiple image views Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 08/13] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne
` (3 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This caps the number of async contexts we create at two per thread,
while still creating at least one per thread. This saves VRAM and skips
cases where additional async contexts are of little benefit.
---
libavcodec/vulkan_decode.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 7f638d6fc6..cd77e10e12 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -1122,6 +1122,7 @@ int ff_vk_decode_init(AVCodecContext *avctx)
FFVulkanDecodeShared *ctx;
FFVulkanContext *s;
FFVulkanFunctions *vk;
+ int async_depth;
const VkVideoProfileInfoKHR *profile;
const FFVulkanDecodeDescriptor *vk_desc;
const VkPhysicalDeviceDriverProperties *driver_props;
@@ -1191,9 +1192,14 @@ int ff_vk_decode_init(AVCodecContext *avctx)
/* Create decode exec context for this specific main thread.
* 2 async contexts per thread was experimentally determined to be optimal
* for a majority of streams. */
+ async_depth = 2*ctx->qf->num;
+ /* We don't need more than 2 per thread context */
+ async_depth = FFMIN(async_depth, 2*avctx->thread_count);
+ /* Make sure there are enough async contexts for each thread */
+ async_depth = FFMAX(async_depth, avctx->thread_count);
+
err = ff_vk_exec_pool_init(s, ctx->qf, &ctx->exec_pool,
- FFMAX(2*ctx->qf->num, avctx->thread_count),
- 0, 0, 0, profile);
+ async_depth, 0, 0, 0, profile);
if (err < 0)
goto fail;
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 08/13] ffv1enc_vulkan: refactor shaders slightly to support sharing
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
` (5 preceding siblings ...)
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 07/13] vulkan_decode: adjust number of async contexts created Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 09/13] vulkan: unify handling of BGR and simplify ffv1_rct Lynne
` (2 subsequent siblings)
9 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
The shaders were written to support sharing, but needed slight
tweaking.
---
libavcodec/Makefile | 2 +-
libavcodec/ffv1_vulkan.c | 123 ++++++++++++++
libavcodec/ffv1_vulkan.h | 60 +++++++
libavcodec/ffv1enc_vulkan.c | 234 +++++++++-----------------
libavcodec/vulkan/ffv1_common.comp | 24 ++-
libavcodec/vulkan/ffv1_enc_setup.comp | 18 +-
libavcodec/vulkan/ffv1_reset.comp | 3 +-
libavcodec/vulkan/rangecoder.comp | 27 +--
8 files changed, 302 insertions(+), 189 deletions(-)
create mode 100644 libavcodec/ffv1_vulkan.c
create mode 100644 libavcodec/ffv1_vulkan.h
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index e3ccbf1838..74de7737f9 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -371,7 +371,7 @@ OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o float2half.o
OBJS-$(CONFIG_FASTAUDIO_DECODER) += fastaudio.o
OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1_parse.o ffv1.o
OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1_parse.o ffv1.o
-OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1enc_vulkan.o
+OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1_vulkan.o ffv1enc_vulkan.o
OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o
OBJS-$(CONFIG_FIC_DECODER) += fic.o
OBJS-$(CONFIG_FITS_DECODER) += fitsdec.o fits.o
diff --git a/libavcodec/ffv1_vulkan.c b/libavcodec/ffv1_vulkan.c
new file mode 100644
index 0000000000..6f49e2ebb1
--- /dev/null
+++ b/libavcodec/ffv1_vulkan.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "ffv1_vulkan.h"
+#include "libavutil/crc.h"
+
+int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f)
+{
+ int err;
+ uint8_t *buf_mapped;
+
+ RET(ff_vk_map_buffer(s, vkb, &buf_mapped, 0));
+
+ for (int i = 1; i < 256; i++) {
+ buf_mapped[256 + i] = f->state_transition[i];
+ buf_mapped[256 - i] = 256 - (int)f->state_transition[i];
+ }
+
+ RET(ff_vk_unmap_buffer(s, vkb, 1));
+
+fail:
+ return err;
+}
+
+static int init_state_transition_data(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f,
+ int (*write_data)(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f))
+{
+ int err;
+ size_t buf_len = 512*sizeof(uint8_t);
+
+ RET(ff_vk_create_buf(s, vkb,
+ buf_len,
+ NULL, NULL,
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+
+ write_data(s, vkb, f);
+
+fail:
+ return err;
+}
+
+int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f)
+{
+ return init_state_transition_data(s, vkb, f,
+ ff_ffv1_vk_update_state_transition_data);
+}
+
+int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f)
+{
+ int err;
+
+ int16_t *buf_mapped;
+ size_t buf_len = MAX_QUANT_TABLES*
+ MAX_CONTEXT_INPUTS*
+ MAX_QUANT_TABLE_SIZE*sizeof(int16_t);
+
+ RET(ff_vk_create_buf(s, vkb,
+ buf_len,
+ NULL, NULL,
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+ RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0));
+
+ memcpy(buf_mapped, f->quant_tables,
+ sizeof(f->quant_tables));
+
+ RET(ff_vk_unmap_buffer(s, vkb, 1));
+
+fail:
+ return err;
+}
+
+int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f)
+{
+ int err;
+
+ uint32_t *buf_mapped;
+ size_t buf_len = 256*sizeof(int32_t);
+
+ RET(ff_vk_create_buf(s, vkb,
+ buf_len,
+ NULL, NULL,
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+ RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0));
+
+ memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len);
+
+ RET(ff_vk_unmap_buffer(s, vkb, 1));
+
+fail:
+ return err;
+}
diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h
new file mode 100644
index 0000000000..0da6dc2d33
--- /dev/null
+++ b/libavcodec/ffv1_vulkan.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FFV1_VULKAN_H
+#define AVCODEC_FFV1_VULKAN_H
+
+#include "libavutil/vulkan.h"
+#include "ffv1.h"
+
+int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f);
+
+int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f);
+
+int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f);
+
+int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s,
+ FFVkBuffer *vkb, FFV1Context *f);
+
+typedef struct FFv1VkRCTParameters {
+ int offset;
+ uint8_t bits;
+ uint8_t planar_rgb;
+ uint8_t transparency;
+ uint8_t version;
+ uint8_t micro_version;
+ uint8_t padding[3];
+} FFv1VkRCTParameters;
+
+typedef struct FFv1VkResetParameters {
+ VkDeviceAddress slice_state;
+ uint32_t plane_state_size;
+ uint32_t context_count;
+ uint8_t codec_planes;
+ uint8_t key_frame;
+ uint8_t version;
+ uint8_t micro_version;
+ uint8_t padding[1];
+} FFv1VkResetParameters;
+
+#endif /* AVCODEC_FFV1_VULKAN_H */
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 2bbf310fce..17a93834f3 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -18,7 +18,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "libavutil/crc.h"
#include "libavutil/mem.h"
#include "libavutil/vulkan.h"
#include "libavutil/vulkan_spirv.h"
@@ -32,6 +31,7 @@
#include "ffv1.h"
#include "ffv1enc.h"
+#include "ffv1_vulkan.h"
/* Parallel Golomb alignment */
#define LG_ALIGN_W 32
@@ -122,28 +122,10 @@ extern const char *ff_source_ffv1_enc_setup_comp;
extern const char *ff_source_ffv1_enc_comp;
extern const char *ff_source_ffv1_enc_rgb_comp;
-typedef struct FFv1VkRCTParameters {
- int offset;
- uint8_t bits;
- uint8_t planar_rgb;
- uint8_t transparency;
- uint8_t padding[1];
-} FFv1VkRCTParameters;
-
-typedef struct FFv1VkResetParameters {
- VkDeviceAddress slice_state;
- uint32_t plane_state_size;
- uint32_t context_count;
- uint8_t codec_planes;
- uint8_t key_frame;
- uint8_t padding[3];
-} FFv1VkResetParameters;
-
typedef struct FFv1VkParameters {
VkDeviceAddress slice_state;
VkDeviceAddress scratch_data;
VkDeviceAddress out_data;
- uint64_t slice_size_max;
int32_t sar[2];
uint32_t chroma_shift[2];
@@ -151,6 +133,7 @@ typedef struct FFv1VkParameters {
uint32_t plane_state_size;
uint32_t context_count;
uint32_t crcref;
+ uint32_t slice_size_max;
uint8_t bits_per_raw_sample;
uint8_t context_model;
@@ -175,7 +158,6 @@ static void add_push_data(FFVulkanShader *shd)
GLSLC(1, u8buf slice_state; );
GLSLC(1, u8buf scratch_data; );
GLSLC(1, u8buf out_data; );
- GLSLC(1, uint64_t slice_size_max; );
GLSLC(0, );
GLSLC(1, ivec2 sar; );
GLSLC(1, uvec2 chroma_shift; );
@@ -183,6 +165,7 @@ static void add_push_data(FFVulkanShader *shd)
GLSLC(1, uint plane_state_size; );
GLSLC(1, uint context_count; );
GLSLC(1, uint32_t crcref; );
+ GLSLC(1, uint32_t slice_size_max; );
GLSLC(0, );
GLSLC(1, uint8_t bits_per_raw_sample; );
GLSLC(1, uint8_t context_model; );
@@ -492,7 +475,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.slice_state = slice_data_buf->address + f->slice_count*256,
.scratch_data = tmp_data_buf->address,
.out_data = out_data_buf->address,
- .slice_size_max = out_data_buf->size / f->slice_count,
.bits_per_raw_sample = f->bits_per_raw_sample,
.sar[0] = pict->sample_aspect_ratio.num,
.sar[1] = pict->sample_aspect_ratio.den,
@@ -501,6 +483,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.plane_state_size = plane_state_size,
.context_count = context_count,
.crcref = f->crcref,
+ .slice_size_max = out_data_buf->size / f->slice_count,
.context_model = fv->ctx.context_model,
.version = f->version,
.micro_version = f->micro_version,
@@ -966,7 +949,6 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
GLSLF(0, #define TYPE int%i_t ,smp_bits);
GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits);
GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits);
- GLSLD(ff_source_common_comp);
GLSLD(ff_source_rangecoder_comp);
if (f->ac == AC_GOLOMB_RICE)
@@ -993,6 +975,10 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
1, 1, 1,
0));
+ /* Common codec header */
+ GLSLD(ff_source_common_comp);
+ add_push_data(shd);
+
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1038,8 +1024,6 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
};
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0));
- add_push_data(shd);
-
GLSLD(ff_source_ffv1_enc_setup_comp);
RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
@@ -1074,6 +1058,22 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
wg_dim, 1, 1,
0));
+ /* Common codec header */
+ GLSLD(ff_source_common_comp);
+
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLC(1, u8buf slice_state; );
+ GLSLC(1, uint plane_state_size; );
+ GLSLC(1, uint context_count; );
+ GLSLC(1, uint8_t codec_planes; );
+ GLSLC(1, uint8_t key_frame; );
+ GLSLC(1, uint8_t version; );
+ GLSLC(1, uint8_t micro_version; );
+ GLSLC(1, uint8_t padding[1]; );
+ GLSLC(0, }; );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1110,17 +1110,6 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
};
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0));
- GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
- GLSLC(1, u8buf slice_state; );
- GLSLC(1, uint plane_state_size; );
- GLSLC(1, uint context_count; );
- GLSLC(1, uint8_t codec_planes; );
- GLSLC(1, uint8_t key_frame; );
- GLSLC(1, uint8_t padding[3]; );
- GLSLC(0, }; );
- ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
- VK_SHADER_STAGE_COMPUTE_BIT);
-
GLSLD(ff_source_ffv1_reset_comp);
RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
@@ -1164,6 +1153,21 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
wg_count, wg_count, 1,
0));
+ /* Common codec header */
+ GLSLD(ff_source_common_comp);
+
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLC(1, int offset; );
+ GLSLC(1, uint8_t bits; );
+ GLSLC(1, uint8_t planar_rgb; );
+ GLSLC(1, uint8_t transparency; );
+ GLSLC(1, uint8_t version; );
+ GLSLC(1, uint8_t micro_version; );
+ GLSLC(1, uint8_t padding[3]; );
+ GLSLC(0, }; );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1220,16 +1224,6 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
};
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
- GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
- GLSLC(1, int offset; );
- GLSLC(1, uint8_t bits; );
- GLSLC(1, uint8_t planar_rgb; );
- GLSLC(1, uint8_t transparency; );
- GLSLC(1, uint8_t padding[1]; );
- GLSLC(0, }; );
- ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
- VK_SHADER_STAGE_COMPUTE_BIT);
-
GLSLD(ff_source_ffv1_enc_rct_comp);
RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
@@ -1268,6 +1262,11 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
1, 1, 1,
0));
+ /* Common codec header */
+ GLSLD(ff_source_common_comp);
+
+ add_push_data(shd);
+
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1328,8 +1327,6 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
};
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
- add_push_data(shd);
-
/* Assemble the shader body */
GLSLD(ff_source_ffv1_enc_common_comp);
@@ -1356,110 +1353,6 @@ fail:
return err;
}
-static int init_state_transition_data(AVCodecContext *avctx)
-{
- int err;
- VulkanEncodeFFv1Context *fv = avctx->priv_data;
-
- uint8_t *buf_mapped;
- size_t buf_len = 512*sizeof(uint8_t);
-
- RET(ff_vk_create_buf(&fv->s, &fv->rangecoder_static_buf,
- buf_len,
- NULL, NULL,
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
- RET(ff_vk_map_buffer(&fv->s, &fv->rangecoder_static_buf,
- &buf_mapped, 0));
-
- for (int i = 1; i < 256; i++) {
- buf_mapped[256 + i] = fv->ctx.state_transition[i];
- buf_mapped[256 - i] = 256 - (int)fv->ctx.state_transition[i];
- }
-
- RET(ff_vk_unmap_buffer(&fv->s, &fv->rangecoder_static_buf, 1));
-
- /* Update descriptors */
- RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
- &fv->setup, 0, 0, 0,
- &fv->rangecoder_static_buf,
- 0, fv->rangecoder_static_buf.size,
- VK_FORMAT_UNDEFINED));
- RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
- &fv->enc, 0, 0, 0,
- &fv->rangecoder_static_buf,
- 0, fv->rangecoder_static_buf.size,
- VK_FORMAT_UNDEFINED));
-
-fail:
- return err;
-}
-
-static int init_quant_table_data(AVCodecContext *avctx)
-{
- int err;
- VulkanEncodeFFv1Context *fv = avctx->priv_data;
-
- int16_t *buf_mapped;
- size_t buf_len = MAX_QUANT_TABLES*
- MAX_CONTEXT_INPUTS*
- MAX_QUANT_TABLE_SIZE*sizeof(int16_t);
-
- RET(ff_vk_create_buf(&fv->s, &fv->quant_buf,
- buf_len,
- NULL, NULL,
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
- RET(ff_vk_map_buffer(&fv->s, &fv->quant_buf, (void *)&buf_mapped, 0));
-
- memcpy(buf_mapped, fv->ctx.quant_tables,
- sizeof(fv->ctx.quant_tables));
-
- RET(ff_vk_unmap_buffer(&fv->s, &fv->quant_buf, 1));
- RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
- &fv->enc, 0, 1, 0,
- &fv->quant_buf,
- 0, fv->quant_buf.size,
- VK_FORMAT_UNDEFINED));
-
-fail:
- return err;
-}
-
-static int init_crc_table_data(AVCodecContext *avctx)
-{
- int err;
- VulkanEncodeFFv1Context *fv = avctx->priv_data;
-
- uint32_t *buf_mapped;
- size_t buf_len = 256*sizeof(int32_t);
-
- RET(ff_vk_create_buf(&fv->s, &fv->crc_tab_buf,
- buf_len,
- NULL, NULL,
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
- VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
- RET(ff_vk_map_buffer(&fv->s, &fv->crc_tab_buf, (void *)&buf_mapped, 0));
-
- memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len);
-
- RET(ff_vk_unmap_buffer(&fv->s, &fv->crc_tab_buf, 1));
- RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
- &fv->enc, 0, 2, 0,
- &fv->crc_tab_buf,
- 0, fv->crc_tab_buf.size,
- VK_FORMAT_UNDEFINED));
-
-fail:
- return err;
-}
-
static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
{
int err;
@@ -1703,20 +1596,50 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
spv->uninit(&spv);
/* Range coder data */
- err = init_state_transition_data(avctx);
+ err = ff_ffv1_vk_init_state_transition_data(&fv->s,
+ &fv->rangecoder_static_buf,
+ f);
if (err < 0)
return err;
/* Quantization table data */
- err = init_quant_table_data(avctx);
+ err = ff_ffv1_vk_init_quant_table_data(&fv->s,
+ &fv->quant_buf,
+ f);
if (err < 0)
return err;
/* CRC table buffer */
- err = init_crc_table_data(avctx);
+ err = ff_ffv1_vk_init_crc_table_data(&fv->s,
+ &fv->crc_tab_buf,
+ f);
if (err < 0)
return err;
+ /* Update setup global descriptors */
+ RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+ &fv->setup, 0, 0, 0,
+ &fv->rangecoder_static_buf,
+ 0, fv->rangecoder_static_buf.size,
+ VK_FORMAT_UNDEFINED));
+
+ /* Update encode global descriptors */
+ RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+ &fv->enc, 0, 0, 0,
+ &fv->rangecoder_static_buf,
+ 0, fv->rangecoder_static_buf.size,
+ VK_FORMAT_UNDEFINED));
+ RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+ &fv->enc, 0, 1, 0,
+ &fv->quant_buf,
+ 0, fv->quant_buf.size,
+ VK_FORMAT_UNDEFINED));
+ RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+ &fv->enc, 0, 2, 0,
+ &fv->crc_tab_buf,
+ 0, fv->crc_tab_buf.size,
+ VK_FORMAT_UNDEFINED));
+
/* Temporary frame */
fv->frame = av_frame_alloc();
if (!fv->frame)
@@ -1735,7 +1658,8 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
if (!fv->buf_regions)
return AVERROR(ENOMEM);
- return 0;
+fail:
+ return err;
}
static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp
index 5b4a882367..604d03b2de 100644
--- a/libavcodec/vulkan/ffv1_common.comp
+++ b/libavcodec/vulkan/ffv1_common.comp
@@ -22,17 +22,18 @@
struct SliceContext {
RangeCoder c;
-
-#ifdef GOLOMB
PutBitContext pb; /* 8*8 bytes */
-#endif
ivec2 slice_dim;
ivec2 slice_pos;
ivec2 slice_rct_coef;
+ u8vec4 quant_table_idx;
+ uint context_count;
uint hdr_len; // only used for golomb
- int slice_coding_mode;
+
+ uint slice_coding_mode;
+ bool slice_reset_contexts;
};
/* -1, { -1, 0 } */
@@ -72,3 +73,18 @@ const uint32_t log2_run[41] = {
16, 17, 18, 19, 20, 21, 22, 23,
24,
};
+
+/*
+ * Compute the coordinate at which slice number sx begins when an extent of
+ * `width` samples is divided into num_h_slices slices.
+ * Streams older than version 4.3 use a plain proportional split; newer ones
+ * round the boundary to a multiple of (1 << chroma_shift).
+ */
+uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
+{
+    const uint step   = 1 << chroma_shift;
+    const uint padded = align(width, step);
+    const bool legacy = (version < 4) || ((version == 4) && (micro_version < 3));
+
+    /* Old bitstreams: proportional split, no alignment */
+    if (legacy)
+        return width * sx / num_h_slices;
+
+    /* Round to the nearest multiple of step within the padded extent */
+    const uint denom = 2 * num_h_slices * step;
+    const uint coord = (2 * padded * sx + num_h_slices * step) / denom * step;
+
+    /* The last boundary must land on the real extent, not the padded one */
+    return coord == padded ? width : coord;
+}
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp
index b861e25f74..23f09b2af6 100644
--- a/libavcodec/vulkan/ffv1_enc_setup.comp
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp
@@ -20,21 +20,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
-{
- uint mpw = 1 << chroma_shift;
- uint awidth = align(width, mpw);
-
- if ((version < 4) || ((version == 4) && (micro_version < 3)))
- return width * sx / num_h_slices;
-
- sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw;
- if (sx == awidth)
- sx = width;
-
- return sx;
-}
-
void init_slice(out SliceContext sc, const uint slice_idx)
{
/* Set coordinates */
@@ -52,6 +37,7 @@ void init_slice(out SliceContext sc, const uint slice_idx)
sc.slice_dim = ivec2(sxe - sxs, sye - sys);
sc.slice_rct_coef = ivec2(1, 1);
sc.slice_coding_mode = int(force_pcm == 1);
+ sc.slice_reset_contexts = sc.slice_coding_mode == 1;
rac_init(sc.c,
OFFBUF(u8buf, out_data, slice_idx * slice_size_max),
@@ -105,7 +91,7 @@ void write_slice_header(inout SliceContext sc, uint64_t state)
put_symbol_unsigned(sc.c, state, sar.y);
if (version >= 4) {
- put_rac_full(sc.c, state, sc.slice_coding_mode == 1);
+ put_rac_full(sc.c, state, sc.slice_reset_contexts);
put_symbol_unsigned(sc.c, state, sc.slice_coding_mode);
if (sc.slice_coding_mode != 1 && colorspace == 1) {
put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y);
diff --git a/libavcodec/vulkan/ffv1_reset.comp b/libavcodec/vulkan/ffv1_reset.comp
index c7c7962850..1b87ca754e 100644
--- a/libavcodec/vulkan/ffv1_reset.comp
+++ b/libavcodec/vulkan/ffv1_reset.comp
@@ -24,7 +24,8 @@ void main(void)
{
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
- if (slice_ctx[slice_idx].slice_coding_mode == 0 && key_frame == 0)
+ if (key_frame == 0 &&
+ slice_ctx[slice_idx].slice_reset_contexts == false)
return;
uint64_t slice_state_off = uint64_t(slice_state) +
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 848a056fb1..6e3b9c1238 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -21,8 +21,9 @@
*/
struct RangeCoder {
- u8buf bytestream_start;
- u8buf bytestream;
+ uint64_t bytestream_start;
+ uint64_t bytestream;
+ uint64_t bytestream_end;
uint low;
uint16_t range;
@@ -34,28 +35,29 @@ struct RangeCoder {
void renorm_encoder_full(inout RangeCoder c)
{
int bs_cnt = 0;
+ u8buf bytestream = u8buf(c.bytestream);
if (c.outstanding_byte == 0xFF) {
c.outstanding_byte = uint8_t(c.low >> 8);
} else if (c.low <= 0xFF00) {
- c.bytestream[bs_cnt++].v = c.outstanding_byte;
+ bytestream[bs_cnt++].v = c.outstanding_byte;
uint16_t cnt = c.outstanding_count;
for (; cnt > 0; cnt--)
- c.bytestream[bs_cnt++].v = uint8_t(0xFF);
+ bytestream[bs_cnt++].v = uint8_t(0xFF);
c.outstanding_count = uint16_t(0);
c.outstanding_byte = uint8_t(c.low >> 8);
} else if (c.low >= 0x10000) {
- c.bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1);
+ bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1);
uint16_t cnt = c.outstanding_count;
for (; cnt > 0; cnt--)
- c.bytestream[bs_cnt++].v = uint8_t(0x00);
+ bytestream[bs_cnt++].v = uint8_t(0x00);
c.outstanding_count = uint16_t(0);
c.outstanding_byte = uint8_t(bitfieldExtract(c.low, 8, 8));
} else {
c.outstanding_count++;
}
- c.bytestream = OFFBUF(u8buf, c.bytestream, bs_cnt);
+ c.bytestream += bs_cnt;
c.range <<= 8;
c.low = bitfieldInsert(0, c.low, 8, 8);
}
@@ -74,10 +76,10 @@ void renorm_encoder(inout RangeCoder c)
return;
}
- u8buf bs = c.bytestream;
+ u8buf bs = u8buf(c.bytestream);
uint8_t outstanding_byte = c.outstanding_byte;
- c.bytestream = OFFBUF(u8buf, bs, oc);
+ c.bytestream = uint64_t(bs) + oc;
c.outstanding_count = uint16_t(0);
c.outstanding_byte = uint8_t(low >> 8);
@@ -179,10 +181,11 @@ uint32_t rac_terminate(inout RangeCoder c)
return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start));
}
-void rac_init(out RangeCoder r, u8buf data, uint64_t buf_size)
+void rac_init(out RangeCoder r, u8buf data, uint buf_size)
{
- r.bytestream_start = data;
- r.bytestream = data;
+ r.bytestream_start = uint64_t(data);
+ r.bytestream = uint64_t(data);
+ r.bytestream_end = uint64_t(data) + buf_size;
r.low = 0;
r.range = uint16_t(0xFF00);
r.outstanding_count = uint16_t(0);
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 09/13] vulkan: unify handling of BGR and simplify ffv1_rct
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
` (6 preceding siblings ...)
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 08/13] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 10/13] ffv1dec: add support for hwaccels Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
9 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/ffv1_vulkan.h | 1 +
libavcodec/ffv1enc_vulkan.c | 2 ++
libavcodec/vulkan/ffv1_enc_rct.comp | 17 ++++++-------
libavutil/vulkan.c | 38 +++++++++++++++++++++++++++++
libavutil/vulkan.h | 6 +++++
5 files changed, 54 insertions(+), 10 deletions(-)
diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h
index 0da6dc2d33..599afae66e 100644
--- a/libavcodec/ffv1_vulkan.h
+++ b/libavcodec/ffv1_vulkan.h
@@ -37,6 +37,7 @@ int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s,
FFVkBuffer *vkb, FFV1Context *f);
typedef struct FFv1VkRCTParameters {
+ int fmt_lut[4];
int offset;
uint8_t bits;
uint8_t planar_rgb;
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 17a93834f3..f8fe3bec1a 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -264,6 +264,7 @@ static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec,
(ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1),
.transparency = f->transparency,
};
+ ff_vk_set_perm(src_hwfc->sw_format, pd.fmt_lut);
ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
@@ -1157,6 +1158,7 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
GLSLD(ff_source_common_comp);
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLC(1, ivec4 fmt_lut; );
GLSLC(1, int offset; );
GLSLC(1, uint8_t bits; );
GLSLC(1, uint8_t planar_rgb; );
diff --git a/libavcodec/vulkan/ffv1_enc_rct.comp b/libavcodec/vulkan/ffv1_enc_rct.comp
index a615381c90..b611f4be98 100644
--- a/libavcodec/vulkan/ffv1_enc_rct.comp
+++ b/libavcodec/vulkan/ffv1_enc_rct.comp
@@ -22,17 +22,14 @@
ivec4 load_components(ivec2 pos)
{
- if (planar_rgb == 0)
- return ivec4(imageLoad(src[0], pos));
+ ivec4 pix = ivec4(imageLoad(src[0], pos));
+ if (planar_rgb != 0) {
+ for (int i = 1; i < (3 + transparency); i++)
+ pix[i] = int(imageLoad(src[i], pos)[0]);
+ }
- ivec4 pix;
- for (int i = 0; i < (3 + transparency); i++)
- pix[i] = int(imageLoad(src[i], pos)[0]);
-
- /* Swizzle out the difference */
- if (bits > 8 && bits < 16 && transparency == 0)
- return pix.bgra;
- return pix.brga;
+ return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
+ pix[fmt_lut[2]], pix[fmt_lut[3]]);
}
void bypass_sample(ivec2 pos)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7b0f77b076..24af8d5753 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1478,6 +1478,44 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
return 0;
}
+/* Fill lut[0..3] with the component permutation associated with pix_fmt.
+ * Formats not listed below get the identity permutation. */
+void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4])
+{
+    static const int perm_bgr[4]      = { 2, 1, 0, 3 };
+    static const int perm_gbr[4]      = { 1, 0, 2, 3 };
+    static const int perm_identity[4] = { 0, 1, 2, 3 };
+    const int *perm = perm_identity;
+
+    switch (pix_fmt) {
+    /* Packed formats stored in B, G, R order */
+    case AV_PIX_FMT_BGRA:
+    case AV_PIX_FMT_BGR0:
+    case AV_PIX_FMT_BGR565:
+    case AV_PIX_FMT_X2BGR10:
+        perm = perm_bgr;
+        break;
+    /* Planar G, B, R (optionally with alpha) formats */
+    case AV_PIX_FMT_GBRAP:
+    case AV_PIX_FMT_GBRP:
+    case AV_PIX_FMT_GBRAP10:
+    case AV_PIX_FMT_GBRAP12:
+    case AV_PIX_FMT_GBRAP14:
+    case AV_PIX_FMT_GBRAP16:
+    case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRP14:
+    case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GBRPF32:
+    case AV_PIX_FMT_GBRAPF32:
+        perm = perm_gbr;
+        break;
+    default:
+        break;
+    }
+
+    for (int i = 0; i < 4; i++)
+        lut[i] = perm[i];
+}
+
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt,
enum FFVkShaderRepFormat rep_fmt)
{
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 89fc4eedc5..2a2a5916a5 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -371,6 +371,12 @@ const char *ff_vk_ret2str(VkResult res);
*/
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
+/**
+ * Since storage images may not be swizzled, we have to do this in the
+ * shader itself. This fills in a lookup table to do it.
+ */
+void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4]);
+
/**
* Get the aspect flag for a plane from an image.
*/
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 10/13] ffv1dec: add support for hwaccels
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
` (7 preceding siblings ...)
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 09/13] vulkan: unify handling of BGR and simplify ffv1_rct Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
9 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This commit adds support for hardware accelerated decoding to
the decoder.
The previous commits already refactored the decoder; this commit
simply adds the calls to the hwaccel hooks used for decoding.
---
libavcodec/ffv1.h | 2 ++
libavcodec/ffv1dec.c | 81 +++++++++++++++++++++++++++++++++++++++-----
2 files changed, 75 insertions(+), 8 deletions(-)
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index c23d64d54a..8c0e71284d 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -125,8 +125,10 @@ typedef struct FFV1Context {
int64_t picture_number;
int key_frame;
ProgressFrame picture, last_picture;
+ void *hwaccel_picture_private, *hwaccel_last_picture_private;
uint32_t crcref;
enum AVPixelFormat pix_fmt;
+ enum AVPixelFormat configured_pix_fmt;
const AVFrame *cur_enc_frame;
int plane_count;
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 5e9a765e38..eaa21eebdf 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -41,6 +41,9 @@
#include "libavutil/refstruct.h"
#include "thread.h"
#include "decode.h"
+#include "hwconfig.h"
+#include "hwaccel_internal.h"
+#include "config_components.h"
static inline int get_vlc_symbol(GetBitContext *gb, VlcState *const state,
int bits)
@@ -365,9 +368,12 @@ static int read_header(FFV1Context *f, RangeCoder *c)
if (ret < 0)
return ret;
- f->avctx->pix_fmt = get_pixel_format(f);
- if (f->avctx->pix_fmt < 0)
- return AVERROR(EINVAL);
+ if (f->configured_pix_fmt != f->pix_fmt) {
+ f->avctx->pix_fmt = get_pixel_format(f);
+ if (f->avctx->pix_fmt < 0)
+ return AVERROR(EINVAL);
+ f->configured_pix_fmt = f->pix_fmt;
+ }
ff_dlog(f->avctx, "%d %d %d\n",
f->chroma_h_shift, f->chroma_v_shift, f->pix_fmt);
@@ -460,6 +466,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
FFV1Context *f = avctx->priv_data;
int ret;
+ f->pix_fmt = AV_PIX_FMT_NONE;
+ f->configured_pix_fmt = AV_PIX_FMT_NONE;
+
if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
return ret;
@@ -644,13 +653,16 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
FFV1Context *f = avctx->priv_data;
int ret;
AVFrame *p;
+ const FFHWAccel *hwaccel = NULL;
/* This is copied onto the first slice's range coder context */
RangeCoder c;
ff_progress_frame_unref(&f->last_picture);
- FFSWAP(ProgressFrame, f->picture, f->last_picture);
+ av_refstruct_unref(&f->hwaccel_last_picture_private);
+ FFSWAP(ProgressFrame, f->picture, f->last_picture);
+ FFSWAP(void *, f->hwaccel_picture_private, f->hwaccel_last_picture_private);
f->avctx = avctx;
f->frame_damaged = 0;
@@ -659,11 +671,18 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
if (ret < 0)
return ret;
+ if (avctx->hwaccel)
+ hwaccel = ffhwaccel(avctx->hwaccel);
+
ret = ff_progress_frame_get_buffer(avctx, &f->picture,
AV_GET_BUFFER_FLAG_REF);
if (ret < 0)
return ret;
+ ret = ff_hwaccel_frame_priv_alloc(avctx, &f->hwaccel_picture_private);
+ if (ret < 0)
+ return ret;
+
p = f->picture.f;
p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. P
@@ -680,15 +699,53 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n",
f->version, !!(p->flags & AV_FRAME_FLAG_KEY), f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample);
+ /* Start */
+ if (hwaccel) {
+ ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
+ if (ret < 0)
+ return ret;
+ }
+
ff_thread_finish_setup(avctx);
- ret = decode_slices(avctx, c, avpkt);
- if (ret < 0)
- return ret;
+ /* Decode slices */
+ if (hwaccel) {
+ uint8_t *buf_end = avpkt->data + avpkt->size;
+
+ if (!(p->flags & AV_FRAME_FLAG_KEY) && f->last_picture.f)
+ ff_progress_frame_await(&f->last_picture, f->slice_count - 1);
+
+ for (int i = f->slice_count - 1; i >= 0; i--) {
+ uint8_t *pos;
+ uint32_t len;
+ ret = find_next_slice(avctx, avpkt->data, buf_end, i,
+ &pos, &len);
+ if (ret < 0)
+ return ret;
+
+ buf_end -= len;
+
+ ret = hwaccel->decode_slice(avctx, pos, len);
+ if (ret < 0)
+ return ret;
+ }
+ } else {
+ ret = decode_slices(avctx, c, avpkt);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* Finalize */
+ if (hwaccel) {
+ ret = hwaccel->end_frame(avctx);
+ if (ret < 0)
+ return ret;
+ }
ff_progress_frame_report(&f->picture, INT_MAX);
ff_progress_frame_unref(&f->last_picture);
+ av_refstruct_unref(&f->hwaccel_last_picture_private);
if ((ret = av_frame_ref(rframe, f->picture.f)) < 0)
return ret;
@@ -717,6 +774,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
fdst->ac = fsrc->ac;
fdst->colorspace = fsrc->colorspace;
fdst->pix_fmt = fsrc->pix_fmt;
+ fdst->configured_pix_fmt = fsrc->configured_pix_fmt;
fdst->ec = fsrc->ec;
fdst->intra = fsrc->intra;
@@ -752,6 +810,8 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
av_assert1(fdst->max_slice_count == fsrc->max_slice_count);
ff_progress_frame_replace(&fdst->picture, &fsrc->picture);
+ av_refstruct_replace(&fdst->hwaccel_picture_private,
+ fsrc->hwaccel_picture_private);
return 0;
}
@@ -762,8 +822,10 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx)
FFV1Context *const s = avctx->priv_data;
ff_progress_frame_unref(&s->picture);
+ av_refstruct_unref(&s->hwaccel_picture_private);
+
ff_progress_frame_unref(&s->last_picture);
- av_freep(&avctx->stats_out);
+ av_refstruct_unref(&s->hwaccel_last_picture_private);
ff_ffv1_close(s);
@@ -784,4 +846,7 @@ const FFCodec ff_ffv1_decoder = {
AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
FF_CODEC_CAP_USES_PROGRESSFRAMES,
+ .hw_configs = (const AVCodecHWConfigInternal *const []) {
+ NULL
+ },
};
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
` (8 preceding siblings ...)
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 10/13] ffv1dec: add support for hwaccels Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem() Lynne
` (2 more replies)
9 siblings, 3 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
---
libavcodec/ffv1.h | 3 +++
libavcodec/ffv1dec.c | 19 +++++++++++++++++--
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 8c0e71284d..860a5c14b1 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -174,6 +174,9 @@ typedef struct FFV1Context {
* NOT shared between frame threads.
*/
uint8_t frame_damaged;
+
+ /* Reference to the current packet */
+ AVPacket *pkt_ref;
} FFV1Context;
int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index eaa21eebdf..6396f22f79 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
f->pix_fmt = AV_PIX_FMT_NONE;
f->configured_pix_fmt = AV_PIX_FMT_NONE;
+ f->pkt_ref = av_packet_alloc();
+ if (!f->pkt_ref)
+ return AVERROR(ENOMEM);
+
if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
return ret;
@@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
/* Start */
if (hwaccel) {
+ ret = av_packet_ref(f->pkt_ref, avpkt);
+ if (ret < 0)
+ return ret;
+
ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
if (ret < 0)
return ret;
@@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
uint32_t len;
ret = find_next_slice(avctx, avpkt->data, buf_end, i,
&pos, &len);
- if (ret < 0)
+ if (ret < 0) {
+ av_packet_unref(f->pkt_ref);
return ret;
+ }
buf_end -= len;
ret = hwaccel->decode_slice(avctx, pos, len);
- if (ret < 0)
+ if (ret < 0) {
+ av_packet_unref(f->pkt_ref);
return ret;
+ }
}
+
+ av_packet_unref(f->pkt_ref);
} else {
ret = decode_slices(avctx, c, avpkt);
if (ret < 0)
@@ -827,6 +841,7 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx)
ff_progress_frame_unref(&s->last_picture);
av_refstruct_unref(&s->hwaccel_last_picture_private);
+ av_packet_free(&s->pkt_ref);
ff_ffv1_close(s);
return 0;
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem()
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 13/13] ffv1: add a Vulkan-based decoder Lynne
2025-03-10 3:14 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Andreas Rheinhardt
2 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This adds a function which adds a regular timeline semaphore
as a wait-only dependency.
---
libavutil/vulkan.c | 28 ++++++++++++++++++++--------
libavutil/vulkan.h | 3 +++
2 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 24af8d5753..085c8b6d4d 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -620,6 +620,23 @@ static void destroy_tmp_semaphores(void *opaque, uint8_t *data)
av_free(ts);
}
+/*
+ * Append one wait entry (semaphore sem, value val, stage mask stage) to the
+ * wait list of execution context e.
+ * Returns 0 on success. ARR_REALLOC is a macro defined elsewhere in this
+ * file; NOTE(review): it presumably grows e->sem_wait and returns an error
+ * from this function on allocation failure - confirm against its definition.
+ */
+int ff_vk_exec_add_dep_wait_sem(FFVulkanContext *s, FFVkExecContext *e,
+                                VkSemaphore sem, uint64_t val,
+                                VkPipelineStageFlagBits2 stage)
+{
+    /* Name required by ARR_REALLOC, which is invoked with it below */
+    VkSemaphoreSubmitInfo *sem_wait;
+    const VkSemaphoreSubmitInfo wait_info = {
+        .sType     = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+        .semaphore = sem,
+        .value     = val,
+        .stageMask = stage,
+    };
+
+    ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
+
+    e->sem_wait[e->sem_wait_cnt] = wait_info;
+    e->sem_wait_cnt++;
+
+    return 0;
+}
+
int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e,
VkSemaphore *sem, int nb,
VkPipelineStageFlagBits2 stage,
@@ -672,14 +689,9 @@ int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e,
}
for (int i = 0; i < nb; i++) {
- VkSemaphoreSubmitInfo *sem_wait;
- ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt);
-
- e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
- .semaphore = sem[i],
- .stageMask = stage,
- };
+ err = ff_vk_exec_add_dep_wait_sem(s, e, sem[i], 0, stage);
+ if (err < 0)
+ return err;
}
return 0;
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 2a2a5916a5..de84d6e10a 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -456,6 +456,9 @@ void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
*/
int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
AVBufferRef **deps, int nb_deps, int ref);
+int ff_vk_exec_add_dep_wait_sem(FFVulkanContext *s, FFVkExecContext *e,
+ VkSemaphore sem, uint64_t val,
+ VkPipelineStageFlagBits2 stage);
int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e,
VkSemaphore *sem, int nb,
VkPipelineStageFlagBits2 stage,
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* [FFmpeg-devel] [PATCH 13/13] ffv1: add a Vulkan-based decoder
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem() Lynne
@ 2025-03-10 3:08 ` Lynne
2025-03-10 3:14 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Andreas Rheinhardt
2 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 3:08 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
This patch adds a fully-featured level 3 and 4 decoder for FFv1,
supporting Golomb and all Range coding variants, all pixel formats,
and all features, except for the newly added floating-point formats.
On a 6000 Ada, for 3840x2160 bgr0 content at 50Mbps (standard desktop
recording), it is able to decode at 400fps.
For comparison, an Alder Lake CPU with 24 threads can barely do 100fps.
---
configure | 2 +
libavcodec/Makefile | 1 +
libavcodec/ffv1dec.c | 6 +
libavcodec/hwaccels.h | 1 +
libavcodec/vulkan/Makefile | 6 +
libavcodec/vulkan/common.comp | 95 ++
libavcodec/vulkan/ffv1_common.comp | 5 +
libavcodec/vulkan/ffv1_dec.comp | 303 ++++++
libavcodec/vulkan/ffv1_dec_rct.comp | 72 ++
libavcodec/vulkan/ffv1_dec_setup.comp | 138 +++
libavcodec/vulkan/ffv1_rct.comp | 90 ++
libavcodec/vulkan/ffv1_vlc.comp | 37 +
libavcodec/vulkan/rangecoder.comp | 74 ++
libavcodec/vulkan_decode.c | 17 +
libavcodec/vulkan_ffv1.c | 1292 +++++++++++++++++++++++++
15 files changed, 2139 insertions(+)
create mode 100644 libavcodec/vulkan/ffv1_dec.comp
create mode 100644 libavcodec/vulkan/ffv1_dec_rct.comp
create mode 100644 libavcodec/vulkan/ffv1_dec_setup.comp
create mode 100644 libavcodec/vulkan/ffv1_rct.comp
create mode 100644 libavcodec/vulkan_ffv1.c
diff --git a/configure b/configure
index 04b83a8868..fbee82f920 100755
--- a/configure
+++ b/configure
@@ -3195,6 +3195,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox"
av1_videotoolbox_hwaccel_select="av1_decoder"
av1_vulkan_hwaccel_deps="vulkan"
av1_vulkan_hwaccel_select="av1_decoder"
+ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler"
+ffv1_vulkan_hwaccel_select="ffv1_decoder"
h263_vaapi_hwaccel_deps="vaapi"
h263_vaapi_hwaccel_select="h263_decoder"
h263_videotoolbox_hwaccel_deps="videotoolbox"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 74de7737f9..eb91cbb5ce 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1017,6 +1017,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o
OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o
OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL) += videotoolbox_av1.o
OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o
OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o
OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 6396f22f79..7b0740ad37 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -349,6 +349,9 @@ static int decode_slice(AVCodecContext *c, void *arg)
static enum AVPixelFormat get_pixel_format(FFV1Context *f)
{
enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_FFV1_VULKAN_HWACCEL
+ AV_PIX_FMT_VULKAN,
+#endif
f->pix_fmt,
AV_PIX_FMT_NONE,
};
@@ -862,6 +865,9 @@ const FFCodec ff_ffv1_decoder = {
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
FF_CODEC_CAP_USES_PROGRESSFRAMES,
.hw_configs = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_FFV1_VULKAN_HWACCEL
+ HWACCEL_VULKAN(ffv1),
+#endif
NULL
},
};
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 910a024032..0b2c725247 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel;
extern const struct FFHWAccel ff_av1_vdpau_hwaccel;
extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_av1_vulkan_hwaccel;
+extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel;
extern const struct FFHWAccel ff_h263_vaapi_hwaccel;
extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel;
extern const struct FFHWAccel ff_h264_d3d11va_hwaccel;
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 351332ee44..e6bad486bd 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -11,6 +11,12 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \
vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
+ vulkan/rangecoder.o vulkan/ffv1_vlc.o \
+ vulkan/ffv1_common.o vulkan/ffv1_reset.o \
+ vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o \
+ vulkan/ffv1_dec_rct.o
+
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)
libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
index e4e983b3e2..b0adf8590e 100644
--- a/libavcodec/vulkan/common.comp
+++ b/libavcodec/vulkan/common.comp
@@ -26,6 +26,10 @@ layout(buffer_reference, buffer_reference_align = 1) buffer u8vec2buf {
u8vec2 v;
};
+layout(buffer_reference, buffer_reference_align = 1) buffer u8vec4buf {
+ u8vec4 v;
+};
+
layout(buffer_reference, buffer_reference_align = 2) buffer u16buf {
uint16_t v;
};
@@ -182,3 +186,94 @@ uint32_t put_bytes_count(in PutBitContext pb)
uint64_t num_bytes = (pb.buf - pb.buf_start) + ((BUF_BITS - pb.bit_left) >> 3);
return uint32_t(num_bytes);
}
+
+struct GetBitContext { /* state of the bit reader used by get_bit()/get_bits()/show_bits()/skip_bits() */
+ uint64_t buf_start; /* address of the first byte; set by init_get_bits() */
+ uint64_t buf; /* address of the next byte to load; advanced by LOAD64()/RELOAD32() */
+ uint64_t buf_end; /* buf_start plus the length in bytes passed to init_get_bits() */
+
+ uint64_t bits; /* cached bits; readers take them from the most significant end */
+ uint bits_valid; /* number of not yet consumed bits held in bits */
+ uint size_in_bits; /* length passed to init_get_bits(), times 8 */
+};
+
+#define LOAD64() /* refill: replace the cache of the GetBitContext named gb with the next 8 bytes */ \
+ { \
+ u8vec4buf ptr = u8vec4buf(gb.buf); \
+ uint32_t rf1 = pack32((ptr[0].v).wzyx); /* bytes 0..3, order reversed before packing (presumably a big-endian read) */ \
+ uint32_t rf2 = pack32((ptr[1].v).wzyx); /* bytes 4..7, same treatment */ \
+ gb.buf += 8; \
+ gb.bits = uint64_t(rf1) << 32 | uint64_t(rf2); /* earlier bytes end up in the upper half */ \
+ gb.bits_valid = 64; \
+ }
+
+#define RELOAD32() /* top-up: add the next 4 bytes to the cache of the GetBitContext named gb */ \
+ { \
+ u8vec4buf ptr = u8vec4buf(gb.buf); \
+ uint32_t rf = pack32((ptr[0].v).wzyx); /* byte order reversed before packing (presumably a big-endian read) */ \
+ gb.buf += 4; \
+ gb.bits = uint64_t(rf) << (32 - gb.bits_valid) | gb.bits; /* new bits go directly below the cached ones; needs bits_valid <= 32 */ \
+ gb.bits_valid += 32; \
+ }
+
+/* Set gb up to read len bytes starting at data, and fill the bit cache. */
+void init_get_bits(inout GetBitContext gb, u8buf data, uint64_t len)
+{
+    const uint64_t base = uint64_t(data);
+
+    gb.buf_start    = base;
+    gb.buf          = base;
+    gb.buf_end      = base + len;
+    gb.size_in_bits = uint(len) * 8;
+
+    /* Preload the first 64 bits */
+    LOAD64()
+}
+
+/* Consume and return the next bit of the stream. */
+bool get_bit(inout GetBitContext gb)
+{
+    /* Cache exhausted: fetch the next 64 bits */
+    if (gb.bits_valid == 0)
+        LOAD64()
+
+    /* The next bit is the most significant one of the cache */
+    const bool top_bit = bool(gb.bits >> 63);
+
+    gb.bits_valid -= 1;
+    gb.bits <<= 1;
+
+    return top_bit;
+}
+
+/* Consume and return the next n bits of the stream; n == 0 yields 0. */
+uint get_bits(inout GetBitContext gb, uint n)
+{
+    uint result = 0;
+
+    if (n != 0) {
+        /* Not enough cached bits: top the cache up by 32 */
+        if (n > gb.bits_valid)
+            RELOAD32()
+
+        result = uint(gb.bits >> (64 - n));
+        gb.bits_valid -= n;
+        gb.bits <<= n;
+    }
+
+    return result;
+}
+
+/*
+ * Return the next n bits of the stream without consuming them.
+ * The cache may be topped up, so gb is still modified.
+ * n == 0 yields 0, matching get_bits().
+ */
+uint show_bits(inout GetBitContext gb, uint n)
+{
+    /* gb.bits >> 64 would be an undefined shift, so handle n == 0 up front */
+    if (n == 0)
+        return 0;
+
+    if (n > gb.bits_valid)
+        RELOAD32()
+
+    return uint(gb.bits >> (64 - n));
+}
+
+/* Discard the next n bits of the stream. */
+void skip_bits(inout GetBitContext gb, uint n)
+{
+    /* Not enough cached bits: top the cache up by 32 */
+    if (gb.bits_valid < n)
+        RELOAD32()
+
+    gb.bits_valid -= n;
+    gb.bits <<= n;
+}
+
+/* Number of bits consumed so far: bits loaded minus bits still cached. */
+uint tell_bits(in GetBitContext gb)
+{
+    const uint loaded_bits = uint(gb.buf - gb.buf_start) * 8;
+
+    return loaded_bits - gb.bits_valid;
+}
+
+/* Number of bits not yet consumed: bits not yet loaded plus bits still cached. */
+uint left_bits(in GetBitContext gb)
+{
+    const uint loaded_bits = uint(gb.buf - gb.buf_start) * 8;
+
+    return gb.size_in_bits - loaded_bits + gb.bits_valid;
+}
diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp
index 604d03b2de..d2bd7e736e 100644
--- a/libavcodec/vulkan/ffv1_common.comp
+++ b/libavcodec/vulkan/ffv1_common.comp
@@ -22,7 +22,12 @@
struct SliceContext {
RangeCoder c;
+
+#if !defined(DECODE)
PutBitContext pb; /* 8*8 bytes */
+#else
+ GetBitContext gb;
+#endif
ivec2 slice_dim;
ivec2 slice_pos;
diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
new file mode 100644
index 0000000000..a9feb9d318
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -0,0 +1,303 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/* Collect the already-decoded neighbours of the sample at pos and return ivec2(context, prediction); off is pos relative to the slice, sw the line width. */
+ivec2 get_pred(ivec2 pos, ivec2 off, int p, int comp, int sw,
+ uint8_t context_model)
+{
+ const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); /* column 0: move the x-1 taps one right and one up */
+ const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0); /* column 1: same for the x-2 tap */
+
+ TYPE top2 = TYPE(0); /* every tap stays 0 when its guard below does not load it */
+ if (off.y > 1)
+ top2 = TYPE(imageLoad(dst[p], pos + ivec2(0, -2))[comp]);
+
+ VTYPE3 top = VTYPE3(TYPE(0),
+ TYPE(0),
+ TYPE(0));
+ if (off.y > 0 && off != ivec2(0, 1))
+ top[0] = TYPE(imageLoad(dst[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
+ if (off.y > 0) {
+ top[1] = TYPE(imageLoad(dst[p], pos + ivec2(0, -1))[comp]);
+ top[2] = TYPE(imageLoad(dst[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]); /* x+1 tap collapses onto x in the last column */
+ }
+
+ VTYPE2 cur = VTYPE2(TYPE(0),
+ TYPE(0));
+ if (off.x > 0 && off != ivec2(1, 0))
+ cur[0] = TYPE(imageLoad(dst[p], pos + ivec2(-2, 0) + yoff_border2)[comp]);
+ if (off != ivec2(0, 0))
+ cur[1] = TYPE(imageLoad(dst[p], pos + ivec2(-1, 0) + yoff_border1)[comp]);
+
+ /* context, prediction */
+ return ivec2(get_context(cur, top, top2, context_model),
+ predict(cur[1], VTYPE2(top)));
+}
+/* Write sample v: RGB builds update only channel `comp` of the texel, other builds replicate v to all channels. */
+void store_comp(ivec2 pos, int p, int comp, uint v)
+{
+#ifndef RGB
+    imageStore(dst[p], pos, uvec4(v));
+#else
+    uvec4 texel = imageLoad(dst[p], pos);
+    texel[comp] = v;
+    imageStore(dst[p], pos, texel);
+#endif
+}
+
+#ifndef GOLOMB
+int get_isymbol(inout RangeCoder c, uint64_t state)
+{ /* Decode one signed range-coded symbol; `state` addresses the adaptive state bytes of the chosen context. */
+ if (get_rac(c, state)) /* state byte 0: "symbol is zero" flag */
+ return 0;
+
+ state += 1;
+
+ int e = 0;
+ while (get_rac(c, state + min(e, 9))) { // 1..10: unary-coded exponent
+ e++;
+ if (e > 31) { /* flag the stream as corrupt and give up */
+ corrupt = true;
+ return 0;
+ }
+ }
+
+ state += 21; /* state now addresses byte 22 of the context */
+
+ int a = 1;
+ for (int i = e - 1; i >= 0; i--)
+ a += a + int(get_rac(c, state + min(i, 9))); // 22..31: mantissa bits, MSB first
+
+ e = -int(get_rac(c, state - 11 + min(e, 10))); // 11..21 sign
+ return (a ^ e) - e; /* e is 0 or -1 here, so this negates a when the sign bit was set */
+}
+/* Decode one line of raw (PCM) samples: each sample is `bits` equiprobable range-coder bits, MSB first. */
+void decode_line_pcm(inout SliceContext sc, int y, int p, int comp,
+                     int bits)
+{
+    int width    = sc.slice_dim.x;
+    ivec2 origin = sc.slice_pos;
+
+#ifndef RGB
+    if (p == 1 || p == 2) { /* planes 1 and 2 use the chroma-shifted geometry */
+        origin >>= chroma_shift;
+        width  >>= chroma_shift.x;
+    }
+#endif
+
+    for (int x = 0; x < width; x++) {
+        uint sample = 0;
+        for (int bit = bits - 1; bit >= 0; bit--)
+            sample |= uint(get_rac_equi(sc.c)) << bit;
+
+        store_comp(origin + ivec2(x, y), p, comp, sample);
+    }
+}
+/* Decode one range-coded line: per sample, predict from neighbours, read the residual, store the sum. */
+void decode_line(inout SliceContext sc, uint64_t state,
+                 int y, int p, int comp, int bits, const int run_index)
+{
+    int width    = sc.slice_dim.x;
+    ivec2 origin = sc.slice_pos;
+
+#ifndef RGB
+    if (p == 1 || p == 2) {
+        origin >>= chroma_shift;
+        width  >>= chroma_shift.x;
+    }
+#endif
+
+    for (int x = 0; x < width; x++) {
+        const ivec2 off = ivec2(x, y);
+        const ivec2 pos = origin + off;
+        /* cp.x: signed context, cp.y: predicted value */
+        const ivec2 cp = get_pred(pos, off, p, comp, width, sc.quant_table_idx[p]);
+
+        int residual = get_isymbol(sc.c, state + CONTEXT_SIZE*abs(cp.x));
+        if (cp.x < 0)
+            residual = -residual;
+        store_comp(pos, p, comp, zero_extend(cp.y + residual, bits));
+    }
+}
+
+#else /* GOLOMB */
+/* Decode one Golomb-Rice coded line; run_index (the log2_run table position) is carried between calls by the caller. */
+void decode_line(inout SliceContext sc, uint64_t state,
+                 int y, int p, int comp, int bits, inout int run_index)
+{
+    ivec2 sp = sc.slice_pos;
+    int w = sc.slice_dim.x;
+
+#ifndef RGB
+    if (p > 0 && p < 3) {
+        w >>= chroma_shift.x;
+        sp >>= chroma_shift;
+    }
+#endif
+
+    int run_count = 0;
+    int run_mode = 0; /* 0: plain symbols, 1: inside a run, 2: explicit (short) run length was read */
+
+    for (int x = 0; x < w; x++) {
+        ivec2 pos = sp + ivec2(x, y); /* computed once, used for both the prediction and the store */
+        int diff;
+        ivec2 pr = get_pred(pos, ivec2(x, y), p, comp, w,
+                            sc.quant_table_idx[p]);
+
+        VlcState sb = VlcState(state + VLC_STATE_SIZE*abs(pr[0]));
+
+        if (pr[0] == 0 && run_mode == 0) /* context 0 enters run mode */
+            run_mode = 1;
+
+        if (run_mode != 0) {
+            if (run_count == 0 && run_mode == 1) {
+                int tmp_idx = int(log2_run[run_index]);
+                if (get_bit(sc.gb)) { /* 1: a full run of 1 << tmp_idx samples */
+                    run_count = 1 << tmp_idx;
+                    if (x + run_count <= w)
+                        run_index++;
+                } else { /* 0: the remaining run length follows in tmp_idx bits */
+                    if (tmp_idx != 0) {
+                        run_count = int(get_bits(sc.gb, tmp_idx));
+                    } else
+                        run_count = 0;
+
+                    if (run_index != 0)
+                        run_index--;
+                    run_mode = 2;
+                }
+            }
+
+            run_count--;
+            if (run_count < 0) { /* run is over: a real symbol follows */
+                run_mode = 0;
+                run_count = 0;
+                diff = read_vlc_symbol(sc.gb, sb, bits);
+                if (diff >= 0) /* non-negative values are bumped by one, so 0 is never produced here */
+                    diff++;
+            } else {
+                diff = 0;
+            }
+        } else {
+            diff = read_vlc_symbol(sc.gb, sb, bits);
+        }
+
+        if (pr[0] < 0)
+            diff = -diff;
+
+        uint v = zero_extend(pr[1] + diff, bits);
+        store_comp(pos, p, comp, v);
+    }
+}
+#endif
+/* Decode every line of one slice; slice_idx selects this slice's region of the shared coder-state buffer. */
+void decode_slice(inout SliceContext sc, const uint slice_idx)
+{
+    int run_index = 0;
+
+#ifndef RGB
+    int bits = bits_per_raw_sample;
+#else
+    int bits = 9; /* 8-bit content in coding mode 0: one extra bit */
+    if (bits_per_raw_sample != 8 || sc.slice_coding_mode != 0)
+        bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
+#endif
+
+    /* PCM coding */
+#ifndef GOLOMB
+    if (sc.slice_coding_mode == 1) {
+#ifndef RGB
+        for (int p = 0; p < planes; p++) {
+            int h = sc.slice_dim.y;
+            if (p > 0 && p < 3)
+                h >>= chroma_shift.y;
+
+            for (int y = 0; y < h; y++)
+                decode_line_pcm(sc, y, p, 0, bits);
+        }
+#else
+        if (transparency == 1) {
+            for (int y = 0; y < sc.slice_dim.y; y++) { /* components are coded in the order 1, 2, 0, 3 */
+                decode_line_pcm(sc, y, 0, 1, bits);
+                decode_line_pcm(sc, y, 0, 2, bits);
+                decode_line_pcm(sc, y, 0, 0, bits);
+                decode_line_pcm(sc, y, 0, 3, bits);
+            }
+        } else {
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line_pcm(sc, y, 0, 1, bits);
+                decode_line_pcm(sc, y, 0, 2, bits);
+                decode_line_pcm(sc, y, 0, 0, bits);
+            }
+        }
+#endif
+    } else
+
+    /* Arithmetic coding */
+#endif
+    {
+        uint64_t slice_state_off = uint64_t(slice_state) +
+                                   slice_idx*plane_state_size*codec_planes;
+
+#ifndef RGB
+        for (int p = 0; p < planes; p++) {
+            int h = sc.slice_dim.y;
+            if (p > 0 && p < 3)
+                h >>= chroma_shift.y;
+            run_index = 0; /* every plane starts with a fresh Golomb run index */
+            for (int y = 0; y < h; y++)
+                decode_line(sc, slice_state_off, y, p, 0, bits, run_index);
+
+            /* For the second chroma plane, reuse the first plane's state */
+            if (p != 1)
+                slice_state_off += plane_state_size;
+        }
+#else
+        if (transparency == 1) {
+            for (int y = 0; y < sc.slice_dim.y; y++) { /* components 2 and 0 share state block 1 */
+                decode_line(sc, slice_state_off + plane_state_size*0,
+                            y, 0, 1, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 2, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 0, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*2,
+                            y, 0, 3, bits, run_index);
+            }
+        } else {
+            for (int y = 0; y < sc.slice_dim.y; y++) {
+                decode_line(sc, slice_state_off + plane_state_size*0,
+                            y, 0, 1, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 2, bits, run_index);
+                decode_line(sc, slice_state_off + plane_state_size*1,
+                            y, 0, 0, bits, run_index);
+            }
+        }
+#endif
+    }
+}
+/* One workgroup per slice: the slice index is the row-major index of the workgroup. */
+void main(void)
+{
+    const uint slice_idx = gl_WorkGroupID.x + gl_NumWorkGroups.x*gl_WorkGroupID.y;
+    decode_slice(slice_ctx[slice_idx], slice_idx);
+}
diff --git a/libavcodec/vulkan/ffv1_dec_rct.comp b/libavcodec/vulkan/ffv1_dec_rct.comp
new file mode 100644
index 0000000000..0305dc3295
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec_rct.comp
@@ -0,0 +1,72 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2025 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/* Copy the slice from src[0] to dst[0] unchanged; each invocation steps through it by the workgroup size. */
+void bypass_block(in SliceContext sc)
+{
+    const ivec2 first = sc.slice_pos + ivec2(gl_LocalInvocationID);
+    const ivec2 limit = sc.slice_pos + sc.slice_dim;
+    for (uint y = first.y; y < limit.y; y += gl_WorkGroupSize.y) {
+        for (uint x = first.x; x < limit.x; x += gl_WorkGroupSize.x)
+            imageStore(dst[0], ivec2(x, y), ivec4(imageLoad(src[0], ivec2(x, y))));
+    }
+}
+/* Undo the colour transform for one sample of src[0] and write it to dst[0] (plus dst[1..] for planar output). */
+void transform_sample(ivec2 pos, ivec2 rct_coef)
+{
+    ivec4 px = ivec4(imageLoad(src[0], pos));
+
+    px.r -= offset;
+    px.b -= offset;
+    px.g -= (px.r*rct_coef.x + px.b*rct_coef.y) >> 2;
+    px.r += px.g;
+    px.b += px.g;
+
+    /* permute the channels through fmt_lut before storing */
+    const ivec4 shuffled = ivec4(px[fmt_lut[0]], px[fmt_lut[1]],
+                                 px[fmt_lut[2]], px[fmt_lut[3]]);
+    imageStore(dst[0], pos, shuffled);
+    if (planar_rgb == 0)
+        return;
+    for (int plane = 1; plane < (3 + transparency); plane++)
+        imageStore(dst[plane], pos, ivec4(shuffled[plane]));
+}
+/* Run transform_sample() on every sample of the slice, stepping by the workgroup size per invocation. */
+void transform_block(in SliceContext sc)
+{
+    const ivec2 first = sc.slice_pos + ivec2(gl_LocalInvocationID);
+    const ivec2 limit = sc.slice_pos + sc.slice_dim;
+    const ivec2 coef  = sc.slice_rct_coef;
+
+    for (uint y = first.y; y < limit.y; y += gl_WorkGroupSize.y)
+        for (uint x = first.x; x < limit.x; x += gl_WorkGroupSize.x)
+            transform_sample(ivec2(x, y), coef);
+}
+/* Per-slice entry point: slices with coding mode 1 are copied verbatim, all others are transformed. */
+void main()
+{
+    const uint slice_idx = gl_WorkGroupID.x + gl_NumWorkGroups.x*gl_WorkGroupID.y;
+    const bool is_raw    = slice_ctx[slice_idx].slice_coding_mode == 1;
+    if (!is_raw)
+        transform_block(slice_ctx[slice_idx]);
+    else
+        bypass_block(slice_ctx[slice_idx]);
+}
diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp
new file mode 100644
index 0000000000..a10163a8d6
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_dec_setup.comp
@@ -0,0 +1,138 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/* Decode one unsigned range-coded symbol using the state bytes at `state`; sets `corrupt` and returns 0 if the exponent exceeds 31. */
+uint get_usymbol(inout RangeCoder c, uint64_t state)
+{
+ if (get_rac(c, state + 0)) /* state byte 0: "symbol is zero" flag */
+ return 0;
+
+ int e = 0;
+ while (get_rac(c, state + 1 + min(e, 9))) { // 1..10: unary-coded exponent
+ e++;
+ if (e > 31) {
+ corrupt = true;
+ return 0;
+ }
+ }
+
+ uint a = 1;
+ for (int i = e - 1; i >= 0; i--)
+ a += a + uint(get_rac(c, state + 22 + min(i, 9))); // 22..31: mantissa bits, MSB first
+
+ return a;
+}
+/* Parse one slice header with the range coder and fill in sc; returns true when the header is invalid. */
+bool decode_slice_header(inout SliceContext sc, uint64_t state)
+{
+    u8buf sb = u8buf(state);
+
+    [[unroll]]
+    for (int i = 0; i < CONTEXT_SIZE; i++) /* start from the neutral state value 128 */
+        sb[i].v = uint8_t(128);
+
+    uint sx = get_usymbol(sc.c, state);
+    uint sy = get_usymbol(sc.c, state);
+    uint sw = get_usymbol(sc.c, state) + 1;
+    uint sh = get_usymbol(sc.c, state) + 1;
+
+    if (corrupt || sw == 0 || sh == 0 || /* unsigned values: compare sw/sh first, or the differences below wrap */
+        sw > gl_NumWorkGroups.x || sx > (gl_NumWorkGroups.x - sw) ||
+        sh > gl_NumWorkGroups.y || sy > (gl_NumWorkGroups.y - sh))
+        return true;
+
+    /* Set coordinates */
+    uint sxs = slice_coord(img_size.x, sx     , gl_NumWorkGroups.x, chroma_shift.x);
+    uint sxe = slice_coord(img_size.x, sx + sw, gl_NumWorkGroups.x, chroma_shift.x);
+    uint sys = slice_coord(img_size.y, sy     , gl_NumWorkGroups.y, chroma_shift.y);
+    uint sye = slice_coord(img_size.y, sy + sh, gl_NumWorkGroups.y, chroma_shift.y);
+
+    sc.slice_pos = ivec2(sxs, sys);
+    sc.slice_dim = ivec2(sxe - sxs, sye - sys);
+    sc.slice_rct_coef = ivec2(1, 1);
+    sc.slice_coding_mode = int(0);
+
+    for (uint i = 0; i < codec_planes; i++) {
+        uint idx = get_usymbol(sc.c, state);
+        if (idx >= quant_table_count)
+            return true;
+        sc.quant_table_idx[i] = uint8_t(idx);
+        sc.context_count = context_count[idx]; /* the last plane's table determines context_count */
+    }
+
+    get_usymbol(sc.c, state); /* three header fields are parsed and ignored here */
+    get_usymbol(sc.c, state);
+    get_usymbol(sc.c, state);
+
+    if (version >= 4) {
+        sc.slice_reset_contexts = get_rac(sc.c, state);
+        sc.slice_coding_mode = get_usymbol(sc.c, state);
+        if (sc.slice_coding_mode != 1 && colorspace == 1) {
+            sc.slice_rct_coef.x = int(get_usymbol(sc.c, state));
+            sc.slice_rct_coef.y = int(get_usymbol(sc.c, state));
+            if (any(greaterThan(uvec2(sc.slice_rct_coef), uvec2(4))) || /* also rejects values that turned negative as int */
+                sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4)
+                return true;
+        }
+    }
+
+    return false;
+}
+/* Switch the slice from the range coder to the Golomb bit reader, starting one byte before the range coder's read position. */
+void golomb_init(inout SliceContext sc, uint64_t state)
+{
+    const bool extra_rac = version > 3 || (version == 3 && micro_version > 1);
+    if (extra_rac) { /* one more range-coded bit is read with the state byte forced to 129 */
+        u8buf(state).v = uint8_t(129);
+        get_rac(sc.c, state);
+    }
+    const uint64_t consumed = sc.c.bytestream - sc.c.bytestream_start - 1;
+    init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + consumed),
+                  sc.c.bytestream_end - sc.c.bytestream_start - consumed);
+}
+/* Per-slice setup: start the range decoder, parse the slice header, optionally switch to Golomb, optionally compute the CRC. */
+void main(void)
+{
+ const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+ uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; /* CONTEXT_SIZE scratch bytes per slice */
+
+ u8buf bs = u8buf(slice_data + slice_offsets[2*slice_idx + 0]); /* slice_offsets holds (offset, size) pairs */
+ uint32_t slice_size = slice_offsets[2*slice_idx + 1];
+
+ rac_init_dec(slice_ctx[slice_idx].c,
+ bs, slice_size);
+
+ if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1)) /* last slice only: consume one extra equiprobable bit */
+ get_rac_equi(slice_ctx[slice_idx].c);
+
+ decode_slice_header(slice_ctx[slice_idx], scratch_state); /* NOTE(review): the error return is discarded here */
+
+ if (golomb == 1)
+ golomb_init(slice_ctx[slice_idx], scratch_state);
+
+ if (ec != 0 && check_crc != 0) {
+ uint32_t crc = crcref;
+ for (int i = 0; i < slice_size; i++)
+ crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8); /* table-driven CRC over every byte of the slice */
+
+ slice_crc_mismatch[slice_idx] = crc; /* the final CRC value is stored as-is */
+ }
+}
diff --git a/libavcodec/vulkan/ffv1_rct.comp b/libavcodec/vulkan/ffv1_rct.comp
new file mode 100644
index 0000000000..b10bb47132
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_rct.comp
@@ -0,0 +1,90 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/* Fetch one pixel (gathering planes 1.. for planar input) and permute its channels through fmt_lut. */
+ivec4 load_components(ivec2 pos)
+{
+    ivec4 px = ivec4(imageLoad(src[0], pos));
+    if (planar_rgb != 0) {
+        for (int plane = 1; plane < (3 + transparency); plane++)
+            px[plane] = int(imageLoad(src[plane], pos)[0]);
+    }
+    const ivec4 shuffled = ivec4(px[fmt_lut[0]], px[fmt_lut[1]],
+                                 px[fmt_lut[2]], px[fmt_lut[3]]);
+    return shuffled;
+}
+/* Copy one pixel to dst[0] as returned by load_components(), with no colour transform applied. */
+void bypass_sample(ivec2 pos)
+{
+ imageStore(dst[0], pos, load_components(pos));
+}
+/* Run bypass_sample() on every sample of the slice, stepping by the workgroup size per invocation. */
+void bypass_block(in SliceContext sc)
+{
+    const ivec2 first = sc.slice_pos + ivec2(gl_LocalInvocationID);
+    const ivec2 limit = sc.slice_pos + sc.slice_dim;
+    for (uint y = first.y; y < limit.y; y += gl_WorkGroupSize.y)
+        for (uint x = first.x; x < limit.x; x += gl_WorkGroupSize.x)
+            bypass_sample(ivec2(x, y));
+}
+#ifdef DECODE /* inverse transform; NOTE(review): DECODE as the selector is an assumption - confirm the intended guard */
+void transform_sample(ivec2 pos, ivec2 rct_coef)
+{
+    ivec4 pix = load_components(pos);
+    pix.b -= offset;
+    pix.r -= offset;
+    pix.g -= (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;
+    pix.b += pix.g;
+    pix.r += pix.g;
+    imageStore(dst[0], pos, pix);
+}
+#else /* forward transform: the steps of the branch above, undone in reverse order */
+void transform_sample(ivec2 pos, ivec2 rct_coef)
+{
+    ivec4 pix = load_components(pos);
+    pix.b -= pix.g;
+    pix.r -= pix.g;
+    pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;
+    pix.b += offset;
+    pix.r += offset;
+    imageStore(dst[0], pos, pix);
+}
+#endif
+void transform_block(in SliceContext sc)
+{   /* Run transform_sample() on every sample of the slice, stepping by the workgroup size per invocation. */
+    const ivec2 first = sc.slice_pos + ivec2(gl_LocalInvocationID);
+    const ivec2 limit = sc.slice_pos + sc.slice_dim;
+    const ivec2 coef  = sc.slice_rct_coef;
+
+    for (uint y = first.y; y < limit.y; y += gl_WorkGroupSize.y)
+        for (uint x = first.x; x < limit.x; x += gl_WorkGroupSize.x)
+            transform_sample(ivec2(x, y), coef);
+}
+/* Per-slice entry point: slices with coding mode 1 are copied verbatim, all others are transformed. */
+void main()
+{
+    const uint slice_idx = gl_WorkGroupID.x + gl_NumWorkGroups.x*gl_WorkGroupID.y;
+    const bool is_raw    = slice_ctx[slice_idx].slice_coding_mode == 1;
+    if (!is_raw)
+        transform_block(slice_ctx[slice_idx]);
+    else
+        bypass_block(slice_ctx[slice_idx]);
+}
diff --git a/libavcodec/vulkan/ffv1_vlc.comp b/libavcodec/vulkan/ffv1_vlc.comp
index 0a53e035b5..d374e5a069 100644
--- a/libavcodec/vulkan/ffv1_vlc.comp
+++ b/libavcodec/vulkan/ffv1_vlc.comp
@@ -120,3 +120,40 @@ Symbol get_vlc_symbol(inout VlcState state, int v, int bits)
return set_sr_golomb(code, k, 12, bits);
}
+/* Unsigned Golomb-Rice read: a unary prefix shorter than `limit` is followed by k suffix bits, otherwise esc_len escape bits + limit - 1. */
+uint get_ur_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+{
+    for (int i = 0; i < limit; i++) /* honour `limit` instead of a hard-coded 12 */
+        if (get_bit(gb))
+            return get_bits(gb, k) + (uint(i) << k);
+
+    return get_bits(gb, uint(esc_len)) + uint(limit - 1);
+}
+/* Signed variant: maps the unsigned codes 0, 1, 2, 3, ... to 0, -1, 1, -2, ... */
+int get_sr_golomb(inout GetBitContext gb, uint k, int limit, int esc_len)
+{
+    const int code = int(get_ur_golomb(gb, k, limit, esc_len));
+    return -(code & 1) ^ (code >> 1);
+}
+/* Read one adaptive Golomb-Rice symbol: derive k from the context statistics,
+ * decode and fold the value, then update the statistics. */
+int read_vlc_symbol(inout GetBitContext gb, inout VlcState state, int bits)
+{
+    /* k counts how often count must be doubled to reach error_sum */
+    int k = 0;
+    for (int i = state.count; i < state.error_sum; i += i) // FIXME: optimize
+        k++;
+
+    int v = get_sr_golomb(gb, k, 12, bits);
+
+    /* NOTE(review): presumably an all-ones mask when 2*drift + count is
+     * negative, which would invert every bit of v - confirm the field types */
+    v ^= ((2 * state.drift + state.count) >> 31);
+
+    /* fold() sees the bias as it is before update_vlc_state() runs */
+    const int ret = fold(v + state.bias, bits);
+
+    update_vlc_state(state, v);
+
+    return ret;
+}
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 6e3b9c1238..8c8d0d9d9c 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -191,3 +191,77 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size)
r.outstanding_count = uint16_t(0);
r.outstanding_byte = uint8_t(0xFF);
}
+
+/* Decoder */
+uint overread; /* reset by rac_init_dec(); incremented by refill() whenever no input byte is left */
+bool corrupt; /* reset by rac_init_dec(); set by the symbol readers when an exponent exceeds 31 */
+/* Start a range decoder over `data`: reset the global status flags, then load the first two bytes into r.low. */
+void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size)
+{
+ overread = 0;
+ corrupt = false;
+
+ /* Skip priming bytes */
+ rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2); /* NOTE(review): buf_size - 2 wraps if buf_size < 2 */
+
+ u8vec2 prime = u8vec2buf(data).v;
+ /* Switch endianness of the priming bytes */
+ r.low = pack16(prime.yx);
+
+ if (r.low >= 0xFF00) { /* clamp low and end the stream right after the priming bytes */
+ r.low = 0xFF00;
+ r.bytestream_end = uint64_t(data) + 2;
+ }
+}
+/* Renormalise: scale range and low by 256 and add the next input byte, or count an overread when none is left. */
+void refill(inout RangeCoder c)
+{
+    c.low   <<= 8;
+    c.range <<= 8;
+    if (c.bytestream >= c.bytestream_end) {
+        overread++;
+        return;
+    }
+    c.low += u8buf(c.bytestream).v;
+    c.bytestream++;
+}
+/* Decode one bit using the adaptive state byte at `state`, and update that byte. */
+bool get_rac(inout RangeCoder c, uint64_t state)
+{
+ u8buf sb = u8buf(state);
+ uint val = uint(sb.v);
+ uint16_t range1 = uint16_t((uint(c.range) * val) >> 8); /* part of the range that decodes as a 1 bit */
+
+ c.range -= range1;
+
+ bool bit = c.low >= c.range;
+ sb.v = zero_one_state[(uint(bit) << 8) + val]; /* next state: table indexed by (bit, current state) */
+
+ if (bit) {
+ c.low -= c.range;
+ c.range = range1;
+ }
+
+ if (c.range < 0x100) /* renormalise once range no longer needs more than 8 bits */
+ refill(c);
+
+ return bit;
+}
+/* Decode one bit with the range split in half; no adaptive state is read or written. */
+bool get_rac_equi(inout RangeCoder c)
+{
+ uint16_t range1 = c.range >> 1;
+
+ c.range -= range1;
+
+ bool bit = c.low >= c.range;
+ if (bit) {
+ c.low -= c.range;
+ c.range = range1;
+ }
+
+ if (c.range < 0x100) /* same renormalisation rule as get_rac() */
+ refill(c);
+
+ return bit;
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index cd77e10e12..bc850a7333 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -36,6 +36,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc;
#if CONFIG_AV1_VULKAN_HWACCEL
extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc;
#endif
+#if CONFIG_FFV1_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc;
+#endif
static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_H264_VULKAN_HWACCEL
@@ -47,6 +50,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
#if CONFIG_AV1_VULKAN_HWACCEL
&ff_vk_dec_av1_desc,
#endif
+#if CONFIG_FFV1_VULKAN_HWACCEL
+ &ff_vk_dec_ffv1_desc,
+#endif
};
static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id)
@@ -1035,6 +1041,17 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
frames_ctx->free = free_profile_data;
hwfc->create_pnext = &prof->profile_list;
+ } else {
+ switch (frames_ctx->sw_format) {
+ case AV_PIX_FMT_GBRAP16:
+ frames_ctx->sw_format = AV_PIX_FMT_RGBA64;
+ break;
+ case AV_PIX_FMT_BGR0:
+ frames_ctx->sw_format = AV_PIX_FMT_RGB0;
+ break;
+ default:
+ break;
+ }
}
frames_ctx->width = avctx->coded_width;
diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
new file mode 100644
index 0000000000..276514a228
--- /dev/null
+++ b/libavcodec/vulkan_ffv1.c
@@ -0,0 +1,1292 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "ffv1.h"
+#include "ffv1_vulkan.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+/* Strings defined outside this file; presumably the text of the correspondingly named .comp shaders. */
+extern const char *ff_source_common_comp;
+extern const char *ff_source_rangecoder_comp;
+extern const char *ff_source_ffv1_vlc_comp;
+extern const char *ff_source_ffv1_common_comp;
+extern const char *ff_source_ffv1_dec_setup_comp;
+extern const char *ff_source_ffv1_reset_comp;
+extern const char *ff_source_ffv1_dec_comp;
+extern const char *ff_source_ffv1_dec_rct_comp;
+/* Decode descriptor for FFV1: names the push-descriptor extension and asks for a compute-capable queue. */
+const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
+ .codec_id = AV_CODEC_ID_FFV1,
+ .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+ .queue_flags = VK_QUEUE_COMPUTE_BIT,
+};
+/* NOTE(review): HOST_MAP is not referenced in the visible part of this file - confirm it is still needed. */
+#define HOST_MAP
+/* Per-frame private data of the FFV1 Vulkan hwaccel. */
+typedef struct FFv1VulkanDecodePicture {
+ FFVulkanDecodePicture vp;
+
+ AVBufferRef *tmp_data; /* scratch buffer, CONTEXT_SIZE bytes per slice */
+
+ AVBufferRef *slice_state; /* allocated on keyframes, otherwise a new reference to the previous picture's buffer */
+ uint32_t plane_state_size; /* bytes of coder state per plane */
+ uint32_t slice_state_size; /* bytes per slice: all plane states plus slice_data_size, rounded up to 8 */
+ uint32_t slice_data_size; /* fixed 256-byte allowance for the SliceContext struct */
+ uint32_t max_context_count; /* largest context_count over all quant tables */
+
+ AVBufferRef *slice_offset_buf; /* (offset, size) uint32_t pairs, one pair per slice */
+ uint32_t *slice_offset;
+ int slice_num; /* number of slices recorded so far for this frame */
+
+ AVBufferRef *slice_status_buf; /* one uint32_t per slice */
+ int crc_checked; /* nonzero when f->ec is set and AV_EF_CRCCHECK was requested */
+} FFv1VulkanDecodePicture;
+/* Decoder-wide state: shaders, static tables and the pools the per-frame buffers come from. */
+typedef struct FFv1VulkanDecodeContext {
+ AVBufferRef *intermediate_frames_ref[2]; /* 16/32 bit */
+
+ FFVulkanShader setup;
+ FFVulkanShader reset[2]; /* AC/Golomb */
+ FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */
+ FFVulkanShader rct[2]; /* 16/32 bit */
+
+ FFVkBuffer rangecoder_static_buf;
+ FFVkBuffer quant_buf;
+ FFVkBuffer crc_tab_buf;
+
+ AVBufferPool *slice_state_pool; /* backs FFv1VulkanDecodePicture.slice_state */
+ AVBufferPool *tmp_data_pool; /* backs FFv1VulkanDecodePicture.tmp_data */
+ AVBufferPool *slice_offset_pool; /* backs FFv1VulkanDecodePicture.slice_offset_buf */
+ AVBufferPool *slice_status_pool; /* backs FFv1VulkanDecodePicture.slice_status_buf */
+} FFv1VulkanDecodeContext;
+/* Push-constant data; the fields appear in the same order as in the GLSL block emitted by add_push_data(). */
+typedef struct FFv1VkParameters {
+ uint32_t context_count[MAX_QUANT_TABLES];
+
+ VkDeviceAddress slice_data; /* declared as u8buf on the GLSL side */
+ VkDeviceAddress slice_state;
+ VkDeviceAddress scratch_data;
+
+ uint32_t img_size[2]; /* uvec2 on the GLSL side */
+ uint32_t chroma_shift[2];
+
+ uint32_t plane_state_size;
+ uint32_t crcref;
+
+ uint8_t bits_per_raw_sample;
+ uint8_t quant_table_count;
+ uint8_t version;
+ uint8_t micro_version;
+ uint8_t key_frame;
+ uint8_t planes;
+ uint8_t codec_planes;
+ uint8_t transparency;
+ uint8_t colorspace;
+ uint8_t ec;
+ uint8_t golomb;
+ uint8_t check_crc;
+} FFv1VkParameters;
+/* Emit the GLSL push-constant block into shd and register a push-constant range of sizeof(FFv1VkParameters). */
+static void add_push_data(FFVulkanShader *shd)
+{
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
+ GLSLC(0, );
+ GLSLC(1, u8buf slice_data; );
+ GLSLC(1, u8buf slice_state; );
+ GLSLC(1, u8buf scratch_data; );
+ GLSLC(0, );
+ GLSLC(1, uvec2 img_size; );
+ GLSLC(1, uvec2 chroma_shift; );
+ GLSLC(0, );
+ GLSLC(1, uint plane_state_size; );
+ GLSLC(1, uint32_t crcref; );
+ GLSLC(0, );
+ GLSLC(1, uint8_t bits_per_raw_sample; );
+ GLSLC(1, uint8_t quant_table_count; );
+ GLSLC(1, uint8_t version; );
+ GLSLC(1, uint8_t micro_version; );
+ GLSLC(1, uint8_t key_frame; );
+ GLSLC(1, uint8_t planes; );
+ GLSLC(1, uint8_t codec_planes; );
+ GLSLC(1, uint8_t transparency; );
+ GLSLC(1, uint8_t colorspace; );
+ GLSLC(1, uint8_t ec; );
+ GLSLC(1, uint8_t golomb; );
+ GLSLC(1, uint8_t check_crc; );
+ GLSLC(0, }; );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), /* range starts at offset 0 */
+ VK_SHADER_STAGE_COMPUTE_BIT);
+}
+/* hwaccel start_frame callback: size and obtain the per-frame buffers, prepare the output frame and, for RGB, an intermediate frame. Returns 0 or a negative error code. */
+static int vk_ffv1_start_frame(AVCodecContext *avctx,
+ av_unused const uint8_t *buffer,
+ av_unused uint32_t size)
+{
+ int err;
+ FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ FFVulkanDecodeShared *ctx = dec->shared_ctx;
+ FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+ FFV1Context *f = avctx->priv_data;
+
+ FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+ FFVulkanDecodePicture *vp = &fp->vp;
+
+ AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+ enum AVPixelFormat sw_format = hwfc->sw_format;
+
+ int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
+ !(sw_format == AV_PIX_FMT_YA8); /* i.e. colorspace != 0 and the format is not YA8 */
+
+ fp->slice_num = 0;
+
+ for (int i = 0; i < f->quant_table_count; i++) /* NOTE(review): relies on max_context_count starting at 0 - confirm fp is zero-initialised */
+ fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count);
+
+ /* Allocate slice buffer data */
+ if (f->ac == AC_GOLOMB_RICE)
+ fp->plane_state_size = 8; /* bytes per context in Golomb-Rice mode */
+ else
+ fp->plane_state_size = CONTEXT_SIZE;
+
+ fp->plane_state_size *= fp->max_context_count;
+ fp->slice_state_size = fp->plane_state_size*f->plane_count;
+
+ fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */
+ fp->slice_state_size += fp->slice_data_size;
+ fp->slice_state_size = FFALIGN(fp->slice_state_size, 8);
+
+ fp->crc_checked = f->ec && (avctx->err_recognition & AV_EF_CRCCHECK);
+
+ /* Host map the input slices data if supported */
+ if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
+ err = ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, f->pkt_ref->data,
+ f->pkt_ref->buf,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+ if (err < 0)
+ return err;
+ }
+
+ /* Allocate slice state data */
+ if (f->picture.f->flags & AV_FRAME_FLAG_KEY) {
+ err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool,
+ &fp->slice_state,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, f->max_slice_count*fp->slice_state_size,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ if (err < 0)
+ return err;
+ } else { /* non-keyframes take a new reference to the previous picture's state buffer */
+ FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private; /* NOTE(review): assumed non-NULL with a valid slice_state */
+ fp->slice_state = av_buffer_ref(fpl->slice_state);
+ if (!fp->slice_state)
+ return AVERROR(ENOMEM);
+ }
+
+ /* Allocate temporary data buffer */
+ err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool,
+ &fp->tmp_data,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, f->max_slice_count*CONTEXT_SIZE,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ if (err < 0)
+ return err;
+
+ /* Allocate slice offsets buffer */
+ err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool,
+ &fp->slice_offset_buf,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, 2*f->max_slice_count*sizeof(uint32_t), /* two uint32_t (offset, size) per slice */
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+ if (err < 0)
+ return err;
+
+ /* Allocate slice status buffer */
+ err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_status_pool,
+ &fp->slice_status_buf,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, f->max_slice_count*sizeof(uint32_t),
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+ if (err < 0)
+ return err;
+
+ /* Prepare frame to be used */
+ err = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1,
+ FF_VK_REP_NATIVE, 0);
+ if (err < 0)
+ return err;
+
+ /* Create a temporary frame for RGB */
+ if (is_rgb) {
+ AVHWFramesContext *dpb_hwfc;
+ dpb_hwfc = (AVHWFramesContext *)fv->intermediate_frames_ref[f->use32bit]->data; /* index 0: 16 bit, 1: 32 bit */
+
+ vp->dpb_frame = av_frame_alloc();
+ if (!vp->dpb_frame)
+ return AVERROR(ENOMEM);
+
+ err = av_hwframe_get_buffer(fv->intermediate_frames_ref[f->use32bit],
+ vp->dpb_frame, 0);
+ if (err < 0)
+ return err;
+
+ err = ff_vk_decode_prepare_frame_sdr(dec, vp->dpb_frame, vp, 1,
+ FF_VK_REP_NATIVE, 0);
+ if (err < 0)
+ return err;
+
+ for (int i = 0; i < av_pix_fmt_count_planes(dpb_hwfc->sw_format); i++) { /* one image view per plane of the intermediate frame */
+ err = ff_vk_create_imageview(&ctx->s,
+ &vp->view.dst[i], &vp->view.aspect_ref[i],
+ vp->dpb_frame,
+ i, FF_VK_REP_NATIVE);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+/* hwaccel decode_slice callback: record this slice as an (offset, size) uint32_t pair in slice_offset_buf. Returns 0 or a negative error code. */
+static int vk_ffv1_decode_slice(AVCodecContext *avctx,
+ const uint8_t *data,
+ uint32_t size)
+{
+ FFV1Context *f = avctx->priv_data;
+ FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ FFVulkanDecodeShared *ctx = dec->shared_ctx;
+
+ FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+ FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+
+ if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) { /* host-mapped packet: the offset is relative to the packet start */
+ AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 0)*sizeof(uint32_t),
+ data - f->pkt_ref->data);
+ AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 1)*sizeof(uint32_t),
+ size);
+ fp->slice_num++;
+ } else { /* otherwise ff_vk_decode_add_slice() takes the data and updates slice_num and slice_offset */
+ FFVulkanDecodePicture *vp = &fp->vp;
+ int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
+ &fp->slice_num,
+ (const uint32_t **)&fp->slice_offset);
+ if (err < 0)
+ return err;
+
+ AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 0)*sizeof(uint32_t), /* slice_num already counts this slice */
+ fp->slice_offset[fp->slice_num - 1]);
+ AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 1)*sizeof(uint32_t),
+ size);
+ }
+
+ return 0;
+}
+
+/**
+ * Hwaccel end_frame callback: record and submit the GPU work for one frame.
+ *
+ * A single execution context receives, in order, the setup, reset and decode
+ * compute dispatches and, for RGB content only, the RCT dispatch that writes
+ * the final output frame. Each dispatch is preceded by the buffer and image
+ * barriers it depends on.
+ *
+ * @param avctx codec context; its hwaccel private data holds the Vulkan state
+ * @return 0 on success, a negative error code otherwise
+ */
+static int vk_ffv1_end_frame(AVCodecContext *avctx)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    FFV1Context *f = avctx->priv_data;
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+    FFv1VkParameters pd;
+    FFv1VkResetParameters pd_reset;
+
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+    enum AVPixelFormat sw_format = hwfc->sw_format;
+
+    int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8;
+    int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
+                 !(sw_format == AV_PIX_FMT_YA8);
+
+    FFVulkanShader *reset_shader;
+    FFVulkanShader *decode_shader;
+
+    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &fp->vp;
+
+    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+    FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data;
+    FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data;
+    FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data;
+
+    FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data;
+
+    /* RGB content is decoded into the intermediate frame and converted into
+     * the output frame by the RCT pass; everything else decodes directly
+     * into the output frame. */
+    AVFrame *decode_dst = is_rgb ? vp->dpb_frame : f->picture.f;
+    VkImageView *decode_dst_view = is_rgb ? vp->view.dst : vp->view.out;
+
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+    VkBufferMemoryBarrier2 buf_bar[8];
+    int nb_buf_bar = 0;
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    ff_vk_exec_start(&ctx->s, exec);
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                      f->picture.f);
+    if (err < 0)
+        return err;
+
+    if (is_rgb)
+        RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
+                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
+        FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private;
+        FFVulkanDecodePicture *vpl = &fpl->vp;
+
+        /* Wait on the previous frame */
+        RET(ff_vk_exec_add_dep_wait_sem(&ctx->s, exec, vpl->sem, vpl->sem_value,
+                                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
+    }
+
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1));
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_status_buf, 1, 1));
+    /* The three buffers below are added with a final argument of 0 and the
+     * picture's pointer is cleared right after, so the per-picture free
+     * callback will not unreference them again.
+     * NOTE(review): presumably the exec context takes over the reference in
+     * that mode - confirm against ff_vk_exec_add_dep_buf(). */
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+    vp->slices_buf = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0));
+    fp->slice_offset_buf = NULL;
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0));
+    fp->tmp_data = NULL;
+
+    /* Entry barrier for the slice state */
+    if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
+        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = slice_state->stage,
+            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+            .srcAccessMask = slice_state->access,
+            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = slice_state->buf,
+            .offset = 0,
+            .size = VK_WHOLE_SIZE,
+        };
+    }
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    if (nb_buf_bar) {
+        /* At most one barrier was recorded above, so the tracked state must
+         * come from element 0; element 1 has not been written yet. */
+        slice_state->stage = buf_bar[0].dstStageMask;
+        slice_state->access = buf_bar[0].dstAccessMask;
+        nb_buf_bar = 0;
+    }
+
+    /* Setup shader */
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 1, 0,
+                                    slice_offset,
+                                    0, 2*f->slice_count*sizeof(uint32_t),
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
+                                    1, 2, 0,
+                                    slice_status,
+                                    0, f->slice_count*sizeof(uint32_t),
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
+    /* The same push-constant block is reused by the decode shader below */
+    pd = (FFv1VkParameters) {
+        /* context_count */
+
+        .slice_data = slices_buf->address,
+        .slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
+        .scratch_data = tmp_data->address,
+
+        .img_size[0] = f->picture.f->width,
+        .img_size[1] = f->picture.f->height,
+        .chroma_shift[0] = f->chroma_h_shift,
+        .chroma_shift[1] = f->chroma_v_shift,
+
+        .plane_state_size = fp->plane_state_size,
+        .crcref = f->crcref,
+
+        .bits_per_raw_sample = bits,
+        .quant_table_count = f->quant_table_count,
+        .version = f->version,
+        .micro_version = f->micro_version,
+        .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
+        .planes = av_pix_fmt_count_planes(sw_format),
+        .codec_planes = f->plane_count,
+        .transparency = f->transparency,
+        .colorspace = f->colorspace,
+        .ec = f->ec,
+        .golomb = f->ac == AC_GOLOMB_RICE,
+        .check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
+    };
+    for (int i = 0; i < MAX_QUANT_TABLES; i++)
+        pd.context_count[i] = f->context_count[i];
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    /* Reset shader */
+    reset_shader = &fv->reset[f->ac == AC_GOLOMB_RICE];
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader);
+
+    pd_reset = (FFv1VkResetParameters) {
+        .slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
+        .plane_state_size = fp->plane_state_size,
+        .context_count = fp->max_context_count,
+        .codec_planes = f->plane_count,
+        .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
+        .version = f->version,
+        .micro_version = f->micro_version,
+    };
+    ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd_reset), &pd_reset);
+
+    /* Sync between setup and reset shaders */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_state->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_state->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_state->buf,
+        .offset = 0,
+        .size = fp->slice_data_size*f->slice_count,
+    };
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    slice_state->stage = buf_bar[0].dstStageMask;
+    slice_state->access = buf_bar[0].dstAccessMask;
+    nb_buf_bar = 0;
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices,
+                    f->plane_count);
+
+    /* Decode */
+    decode_shader = &fv->decode[f->use32bit][f->ac == AC_GOLOMB_RICE][is_rgb];
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
+                                    1, 0, 0,
+                                    slice_state,
+                                    0, fp->slice_data_size*f->slice_count,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
+                                  decode_dst, decode_dst_view,
+                                  1, 1,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
+    ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    /* Sync between reset and decode shaders */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_state->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_state->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_state->buf,
+        .offset = fp->slice_data_size*f->slice_count,
+        .size = slice_state->size - fp->slice_data_size*f->slice_count,
+    };
+
+    /* Input frame barrier */
+    ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    slice_state->stage = buf_bar[0].dstStageMask;
+    slice_state->access = buf_bar[0].dstAccessMask;
+    nb_img_bar = 0;
+    nb_buf_bar = 0;
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    /* RCT */
+    if (is_rgb) {
+        FFVulkanShader *rct_shader = &fv->rct[f->use32bit];
+        FFv1VkRCTParameters pd_rct;
+
+        ff_vk_shader_update_desc_buffer(&ctx->s, exec, rct_shader,
+                                        1, 0, 0,
+                                        slice_state,
+                                        0, fp->slice_data_size*f->slice_count,
+                                        VK_FORMAT_UNDEFINED);
+        /* Binding 1 reads the intermediate frame, binding 2 writes the
+         * output frame */
+        ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
+                                      decode_dst, decode_dst_view,
+                                      1, 1,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      VK_NULL_HANDLE);
+        ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
+                                      f->picture.f, vp->view.out,
+                                      1, 2,
+                                      VK_IMAGE_LAYOUT_GENERAL,
+                                      VK_NULL_HANDLE);
+
+        ff_vk_exec_bind_shader(&ctx->s, exec, rct_shader);
+
+        pd_rct = (FFv1VkRCTParameters) {
+            .offset = 1 << bits,
+            .bits = bits,
+            .planar_rgb = ff_vk_mt_is_np_rgb(sw_format) &&
+                          (ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1),
+            .transparency = f->transparency,
+        };
+        ff_vk_set_perm(sw_format, pd_rct.fmt_lut);
+
+        ff_vk_shader_update_push_const(&ctx->s, exec, rct_shader,
+                                       VK_SHADER_STAGE_COMPUTE_BIT,
+                                       0, sizeof(pd_rct), &pd_rct);
+
+        ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
+                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                            VK_ACCESS_SHADER_READ_BIT,
+                            VK_IMAGE_LAYOUT_GENERAL,
+                            VK_QUEUE_FAMILY_IGNORED);
+        ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
+                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                            VK_ACCESS_SHADER_WRITE_BIT,
+                            VK_IMAGE_LAYOUT_GENERAL,
+                            VK_QUEUE_FAMILY_IGNORED);
+
+        vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+        nb_img_bar = 0;
+
+        vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+    }
+
+    err = ff_vk_exec_submit(&ctx->s, exec);
+    if (err < 0)
+        return err;
+
+    return 0;
+
+fail:
+    /* Only reached through RET(): hand the error to the caller instead of
+     * reporting success for a frame that was never submitted. */
+    return err;
+}
+
+/**
+ * Append the GLSL preamble used by the FFv1 decoder shaders to shd->src:
+ * the DECODE define, the context/quant-table constants, the sample types
+ * derived from the requested bit width, and the range coder and common
+ * FFv1 sources.
+ *
+ * @param shd      shader whose source buffer is appended to
+ * @param use32bit non-zero selects 32-bit sample types, zero selects 16-bit
+ */
+static void define_shared_code(FFVulkanShader *shd, int use32bit)
+{
+    int sample_bits = 16;
+
+    if (use32bit)
+        sample_bits = 32;
+
+    GLSLC(0, #define DECODE);
+
+    av_bprintf(&shd->src,
+               "#define CONTEXT_SIZE %i\n"
+               "#define MAX_QUANT_TABLE_MASK 0x%x\n",
+               CONTEXT_SIZE, MAX_QUANT_TABLE_MASK);
+
+    /* Scalar and vector sample types, e.g. int16_t / i16vec2 / i16vec3 */
+    GLSLF(0, #define TYPE int%i_t, sample_bits);
+    GLSLF(0, #define VTYPE2 i%ivec2, sample_bits);
+    GLSLF(0, #define VTYPE3 i%ivec3, sample_bits);
+
+    GLSLD(ff_source_rangecoder_comp);
+    GLSLD(ff_source_ffv1_common_comp);
+}
+
+/**
+ * Build the "ffv1_dec_setup" compute shader.
+ *
+ * The GLSL source is assembled in shd->src in this order: common helpers,
+ * push constants, table-size defines, a descriptor set of three uniform
+ * buffers, the shared decoder preamble, a descriptor set of three storage
+ * buffers, and finally the setup shader body. The result is compiled with
+ * spv, linked, and registered with pool.
+ *
+ * @return a negative error code on failure, otherwise the non-negative
+ *         result of the last step
+ */
+static int init_setup_shader(FFV1Context *f, FFVulkanContext *s,
+                             FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                             FFVulkanShader *shd)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *bindings;
+
+    uint8_t *spirv;
+    size_t spirv_size;
+    void *compiler_priv = NULL;
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    av_bprintf(&shd->src,
+               "#define MAX_QUANT_TABLES %i\n"
+               "#define MAX_CONTEXT_INPUTS %i\n"
+               "#define MAX_QUANT_TABLE_SIZE %i\n",
+               MAX_QUANT_TABLES, MAX_CONTEXT_INPUTS, MAX_QUANT_TABLE_SIZE);
+
+    /* First descriptor set: constant lookup tables */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "crc_ieee_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint32_t crc_ieee[256];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 3, 1, 0));
+
+    define_shared_code(shd, 0 /* Irrelevant */);
+
+    /* Second descriptor set: per-frame slice buffers */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+        {
+            .name        = "slice_offsets_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali   = "readonly",
+            .buf_content = "uint32_t slice_offsets",
+            .buf_elems   = 2*f->max_slice_count,
+        },
+        {
+            .name        = "slice_status_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali   = "writeonly",
+            .buf_content = "uint32_t slice_crc_mismatch",
+            .buf_elems   = 2*f->max_slice_count,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 3, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_setup_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spirv, &spirv_size, "main",
+                            &compiler_priv));
+    RET(ff_vk_shader_link(s, shd, spirv, spirv_size, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* The compiler's per-shader state is released on every exit path */
+    if (compiler_priv)
+        spv->free_shader(spv, &compiler_priv);
+
+    return err;
+}
+
+/**
+ * Build one "ffv1_dec_reset" compute shader variant.
+ *
+ * @param ac entropy coder the variant is built for; AC_GOLOMB_RICE adds the
+ *           GOLOMB define and the VLC source, any other value omits both
+ * @return a negative error code on failure, otherwise the non-negative
+ *         result of the last step
+ */
+static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
+                             FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                             FFVulkanShader *shd, int ac)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *bindings;
+    const int golomb = ac == AC_GOLOMB_RICE;
+
+    uint8_t *spirv;
+    size_t spirv_size;
+    void *compiler_priv = NULL;
+
+    /* X size of the workgroup: the device limit, capped at 1024 */
+    int wg_size_x = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          wg_size_x, 1, 1,
+                          0));
+
+    if (golomb)
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    /* Push constants; registered below with sizeof(FFv1VkResetParameters) */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {);
+    GLSLC(1,    u8buf slice_state;);
+    GLSLC(1,    uint plane_state_size;);
+    GLSLC(1,    uint context_count;);
+    GLSLC(1,    uint8_t codec_planes;);
+    GLSLC(1,    uint8_t key_frame;);
+    GLSLC(1,    uint8_t version;);
+    GLSLC(1,    uint8_t micro_version;);
+    GLSLC(1,    uint8_t padding[1];);
+    GLSLC(0, };);
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    av_bprintf(&shd->src,
+               "#define MAX_QUANT_TABLES %i\n"
+               "#define MAX_CONTEXT_INPUTS %i\n"
+               "#define MAX_QUANT_TABLE_SIZE %i\n",
+               MAX_QUANT_TABLES, MAX_CONTEXT_INPUTS, MAX_QUANT_TABLE_SIZE);
+
+    /* First descriptor set: constant lookup tables */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 2, 1, 0));
+
+    define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */);
+    if (golomb)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    /* Second descriptor set: per-frame slice contexts */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 1, 0, 0));
+
+    GLSLD(ff_source_ffv1_reset_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spirv, &spirv_size, "main",
+                            &compiler_priv));
+    RET(ff_vk_shader_link(s, shd, spirv, spirv_size, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* The compiler's per-shader state is released on every exit path */
+    if (compiler_priv)
+        spv->free_shader(spv, &compiler_priv);
+
+    return err;
+}
+
+/**
+ * Build one "ffv1_dec" compute shader variant.
+ *
+ * @param frames_ctx frames context whose sw_format determines the layout and
+ *                   plane count of the "dst" storage image binding
+ * @param use32bit   forwarded to define_shared_code() to pick sample types
+ * @param ac         entropy coder; AC_GOLOMB_RICE adds the GOLOMB define and
+ *                   the VLC source
+ * @param rgb        non-zero adds the RGB define
+ * @return a negative error code on failure, otherwise the non-negative
+ *         result of the last step
+ */
+static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
+                              FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                              FFVulkanShader *shd, AVHWFramesContext *frames_ctx,
+                              int use32bit, int ac, int rgb)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *bindings;
+    const int golomb = ac == AC_GOLOMB_RICE;
+
+    uint8_t *spirv;
+    size_t spirv_size;
+    void *compiler_priv = NULL;
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+
+    /* Variant selection defines come before any included source */
+    if (golomb)
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+
+    if (rgb)
+        av_bprintf(&shd->src, "#define RGB\n");
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    av_bprintf(&shd->src,
+               "#define MAX_QUANT_TABLES %i\n"
+               "#define MAX_CONTEXT_INPUTS %i\n"
+               "#define MAX_QUANT_TABLE_SIZE %i\n",
+               MAX_QUANT_TABLES, MAX_CONTEXT_INPUTS, MAX_QUANT_TABLE_SIZE);
+
+    /* First descriptor set: constant lookup tables */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 2, 1, 0));
+
+    define_shared_code(shd, use32bit);
+    if (golomb)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    /* Second descriptor set: slice contexts and the destination planes */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+        {
+            .name        = "dst",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions  = 2,
+            .mem_layout  = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
+                                                FF_VK_REP_NATIVE),
+            .elems       = av_pix_fmt_count_planes(frames_ctx->sw_format),
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 2, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spirv, &spirv_size, "main",
+                            &compiler_priv));
+    RET(ff_vk_shader_link(s, shd, spirv, spirv_size, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* The compiler's per-shader state is released on every exit path */
+    if (compiler_priv)
+        spv->free_shader(spv, &compiler_priv);
+
+    return err;
+}
+
+/**
+ * Build one "ffv1_rct" compute shader variant.
+ *
+ * @param use32bit forwarded to define_shared_code() to pick sample types
+ * @param src_ctx  frames context describing the read-only "src" images
+ * @param dst_ctx  frames context describing the write-only "dst" images
+ * @return a negative error code on failure, otherwise the non-negative
+ *         result of the last step
+ */
+static int init_rct_shader(FFV1Context *f, FFVulkanContext *s,
+                           FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                           FFVulkanShader *shd, int use32bit,
+                           AVHWFramesContext *src_ctx, AVHWFramesContext *dst_ctx)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *bindings;
+
+    uint8_t *spirv;
+    size_t spirv_size;
+    void *compiler_priv = NULL;
+
+    /* Square workgroup: side is the truncated square root of the device's
+     * invocation limit */
+    int wg_side = sqrt(s->props.properties.limits.maxComputeWorkGroupInvocations);
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_rct",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          wg_side, wg_side, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    /* Push constants; registered below with sizeof(FFv1VkRCTParameters) */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {);
+    GLSLC(1,    ivec4 fmt_lut;);
+    GLSLC(1,    int offset;);
+    GLSLC(1,    uint8_t bits;);
+    GLSLC(1,    uint8_t planar_rgb;);
+    GLSLC(1,    uint8_t transparency;);
+    GLSLC(1,    uint8_t version;);
+    GLSLC(1,    uint8_t micro_version;);
+    GLSLC(1,    uint8_t padding[3];);
+    GLSLC(0, };);
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    av_bprintf(&shd->src,
+               "#define MAX_QUANT_TABLES %i\n"
+               "#define MAX_CONTEXT_INPUTS %i\n"
+               "#define MAX_QUANT_TABLE_SIZE %i\n",
+               MAX_QUANT_TABLES, MAX_CONTEXT_INPUTS, MAX_QUANT_TABLE_SIZE);
+
+    /* First descriptor set: constant lookup tables */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 2, 1, 0));
+
+    define_shared_code(shd, use32bit);
+
+    /* Second descriptor set: slice contexts, source and destination planes */
+    bindings = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx",
+            .buf_elems   = f->max_slice_count,
+        },
+        {
+            .name        = "src",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions  = 2,
+            .mem_layout  = ff_vk_shader_rep_fmt(src_ctx->sw_format,
+                                                FF_VK_REP_NATIVE),
+            .mem_quali   = "readonly",
+            .elems       = av_pix_fmt_count_planes(src_ctx->sw_format),
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+        {
+            .name        = "dst",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions  = 2,
+            .mem_layout  = ff_vk_shader_rep_fmt(dst_ctx->sw_format,
+                                                FF_VK_REP_NATIVE),
+            .mem_quali   = "writeonly",
+            .elems       = av_pix_fmt_count_planes(dst_ctx->sw_format),
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, bindings, 3, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_rct_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spirv, &spirv_size, "main",
+                            &compiler_priv));
+    RET(ff_vk_shader_link(s, shd, spirv, spirv_size, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* The compiler's per-shader state is released on every exit path */
+    if (compiler_priv)
+        spv->free_shader(spv, &compiler_priv);
+
+    return err;
+}
+
+/**
+ * Allocate and initialise a Vulkan frames context for intermediate images.
+ *
+ * The context uses the given software format, the dimensions of s->frames
+ * rounded up to a multiple of 32, optimal tiling, storage-image usage and
+ * mutable-format images.
+ *
+ * @param avctx     logging context
+ * @param s         Vulkan context providing the device and frame dimensions
+ * @param dst       receives the new frames context; NULL on failure
+ * @param sw_format software pixel format of the frames
+ * @return 0 on success, a negative error code otherwise
+ */
+static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
+                         AVBufferRef **dst, enum AVPixelFormat sw_format)
+{
+    AVHWFramesContext *hwfc;
+    AVVulkanFramesContext *vkfc;
+    int ret;
+
+    *dst = av_hwframe_ctx_alloc(s->device_ref);
+    if (*dst == NULL)
+        return AVERROR(ENOMEM);
+
+    hwfc            = (AVHWFramesContext *)((*dst)->data);
+    hwfc->format    = AV_PIX_FMT_VULKAN;
+    hwfc->sw_format = sw_format;
+    hwfc->width     = FFALIGN(s->frames->width, 32);
+    hwfc->height    = FFALIGN(s->frames->height, 32);
+
+    vkfc            = hwfc->hwctx;
+    vkfc->tiling    = VK_IMAGE_TILING_OPTIMAL;
+    vkfc->usage     = VK_IMAGE_USAGE_STORAGE_BIT;
+    vkfc->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+
+    ret = av_hwframe_ctx_init(*dst);
+    if (ret >= 0)
+        return 0;
+
+    av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n",
+           av_get_pix_fmt_name(sw_format), av_err2str(ret));
+    av_buffer_unref(dst);
+
+    return ret;
+}
+
+/**
+ * Release the FFv1-specific state held in ctx->sd_ctx: all shader variants,
+ * the intermediate frame contexts, the constant-table buffers and the
+ * buffer pools.
+ */
+static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
+{
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+    FFVulkanContext *vkctx = &ctx->s;
+    int n;
+
+    ff_vk_shader_free(vkctx, &fv->setup);
+
+    /* Intermediate frame contexts: 16/32 bit */
+    for (n = 0; n < 2; n++)
+        av_buffer_unref(&fv->intermediate_frames_ref[n]);
+
+    /* Reset shaders: AC/Golomb */
+    for (n = 0; n < 2; n++)
+        ff_vk_shader_free(vkctx, &fv->reset[n]);
+
+    /* Decode shaders, indexed [16/32 bit][AC/Golomb][Normal/RGB]; the three
+     * bits of n walk the table with the last index varying fastest */
+    for (n = 0; n < 8; n++)
+        ff_vk_shader_free(vkctx, &fv->decode[n >> 2][(n >> 1) & 1][n & 1]);
+
+    /* RCT shaders: 16/32 bit */
+    for (n = 0; n < 2; n++)
+        ff_vk_shader_free(vkctx, &fv->rct[n]);
+
+    /* Constant tables */
+    ff_vk_free_buf(vkctx, &fv->quant_buf);
+    ff_vk_free_buf(vkctx, &fv->rangecoder_static_buf);
+    ff_vk_free_buf(vkctx, &fv->crc_tab_buf);
+
+    /* Per-frame buffer pools */
+    av_buffer_pool_uninit(&fv->tmp_data_pool);
+    av_buffer_pool_uninit(&fv->slice_state_pool);
+    av_buffer_pool_uninit(&fv->slice_offset_pool);
+    av_buffer_pool_uninit(&fv->slice_status_pool);
+}
+
+/**
+ * Hwaccel init callback: create the GPU objects the FFv1 Vulkan decoder
+ * needs (intermediate frame contexts, every shader variant, the constant
+ * tables) and bind the constant tables to the setup and decode shaders.
+ *
+ * The SPIR-V compiler is only used while the shaders are being built, so it
+ * is released before returning on both the success and the error path.
+ *
+ * @return a negative error code on failure, otherwise the non-negative
+ *         result of the last step
+ */
+static int vk_decode_ffv1_init(AVCodecContext *avctx)
+{
+    int err;
+    FFV1Context *f = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = NULL;
+    FFv1VulkanDecodeContext *fv;
+    FFVkSPIRVCompiler *spv;
+
+    if (f->version < 3)
+        return AVERROR(ENOTSUP);
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* From here on every exit goes through fail: so that spv is released */
+    RET(ff_vk_decode_init(avctx));
+    ctx = dec->shared_ctx;
+
+    fv = ctx->sd_ctx = av_mallocz(sizeof(*fv));
+    if (!fv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Registered before anything is created, so partially initialised state
+     * is handed to vk_decode_ffv1_uninit() when the shared context is freed */
+    ctx->sd_ctx_free = &vk_decode_ffv1_uninit;
+
+    /* Intermediate frame pool for RCT */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        RET(init_indirect(avctx, &ctx->s, &fv->intermediate_frames_ref[i],
+                          i ? AV_PIX_FMT_RGBA128 : AV_PIX_FMT_RGBA64));
+    }
+
+    /* Setup shader */
+    RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup));
+
+    /* Reset shaders; index 1 is the Golomb variant, matching the
+     * fv->reset[f->ac == AC_GOLOMB_RICE] lookup at decode time */
+    for (int i = 0; i < 2; i++) { /* AC/Golomb */
+        RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool,
+                              spv, &fv->reset[i],
+                              !i ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE));
+    }
+
+    /* Decode shaders */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        for (int j = 0; j < 2; j++) { /* AC/Golomb */
+            for (int k = 0; k < 2; k++) { /* Normal/RGB */
+                /* RGB variants write to the intermediate frames, the others
+                 * directly to the output frames */
+                AVHWFramesContext *frames_ctx;
+                frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data :
+                                 (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+                RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool,
+                                       spv, &fv->decode[i][j][k],
+                                       frames_ctx,
+                                       i,
+                                       !j ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE,
+                                       k));
+            }
+        }
+    }
+
+    /* RCT shaders */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        RET(init_rct_shader(f, &ctx->s, &ctx->exec_pool,
+                            spv, &fv->rct[i], i,
+                            (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data,
+                            (AVHWFramesContext *)avctx->hw_frames_ctx->data));
+    }
+
+    /* Range coder data */
+    RET(ff_ffv1_vk_init_state_transition_data(&ctx->s,
+                                              &fv->rangecoder_static_buf,
+                                              f));
+
+    /* Quantization table data */
+    RET(ff_ffv1_vk_init_quant_table_data(&ctx->s,
+                                         &fv->quant_buf,
+                                         f));
+
+    /* CRC table buffer */
+    RET(ff_ffv1_vk_init_crc_table_data(&ctx->s,
+                                       &fv->crc_tab_buf,
+                                       f));
+
+    /* Update setup global descriptors.
+     * NOTE(review): only bindings 0 and 1 of the setup shader are written
+     * here, and the reset and RCT shaders get no constant-table updates at
+     * all - confirm that this is intentional. */
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 1, 0,
+                                        &fv->crc_tab_buf,
+                                        0, fv->crc_tab_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+    /* Update decode global descriptors */
+    for (int i = 0; i < 2; i++) { /* 16/32 bit */
+        for (int j = 0; j < 2; j++) { /* AC/Golomb */
+            for (int k = 0; k < 2; k++) { /* Normal/RGB */
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 0, 0,
+                                                    &fv->rangecoder_static_buf,
+                                                    0, fv->rangecoder_static_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 1, 0,
+                                                    &fv->quant_buf,
+                                                    0, fv->quant_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+            }
+        }
+    }
+
+fail:
+    /* Reached on success as well: the compiler is not needed after init */
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/**
+ * Free the per-picture hwaccel data of one frame.
+ *
+ * After the generic Vulkan picture state has been freed, the slice status
+ * buffer is scanned (only when crc_checked is set) and every non-zero entry
+ * is logged as a CRC mismatch. Finally all buffer references still held by
+ * the picture are dropped.
+ *
+ * @param _hwctx opaque holding the AVHWDeviceContext, also used for logging
+ * @param data   the FFv1VulkanDecodePicture to clean up
+ */
+static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    AVHWDeviceContext *dev = _hwctx.nc;
+    FFv1VulkanDecodePicture *pic = data;
+
+    ff_vk_decode_free_frame(dev, &pic->vp);
+
+    if (pic->crc_checked) {
+        FFVkBuffer *status = (FFVkBuffer *)pic->slice_status_buf->data;
+        int slice = 0;
+
+        /* One 32-bit status word per submitted slice; zero means no mismatch */
+        while (slice < pic->slice_num) {
+            uint32_t res = AV_RN32(status->mapped_mem + slice*sizeof(uint32_t));
+
+            if (res != 0)
+                av_log(dev, AV_LOG_ERROR, "CRC mismatch in slice %i, res: 0x%x\n",
+                       slice, res);
+            slice++;
+        }
+    }
+
+    av_buffer_unref(&pic->vp.slices_buf);
+    av_buffer_unref(&pic->slice_state);
+    av_buffer_unref(&pic->slice_offset_buf);
+    av_buffer_unref(&pic->slice_status_buf);
+    av_buffer_unref(&pic->tmp_data);
+}
+
+/* Hwaccel descriptor tying the Vulkan callbacks to AV_CODEC_ID_FFV1. */
+const FFHWAccel ff_ffv1_vulkan_hwaccel = {
+    /* Identification */
+    .p.name                = "ffv1_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_FFV1,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+
+    /* Sizes of the private data attached to the codec and to each frame */
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .frame_priv_data_size  = sizeof(FFv1VulkanDecodePicture),
+
+    /* Lifecycle */
+    .init                  = &vk_decode_ffv1_init,
+    .uninit                = &ff_vk_decode_uninit,
+    .flush                 = &ff_vk_decode_flush,
+    .frame_params          = &ff_vk_frame_params,
+    .decode_params         = &ff_vk_params_invalidate,
+    .update_thread_context = &ff_vk_update_thread_context,
+
+    /* Per-frame callbacks */
+    .start_frame           = &vk_ffv1_start_frame,
+    .decode_slice          = &vk_ffv1_decode_slice,
+    .end_frame             = &vk_ffv1_end_frame,
+    .free_frame_priv       = &vk_ffv1_free_frame_priv,
+
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem() Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 13/13] ffv1: add a Vulkan-based decoder Lynne
@ 2025-03-10 3:14 ` Andreas Rheinhardt
2025-03-10 17:42 ` Lynne
2 siblings, 1 reply; 15+ messages in thread
From: Andreas Rheinhardt @ 2025-03-10 3:14 UTC (permalink / raw)
To: ffmpeg-devel
Lynne:
> ---
> libavcodec/ffv1.h | 3 +++
> libavcodec/ffv1dec.c | 19 +++++++++++++++++--
> 2 files changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
> index 8c0e71284d..860a5c14b1 100644
> --- a/libavcodec/ffv1.h
> +++ b/libavcodec/ffv1.h
> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
> * NOT shared between frame threads.
> */
> uint8_t frame_damaged;
> +
> + /* Reference to the current packet */
> + AVPacket *pkt_ref;
> } FFV1Context;
>
> int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
> index eaa21eebdf..6396f22f79 100644
> --- a/libavcodec/ffv1dec.c
> +++ b/libavcodec/ffv1dec.c
> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
> f->pix_fmt = AV_PIX_FMT_NONE;
> f->configured_pix_fmt = AV_PIX_FMT_NONE;
>
> + f->pkt_ref = av_packet_alloc();
> + if (!f->pkt_ref)
> + return AVERROR(ENOMEM);
> +
> if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
> return ret;
>
> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
>
> /* Start */
> if (hwaccel) {
> + ret = av_packet_ref(f->pkt_ref, avpkt);
> + if (ret < 0)
> + return ret;
> +
> ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
> if (ret < 0)
> return ret;
> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
> uint32_t len;
> ret = find_next_slice(avctx, avpkt->data, buf_end, i,
> &pos, &len);
> - if (ret < 0)
> + if (ret < 0) {
> + av_packet_unref(f->pkt_ref);
> return ret;
> + }
>
> buf_end -= len;
>
> ret = hwaccel->decode_slice(avctx, pos, len);
> - if (ret < 0)
> + if (ret < 0) {
> + av_packet_unref(f->pkt_ref);
> return ret;
> + }
> }
> +
> + av_packet_unref(f->pkt_ref);
> } else {
> ret = decode_slices(avctx, c, avpkt);
> if (ret < 0)
> @@ -827,6 +841,7 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx)
> ff_progress_frame_unref(&s->last_picture);
> av_refstruct_unref(&s->hwaccel_last_picture_private);
>
> + av_packet_free(&s->pkt_ref);
> ff_ffv1_close(s);
>
> return 0;
Why not simply use a const AVPacket*?
- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context
2025-03-10 3:14 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Andreas Rheinhardt
@ 2025-03-10 17:42 ` Lynne
0 siblings, 0 replies; 15+ messages in thread
From: Lynne @ 2025-03-10 17:42 UTC (permalink / raw)
To: ffmpeg-devel
On 10/03/2025 04:14, Andreas Rheinhardt wrote:
> Lynne:
>> ---
>> libavcodec/ffv1.h | 3 +++
>> libavcodec/ffv1dec.c | 19 +++++++++++++++++--
>> 2 files changed, 20 insertions(+), 2 deletions(-)
>>
>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>> index 8c0e71284d..860a5c14b1 100644
>> --- a/libavcodec/ffv1.h
>> +++ b/libavcodec/ffv1.h
>> @@ -174,6 +174,9 @@ typedef struct FFV1Context {
>> * NOT shared between frame threads.
>> */
>> uint8_t frame_damaged;
>> +
>> + /* Reference to the current packet */
>> + AVPacket *pkt_ref;
>> } FFV1Context;
>>
>> int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s);
>> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
>> index eaa21eebdf..6396f22f79 100644
>> --- a/libavcodec/ffv1dec.c
>> +++ b/libavcodec/ffv1dec.c
>> @@ -469,6 +469,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
>> f->pix_fmt = AV_PIX_FMT_NONE;
>> f->configured_pix_fmt = AV_PIX_FMT_NONE;
>>
>> + f->pkt_ref = av_packet_alloc();
>> + if (!f->pkt_ref)
>> + return AVERROR(ENOMEM);
>> +
>> if ((ret = ff_ffv1_common_init(avctx, f)) < 0)
>> return ret;
>>
>> @@ -701,6 +705,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
>>
>> /* Start */
>> if (hwaccel) {
>> + ret = av_packet_ref(f->pkt_ref, avpkt);
>> + if (ret < 0)
>> + return ret;
>> +
>> ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
>> if (ret < 0)
>> return ret;
>> @@ -720,15 +728,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
>> uint32_t len;
>> ret = find_next_slice(avctx, avpkt->data, buf_end, i,
>> &pos, &len);
>> - if (ret < 0)
>> + if (ret < 0) {
>> + av_packet_unref(f->pkt_ref);
>> return ret;
>> + }
>>
>> buf_end -= len;
>>
>> ret = hwaccel->decode_slice(avctx, pos, len);
>> - if (ret < 0)
>> + if (ret < 0) {
>> + av_packet_unref(f->pkt_ref);
>> return ret;
>> + }
>> }
>> +
>> + av_packet_unref(f->pkt_ref);
>> } else {
>> ret = decode_slices(avctx, c, avpkt);
>> if (ret < 0)
>> @@ -827,6 +841,7 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx)
>> ff_progress_frame_unref(&s->last_picture);
>> av_refstruct_unref(&s->hwaccel_last_picture_private);
>>
>> + av_packet_free(&s->pkt_ref);
>> ff_ffv1_close(s);
>>
>> return 0;
>
> Why not simply use a const AVPacket*?
No reason. Fixed locally.
Thanks.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2025-03-10 17:42 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-10 3:08 [FFmpeg-devel] [PATCH 01/13] vulkan: rename ff_vk_set_descriptor_image to ff_vk_shader_update_img Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 02/13] vulkan: add ff_vk_create_imageview Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 03/13] vulkan: copy host-mapping buffer code from hwcontext Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 04/13] vulkan: workaround BGR storage image undefined behaviour Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 05/13] vulkan_decode: support software-defined decoders Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 06/13] vulkan_decode: support multiple image views Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 07/13] vulkan_decode: adjust number of async contexts created Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 08/13] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 09/13] vulkan: unify handling of BGR and simplify ffv1_rct Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 10/13] ffv1dec: add support for hwaccels Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 12/13] vulkan: add ff_vk_exec_add_dep_wait_sem() Lynne
2025-03-10 3:08 ` [FFmpeg-devel] [PATCH 13/13] ffv1: add a Vulkan-based decoder Lynne
2025-03-10 3:14 ` [FFmpeg-devel] [PATCH 11/13] ffv1dec: reference the current packet into the main context Andreas Rheinhardt
2025-03-10 17:42 ` Lynne
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git