* [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth @ 2025-02-24 8:04 Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 02/12] vulkan: add ff_vk_create_imageview Lynne ` (9 more replies) 0 siblings, 10 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne The issue is that this could consume gigabytes of VRAM at higher resolutions for not that much of a speedup. Automatic detection was not a good idea as we can't know how much VRAM is actually free. Just remove it. --- libavcodec/ffv1enc_vulkan.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 3d7ee073aa..6a12ee2055 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -1631,11 +1631,6 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) max_heap_size = max_heap_size - (max_heap_size >> 3); } - if (!fv->async_depth) { - fv->async_depth = FFMIN(fv->qf->num, FFMAX(max_heap_size / maxsize, 1)); - fv->async_depth = FFMAX(fv->async_depth, 1); - } - av_log(avctx, AV_LOG_INFO, "Async buffers: %zuMiB per context, %zuMiB total, depth: %i\n", maxsize / (1024*1024), (fv->async_depth * maxsize) / (1024*1024), @@ -1820,7 +1815,7 @@ static const AVOption vulkan_encode_ffv1_options[] = { { .i64 = 0 }, 0, 1, VE }, { "async_depth", "Internal parallelization depth", OFFSET(async_depth), AV_OPT_TYPE_INT, - { .i64 = 0 }, 0, INT_MAX, VE }, + { .i64 = 1 }, 1, INT_MAX, VE }, { NULL } }; -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 02/12] vulkan: add ff_vk_create_imageview 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne @ 2025-02-24 8:04 ` Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 03/12] vulkan: copy host-mapping buffer code from hwcontext Lynne ` (8 subsequent siblings) 9 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavutil/vulkan.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ libavutil/vulkan.h | 7 +++++++ 2 files changed, 58 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 31610e2d94..de6260b2f8 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1586,6 +1586,57 @@ static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt) return VK_FORMAT_UNDEFINED; } +int ff_vk_create_imageview(FFVulkanContext *s, + VkImageView *img_view, VkImageAspectFlags *aspect, + AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + const int nb_images = ff_vk_count_images(vkf); + + VkImageViewUsageCreateInfo view_usage_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = vkfc->usage & + (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)), + }; + VkImageViewCreateInfo view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &view_usage_info, + .image = vkf->img[FFMIN(plane, nb_images - 1)], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = map_fmt_to_rep(rep_fmts[plane], rep_fmt), + .components = ff_comp_identity_map, + .subresourceRange = { + .aspectMask = ff_vk_aspect_flag(f, plane), + .levelCount = 1, + 
.layerCount = 1, + }, + }; + if (view_create_info.format == VK_FORMAT_UNDEFINED) { + av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation " + "of format %i and mode %i\n", + rep_fmts[plane], rep_fmt); + return AVERROR(EINVAL); + } + + ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, + s->hwctx->alloc, img_view); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + *aspect = view_create_info.subresourceRange.aspectMask; + + return 0; +} + int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f, enum FFVkShaderRepFormat rep_fmt) diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 8690c13b3d..c3ddf0cd6b 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -457,6 +457,13 @@ int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f); void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); +/** + * Create a single imageview for a given plane. + */ +int ff_vk_create_imageview(FFVulkanContext *s, + VkImageView *img_view, VkImageAspectFlags *aspect, + AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt); + /** * Create an imageview and add it as a dependency to an execution. */ -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 03/12] vulkan: copy host-mapping buffer code from hwcontext 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 02/12] vulkan: add ff_vk_create_imageview Lynne @ 2025-02-24 8:04 ` Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 04/12] vulkan_decode: support software-defined decoders Lynne ` (7 subsequent siblings) 9 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne This is useful elsewhere. --- libavutil/vulkan.c | 153 ++++++++++++++++++++++++++++++++++++++++++++- libavutil/vulkan.h | 17 ++++- 2 files changed, 167 insertions(+), 3 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index de6260b2f8..4d60ac1b3a 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1115,6 +1115,8 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; + + av_assert0(!buf[i]->host_ref); if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; flush_list[flush_count++] = flush_buf; @@ -1146,12 +1148,18 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) if (!buf || !s->hwctx) return; - if (buf->mapped_mem) + if (buf->mapped_mem && !buf->host_ref) ff_vk_unmap_buffer(s, buf, 0); if (buf->buf != VK_NULL_HANDLE) vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); if (buf->mem != VK_NULL_HANDLE) vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); + if (buf->host_ref) + av_buffer_unref(&buf->host_ref); + + buf->buf = VK_NULL_HANDLE; + buf->mem = VK_NULL_HANDLE; + buf->mapped_mem = NULL; } static void free_data_buf(void *opaque, uint8_t *data) @@ -1228,6 +1236,147 @@ int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, return 0; } +static int create_mapped_buffer(FFVulkanContext *s, + FFVkBuffer *vkb, VkBufferUsageFlags usage, + size_t 
size, + VkExternalMemoryBufferCreateInfo *create_desc, + VkImportMemoryHostPointerInfoEXT *import_desc, + VkMemoryHostPointerPropertiesEXT props) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkBufferCreateInfo buf_spawn = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = create_desc, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .size = size, + }; + VkMemoryRequirements req = { + .size = size, + .alignment = s->hprops.minImportedHostPointerAlignment, + .memoryTypeBits = props.memoryTypeBits, + }; + + err = ff_vk_alloc_mem(s, &req, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + import_desc, &vkb->flags, &vkb->mem); + if (err < 0) + return err; + + ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &vkb->buf); + if (ret != VK_SUCCESS) { + vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc); + return AVERROR_EXTERNAL; + } + + ret = vk->BindBufferMemory(s->hwctx->act_dev, vkb->buf, vkb->mem, 0); + if (ret != VK_SUCCESS) { + vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc); + vk->DestroyBuffer(s->hwctx->act_dev, vkb->buf, s->hwctx->alloc); + return AVERROR_EXTERNAL; + } + + return 0; +} + +int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst, + AVBufferRef *src, VkBufferUsageFlags usage) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkExternalMemoryBufferCreateInfo create_desc = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + }; + VkMemoryAllocateFlagsInfo alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + }; + VkImportMemoryHostPointerInfoEXT import_desc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + .pNext = usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? 
&alloc_flags : NULL, + }; + VkMemoryHostPointerPropertiesEXT props; + + AVBufferRef *ref; + FFVkBuffer *vkb; + size_t offs; + size_t buffer_size; + + *dst = NULL; + + /* Get the previous point at which mapping was possible and use it */ + offs = (uintptr_t)src->data % s->hprops.minImportedHostPointerAlignment; + import_desc.pHostPointer = src->data - offs; + + props = (VkMemoryHostPointerPropertiesEXT) { + VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, + }; + ret = vk->GetMemoryHostPointerPropertiesEXT(s->hwctx->act_dev, + import_desc.handleType, + import_desc.pHostPointer, + &props); + if (!(ret == VK_SUCCESS && props.memoryTypeBits)) + return AVERROR(EINVAL); + + /* Ref the source buffer */ + ref = av_buffer_ref(src); + if (!ref) + return AVERROR(ENOMEM); + + /* Add the offset at the start, which gets ignored */ + buffer_size = offs + src->size; + buffer_size = FFALIGN(buffer_size, s->props.properties.limits.minMemoryMapAlignment); + buffer_size = FFALIGN(buffer_size, s->hprops.minImportedHostPointerAlignment); + + /* Create a buffer struct */ + vkb = av_mallocz(sizeof(*vkb)); + if (!vkb) { + av_buffer_unref(&ref); + return AVERROR(ENOMEM); + } + + err = create_mapped_buffer(s, vkb, usage, + buffer_size, &create_desc, &import_desc, + props); + if (err < 0) { + av_buffer_unref(&ref); + av_free(vkb); + return err; + } + + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + VkBufferDeviceAddressInfo address_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .buffer = vkb->buf, + }; + vkb->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info); + } + + vkb->host_ref = ref; + vkb->virtual_offset = offs; + vkb->address += offs; + vkb->mapped_mem = src->data; + vkb->size = buffer_size - offs; + + /* Create a ref */ + *dst = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), + destroy_avvkbuf, s, 0); + if (!(*dst)) { + destroy_avvkbuf(s, (uint8_t *)vkb); + *dst = NULL; + return AVERROR(ENOMEM); + } + + return 0; +} + int 
ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, VkShaderStageFlagBits stage) { @@ -2546,7 +2695,7 @@ int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, } else { VkDescriptorBufferInfo desc_pool_write_info_buf = { .buffer = buf->buf, - .offset = offset, + .offset = buf->virtual_offset + offset, .range = len, }; VkWriteDescriptorSet desc_pool_write_info = { diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index c3ddf0cd6b..327ce2b286 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -96,8 +96,17 @@ typedef struct FFVkBuffer { VkPipelineStageFlags2 stage; VkAccessFlags2 access; - /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE */ + /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE or + * via ff_vk_host_map_buffer */ uint8_t *mapped_mem; + + /* Set by ff_vk_host_map_buffer. This is the offset at which the buffer data + * actually begins at. + * The address and mapped_mem fields will be offset by this amount. */ + size_t virtual_offset; + + /* If host mapping, reference to the backing host memory buffer */ + AVBufferRef *host_ref; } FFVkBuffer; typedef struct FFVkExecContext { @@ -523,6 +532,12 @@ int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, void *create_pNext, size_t size, VkMemoryPropertyFlagBits mem_props); +/** Maps a system RAM buffer into a Vulkan buffer. + * References the source buffer. + */ +int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst, + AVBufferRef *src, VkBufferUsageFlags usage); + /** * Create a sampler. */ -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 04/12] vulkan_decode: support software-defined decoders 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 02/12] vulkan: add ff_vk_create_imageview Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 03/12] vulkan: copy host-mapping buffer code from hwcontext Lynne @ 2025-02-24 8:04 ` Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 05/12] vulkan_decode: support multiple image views Lynne ` (6 subsequent siblings) 9 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavcodec/vulkan_decode.c | 191 +++++++++++++++++++++++++++---------- libavcodec/vulkan_decode.h | 10 ++ 2 files changed, 150 insertions(+), 51 deletions(-) diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index c57998108c..084563e8e9 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -24,6 +24,9 @@ #include "libavutil/mem.h" #include "libavutil/vulkan_loader.h" +#define DECODER_IS_SDR(codec_id) \ + ((codec_id) == AV_CODEC_ID_FFV1) + #if CONFIG_H264_VULKAN_HWACCEL extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc; #endif @@ -63,7 +66,9 @@ static const VkVideoProfileInfoKHR *get_video_profile(FFVulkanDecodeShared *ctx, codec_id == AV_CODEC_ID_H264 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR : codec_id == AV_CODEC_ID_HEVC ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR : codec_id == AV_CODEC_ID_AV1 ? 
VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR : - 0; + VK_STRUCTURE_TYPE_MAX_ENUM; + if (profile_struct_type == VK_STRUCTURE_TYPE_MAX_ENUM) + return NULL; profile_list = ff_vk_find_struct(ctx->s.hwfc->create_pnext, VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); @@ -119,13 +124,26 @@ static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx) return avf; } +static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic) +{ + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + vkpic->dpb_frame = NULL; + vkpic->img_view_ref = VK_NULL_HANDLE; + vkpic->img_view_out = VK_NULL_HANDLE; + vkpic->img_view_dest = VK_NULL_HANDLE; + + vkpic->destroy_image_view = vk->DestroyImageView; + vkpic->wait_semaphores = vk->WaitSemaphores; +} + int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, FFVulkanDecodePicture *vkpic, int is_current, int alloc_dpb) { int err; FFVulkanDecodeShared *ctx = dec->shared_ctx; - FFVulkanFunctions *vk = &ctx->s.vkfn; vkpic->slices_size = 0; @@ -134,13 +152,7 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, if (vkpic->img_view_ref) return 0; - vkpic->dpb_frame = NULL; - vkpic->img_view_ref = VK_NULL_HANDLE; - vkpic->img_view_out = VK_NULL_HANDLE; - vkpic->img_view_dest = VK_NULL_HANDLE; - - vkpic->destroy_image_view = vk->DestroyImageView; - vkpic->wait_semaphores = vk->WaitSemaphores; + init_frame(dec, vkpic); if (ctx->common.layered_dpb && alloc_dpb) { vkpic->img_view_ref = ctx->common.layered_view; @@ -183,6 +195,53 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, return 0; } +int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + enum FFVkShaderRepFormat rep_fmt, int alloc_dpb) +{ + int err; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + vkpic->slices_size = 0; + + if (vkpic->img_view_ref) + return 0; + + init_frame(dec, vkpic); + + if 
(ctx->common.layered_dpb && alloc_dpb) { + vkpic->img_view_ref = ctx->common.layered_view; + vkpic->img_aspect_ref = ctx->common.layered_aspect; + } else if (alloc_dpb) { + vkpic->dpb_frame = vk_get_dpb_pool(ctx); + if (!vkpic->dpb_frame) + return AVERROR(ENOMEM); + + err = ff_vk_create_imageview(&ctx->s, + &vkpic->img_view_ref, &vkpic->img_aspect_ref, + vkpic->dpb_frame, 0, rep_fmt); + if (err < 0) + return err; + + vkpic->img_view_dest = vkpic->img_view_ref; + } + + if (!alloc_dpb || is_current) { + err = ff_vk_create_imageview(&ctx->s, + &vkpic->img_view_out, &vkpic->img_aspect, + pic, 0, rep_fmt); + if (err < 0) + return err; + + if (!alloc_dpb) { + vkpic->img_view_ref = vkpic->img_view_out; + vkpic->img_aspect_ref = vkpic->img_aspect; + } + } + + return 0; +} + int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, const uint8_t *data, size_t size, int add_startcode, uint32_t *nb_slices, const uint32_t **offsets) @@ -223,9 +282,14 @@ int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, buf_size = 2 << av_log2(buf_size); err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &new_ref, + DECODER_IS_SDR(avctx->codec_id) ? + (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, ctx->s.hwfc->create_pnext, buf_size, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + (DECODER_IS_SDR(avctx->codec_id) ? 
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0)); if (err < 0) return err; @@ -276,6 +340,10 @@ void ff_vk_decode_flush(AVCodecContext *avctx) VkCommandBuffer cmd_buf; FFVkExecContext *exec; + /* Non-video queues do not need to be reset */ + if (!(get_codecdesc(avctx->codec_id)->decode_op)) + return; + exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); ff_vk_exec_start(&ctx->s, exec); cmd_buf = exec->buf; @@ -551,6 +619,7 @@ static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_re { int err; FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + const FFVulkanDecodeDescriptor *vk_desc = get_codecdesc(avctx->codec_id); AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data; AVVulkanDeviceContext *hwctx = device->hwctx; @@ -569,11 +638,13 @@ static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_re ctx->s.extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions); - if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) { - av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", - VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME); - av_refstruct_unref(&dec->shared_ctx); - return AVERROR(ENOSYS); + if (vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) { + if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME); + av_refstruct_unref(&dec->shared_ctx); + return AVERROR(ENOSYS); + } } err = ff_vk_load_functions(device, &ctx->s.vkfn, ctx->s.extensions, 1, 1); @@ -927,53 +998,61 @@ static void free_profile_data(AVHWFramesContext *hwfc) int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) { - VkFormat vkfmt; + VkFormat vkfmt = VK_FORMAT_UNDEFINED; int err, dedicated_dpb; AVHWFramesContext *frames_ctx = 
(AVHWFramesContext*)hw_frames_ctx->data; AVVulkanFramesContext *hwfc = frames_ctx->hwctx; FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeProfileData *prof; - FFVulkanDecodeShared *ctx; - - frames_ctx->sw_format = AV_PIX_FMT_NONE; + FFVulkanDecodeProfileData *prof = NULL; err = vulkan_decode_bootstrap(avctx, hw_frames_ctx); if (err < 0) return err; - prof = av_mallocz(sizeof(FFVulkanDecodeProfileData)); - if (!prof) - return AVERROR(ENOMEM); + frames_ctx->sw_format = avctx->sw_pix_fmt; - err = vulkan_decode_get_profile(avctx, hw_frames_ctx, - &frames_ctx->sw_format, &vkfmt, - prof, &dedicated_dpb); - if (err < 0) { - av_free(prof); - return err; - } + if (!DECODER_IS_SDR(avctx->codec_id)) { + prof = av_mallocz(sizeof(FFVulkanDecodeProfileData)); + if (!prof) + return AVERROR(ENOMEM); + + err = vulkan_decode_get_profile(avctx, hw_frames_ctx, + &frames_ctx->sw_format, &vkfmt, + prof, &dedicated_dpb); + if (err < 0) { + av_free(prof); + return err; + } + + frames_ctx->user_opaque = prof; + frames_ctx->free = free_profile_data; - frames_ctx->user_opaque = prof; - frames_ctx->free = free_profile_data; + hwfc->create_pnext = &prof->profile_list; + } frames_ctx->width = avctx->coded_width; frames_ctx->height = avctx->coded_height; frames_ctx->format = AV_PIX_FMT_VULKAN; hwfc->format[0] = vkfmt; - hwfc->create_pnext = &prof->profile_list; hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + VK_IMAGE_USAGE_SAMPLED_BIT; - if (!dec->dedicated_dpb) - hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + if (prof) { + FFVulkanDecodeShared *ctx; - ctx = dec->shared_ctx; - if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | - FF_VK_EXT_VIDEO_MAINTENANCE_1)) - hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + if (!dec->dedicated_dpb) + hwfc->usage |= 
VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + + ctx = dec->shared_ctx; + if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | + FF_VK_EXT_VIDEO_MAINTENANCE_1)) + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + } else if (DECODER_IS_SDR(avctx->codec_id)) { + hwfc->usage |= VK_IMAGE_USAGE_STORAGE_BIT; + } return err; } @@ -1075,8 +1154,10 @@ int ff_vk_decode_init(AVCodecContext *avctx) if (err < 0) return err; + vk_desc = get_codecdesc(avctx->codec_id); + profile = get_video_profile(ctx, avctx->codec_id); - if (!profile) { + if ((vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) && !profile) { av_log(avctx, AV_LOG_ERROR, "Video profile missing from frames context!"); return AVERROR(EINVAL); } @@ -1109,9 +1190,11 @@ int ff_vk_decode_init(AVCodecContext *avctx) if (err < 0) goto fail; - err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); - if (err < 0) - goto fail; + if (profile) { + err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); + if (err < 0) + goto fail; + } /* If doing an out-of-place decoding, create a DPB pool */ if (dec->dedicated_dpb || avctx->codec_id == AV_CODEC_ID_AV1) { @@ -1163,12 +1246,18 @@ int ff_vk_decode_init(AVCodecContext *avctx) } session_params_create.videoSession = ctx->common.session; - ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, - s->hwctx->alloc, &ctx->empty_session_params); - if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + if (profile) { + ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, + s->hwctx->alloc, &ctx->empty_session_params); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } else { + /* For SDR decoders, this alignment value will be 0. 
Since this will make + * add_slice() malfunction, set it to a sane default value. */ + ctx->caps.minBitstreamBufferSizeAlignment = AV_INPUT_BUFFER_PADDING_SIZE; } driver_props = &dec->shared_ctx->s.driver_props; diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h index 1d89db323f..9a11a80f95 100644 --- a/libavcodec/vulkan_decode.h +++ b/libavcodec/vulkan_decode.h @@ -56,6 +56,9 @@ typedef struct FFVulkanDecodeShared { VkVideoDecodeCapabilitiesKHR dec_caps; VkVideoSessionParametersKHR empty_session_params; + + /* Software-defined decoder context */ + void *sd_ctx; } FFVulkanDecodeShared; typedef struct FFVulkanDecodeContext { @@ -141,6 +144,13 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, FFVulkanDecodePicture *vkpic, int is_current, int alloc_dpb); +/** + * Software-defined decoder version of ff_vk_decode_prepare_frame. + */ +int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + enum FFVkShaderRepFormat rep_fmt, int alloc_dpb); + /** * Add slice data to frame. */ -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 05/12] vulkan_decode: support multiple image views 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne ` (2 preceding siblings ...) 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 04/12] vulkan_decode: support software-defined decoders Lynne @ 2025-02-24 8:04 ` Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 06/12] hwcontext_vulkan: enable read/write without storage Lynne ` (5 subsequent siblings) 9 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne Enables non-monochrome video decoding using all our existing functions in the context of an SDR decoder. --- libavcodec/vulkan_av1.c | 4 +- libavcodec/vulkan_decode.c | 90 ++++++++++++++++++++------------------ libavcodec/vulkan_decode.h | 12 ++--- libavcodec/vulkan_h264.c | 4 +- libavcodec/vulkan_hevc.c | 4 +- 5 files changed, 60 insertions(+), 54 deletions(-) diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c index 6659f9d812..7dd7b204d7 100644 --- a/libavcodec/vulkan_av1.c +++ b/libavcodec/vulkan_av1.c @@ -123,7 +123,7 @@ static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src, .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, .baseArrayLayer = ((has_grain || dec->dedicated_dpb) && ctx->common.layered_dpb) ? 
hp->frame_id : 0, - .imageViewBinding = vkpic->img_view_ref, + .imageViewBinding = vkpic->view.ref[0], }; *ref_slot = (VkVideoReferenceSlotInfoKHR) { @@ -346,7 +346,7 @@ static int vk_av1_start_frame(AVCodecContext *avctx, .codedOffset = (VkOffset2D){ 0, 0 }, .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, .baseArrayLayer = 0, - .imageViewBinding = vp->img_view_out, + .imageViewBinding = vp->view.out[0], }, }; diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index 084563e8e9..9eaafa6495 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -130,9 +130,11 @@ static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic) FFVulkanFunctions *vk = &ctx->s.vkfn; vkpic->dpb_frame = NULL; - vkpic->img_view_ref = VK_NULL_HANDLE; - vkpic->img_view_out = VK_NULL_HANDLE; - vkpic->img_view_dest = VK_NULL_HANDLE; + for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) { + vkpic->view.ref[i] = VK_NULL_HANDLE; + vkpic->view.out[i] = VK_NULL_HANDLE; + vkpic->view.dst[i] = VK_NULL_HANDLE; + } vkpic->destroy_image_view = vk->DestroyImageView; vkpic->wait_semaphores = vk->WaitSemaphores; @@ -149,14 +151,14 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, /* If the decoder made a blank frame to make up for a missing ref, or the * frame is the current frame so it's missing one, create a re-representation */ - if (vkpic->img_view_ref) + if (vkpic->view.ref[0]) return 0; init_frame(dec, vkpic); if (ctx->common.layered_dpb && alloc_dpb) { - vkpic->img_view_ref = ctx->common.layered_view; - vkpic->img_aspect_ref = ctx->common.layered_aspect; + vkpic->view.ref[0] = ctx->common.layered_view; + vkpic->view.aspect_ref[0] = ctx->common.layered_aspect; } else if (alloc_dpb) { AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data; AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx; @@ -166,13 +168,13 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, 
return AVERROR(ENOMEM); err = ff_vk_create_view(&ctx->s, &ctx->common, - &vkpic->img_view_ref, &vkpic->img_aspect_ref, + &vkpic->view.ref[0], &vkpic->view.aspect_ref[0], (AVVkFrame *)vkpic->dpb_frame->data[0], dpb_hwfc->format[0], !is_current); if (err < 0) return err; - vkpic->img_view_dest = vkpic->img_view_ref; + vkpic->view.dst[0] = vkpic->view.ref[0]; } if (!alloc_dpb || is_current) { @@ -180,15 +182,15 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, AVVulkanFramesContext *hwfc = frames->hwctx; err = ff_vk_create_view(&ctx->s, &ctx->common, - &vkpic->img_view_out, &vkpic->img_aspect, + &vkpic->view.out[0], &vkpic->view.aspect[0], (AVVkFrame *)pic->data[0], hwfc->format[0], !is_current); if (err < 0) return err; if (!alloc_dpb) { - vkpic->img_view_ref = vkpic->img_view_out; - vkpic->img_aspect_ref = vkpic->img_aspect; + vkpic->view.ref[0] = vkpic->view.out[0]; + vkpic->view.aspect_ref[0] = vkpic->view.aspect[0]; } } @@ -201,41 +203,41 @@ int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic, { int err; FFVulkanDecodeShared *ctx = dec->shared_ctx; + AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data; vkpic->slices_size = 0; - if (vkpic->img_view_ref) + if (vkpic->view.ref[0]) return 0; init_frame(dec, vkpic); - if (ctx->common.layered_dpb && alloc_dpb) { - vkpic->img_view_ref = ctx->common.layered_view; - vkpic->img_aspect_ref = ctx->common.layered_aspect; - } else if (alloc_dpb) { - vkpic->dpb_frame = vk_get_dpb_pool(ctx); - if (!vkpic->dpb_frame) - return AVERROR(ENOMEM); + for (int i = 0; i < av_pix_fmt_count_planes(frames->sw_format); i++) { + if (alloc_dpb) { + vkpic->dpb_frame = vk_get_dpb_pool(ctx); + if (!vkpic->dpb_frame) + return AVERROR(ENOMEM); - err = ff_vk_create_imageview(&ctx->s, - &vkpic->img_view_ref, &vkpic->img_aspect_ref, - vkpic->dpb_frame, 0, rep_fmt); - if (err < 0) - return err; + err = ff_vk_create_imageview(&ctx->s, + &vkpic->view.ref[i], 
&vkpic->view.aspect_ref[i], + vkpic->dpb_frame, i, rep_fmt); + if (err < 0) + return err; - vkpic->img_view_dest = vkpic->img_view_ref; - } + vkpic->view.dst[i] = vkpic->view.ref[i]; + } - if (!alloc_dpb || is_current) { - err = ff_vk_create_imageview(&ctx->s, - &vkpic->img_view_out, &vkpic->img_aspect, - pic, 0, rep_fmt); - if (err < 0) - return err; + if (!alloc_dpb || is_current) { + err = ff_vk_create_imageview(&ctx->s, + &vkpic->view.out[i], &vkpic->view.aspect[i], + pic, i, rep_fmt); + if (err < 0) + return err; - if (!alloc_dpb) { - vkpic->img_view_ref = vkpic->img_view_out; - vkpic->img_aspect_ref = vkpic->img_aspect; + if (!alloc_dpb) { + vkpic->view.ref[i] = vkpic->view.out[i]; + vkpic->view.aspect_ref[i] = vkpic->view.aspect[i]; + } } } @@ -467,7 +469,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = vkf->img[0], .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = vp->img_aspect, + .aspectMask = vp->view.aspect[0], .layerCount = 1, .levelCount = 1, }, @@ -523,7 +525,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = rvkf->img[0], .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = rvp->img_aspect_ref, + .aspectMask = rvp->view.aspect_ref[0], .layerCount = 1, .levelCount = 1, }, @@ -533,7 +535,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx, } } } else if (vp->decode_info.referenceSlotCount || - vp->img_view_out != vp->img_view_ref) { + vp->view.out[0] != vp->view.ref[0]) { /* Single barrier for a single layered ref */ err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame, VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, @@ -580,12 +582,14 @@ void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture * av_buffer_unref(&vp->slices_buf); /* Destroy image view (out) */ - if (vp->img_view_out && vp->img_view_out != vp->img_view_dest) - vp->destroy_image_view(hwctx->act_dev, 
vp->img_view_out, hwctx->alloc); + for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) { + if (vp->view.out[i] && vp->view.out[i] != vp->view.dst[i]) + vp->destroy_image_view(hwctx->act_dev, vp->view.out[i], hwctx->alloc); - /* Destroy image view (ref, unlayered) */ - if (vp->img_view_dest) - vp->destroy_image_view(hwctx->act_dev, vp->img_view_dest, hwctx->alloc); + /* Destroy image view (ref, unlayered) */ + if (vp->view.dst[i]) + vp->destroy_image_view(hwctx->act_dev, vp->view.dst[i], hwctx->alloc); + } av_frame_free(&vp->dpb_frame); } diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h index 9a11a80f95..4051732e49 100644 --- a/libavcodec/vulkan_decode.h +++ b/libavcodec/vulkan_decode.h @@ -84,11 +84,13 @@ typedef struct FFVulkanDecodeContext { typedef struct FFVulkanDecodePicture { AVFrame *dpb_frame; /* Only used for out-of-place decoding. */ - VkImageView img_view_ref; /* Image representation view (reference) */ - VkImageView img_view_out; /* Image representation view (output-only) */ - VkImageView img_view_dest; /* Set to img_view_out if no layered refs are used */ - VkImageAspectFlags img_aspect; /* Image plane mask bits */ - VkImageAspectFlags img_aspect_ref; /* Only used for out-of-place decoding */ + struct { + VkImageView ref[AV_NUM_DATA_POINTERS]; /* Image representation view (reference) */ + VkImageView out[AV_NUM_DATA_POINTERS]; /* Image representation view (output-only) */ + VkImageView dst[AV_NUM_DATA_POINTERS]; /* Set to img_view_out if no layered refs are used */ + VkImageAspectFlags aspect[AV_NUM_DATA_POINTERS]; /* Image plane mask bits */ + VkImageAspectFlags aspect_ref[AV_NUM_DATA_POINTERS]; /* Only used for out-of-place decoding */ + } view; VkSemaphore sem; uint64_t sem_value; diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c index 1df8f0a208..71cf2c3ad7 100644 --- a/libavcodec/vulkan_h264.c +++ b/libavcodec/vulkan_h264.c @@ -98,7 +98,7 @@ static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src, 
.codedOffset = (VkOffset2D){ 0, 0 }, .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, .baseArrayLayer = ctx->common.layered_dpb ? dpb_slot_index : 0, - .imageViewBinding = vkpic->img_view_ref, + .imageViewBinding = vkpic->view.ref[0], }; *ref_slot = (VkVideoReferenceSlotInfoKHR) { @@ -471,7 +471,7 @@ static int vk_h264_start_frame(AVCodecContext *avctx, .codedOffset = (VkOffset2D){ 0, 0 }, .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, .baseArrayLayer = 0, - .imageViewBinding = vp->img_view_out, + .imageViewBinding = vp->view.out[0], }, }; diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c index 589c3de83d..a5bcd88e2d 100644 --- a/libavcodec/vulkan_hevc.c +++ b/libavcodec/vulkan_hevc.c @@ -164,7 +164,7 @@ static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src, .codedOffset = (VkOffset2D){ 0, 0 }, .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, .baseArrayLayer = ctx->common.layered_dpb ? pic_id : 0, - .imageViewBinding = vkpic->img_view_ref, + .imageViewBinding = vkpic->view.ref[0], }; *ref_slot = (VkVideoReferenceSlotInfoKHR) { @@ -823,7 +823,7 @@ static int vk_hevc_start_frame(AVCodecContext *avctx, .codedOffset = (VkOffset2D){ 0, 0 }, .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, .baseArrayLayer = 0, - .imageViewBinding = vp->img_view_out, + .imageViewBinding = vp->view.out[0], }, }; -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 06/12] hwcontext_vulkan: enable read/write without storage 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne ` (3 preceding siblings ...) 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 05/12] vulkan_decode: support multiple image views Lynne @ 2025-02-24 8:04 ` Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 07/12] vulkan: workaround BGR storage image undefined behaviour Lynne ` (4 subsequent siblings) 9 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavutil/hwcontext_vulkan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index ec6459712b..11d9e987b0 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -244,6 +244,8 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF COPY_VAL(device.features.shaderInt64); COPY_VAL(device.features.shaderInt16); COPY_VAL(device.features.shaderFloat64); + COPY_VAL(device.features.shaderStorageImageReadWithoutFormat); + COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat); COPY_VAL(vulkan_1_1.samplerYcbcrConversion); COPY_VAL(vulkan_1_1.storagePushConstant16); -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 07/12] vulkan: workaround BGR storage image undefined behaviour 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne ` (4 preceding siblings ...) 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 06/12] hwcontext_vulkan: enable read/write without storage Lynne @ 2025-02-24 8:04 ` Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne ` (3 subsequent siblings) 9 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavutil/vulkan.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 4d60ac1b3a..40ca737422 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1735,6 +1735,29 @@ static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt) return VK_FORMAT_UNDEFINED; } +static void bgr_workaround(AVVulkanFramesContext *vkfc, + VkImageViewCreateInfo *ci) +{ + if (!(vkfc->usage & VK_IMAGE_USAGE_STORAGE_BIT)) + return; + switch (ci->format) { +#define REMAP(src, dst) \ + case src: \ + ci->format = dst; \ + return; + REMAP(VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_R8G8B8A8_UNORM) + REMAP(VK_FORMAT_B8G8R8A8_SINT, VK_FORMAT_R8G8B8A8_SINT) + REMAP(VK_FORMAT_B8G8R8A8_UINT, VK_FORMAT_R8G8B8A8_UINT) + REMAP(VK_FORMAT_B8G8R8_UNORM, VK_FORMAT_R8G8B8_UNORM) + REMAP(VK_FORMAT_B8G8R8_SINT, VK_FORMAT_R8G8B8_SINT) + REMAP(VK_FORMAT_B8G8R8_UINT, VK_FORMAT_R8G8B8_UINT) + REMAP(VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_FORMAT_A2R10G10B10_UNORM_PACK32) +#undef REMAP + default: + return; + } +} + int ff_vk_create_imageview(FFVulkanContext *s, VkImageView *img_view, VkImageAspectFlags *aspect, AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt) @@ -1766,6 +1789,7 @@ int ff_vk_create_imageview(FFVulkanContext *s, .layerCount = 1, }, }; + bgr_workaround(vkfc, &view_create_info); if 
(view_create_info.format == VK_FORMAT_UNDEFINED) { av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation " "of format %i and mode %i\n", @@ -1827,6 +1851,7 @@ int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, .layerCount = 1, }, }; + bgr_workaround(vkfc, &view_create_info); if (view_create_info.format == VK_FORMAT_UNDEFINED) { av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation " "of format %i and mode %i\n", -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne ` (5 preceding siblings ...) 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 07/12] vulkan: workaround BGR storage image undefined behaviour Lynne @ 2025-02-24 8:04 ` Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 09/12] vulkan: unify handling of BGR and simplify ffv1_rct Lynne ` (2 subsequent siblings) 9 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne The shaders were written to support sharing, but needed slight tweaking. --- libavcodec/Makefile | 2 +- libavcodec/ffv1_vulkan.c | 123 ++++++++++++++ libavcodec/ffv1_vulkan.h | 60 +++++++ libavcodec/ffv1enc_vulkan.c | 234 +++++++++----------------- libavcodec/vulkan/ffv1_common.comp | 24 ++- libavcodec/vulkan/ffv1_enc_setup.comp | 18 +- libavcodec/vulkan/ffv1_reset.comp | 3 +- libavcodec/vulkan/rangecoder.comp | 27 +-- 8 files changed, 302 insertions(+), 189 deletions(-) create mode 100644 libavcodec/ffv1_vulkan.c create mode 100644 libavcodec/ffv1_vulkan.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 9630074205..0e96b33ef3 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -371,7 +371,7 @@ OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o float2half.o OBJS-$(CONFIG_FASTAUDIO_DECODER) += fastaudio.o OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1_parse.o ffv1.o OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1_parse.o ffv1.o -OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1enc_vulkan.o +OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1_vulkan.o ffv1enc_vulkan.o OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o OBJS-$(CONFIG_FIC_DECODER) += fic.o OBJS-$(CONFIG_FITS_DECODER) += fitsdec.o fits.o diff --git a/libavcodec/ffv1_vulkan.c b/libavcodec/ffv1_vulkan.c new file mode 100644 index 0000000000..6f49e2ebb1 --- 
/dev/null +++ b/libavcodec/ffv1_vulkan.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2025 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "ffv1_vulkan.h" +#include "libavutil/crc.h" + +int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + int err; + uint8_t *buf_mapped; + + RET(ff_vk_map_buffer(s, vkb, &buf_mapped, 0)); + + for (int i = 1; i < 256; i++) { + buf_mapped[256 + i] = f->state_transition[i]; + buf_mapped[256 - i] = 256 - (int)f->state_transition[i]; + } + + RET(ff_vk_unmap_buffer(s, vkb, 1)); + +fail: + return err; +} + +static int init_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f, + int (*write_data)(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f)) +{ + int err; + size_t buf_len = 512*sizeof(uint8_t); + + RET(ff_vk_create_buf(s, vkb, + buf_len, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + + write_data(s, vkb, f); + +fail: + return err; +} + +int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + return init_state_transition_data(s, vkb, f, + 
ff_ffv1_vk_update_state_transition_data); +} + +int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + int err; + + int16_t *buf_mapped; + size_t buf_len = MAX_QUANT_TABLES* + MAX_CONTEXT_INPUTS* + MAX_QUANT_TABLE_SIZE*sizeof(int16_t); + + RET(ff_vk_create_buf(s, vkb, + buf_len, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0)); + + memcpy(buf_mapped, f->quant_tables, + sizeof(f->quant_tables)); + + RET(ff_vk_unmap_buffer(s, vkb, 1)); + +fail: + return err; +} + +int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + int err; + + uint32_t *buf_mapped; + size_t buf_len = 256*sizeof(int32_t); + + RET(ff_vk_create_buf(s, vkb, + buf_len, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0)); + + memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len); + + RET(ff_vk_unmap_buffer(s, vkb, 1)); + +fail: + return err; +} diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h new file mode 100644 index 0000000000..0da6dc2d33 --- /dev/null +++ b/libavcodec/ffv1_vulkan.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_FFV1_VULKAN_H +#define AVCODEC_FFV1_VULKAN_H + +#include "libavutil/vulkan.h" +#include "ffv1.h" + +int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +typedef struct FFv1VkRCTParameters { + int offset; + uint8_t bits; + uint8_t planar_rgb; + uint8_t transparency; + uint8_t version; + uint8_t micro_version; + uint8_t padding[3]; +} FFv1VkRCTParameters; + +typedef struct FFv1VkResetParameters { + VkDeviceAddress slice_state; + uint32_t plane_state_size; + uint32_t context_count; + uint8_t codec_planes; + uint8_t key_frame; + uint8_t version; + uint8_t micro_version; + uint8_t padding[1]; +} FFv1VkResetParameters; + +#endif /* AVCODEC_FFV1_VULKAN_H */ diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 6a12ee2055..88801ca8e6 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -18,7 +18,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/crc.h" #include "libavutil/mem.h" #include "libavutil/vulkan.h" #include "libavutil/vulkan_spirv.h" @@ -32,6 +31,7 @@ #include "ffv1.h" #include "ffv1enc.h" +#include "ffv1_vulkan.h" /* Parallel Golomb alignment */ #define LG_ALIGN_W 32 @@ -122,28 +122,10 @@ extern const char *ff_source_ffv1_enc_setup_comp; extern const char *ff_source_ffv1_enc_comp; extern const char 
*ff_source_ffv1_enc_rgb_comp; -typedef struct FFv1VkRCTParameters { - int offset; - uint8_t bits; - uint8_t planar_rgb; - uint8_t transparency; - uint8_t padding[1]; -} FFv1VkRCTParameters; - -typedef struct FFv1VkResetParameters { - VkDeviceAddress slice_state; - uint32_t plane_state_size; - uint32_t context_count; - uint8_t codec_planes; - uint8_t key_frame; - uint8_t padding[3]; -} FFv1VkResetParameters; - typedef struct FFv1VkParameters { VkDeviceAddress slice_state; VkDeviceAddress scratch_data; VkDeviceAddress out_data; - uint64_t slice_size_max; int32_t sar[2]; uint32_t chroma_shift[2]; @@ -151,6 +133,7 @@ typedef struct FFv1VkParameters { uint32_t plane_state_size; uint32_t context_count; uint32_t crcref; + uint32_t slice_size_max; uint8_t bits_per_raw_sample; uint8_t context_model; @@ -175,7 +158,6 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(1, u8buf slice_state; ); GLSLC(1, u8buf scratch_data; ); GLSLC(1, u8buf out_data; ); - GLSLC(1, uint64_t slice_size_max; ); GLSLC(0, ); GLSLC(1, ivec2 sar; ); GLSLC(1, uvec2 chroma_shift; ); @@ -183,6 +165,7 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(1, uint plane_state_size; ); GLSLC(1, uint context_count; ); GLSLC(1, uint32_t crcref; ); + GLSLC(1, uint32_t slice_size_max; ); GLSLC(0, ); GLSLC(1, uint8_t bits_per_raw_sample; ); GLSLC(1, uint8_t context_model; ); @@ -492,7 +475,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .slice_state = slice_data_buf->address + f->slice_count*256, .scratch_data = tmp_data_buf->address, .out_data = out_data_buf->address, - .slice_size_max = out_data_buf->size / f->slice_count, .bits_per_raw_sample = f->bits_per_raw_sample, .sar[0] = pict->sample_aspect_ratio.num, .sar[1] = pict->sample_aspect_ratio.den, @@ -501,6 +483,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .plane_state_size = plane_state_size, .context_count = context_count, .crcref = f->crcref, + .slice_size_max = out_data_buf->size / f->slice_count, 
.context_model = fv->ctx.context_model, .version = f->version, .micro_version = f->micro_version, @@ -966,7 +949,6 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd) GLSLF(0, #define TYPE int%i_t ,smp_bits); GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits); GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits); - GLSLD(ff_source_common_comp); GLSLD(ff_source_rangecoder_comp); if (f->ac == AC_GOLOMB_RICE) @@ -993,6 +975,10 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) 1, 1, 1, 0)); + /* Common codec header */ + GLSLD(ff_source_common_comp); + add_push_data(shd); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1038,8 +1024,6 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); - add_push_data(shd); - GLSLD(ff_source_ffv1_enc_setup_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", @@ -1074,6 +1058,22 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) wg_dim, 1, 1, 0)); + /* Common codec header */ + GLSLD(ff_source_common_comp); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, u8buf slice_state; ); + GLSLC(1, uint plane_state_size; ); + GLSLC(1, uint context_count; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t padding[1]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE 
%i\n", MAX_QUANT_TABLE_SIZE); @@ -1110,17 +1110,6 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0)); - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, u8buf slice_state; ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint context_count; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t padding[3]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - GLSLD(ff_source_ffv1_reset_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", @@ -1164,6 +1153,21 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) wg_count, wg_count, 1, 0)); + /* Common codec header */ + GLSLD(ff_source_common_comp); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, int offset; ); + GLSLC(1, uint8_t bits; ); + GLSLC(1, uint8_t planar_rgb; ); + GLSLC(1, uint8_t transparency; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t padding[3]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1220,16 +1224,6 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0)); - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, int offset; ); - GLSLC(1, uint8_t bits; ); - GLSLC(1, uint8_t planar_rgb; ); - GLSLC(1, uint8_t transparency; ); - GLSLC(1, uint8_t padding[1]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, 
sizeof(FFv1VkRCTParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - GLSLD(ff_source_ffv1_enc_rct_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", @@ -1268,6 +1262,11 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) 1, 1, 1, 0)); + /* Common codec header */ + GLSLD(ff_source_common_comp); + + add_push_data(shd); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1328,8 +1327,6 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0)); - add_push_data(shd); - /* Assemble the shader body */ GLSLD(ff_source_ffv1_enc_common_comp); @@ -1356,110 +1353,6 @@ fail: return err; } -static int init_state_transition_data(AVCodecContext *avctx) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - - uint8_t *buf_mapped; - size_t buf_len = 512*sizeof(uint8_t); - - RET(ff_vk_create_buf(&fv->s, &fv->rangecoder_static_buf, - buf_len, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffer(&fv->s, &fv->rangecoder_static_buf, - &buf_mapped, 0)); - - for (int i = 1; i < 256; i++) { - buf_mapped[256 + i] = fv->ctx.state_transition[i]; - buf_mapped[256 - i] = 256 - (int)fv->ctx.state_transition[i]; - } - - RET(ff_vk_unmap_buffer(&fv->s, &fv->rangecoder_static_buf, 1)); - - /* Update descriptors */ - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->setup, 0, 0, 0, - &fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, - VK_FORMAT_UNDEFINED)); - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->enc, 0, 0, 0, - 
&fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, - VK_FORMAT_UNDEFINED)); - -fail: - return err; -} - -static int init_quant_table_data(AVCodecContext *avctx) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - - int16_t *buf_mapped; - size_t buf_len = MAX_QUANT_TABLES* - MAX_CONTEXT_INPUTS* - MAX_QUANT_TABLE_SIZE*sizeof(int16_t); - - RET(ff_vk_create_buf(&fv->s, &fv->quant_buf, - buf_len, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffer(&fv->s, &fv->quant_buf, (void *)&buf_mapped, 0)); - - memcpy(buf_mapped, fv->ctx.quant_tables, - sizeof(fv->ctx.quant_tables)); - - RET(ff_vk_unmap_buffer(&fv->s, &fv->quant_buf, 1)); - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->enc, 0, 1, 0, - &fv->quant_buf, - 0, fv->quant_buf.size, - VK_FORMAT_UNDEFINED)); - -fail: - return err; -} - -static int init_crc_table_data(AVCodecContext *avctx) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - - uint32_t *buf_mapped; - size_t buf_len = 256*sizeof(int32_t); - - RET(ff_vk_create_buf(&fv->s, &fv->crc_tab_buf, - buf_len, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffer(&fv->s, &fv->crc_tab_buf, (void *)&buf_mapped, 0)); - - memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len); - - RET(ff_vk_unmap_buffer(&fv->s, &fv->crc_tab_buf, 1)); - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->enc, 0, 2, 0, - &fv->crc_tab_buf, - 0, fv->crc_tab_buf.size, - VK_FORMAT_UNDEFINED)); - -fail: - return err; -} - static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) { int err; @@ -1703,20 +1596,50 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) 
spv->uninit(&spv); /* Range coder data */ - err = init_state_transition_data(avctx); + err = ff_ffv1_vk_init_state_transition_data(&fv->s, + &fv->rangecoder_static_buf, + f); if (err < 0) return err; /* Quantization table data */ - err = init_quant_table_data(avctx); + err = ff_ffv1_vk_init_quant_table_data(&fv->s, + &fv->quant_buf, + f); if (err < 0) return err; /* CRC table buffer */ - err = init_crc_table_data(avctx); + err = ff_ffv1_vk_init_crc_table_data(&fv->s, + &fv->crc_tab_buf, + f); if (err < 0) return err; + /* Update setup global descriptors */ + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->setup, 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + + /* Update encode global descriptors */ + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->enc, 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->enc, 0, 1, 0, + &fv->quant_buf, + 0, fv->quant_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->enc, 0, 2, 0, + &fv->crc_tab_buf, + 0, fv->crc_tab_buf.size, + VK_FORMAT_UNDEFINED)); + /* Temporary frame */ fv->frame = av_frame_alloc(); if (!fv->frame) @@ -1735,7 +1658,8 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) if (!fv->buf_regions) return AVERROR(ENOMEM); - return 0; +fail: + return err; } static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx) diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp index 5b4a882367..604d03b2de 100644 --- a/libavcodec/vulkan/ffv1_common.comp +++ b/libavcodec/vulkan/ffv1_common.comp @@ -22,17 +22,18 @@ struct SliceContext { RangeCoder c; - -#ifdef GOLOMB PutBitContext pb; /* 8*8 bytes */ -#endif ivec2 slice_dim; ivec2 slice_pos; ivec2 slice_rct_coef; + 
u8vec4 quant_table_idx; + uint context_count; uint hdr_len; // only used for golomb - int slice_coding_mode; + + uint slice_coding_mode; + bool slice_reset_contexts; }; /* -1, { -1, 0 } */ @@ -72,3 +73,18 @@ const uint32_t log2_run[41] = { 16, 17, 18, 19, 20, 21, 22, 23, 24, }; + +uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) +{ + uint mpw = 1 << chroma_shift; + uint awidth = align(width, mpw); + + if ((version < 4) || ((version == 4) && (micro_version < 3))) + return width * sx / num_h_slices; + + sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw; + if (sx == awidth) + sx = width; + + return sx; +} diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp index b861e25f74..23f09b2af6 100644 --- a/libavcodec/vulkan/ffv1_enc_setup.comp +++ b/libavcodec/vulkan/ffv1_enc_setup.comp @@ -20,21 +20,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) -{ - uint mpw = 1 << chroma_shift; - uint awidth = align(width, mpw); - - if ((version < 4) || ((version == 4) && (micro_version < 3))) - return width * sx / num_h_slices; - - sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw; - if (sx == awidth) - sx = width; - - return sx; -} - void init_slice(out SliceContext sc, const uint slice_idx) { /* Set coordinates */ @@ -52,6 +37,7 @@ void init_slice(out SliceContext sc, const uint slice_idx) sc.slice_dim = ivec2(sxe - sxs, sye - sys); sc.slice_rct_coef = ivec2(1, 1); sc.slice_coding_mode = int(force_pcm == 1); + sc.slice_reset_contexts = sc.slice_coding_mode == 1; rac_init(sc.c, OFFBUF(u8buf, out_data, slice_idx * slice_size_max), @@ -105,7 +91,7 @@ void write_slice_header(inout SliceContext sc, uint64_t state) put_symbol_unsigned(sc.c, state, sar.y); if (version >= 4) { - put_rac_full(sc.c, state, sc.slice_coding_mode == 1); + put_rac_full(sc.c, state, 
sc.slice_reset_contexts); put_symbol_unsigned(sc.c, state, sc.slice_coding_mode); if (sc.slice_coding_mode != 1 && colorspace == 1) { put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y); diff --git a/libavcodec/vulkan/ffv1_reset.comp b/libavcodec/vulkan/ffv1_reset.comp index c7c7962850..1b87ca754e 100644 --- a/libavcodec/vulkan/ffv1_reset.comp +++ b/libavcodec/vulkan/ffv1_reset.comp @@ -24,7 +24,8 @@ void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - if (slice_ctx[slice_idx].slice_coding_mode == 0 && key_frame == 0) + if (key_frame == 0 && + slice_ctx[slice_idx].slice_reset_contexts == false) return; uint64_t slice_state_off = uint64_t(slice_state) + diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index 848a056fb1..6e3b9c1238 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -21,8 +21,9 @@ */ struct RangeCoder { - u8buf bytestream_start; - u8buf bytestream; + uint64_t bytestream_start; + uint64_t bytestream; + uint64_t bytestream_end; uint low; uint16_t range; @@ -34,28 +35,29 @@ struct RangeCoder { void renorm_encoder_full(inout RangeCoder c) { int bs_cnt = 0; + u8buf bytestream = u8buf(c.bytestream); if (c.outstanding_byte == 0xFF) { c.outstanding_byte = uint8_t(c.low >> 8); } else if (c.low <= 0xFF00) { - c.bytestream[bs_cnt++].v = c.outstanding_byte; + bytestream[bs_cnt++].v = c.outstanding_byte; uint16_t cnt = c.outstanding_count; for (; cnt > 0; cnt--) - c.bytestream[bs_cnt++].v = uint8_t(0xFF); + bytestream[bs_cnt++].v = uint8_t(0xFF); c.outstanding_count = uint16_t(0); c.outstanding_byte = uint8_t(c.low >> 8); } else if (c.low >= 0x10000) { - c.bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1); + bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1); uint16_t cnt = c.outstanding_count; for (; cnt > 0; cnt--) - c.bytestream[bs_cnt++].v = uint8_t(0x00); + bytestream[bs_cnt++].v = uint8_t(0x00); c.outstanding_count = 
uint16_t(0); c.outstanding_byte = uint8_t(bitfieldExtract(c.low, 8, 8)); } else { c.outstanding_count++; } - c.bytestream = OFFBUF(u8buf, c.bytestream, bs_cnt); + c.bytestream += bs_cnt; c.range <<= 8; c.low = bitfieldInsert(0, c.low, 8, 8); } @@ -74,10 +76,10 @@ void renorm_encoder(inout RangeCoder c) return; } - u8buf bs = c.bytestream; + u8buf bs = u8buf(c.bytestream); uint8_t outstanding_byte = c.outstanding_byte; - c.bytestream = OFFBUF(u8buf, bs, oc); + c.bytestream = uint64_t(bs) + oc; c.outstanding_count = uint16_t(0); c.outstanding_byte = uint8_t(low >> 8); @@ -179,10 +181,11 @@ uint32_t rac_terminate(inout RangeCoder c) return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start)); } -void rac_init(out RangeCoder r, u8buf data, uint64_t buf_size) +void rac_init(out RangeCoder r, u8buf data, uint buf_size) { - r.bytestream_start = data; - r.bytestream = data; + r.bytestream_start = uint64_t(data); + r.bytestream = uint64_t(data); + r.bytestream_end = uint64_t(data) + buf_size; r.low = 0; r.range = uint16_t(0xFF00); r.outstanding_count = uint16_t(0); -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 09/12] vulkan: unify handling of BGR and simplify ffv1_rct 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne ` (6 preceding siblings ...) 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne @ 2025-02-24 8:04 ` Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 10/12] ffv1dec: add support for hwaccels Lynne 2025-02-24 8:05 ` [FFmpeg-devel] [PATCH v2 11/12] ffv1dec: reference the current packet into the main context Lynne 9 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavcodec/ffv1_vulkan.h | 1 + libavcodec/ffv1enc_vulkan.c | 2 ++ libavcodec/vulkan/ffv1_enc_rct.comp | 17 ++++++------- libavutil/vulkan.c | 38 +++++++++++++++++++++++++++++ libavutil/vulkan.h | 6 +++++ 5 files changed, 54 insertions(+), 10 deletions(-) diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h index 0da6dc2d33..599afae66e 100644 --- a/libavcodec/ffv1_vulkan.h +++ b/libavcodec/ffv1_vulkan.h @@ -37,6 +37,7 @@ int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, FFVkBuffer *vkb, FFV1Context *f); typedef struct FFv1VkRCTParameters { + int fmt_lut[4]; int offset; uint8_t bits; uint8_t planar_rgb; diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 88801ca8e6..225d478fb7 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -264,6 +264,7 @@ static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec, (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1), .transparency = f->transparency, }; + ff_vk_set_perm(src_hwfc->sw_format, pd.fmt_lut); ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(pd), &pd); @@ -1157,6 +1158,7 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) GLSLD(ff_source_common_comp); GLSLC(0, layout(push_constant, 
scalar) uniform pushConstants { ); + GLSLC(1, ivec4 fmt_lut; ); GLSLC(1, int offset; ); GLSLC(1, uint8_t bits; ); GLSLC(1, uint8_t planar_rgb; ); diff --git a/libavcodec/vulkan/ffv1_enc_rct.comp b/libavcodec/vulkan/ffv1_enc_rct.comp index a615381c90..b611f4be98 100644 --- a/libavcodec/vulkan/ffv1_enc_rct.comp +++ b/libavcodec/vulkan/ffv1_enc_rct.comp @@ -22,17 +22,14 @@ ivec4 load_components(ivec2 pos) { - if (planar_rgb == 0) - return ivec4(imageLoad(src[0], pos)); + ivec4 pix = ivec4(imageLoad(src[0], pos)); + if (planar_rgb != 0) { + for (int i = 1; i < (3 + transparency); i++) + pix[i] = int(imageLoad(src[i], pos)[0]); + } - ivec4 pix; - for (int i = 0; i < (3 + transparency); i++) - pix[i] = int(imageLoad(src[i], pos)[0]); - - /* Swizzle out the difference */ - if (bits > 8 && bits < 16 && transparency == 0) - return pix.bgra; - return pix.brga; + return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]], + pix[fmt_lut[2]], pix[fmt_lut[3]]); } void bypass_sample(ivec2 pos) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 40ca737422..26d7e9e600 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1468,6 +1468,44 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) return 0; } +void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4]) +{ + switch (pix_fmt) { + case AV_PIX_FMT_BGRA: + case AV_PIX_FMT_BGR0: + case AV_PIX_FMT_BGR565: + case AV_PIX_FMT_X2BGR10: + lut[0] = 2; + lut[1] = 1; + lut[2] = 0; + lut[3] = 3; + return; + case AV_PIX_FMT_GBRAP: + case AV_PIX_FMT_GBRP: + case AV_PIX_FMT_GBRAP10: + case AV_PIX_FMT_GBRAP12: + case AV_PIX_FMT_GBRAP14: + case AV_PIX_FMT_GBRAP16: + case AV_PIX_FMT_GBRP10: + case AV_PIX_FMT_GBRP12: + case AV_PIX_FMT_GBRP14: + case AV_PIX_FMT_GBRP16: + case AV_PIX_FMT_GBRPF32: + case AV_PIX_FMT_GBRAPF32: + lut[0] = 2; + lut[1] = 0; + lut[2] = 1; + lut[3] = 3; + return; + default: + lut[0] = 0; + lut[1] = 1; + lut[2] = 2; + lut[3] = 3; + return; + } +} + const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, enum 
FFVkShaderRepFormat rep_fmt) { diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 327ce2b286..1be432ed5e 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -371,6 +371,12 @@ const char *ff_vk_ret2str(VkResult res); */ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); +/** + * Since storage images may not be swizzled, we have to do this in the + * shader itself. This fills in a lookup table to do it. + */ +void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4]); + /** * Get the aspect flag for a plane from an image. */ -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 10/12] ffv1dec: add support for hwaccels 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne ` (7 preceding siblings ...) 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 09/12] vulkan: unify handling of BGR and simplify ffv1_rct Lynne @ 2025-02-24 8:04 ` Lynne 2025-02-24 8:05 ` [FFmpeg-devel] [PATCH v2 11/12] ffv1dec: reference the current packet into the main context Lynne 9 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:04 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne This commit adds support for hardware-accelerated decoding to the decoder. The previous commits already refactored the decoder; this commit simply adds calls to the hwaccel hooks (start_frame, decode_slice, end_frame) to decode. --- libavcodec/ffv1.h | 1 + libavcodec/ffv1dec.c | 65 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index 583696a36e..22acac35e4 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -123,6 +123,7 @@ typedef struct FFV1Context { int64_t picture_number; int key_frame; ProgressFrame picture, last_picture; + void *hwaccel_picture_private, *hwaccel_last_picture_private; uint32_t crcref; enum AVPixelFormat pix_fmt; diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 7de161f442..990fdc3711 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -41,6 +41,9 @@ #include "libavutil/refstruct.h" #include "thread.h" #include "decode.h" +#include "hwconfig.h" +#include "hwaccel_internal.h" +#include "config_components.h" static inline int get_vlc_symbol(GetBitContext *gb, VlcState *const state, int bits) @@ -636,13 +639,15 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, FFV1Context *f = avctx->priv_data; int ret; AVFrame *p; + const FFHWAccel *hwaccel = NULL; /* This is copied onto the first slice's range coder context */ RangeCoder c; ff_progress_frame_unref(&f->last_picture); + 
av_refstruct_unref(&f->hwaccel_last_picture_private); FFSWAP(ProgressFrame, f->picture, f->last_picture); - + FFSWAP(void *, f->hwaccel_picture_private, f->hwaccel_last_picture_private); f->avctx = avctx; f->frame_damaged = 0; @@ -651,11 +656,18 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, if (ret < 0) return ret; + if (avctx->hwaccel) + hwaccel = ffhwaccel(avctx->hwaccel); + ret = ff_progress_frame_get_buffer(avctx, &f->picture, AV_GET_BUFFER_FLAG_REF); if (ret < 0) return ret; + ret = ff_hwaccel_frame_priv_alloc(avctx, &f->hwaccel_picture_private); + if (ret < 0) + return ret; + p = f->picture.f; p->pict_type = AV_PICTURE_TYPE_I; //FIXME I vs. P @@ -672,15 +684,53 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n", f->version, !!(p->flags & AV_FRAME_FLAG_KEY), f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample); + /* Start */ + if (hwaccel) { + ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size); + if (ret < 0) + return ret; + } + ff_thread_finish_setup(avctx); - ret = decode_slices(avctx, c, avpkt); - if (ret < 0) - return ret; + /* Decode slices */ + if (hwaccel) { + uint8_t *buf_end = avpkt->data + avpkt->size; + + if (!(p->flags & AV_FRAME_FLAG_KEY) && f->last_picture.f) + ff_progress_frame_await(&f->last_picture, f->slice_count - 1); + + for (int i = f->slice_count - 1; i >= 0; i--) { + uint8_t *pos; + uint32_t len; + ret = find_next_slice(avctx, avpkt->data, buf_end, i, + &pos, &len); + if (ret < 0) + return ret; + + buf_end -= len; + + ret = hwaccel->decode_slice(avctx, pos, len); + if (ret < 0) + return ret; + } + } else { + ret = decode_slices(avctx, c, avpkt); + if (ret < 0) + return ret; + } + + /* Finalize */ + if (hwaccel) { + ret = hwaccel->end_frame(avctx); + if (ret < 0) + return ret; + } ff_progress_frame_report(&f->picture, INT_MAX); ff_progress_frame_unref(&f->last_picture); + 
av_refstruct_unref(&f->hwaccel_last_picture_private); if ((ret = av_frame_ref(rframe, f->picture.f)) < 0) return ret; @@ -754,8 +804,10 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx) FFV1Context *const s = avctx->priv_data; ff_progress_frame_unref(&s->picture); + av_refstruct_unref(&s->hwaccel_picture_private); + ff_progress_frame_unref(&s->last_picture); - av_freep(&avctx->stats_out); + av_refstruct_unref(&s->hwaccel_last_picture_private); ff_ffv1_close(s); @@ -776,4 +828,7 @@ const FFCodec ff_ffv1_decoder = { AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS, .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_USES_PROGRESSFRAMES, + .hw_configs = (const AVCodecHWConfigInternal *const []) { + NULL + }, }; -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 11/12] ffv1dec: reference the current packet into the main context 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne ` (8 preceding siblings ...) 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 10/12] ffv1dec: add support for hwaccels Lynne @ 2025-02-24 8:05 ` Lynne 2025-02-24 8:05 ` [FFmpeg-devel] [PATCH v2 12/12] ffv1dec_vulkan: add a Vulkan compute-based hardware decoding implementation Lynne 9 siblings, 1 reply; 12+ messages in thread From: Lynne @ 2025-02-24 8:05 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne --- libavcodec/ffv1.h | 3 +++ libavcodec/ffv1dec.c | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index 22acac35e4..9af17326b3 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -169,6 +169,9 @@ typedef struct FFV1Context { * NOT shared between frame threads. */ uint8_t frame_damaged; + + /* Reference to the current packet */ + AVPacket *pkt_ref; } FFV1Context; int ff_ffv1_common_init(AVCodecContext *avctx, FFV1Context *s); diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 990fdc3711..5ab41da1b7 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -455,6 +455,10 @@ static av_cold int decode_init(AVCodecContext *avctx) FFV1Context *f = avctx->priv_data; int ret; + f->pkt_ref = av_packet_alloc(); + if (!f->pkt_ref) + return AVERROR(ENOMEM); + if ((ret = ff_ffv1_common_init(avctx, f)) < 0) return ret; @@ -686,6 +690,10 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, /* Start */ if (hwaccel) { + ret = av_packet_ref(f->pkt_ref, avpkt); + if (ret < 0) + return ret; + ret = hwaccel->start_frame(avctx, avpkt->data, avpkt->size); if (ret < 0) return ret; @@ -705,15 +713,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, uint32_t len; ret = find_next_slice(avctx, avpkt->data, buf_end, i, &pos, &len); - if (ret < 0) + if (ret < 0) { + 
av_packet_unref(f->pkt_ref); return ret; + } buf_end -= len; ret = hwaccel->decode_slice(avctx, pos, len); - if (ret < 0) + if (ret < 0) { + av_packet_unref(f->pkt_ref); return ret; + } } + + av_packet_unref(f->pkt_ref); } else { ret = decode_slices(avctx, c, avpkt); if (ret < 0) @@ -809,6 +823,7 @@ static av_cold int ffv1_decode_close(AVCodecContext *avctx) ff_progress_frame_unref(&s->last_picture); av_refstruct_unref(&s->hwaccel_last_picture_private); + av_packet_free(&s->pkt_ref); ff_ffv1_close(s); return 0; -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH v2 12/12] ffv1dec_vulkan: add a Vulkan compute-based hardware decoding implementation 2025-02-24 8:05 ` [FFmpeg-devel] [PATCH v2 11/12] ffv1dec: reference the current packet into the main context Lynne @ 2025-02-24 8:05 ` Lynne 0 siblings, 0 replies; 12+ messages in thread From: Lynne @ 2025-02-24 8:05 UTC (permalink / raw) To: ffmpeg-devel; +Cc: Lynne This commit adds Vulkan-based accelerated decoding of FFv1. Currently, arithmetic coding and versions 3 and 4 are supported, without RGB. --- configure | 2 + libavcodec/Makefile | 1 + libavcodec/ffv1.h | 1 + libavcodec/ffv1dec.c | 20 +- libavcodec/hwaccels.h | 1 + libavcodec/vulkan/Makefile | 5 + libavcodec/vulkan/ffv1_dec.comp | 168 +++++ libavcodec/vulkan/ffv1_dec_setup.comp | 113 +++ libavcodec/vulkan/rangecoder.comp | 74 ++ libavcodec/vulkan_decode.c | 6 + libavcodec/vulkan_ffv1.c | 985 ++++++++++++++++++++++++++ 11 files changed, 1373 insertions(+), 3 deletions(-) create mode 100644 libavcodec/vulkan/ffv1_dec.comp create mode 100644 libavcodec/vulkan/ffv1_dec_setup.comp create mode 100644 libavcodec/vulkan_ffv1.c diff --git a/configure b/configure index f76f946dfe..fc007f3ef0 100755 --- a/configure +++ b/configure @@ -3193,6 +3193,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox" av1_videotoolbox_hwaccel_select="av1_decoder" av1_vulkan_hwaccel_deps="vulkan" av1_vulkan_hwaccel_select="av1_decoder" +ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler" +ffv1_vulkan_hwaccel_select="ffv1_decoder" h263_vaapi_hwaccel_deps="vaapi" h263_vaapi_hwaccel_select="h263_decoder" h263_videotoolbox_hwaccel_deps="videotoolbox" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 0e96b33ef3..22bebd3096 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1017,6 +1017,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL) += videotoolbox_av1.o OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o 
+OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index 9af17326b3..5d0514f923 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -126,6 +126,7 @@ typedef struct FFV1Context { void *hwaccel_picture_private, *hwaccel_last_picture_private; uint32_t crcref; enum AVPixelFormat pix_fmt; + enum AVPixelFormat configured_pix_fmt; const AVFrame *cur_enc_frame; int plane_count; diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 5ab41da1b7..7940d86b25 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -341,6 +341,9 @@ static int decode_slice(AVCodecContext *c, void *arg) static enum AVPixelFormat get_pixel_format(FFV1Context *f) { enum AVPixelFormat pix_fmts[] = { +#if CONFIG_FFV1_VULKAN_HWACCEL + AV_PIX_FMT_VULKAN, +#endif f->pix_fmt, AV_PIX_FMT_NONE, }; @@ -360,9 +363,12 @@ static int read_header(FFV1Context *f, RangeCoder *c) if (ret < 0) return ret; - f->avctx->pix_fmt = get_pixel_format(f); - if (f->avctx->pix_fmt < 0) - return AVERROR(EINVAL); + if (f->configured_pix_fmt != f->pix_fmt) { + f->avctx->pix_fmt = get_pixel_format(f); + if (f->avctx->pix_fmt < 0) + return AVERROR(EINVAL); + f->configured_pix_fmt = f->pix_fmt; + } ff_dlog(f->avctx, "%d %d %d\n", f->chroma_h_shift, f->chroma_v_shift, f->pix_fmt); @@ -455,6 +461,9 @@ static av_cold int decode_init(AVCodecContext *avctx) FFV1Context *f = avctx->priv_data; int ret; + f->pix_fmt = AV_PIX_FMT_NONE; + f->configured_pix_fmt = AV_PIX_FMT_NONE; + f->pkt_ref = av_packet_alloc(); if (!f->pkt_ref) return AVERROR(ENOMEM); @@ -744,6 +753,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe, ff_progress_frame_report(&f->picture, INT_MAX); ff_progress_frame_unref(&f->last_picture); + av_refstruct_unref(&f->hwaccel_picture_private); // 
TODO: fixme av_refstruct_unref(&f->hwaccel_last_picture_private); if ((ret = av_frame_ref(rframe, f->picture.f)) < 0) return ret; @@ -773,6 +783,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src) fdst->ac = fsrc->ac; fdst->colorspace = fsrc->colorspace; fdst->pix_fmt = fsrc->pix_fmt; + fdst->configured_pix_fmt = fsrc->configured_pix_fmt; fdst->ec = fsrc->ec; fdst->intra = fsrc->intra; @@ -844,6 +855,9 @@ const FFCodec ff_ffv1_decoder = { .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_USES_PROGRESSFRAMES, .hw_configs = (const AVCodecHWConfigInternal *const []) { +#if CONFIG_FFV1_VULKAN_HWACCEL + HWACCEL_VULKAN(ffv1), +#endif NULL }, }; diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 910a024032..0b2c725247 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel; extern const struct FFHWAccel ff_av1_vdpau_hwaccel; extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel; extern const struct FFHWAccel ff_av1_vulkan_hwaccel; +extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel; extern const struct FFHWAccel ff_h263_vaapi_hwaccel; extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel; extern const struct FFHWAccel ff_h264_d3d11va_hwaccel; diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 351332ee44..feb5d2ea51 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -11,6 +11,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \ vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \ vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o +OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ + vulkan/rangecoder.o vulkan/ffv1_vlc.o \ + vulkan/ffv1_common.o vulkan/ffv1_reset.o \ + vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o + VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp)) .SECONDARY: $(VULKAN:.comp=.c) libavcodec/vulkan/%.c: TAG = VULKAN diff --git 
a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp new file mode 100644 index 0000000000..3d7fdb3e38 --- /dev/null +++ b/libavcodec/vulkan/ffv1_dec.comp @@ -0,0 +1,168 @@ +/* + * FFv1 codec + * + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +ivec2 get_pred(ivec2 pos, ivec2 off, int p, int comp, int sw, + uint8_t context_model) +{ + const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); + const ivec2 yoff_border2 = off.x == 1 ? 
ivec2(1, -1) : ivec2(0, 0); + + TYPE top2 = TYPE(0); + if (off.y > 1) + top2 = TYPE(imageLoad(dst[p], pos + ivec2(0, -2))[comp]); + + VTYPE3 top = VTYPE3(TYPE(0), + TYPE(0), + TYPE(0)); + if (off.y > 0 && off != ivec2(0, 1)) + top[0] = TYPE(imageLoad(dst[p], pos + ivec2(-1, -1) + yoff_border1)[comp]); + if (off.y > 0) { + top[1] = TYPE(imageLoad(dst[p], pos + ivec2(0, -1))[comp]); + top[2] = TYPE(imageLoad(dst[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]); + } + + VTYPE2 cur = VTYPE2(TYPE(0), + TYPE(0)); + if (off.x > 0 && off != ivec2(1, 0)) + cur[0] = TYPE(imageLoad(dst[p], pos + ivec2(-2, 0) + yoff_border2)[comp]); + if (off != ivec2(0, 0)) + cur[1] = TYPE(imageLoad(dst[p], pos + ivec2(-1, 0) + yoff_border1)[comp]); + + /* context, prediction */ + return ivec2(get_context(cur, top, top2, context_model), + predict(cur[1], VTYPE2(top))); +} + +#ifndef GOLOMB +int get_isymbol(inout RangeCoder c, uint64_t state) +{ + if (get_rac(c, state)) + return 0; + + state += 1; + + int e = 0; + while (get_rac(c, state + min(e, 9))) { // 1..10 + e++; + if (e > 31) { + corrupt = true; + return 0; + } + } + + state += 21; + + int a = 1; + for (int i = e - 1; i >= 0; i--) + a += a + int(get_rac(c, state + min(i, 9))); // 22..31 + + e = -int(get_rac(c, state - 11 + min(e, 10))); // 11..21 sign + return (a ^ e) - e; +} + +void decode_line_pcm(inout SliceContext sc, int y, int p, int comp, + int bits) +{ + ivec2 sp = sc.slice_pos; + int w = sc.slice_dim.x; + if (p > 0 && p < 3) { + w >>= chroma_shift.x; + sp >>= chroma_shift; + } + + for (int x = 0; x < w; x++) { + uint v = 0; + for (int i = (bits - 1); i >= 0; i--) + v |= uint(get_rac_equi(sc.c)) << i; + imageStore(dst[p], (sp + ivec2(x, y)), uvec4(v)); + } +} + +void decode_line(inout SliceContext sc, uint64_t state, + int y, int p, int comp, int bits, const int run_index) +{ + ivec2 sp = sc.slice_pos; + + int w = sc.slice_dim.x; + if (p > 0 && p < 3) { + w >>= chroma_shift.x; + sp >>= chroma_shift; + } + + for (int x = 0; x 
< w; x++) { + ivec2 pr = get_pred(sp + ivec2(x, y), ivec2(x, y), p, comp, w, + sc.quant_table_idx[p]); + + int diff = get_isymbol(sc.c, state + CONTEXT_SIZE*abs(pr[0])); + + if (pr[0] < 0) + diff = -diff; + + int pix = zero_extend(pr[1] + diff, bits); + imageStore(dst[p], (sp + ivec2(x, y)), uvec4(pix)); + } +} +#endif + +void decode_slice(inout SliceContext sc, const uint slice_idx) +{ + int bits = bits_per_raw_sample; + +#ifndef GOLOMB + if (sc.slice_coding_mode == 1) { + for (int p = 0; p < planes; p++) { + + int h = sc.slice_dim.y; + if (p > 0 && p < 3) + h >>= chroma_shift.y; + + for (int y = 0; y < h; y++) + decode_line_pcm(sc, y, p, 0, bits); + } + } else +#endif + { + uint64_t slice_state_off = uint64_t(slice_state) + + slice_idx*plane_state_size*codec_planes; + + for (int p = 0; p < planes; p++) { + int run_index = 0; + + int h = sc.slice_dim.y; + if (p > 0 && p < 3) + h >>= chroma_shift.y; + + for (int y = 0; y < h; y++) + decode_line(sc, slice_state_off, y, p, 0, bits, run_index); + + /* For the second chroma plane, reuse the first plane's state */ + if (p != 1) + slice_state_off += plane_state_size; + } + } +} + +void main(void) +{ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + decode_slice(slice_ctx[slice_idx], slice_idx); +} diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp new file mode 100644 index 0000000000..11c367efee --- /dev/null +++ b/libavcodec/vulkan/ffv1_dec_setup.comp @@ -0,0 +1,113 @@ +/* + * FFv1 codec + * + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +uint get_usymbol(inout RangeCoder c, uint64_t state) +{ + if (get_rac(c, state + 0)) + return 0; + + int e = 0; + while (get_rac(c, state + 1 + min(e, 9))) { // 1..10 + e++; + if (e > 31) { + corrupt = true; + return 0; + } + } + + uint a = 1; + for (int i = e - 1; i >= 0; i--) + a += a + uint(get_rac(c, state + 22 + min(i, 9))); // 22..31 + + return a; +} + +bool decode_slice_header(inout SliceContext sc, uint64_t state) +{ + u8buf sb = u8buf(state); + + [[unroll]] + for (int i = 0; i < CONTEXT_SIZE; i++) + sb[i].v = uint8_t(128); + + uint sx = get_usymbol(sc.c, state); + uint sy = get_usymbol(sc.c, state); + uint sw = get_usymbol(sc.c, state) + 1; + uint sh = get_usymbol(sc.c, state) + 1; + + if (sx < 0 || sy < 0 || sw <= 0 || sh <= 0 || + sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) || + corrupt) { + return true; + } + + /* Set coordinates */ + uint sxs = slice_coord(img_size.x, sx , gl_NumWorkGroups.x, chroma_shift.x); + uint sxe = slice_coord(img_size.x, sx + sw, gl_NumWorkGroups.x, chroma_shift.x); + uint sys = slice_coord(img_size.y, sy , gl_NumWorkGroups.y, chroma_shift.y); + uint sye = slice_coord(img_size.y, sy + sh, gl_NumWorkGroups.y, chroma_shift.y); + + sc.slice_pos = ivec2(sxs, sys); + sc.slice_dim = ivec2(sxe - sxs, sye - sys); + sc.slice_rct_coef = ivec2(1, 1); + sc.slice_coding_mode = int(0); + + for (uint i = 0; i < codec_planes; i++) { + uint idx = get_usymbol(sc.c, state); + if (idx >= quant_table_count) + return true; + sc.quant_table_idx[i] = 
uint8_t(idx); + sc.context_count = context_count[idx]; + } + + get_usymbol(sc.c, state); + get_usymbol(sc.c, state); + get_usymbol(sc.c, state); + + if (version >= 4) { + sc.slice_reset_contexts = get_rac(sc.c, state); + sc.slice_coding_mode = get_usymbol(sc.c, state); + if (sc.slice_coding_mode != 1 && colorspace == 1) { + sc.slice_rct_coef.x = int(get_usymbol(sc.c, state)); + sc.slice_rct_coef.y = int(get_usymbol(sc.c, state)); + if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4) + return true; + } + } + + return false; +} + +void main(void) +{ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; + + rac_init_dec(slice_ctx[slice_idx].c, + u8buf(slice_data + slice_offsets[slice_idx]), + slice_offsets[slice_idx + 1] - slice_offsets[slice_idx]); + + if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1)) + get_rac_equi(slice_ctx[slice_idx].c); + + decode_slice_header(slice_ctx[slice_idx], scratch_state); +} diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index 6e3b9c1238..8c8d0d9d9c 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -191,3 +191,77 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size) r.outstanding_count = uint16_t(0); r.outstanding_byte = uint8_t(0xFF); } + +/* Decoder */ +uint overread; +bool corrupt; + +void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size) +{ + overread = 0; + corrupt = false; + + /* Skip priming bytes */ + rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2); + + u8vec2 prime = u8vec2buf(data).v; + /* Switch endianess of the priming bytes */ + r.low = pack16(prime.yx); + + if (r.low >= 0xFF00) { + r.low = 0xFF00; + r.bytestream_end = uint64_t(data) + 2; + } +} + +void refill(inout RangeCoder c) +{ + c.range <<= 8; + c.low <<= 8; + if (c.bytestream < c.bytestream_end) { + c.low += u8buf(c.bytestream).v; + c.bytestream++; + } else { + 
overread++; + } +} + +bool get_rac(inout RangeCoder c, uint64_t state) +{ + u8buf sb = u8buf(state); + uint val = uint(sb.v); + uint16_t range1 = uint16_t((uint(c.range) * val) >> 8); + + c.range -= range1; + + bool bit = c.low >= c.range; + sb.v = zero_one_state[(uint(bit) << 8) + val]; + + if (bit) { + c.low -= c.range; + c.range = range1; + } + + if (c.range < 0x100) + refill(c); + + return bit; +} + +bool get_rac_equi(inout RangeCoder c) +{ + uint16_t range1 = c.range >> 1; + + c.range -= range1; + + bool bit = c.low >= c.range; + if (bit) { + c.low -= c.range; + c.range = range1; + } + + if (c.range < 0x100) + refill(c); + + return bit; +} diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index 9eaafa6495..b910834566 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -36,6 +36,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc; #if CONFIG_AV1_VULKAN_HWACCEL extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc; #endif +#if CONFIG_FFV1_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc; +#endif static const FFVulkanDecodeDescriptor *dec_descs[] = { #if CONFIG_H264_VULKAN_HWACCEL @@ -47,6 +50,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = { #if CONFIG_AV1_VULKAN_HWACCEL &ff_vk_dec_av1_desc, #endif +#if CONFIG_FFV1_VULKAN_HWACCEL + &ff_vk_dec_ffv1_desc, +#endif }; static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id) diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c new file mode 100644 index 0000000000..6356f3163c --- /dev/null +++ b/libavcodec/vulkan_ffv1.c @@ -0,0 +1,985 @@ +/* + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vulkan_decode.h" +#include "hwaccel_internal.h" + +#include "ffv1.h" +#include "ffv1_vulkan.h" +#include "libavutil/vulkan_spirv.h" +#include "libavutil/mem.h" + +extern const char *ff_source_common_comp; +extern const char *ff_source_rangecoder_comp; +extern const char *ff_source_ffv1_vlc_comp; +extern const char *ff_source_ffv1_common_comp; +extern const char *ff_source_ffv1_dec_setup_comp; +extern const char *ff_source_ffv1_reset_comp; +extern const char *ff_source_ffv1_dec_comp; +extern const char *ff_source_ffv1_dec_rct_comp; + +const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = { + .codec_id = AV_CODEC_ID_FFV1, + .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR, + .queue_flags = VK_QUEUE_COMPUTE_BIT, +}; + +typedef struct FFv1VulkanDecodePicture { + FFVulkanDecodePicture vp; + + VkImageView img_view_rct; + AVFrame *rct; + + AVBufferRef *tmp_data; + + AVBufferRef *slice_state; + uint32_t plane_state_size; + uint32_t slice_state_size; + uint32_t slice_data_size; + uint32_t max_context_count; + + AVBufferRef *slice_offset_buf; + uint32_t *slice_offset; + int slice_num; +} FFv1VulkanDecodePicture; + +typedef struct FFv1VulkanDecodeContext { + AVBufferRef *intermediate_frames_ref; + + FFVulkanShader setup; + FFVulkanShader reset[2]; /* AC/Golomb */ + FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */ + FFVulkanShader rct[2]; /* 16/32 bit */ + + FFVkBuffer rangecoder_static_buf; + FFVkBuffer quant_buf; + FFVkBuffer crc_tab_buf; + + AVBufferPool *slice_state_pool; 
+ AVBufferPool *tmp_data_pool; + AVBufferPool *slice_offset_pool; +} FFv1VulkanDecodeContext; + +typedef struct FFv1VkResetParameters { + VkDeviceAddress slice_state; + uint32_t plane_state_size; + uint32_t context_count; + uint8_t codec_planes; + uint8_t key_frame; + uint8_t version; + uint8_t micro_version; + uint8_t padding[1]; +} FFv1VkResetParameters; + +typedef struct FFv1VkParameters { + uint32_t context_count[MAX_QUANT_TABLES]; + + VkDeviceAddress slice_data; + VkDeviceAddress slice_state; + VkDeviceAddress scratch_data; + + uint32_t img_size[2]; + uint32_t chroma_shift[2]; + + uint32_t plane_state_size; + uint32_t crcref; + + uint8_t bits_per_raw_sample; + uint8_t quant_table_count; + uint8_t version; + uint8_t micro_version; + uint8_t key_frame; + uint8_t planes; + uint8_t codec_planes; + uint8_t transparency; + uint8_t colorspace; + uint8_t ec; + uint8_t padding[2]; +} FFv1VkParameters; + +static void add_push_data(FFVulkanShader *shd) +{ + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES); + GLSLC(0, ); + GLSLC(1, u8buf slice_data; ); + GLSLC(1, u8buf slice_state; ); + GLSLC(1, u8buf scratch_data; ); + GLSLC(0, ); + GLSLC(1, uvec2 img_size; ); + GLSLC(1, uvec2 chroma_shift; ); + GLSLC(0, ); + GLSLC(1, uint plane_state_size; ); + GLSLC(1, uint32_t crcref; ); + GLSLC(0, ); + GLSLC(1, uint8_t bits_per_raw_sample; ); + GLSLC(1, uint8_t quant_table_count; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t planes; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t transparency; ); + GLSLC(1, uint8_t colorspace; ); + GLSLC(1, uint8_t ec; ); + GLSLC(1, uint8_t padding[2]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), + VK_SHADER_STAGE_COMPUTE_BIT); +} + +static int vk_ffv1_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int 
err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFv1VulkanDecodeContext *fv = ctx->sd_ctx; + FFV1Context *f = avctx->priv_data; + + FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &fp->vp; + FFVkBuffer *slice_offset; + + fp->slice_num = 0; + + for (int i = 0; i < f->quant_table_count; i++) + fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count); + + /* Allocate slice buffer data */ + if (f->ac == AC_GOLOMB_RICE) + fp->plane_state_size = 8; + else + fp->plane_state_size = CONTEXT_SIZE; + + fp->plane_state_size *= fp->max_context_count; + fp->slice_state_size = fp->plane_state_size*f->plane_count; + + fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */ + fp->slice_state_size += fp->slice_data_size; + fp->slice_state_size = FFALIGN(fp->slice_state_size, 8); + +#if 0 + /* Host map the input slices data */ + err = ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, f->pkt_ref->buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); + if (err < 0) + return err; +#endif + + /* Allocate slice state data */ + if (f->picture.f->flags & AV_FRAME_FLAG_KEY) { + err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool, + &fp->slice_state, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, f->max_slice_count*fp->slice_state_size, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + } else { + FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private; + fp->slice_state = av_buffer_ref(fpl->slice_state); + if (!fp->slice_state) + return AVERROR(ENOMEM); + } + + /* Allocate temporary data buffer */ + err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool, + &fp->tmp_data, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, f->max_slice_count*CONTEXT_SIZE, + 
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + + /* Allocate slice offsets buffer */ + err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool, + &fp->slice_offset_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, (f->max_slice_count + 1)*sizeof(uint32_t), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) + return err; + + /* First offset is always 0 */ + slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; + AV_WN32(slice_offset->mapped_mem, 0); + + /* Prepare frame to be used */ + err = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1, + FF_VK_REP_NATIVE, 0); + if (err < 0) + return err; + + return 0; +} + +static int vk_ffv1_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + FFV1Context *f = avctx->priv_data; + + FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &fp->vp; + FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; + +#if 1 + int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, + &fp->slice_num, + (const uint32_t **)&fp->slice_offset); + if (err < 0) + return err; + + AV_WN32(slice_offset->mapped_mem + fp->slice_num*sizeof(uint32_t), + fp->slice_offset[fp->slice_num - 1] + size); +#else + FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data; + + fp->slice_num++; + AV_WN32(slice_offset->mapped_mem + fp->slice_num*sizeof(uint32_t), + data - slices_buf->mapped_mem + size); +#endif + + return 0; +} + +static int vk_ffv1_end_frame(AVCodecContext *avctx) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + FFV1Context *f = avctx->priv_data; + FFv1VulkanDecodeContext *fv = ctx->sd_ctx; + FFv1VkParameters pd; + FFv1VkResetParameters pd_reset; + + int is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) 
&& + !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8); + + FFVulkanShader *reset_shader; + FFVulkanShader *decode_shader; + + FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &fp->vp; + + FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data; + FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data; + FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; + + FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data; + + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); + ff_vk_exec_start(&ctx->s, exec); + + /* Prepare deps */ + RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1)); + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0)); + vp->slices_buf = NULL; + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0)); + fp->slice_offset_buf = NULL; + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0)); + fp->tmp_data = NULL; + + /* Input frame barrier */ + ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + /* Entry barrier */ + if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) { + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_state->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_state->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = 
slice_state->buf, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + } + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + nb_img_bar = 0; + if (nb_buf_bar) { + slice_state->stage = buf_bar[1].dstStageMask; + slice_state->access = buf_bar[1].dstAccessMask; + nb_buf_bar = 0; + } + + /* Setup shader */ + ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, + 1, 0, 0, + slice_state, + 0, fp->slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, + 1, 1, 0, + slice_offset, + 0, (f->slice_count + 1)*sizeof(uint32_t), + VK_FORMAT_UNDEFINED); + + ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup); + pd = (FFv1VkParameters) { + /* context_count */ + + .slice_data = slices_buf->address, + .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, + .scratch_data = tmp_data->address, + + .img_size[0] = f->picture.f->width, + .img_size[1] = f->picture.f->height, + .chroma_shift[0] = f->chroma_h_shift, + .chroma_shift[1] = f->chroma_v_shift, + + .plane_state_size = fp->plane_state_size, + .crcref = f->crcref, + + .bits_per_raw_sample = avctx->bits_per_raw_sample, + .quant_table_count = f->quant_table_count, + .version = f->version, + .micro_version = f->micro_version, + .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY, + .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt), + .codec_planes = f->plane_count, + .transparency = f->transparency, + .colorspace = f->colorspace, + .ec = f->ec, + }; + for (int i = 0; i < MAX_QUANT_TABLES; i++) + pd.context_count[i] = f->context_count[i]; + + ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1); + + /* Reset shader */ 
+ reset_shader = &fv->reset[f->ac == AC_GOLOMB_RICE]; + ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader, + 1, 0, 0, + slice_state, + 0, fp->slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + + ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader); + + pd_reset = (FFv1VkResetParameters) { + .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, + .plane_state_size = fp->plane_state_size, + .context_count = fp->max_context_count, + .codec_planes = f->plane_count, + .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY, + .version = f->version, + .micro_version = f->micro_version, + }; + ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd_reset), &pd_reset); + + /* Sync between setup and reset shaders */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_state->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_state->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_state->buf, + .offset = 0, + .size = fp->slice_data_size*f->slice_count, + }; + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + slice_state->stage = buf_bar[0].dstStageMask; + slice_state->access = buf_bar[0].dstAccessMask; + nb_buf_bar = 0; + + vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, + f->plane_count); + + /* Decode */ + decode_shader = &fv->decode[f->use32bit][f->ac == AC_GOLOMB_RICE][is_rgb]; + ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, + 1, 0, 0, + slice_state, + 0, fp->slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + 
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, + f->picture.f, vp->view.out, + 1, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader); + ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + /* Sync between reset and decode shaders */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_state->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_state->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_state->buf, + .offset = fp->slice_data_size*f->slice_count, + .size = slice_state->size - fp->slice_data_size*f->slice_count, + }; + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + slice_state->stage = buf_bar[0].dstStageMask; + slice_state->access = buf_bar[0].dstAccessMask; + nb_buf_bar = 0; + + vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1); + + err = ff_vk_exec_submit(&ctx->s, exec); + if (err < 0) + return err; + +fail: + return 0; +} + +static void define_shared_code(FFVulkanShader *shd, int use32bit) +{ + int smp_bits = use32bit ? 
32 : 16; + + av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK); + + GLSLF(0, #define TYPE int%i_t ,smp_bits); + GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits); + GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits); + GLSLD(ff_source_rangecoder_comp); + GLSLD(ff_source_ffv1_common_comp); +} + +static int init_setup_shader(FFV1Context *f, FFVulkanContext *s, + FFVkExecPool *pool, FFVkSPIRVCompiler *spv, + FFVulkanShader *shd) +{ + int err; + FFVulkanDescriptorSetBinding *desc_set; + + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + + RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + 1, 1, 1, + 0)); + + /* Common codec header */ + GLSLD(ff_source_common_comp); + + add_push_data(shd); + + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); + av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "rangecoder_static_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t zero_one_state[512];", + }, + { + .name = "quant_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" + "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + }, + { + .name = "crc_ieee_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint32_t crc_ieee[256];", + }, + }; + + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0)); + + define_shared_code(shd, 0 /* Irrelevant */); + + desc_set = 
(FFVulkanDescriptorSetBinding []) { + { + .name = "slice_data_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, + }, + { + .name = "slice_offsets_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_quali = "readonly", + .buf_content = "uint32_t slice_offsets", + .buf_elems = f->max_slice_count + 1, + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0)); + + GLSLD(ff_source_ffv1_dec_setup_comp); + + RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(s, pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static int init_reset_shader(FFV1Context *f, FFVulkanContext *s, + FFVkExecPool *pool, FFVkSPIRVCompiler *spv, + FFVulkanShader *shd, int ac) +{ + int err; + FFVulkanDescriptorSetBinding *desc_set; + + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024); + + RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + wg_dim, 1, 1, + 0)); + + if (ac == AC_GOLOMB_RICE) { + av_bprintf(&shd->src, "#define PB_UNALIGNED\n"); + av_bprintf(&shd->src, "#define GOLOMB\n"); + } + + /* Common codec header */ + GLSLD(ff_source_common_comp); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, u8buf slice_state; ); + GLSLC(1, uint plane_state_size; ); + GLSLC(1, uint context_count; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t padding[1]; ); + GLSLC(0, }; ); + 
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); + av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "rangecoder_static_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t zero_one_state[512];", + }, + { + .name = "quant_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" + "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0)); + + define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */); + if (ac == AC_GOLOMB_RICE) + GLSLD(ff_source_ffv1_vlc_comp); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "slice_data_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0)); + + GLSLD(ff_source_ffv1_reset_comp); + + RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(s, pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, + FFVkExecPool *pool, FFVkSPIRVCompiler *spv, + FFVulkanShader *shd, AVHWFramesContext *frames_ctx, + int use32bit, int ac, int rgb) +{ + int err; + FFVulkanDescriptorSetBinding *desc_set; + + 
uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + + RET(ff_vk_shader_init(s, shd, "ffv1_dec", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + 1, 1, 1, + 0)); + + if (ac == AC_GOLOMB_RICE) { + av_bprintf(&shd->src, "#define PB_UNALIGNED\n"); + av_bprintf(&shd->src, "#define GOLOMB\n"); + } + + /* Common codec header */ + GLSLD(ff_source_common_comp); + + add_push_data(shd); + + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); + av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "rangecoder_static_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t zero_one_state[512];", + }, + { + .name = "quant_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" + "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + }, + { + .name = "crc_ieee_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint32_t crc_ieee[256];", + }, + }; + + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0)); + + define_shared_code(shd, use32bit); + if (ac == AC_GOLOMB_RICE) + GLSLD(ff_source_ffv1_vlc_comp); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "slice_data_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, + }, + { + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format, + FF_VK_REP_NATIVE), + .elems 
= av_pix_fmt_count_planes(frames_ctx->sw_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0)); + + GLSLD(ff_source_ffv1_dec_comp); + + RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(s, pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static int vk_decode_ffv1_init(AVCodecContext *avctx) +{ + int err; + FFV1Context *f = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = NULL; + FFv1VulkanDecodeContext *fv; + FFVkSPIRVCompiler *spv; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + err = ff_vk_decode_init(avctx); + if (err < 0) + return err; + ctx = dec->shared_ctx; + + fv = ctx->sd_ctx = av_mallocz(sizeof(*fv)); + if (!fv) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Setup shader */ + err = init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup); + if (err < 0) + return err; + + /* Reset shaders */ + for (int i = 0; i < 2; i++) { /* AC/Golomb */ + err = init_reset_shader(f, &ctx->s, &ctx->exec_pool, + spv, &fv->reset[i], !i ? AC_RANGE_CUSTOM_TAB : 0); + if (err < 0) + return err; + } + + /* Decode shaders */ + for (int i = 0; i < 2; i++) { /* 16/32 bit */ + for (int j = 0; j < 1; j++) { /* AC/Golomb */ + for (int k = 0; k < 1; k++) { /* Normal/RGB */ + AVHWFramesContext *frames_ctx; + frames_ctx = k ?(AVHWFramesContext *)fv->intermediate_frames_ref->data : + (AVHWFramesContext *)avctx->hw_frames_ctx->data; + + err = init_decode_shader(f, &ctx->s, &ctx->exec_pool, + spv, &fv->decode[i][j][k], + frames_ctx, + i, + !j ? 
AC_RANGE_CUSTOM_TAB : 0, + k); + if (err < 0) + return err; + } + } + } + + /* Range coder data */ + err = ff_ffv1_vk_init_state_transition_data(&ctx->s, + &fv->rangecoder_static_buf, + f); + if (err < 0) + return err; + + /* Quantization table data */ + err = ff_ffv1_vk_init_quant_table_data(&ctx->s, + &fv->quant_buf, + f); + if (err < 0) + return err; + + /* CRC table buffer */ + err = ff_ffv1_vk_init_crc_table_data(&ctx->s, + &fv->crc_tab_buf, + f); + if (err < 0) + return err; + + /* Update setup global descriptors */ + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &fv->setup, 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + + /* Update decode global descriptors */ + for (int i = 0; i < 2; i++) { /* 16/32 bit */ + for (int j = 0; j < 1; j++) { /* AC/Golomb */ + for (int k = 0; k < 1; k++) { /* Normal/RGB */ + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &fv->decode[i][j][k], 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &fv->decode[i][j][k], 0, 1, 0, + &fv->quant_buf, + 0, fv->quant_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &fv->decode[i][j][k], 0, 2, 0, + &fv->crc_tab_buf, + 0, fv->crc_tab_buf.size, + VK_FORMAT_UNDEFINED)); + } + } + } + +fail: + return err; +} + +static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data) +{ + AVHWDeviceContext *hwctx = _hwctx.nc; + + FFv1VulkanDecodePicture *fp = data; + FFVulkanDecodePicture *vp = &fp->vp; + + ff_vk_decode_free_frame(hwctx, vp); + + av_buffer_unref(&vp->slices_buf); + av_buffer_unref(&fp->slice_state); + av_buffer_unref(&fp->slice_offset_buf); + av_buffer_unref(&fp->tmp_data); + +// FFVulkanFunctions *vk = &ctx->s.vkfn; +// vk->DestroyImageView(hwctx->act_dev, fp->img_view_rct, 
hwctx->alloc); + + av_frame_free(&fp->rct); +} + +static int vk_decode_ffv1_uninit(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFv1VulkanDecodeContext *fv = ctx->sd_ctx; + + ff_vk_decode_uninit(avctx); + + ff_vk_shader_free(&ctx->s, &fv->setup); + + for (int i = 0; i < 2; i++) /* AC/Golomb */ + ff_vk_shader_free(&ctx->s, &fv->reset[i]); + + for (int i = 0; i < 2; i++) /* 16/32 bit */ + for (int j = 0; j < 1; j++) /* AC/Golomb */ + for (int k = 0; k < 1; k++) /* Normal/RGB */ + ff_vk_shader_free(&ctx->s, &fv->decode[i][j][k]); + + for (int i = 0; i < 2; i++) /* 16/32 bit */ + ff_vk_shader_free(&ctx->s, &fv->rct[i]); + + av_buffer_pool_uninit(&fv->tmp_data_pool); + av_buffer_pool_uninit(&fv->slice_state_pool); + av_buffer_pool_uninit(&fv->slice_offset_pool); + + ff_vk_free_buf(&ctx->s, &fv->quant_buf); + ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf); + ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf); + + return 0; +} + +const FFHWAccel ff_ffv1_vulkan_hwaccel = { + .p.name = "ffv1_vulkan", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_FFV1, + .p.pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_ffv1_start_frame, + .decode_slice = &vk_ffv1_decode_slice, + .end_frame = &vk_ffv1_end_frame, + .free_frame_priv = &vk_ffv1_free_frame_priv, + .frame_priv_data_size = sizeof(FFv1VulkanDecodePicture), + .init = &vk_decode_ffv1_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_invalidate, + .flush = &ff_vk_decode_flush, + .uninit = &vk_decode_ffv1_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE /* | HWACCEL_CAP_THREAD_SAFE */, +}; -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email 
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2025-02-24 8:07 UTC | newest] Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2025-02-24 8:04 [FFmpeg-devel] [PATCH v2 01/12] ffv1enc_vulkan: disable autodetection of async_depth Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 02/12] vulkan: add ff_vk_create_imageview Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 03/12] vulkan: copy host-mapping buffer code from hwcontext Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 04/12] vulkan_decode: support software-defined decoders Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 05/12] vulkan_decode: support multiple image views Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 06/12] hwcontext_vulkan: enable read/write without storage Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 07/12] vulkan: workaround BGR storage image undefined behaviour Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 09/12] vulkan: unify handling of BGR and simplify ffv1_rct Lynne 2025-02-24 8:04 ` [FFmpeg-devel] [PATCH v2 10/12] ffv1dec: add support for hwaccels Lynne 2025-02-24 8:05 ` [FFmpeg-devel] [PATCH v2 11/12] ffv1dec: reference the current packet into the main context Lynne 2025-02-24 8:05 ` [FFmpeg-devel] [PATCH v2 12/12] ffv1dec_vulkan: add a Vulkan compute-based hardware decoding implementation Lynne
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git