From: Lynne <dev@lynne.ee> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH 2/3] hwcontext_vulkan: add queue and frame locking functions Date: Sun, 3 Apr 2022 16:52:24 +0200 (CEST) Message-ID: <MzjuQ-Z--3-2@lynne.ee> (raw) In-Reply-To: <MzjuD2p--3-2@lynne.ee-MzjuHsU----2> [-- Attachment #1: Type: text/plain, Size: 242 bytes --] This allows for multiple threads to access the same frame. This is unfortunately necessary, as in Vulkan, queues are considered to be up to the user to synchronize, while frames often have their layout changed upon reading. Patch attached. [-- Attachment #2: 0002-hwcontext_vulkan-add-queue-and-frame-locking-functio.patch --] [-- Type: text/x-patch, Size: 15362 bytes --] From d8bd429859f9dc90325dbd0a7355b21ad5a80b6f Mon Sep 17 00:00:00 2001 From: Lynne <dev@lynne.ee> Date: Sun, 3 Apr 2022 16:44:58 +0200 Subject: [PATCH 2/3] hwcontext_vulkan: add queue and frame locking functions This allows for multiple threads to access the same frame. This is unfortunately necessary, as in Vulkan, queues are considered to be up to the user to synchronize, while frames often have their layout changed upon reading. --- libavutil/hwcontext_vulkan.c | 180 ++++++++++++++++++++++++++--------- libavutil/hwcontext_vulkan.h | 28 ++++++ 2 files changed, 164 insertions(+), 44 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 1176858545..5bd0cab7ef 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -27,6 +27,7 @@ #include <dlfcn.h> #endif +#include <pthread.h> #include <unistd.h> #include "config.h" @@ -93,6 +94,8 @@ typedef struct VulkanDevicePriv { /* Queues */ uint32_t qfs[5]; int num_qfs; + int *num_queues_in_qf; + pthread_mutex_t **qf_mutex; /* Debug callback */ VkDebugUtilsMessengerEXT debug_ctx; @@ -126,6 +129,8 @@ typedef struct VulkanFramesPriv { } VulkanFramesPriv; typedef struct AVVkFrameInternal { + pthread_mutex_t lock; + #if CONFIG_CUDA /* Importing external memory into cuda is really expensive so we keep the * memory imported all the time */ @@ -1307,6 +1312,11 @@ static void vulkan_device_free(AVHWDeviceContext *ctx) if (p->libvulkan) dlclose(p->libvulkan); + av_freep(&p->num_queues_in_qf); + for (int i = 0; i < p->num_qfs; i++) + av_freep(&p->qf_mutex[p->qfs[i]]); + av_freep(&p->qf_mutex); + RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions); RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions); } @@ -1430,9 +1440,21 @@ end: return err; } +static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index) +{ + VulkanDevicePriv *p = ctx->internal->priv; + pthread_mutex_lock(&p->qf_mutex[queue_family][index]); +} + +static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index) +{ + VulkanDevicePriv *p = ctx->internal->priv; + pthread_mutex_unlock(&p->qf_mutex[queue_family][index]); +} + static int vulkan_device_init(AVHWDeviceContext *ctx) { - int err; + int err, last_qf = 0; uint32_t queue_num; AVVulkanDeviceContext *hwctx = ctx->hwctx; VulkanDevicePriv *p = ctx->internal->priv; @@ -1487,6 +1509,16 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) enc_index = hwctx->queue_family_encode_index; dec_index = hwctx->queue_family_decode_index; + last_qf = FFMAX(graph_index, FFMAX(comp_index, FFMAX(tx_index, FFMAX(enc_index, dec_index)))); + + p->qf_mutex = av_mallocz(last_qf*sizeof(*p->qf_mutex)); + if (!p->qf_mutex) + return AVERROR(ENOMEM); + + p->num_queues_in_qf = av_mallocz(last_qf*sizeof(*p->num_queues_in_qf)); + if (!p->num_queues_in_qf) + return AVERROR(ENOMEM); + #define CHECK_QUEUE(type, required, fidx, ctx_qf, qc) \ do { \ if (ctx_qf < 0 && required) { \ @@ -1515,6 +1547,14 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \ dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \ p->qfs[p->num_qfs++] = ctx_qf; \ + \ + p->num_queues_in_qf[ctx_qf] = qc; \ + p->qf_mutex[ctx_qf] = av_mallocz(qc*sizeof(**p->qf_mutex)); \ + if (!p->qf_mutex[ctx_qf]) \ + return AVERROR(ENOMEM); \ + \ + for (int i = 0; i < qc; i++) \ + pthread_mutex_init(&p->qf_mutex[ctx_qf][i], NULL); \ } while (0) CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues); @@ -1525,6 +1565,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx) #undef CHECK_QUEUE + if (!hwctx->lock_queue) + hwctx->lock_queue = lock_queue; + if (!hwctx->unlock_queue) + hwctx->unlock_queue = unlock_queue; + /* Get device capabilities */ vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); @@ -1726,9 +1771,6 @@ static void vulkan_free_internal(AVVkFrame *f) { AVVkFrameInternal *internal = f->internal; - if (!internal) - return; - #if CONFIG_CUDA if (internal->cuda_fc_ref) { AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data; @@ -1757,6 +1799,8 @@ static void vulkan_free_internal(AVVkFrame *f) } #endif + pthread_mutex_destroy(&internal->lock); + av_freep(&f->internal); } @@ -1916,13 +1960,16 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, int err; uint32_t src_qf, dst_qf; VkImageLayout new_layout; - VkAccessFlags new_access; + VkAccessFlags2 new_access; + AVVulkanFramesContext *vkfc = hwfc->hwctx; const int planes = av_pix_fmt_count_planes(hwfc->sw_format); VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; + AVFrame tmp = { .data[0] = (uint8_t *)frame }; uint64_t sem_sig_val[AV_NUM_DATA_POINTERS]; - VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 }; + VkDependencyInfo dep_info; VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, @@ -1938,6 +1985,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, }; VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS]; + + if ((err = wait_start_exec_ctx(hwfc, ectx))) + return err; + + vkfc->lock_frame(hwfc, &tmp); + for (int i = 0; i < planes; i++) { wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; sem_sig_val[i] = frame->sem_value[i] + 1; @@ -1974,35 +2027,46 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, break; } - if ((err = wait_start_exec_ctx(hwfc, ectx))) - return err; - /* Change the image layout to something more optimal for writes. * This also signals the newly created semaphore, making it usable * for synchronization */ for (int i = 0; i < planes; i++) { - img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - img_bar[i].srcAccessMask = 0x0; - img_bar[i].dstAccessMask = new_access; - img_bar[i].oldLayout = frame->layout[i]; - img_bar[i].newLayout = new_layout; - img_bar[i].srcQueueFamilyIndex = src_qf; - img_bar[i].dstQueueFamilyIndex = dst_qf; - img_bar[i].image = frame->img[i]; - img_bar[i].subresourceRange.levelCount = 1; - img_bar[i].subresourceRange.layerCount = 1; - img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_bar[i] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, + .srcAccessMask = 0x0, + .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT, + .dstAccessMask = new_access, + .oldLayout = frame->layout[i], + .newLayout = new_layout, + .srcQueueFamilyIndex = src_qf, + .dstQueueFamilyIndex = dst_qf, + .image = frame->img[i], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + }, + }; frame->layout[i] = img_bar[i].newLayout; frame->access[i] = img_bar[i].dstAccessMask; } - vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx), - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, NULL, 0, NULL, planes, img_bar); + dep_info = (VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = planes, + }; - return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); + vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info); + + err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0); + vkfc->unlock_frame(hwfc, &tmp); + + return err; } static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format, @@ -2254,6 +2318,18 @@ fail: return NULL; } +static void lock_frame(AVHWFramesContext *fc, AVFrame *f) +{ + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + pthread_mutex_lock(&vkf->internal->lock); +} + +static void unlock_frame(AVHWFramesContext *fc, AVFrame *f) +{ + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + pthread_mutex_unlock(&vkf->internal->lock); +} + static void vulkan_frames_uninit(AVHWFramesContext *hwfc) { VulkanFramesPriv *fp = hwfc->internal->priv; @@ -2416,6 +2492,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc) return AVERROR(ENOMEM); } + if (!hwctx->lock_frame) + hwctx->lock_frame = lock_frame; + if (!hwctx->unlock_frame) + hwctx->unlock_frame = unlock_frame; + return 0; } @@ -3004,20 +3085,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, CU_AD_FORMAT_UNSIGNED_INT8; dst_f = (AVVkFrame *)frame->data[0]; - dst_int = dst_f->internal; - if (!dst_int || !dst_int->cuda_fc_ref) { - if (!dst_f->internal) - dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); - - if (!dst_int) - return AVERROR(ENOMEM); + if (!dst_int->cuda_fc_ref) { dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc); - if (!dst_int->cuda_fc_ref) { - av_freep(&dst_f->internal); + if (!dst_int->cuda_fc_ref) return AVERROR(ENOMEM); - } for (int i = 0; i < planes; i++) { CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { @@ -3691,13 +3764,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, return err; } -static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, +static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, AVBufferRef **bufs, size_t *buf_offsets, const int *buf_stride, int w, int h, enum AVPixelFormat pix_fmt, int to_buf) { int err; AVVkFrame *frame = (AVVkFrame *)f->data[0]; + AVVulkanFramesContext *vkfc = hwfc->hwctx; VulkanFramesPriv *fp = hwfc->internal->priv; VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; FFVulkanFunctions *vk = &p->vkfn; @@ -3732,11 +3806,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, .waitSemaphoreCount = planes, }; - for (int i = 0; i < planes; i++) - sem_signal_values[i] = frame->sem_value[i] + 1; + vkfc->lock_frame(hwfc, f); if ((err = wait_start_exec_ctx(hwfc, ectx))) - return err; + goto end; + + for (int i = 0; i < planes; i++) + sem_signal_values[i] = frame->sem_value[i] + 1; /* Change the image layout to something more optimal for transfers */ for (int i = 0; i < planes; i++) { @@ -3811,14 +3887,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f, if (!f->buf[ref]) break; if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1))) - return err; + goto end; } if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes))) - return err; - return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); + goto end; + err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref); } else { - return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1); + err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1); } + +end: + vkfc->unlock_frame(hwfc, f); + return err; } static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, @@ -4129,7 +4209,19 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc, AVVkFrame *av_vk_frame_alloc(void) { - return av_mallocz(sizeof(AVVkFrame)); + AVVkFrame *f = av_mallocz(sizeof(AVVkFrame)); + if (!f) + return NULL; + + f->internal = av_mallocz(sizeof(*f->internal)); + if (!f->internal) { + av_free(f); + return NULL; + } + + pthread_mutex_init(&f->internal->lock, NULL); + + return f; } const HWContextType ff_hwcontext_type_vulkan = { diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index ce8a835c6f..5864ae1264 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -135,6 +135,19 @@ typedef struct AVVulkanDeviceContext { */ int queue_family_decode_index; int nb_decode_queues; + + /** + * Locks a queue, preventing other threads from submitting any command + * buffers to this queue. + * If set to NULL, will be set to lavu-internal functions that utilize a + * mutex. + */ + void (*lock_queue)(AVHWDeviceContext *ctx, int queue_family, int index); + + /** + * Similar to lock_queue(), unlocks a queue. Must only be called after it. + */ + void (*unlock_queue)(AVHWDeviceContext *ctx, int queue_family, int index); } AVVulkanDeviceContext; /** @@ -195,6 +208,21 @@ typedef struct AVVulkanFramesContext { * av_hwframe_ctx_init(). */ AVVkFrameFlags flags; + + /** + * Locks a frame, preventing other threads from changing frame properties. + * If set to NULL, will be set to lavu-internal functions that utilize a + * mutex. + * Users SHOULD only ever lock just before command submission in order + * to get accurate frame properties, and unlock immediately after command + * submission without waiting for it to finish. + */ + void (*lock_frame)(AVHWFramesContext *fc, AVFrame *f); + + /** + * Similar to lock_frame(), unlocks a frame. Must only be called after it. + */ + void (*unlock_frame)(AVHWFramesContext *fc, AVFrame *f); } AVVulkanFramesContext; /* -- 2.35.1 [-- Attachment #3: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2022-04-03 14:52 UTC|newest] Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-04-03 14:51 [FFmpeg-devel] [PATCH 1/3] hwcontext_vulkan: expose image queue families Lynne [not found] ` <MzjuD2p--3-2@lynne.ee-MzjuHsU----2> 2022-04-03 14:52 ` Lynne [this message] 2022-04-05 9:38 ` [FFmpeg-devel] [PATCH 2/3] hwcontext_vulkan: add queue and frame locking functions Anton Khirnov 2022-04-05 14:16 ` Lynne [not found] ` <MzjuQ-Z--3-2@lynne.ee-MzjuTDW----2> 2022-04-03 14:53 ` [FFmpeg-devel] [PATCH 3/3] lavu: bump version and add APIchanges for Vulkan API changes Lynne [not found] ` <MzjuZff--3-2@lynne.ee-Mzjubs4--J-2> 2022-04-03 15:49 ` [FFmpeg-devel] [PATCH] hwcontext_vulkan: properly enable sync2 and make prepare_frame compatible Lynne
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=MzjuQ-Z--3-2@lynne.ee \ --to=dev@lynne.ee \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git