From mboxrd@z Thu Jan 1 00:00:00 1970
Date: Sun, 3 Apr 2022 16:52:24 +0200 (CEST)
From: Lynne
To: FFmpeg development discussions and patches
Subject: [FFmpeg-devel] [PATCH 2/3] hwcontext_vulkan: add queue and frame locking functions

This allows for multiple threads to access the same frame. This is
unfortunately necessary, as in Vulkan, queues are considered to be up to
the user to synchronize, while frames often have their layout changed
upon reading.

Patch attached.
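As a rough usage sketch (not part of the patch, and with made-up my_* names):
an API user that creates its own Vulkan device could install its own queue
locking through the new callbacks before av_hwdevice_ctx_init(); leaving the
pointers at NULL gets the lavu-internal per-queue mutexes added below. The
lock_frame()/unlock_frame() callbacks added to AVVulkanFramesContext work the
same way on the frame side.

#include <pthread.h>

#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_vulkan.h>

/* Illustration only: one global lock for every queue. A real user would
 * likely keep one mutex per (queue family, queue index), which is what
 * the lavu-internal defaults in this patch do. */
static pthread_mutex_t my_queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void my_lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
{
    pthread_mutex_lock(&my_queue_lock);
}

static void my_unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
{
    pthread_mutex_unlock(&my_queue_lock);
}

/* dev_ref comes from av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN), with the
 * instance/device/queue-family fields already filled in by the application. */
static int install_queue_locks(AVBufferRef *dev_ref)
{
    AVHWDeviceContext     *ctx   = (AVHWDeviceContext *)dev_ref->data;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;

    /* If these are left NULL, av_hwdevice_ctx_init() installs the
     * mutex-based defaults from this patch. */
    hwctx->lock_queue   = my_lock_queue;
    hwctx->unlock_queue = my_unlock_queue;

    return av_hwdevice_ctx_init(dev_ref);
}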
[Attachment: 0002-hwcontext_vulkan-add-queue-and-frame-locking-functio.patch]

>From d8bd429859f9dc90325dbd0a7355b21ad5a80b6f Mon Sep 17 00:00:00 2001
From: Lynne
Date: Sun, 3 Apr 2022 16:44:58 +0200
Subject: [PATCH 2/3] hwcontext_vulkan: add queue and frame locking functions

This allows for multiple threads to access the same frame. This is
unfortunately necessary, as in Vulkan, queues are considered to be up to
the user to synchronize, while frames often have their layout changed
upon reading.
---
 libavutil/hwcontext_vulkan.c | 180 ++++++++++++++++++++++++++---------
 libavutil/hwcontext_vulkan.h |  28 ++++++
 2 files changed, 164 insertions(+), 44 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 1176858545..5bd0cab7ef 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -27,6 +27,7 @@
 #include <dlfcn.h>
 #endif
 
+#include <pthread.h>
 #include <unistd.h>
 
 #include "config.h"
@@ -93,6 +94,8 @@ typedef struct VulkanDevicePriv {
     /* Queues */
     uint32_t qfs[5];
     int num_qfs;
+    int *num_queues_in_qf;
+    pthread_mutex_t **qf_mutex;
 
     /* Debug callback */
     VkDebugUtilsMessengerEXT debug_ctx;
@@ -126,6 +129,8 @@ typedef struct VulkanFramesPriv {
 } VulkanFramesPriv;
 
 typedef struct AVVkFrameInternal {
+    pthread_mutex_t lock;
+
 #if CONFIG_CUDA
     /* Importing external memory into cuda is really expensive so we keep the
      * memory imported all the time */
@@ -1307,6 +1312,11 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
     if (p->libvulkan)
         dlclose(p->libvulkan);
 
+    av_freep(&p->num_queues_in_qf);
+    for (int i = 0; i < p->num_qfs; i++)
+        av_freep(&p->qf_mutex[p->qfs[i]]);
+    av_freep(&p->qf_mutex);
+
     RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
     RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
 }
@@ -1430,9 +1440,21 @@ end:
     return err;
 }
 
+static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+    VulkanDevicePriv *p = ctx->internal->priv;
+    pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
+}
+
+static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+    VulkanDevicePriv *p = ctx->internal->priv;
+    pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
+}
+
 static int vulkan_device_init(AVHWDeviceContext *ctx)
 {
-    int err;
+    int err, last_qf = 0;
     uint32_t queue_num;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VulkanDevicePriv *p = ctx->internal->priv;
@@ -1487,6 +1509,16 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
     enc_index = hwctx->queue_family_encode_index;
     dec_index = hwctx->queue_family_decode_index;
 
+    last_qf = FFMAX(graph_index, FFMAX(comp_index, FFMAX(tx_index, FFMAX(enc_index, dec_index))));
+
+    p->qf_mutex = av_mallocz(last_qf*sizeof(*p->qf_mutex));
+    if (!p->qf_mutex)
+        return AVERROR(ENOMEM);
+
+    p->num_queues_in_qf = av_mallocz(last_qf*sizeof(*p->num_queues_in_qf));
+    if (!p->num_queues_in_qf)
+        return AVERROR(ENOMEM);
+
 #define CHECK_QUEUE(type, required, fidx, ctx_qf, qc)                          \
     do {                                                                       \
         if (ctx_qf < 0 && required) {                                          \
@@ -1515,6 +1547,14 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
         enc_index = (ctx_qf == enc_index) ? -1 : enc_index;                    \
         dec_index = (ctx_qf == dec_index) ? -1 : dec_index;                    \
         p->qfs[p->num_qfs++] = ctx_qf;                                         \
+                                                                               \
+        p->num_queues_in_qf[ctx_qf] = qc;                                      \
+        p->qf_mutex[ctx_qf] = av_mallocz(qc*sizeof(**p->qf_mutex));            \
+        if (!p->qf_mutex[ctx_qf])                                              \
+            return AVERROR(ENOMEM);                                            \
+                                                                               \
+        for (int i = 0; i < qc; i++)                                           \
+            pthread_mutex_init(&p->qf_mutex[ctx_qf][i], NULL);                 \
     } while (0)
 
     CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
@@ -1525,6 +1565,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
 
 #undef CHECK_QUEUE
 
+    if (!hwctx->lock_queue)
+        hwctx->lock_queue = lock_queue;
+    if (!hwctx->unlock_queue)
+        hwctx->unlock_queue = unlock_queue;
+
     /* Get device capabilities */
     vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
 
@@ -1726,9 +1771,6 @@ static void vulkan_free_internal(AVVkFrame *f)
 {
     AVVkFrameInternal *internal = f->internal;
 
-    if (!internal)
-        return;
-
 #if CONFIG_CUDA
     if (internal->cuda_fc_ref) {
         AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
@@ -1757,6 +1799,8 @@ static void vulkan_free_internal(AVVkFrame *f)
     }
 #endif
 
+    pthread_mutex_destroy(&internal->lock);
+
     av_freep(&f->internal);
 }
 
@@ -1916,13 +1960,16 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     int err;
     uint32_t src_qf, dst_qf;
     VkImageLayout new_layout;
-    VkAccessFlags new_access;
+    VkAccessFlags2 new_access;
+    AVVulkanFramesContext *vkfc = hwfc->hwctx;
     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
+    AVFrame tmp = { .data[0] = (uint8_t *)frame };
    uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
 
-    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+    VkDependencyInfo dep_info;
 
     VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -1938,6 +1985,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     };
 
     VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
+
+    if ((err = wait_start_exec_ctx(hwfc, ectx)))
+        return err;
+
+    vkfc->lock_frame(hwfc, &tmp);
+
     for (int i = 0; i < planes; i++) {
         wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
         sem_sig_val[i] = frame->sem_value[i] + 1;
@@ -1974,35 +2027,46 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         break;
     }
 
-    if ((err = wait_start_exec_ctx(hwfc, ectx)))
-        return err;
-
     /* Change the image layout to something more optimal for writes.
      * This also signals the newly created semaphore, making it usable
      * for synchronization */
     for (int i = 0; i < planes; i++) {
-        img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
-        img_bar[i].srcAccessMask = 0x0;
-        img_bar[i].dstAccessMask = new_access;
-        img_bar[i].oldLayout = frame->layout[i];
-        img_bar[i].newLayout = new_layout;
-        img_bar[i].srcQueueFamilyIndex = src_qf;
-        img_bar[i].dstQueueFamilyIndex = dst_qf;
-        img_bar[i].image = frame->img[i];
-        img_bar[i].subresourceRange.levelCount = 1;
-        img_bar[i].subresourceRange.layerCount = 1;
-        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        img_bar[i] = (VkImageMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+            .pNext = NULL,
+            .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
+            .srcAccessMask = 0x0,
+            .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+            .dstAccessMask = new_access,
+            .oldLayout = frame->layout[i],
+            .newLayout = new_layout,
+            .srcQueueFamilyIndex = src_qf,
+            .dstQueueFamilyIndex = dst_qf,
+            .image = frame->img[i],
+            .subresourceRange = (VkImageSubresourceRange) {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .levelCount = 1,
+                .layerCount = 1,
+            },
+        };
 
         frame->layout[i] = img_bar[i].newLayout;
         frame->access[i] = img_bar[i].dstAccessMask;
     }
 
-    vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
-                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                           VK_PIPELINE_STAGE_TRANSFER_BIT,
-                           0, 0, NULL, 0, NULL, planes, img_bar);
+    dep_info = (VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = planes,
+    };
 
-    return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+    vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
+
+    err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+    vkfc->unlock_frame(hwfc, &tmp);
+
+    return err;
 }
 
 static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
@@ -2254,6 +2318,18 @@ fail:
     return NULL;
 }
 
+static void lock_frame(AVHWFramesContext *fc, AVFrame *f)
+{
+    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+    pthread_mutex_lock(&vkf->internal->lock);
+}
+
+static void unlock_frame(AVHWFramesContext *fc, AVFrame *f)
+{
+    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+    pthread_mutex_unlock(&vkf->internal->lock);
+}
+
 static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
 {
     VulkanFramesPriv *fp = hwfc->internal->priv;
@@ -2416,6 +2492,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
             return AVERROR(ENOMEM);
     }
 
+    if (!hwctx->lock_frame)
+        hwctx->lock_frame = lock_frame;
+    if (!hwctx->unlock_frame)
+        hwctx->unlock_frame = unlock_frame;
+
     return 0;
 }
 
@@ -3004,20 +3085,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                                                      CU_AD_FORMAT_UNSIGNED_INT8;
 
     dst_f = (AVVkFrame *)frame->data[0];
-
     dst_int = dst_f->internal;
-    if (!dst_int || !dst_int->cuda_fc_ref) {
-        if (!dst_f->internal)
-            dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
-
-        if (!dst_int)
-            return AVERROR(ENOMEM);
 
+    if (!dst_int->cuda_fc_ref) {
         dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
-        if (!dst_int->cuda_fc_ref) {
-            av_freep(&dst_f->internal);
+        if (!dst_int->cuda_fc_ref)
             return AVERROR(ENOMEM);
-        }
 
         for (int i = 0; i < planes; i++) {
             CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
@@ -3691,13 +3764,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
     return err;
 }
 
-static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
+static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
                               AVBufferRef **bufs, size_t *buf_offsets,
                               const int *buf_stride, int w,
                               int h, enum AVPixelFormat pix_fmt, int to_buf)
 {
     int err;
     AVVkFrame *frame = (AVVkFrame *)f->data[0];
+    AVVulkanFramesContext *vkfc = hwfc->hwctx;
     VulkanFramesPriv *fp = hwfc->internal->priv;
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
@@ -3732,11 +3806,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
         .waitSemaphoreCount = planes,
     };
 
-    for (int i = 0; i < planes; i++)
-        sem_signal_values[i] = frame->sem_value[i] + 1;
+    vkfc->lock_frame(hwfc, f);
 
     if ((err = wait_start_exec_ctx(hwfc, ectx)))
-        return err;
+        goto end;
+
+    for (int i = 0; i < planes; i++)
+        sem_signal_values[i] = frame->sem_value[i] + 1;
 
     /* Change the image layout to something more optimal for transfers */
     for (int i = 0; i < planes; i++) {
@@ -3811,14 +3887,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
             if (!f->buf[ref])
                 break;
            if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
-                return err;
+                goto end;
         }
         if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
-            return err;
-        return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
+            goto end;
+        err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
     } else {
-        return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
+        err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
     }
+
+end:
+    vkfc->unlock_frame(hwfc, f);
+    return err;
 }
 
 static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
@@ -4129,7 +4209,19 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
 
 AVVkFrame *av_vk_frame_alloc(void)
 {
-    return av_mallocz(sizeof(AVVkFrame));
+    AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
+    if (!f)
+        return NULL;
+
+    f->internal = av_mallocz(sizeof(*f->internal));
+    if (!f->internal) {
+        av_free(f);
+        return NULL;
+    }
+
+    pthread_mutex_init(&f->internal->lock, NULL);
+
+    return f;
 }
 
 const HWContextType ff_hwcontext_type_vulkan = {
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index ce8a835c6f..5864ae1264 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -135,6 +135,19 @@ typedef struct AVVulkanDeviceContext {
      */
     int queue_family_decode_index;
     int nb_decode_queues;
+
+    /**
+     * Locks a queue, preventing other threads from submitting any command
+     * buffers to this queue.
+     * If set to NULL, will be set to lavu-internal functions that utilize a
+     * mutex.
+     */
+    void (*lock_queue)(AVHWDeviceContext *ctx, int queue_family, int index);
+
+    /**
+     * Similar to lock_queue(), unlocks a queue. Must only be called after it.
+     */
+    void (*unlock_queue)(AVHWDeviceContext *ctx, int queue_family, int index);
 } AVVulkanDeviceContext;
 
 /**
@@ -195,6 +208,21 @@ typedef struct AVVulkanFramesContext {
     * av_hwframe_ctx_init().
     */
    AVVkFrameFlags flags;
+
+    /**
+     * Locks a frame, preventing other threads from changing frame properties.
+     * If set to NULL, will be set to lavu-internal functions that utilize a
+     * mutex.
+     * Users SHOULD only ever lock just before command submission in order
+     * to get accurate frame properties, and unlock immediately after command
+     * submission without waiting for it to finish.
+     */
+    void (*lock_frame)(AVHWFramesContext *fc, AVFrame *f);
+
+    /**
+     * Similar to lock_frame(), unlocks a frame. Must only be called after it.
+     */
+    void (*unlock_frame)(AVHWFramesContext *fc, AVFrame *f);
 } AVVulkanFramesContext;
 
 /*
-- 
2.35.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".