[FFmpeg-devel] [PATCH 2/3] hwcontext_vulkan: add queue and frame locking functions

From: Lynne <dev@lynne.ee>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH 2/3] hwcontext_vulkan: add queue and frame locking functions
Date: Sun, 3 Apr 2022 16:52:24 +0200 (CEST)
Message-ID: <MzjuQ-Z--3-2@lynne.ee> (raw)
In-Reply-To: <MzjuD2p--3-2@lynne.ee-MzjuHsU----2>

[-- Attachment #1: Type: text/plain, Size: 242 bytes --]

This allows multiple threads to access the same frame. This is
unfortunately necessary because Vulkan leaves queue synchronization up
to the user, and frames often have their layout changed when they are
read.

Patch attached.


[-- Attachment #2: 0002-hwcontext_vulkan-add-queue-and-frame-locking-functio.patch --]
[-- Type: text/x-patch, Size: 15362 bytes --]

From d8bd429859f9dc90325dbd0a7355b21ad5a80b6f Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sun, 3 Apr 2022 16:44:58 +0200
Subject: [PATCH 2/3] hwcontext_vulkan: add queue and frame locking functions

This allows multiple threads to access the same frame. This is
unfortunately necessary because Vulkan leaves queue synchronization up
to the user, and frames often have their layout changed when they are
read.
---
 libavutil/hwcontext_vulkan.c | 180 ++++++++++++++++++++++++++---------
 libavutil/hwcontext_vulkan.h |  28 ++++++
 2 files changed, 164 insertions(+), 44 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 1176858545..5bd0cab7ef 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -27,6 +27,7 @@
 #include <dlfcn.h>
 #endif
 
+#include <pthread.h>
 #include <unistd.h>
 
 #include "config.h"
@@ -93,6 +94,8 @@ typedef struct VulkanDevicePriv {
     /* Queues */
     uint32_t qfs[5];
     int num_qfs;
+    int *num_queues_in_qf;
+    pthread_mutex_t **qf_mutex;
 
     /* Debug callback */
     VkDebugUtilsMessengerEXT debug_ctx;
@@ -126,6 +129,8 @@ typedef struct VulkanFramesPriv {
 } VulkanFramesPriv;
 
 typedef struct AVVkFrameInternal {
+    pthread_mutex_t lock;
+
 #if CONFIG_CUDA
     /* Importing external memory into cuda is really expensive so we keep the
      * memory imported all the time */
@@ -1307,6 +1312,11 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
     if (p->libvulkan)
         dlclose(p->libvulkan);
 
+    av_freep(&p->num_queues_in_qf);
+    for (int i = 0; i < p->num_qfs; i++)
+        av_freep(&p->qf_mutex[p->qfs[i]]);
+    av_freep(&p->qf_mutex);
+
     RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
     RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
 }
@@ -1430,9 +1440,21 @@ end:
     return err;
 }
 
+/* Default AVVulkanDeviceContext.lock_queue: locks the mutex of one queue. */
+static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+    pthread_mutex_lock(&((VulkanDevicePriv *)ctx->internal->priv)->qf_mutex[queue_family][index]);
+}
+
+/* Default AVVulkanDeviceContext.unlock_queue: unlocks the mutex of one queue. */
+static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
+{
+    pthread_mutex_unlock(&((VulkanDevicePriv *)ctx->internal->priv)->qf_mutex[queue_family][index]);
+}
+
 static int vulkan_device_init(AVHWDeviceContext *ctx)
 {
-    int err;
+    int err, last_qf = 0;
     uint32_t queue_num;
     AVVulkanDeviceContext *hwctx = ctx->hwctx;
     VulkanDevicePriv *p = ctx->internal->priv;
@@ -1487,6 +1509,16 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
     enc_index   = hwctx->queue_family_encode_index;
     dec_index   = hwctx->queue_family_decode_index;
 
+    last_qf = FFMAX(graph_index, FFMAX(comp_index, FFMAX(tx_index, FFMAX(enc_index, dec_index))));
+
+    p->qf_mutex = av_mallocz(last_qf*sizeof(*p->qf_mutex));
+    if (!p->qf_mutex)
+        return AVERROR(ENOMEM);
+
+    p->num_queues_in_qf = av_mallocz(last_qf*sizeof(*p->num_queues_in_qf));
+    if (!p->num_queues_in_qf)
+        return AVERROR(ENOMEM);
+
 #define CHECK_QUEUE(type, required, fidx, ctx_qf, qc)                                           \
     do {                                                                                        \
         if (ctx_qf < 0 && required) {                                                           \
@@ -1515,6 +1547,14 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
         enc_index   = (ctx_qf == enc_index)   ? -1 : enc_index;                                 \
         dec_index   = (ctx_qf == dec_index)   ? -1 : dec_index;                                 \
         p->qfs[p->num_qfs++] = ctx_qf;                                                          \
+                                                                                                \
+        p->num_queues_in_qf[ctx_qf] = qc;                                                       \
+        p->qf_mutex[ctx_qf] = av_mallocz(qc*sizeof(**p->qf_mutex));                             \
+        if (!p->qf_mutex[ctx_qf])                                                               \
+            return AVERROR(ENOMEM);                                                             \
+                                                                                                \
+        for (int i = 0; i < qc; i++)                                                            \
+            pthread_mutex_init(&p->qf_mutex[ctx_qf][i], NULL);                                  \
     } while (0)
 
     CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,        hwctx->nb_graphics_queues);
@@ -1525,6 +1565,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
 
 #undef CHECK_QUEUE
 
+    if (!hwctx->lock_queue)
+        hwctx->lock_queue = lock_queue;
+    if (!hwctx->unlock_queue)
+        hwctx->unlock_queue = unlock_queue;
+
     /* Get device capabilities */
     vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
 
@@ -1726,9 +1771,6 @@ static void vulkan_free_internal(AVVkFrame *f)
 {
     AVVkFrameInternal *internal = f->internal;
 
-    if (!internal)
-        return;
-
 #if CONFIG_CUDA
     if (internal->cuda_fc_ref) {
         AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
@@ -1757,6 +1799,8 @@ static void vulkan_free_internal(AVVkFrame *f)
     }
 #endif
 
+    pthread_mutex_destroy(&internal->lock);
+
     av_freep(&f->internal);
 }
 
@@ -1916,13 +1960,16 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     int err;
     uint32_t src_qf, dst_qf;
     VkImageLayout new_layout;
-    VkAccessFlags new_access;
+    VkAccessFlags2 new_access;
+    AVVulkanFramesContext *vkfc = hwfc->hwctx;
     const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
+    AVFrame tmp = { .data[0] = (uint8_t *)frame };
     uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
 
-    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS] = { 0 };
+    VkDependencyInfo dep_info;
 
     VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@@ -1938,6 +1985,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
     };
 
     VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
+
+    if ((err = wait_start_exec_ctx(hwfc, ectx)))
+        return err;
+
+    vkfc->lock_frame(hwfc, &tmp);
+
     for (int i = 0; i < planes; i++) {
         wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
         sem_sig_val[i] = frame->sem_value[i] + 1;
@@ -1974,35 +2027,46 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         break;
     }
 
-    if ((err = wait_start_exec_ctx(hwfc, ectx)))
-        return err;
-
     /* Change the image layout to something more optimal for writes.
      * This also signals the newly created semaphore, making it usable
      * for synchronization */
     for (int i = 0; i < planes; i++) {
-        img_bar[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
-        img_bar[i].srcAccessMask = 0x0;
-        img_bar[i].dstAccessMask = new_access;
-        img_bar[i].oldLayout = frame->layout[i];
-        img_bar[i].newLayout = new_layout;
-        img_bar[i].srcQueueFamilyIndex = src_qf;
-        img_bar[i].dstQueueFamilyIndex = dst_qf;
-        img_bar[i].image = frame->img[i];
-        img_bar[i].subresourceRange.levelCount = 1;
-        img_bar[i].subresourceRange.layerCount = 1;
-        img_bar[i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+        img_bar[i] = (VkImageMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+            .pNext = NULL,
+            .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
+            .srcAccessMask = 0x0,
+            .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
+            .dstAccessMask = new_access,
+            .oldLayout = frame->layout[i],
+            .newLayout = new_layout,
+            .srcQueueFamilyIndex = src_qf,
+            .dstQueueFamilyIndex = dst_qf,
+            .image = frame->img[i],
+            .subresourceRange = (VkImageSubresourceRange) {
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .levelCount = 1,
+                .layerCount = 1,
+            },
+        };
 
         frame->layout[i] = img_bar[i].newLayout;
         frame->access[i] = img_bar[i].dstAccessMask;
     }
 
-    vk->CmdPipelineBarrier(get_buf_exec_ctx(hwfc, ectx),
-                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                           VK_PIPELINE_STAGE_TRANSFER_BIT,
-                           0, 0, NULL, 0, NULL, planes, img_bar);
+    dep_info = (VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = planes,
+    };
 
-    return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+    vk->CmdPipelineBarrier2KHR(get_buf_exec_ctx(hwfc, ectx), &dep_info);
+
+    err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+    vkfc->unlock_frame(hwfc, &tmp);
+
+    return err;
 }
 
 static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
@@ -2254,6 +2318,18 @@ fail:
     return NULL;
 }
 
+/* Default AVVulkanFramesContext.lock_frame: locks the AVVkFrame in f->data[0]. */
+static void lock_frame(AVHWFramesContext *fc, AVFrame *f)
+{
+    pthread_mutex_lock(&((AVVkFrame *)f->data[0])->internal->lock);
+}
+
+/* Default AVVulkanFramesContext.unlock_frame: unlocks the AVVkFrame in f->data[0]. */
+static void unlock_frame(AVHWFramesContext *fc, AVFrame *f)
+{
+    pthread_mutex_unlock(&((AVVkFrame *)f->data[0])->internal->lock);
+}
+
 static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
 {
     VulkanFramesPriv *fp = hwfc->internal->priv;
@@ -2416,6 +2492,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
             return AVERROR(ENOMEM);
     }
 
+    if (!hwctx->lock_frame)
+        hwctx->lock_frame = lock_frame;
+    if (!hwctx->unlock_frame)
+        hwctx->unlock_frame = unlock_frame;
+
     return 0;
 }
 
@@ -3004,20 +3085,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                                                      CU_AD_FORMAT_UNSIGNED_INT8;
 
     dst_f = (AVVkFrame *)frame->data[0];
-
     dst_int = dst_f->internal;
-    if (!dst_int || !dst_int->cuda_fc_ref) {
-        if (!dst_f->internal)
-            dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
-
-        if (!dst_int)
-            return AVERROR(ENOMEM);
 
+    if (!dst_int->cuda_fc_ref) {
         dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
-        if (!dst_int->cuda_fc_ref) {
-            av_freep(&dst_f->internal);
+        if (!dst_int->cuda_fc_ref)
             return AVERROR(ENOMEM);
-        }
 
         for (int i = 0; i < planes; i++) {
             CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
@@ -3691,13 +3764,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
     return err;
 }
 
-static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
+static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
                               AVBufferRef **bufs, size_t *buf_offsets,
                               const int *buf_stride, int w,
                               int h, enum AVPixelFormat pix_fmt, int to_buf)
 {
     int err;
     AVVkFrame *frame = (AVVkFrame *)f->data[0];
+    AVVulkanFramesContext *vkfc = hwfc->hwctx;
     VulkanFramesPriv *fp = hwfc->internal->priv;
     VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
     FFVulkanFunctions *vk = &p->vkfn;
@@ -3732,11 +3806,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
         .waitSemaphoreCount   = planes,
     };
 
-    for (int i = 0; i < planes; i++)
-        sem_signal_values[i] = frame->sem_value[i] + 1;
+    vkfc->lock_frame(hwfc, f);
 
     if ((err = wait_start_exec_ctx(hwfc, ectx)))
-        return err;
+        goto end;
+
+    for (int i = 0; i < planes; i++)
+        sem_signal_values[i] = frame->sem_value[i] + 1;
 
     /* Change the image layout to something more optimal for transfers */
     for (int i = 0; i < planes; i++) {
@@ -3811,14 +3887,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
             if (!f->buf[ref])
                 break;
             if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
-                return err;
+                goto end;
         }
         if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
-            return err;
-        return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
+            goto end;
+        err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
     } else {
-        return submit_exec_ctx(hwfc, ectx, &s_info, frame,    1);
+        err = submit_exec_ctx(hwfc, ectx, &s_info, frame,    1);
     }
+
+end:
+    vkfc->unlock_frame(hwfc, f);
+    return err;
 }
 
 static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
@@ -4129,7 +4209,19 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
 
 AVVkFrame *av_vk_frame_alloc(void)
 {
-    return av_mallocz(sizeof(AVVkFrame));
+    AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
+    if (!f)
+        return NULL;
+
+    f->internal = av_mallocz(sizeof(*f->internal));
+    /* A frame whose lock failed to initialize is unusable: fail like OOM. */
+    if (!f->internal || pthread_mutex_init(&f->internal->lock, NULL)) {
+        av_freep(&f->internal);
+        av_free(f);
+        return NULL;
+    }
+
+    return f;
 }
 
 const HWContextType ff_hwcontext_type_vulkan = {
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index ce8a835c6f..5864ae1264 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -135,6 +135,19 @@ typedef struct AVVulkanDeviceContext {
      */
     int queue_family_decode_index;
     int nb_decode_queues;
+
+    /**
+     * Locks a queue, preventing other threads from submitting any command
+     * buffers to this queue.
+     * If left NULL, it is set during device init to a lavu-internal function
+     * that locks a per-queue mutex.
+     */
+    void (*lock_queue)(AVHWDeviceContext *ctx, int queue_family, int index);
+
+    /**
+     * Unlocks a queue locked with lock_queue(). Must only be called after it.
+     */
+    void (*unlock_queue)(AVHWDeviceContext *ctx, int queue_family, int index);
 } AVVulkanDeviceContext;
 
 /**
@@ -195,6 +208,21 @@ typedef struct AVVulkanFramesContext {
      * av_hwframe_ctx_init().
      */
     AVVkFrameFlags flags;
+
+    /**
+     * Locks a frame, preventing other threads from changing frame properties.
+     * If left NULL, it is set during frames init to a lavu-internal function
+     * that locks a per-frame mutex.
+     * Users SHOULD only ever lock just before command submission in order
+     * to get accurate frame properties, and unlock immediately after command
+     * submission without waiting for it to finish.
+     */
+    void (*lock_frame)(AVHWFramesContext *fc, AVFrame *f);
+
+    /**
+     * Unlocks a frame locked with lock_frame(). Must only be called after it.
+     */
+    void (*unlock_frame)(AVHWFramesContext *fc, AVFrame *f);
 } AVVulkanFramesContext;
 
 /*
-- 
2.35.1


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".