Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Lynne via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Lynne <dev@lynne.ee>
Subject: [FFmpeg-devel] [PATCH] hwcontext_vulkan: rewrite upload/download
Date: Fri, 19 Jul 2024 09:59:50 +0200
Message-ID: <20240719080000.44735-1-dev@lynne.ee> (raw)

This commit was long overdue. The old transfer dubiously tried to
merge as much code as possible, and had very little in the way
of optimizations, apart from basic host-mapping.

The new code uses buffer pools for any temporary buffers, and
handles falling back to buffer-based uploads if host-mapping fails.

Roundtrip performance difference:
ffmpeg -init_hw_device "vulkan=vk:0,debug=0,disable_multiplane=1" -f lavfi \
-i color=red:s=3840x2160 -vf hwupload,hwdownload,format=yuv420p -f null -

7900XTX:
Before: 224fps
After: 502fps

Ada, with proprietary drivers:
Before: 29fps
After: 54fps

Alder Lake:
Before: 85fps
After: 108fps

With the host-mapping codepath disabled:
Before: 32fps
After: 51fps
---
 libavutil/hwcontext_vulkan.c | 515 +++++++++++++++++++++++------------
 1 file changed, 336 insertions(+), 179 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index ebb8e63220..ad93f00948 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -128,6 +128,9 @@ typedef struct VulkanFramesPriv {
     FFVkExecPool upload_exec;
     FFVkExecPool download_exec;
 
+    /* Temporary buffer pools */
+    AVBufferPool *tmp;
+
     /* Modifier info list to free at uninit */
     VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
 } VulkanFramesPriv;
@@ -2414,6 +2417,8 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
     ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec);
     ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec);
     ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec);
+
+    av_buffer_pool_uninit(&fp->tmp);
 }
 
 static int vulkan_frames_init(AVHWFramesContext *hwfc)
@@ -3440,128 +3445,298 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
     return AVERROR(ENOSYS);
 }
 
-static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height)
+static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf,
+                            AVFrame *swf, VkBufferImageCopy *region,
+                            int planes, int upload)
 {
-    size_t size;
-    *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
-    size = height*(*stride);
-    size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment);
-    return size;
+    VkResult ret;
+    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
+    FFVulkanFunctions *vk = &p->vkctx.vkfn;
+    AVVulkanDeviceContext *hwctx = &p->p;
+
+    FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data;
+
+    const VkMappedMemoryRange flush_info = {
+        .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+        .memory = vkbuf->mem,
+        .size   = VK_WHOLE_SIZE,
+    };
+
+    if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && !upload) {
+        ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1,
+                                               &flush_info);
+        if (ret != VK_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    if (upload) {
+        for (int i = 0; i < planes; i++)
+            av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset,
+                                region[i].bufferRowLength,
+                                swf->data[i],
+                                swf->linesize[i],
+                                swf->linesize[i],
+                                region[i].imageExtent.height);
+    } else {
+        for (int i = 0; i < planes; i++)
+            av_image_copy_plane_uc_from(swf->data[i],
+                                        swf->linesize[i],
+                                        vkbuf->mapped_mem + region[i].bufferOffset,
+                                        region[i].bufferRowLength,
+                                        swf->linesize[i],
+                                        region[i].imageExtent.height);
+    }
+
+    if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && upload) {
+        ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1,
+                                          &flush_info);
+        if (ret != VK_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    return 0;
 }
 
-static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
-                              AVBufferRef **bufs, size_t *buf_offsets,
-                              const int *buf_stride, int w,
-                              int h, enum AVPixelFormat pix_fmt, int to_buf)
+static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst,
+                         AVFrame *swf, VkBufferImageCopy *region, int upload)
 {
     int err;
-    AVVkFrame *frame = (AVVkFrame *)f->data[0];
     VulkanFramesPriv *fp = hwfc->hwctx;
     VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
-    FFVulkanFunctions *vk = &p->vkctx.vkfn;
-    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
-    int nb_img_bar = 0;
+    const int planes = av_pix_fmt_count_planes(swf->format);
+
+    size_t buf_offset = 0;
+    for (int i = 0; i < planes; i++) {
+        size_t size;
+        ptrdiff_t linesize = swf->linesize[i];
 
-    const int nb_images = ff_vk_count_images(frame);
-    int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt);
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+        uint32_t p_w, p_h;
+        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
 
-    VkCommandBuffer cmd_buf;
-    FFVkExecContext *exec = ff_vk_exec_get(to_buf ? &fp->download_exec :
-                                                    &fp->upload_exec);
-    cmd_buf = exec->buf;
-    ff_vk_exec_start(&p->vkctx, exec);
+        linesize = FFALIGN(linesize,
+                           p->props.properties.limits.optimalBufferCopyRowPitchAlignment);
+        size = p_h*linesize;
 
-    err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, pixfmt_planes, 1);
+        region[i] = (VkBufferImageCopy) {
+            .bufferOffset = buf_offset,
+            .bufferRowLength = linesize,
+            .bufferImageHeight = p_h,
+            .imageSubresource.layerCount = 1,
+            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
+            /* Rest of the fields adjusted/filled in later */
+        };
+
+        buf_offset = FFALIGN(buf_offset + size,
+                             p->props.properties.limits.optimalBufferCopyOffsetAlignment);
+    }
+
+    err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst,
+                                  VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+                                  VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+                                  NULL, buf_offset,
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
     if (err < 0)
         return err;
 
-    err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, f,
-                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
-                                   VK_PIPELINE_STAGE_2_TRANSFER_BIT);
-    if (err < 0)
+    return 0;
+}
+
+static int create_mapped_buffer(AVHWFramesContext *hwfc,
+                                FFVkBuffer *vkb, VkBufferUsageFlags usage,
+                                size_t size,
+                                VkExternalMemoryBufferCreateInfo *create_desc,
+                                VkImportMemoryHostPointerInfoEXT *import_desc,
+                                VkMemoryHostPointerPropertiesEXT props)
+{
+    int err;
+    VkResult ret;
+    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
+    FFVulkanFunctions *vk = &p->vkctx.vkfn;
+    AVVulkanDeviceContext *hwctx = &p->p;
+
+    VkBufferCreateInfo buf_spawn = {
+        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext       = create_desc,
+        .usage       = usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .size        = size,
+    };
+    VkMemoryRequirements req = {
+        .size           = size,
+        .alignment      = p->hprops.minImportedHostPointerAlignment,
+        .memoryTypeBits = props.memoryTypeBits,
+    };
+
+    ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &vkb->buf);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    err = ff_vk_alloc_mem(&p->vkctx, &req,
+                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+                          import_desc, &vkb->flags, &vkb->mem);
+    if (err < 0) {
+        vk->DestroyBuffer(hwctx->act_dev, vkb->buf, hwctx->alloc);
         return err;
+    }
 
-    ff_vk_frame_barrier(&p->vkctx, exec, f, img_bar, &nb_img_bar,
-                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
-                        VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
-                        to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
-                                 VK_ACCESS_TRANSFER_WRITE_BIT,
-                        to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
-                                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-                        VK_QUEUE_FAMILY_IGNORED);
+    ret = vk->BindBufferMemory(hwctx->act_dev, vkb->buf, vkb->mem, 0);
+    if (ret != VK_SUCCESS) {
+        vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc);
+        vk->DestroyBuffer(hwctx->act_dev, vkb->buf, hwctx->alloc);
+        return AVERROR_EXTERNAL;
+    }
 
-    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
-            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
-            .pImageMemoryBarriers = img_bar,
-            .imageMemoryBarrierCount = nb_img_bar,
-        });
+    return 0;
+}
+
+static void destroy_avvkbuf(void *opaque, uint8_t *data)
+{
+    FFVulkanContext *s = opaque;
+    FFVkBuffer *buf = (FFVkBuffer *)data;
+    ff_vk_free_buf(s, buf);
+    av_free(buf);
+}
 
-    /* Schedule a copy for each plane */
-    for (int i = 0; i < pixfmt_planes; i++) {
-        int idx = FFMIN(i, nb_images - 1);
-        VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
-                                              VK_IMAGE_ASPECT_PLANE_0_BIT,
-                                              VK_IMAGE_ASPECT_PLANE_1_BIT,
-                                              VK_IMAGE_ASPECT_PLANE_2_BIT, };
-
-        FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[i]->data;
-        VkBufferImageCopy buf_reg = {
-            .bufferOffset = buf_offsets[i],
-            .bufferRowLength = buf_stride[i] / desc->comp[i].step,
+static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs,
+                          AVFrame *swf, VkBufferImageCopy *region, int upload)
+{
+    int err;
+    VkResult ret;
+    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
+    FFVulkanFunctions *vk = &p->vkctx.vkfn;
+    AVVulkanDeviceContext *hwctx = &p->p;
+
+    const int planes = av_pix_fmt_count_planes(swf->format);
+
+    VkExternalMemoryBufferCreateInfo create_desc = {
+        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
+        .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+    };
+    VkImportMemoryHostPointerInfoEXT import_desc = {
+        .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+    };
+    VkMemoryHostPointerPropertiesEXT props;
+
+    for (int i = 0; i < planes; i++) {
+        FFVkBuffer *vkb;
+        uint32_t p_w, p_h;
+        size_t offs;
+        size_t buffer_size;
+
+        /* We can't host map images with negative strides */
+        if (swf->linesize[i] < 0) {
+            err = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+
+        /* Get the previous point at which mapping was possible and use it */
+        offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
+        import_desc.pHostPointer = swf->data[i] - offs;
+
+        props = (VkMemoryHostPointerPropertiesEXT) {
+            VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
+        };
+        ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
+                                                    import_desc.handleType,
+                                                    import_desc.pHostPointer,
+                                                    &props);
+        if (!(ret == VK_SUCCESS && props.memoryTypeBits)) {
+            err = AVERROR(EINVAL);
+            goto fail;
+        }
+
+        /* Buffer region for this plane */
+        region[i] = (VkBufferImageCopy) {
+            .bufferOffset = offs,
+            .bufferRowLength = swf->linesize[i],
+            .bufferImageHeight = p_h,
             .imageSubresource.layerCount = 1,
-            .imageSubresource.aspectMask = plane_aspect[(pixfmt_planes != nb_images) +
-                                                        i*(pixfmt_planes != nb_images)],
-            .imageOffset = { 0, 0, 0, },
+            .imageExtent = (VkExtent3D){ p_w, p_h, 1 },
+            /* Rest of the fields adjusted/filled in later */
         };
 
-        uint32_t p_w, p_h;
-        get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i);
+        /* Add the offset at the start, which gets ignored */
+        buffer_size = offs + swf->linesize[i]*p_h;
+        buffer_size = FFALIGN(buffer_size, p->props.properties.limits.minMemoryMapAlignment);
 
-        buf_reg.bufferImageHeight = p_h;
-        buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
+        /* Create a buffer */
+        vkb = av_mallocz(sizeof(*vkb));
+        if (!vkb) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
 
-        if (to_buf)
-            vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx],
-                                     img_bar[0].newLayout,
-                                     vkbuf->buf,
-                                     1, &buf_reg);
-        else
-            vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx],
-                                     img_bar[0].newLayout,
-                                     1, &buf_reg);
-    }
+        err = create_mapped_buffer(hwfc, vkb,
+                                   upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT :
+                                            VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+                                   buffer_size, &create_desc, &import_desc,
+                                   props);
+        if (err < 0) {
+            av_free(vkb);
+            goto fail;
+        }
 
-    err = ff_vk_exec_submit(&p->vkctx, exec);
-    if (err < 0)
-        return err;
+        /* Create a ref */
+        dst[*nb_bufs] = av_buffer_create((uint8_t *)vkb, sizeof(*vkb),
+                                         destroy_avvkbuf, &p->vkctx, 0);
+        if (!dst[*nb_bufs]) {
+            destroy_avvkbuf(&p->vkctx, (uint8_t *)vkb);
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
 
-    ff_vk_exec_wait(&p->vkctx, exec);
+        (*nb_bufs)++;
+    }
 
     return 0;
+
+fail:
+    for (int i = 0; i < (*nb_bufs); i++)
+        av_buffer_unref(&dst[i]);
+    return err;
 }
 
-static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
-                                const AVFrame *swf, int from)
+static int vulkan_transfer_frame(AVHWFramesContext *hwfc,
+                                 AVFrame *swf, AVFrame *hwf,
+                                 int upload)
 {
-    int err = 0;
-    VkResult ret;
-    AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
-    VulkanDevicePriv *p = dev_ctx->hwctx;
-    AVVulkanDeviceContext *hwctx = &p->p;
+    int err;
+    VulkanFramesPriv *fp = hwfc->hwctx;
+    VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
     FFVulkanFunctions *vk = &p->vkctx.vkfn;
 
-    AVFrame tmp;
-    FFVkBuffer *vkbufs[AV_NUM_DATA_POINTERS];
-    AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
-    size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 };
+    int host_mapped = 0;
+
+    AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0];
+    VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane
 
-    uint32_t p_w, p_h;
     const int planes = av_pix_fmt_count_planes(swf->format);
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format);
+    const int nb_images = ff_vk_count_images(hwf_vk);
+    static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT,
+                                                       VK_IMAGE_ASPECT_PLANE_0_BIT,
+                                                       VK_IMAGE_ASPECT_PLANE_1_BIT,
+                                                       VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
+    VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS];
+    int nb_img_bar = 0;
 
-    int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
-    const int map_host = !!(p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY);
+    AVBufferRef *bufs[AV_NUM_DATA_POINTERS];
+    int nb_bufs = 0;
 
+    VkCommandBuffer cmd_buf;
+    FFVkExecContext *exec;
+
+    /* Sanity checking */
     if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) {
         av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n");
         return AVERROR(EINVAL);
@@ -3570,115 +3745,97 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
     if (swf->width > hwfc->width || swf->height > hwfc->height)
         return AVERROR(EINVAL);
 
-    /* Create buffers */
-    for (int i = 0; i < planes; i++) {
-        size_t req_size;
-
-        VkExternalMemoryBufferCreateInfo create_desc = {
-            .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
-            .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
-        };
-
-        VkImportMemoryHostPointerInfoEXT import_desc = {
-            .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
-            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
-        };
-
-        VkMemoryHostPointerPropertiesEXT p_props = {
-            .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
-        };
-
-        get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
-
-        tmp.linesize[i] = FFABS(swf->linesize[i]);
-
-        /* Do not map images with a negative stride */
-        if (map_host && swf->linesize[i] > 0) {
-            size_t offs;
-            offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment;
-            import_desc.pHostPointer = swf->data[i] - offs;
-
-            /* We have to compensate for the few extra bytes of padding we
-             * completely ignore at the start */
-            req_size = FFALIGN(offs + tmp.linesize[i] * p_h,
-                               p->hprops.minImportedHostPointerAlignment);
-
-            ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev,
-                                                        import_desc.handleType,
-                                                        import_desc.pHostPointer,
-                                                        &p_props);
-            if (ret == VK_SUCCESS && p_props.memoryTypeBits) {
-                host_mapped[i] = 1;
-                buf_offsets[i] = offs;
-            }
-        }
+    /* Setup buffers first */
+    if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) {
+        err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload);
+        if (err >= 0)
+            host_mapped = 1;
+    }
 
-        if (!host_mapped[i])
-            req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h);
-
-        err = ff_vk_create_avbuf(&p->vkctx, &bufs[i], req_size,
-                                host_mapped[i] ? &create_desc : NULL,
-                                host_mapped[i] ? &import_desc : NULL,
-                                from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
-                                       VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
-                                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                (host_mapped[i] ?
-                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT : 0x0));
+    if (!host_mapped) {
+        err = get_plane_buf(hwfc, &bufs[0], swf, region, upload);
         if (err < 0)
             goto end;
+        nb_bufs = 1;
 
-        vkbufs[i] = (FFVkBuffer *)bufs[i]->data;
+        if (upload) {
+            err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1);
+            if (err < 0)
+                goto end;
+        }
     }
 
-    if (!from) {
-        /* Map, copy image TO buffer (which then goes to the VkImage), unmap */
-        if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0)))
-            goto end;
-
-        for (int i = 0; i < planes; i++) {
-            if (host_mapped[i])
-                continue;
+    exec = ff_vk_exec_get(&fp->upload_exec);
+    cmd_buf = exec->buf;
 
-            get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+    ff_vk_exec_start(&p->vkctx, exec);
 
-            av_image_copy_plane(tmp.data[i], tmp.linesize[i],
-                                (const uint8_t *)swf->data[i], swf->linesize[i],
-                                FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
-                                p_h);
-        }
+    /* Prep destination Vulkan frame */
+    err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf,
+                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                   VK_PIPELINE_STAGE_2_TRANSFER_BIT);
+    if (err < 0)
+        goto end;
 
-        if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1)))
+    /* No need to declare buf deps for synchronous transfers */
+    if (upload) {
+        err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1);
+        if (err < 0) {
+            ff_vk_exec_discard_deps(&p->vkctx, exec);
             goto end;
+        }
     }
 
-    /* Copy buffers into/from image */
-    err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets,
-                             tmp.linesize, swf->width, swf->height, swf->format,
-                             from);
-
-    if (from) {
-        /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
-        if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0)))
-            goto end;
+    ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
+                        upload ? VK_ACCESS_TRANSFER_WRITE_BIT :
+                                 VK_ACCESS_TRANSFER_READ_BIT,
+                        upload ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL :
+                                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                        VK_QUEUE_FAMILY_IGNORED);
 
-        for (int i = 0; i < planes; i++) {
-            if (host_mapped[i])
-                continue;
+    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+    });
 
-            get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i);
+    for (int i = 0; i < planes; i++) {
+        int buf_idx = FFMIN(i, (nb_bufs - 1));
+        int img_idx = FFMIN(i, (nb_images - 1));
+        FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data;
+
+        uint32_t orig_stride = region[i].bufferRowLength;
+        region[i].bufferRowLength /= desc->comp[i].step;
+        region[i].imageSubresource.aspectMask = plane_aspect[(planes != nb_images) +
+                                                             i*(planes != nb_images)];
+
+        if (upload)
+            vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf,
+                                     hwf_vk->img[img_idx],
+                                     img_bar[img_idx].newLayout,
+                                     1, &region[i]);
+        else
+            vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx],
+                                     img_bar[img_idx].newLayout,
+                                     vkbuf->buf,
+                                     1, &region[i]);
 
-            av_image_copy_plane_uc_from(swf->data[i], swf->linesize[i],
-                                        (const uint8_t *)tmp.data[i], tmp.linesize[i],
-                                        FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])),
-                                        p_h);
-        }
+        region[i].bufferRowLength = orig_stride;
+    }
 
-        if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1)))
-            goto end;
+    err = ff_vk_exec_submit(&p->vkctx, exec);
+    if (err < 0) {
+        ff_vk_exec_discard_deps(&p->vkctx, exec);
+    } else if (!upload) {
+        ff_vk_exec_wait(&p->vkctx, exec);
+        if (!host_mapped)
+            err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0);
     }
 
 end:
-    for (int i = 0; i < planes; i++)
+    for (int i = 0; i < nb_bufs; i++)
         av_buffer_unref(&bufs[i]);
 
     return err;
@@ -3705,7 +3862,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
         if (src->hw_frames_ctx)
             return AVERROR(ENOSYS);
         else
-            return vulkan_transfer_data(hwfc, dst, src, 0);
+            return vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1);
     }
 }
 
@@ -3822,7 +3979,7 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
         if (dst->hw_frames_ctx)
             return AVERROR(ENOSYS);
         else
-            return vulkan_transfer_data(hwfc, src, dst, 1);
+            return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0);
     }
 }
 
-- 
2.45.2.753.g447d99e1c3b
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

                 reply	other threads:[~2024-07-19  8:00 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240719080000.44735-1-dev@lynne.ee \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=dev@lynne.ee \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git