From: Lynne via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: Lynne <dev@lynne.ee> Subject: [FFmpeg-devel] [PATCH] hwcontext_vulkan: rewrite upload/download Date: Fri, 19 Jul 2024 09:59:50 +0200 Message-ID: <20240719080000.44735-1-dev@lynne.ee> (raw) This commit was long overdue. The old transfer dubiously tried to merge as much code as possible, and had very little in the way of optimizations, apart from basic host-mapping. The new code uses buffer pools for any temporary buffers, and handles falling back to buffer-based uploads if host-mapping fails. Roundtrip performance difference: ffmpeg -init_hw_device "vulkan=vk:0,debug=0,disable_multiplane=1" -f lavfi \ -i color=red:s=3840x2160 -vf hwupload,hwdownload,format=yuv420p -f null - 7900XTX: Before: 224fps After: 502fps Ada, with proprietary drivers: Before: 29fps After: 54fps Alder Lake: Before: 85fps After: 108fps With the host-mapping codepath disabled: Before: 32fps After: 51fps --- libavutil/hwcontext_vulkan.c | 515 +++++++++++++++++++++++------------ 1 file changed, 336 insertions(+), 179 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index ebb8e63220..ad93f00948 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -128,6 +128,9 @@ typedef struct VulkanFramesPriv { FFVkExecPool upload_exec; FFVkExecPool download_exec; + /* Temporary buffer pools */ + AVBufferPool *tmp; + /* Modifier info list to free at uninit */ VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; } VulkanFramesPriv; @@ -2414,6 +2417,8 @@ static void vulkan_frames_uninit(AVHWFramesContext *hwfc) ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec); ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec); ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec); + + av_buffer_pool_uninit(&fp->tmp); } static int vulkan_frames_init(AVHWFramesContext *hwfc) @@ -3440,128 +3445,298 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, return 
AVERROR(ENOSYS); } -static size_t get_req_buffer_size(VulkanDevicePriv *p, int *stride, int height) +static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf, + AVFrame *swf, VkBufferImageCopy *region, + int planes, int upload) { - size_t size; - *stride = FFALIGN(*stride, p->props.properties.limits.optimalBufferCopyRowPitchAlignment); - size = height*(*stride); - size = FFALIGN(size, p->props.properties.limits.minMemoryMapAlignment); - return size; + VkResult ret; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + AVVulkanDeviceContext *hwctx = &p->p; + + FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data; + + const VkMappedMemoryRange flush_info = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = vkbuf->mem, + .size = VK_WHOLE_SIZE, + }; + + if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && !upload) { + ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1, + &flush_info); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + if (upload) { + for (int i = 0; i < planes; i++) + av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset, + region[i].bufferRowLength, + swf->data[i], + swf->linesize[i], + swf->linesize[i], + region[i].imageExtent.height); + } else { + for (int i = 0; i < planes; i++) + av_image_copy_plane_uc_from(swf->data[i], + swf->linesize[i], + vkbuf->mapped_mem + region[i].bufferOffset, + region[i].bufferRowLength, + swf->linesize[i], + region[i].imageExtent.height); + } + + if (!(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && upload) { + ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1, + &flush_info); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + return 0; } -static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f, - AVBufferRef 
**bufs, size_t *buf_offsets, - const int *buf_stride, int w, - int h, enum AVPixelFormat pix_fmt, int to_buf) +static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst, + AVFrame *swf, VkBufferImageCopy *region, int upload) { int err; - AVVkFrame *frame = (AVVkFrame *)f->data[0]; VulkanFramesPriv *fp = hwfc->hwctx; VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; - int nb_img_bar = 0; + const int planes = av_pix_fmt_count_planes(swf->format); + + size_t buf_offset = 0; + for (int i = 0; i < planes; i++) { + size_t size; + ptrdiff_t linesize = swf->linesize[i]; - const int nb_images = ff_vk_count_images(frame); - int pixfmt_planes = av_pix_fmt_count_planes(pix_fmt); - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + uint32_t p_w, p_h; + get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); - VkCommandBuffer cmd_buf; - FFVkExecContext *exec = ff_vk_exec_get(to_buf ? 
&fp->download_exec : - &fp->upload_exec); - cmd_buf = exec->buf; - ff_vk_exec_start(&p->vkctx, exec); + linesize = FFALIGN(linesize, + p->props.properties.limits.optimalBufferCopyRowPitchAlignment); + size = p_h*linesize; - err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, pixfmt_planes, 1); + region[i] = (VkBufferImageCopy) { + .bufferOffset = buf_offset, + .bufferRowLength = linesize, + .bufferImageHeight = p_h, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, + /* Rest of the fields adjusted/filled in later */ + }; + + buf_offset = FFALIGN(buf_offset + size, + p->props.properties.limits.optimalBufferCopyOffsetAlignment); + } + + err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + NULL, buf_offset, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); if (err < 0) return err; - err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, f, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_TRANSFER_BIT); - if (err < 0) + return 0; +} + +static int create_mapped_buffer(AVHWFramesContext *hwfc, + FFVkBuffer *vkb, VkBufferUsageFlags usage, + size_t size, + VkExternalMemoryBufferCreateInfo *create_desc, + VkImportMemoryHostPointerInfoEXT *import_desc, + VkMemoryHostPointerPropertiesEXT props) +{ + int err; + VkResult ret; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + AVVulkanDeviceContext *hwctx = &p->p; + + VkBufferCreateInfo buf_spawn = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = create_desc, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .size = size, + }; + VkMemoryRequirements req = { + .size = size, + .alignment = p->hprops.minImportedHostPointerAlignment, + .memoryTypeBits = props.memoryTypeBits, + }; + + ret = vk->CreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &vkb->buf); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + err = ff_vk_alloc_mem(&p->vkctx, &req, + 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + import_desc, &vkb->flags, &vkb->mem); + if (err < 0) { + vk->DestroyBuffer(hwctx->act_dev, vkb->buf, hwctx->alloc); return err; + } - ff_vk_frame_barrier(&p->vkctx, exec, f, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, - to_buf ? VK_ACCESS_TRANSFER_READ_BIT : - VK_ACCESS_TRANSFER_WRITE_BIT, - to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - VK_QUEUE_FAMILY_IGNORED); + ret = vk->BindBufferMemory(hwctx->act_dev, vkb->buf, vkb->mem, 0); + if (ret != VK_SUCCESS) { + vk->FreeMemory(hwctx->act_dev, vkb->mem, hwctx->alloc); + vk->DestroyBuffer(hwctx->act_dev, vkb->buf, hwctx->alloc); + return AVERROR_EXTERNAL; + } - vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); + return 0; +} + +static void destroy_avvkbuf(void *opaque, uint8_t *data) +{ + FFVulkanContext *s = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; + ff_vk_free_buf(s, buf); + av_free(buf); +} - /* Schedule a copy for each plane */ - for (int i = 0; i < pixfmt_planes; i++) { - int idx = FFMIN(i, nb_images - 1); - VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, - VK_IMAGE_ASPECT_PLANE_0_BIT, - VK_IMAGE_ASPECT_PLANE_1_BIT, - VK_IMAGE_ASPECT_PLANE_2_BIT, }; - - FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[i]->data; - VkBufferImageCopy buf_reg = { - .bufferOffset = buf_offsets[i], - .bufferRowLength = buf_stride[i] / desc->comp[i].step, +static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs, + AVFrame *swf, VkBufferImageCopy *region, int upload) +{ + int err; + VkResult ret; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + AVVulkanDeviceContext *hwctx = &p->p; + + const int planes = av_pix_fmt_count_planes(swf->format); + + VkExternalMemoryBufferCreateInfo 
create_desc = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + }; + VkImportMemoryHostPointerInfoEXT import_desc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + }; + VkMemoryHostPointerPropertiesEXT props; + + for (int i = 0; i < planes; i++) { + FFVkBuffer *vkb; + uint32_t p_w, p_h; + size_t offs; + size_t buffer_size; + + /* We can't host map images with negative strides */ + if (swf->linesize[i] < 0) { + err = AVERROR(EINVAL); + goto fail; + } + + get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + + /* Get the previous point at which mapping was possible and use it */ + offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment; + import_desc.pHostPointer = swf->data[i] - offs; + + props = (VkMemoryHostPointerPropertiesEXT) { + VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, + }; + ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev, + import_desc.handleType, + import_desc.pHostPointer, + &props); + if (!(ret == VK_SUCCESS && props.memoryTypeBits)) { + err = AVERROR(EINVAL); + goto fail; + } + + /* Buffer region for this plane */ + region[i] = (VkBufferImageCopy) { + .bufferOffset = offs, + .bufferRowLength = swf->linesize[i], + .bufferImageHeight = p_h, .imageSubresource.layerCount = 1, - .imageSubresource.aspectMask = plane_aspect[(pixfmt_planes != nb_images) + - i*(pixfmt_planes != nb_images)], - .imageOffset = { 0, 0, 0, }, + .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, + /* Rest of the fields adjusted/filled in later */ }; - uint32_t p_w, p_h; - get_plane_wh(&p_w, &p_h, pix_fmt, w, h, i); + /* Add the offset at the start, which gets ignored */ + buffer_size = offs + swf->linesize[i]*p_h; + buffer_size = FFALIGN(buffer_size, p->props.properties.limits.minMemoryMapAlignment); - buf_reg.bufferImageHeight = p_h; - 
buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, }; + /* Create a buffer */ + vkb = av_mallocz(sizeof(*vkb)); + if (!vkb) { + err = AVERROR(ENOMEM); + goto fail; + } - if (to_buf) - vk->CmdCopyImageToBuffer(cmd_buf, frame->img[idx], - img_bar[0].newLayout, - vkbuf->buf, - 1, &buf_reg); - else - vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[idx], - img_bar[0].newLayout, - 1, &buf_reg); - } + err = create_mapped_buffer(hwfc, vkb, + upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT : + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + buffer_size, &create_desc, &import_desc, + props); + if (err < 0) { + av_free(vkb); + goto fail; + } - err = ff_vk_exec_submit(&p->vkctx, exec); - if (err < 0) - return err; + /* Create a ref */ + dst[*nb_bufs] = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), + destroy_avvkbuf, &p->vkctx, 0); + if (!dst[*nb_bufs]) { + destroy_avvkbuf(&p->vkctx, (uint8_t *)vkb); + err = AVERROR(ENOMEM); + goto fail; + } - ff_vk_exec_wait(&p->vkctx, exec); + (*nb_bufs)++; + } return 0; + +fail: + for (int i = 0; i < (*nb_bufs); i++) + av_buffer_unref(&dst[i]); + return err; } -static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, - const AVFrame *swf, int from) +static int vulkan_transfer_frame(AVHWFramesContext *hwfc, + AVFrame *swf, AVFrame *hwf, + int upload) { - int err = 0; - VkResult ret; - AVHWDeviceContext *dev_ctx = hwfc->device_ctx; - VulkanDevicePriv *p = dev_ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; + int err; + VulkanFramesPriv *fp = hwfc->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; FFVulkanFunctions *vk = &p->vkctx.vkfn; - AVFrame tmp; - FFVkBuffer *vkbufs[AV_NUM_DATA_POINTERS]; - AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 }; - size_t buf_offsets[AV_NUM_DATA_POINTERS] = { 0 }; + int host_mapped = 0; + + AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0]; + VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane - uint32_t p_w, p_h; const int planes = av_pix_fmt_count_planes(swf->format); + const 
AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format); + const int nb_images = ff_vk_count_images(hwf_vk); + static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, }; + + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + int nb_img_bar = 0; - int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; - const int map_host = !!(p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY); + AVBufferRef *bufs[AV_NUM_DATA_POINTERS]; + int nb_bufs = 0; + VkCommandBuffer cmd_buf; + FFVkExecContext *exec; + + /* Sanity checking */ if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); return AVERROR(EINVAL); @@ -3570,115 +3745,97 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf, if (swf->width > hwfc->width || swf->height > hwfc->height) return AVERROR(EINVAL); - /* Create buffers */ - for (int i = 0; i < planes; i++) { - size_t req_size; - - VkExternalMemoryBufferCreateInfo create_desc = { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, - .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, - }; - - VkImportMemoryHostPointerInfoEXT import_desc = { - .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, - .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, - }; - - VkMemoryHostPointerPropertiesEXT p_props = { - .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, - }; - - get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); - - tmp.linesize[i] = FFABS(swf->linesize[i]); - - /* Do not map images with a negative stride */ - if (map_host && swf->linesize[i] > 0) { - size_t offs; - offs = (uintptr_t)swf->data[i] % p->hprops.minImportedHostPointerAlignment; - import_desc.pHostPointer = swf->data[i] - offs; - - /* We have to compensate for the few extra bytes of 
padding we - * completely ignore at the start */ - req_size = FFALIGN(offs + tmp.linesize[i] * p_h, - p->hprops.minImportedHostPointerAlignment); - - ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->act_dev, - import_desc.handleType, - import_desc.pHostPointer, - &p_props); - if (ret == VK_SUCCESS && p_props.memoryTypeBits) { - host_mapped[i] = 1; - buf_offsets[i] = offs; - } - } + /* Setup buffers first */ + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) { + err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload); + if (err >= 0) + host_mapped = 1; + } - if (!host_mapped[i]) - req_size = get_req_buffer_size(p, &tmp.linesize[i], p_h); - - err = ff_vk_create_avbuf(&p->vkctx, &bufs[i], req_size, - host_mapped[i] ? &create_desc : NULL, - host_mapped[i] ? &import_desc : NULL, - from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT : - VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - (host_mapped[i] ? - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT : 0x0)); + if (!host_mapped) { + err = get_plane_buf(hwfc, &bufs[0], swf, region, upload); if (err < 0) goto end; + nb_bufs = 1; - vkbufs[i] = (FFVkBuffer *)bufs[i]->data; + if (upload) { + err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1); + if (err < 0) + goto end; + } } - if (!from) { - /* Map, copy image TO buffer (which then goes to the VkImage), unmap */ - if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) - goto end; - - for (int i = 0; i < planes; i++) { - if (host_mapped[i]) - continue; + exec = ff_vk_exec_get(&fp->upload_exec); + cmd_buf = exec->buf; - get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + ff_vk_exec_start(&p->vkctx, exec); - av_image_copy_plane(tmp.data[i], tmp.linesize[i], - (const uint8_t *)swf->data[i], swf->linesize[i], - FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), - p_h); - } + /* Prep destination Vulkan frame */ + err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + 
VK_PIPELINE_STAGE_2_TRANSFER_BIT); + if (err < 0) + goto end; - if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) + /* No need to declare buf deps for synchronous transfers */ + if (upload) { + err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1); + if (err < 0) { + ff_vk_exec_discard_deps(&p->vkctx, exec); goto end; + } + } - /* Copy buffers into/from image */ - err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets, - tmp.linesize, swf->width, swf->height, swf->format, - from); - - if (from) { - /* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */ - if ((err = ff_vk_map_buffers(&p->vkctx, vkbufs, tmp.data, planes, 0))) - goto end; + ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, + upload ? VK_ACCESS_TRANSFER_WRITE_BIT : + VK_ACCESS_TRANSFER_READ_BIT, + upload ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); - for (int i = 0; i < planes; i++) { - if (host_mapped[i]) - continue; + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); - get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + for (int i = 0; i < planes; i++) { + int buf_idx = FFMIN(i, (nb_bufs - 1)); + int img_idx = FFMIN(i, (nb_images - 1)); + FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data; + + uint32_t orig_stride = region[i].bufferRowLength; + region[i].bufferRowLength /= desc->comp[i].step; + region[i].imageSubresource.aspectMask = plane_aspect[(planes != nb_images) + + i*(planes != nb_images)]; + + if (upload) + vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, + hwf_vk->img[img_idx], + img_bar[img_idx].newLayout, + 1, &region[i]); + else + vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx], + img_bar[img_idx].newLayout, + vkbuf->buf, + 1, 
&region[i]); - av_image_copy_plane_uc_from(swf->data[i], swf->linesize[i], - (const uint8_t *)tmp.data[i], tmp.linesize[i], - FFMIN(tmp.linesize[i], FFABS(swf->linesize[i])), - p_h); - } + region[i].bufferRowLength = orig_stride; + } - if ((err = ff_vk_unmap_buffers(&p->vkctx, vkbufs, planes, 1))) - goto end; + err = ff_vk_exec_submit(&p->vkctx, exec); + if (err < 0) { + ff_vk_exec_discard_deps(&p->vkctx, exec); + } else if (!upload) { + ff_vk_exec_wait(&p->vkctx, exec); + if (!host_mapped) + err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0); } end: - for (int i = 0; i < planes; i++) + for (int i = 0; i < nb_bufs; i++) av_buffer_unref(&bufs[i]); return err; @@ -3705,7 +3862,7 @@ static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, if (src->hw_frames_ctx) return AVERROR(ENOSYS); else - return vulkan_transfer_data(hwfc, dst, src, 0); + return vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1); } } @@ -3822,7 +3979,7 @@ static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, if (dst->hw_frames_ctx) return AVERROR(ENOSYS); else - return vulkan_transfer_data(hwfc, src, dst, 1); + return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0); } } -- 2.45.2.753.g447d99e1c3b _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
reply other threads:[~2024-07-19 8:00 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240719080000.44735-1-dev@lynne.ee \ --to=ffmpeg-devel@ffmpeg.org \ --cc=dev@lynne.ee \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git