From 6b5301aa29b63b90d04505c9386822b2e207a038 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 29 Dec 2022 21:16:21 +0100 Subject: [PATCH 55/97] vulkan: rewrite to support all necessary features --- libavutil/vulkan.c | 2145 ++++++++++++++++++---------------- libavutil/vulkan.h | 515 ++++---- libavutil/vulkan_functions.h | 1 + 3 files changed, 1344 insertions(+), 1317 deletions(-) diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index cb8e08e02f..9d607ee1ce 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -21,33 +23,6 @@ #include "vulkan.h" #include "vulkan_loader.h" -#if CONFIG_LIBGLSLANG -#include "vulkan_glslang.c" -#elif CONFIG_LIBSHADERC -#include "vulkan_shaderc.c" -#endif - -/* Generic macro for creating contexts which need to keep their addresses - * if another context is created. */ -#define FN_CREATING(ctx, type, shortname, array, num) \ -static av_always_inline type *create_ ##shortname(ctx *dctx) \ -{ \ - type **array, *sctx = av_mallocz(sizeof(*sctx)); \ - if (!sctx) \ - return NULL; \ - \ - array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\ - if (!array) { \ - av_free(sctx); \ - return NULL; \ - } \ - \ - dctx->array = array; \ - dctx->array[dctx->num++] = sctx; \ - \ - return sctx; \ -} - const VkComponentMapping ff_comp_identity_map = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -113,15 +88,22 @@ int ff_vk_load_props(FFVulkanContext *s) uint32_t qc = 0; FFVulkanFunctions *vk = &s->vkfn; + s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, + }; + s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT, + .pNext = &s->hprops, + }; s->driver_props = (VkPhysicalDeviceDriverProperties) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + .pNext = &s->desc_buf_props, }; s->props = (VkPhysicalDeviceProperties2) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, .pNext = &s->driver_props, }; - vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); @@ -165,32 +147,7 @@ int ff_vk_load_props(FFVulkanContext *s) return 0; } -void ff_vk_qf_fill(FFVulkanContext *s) -{ - s->nb_qfs = 0; - - /* Simply fills in all unique queues into s->qfs */ - if (s->hwctx->queue_family_index >= 0) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; - if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; - if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && - s->qfs[1] != s->hwctx->queue_family_comp_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; - if (s->hwctx->queue_family_decode_index >= 0 && - (s->qfs[0] != s->hwctx->queue_family_decode_index && - s->qfs[1] != s->hwctx->queue_family_decode_index && - s->qfs[2] != s->hwctx->queue_family_decode_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; - if (s->hwctx->queue_family_encode_index >= 0 && - (s->qfs[0] != s->hwctx->queue_family_encode_index && - s->qfs[1] != s->hwctx->queue_family_encode_index && - s->qfs[2] != s->hwctx->queue_family_encode_index && - 
s->qfs[3] != s->hwctx->queue_family_encode_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; -} - -int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) +static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) { int ret, num; @@ -226,24 +183,552 @@ int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) } int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues) + VkQueueFlagBits dev_family) { - int ret; + /* Fill in queue families from context if not done yet */ + if (!s->nb_qfs) { + s->nb_qfs = 0; + + /* Simply fills in all unique queues into s->qfs */ + if (s->hwctx->queue_family_index >= 0) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; + if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; + if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && + s->qfs[1] != s->hwctx->queue_family_comp_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; + if (s->hwctx->queue_family_decode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_decode_index && + s->qfs[1] != s->hwctx->queue_family_decode_index && + s->qfs[2] != s->hwctx->queue_family_decode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; + if (s->hwctx->queue_family_encode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_encode_index && + s->qfs[1] != s->hwctx->queue_family_encode_index && + s->qfs[2] != s->hwctx->queue_family_encode_index && + s->qfs[3] != s->hwctx->queue_family_encode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; + } - ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues); + return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues)); +} - if (!nb_queues) - qf->nb_queues = qf->actual_queues; - else - qf->nb_queues = nb_queues; +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool) +{ + FFVulkanFunctions *vk = &s->vkfn; - return ret; + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; + + if (e->fence) { + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); + } + + ff_vk_exec_discard_deps(s, e); + + av_free(e->frame_deps); + av_free(e->buf_deps); + av_free(e->queue_family_dst); + av_free(e->layout_dst); + av_free(e->access_dst); + av_free(e->frame_update); + av_free(e->frame_locked); + av_free(e->sem_sig); + av_free(e->sem_sig_val_dst); + av_free(e->sem_wait); + } + + if (pool->cmd_bufs) + vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool, + pool->pool_size, pool->cmd_bufs); + if (pool->cmd_buf_pool) + vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc); + if (pool->query_pool) + vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc); + + av_free(pool->query_data); + av_free(pool->cmd_bufs); + av_free(pool->contexts); +} + +int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + const void *query_create_pnext) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkCommandPoolCreateInfo cqueue_create; + VkCommandBufferAllocateInfo cbuf_create; + + atomic_init(&pool->idx, 0); + + /* Create command pool */ + cqueue_create = (VkCommandPoolCreateInfo) { + .sType = 
VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+        .flags            = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+                            VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+        .queueFamilyIndex = qf->queue_family,
+    };
+    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+                                s->hwctx->alloc, &pool->cmd_buf_pool);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+               ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Allocate space for command buffers */
+    pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs));
+    if (!pool->cmd_bufs) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Allocate command buffers */
+    cbuf_create = (VkCommandBufferAllocateInfo) {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+        .commandPool        = pool->cmd_buf_pool,
+        .commandBufferCount = nb_contexts,
+    };
+    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create,
+                                     pool->cmd_bufs);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+               ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    /* Query pool */
+    if (nb_queries) {
+        VkQueryPoolCreateInfo query_pool_info = {
+            .sType      = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+            .pNext      = query_create_pnext,
+            .queryType  = query_type,
+            .queryCount = nb_queries*nb_contexts,
+        };
+        ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info,
+                                  s->hwctx->alloc, &pool->query_pool);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n",
+                   ff_vk_ret2str(ret));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+
+        pool->nb_queries          = nb_queries;
+        pool->query_status_stride = 2;
+        pool->query_results       = nb_queries;
+        pool->query_statuses      = 0; /* if radv supports it, nb_queries; */
+
+#if CONFIG_VULKAN_ENCODE
+        /* Video encode queries produce two results per query */
+        if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
+            pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */
+            pool->query_results *= 2;
+        } else
+#endif
+        if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) {
+            pool->query_status_stride = 1;
+            pool->query_results       = 0;
+            pool->query_statuses      = nb_queries;
+        }
+
+        pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ?
8 : 4); + + /* Allocate space for the query data */ + pool->query_data = av_mallocz(nb_contexts*pool->qd_size); + if (!pool->query_data) { + err = AVERROR(ENOMEM); + goto fail; + } + } + + /* Allocate space for the contexts */ + pool->contexts = av_mallocz(nb_contexts*sizeof(*pool->contexts)); + if (!pool->contexts) { + err = AVERROR(ENOMEM); + goto fail; + } + + pool->pool_size = nb_contexts; + + /* Init contexts */ + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; + + /* Fence */ + VkFenceCreateInfo fence_create = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }; + ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc, + &e->fence); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + e->idx = i; + e->parent = pool; + + /* Query data */ + e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i; + e->query_idx = nb_queries*i; + + /* Command buffer */ + e->buf = pool->cmd_bufs[i]; + + /* Queue index distribution */ + e->qi = i % qf->nb_queues; + e->qf = qf->queue_family; + vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, + e->qi, &e->queue); + } + + return 0; + +fail: + ff_vk_exec_pool_free(s, pool); + return err; +} + +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, int64_t *status) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + const FFVkExecPool *pool = e->parent; + + int32_t *res32 = e->query_data; + int64_t *res64 = e->query_data; + int64_t res = 0; + VkQueryResultFlags qf = 0; + + qf |= pool->query_64bit ? + VK_QUERY_RESULT_64_BIT : 0x0; + qf |= pool->query_statuses ? + VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; + + ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool, + e->query_idx, + pool->nb_queries, + pool->qd_size, e->query_data, + pool->query_64bit ? 8 : 4, qf); + if (ret != VK_SUCCESS) + return ret; + + if (pool->query_statuses && pool->query_64bit) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? + res64[i] : res; + res64 += pool->query_status_stride; + } + } else if (pool->query_statuses) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? 
+                  res32[i] : res;
+            res32 += pool->query_status_stride;
+        }
+    }
+
+    if (data)
+        *data = e->query_data;
+    if (status)
+        *status = res;
+
+    return VK_SUCCESS;
+}
+
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool)
+{
+    int idx = atomic_fetch_add_explicit(&pool->idx, 1, memory_order_relaxed);
+    idx %= pool->pool_size;
+    return &pool->contexts[idx];
+}
+
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+}
+
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    const FFVkExecPool *pool = e->parent;
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    /* Wait for the fence, then reset it so it can be reused */
+    vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+    vk->ResetFences(s->hwctx->act_dev, 1, &e->fence);
+
+    /* Discard queue dependencies */
+    ff_vk_exec_discard_deps(s, e);
+
+    ret = vk->BeginCommandBuffer(e->buf, &cmd_start);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to start command recording: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (pool->nb_queries)
+        vk->CmdResetQueryPool(e->buf, pool->query_pool,
+                              e->query_idx, pool->nb_queries);
+
+    return 0;
+}
+
+void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
+{
+    for (int j = 0; j < e->nb_buf_deps; j++)
+        av_buffer_unref(&e->buf_deps[j]);
+    e->nb_buf_deps = 0;
+
+    for (int j = 0; j < e->nb_frame_deps; j++) {
+        AVFrame *f = e->frame_deps[j];
+        if (e->frame_locked[j]) {
+            AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+            AVVulkanFramesContext *vkfc = hwfc->hwctx;
+            AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+            vkfc->unlock_frame(hwfc, vkf);
+            e->frame_locked[j] = 0;
+            e->frame_update[j] = 0;
+        }
+        if (f->buf[0])
+            av_frame_free(&e->frame_deps[j]);
+    }
+    e->nb_frame_deps = 0;
+
+    e->sem_wait_cnt = 0;
+    e->sem_sig_cnt = 0;
+    e->sem_sig_val_dst_cnt = 0;
+}
+
+int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e,
+                           AVBufferRef **deps, int nb_deps, int ref)
+{
+    AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size,
+                                        (e->nb_buf_deps + nb_deps) * sizeof(*dst));
+    if (!dst) {
+        ff_vk_exec_discard_deps(s, e);
+        return AVERROR(ENOMEM);
+    }
+
+    e->buf_deps = dst;
+
+    for (int i = 0; i < nb_deps; i++) {
+        e->buf_deps[e->nb_buf_deps] = ref ?
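/*
 * A minimal sketch of reading back the per-context query slice above,
 * assuming the pool was created with nb_queries > 0 and execution on *e
 * has finished (e.g. after ff_vk_exec_wait()); negative status values
 * indicate a device error:
 *
 *     void *query_data;
 *     int64_t status;
 *     if (ff_vk_exec_get_query(s, e, &query_data, &status) == VK_SUCCESS &&
 *         status >= 0) {
 *         ...  // query_data points into this context's slice of pool->query_data
 *     }
 */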
av_buffer_ref(deps[i]) : deps[i]; + if (!e->buf_deps[e->nb_buf_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + e->nb_buf_deps++; + } + + return 0; +} + +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage) +{ + uint8_t *frame_locked; + uint8_t *frame_update; + AVFrame **frame_deps; + VkImageLayout *layout_dst; + uint32_t *queue_family_dst; + VkAccessFlagBits *access_dst; + + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + int nb_images = ff_vk_count_images(vkf); + + /* Don't add duplicates */ + for (int i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + return 1; + +#define ARR_REALLOC(str, arr, alloc_s, cnt) \ + do { \ + arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \ + if (!arr) { \ + ff_vk_exec_discard_deps(s, e); \ + return AVERROR(ENOMEM); \ + } \ + str->arr = arr; \ + } while (0) + + ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps); + + ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps); + + e->frame_deps[e->nb_frame_deps] = f->buf[0] ? av_frame_clone(f) : f; + if (!e->frame_deps[e->nb_frame_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + vkfc->lock_frame(hwfc, vkf); + e->frame_locked[e->nb_frame_deps] = 1; + e->frame_update[e->nb_frame_deps] = 0; + e->nb_frame_deps++; + + for (int i = 0; i < nb_images; i++) { + VkSemaphoreSubmitInfo *sem_wait; + VkSemaphoreSubmitInfo *sem_sig; + uint64_t **sem_sig_val_dst; + + ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); + ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i], + .stageMask = wait_stage, + }; + + e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i] + 1, + .stageMask = signal_stage, + }; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i]; + e->sem_sig_val_dst_cnt++; + } + + return 0; +} + +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar) +{ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + break; + av_assert0(i < e->nb_frame_deps); + + /* Don't update duplicates */ + if (nb_img_bar && !e->frame_update[i]) + (*nb_img_bar)++; + + e->queue_family_dst[i] = bar->dstQueueFamilyIndex; + e->access_dst[i] = bar->dstAccessMask; + e->layout_dst[i] = bar->newLayout; + e->frame_update[i] = 1; +} + +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f) +{ + uint64_t **sem_sig_val_dst; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + /* Reject unknown frames */ + int i; + for (i = 
0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + break; + if (i == e->nb_frame_deps) + return AVERROR(EINVAL); + + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + *dst = vkf->sem[0]; + *dst_val = vkf->sem_value[0]; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val; + e->sem_sig_val_dst_cnt++; + + return 0; } -int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) { - qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; - return qf->cur_queue; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = e->buf, + }; + VkSubmitInfo2 submit_info = (VkSubmitInfo2) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, + .pCommandBufferInfos = &cmd_buf_info, + .commandBufferInfoCount = 1, + .pWaitSemaphoreInfos = e->sem_wait, + .waitSemaphoreInfoCount = e->sem_wait_cnt, + .pSignalSemaphoreInfos = e->sem_sig, + .signalSemaphoreInfoCount = e->sem_sig_cnt, + }; + + ret = vk->EndCommandBuffer(e->buf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", + ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); + return AVERROR_EXTERNAL; + } + + s->hwctx->lock_queue(s->device, e->qf, e->qi); + ret = vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence); + s->hwctx->unlock_queue(s->device, e->qf, e->qi); + + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", + ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); + return AVERROR_EXTERNAL; + } + + for (int i = 0; i < e->sem_sig_val_dst_cnt; i++) + *e->sem_sig_val_dst[i] += 1; + + /* Unlock all frames */ + for (int j = 0; j < e->nb_frame_deps; j++) { + if (e->frame_locked[j]) { + AVFrame *f = e->frame_deps[j]; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + if (e->frame_update[j]) { + int nb_images = ff_vk_count_images(vkf); + for (int i = 0; i < nb_images; i++) { + vkf->layout[i] = e->layout_dst[j]; + vkf->access[i] = e->access_dst[j]; + vkf->queue_family[i] = e->queue_family_dst[j]; + } + } + vkfc->unlock_frame(hwfc, vkf); + e->frame_locked[j] = 0; + } + } + + return 0; } int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, @@ -322,6 +807,10 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, but should be ok */ }; + VkMemoryAllocateFlagsInfo alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + }; VkBufferMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, }; @@ -351,11 +840,18 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | ded_req.requiresDedicatedAllocation; - if (use_ded_mem) + if (use_ded_mem) { ded_alloc.buffer = buf->buf; + ded_alloc.pNext = alloc_pNext; + alloc_pNext = &ded_alloc; + } - err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, - use_ded_mem ? 
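/*
 * The exec functions above replace the old per-queue FFVkExecContext API
 * with a pooled design. A minimal end-to-end sketch, assuming an
 * initialized FFVulkanContext *s; error checks are elided, and the pool
 * is created with 4 contexts and no query pool:
 *
 *     FFVkQueueFamilyCtx qf;
 *     FFVkExecPool pool = { 0 };
 *     FFVkExecContext *exec;
 *
 *     ff_vk_qf_init(s, &qf, VK_QUEUE_COMPUTE_BIT);
 *     ff_vk_exec_pool_init(s, &qf, &pool, 4, 0, 0, 0, NULL);
 *
 *     exec = ff_vk_exec_get(&pool);   // round-robin via the atomic index
 *     ff_vk_exec_start(s, exec);      // waits on the fence, begins exec->buf
 *     ...                             // record commands into exec->buf
 *     ff_vk_exec_submit(s, exec);     // ends, locks the queue, submits
 *
 *     ff_vk_exec_wait(s, exec);       // optionally block on completion
 *     ff_vk_exec_pool_free(s, &pool);
 */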
&ded_alloc : (void *)ded_alloc.pNext, + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + alloc_flags.pNext = alloc_pNext; + alloc_pNext = &alloc_flags; + } + + err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext, &buf->flags, &buf->mem); if (err) return err; @@ -367,27 +863,72 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, return AVERROR_EXTERNAL; } + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + VkBufferDeviceAddressInfo address_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .buffer = buf->buf, + }; + buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info); + } + buf->size = size; return 0; } -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], +static void destroy_avvkbuf(void *opaque, uint8_t *data) +{ + FFVulkanContext *s = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; + ff_vk_free_buf(s, buf); + av_free(buf); +} + +int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) +{ + int err; + AVBufferRef *buf; + FFVkBuffer *vkb = av_mallocz(sizeof(*vkb)); + if (!vkb) + return AVERROR(ENOMEM); + + err = ff_vk_create_buf(s, vkb, size, pNext, alloc_pNext, usage, flags); + if (err < 0) { + av_free(vkb); + return err; + } + + buf = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), destroy_avvkbuf, s, 0); + if (!buf) { + destroy_avvkbuf(s, (uint8_t *)vkb); + return AVERROR(ENOMEM); + } + + *ref = buf; + + return 0; +} + +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], int nb_buffers, int invalidate) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *inval_list = NULL; + VkMappedMemoryRange inval_list[64]; int inval_count = 0; for (int i = 0; i < nb_buffers; i++) { - ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0, - VK_WHOLE_SIZE, 0, (void **)&mem[i]); + void *dst; + ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0, + VK_WHOLE_SIZE, 0, &dst); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } + mem[i] = dst; } if (!invalidate) @@ -396,16 +937,12 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange ival_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; - inval_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++inval_count)*sizeof(*inval_list)); - if (!inval_list) - return AVERROR(ENOMEM); - inval_list[inval_count - 1] = ival_buf; + inval_list[inval_count++] = ival_buf; } if (inval_count) { @@ -421,29 +958,25 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], return 0; } -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, int flush) { int err = 0; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *flush_list = NULL; + VkMappedMemoryRange flush_list[64]; int flush_count = 0; if (flush) { for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange flush_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = buf[i]->mem, .size = 
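/*
 * The map/unmap helpers now take an array of FFVkBuffer pointers, and a
 * buffer created with SHADER_DEVICE_ADDRESS usage gets its GPU address
 * stored in buf->address. A sketch of a host-visible upload, assuming a
 * valid FFVulkanContext *s and caller-provided data/data_size:
 *
 *     FFVkBuffer buf = { 0 };
 *     FFVkBuffer *bufs[] = { &buf };
 *     uint8_t *mem[1];
 *
 *     ff_vk_create_buf(s, &buf, data_size, NULL, NULL,
 *                      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
 *                      VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
 *                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
 *     ff_vk_map_buffers(s, bufs, mem, 1, 0);  // 0: no invalidation needed
 *     memcpy(mem[0], data, data_size);
 *     ff_vk_unmap_buffers(s, bufs, 1, 1);     // 1: flush non-coherent memory
 *     ff_vk_free_buf(s, &buf);
 */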
VK_WHOLE_SIZE, }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; - flush_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++flush_count)*sizeof(*flush_list)); - if (!flush_list) - return AVERROR(ENOMEM); - flush_list[flush_count - 1] = flush_buf; + flush_list[flush_count++] = flush_buf; } } @@ -458,7 +991,7 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, } for (int i = 0; i < nb_buffers; i++) - vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem); + vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem); return err; } @@ -470,547 +1003,105 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) if (!buf || !s->hwctx) return; + if (buf->mapped_mem) + ff_vk_unmap_buffer(s, buf, 0); if (buf->buf != VK_NULL_HANDLE) vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); if (buf->mem != VK_NULL_HANDLE) vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); } -int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx, - int width, int height, VkFormat fmt, VkImageTiling tiling, - VkImageUsageFlagBits usage, VkImageCreateFlags flags, - void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext) +static void free_data_buf(void *opaque, uint8_t *data) { - int err; - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - AVVulkanDeviceContext *hwctx = s->hwctx; - - VkExportSemaphoreCreateInfo ext_sem_info = { - .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, -#ifdef _WIN32 - .handleTypes = IsWindows8OrGreater() - ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT - : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, -#else - .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, -#endif - }; + FFVulkanContext *ctx = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; + ff_vk_free_buf(ctx, buf); + av_free(data); +} - VkSemaphoreTypeCreateInfo sem_type_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, -#ifdef _WIN32 - .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, -#else - .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? &ext_sem_info : NULL, -#endif - .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, - .initialValue = 0, - }; - - VkSemaphoreCreateInfo sem_spawn = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = &sem_type_info, - }; - - /* Create the image */ - VkImageCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = create_pnext, - .imageType = VK_IMAGE_TYPE_2D, - .format = fmt, - .extent.depth = 1, - .mipLevels = 1, - .arrayLayers = 1, - .flags = flags, - .tiling = tiling, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .usage = usage, - .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = s->qfs, - .queueFamilyIndexCount = s->nb_qfs, - .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, - }; - - ret = vk->CreateImage(hwctx->act_dev, &create_info, - hwctx->alloc, &f->img[0]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR(EINVAL); - goto fail; - } - - /* Create semaphore */ - ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[0]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - f->queue_family[0] = s->nb_qfs > 1 ? 
VK_QUEUE_FAMILY_IGNORED : s->qfs[0]; - f->layout[0] = create_info.initialLayout; - f->access[0] = 0x0; - f->sem_value[0] = 0; - - f->flags = 0x0; - f->tiling = tiling; - - return 0; - -fail: - return err; -} - -int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, - VkShaderStageFlagBits stage) -{ - VkPushConstantRange *pc; - - pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), - pl->push_consts_num + 1); - if (!pl->push_consts) - return AVERROR(ENOMEM); - - pc = &pl->push_consts[pl->push_consts_num++]; - memset(pc, 0, sizeof(*pc)); - - pc->stageFlags = stage; - pc->offset = offset; - pc->size = size; - - return 0; -} - -FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) -int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, - FFVkQueueFamilyCtx *qf) +static AVBufferRef *alloc_data_buf(void *opaque, size_t size) { - VkResult ret; - FFVkExecContext *e; - FFVulkanFunctions *vk = &s->vkfn; - - VkCommandPoolCreateInfo cqueue_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = qf->queue_family, - }; - VkCommandBufferAllocateInfo cbuf_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = qf->nb_queues, - }; - - e = create_exec_ctx(s); - if (!e) - return AVERROR(ENOMEM); - - e->qf = qf; - - e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues)); - if (!e->queues) - return AVERROR(ENOMEM); - - e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs)); - if (!e->bufs) - return AVERROR(ENOMEM); - - /* Create command pool */ - ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, - s->hwctx->alloc, &e->pool); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - cbuf_create.commandPool = e->pool; - - /* Allocate command buffer */ - ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - for (int i = 0; i < qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; - vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, - i % qf->actual_queues, &q->queue); - } - - *ctx = e; + uint8_t *buf = av_mallocz(size); + if (!buf) + return NULL; - return 0; + return av_buffer_create(buf, size, free_data_buf, opaque, 0); } -int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, - int nb_queries, VkQueryType type, - int elem_64bits, void *create_pnext) +int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size, + VkMemoryPropertyFlagBits mem_props) { - VkResult ret; - size_t qd_size; - int nb_results = nb_queries; - int nb_statuses = 0 /* Once RADV has support, = nb_queries */; - int status_stride = 2; - int result_elem_size = elem_64bits ? 
8 : 4; - FFVulkanFunctions *vk = &s->vkfn; - VkQueryPoolCreateInfo query_pool_info = { - .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, - .pNext = create_pnext, - .queryType = type, - .queryCount = nb_queries*e->qf->nb_queues, - }; - - if (e->query.pool) - return AVERROR(EINVAL); + int err; + AVBufferRef *ref; + FFVkBuffer *data; - /* Video encode quieries produce two results per query */ - if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) { - status_stride = 3; /* skip,skip,result,skip,skip,result */ - nb_results *= 2; - } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { - status_stride = 1; - nb_results *= 0; + if (!(*buf_pool)) { + *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx, + alloc_data_buf, NULL); + if (!(*buf_pool)) + return AVERROR(ENOMEM); } - qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size; - - e->query.data = av_mallocz(e->qf->nb_queues*qd_size); - if (!e->query.data) + *buf = ref = av_buffer_pool_get(*buf_pool); + if (!ref) return AVERROR(ENOMEM); - ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, - s->hwctx->alloc, &e->query.pool); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; - - e->query.data_per_queue = qd_size; - e->query.nb_queries = nb_queries; - e->query.nb_results = nb_results; - e->query.nb_statuses = nb_statuses; - e->query.elem_64bits = elem_64bits; - e->query.status_stride = status_stride; - - return 0; -} - -int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, - int query_idx, void **data, int64_t *status) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - uint8_t *qd; - int32_t *res32; - int64_t *res64; - int64_t res = 0; - VkQueryResultFlags qf = 0; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; + data = (FFVkBuffer *)ref->data; + data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + data->access = VK_ACCESS_2_NONE; - if (!q->submitted) { - *data = NULL; + if (data->size >= size) return 0; - } - qd = e->query.data + e->qf->cur_queue*e->query.data_per_queue; - qf |= e->query.nb_results && e->query.nb_statuses ? - VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; - qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0; - res32 = (int32_t *)(qd + e->query.nb_results*4); - res64 = (int64_t *)(qd + e->query.nb_results*8); - - ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool, - query_idx, - e->query.nb_queries, - e->query.data_per_queue, qd, - e->query.elem_64bits ? 8 : 4, qf); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } + ff_vk_free_buf(ctx, data); + memset(data, 0, sizeof(*data)); - if (e->query.nb_statuses && e->query.elem_64bits) { - for (int i = 0; i < e->query.nb_queries; i++) { - res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? - res64[i] : res; - res64 += e->query.status_stride; - } - } else if (e->query.nb_statuses) { - for (int i = 0; i < e->query.nb_queries; i++) { - res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? 
- res32[i] : res; - res32 += e->query.status_stride; - } - } - - if (data) - *data = qd; - if (status) - *status = res; - - return 0; -} + av_log(ctx, AV_LOG_DEBUG, "Allocating buffer of %lu bytes for pool %p\n", + size, *buf_pool); -void ff_vk_discard_exec_deps(FFVkExecContext *e) -{ - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - q->nb_buf_deps = 0; - - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - q->nb_frame_deps = 0; - - e->sem_wait_cnt = 0; - e->sem_sig_cnt = 0; -} - -int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - - /* Create the fence and don't wait for it initially */ - if (!q->fence) { - VkFenceCreateInfo fence_spawn = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - }; - ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc, - &q->fence); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } else if (!q->synchronous) { - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - } - - q->synchronous = 0; - - /* Discard queue dependencies */ - ff_vk_discard_exec_deps(e); - - ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - if (e->query.pool) { - e->query.idx = e->qf->cur_queue*e->query.nb_queries; - vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool, - e->query.idx, e->query.nb_queries); + err = ff_vk_create_buf(ctx, data, size, + create_pNext, NULL, usage, + mem_props); + if (err < 0) { + av_buffer_unref(&ref); + return err; } - return 0; -} - -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e) -{ - return e->bufs[e->qf->cur_queue]; -} - -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag) -{ - AVFrame **dst; - AVVkFrame *f = (AVVkFrame *)frame->data[0]; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; - int planes = av_pix_fmt_count_planes(fc->sw_format); - - for (int i = 0; i < planes; i++) { - e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); - if (!e->sem_wait) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); - if (!e->sem_wait_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val)); - if (!e->sem_wait_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); - if (!e->sem_sig) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); + if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + err = 
ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0); + if (err < 0) { + av_buffer_unref(&ref); + return err; } - - e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val)); - if (!e->sem_sig_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst)); - if (!e->sem_sig_val_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait[e->sem_wait_cnt] = f->sem[i]; - e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; - e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; - e->sem_wait_cnt++; - - e->sem_sig[e->sem_sig_cnt] = f->sem[i]; - e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1; - e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i]; - e->sem_sig_cnt++; } - dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size, - (q->nb_frame_deps + 1) * sizeof(*dst)); - if (!dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - q->frame_deps = dst; - q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame); - if (!q->frame_deps[q->nb_frame_deps]) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - q->nb_frame_deps++; - return 0; } -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pWaitSemaphoreValues = e->sem_wait_val, - .pSignalSemaphoreValues = e->sem_sig_val, - .waitSemaphoreValueCount = e->sem_wait_cnt, - .signalSemaphoreValueCount = e->sem_sig_cnt, - }; - - VkSubmitInfo s_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &s_timeline_sem_info, - - .commandBufferCount = 1, - .pCommandBuffers = &e->bufs[e->qf->cur_queue], - - .pWaitSemaphores = e->sem_wait, - .pWaitDstStageMask = e->sem_wait_dst, - .waitSemaphoreCount = e->sem_wait_cnt, - - .pSignalSemaphores = e->sem_sig, - .signalSemaphoreCount = e->sem_sig_cnt, - }; - - ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, - e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); - - ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence); - - s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, - e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); - - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - for (int i = 0; i < e->sem_sig_cnt; i++) - *e->sem_sig_val_dst[i] += 1; - - e->query.idx = e->qf->cur_queue*e->query.nb_queries; - q->submitted = 1; - - return 0; -} - -void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) -{ - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - if (!q->submitted) - return; - - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - q->synchronous = 1; -} - -int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps) +int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, + 
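/*
 * ff_vk_get_pooled_buffer() above lazily creates the AVBufferPool and
 * recycles FFVkBuffer-backed AVBufferRefs, reallocating only when a
 * recycled buffer is smaller than requested; host-visible buffers come
 * back persistently mapped. A sketch, assuming a valid context *s:
 *
 *     AVBufferPool *pool = NULL;
 *     AVBufferRef *ref;
 *     FFVkBuffer *vkb;
 *
 *     ff_vk_get_pooled_buffer(s, &pool, &ref,
 *                             VK_BUFFER_USAGE_TRANSFER_SRC_BIT, NULL,
 *                             1 << 20, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
 *     vkb = (FFVkBuffer *)ref->data;  // vkb->mapped_mem is valid here
 *     ...                             // fill vkb->mapped_mem, record copies
 *     av_buffer_unref(&ref);          // returns the buffer to the pool
 */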
VkShaderStageFlagBits stage) { - AVBufferRef **dst; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - if (!deps || !nb_deps) - return 0; + VkPushConstantRange *pc; - dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size, - (q->nb_buf_deps + nb_deps) * sizeof(*dst)); - if (!dst) - goto err; + pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), + pl->push_consts_num + 1); + if (!pl->push_consts) + return AVERROR(ENOMEM); - q->buf_deps = dst; + pc = &pl->push_consts[pl->push_consts_num++]; + memset(pc, 0, sizeof(*pc)); - for (int i = 0; i < nb_deps; i++) { - q->buf_deps[q->nb_buf_deps] = deps[i]; - if (!q->buf_deps[q->nb_buf_deps]) - goto err; - q->nb_buf_deps++; - } + pc->stageFlags = stage; + pc->offset = offset; + pc->size = size; return 0; - -err: - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); } -FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num) -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, - int unnorm_coords, VkFilter filt) +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; @@ -1030,22 +1121,15 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, .unnormalizedCoordinates = unnorm_coords, }; - FFVkSampler *sctx = create_sampler(s); - if (!sctx) - return NULL; - ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, - s->hwctx->alloc, &sctx->sampler[0]); + s->hwctx->alloc, sampler); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n", ff_vk_ret2str(ret)); - return NULL; + return AVERROR_EXTERNAL; } - for (int i = 1; i < 4; i++) - sctx->sampler[i] = sctx->sampler[0]; - - return sctx; + return 0; } int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) @@ -1068,79 +1152,137 @@ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt) } typedef struct ImageViewCtx { - VkImageView view; + VkImageView views[AV_NUM_DATA_POINTERS]; + int nb_views; } ImageViewCtx; -static void destroy_imageview(void *opaque, uint8_t *data) +static void destroy_imageviews(void *opaque, uint8_t *data) { FFVulkanContext *s = opaque; FFVulkanFunctions *vk = &s->vkfn; ImageViewCtx *iv = (ImageViewCtx *)data; - vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc); + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); } -int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, - VkImageView *v, VkImage img, VkFormat fmt, - const VkComponentMapping map) +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f) { int err; + VkResult ret; AVBufferRef *buf; FFVulkanFunctions *vk = &s->vkfn; - - VkImageViewCreateInfo imgview_spawn = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = NULL, - .image = img, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = fmt, - .components = map, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + const int nb_images = ff_vk_count_images(vkf); + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); ImageViewCtx *iv = av_mallocz(sizeof(*iv)); - VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn, - 
s->hwctx->alloc, &iv->view); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + for (int i = 0; i < nb_planes; i++) { + VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, }; + + VkImageViewCreateInfo view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = NULL, + .image = vkf->img[FFMIN(i, nb_images - 1)], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = rep_fmts[i], + .components = ff_comp_identity_map, + .subresourceRange = { + .aspectMask = plane_aspect[(nb_planes != nb_images) + + i*(nb_planes != nb_images)], + .levelCount = 1, + .layerCount = 1, + }, + }; + + ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, + s->hwctx->alloc, &iv->views[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + iv->nb_views++; } - buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0); + buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageviews, s, 0); if (!buf) { - destroy_imageview(s, (uint8_t *)iv); - return AVERROR(ENOMEM); + err = AVERROR(ENOMEM); + goto fail; } /* Add to queue dependencies */ - err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1); - if (err) { + err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); + if (err < 0) av_buffer_unref(&buf); - return err; - } - *v = iv->view; + memcpy(views, iv->views, nb_planes*sizeof(*views)); - return 0; + return err; + +fail: + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); + return err; } -FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num) -FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, - VkShaderStageFlags stage) +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf) { - FFVkSPIRVShader *shd = create_shader(pl); - if (!shd) - return NULL; + int i, found; + AVVkFrame *vkf = (AVVkFrame *)pic->data[0]; + const int nb_images = ff_vk_count_images(vkf); + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == pic->data[0]) + break; + found = (i < e->nb_frame_deps) && (e->frame_update[i]) ? i : -1; + + for (int i = 0; i < nb_images; i++) { + bar[*nb_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = src_stage, + .dstStageMask = dst_stage, + .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i], + .dstAccessMask = new_access, + .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0], + .newLayout = new_layout, + .srcQueueFamilyIndex = found >= 0 ? 
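/*
 * Typical use of the frame-barrier helper defined here: register the
 * frame as an execution dependency, collect the per-image barriers into
 * a stack array, then issue them in one vkCmdPipelineBarrier2() call. A
 * sketch, assuming a Vulkan-backed AVFrame *f, a started FFVkExecContext
 * *e, and vk pointing at the loaded function table:
 *
 *     VkImageMemoryBarrier2 img_bar[8];
 *     int nb_img_bar = 0;
 *
 *     ff_vk_exec_add_dep_frame(s, e, f,
 *                              VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
 *                              VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT);
 *     ff_vk_frame_barrier(s, e, f, img_bar, &nb_img_bar,
 *                         VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
 *                         VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
 *                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
 *                         VK_IMAGE_LAYOUT_GENERAL,
 *                         VK_QUEUE_FAMILY_IGNORED);
 *     vk->CmdPipelineBarrier2(e->buf, &(VkDependencyInfo) {
 *         .sType                   = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
 *         .pImageMemoryBarriers    = img_bar,
 *         .imageMemoryBarrierCount = nb_img_bar,
 *     });
 */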
e->queue_family_dst[found] : vkf->queue_family[0], + .dstQueueFamilyIndex = new_qf, + .image = vkf->img[i], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = 1, + .levelCount = 1, + }, + }; + *nb_bar += 1; + } + + ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL); +} +int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, + VkShaderStageFlags stage) +{ av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -1151,22 +1293,24 @@ FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, GLSLF(0, #version %i ,460); GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); GLSLC(0, ); + GLSLC(0, #extension GL_EXT_buffer_reference : require ); + GLSLC(0, #extension GL_EXT_buffer_reference2 : require ); - return shd; + return 0; } -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]) +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z) { - shd->local_size[0] = local_size[0]; - shd->local_size[1] = local_size[1]; - shd->local_size[2] = local_size[2]; + shd->local_size[0] = x; + shd->local_size[1] = y; + shd->local_size[2] = z; av_bprintf(&shd->src, "layout (local_size_x = %i, " "local_size_y = %i, local_size_z = %i) in;\n\n", shd->local_size[0], shd->local_size[1], shd->local_size[2]); } -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) +void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio) { int line = 0; const char *p = shd->src.str; @@ -1188,36 +1332,24 @@ void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) av_bprint_finalize(&buf, NULL); } -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint) +void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd) +{ + FFVulkanFunctions *vk = &s->vkfn; + av_bprint_finalize(&shd->src, NULL); + + if (shd->shader.module) + vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc); +} + +int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, + uint8_t *spirv, size_t spirv_size, const char *entrypoint) { - int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkShaderModuleCreateInfo shader_create; - uint8_t *spirv; - size_t spirv_size; - void *priv; shd->shader.pName = entrypoint; - if (!s->spirv_compiler) { -#if CONFIG_LIBGLSLANG - s->spirv_compiler = ff_vk_glslang_init(); -#elif CONFIG_LIBSHADERC - s->spirv_compiler = ff_vk_shaderc_init(); -#else - return AVERROR(ENOSYS); -#endif - if (!s->spirv_compiler) - return AVERROR(ENOMEM); - } - - err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv, - &spirv_size, entrypoint, &priv); - if (err < 0) - return err; - av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! 
Size: %zu bytes\n", shd->name, spirv_size); @@ -1229,11 +1361,8 @@ int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL, &shd->shader.module); - - s->spirv_compiler->free_shader(s->spirv_compiler, &priv); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n", + av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -1262,132 +1391,88 @@ static const struct descriptor_props { [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, }; -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader) +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only) { VkResult ret; - VkDescriptorSetLayout *layout; + int has_sampler = 0; FFVulkanFunctions *vk = &s->vkfn; + FFVulkanDescriptorSet *set; + VkDescriptorSetLayoutCreateInfo desc_create_layout; - if (only_print_to_shader) + if (print_to_shader_only) goto print; - pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), - pl->desc_layout_num + pl->qf->nb_queues); - if (!pl->desc_layout) + /* Actual layout allocated for the pipeline */ + set = av_realloc_array(pl->desc_set, sizeof(*pl->desc_set), + pl->nb_descriptor_sets + 1); + if (!set) return AVERROR(ENOMEM); + pl->desc_set = set; + set = &set[pl->nb_descriptor_sets]; + memset(set, 0, sizeof(*set)); - pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized, - sizeof(*pl->desc_set_initialized), - pl->descriptor_sets_num + 1); - if (!pl->desc_set_initialized) + set->binding = av_mallocz(nb*sizeof(*set->binding)); + if (!set->binding) return AVERROR(ENOMEM); - pl->desc_set_initialized[pl->descriptor_sets_num] = 0; - layout = &pl->desc_layout[pl->desc_layout_num]; - - { /* Create descriptor set layout descriptions */ - VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; - VkDescriptorSetLayoutBinding *desc_binding; - - desc_binding = av_mallocz(sizeof(*desc_binding)*num); - if (!desc_binding) - return AVERROR(ENOMEM); - - for (int i = 0; i < num; i++) { - desc_binding[i].binding = i; - desc_binding[i].descriptorType = desc[i].type; - desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); - desc_binding[i].stageFlags = desc[i].stages; - desc_binding[i].pImmutableSamplers = desc[i].sampler ? 
- desc[i].sampler->sampler : - NULL; - } - - desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - desc_create_layout.pBindings = desc_binding; - desc_create_layout.bindingCount = num; - - for (int i = 0; i < pl->qf->nb_queues; i++) { - ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, - s->hwctx->alloc, &layout[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "layout: %s\n", ff_vk_ret2str(ret)); - av_free(desc_binding); - return AVERROR_EXTERNAL; - } - } - - av_free(desc_binding); + set->binding_offset = av_mallocz(nb*sizeof(*set->binding_offset)); + if (!set->binding_offset) { + av_freep(&set->binding); + return AVERROR(ENOMEM); } - { /* Pool each descriptor by type and update pool counts */ - for (int i = 0; i < num; i++) { - int j; - for (j = 0; j < pl->pool_size_desc_num; j++) - if (pl->pool_size_desc[j].type == desc[i].type) - break; - if (j >= pl->pool_size_desc_num) { - pl->pool_size_desc = av_realloc_array(pl->pool_size_desc, - sizeof(*pl->pool_size_desc), - ++pl->pool_size_desc_num); - if (!pl->pool_size_desc) - return AVERROR(ENOMEM); - memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); - } - pl->pool_size_desc[j].type = desc[i].type; - pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues; - } - } + desc_create_layout = (VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = nb, + .pBindings = set->binding, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, + }; - { /* Create template creation struct */ - VkDescriptorUpdateTemplateCreateInfo *dt; - VkDescriptorUpdateTemplateEntry *des_entries; + for (int i = 0; i < nb; i++) { + set->binding[i].binding = i; + set->binding[i].descriptorType = desc[i].type; + set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1); + set->binding[i].stageFlags = desc[i].stages; + set->binding[i].pImmutableSamplers = desc[i].samplers; - /* Freed after descriptor set initialization */ - des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry)); - if (!des_entries) - return AVERROR(ENOMEM); + if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER || + desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + has_sampler |= 1; + } - for (int i = 0; i < num; i++) { - des_entries[i].dstBinding = i; - des_entries[i].descriptorType = desc[i].type; - des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1); - des_entries[i].dstArrayElement = 0; - des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s; - des_entries[i].stride = descriptor_props[desc[i].type].struct_size; - } + set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + if (has_sampler) + set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; - pl->desc_template_info = av_realloc_array(pl->desc_template_info, - sizeof(*pl->desc_template_info), - pl->total_descriptor_sets + pl->qf->nb_queues); - if (!pl->desc_template_info) - return AVERROR(ENOMEM); + ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, + s->hwctx->alloc, &set->layout); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } - dt = &pl->desc_template_info[pl->total_descriptor_sets]; - memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues); + vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, 
&set->layout_size); - for (int i = 0; i < pl->qf->nb_queues; i++) { - dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; - dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; - dt[i].descriptorSetLayout = layout[i]; - dt[i].pDescriptorUpdateEntries = des_entries; - dt[i].descriptorUpdateEntryCount = num; - } - } + set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment); - pl->descriptor_sets_num++; + for (int i = 0; i < nb; i++) + vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout, + i, &set->binding_offset[i]); - pl->desc_layout_num += pl->qf->nb_queues; - pl->total_descriptor_sets += pl->qf->nb_queues; + set->read_only = read_only; + set->nb_bindings = nb; + pl->nb_descriptor_sets++; print: /* Write shader info */ - for (int i = 0; i < num; i++) { + for (int i = 0; i < nb; i++) { const struct descriptor_props *prop = &descriptor_props[desc[i].type]; - GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i); + GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i); if (desc[i].mem_layout) GLSLA(", %s", desc[i].mem_layout); @@ -1412,185 +1497,268 @@ print: else if (desc[i].elems > 0) GLSLA("[%i]", desc[i].elems); - GLSLA(";\n"); + GLSLA(";"); + GLSLA("\n"); } GLSLA("\n"); return 0; } -void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - int set_id) +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl) { - FFVulkanFunctions *vk = &s->vkfn; + int err; - /* If a set has never been updated, update all queues' sets. */ - if (!pl->desc_set_initialized[set_id]) { - for (int i = 0; i < pl->qf->nb_queues; i++) { - int idx = set_id*pl->qf->nb_queues + i; - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[idx], - pl->desc_template[idx], - s); - } - pl->desc_set_initialized[set_id] = 1; - return; - } + pl->desc_bind = av_mallocz(pl->nb_descriptor_sets*sizeof(*pl->desc_bind)); + if (!pl->desc_bind) + return AVERROR(ENOMEM); + + pl->bound_buffer_indices = av_mallocz(pl->nb_descriptor_sets* + sizeof(*pl->bound_buffer_indices)); + if (!pl->bound_buffer_indices) + return AVERROR(ENOMEM); - set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue; + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + int nb = set->read_only ? 1 : pool->pool_size; + + err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb, + NULL, NULL, set->usage, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (err < 0) + return err; + + err = ff_vk_map_buffer(s, &set->buf, &set->desc_mem, 0); + if (err < 0) + return err; + + pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, + .usage = set->usage, + .address = set->buf.address, + }; + + pl->bound_buffer_indices[i] = i; + } - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[set_id], - pl->desc_template[set_id], - s); + return 0; } -void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - VkShaderStageFlagBits stage, int offset, - size_t size, void *src) +static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanDescriptorSet *set, + int bind_idx, int array_idx, + VkDescriptorGetInfoEXT *desc_get_info, + size_t desc_size) { FFVulkanFunctions *vk = &s->vkfn; + const size_t exec_offset = set->read_only ? 
0 : set->aligned_size*e->idx; + void *desc = set->desc_mem + /* Base */ + exec_offset + /* Execution context */ + set->binding_offset[bind_idx] + /* Descriptor binding */ + array_idx*desc_size; /* Array position */ - vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout, - stage, offset, size, src); + vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); } -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler) { - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging)); - if (!pl->desc_staging) - return AVERROR(ENOMEM); + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; - { /* Init descriptor set pool */ - VkDescriptorPoolCreateInfo pool_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .poolSizeCount = pl->pool_size_desc_num, - .pPoolSizes = pl->pool_size_desc, - .maxSets = pl->total_descriptor_sets, - }; + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + desc_get_info.data.pSampler = sampler; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, - s->hwctx->alloc, &pl->desc_pool); - av_freep(&pl->pool_size_desc); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "pool: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, + s->desc_buf_props.samplerDescriptorSize); - { /* Allocate descriptor sets */ - VkDescriptorSetAllocateInfo alloc_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = pl->desc_pool, - .descriptorSetCount = pl->total_descriptor_sets, - .pSetLayouts = pl->desc_layout, - }; + return 0; +} - pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set)); - if (!pl->desc_set) - return AVERROR(ENOMEM); +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorImageInfo desc_img_info = { + .imageView = view, + .sampler = sampler, + .imageLayout = layout, + }; + size_t desc_size; - ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info, - pl->desc_set); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + desc_get_info.data.pSampledImage = &desc_img_info; + desc_size = s->desc_buf_props.sampledImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + desc_get_info.data.pStorageImage = &desc_img_info; + desc_size = s->desc_buf_props.storageImageDescriptorSize; + break; + case 
VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + desc_get_info.data.pInputAttachmentImage = &desc_img_info; + desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + desc_get_info.data.pCombinedImageSampler = &desc_img_info; + desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - { /* Finally create the pipeline layout */ - VkPipelineLayoutCreateInfo spawn_pipeline_layout = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging, - .pushConstantRangeCount = pl->push_consts_num, - .pPushConstantRanges = pl->push_consts, - }; + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) - pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i]; + return 0; +} - ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, - s->hwctx->alloc, &pl->pipeline_layout); - av_freep(&pl->push_consts); - pl->push_consts_num = 0; - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorAddressInfoEXT desc_buf_info = { + .address = addr, + .range = len, + .format = fmt, + }; + size_t desc_size; - { /* Descriptor template (for tightly packed descriptors) */ - VkDescriptorUpdateTemplateCreateInfo *dt; + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + desc_get_info.data.pUniformBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + desc_get_info.data.pStorageBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template)); - if (!pl->desc_template) - return AVERROR(ENOMEM); + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); - /* Create update templates for the descriptor sets */ - for (int i = 0; i < pl->total_descriptor_sets; i++) { - dt = &pl->desc_template_info[i]; - dt->pipelineLayout = pl->pipeline_layout; - ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev, - dt, s->hwctx->alloc, - &pl->desc_template[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor " - "template: %s\n", 
ff_vk_ret2str(ret));
-                return AVERROR_EXTERNAL;
-            }
-        }
+    return 0;
+}
 
-        /* Free the duplicated memory used for the template entries */
-        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
-            dt = &pl->desc_template_info[i];
-            av_free((void *)dt->pDescriptorUpdateEntries);
-        }
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                       FFVkExecContext *e, AVFrame *f,
+                                       VkImageView *views, int set, int binding,
+                                       VkImageLayout layout, VkSampler sampler)
+{
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+    const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
 
-        av_freep(&pl->desc_template_info);
-    }
+    for (int i = 0; i < nb_planes; i++)
+        ff_vk_set_descriptor_image(s, pl, e, set, binding, i,
+                                   views[i], layout, sampler);
+}
 
-    return 0;
+void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
+                            FFVulkanPipeline *pl,
+                            VkShaderStageFlagBits stage,
+                            int offset, size_t size, void *src)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+    vk->CmdPushConstants(e->buf, pl->pipeline_layout,
+                         stage, offset, size, src);
 }
 
-FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
+static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
 {
-    FFVulkanPipeline *pl = create_pipeline(s);
-    if (pl)
-        pl->qf = qf;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkPipelineLayoutCreateInfo pipeline_layout_info;
+
+    VkDescriptorSetLayout *desc_layouts = av_malloc(pl->nb_descriptor_sets*
+                                                    sizeof(*desc_layouts));
+    if (!desc_layouts)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < pl->nb_descriptor_sets; i++)
+        desc_layouts[i] = pl->desc_set[i].layout;
+
+    /* Finally create the pipeline layout */
+    pipeline_layout_info = (VkPipelineLayoutCreateInfo) {
+        .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+        .pSetLayouts            = desc_layouts,
+        .setLayoutCount         = pl->nb_descriptor_sets,
+        .pushConstantRangeCount = pl->push_consts_num,
+        .pPushConstantRanges    = pl->push_consts,
+    };
+
+    ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info,
+                                   s->hwctx->alloc, &pl->pipeline_layout);
+    av_free(desc_layouts);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
 
-    return pl;
+    return 0;
 }
 
-int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
+int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                FFVkSPIRVShader *shd)
 {
-    int i;
+    int err;
     VkResult ret;
     FFVulkanFunctions *vk = &s->vkfn;
-    VkComputePipelineCreateInfo pipe = {
+    VkComputePipelineCreateInfo pipeline_create_info;
+
+    err = init_pipeline_layout(s, pl);
+    if (err < 0)
+        return err;
+
+    pipeline_create_info = (VkComputePipelineCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
         .layout = pl->pipeline_layout,
+        .stage = shd->shader,
     };
 
-    for (i = 0; i < pl->shaders_num; i++) {
-        if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
-            pipe.stage = pl->shaders[i]->shader;
-            break;
-        }
-    }
-    if (i == pl->shaders_num) {
-        av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
-        return AVERROR(EINVAL);
-    }
-
-    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
+                                     &pipeline_create_info,
                                      s->hwctx->alloc,
&pl->pipeline); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", @@ -1599,157 +1767,68 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) } pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + pl->wg_size[0] = shd->local_size[0]; + pl->wg_size[1] = shd->local_size[1]; + pl->wg_size[2] = shd->local_size[2]; return 0; } -void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e, +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; + VkDeviceSize offsets[1024]; - vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline); - - for (int i = 0; i < pl->descriptor_sets_num; i++) - pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue]; + /* Bind pipeline */ + vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); - vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point, - pl->pipeline_layout, 0, - pl->descriptor_sets_num, - (VkDescriptorSet *)pl->desc_staging, - 0, NULL); + if (pl->nb_descriptor_sets) { + for (int i = 0; i < pl->nb_descriptor_sets; i++) + offsets[i] = pl->desc_set[i].read_only ? 0 : pl->desc_set[i].aligned_size*e->idx; - e->bound_pl = pl; -} - -static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) -{ - FFVulkanFunctions *vk = &s->vkfn; - - /* Make sure all queues have finished executing */ - for (int i = 0; i < e->qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; - - if (q->fence) { - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - } - - /* Free the fence */ - if (q->fence) - vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc); - - /* Free buffer dependencies */ - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - av_free(q->buf_deps); - - /* Free frame dependencies */ - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - av_free(q->frame_deps); + /* Bind descriptor buffers */ + vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind); + /* Binding offsets */ + vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout, + 0, pl->nb_descriptor_sets, + pl->bound_buffer_indices, offsets); } - - if (e->bufs) - vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs); - if (e->pool) - vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); - if (e->query.pool) - vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc); - - av_freep(&e->query.data); - av_freep(&e->bufs); - av_freep(&e->queues); - av_freep(&e->sem_sig); - av_freep(&e->sem_sig_val); - av_freep(&e->sem_sig_val_dst); - av_freep(&e->sem_wait); - av_freep(&e->sem_wait_dst); - av_freep(&e->sem_wait_val); - av_free(e); } -static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; - for (int i = 0; i < pl->shaders_num; i++) { - FFVkSPIRVShader *shd = pl->shaders[i]; - av_bprint_finalize(&shd->src, NULL); - vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, - s->hwctx->alloc); - av_free(shd); - } - - vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); - vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, - s->hwctx->alloc); + if (pl->pipeline) + vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); 
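+    /* Each handle is checked before destruction, as this may be called on a
+     * partially-initialized pipeline if an error occurred during setup */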
+    if (pl->pipeline_layout)
+        vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
+                                  s->hwctx->alloc);
 
-    for (int i = 0; i < pl->desc_layout_num; i++) {
-        if (pl->desc_template && pl->desc_template[i])
-            vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
-                                                s->hwctx->alloc);
-        if (pl->desc_layout && pl->desc_layout[i])
-            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
+    for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+        FFVulkanDescriptorSet *set = &pl->desc_set[i];
+        if (set->buf.mem)
+            ff_vk_unmap_buffer(s, &set->buf, 0);
+        ff_vk_free_buf(s, &set->buf);
+        if (set->layout)
+            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout,
                                            s->hwctx->alloc);
+        av_free(set->binding);
+        av_free(set->binding_offset);
     }
 
-    /* Also frees the descriptor sets */
-    if (pl->desc_pool)
-        vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
-                                  s->hwctx->alloc);
-
-    av_freep(&pl->desc_staging);
     av_freep(&pl->desc_set);
-    av_freep(&pl->shaders);
-    av_freep(&pl->desc_layout);
-    av_freep(&pl->desc_template);
-    av_freep(&pl->desc_set_initialized);
+    av_freep(&pl->desc_bind);
     av_freep(&pl->push_consts);
     pl->push_consts_num = 0;
-
-    /* Only freed in case of failure */
-    av_freep(&pl->pool_size_desc);
-    if (pl->desc_template_info) {
-        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
-            VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
-            av_free((void *)dt->pDescriptorUpdateEntries);
-        }
-        av_freep(&pl->desc_template_info);
-    }
-
-    av_free(pl);
 }
 
 void ff_vk_uninit(FFVulkanContext *s)
 {
-    FFVulkanFunctions *vk = &s->vkfn;
-
     av_freep(&s->query_props);
     av_freep(&s->qf_props);
     av_freep(&s->video_props);
 
-    if (s->spirv_compiler)
-        s->spirv_compiler->uninit(&s->spirv_compiler);
-
-    for (int i = 0; i < s->exec_ctx_num; i++)
-        free_exec_ctx(s, s->exec_ctx[i]);
-    av_freep(&s->exec_ctx);
-
-    for (int i = 0; i < s->samplers_num; i++) {
-        vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
-                           s->hwctx->alloc);
-        av_free(s->samplers[i]);
-    }
-    av_freep(&s->samplers);
-
-    for (int i = 0; i < s->pipelines_num; i++)
-        free_pipeline(s, s->pipelines[i]);
-    av_freep(&s->pipelines);
-
-    av_freep(&s->scratch);
-    s->scratch_size = 0;
-
-    av_buffer_unref(&s->device_ref);
     av_buffer_unref(&s->frames_ref);
 }
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 3f887a782e..b0921810c6 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -21,6 +21,8 @@
 
 #define VK_NO_PROTOTYPES
 
+#include <stdatomic.h>
+
 #include "pixdesc.h"
 #include "bprint.h"
 #include "hwcontext.h"
@@ -28,11 +30,6 @@
 #include "hwcontext_vulkan.h"
 #include "vulkan_loader.h"
 
-#define FF_VK_DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT |              \
-                                   VK_IMAGE_USAGE_STORAGE_BIT |              \
-                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT |         \
-                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT)
-
 /* GLSL management macros */
 #define INDENT(N) INDENT_##N
 #define INDENT_0
@@ -57,6 +54,8 @@
         goto fail;                                                     \
     } while (0)
 
+#define DUP_SAMPLER(x) { x, x, x, x }
+
 typedef struct FFVkSPIRVShader {
     const char *name;                   /* Name for id/debugging purposes */
     AVBPrint src;
@@ -64,19 +63,6 @@ typedef struct FFVkSPIRVShader {
     VkPipelineShaderStageCreateInfo shader;
 } FFVkSPIRVShader;
 
-typedef struct FFVkSPIRVCompiler {
-    void *priv;
-    int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx,
-                          struct FFVkSPIRVShader *shd, uint8_t **data,
-                          size_t *size, const char *entrypoint, void **opaque);
-    void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque);
-    void (*uninit)(struct FFVkSPIRVCompiler **ctx);
-}
FFVkSPIRVCompiler; - -typedef struct FFVkSampler { - VkSampler sampler[4]; -} FFVkSampler; - typedef struct FFVulkanDescriptorSetBinding { const char *name; VkDescriptorType type; @@ -86,8 +72,7 @@ typedef struct FFVulkanDescriptorSetBinding { uint32_t dimensions; /* Needed for e.g. sampler%iD */ uint32_t elems; /* 0 - scalar, 1 or more - vector */ VkShaderStageFlags stages; - FFVkSampler *sampler; /* Sampler to use for all elems */ - void *updater; /* Pointer to VkDescriptor*Info */ + VkSampler samplers[4]; /* Sampler to use for all elems */ } FFVulkanDescriptorSetBinding; typedef struct FFVkBuffer { @@ -95,119 +80,133 @@ typedef struct FFVkBuffer { VkDeviceMemory mem; VkMemoryPropertyFlagBits flags; size_t size; + VkDeviceAddress address; + + /* Local use only */ + VkPipelineStageFlags2 stage; + VkAccessFlags2 access; + + /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE */ + uint8_t *mapped_mem; } FFVkBuffer; typedef struct FFVkQueueFamilyCtx { int queue_family; int nb_queues; - int cur_queue; - int actual_queues; } FFVkQueueFamilyCtx; -typedef struct FFVulkanPipeline { - FFVkQueueFamilyCtx *qf; +typedef struct FFVulkanDescriptorSet { + VkDescriptorSetLayout layout; + FFVkBuffer buf; + uint8_t *desc_mem; + VkDeviceSize layout_size; + VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ + VkDeviceSize total_size; /* Once registered to an exec context */ + VkBufferUsageFlags usage; + + VkDescriptorSetLayoutBinding *binding; + VkDeviceSize *binding_offset; + int nb_bindings; + int read_only; +} FFVulkanDescriptorSet; + +typedef struct FFVulkanPipeline { VkPipelineBindPoint bind_point; /* Contexts */ VkPipelineLayout pipeline_layout; VkPipeline pipeline; - /* Shaders */ - FFVkSPIRVShader **shaders; - int shaders_num; - /* Push consts */ VkPushConstantRange *push_consts; int push_consts_num; + /* Workgroup */ + int wg_size[3]; + /* Descriptors */ - VkDescriptorSetLayout *desc_layout; - VkDescriptorPool desc_pool; - VkDescriptorSet *desc_set; -#if VK_USE_64_BIT_PTR_DEFINES == 1 - void **desc_staging; -#else - uint64_t *desc_staging; -#endif - VkDescriptorSetLayoutBinding **desc_binding; - VkDescriptorUpdateTemplate *desc_template; - int *desc_set_initialized; - int desc_layout_num; - int descriptor_sets_num; - int total_descriptor_sets; - int pool_size_desc_num; - - /* Temporary, used to store data in between initialization stages */ - VkDescriptorUpdateTemplateCreateInfo *desc_template_info; - VkDescriptorPoolSize *pool_size_desc; + FFVulkanDescriptorSet *desc_set; + VkDescriptorBufferBindingInfoEXT *desc_bind; + uint32_t *bound_buffer_indices; + int nb_descriptor_sets; } FFVulkanPipeline; -typedef struct FFVkQueueCtx { - VkFence fence; +typedef struct FFVkExecContext { + int idx; + const struct FFVkExecPool *parent; + + /* Queue for the execution context */ VkQueue queue; + int qf; + int qi; + + /* Command buffer for the context */ + VkCommandBuffer buf; + + /* Fence for the command buffer */ + VkFence fence; - int synchronous; - int submitted; + void *query_data; + int query_idx; /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; - int buf_deps_alloc_size; + unsigned int buf_deps_alloc_size; /* Frame dependencies */ AVFrame **frame_deps; + unsigned int frame_deps_alloc_size; int nb_frame_deps; - int frame_deps_alloc_size; -} FFVkQueueCtx; - -typedef struct FFVkExecContext { - FFVkQueueFamilyCtx *qf; - VkCommandPool pool; - VkCommandBuffer *bufs; - FFVkQueueCtx *queues; - - struct { - int idx; - VkQueryPool pool; - uint8_t *data; - - int 
nb_queries; - int nb_results; - int nb_statuses; - int elem_64bits; - size_t data_per_queue; - int status_stride; - } query; + VkSemaphoreSubmitInfo *sem_wait; + unsigned int sem_wait_alloc; + int sem_wait_cnt; - AVBufferRef ***deps; - int *nb_deps; - int *dep_alloc_size; + VkSemaphoreSubmitInfo *sem_sig; + unsigned int sem_sig_alloc; + int sem_sig_cnt; - FFVulkanPipeline *bound_pl; + uint64_t **sem_sig_val_dst; + unsigned int sem_sig_val_dst_alloc; + int sem_sig_val_dst_cnt; - VkSemaphore *sem_wait; - int sem_wait_alloc; /* Allocated sem_wait */ - int sem_wait_cnt; + uint8_t *frame_locked; + unsigned int frame_locked_alloc_size; - uint64_t *sem_wait_val; - int sem_wait_val_alloc; + VkAccessFlagBits *access_dst; + unsigned int access_dst_alloc; - VkPipelineStageFlagBits *sem_wait_dst; - int sem_wait_dst_alloc; /* Allocated sem_wait_dst */ + VkImageLayout *layout_dst; + unsigned int layout_dst_alloc; - VkSemaphore *sem_sig; - int sem_sig_alloc; /* Allocated sem_sig */ - int sem_sig_cnt; + uint32_t *queue_family_dst; + unsigned int queue_family_dst_alloc; - uint64_t *sem_sig_val; - int sem_sig_val_alloc; - - uint64_t **sem_sig_val_dst; - int sem_sig_val_dst_alloc; + uint8_t *frame_update; + unsigned int frame_update_alloc_size; } FFVkExecContext; +typedef struct FFVkExecPool { + FFVkQueueFamilyCtx *qf; + FFVkExecContext *contexts; + atomic_int_least64_t idx; + + VkCommandPool cmd_buf_pool; + VkCommandBuffer *cmd_bufs; + int pool_size; + + VkQueryPool query_pool; + void *query_data; + int query_results; + int query_statuses; + int query_64bit; + int query_status_stride; + int nb_queries; + size_t qd_size; +} FFVkExecPool; + typedef struct FFVulkanContext { const AVClass *class; /* Filters and encoders use this */ @@ -216,14 +215,16 @@ typedef struct FFVulkanContext { VkPhysicalDeviceProperties2 props; VkPhysicalDeviceDriverProperties driver_props; VkPhysicalDeviceMemoryProperties mprops; + VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; + VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props; VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; VkQueueFamilyVideoPropertiesKHR *video_props; VkQueueFamilyProperties2 *qf_props; - AVBufferRef *device_ref; AVHWDeviceContext *device; AVVulkanDeviceContext *hwctx; + AVBufferRef *input_frames_ref; AVBufferRef *frames_ref; AVHWFramesContext *frames; AVVulkanFramesContext *hwfc; @@ -231,28 +232,11 @@ typedef struct FFVulkanContext { uint32_t qfs[5]; int nb_qfs; - FFVkSPIRVCompiler *spirv_compiler; - /* Properties */ int output_width; int output_height; enum AVPixelFormat output_format; enum AVPixelFormat input_format; - - /* Samplers */ - FFVkSampler **samplers; - int samplers_num; - - /* Exec contexts */ - FFVkExecContext **exec_ctx; - int exec_ctx_num; - - /* Pipelines (each can have 1 shader of each type) */ - FFVulkanPipeline **pipelines; - int pipelines_num; - - void *scratch; /* Scratch memory used only in functions */ - unsigned int scratch_size; } FFVulkanContext; /* Identity mapping - r = r, b = b, g = g, a = a */ @@ -264,244 +248,207 @@ extern const VkComponentMapping ff_comp_identity_map; const char *ff_vk_ret2str(VkResult res); /** - * Loads props/mprops/driver_props - */ -int ff_vk_load_props(FFVulkanContext *s); - -/** - * Returns 1 if the image is any sort of supported RGB + * Returns 1 if pixfmt is a usable RGB format. */ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); /** - * Gets the glsl format string for a pixel format + * Returns the format to use for images in shaders. 
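+ * This is a GLSL image format qualifier string (e.g. "rgba8").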
 */
 const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
 
 /**
- * Setup the queue families from the hardware device context.
- * Necessary for image creation to work.
- */
-void ff_vk_qf_fill(FFVulkanContext *s);
-
-/**
- * Allocate device memory.
- */
-int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
-                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
-                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
-
-/**
- * Get a queue family index and the number of queues. nb is optional.
+ * Loads props/mprops/driver_props
  */
-int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb);
+int ff_vk_load_props(FFVulkanContext *s);
 
 /**
- * Initialize a queue family with a specific number of queues.
- * If nb_queues == 0, use however many queues the queue family has.
+ * Chooses a QF and loads it into a context.
  */
 int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
-                  VkQueueFlagBits dev_family, int nb_queues);
-
-/**
- * Rotate through the queues in a queue family.
- */
-int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
-
-/**
- * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
- */
-FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, int unnorm_coords,
-                                VkFilter filt);
+                  VkQueueFlagBits dev_family);
 
 /**
- * Create an imageview.
- * Guaranteed to remain alive until the queue submission has finished executing,
- * and will be destroyed after that.
+ * Allocates/frees an execution pool.
+ * If ff_vk_pipeline_descriptor_set_add() has been called on a pipeline,
+ * ff_vk_exec_pipeline_register() MUST be called on it as well.
  */
-int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
-                           VkImageView *v, VkImage img, VkFormat fmt,
-                           const VkComponentMapping map);
+int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
+                         FFVkExecPool *pool, int nb_contexts,
+                         int nb_queries, VkQueryType query_type, int query_64bit,
+                         const void *query_create_pnext);
+void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool);
 
 /**
- * Define a push constant for a given stage into a pipeline.
- * Must be called before the pipeline layout has been initialized.
+ * Retrieve an execution context from the pool. Threadsafe.
  */
-int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
-                            VkShaderStageFlagBits stage);
+FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool);
 
 /**
- * Inits a pipeline. Everything in it will be auto-freed when calling
- * ff_vk_filter_uninit().
+ * Performs nb_queries queries and returns their results and statuses.
+ * Execution must have been waited on to produce valid results.
  */
-FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf);
+VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e,
+                              void **data, int64_t *status);
 
 /**
- * Inits a shader for a specific pipeline. Will be auto-freed on uninit.
+ * Start/submit/wait an execution.
+ * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait()
+ * is not necessary (unless an explicit synchronization point is needed).
  */
-FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
-                                   VkShaderStageFlags stage);
+int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e);
+int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e);
+void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e);
 
 /**
- * Writes the workgroup size for a shader.
+ * Execution dependency management.
+ * Can attach buffers to executions that will only be unref'd once the
+ * buffer has finished executing.
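+ * With ref set, ff_vk_exec_add_dep_buf() creates new references and leaves
+ * the input refs untouched; otherwise, ownership of the refs is taken over.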
+ * Adding a frame dep will *lock the frame*, until either the dependencies + * are discarded, the execution is submitted, or a failure happens. + * update_frame will update the frame's properties before it is unlocked, + * only if submission was successful. */ -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]); +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref); +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage); +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar); +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f); +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); /** - * Adds a descriptor set to the shader and registers them in the pipeline. + * Create an imageview and add it as a dependency to an execution. */ -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader); +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f); -/** - * Compiles the shader, entrypoint must be set to "main". - */ -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint); +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf); /** - * Pretty print shader, mainly used by shader compilers. + * Memory/buffer/image allocation helpers. */ -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio); - -/** - * Initializes the pipeline layout after all shaders and descriptor sets have - * been finished. - */ -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl); +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); +int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); /** - * Initializes a compute pipeline. Will pick the first shader with the - * COMPUTE flag set. + * Buffer management code. */ -int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl); +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], + int nb_buffers, int invalidate); +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, + int flush); -/** - * Updates a descriptor set via the updaters defined. - * Can be called immediately after pipeline creation, but must be called - * at least once before queue submission. 
- */
-void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
-                                 int set_id);
+static inline int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem,
+                                   int invalidate)
+{
+    return ff_vk_map_buffers(s, (FFVkBuffer *[]){ buf }, mem,
+                             1, invalidate);
+}
 
-/**
- * Init an execution context for command recording and queue submission.
- * WIll be auto-freed on uninit.
- */
-int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
-                          FFVkQueueFamilyCtx *qf);
+static inline int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush)
+{
+    return ff_vk_unmap_buffers(s, (FFVkBuffer *[]){ buf }, 1, flush);
+}
 
-/**
- * Create a query pool for a command context.
- * elem_64bits exists to troll driver devs for compliance. All results
- * and statuses returned should be 32 bits, unless this is set, then it's 64bits.
- */
-int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e,
-                                     int nb_queries, VkQueryType type,
-                                     int elem_64bits, void *create_pnext);
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf);
 
-/**
- * Get results for query.
- * Returns the status of the query.
- * Sets *res to the status of the queries.
- */
-int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e,
-                                     int query_idx, void **data, int64_t *status);
+/**
+ * Initialize a pool and create AVBufferRefs containing FFVkBuffer.
+ * Threadsafe to use. Buffers are automatically mapped on creation if
+ * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT is set in mem_props. Users should
+ * synchronize access themselves. Mainly meant for device-local buffers.
+ */
+int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
+                            AVBufferRef **buf, VkBufferUsageFlags usage,
+                            void *create_pNext, size_t size,
+                            VkMemoryPropertyFlagBits mem_props);
 
 /**
- * Begin recording to the command buffer. Previous execution must have been
- * completed, which ff_vk_submit_exec_queue() will ensure.
+ * Create a sampler.
  */
-int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e);
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+                       int unnorm_coords, VkFilter filt);
 
 /**
- * Add a command to bind the completed pipeline and its descriptor sets.
- * Must be called after ff_vk_start_exec_recording() and before submission.
+ * Shader management.
  */
-void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
-                              FFVulkanPipeline *pl);
+int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name,
+                      VkShaderStageFlags stage);
+void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z);
+void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio);
+int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd,
+                        uint8_t *spirv, size_t spirv_size, const char *entrypoint);
+void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd);
 
 /**
- * Updates push constants.
- * Must be called after binding a pipeline if any push constants were defined.
+ * Add/update push constants for execution.
  */
+int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
+                            VkShaderStageFlagBits stage);
 void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
-                            VkShaderStageFlagBits stage, int offset,
-                            size_t size, void *src);
+                            FFVulkanPipeline *pl,
+                            VkShaderStageFlagBits stage,
+                            int offset, size_t size, void *src);
 
 /**
- * Gets the command buffer to use for this submission from the exe context.
+ * Add descriptor to a pipeline. Must be called before pipeline init.
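+ * With read_only set, a single descriptor buffer copy is shared by all
+ * execution contexts in the pool instead of one copy per context.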
*/ -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e); +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only); -/** - * Adds a generic AVBufferRef as a queue depenency. - */ -int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps); - -/** - * Discards all queue dependencies - */ -void ff_vk_discard_exec_deps(FFVkExecContext *e); +/* Initialize/free a pipeline. */ +int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd); +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl); /** - * Adds a frame as a queue dependency. This also manages semaphore signalling. - * Must be called before submission. + * Register a pipeline with an exec pool. + * Pool may be NULL if all descriptor sets are read-only. */ -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag); +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl); -/** - * Submits a command buffer to the queue for execution. Will not block. - */ -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Wait on a command buffer's execution. Mainly useful for debugging and - * development. - */ -void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Create a VkBuffer with the specified parameters. - */ -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, - void *pNext, void *alloc_pNext, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); - -/** - * Maps the buffer to userspace. Set invalidate to 1 if reading the contents - * is necessary. - */ -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], - int nb_buffers, int invalidate); - -/** - * Unmaps the buffer from userspace. Set flush to 1 to write and sync. - */ -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, - int flush); +/* Bind pipeline */ +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanPipeline *pl); -/** - * Frees a buffer. - */ -void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); +/* Update sampler/image/buffer descriptors. e may be NULL for read-only descriptors. */ +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler); +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler); +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt); -/** - * Creates an image, allocates and binds memory in the given - * idx value of the dst frame. If mem is non-NULL, then no memory will be - * allocated, but instead the given memory will be bound to the image. 
- */
-int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx,
-                       int width, int height, VkFormat fmt, VkImageTiling tiling,
-                       VkImageUsageFlagBits usage, VkImageCreateFlags flags,
-                       void *create_pnext,
-                       VkDeviceMemory *mem, void *alloc_pnext);
+void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl,
+                                       FFVkExecContext *e, AVFrame *f,
+                                       VkImageView *views, int set, int binding,
+                                       VkImageLayout layout, VkSampler sampler);
 
 /**
- * Frees the main Vulkan context.
+ * Frees the main context.
  */
 void ff_vk_uninit(FFVulkanContext *s);
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 212681d475..24b096af10 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -94,6 +94,7 @@ typedef enum FFVulkanExtensions {
     /* Queue */                                          \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetDeviceQueue)       \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit)          \
+    MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit2)         \
                                                          \
     /* Fences */                                         \
     MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateFence)          \
-- 
2.40.0
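For reference, a minimal sketch of the intended call flow with the rewritten
API (assuming an already-initialized FFVulkanContext *s, a compute shader shd
built with ff_vk_shader_init()/ff_vk_shader_create(), and a filled-in bindings
array desc of length nb_desc; error handling elided):

    FFVkQueueFamilyCtx qf;
    FFVkExecPool pool = { 0 };
    FFVulkanPipeline pl = { 0 };

    /* One-time setup: pick a queue family and create an execution pool */
    ff_vk_qf_init(s, &qf, VK_QUEUE_COMPUTE_BIT);
    ff_vk_exec_pool_init(s, &qf, &pool, 4 /* contexts */, 0, 0, 0, NULL);

    /* Descriptors are added before the pipeline layout is created, and the
     * pipeline is registered with the pool before any execution */
    ff_vk_pipeline_descriptor_set_add(s, &pl, &shd, desc, nb_desc, 0, 0);
    ff_vk_init_compute_pipeline(s, &pl, &shd);
    ff_vk_exec_pipeline_register(s, &pool, &pl);

    /* Per-submission: grab a free context from the pool, record, submit */
    FFVkExecContext *e = ff_vk_exec_get(&pool);
    ff_vk_exec_start(s, e);
    ff_vk_exec_bind_pipeline(s, e, &pl);
    /* ...update descriptors, record barriers and dispatches via s->vkfn... */
    ff_vk_exec_submit(s, e);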