From: Lynne via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: Lynne <dev@lynne.ee> Subject: [FFmpeg-devel] [PATCH 03/13] hwcontext_vulkan: rewrite queue picking system for the new API Date: Wed, 7 Aug 2024 23:33:28 +0200 Message-ID: <20240807213347.917235-3-dev@lynne.ee> (raw) In-Reply-To: <20240807213347.917235-1-dev@lynne.ee> This allows us to support different video ops on different queues, as well as any other arbitrary queues we need. --- libavutil/hwcontext_vulkan.c | 262 ++++++++++++++++++++++------------- 1 file changed, 167 insertions(+), 95 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 33d856ddd3..5baf68660a 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -1028,16 +1028,51 @@ end: } /* Picks the least used qf with the fewest unneeded flags, or -1 if none found */ -static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf, +static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf, VkQueueFlagBits flags) { int index = -1; uint32_t min_score = UINT32_MAX; for (int i = 0; i < num_qf; i++) { - const VkQueueFlagBits qflags = qf[i].queueFlags; + VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags; + + /* Per the spec, reporting transfer caps is optional for these 2 types */ + if ((flags & VK_QUEUE_TRANSFER_BIT) && + (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))) + qflags |= VK_QUEUE_TRANSFER_BIT; + if (qflags & flags) { - uint32_t score = av_popcount(qflags) + qf[i].timestampValidBits; + uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits; + if (score < min_score) { + index = i; + min_score = score; + } + } + } + + if (index > -1) + qf[index].queueFamilyProperties.timestampValidBits++; + + return index; +} + +static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf, + VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf, + 
VkVideoCodecOperationFlagBitsKHR flags) +{ + int index = -1; + uint32_t min_score = UINT32_MAX; + + for (int i = 0; i < num_qf; i++) { + const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags; + const VkQueueFlagBits vflags = qf_vid[i].videoCodecOperations; + + if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR))) + continue; + + if (vflags & flags) { + uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits; if (score < min_score) { index = i; min_score = score; @@ -1046,7 +1081,7 @@ static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf } if (index > -1) - qf[index].timestampValidBits++; + qf[index].queueFamilyProperties.timestampValidBits++; return index; } @@ -1054,12 +1089,12 @@ static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) { uint32_t num; - float *weights; - VkQueueFamilyProperties *qf = NULL; VulkanDevicePriv *p = ctx->hwctx; AVVulkanDeviceContext *hwctx = &p->p; FFVulkanFunctions *vk = &p->vkctx.vkfn; - int graph_index, comp_index, tx_index, enc_index, dec_index; + + VkQueueFamilyProperties2 *qf = NULL; + VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL; /* First get the number of queue families */ vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL); @@ -1069,118 +1104,155 @@ static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) } /* Then allocate memory */ - qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties)); + qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2)); if (!qf) return AVERROR(ENOMEM); + qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR)); + if (!qf_vid) + return AVERROR(ENOMEM); + + for (uint32_t i = 0; i < num; i++) { + qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, + }; + qf[i] = 
(VkQueueFamilyProperties2) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, + .pNext = &qf_vid[i], + }; + } + /* Finally retrieve the queue families */ - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qf); + vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf); av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n"); for (int i = 0; i < num; i++) { av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s (queues: %i)\n", i, - ((qf[i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "", - ((qf[i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "", - ((qf[i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "", - ((qf[i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "", - ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "", - ((qf[i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "", - ((qf[i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "", - qf[i].queueCount); + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "", + qf[i].queueFamilyProperties.queueCount); /* We use this field to keep a score of how many times we've used that * queue family in order to make better choices. 
*/ - qf[i].timestampValidBits = 0; + qf[i].queueFamilyProperties.timestampValidBits = 0; } + hwctx->nb_qf = 0; + /* Pick each queue family to use */ - graph_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT); - comp_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT); - tx_index = pick_queue_family(qf, num, VK_QUEUE_TRANSFER_BIT); - enc_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); - dec_index = pick_queue_family(qf, num, VK_QUEUE_VIDEO_DECODE_BIT_KHR); +#define PICK_QF(type, vid_op) \ + do { \ + uint32_t i; \ + uint32_t idx; \ + \ + if (vid_op) \ + idx = pick_video_queue_family(qf, qf_vid, num, vid_op); \ + else \ + idx = pick_queue_family(qf, num, type); \ + \ + if (idx == -1) \ + continue; \ + \ + for (i = 0; i < hwctx->nb_qf; i++) { \ + if (hwctx->qf[i].idx == idx) { \ + hwctx->qf[i].flags |= type; \ + hwctx->qf[i].video_caps |= vid_op; \ + break; \ + } \ + } \ + if (i == hwctx->nb_qf) { \ + hwctx->qf[i].idx = idx; \ + hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \ + hwctx->qf[i].flags = type; \ + hwctx->qf[i].video_caps = vid_op; \ + hwctx->nb_qf++; \ + } \ + } while (0) + + PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); + PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); + PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); + + PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR); + PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR); + + PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR); + PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR); + + PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); + + av_free(qf); + av_free(qf_vid); + +#undef PICK_QF + + cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf, + sizeof(VkDeviceQueueCreateInfo)); + if (!cd->pQueueCreateInfos) + return AVERROR(ENOMEM); 
+ + for (uint32_t i = 0; i < hwctx->nb_qf; i++) { + int dup = 0; + float *weights = NULL; + VkDeviceQueueCreateInfo *pc; + for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) { + if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) { + dup = 1; + break; + } + } + if (dup) + continue; + + weights = av_malloc_array(hwctx->qf[i].num, sizeof(float)); + if (!weights) { + for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) + av_free((void *)cd->pQueueCreateInfos[i].pQueuePriorities); + av_free((void *)cd->pQueueCreateInfos); + return AVERROR(ENOMEM); + } + + for (uint32_t j = 0; j < hwctx->qf[i].num; j++) + weights[j] = 1.0; - /* Signalling the transfer capabilities on a queue family is optional */ - if (tx_index < 0) { - tx_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT); - if (tx_index < 0) - tx_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT); + pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos; + pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueFamilyIndex = hwctx->qf[i].idx, + .queueCount = hwctx->qf[i].num, + .pQueuePriorities = weights, + }; } + /* Setup deprecated fields */ hwctx->queue_family_index = -1; hwctx->queue_family_comp_index = -1; hwctx->queue_family_tx_index = -1; hwctx->queue_family_encode_index = -1; hwctx->queue_family_decode_index = -1; -#define SETUP_QUEUE(qf_idx) \ - if (qf_idx > -1) { \ - int fidx = qf_idx; \ - int qc = qf[fidx].queueCount; \ - VkDeviceQueueCreateInfo *pc; \ - \ - if (fidx == graph_index) { \ - hwctx->queue_family_index = fidx; \ - hwctx->nb_graphics_queues = qc; \ - graph_index = -1; \ - } \ - if (fidx == comp_index) { \ - hwctx->queue_family_comp_index = fidx; \ - hwctx->nb_comp_queues = qc; \ - comp_index = -1; \ - } \ - if (fidx == tx_index) { \ - hwctx->queue_family_tx_index = fidx; \ - hwctx->nb_tx_queues = qc; \ - tx_index = -1; \ - } \ - if (fidx == enc_index) { \ - hwctx->queue_family_encode_index = 
fidx; \ - hwctx->nb_encode_queues = qc; \ - enc_index = -1; \ - } \ - if (fidx == dec_index) { \ - hwctx->queue_family_decode_index = fidx; \ - hwctx->nb_decode_queues = qc; \ - dec_index = -1; \ - } \ - \ - pc = av_realloc((void *)cd->pQueueCreateInfos, \ - sizeof(*pc) * (cd->queueCreateInfoCount + 1)); \ - if (!pc) { \ - av_free(qf); \ - return AVERROR(ENOMEM); \ - } \ - cd->pQueueCreateInfos = pc; \ - pc = &pc[cd->queueCreateInfoCount]; \ - \ - weights = av_malloc(qc * sizeof(float)); \ - if (!weights) { \ - av_free(qf); \ - return AVERROR(ENOMEM); \ - } \ - \ - memset(pc, 0, sizeof(*pc)); \ - pc->sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; \ - pc->queueFamilyIndex = fidx; \ - pc->queueCount = qc; \ - pc->pQueuePriorities = weights; \ - \ - for (int i = 0; i < qc; i++) \ - weights[i] = 1.0f / qc; \ - \ - cd->queueCreateInfoCount++; \ - } - - SETUP_QUEUE(graph_index) - SETUP_QUEUE(comp_index) - SETUP_QUEUE(tx_index) - SETUP_QUEUE(enc_index) - SETUP_QUEUE(dec_index) - -#undef SETUP_QUEUE +#define SET_OLD_QF(field, nb_field, type) \ + do { \ + if (field < 0 && hwctx->qf[i].flags & type) { \ + field = hwctx->qf[i].idx; \ + nb_field = hwctx->qf[i].num; \ + } \ + } while (0) - av_free(qf); + for (uint32_t i = 0; i < hwctx->nb_qf; i++) { + SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT); + SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT); + SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT); + SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); + SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR); + } + +#undef SET_OLD_QF return 0; } -- 2.45.2.753.g447d99e1c3b _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email 
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-08-07 21:34 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-07 21:33 [FFmpeg-devel] [PATCH 01/13] hwcontext_vulkan: add a new mechanism to expose used queue families Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 02/13] vulkan: use the new queue family mechanism Lynne via ffmpeg-devel
2024-08-07 21:33 ` Lynne via ffmpeg-devel [this message]
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 04/13] hwcontext_vulkan: initialize optical flow queues if available Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 05/13] vulkan_video: remove NIH pooled buffer implementation Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 06/13] hwcontext_vulkan: remove unused struct Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 07/13] hwcontext_vulkan: rewrite upload/download Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 08/13] hwcontext_vulkan: constify validation layer features table Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 09/13] vulkan_shaderc: fix error reporting for certain errors Lynne via ffmpeg-devel
2024-08-07 21:33 ` [FFmpeg-devel] [PATCH 10/13] hwcontext_vulkan: enable storageBuffer16BitAccess if available Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 11/13] vulkan_shaderc: add debug information to shaders Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 12/13] hwcontext_vulkan: add support for VK_EXT_shader_object Lynne via ffmpeg-devel
2024-08-07 21:35 ` [FFmpeg-devel] [PATCH 13/13] vulkan: use allocator callback for buffer creation Lynne via ffmpeg-devel
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240807213347.917235-3-dev@lynne.ee \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=dev@lynne.ee \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git