From d3f2fa8e530dc94c9058149a2cee92196c7adb33 Mon Sep 17 00:00:00 2001 From: Lynne Date: Mon, 16 Jan 2023 07:23:27 +0100 Subject: [PATCH 68/72] libavcodec: add Vulkan common video decoding code --- libavcodec/Makefile | 2 +- libavcodec/vulkan_decode.c | 1135 ++++++++++++++++++++++++++++++++++++ libavcodec/vulkan_decode.h | 163 ++++++ 3 files changed, 1299 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vulkan_decode.c create mode 100644 libavcodec/vulkan_decode.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index a45c32e564..eabf4eb43e 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1285,7 +1285,7 @@ SKIPHEADERS-$(CONFIG_XVMC) += xvmc.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h -SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_video.h vulkan_decode.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c new file mode 100644 index 0000000000..582968e1da --- /dev/null +++ b/libavcodec/vulkan_decode.c @@ -0,0 +1,1135 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vulkan_video.h" +#include "vulkan_decode.h" +#include "config_components.h" + +#if CONFIG_H264_VULKAN_HWACCEL +extern const VkExtensionProperties ff_vk_dec_h264_ext; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL +extern const VkExtensionProperties ff_vk_dec_hevc_ext; +#endif + +static const VkExtensionProperties *dec_ext[] = { +#if CONFIG_H264_VULKAN_HWACCEL + [AV_CODEC_ID_H264] = &ff_vk_dec_h264_ext, +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + [AV_CODEC_ID_HEVC] = &ff_vk_dec_hevc_ext, +#endif +}; + +static int vk_decode_create_view(FFVulkanDecodeContext *ctx, VkImageView *dst_view, + VkImageAspectFlags *aspect, AVVkFrame *src) +{ + VkResult ret; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(ctx->pic_format); + + VkSamplerYcbcrConversionInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, + .conversion = ctx->yuv_sampler, + }; + VkImageViewCreateInfo img_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &yuv_sampler_info, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = ctx->pic_format, + .image = src->img[0], + .components = (VkComponentMapping) { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + .levelCount = 1, + }, + }; + + ret = vk->CreateImageView(ctx->s.hwctx->act_dev, &img_view_create_info, + ctx->s.hwctx->alloc, dst_view); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + *aspect = aspect_mask; + + return 0; +} + +static AVFrame *vk_get_dpb_pool(FFVulkanDecodeContext *ctx) +{ + AVFrame *avf = av_frame_alloc(); + AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data; + if (!avf) + return NULL; + + avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref); + if (!avf->hw_frames_ctx) + av_frame_free(&avf); + avf->buf[0] = av_buffer_pool_get(dpb_frames->pool); + if (!avf->buf[0]) + av_frame_free(&avf); + avf->data[0] = avf->buf[0]->data; + + return avf; +} + +int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + int alloc_dpb) +{ + int err; + + vkpic->nb_slices = 0; + vkpic->slices_size = 0; + + /* If the decoder made a blank frame to make up for a missing ref, or the + * frame is the current frame so it's missing one, create a re-representation */ + if (vkpic->img_view_ref) + return 0; + + /* Pre-allocate slice buffer with a reasonable default */ + if (is_current) { + uint64_t min_alloc = 4096; + if (0) + min_alloc = 2*ctx->s.hprops.minImportedHostPointerAlignment; + + vkpic->slices = av_fast_realloc(NULL, &vkpic->slices_size_max, min_alloc); + if (!vkpic->slices) + return AVERROR(ENOMEM); + + if (0) + vkpic->slices_size += ctx->s.hprops.minImportedHostPointerAlignment; + } + + vkpic->dpb_frame = NULL; + vkpic->dpb_vkf = NULL; + vkpic->img_view_ref = NULL; + vkpic->img_view_out = NULL; + + if (ctx->layered_dpb && alloc_dpb) { + vkpic->img_view_ref = ctx->layered_view; + vkpic->img_aspect_ref = ctx->layered_aspect; + } else if (alloc_dpb) { + vkpic->dpb_frame = vk_get_dpb_pool(ctx); + if (!vkpic->dpb_frame) + return AVERROR(ENOMEM); + + vkpic->dpb_vkf = (AVVkFrame *)vkpic->dpb_frame->data[0]; + + err = vk_decode_create_view(ctx, &vkpic->img_view_ref, + &vkpic->img_aspect_ref, + vkpic->dpb_vkf); + if (err < 0) + return err; + } + + if (!alloc_dpb || is_current) { + err = vk_decode_create_view(ctx, &vkpic->img_view_out, + &vkpic->img_aspect, + (AVVkFrame *)pic->buf[0]->data); + if (err < 0) + return err; + + if (!alloc_dpb) { + vkpic->img_view_ref = vkpic->img_view_out; + vkpic->img_aspect_ref = vkpic->img_aspect; + } + } + + return 0; +} + +int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp, + const uint8_t *data, size_t size, int add_startcode, + uint32_t *nb_slices, const uint32_t **offsets) +{ + static const uint8_t startcode_prefix[3] = { 0x0, 0x0, 0x1 }; + const size_t startcode_len = add_startcode ? sizeof(startcode_prefix) : 0; + const int nb = *nb_slices; + uint8_t *slices; + uint32_t *slice_off; + + slice_off = av_fast_realloc(vp->slice_off, &vp->slice_off_max, + (nb + 1)*sizeof(slice_off)); + if (!slice_off) + return AVERROR(ENOMEM); + + *offsets = vp->slice_off = slice_off; + slice_off[nb] = vp->slices_size; + + slices = av_fast_realloc(vp->slices, &vp->slices_size_max, + vp->slices_size + size + startcode_len); + if (!slices) + return AVERROR(ENOMEM); + + vp->slices = slices; + + /* Startcode */ + memcpy(slices + vp->slices_size, startcode_prefix, startcode_len); + + /* Slice data */ + memcpy(slices + vp->slices_size + startcode_len, data, size); + + *nb_slices = nb + 1; + vp->nb_slices++; + vp->slices_size += startcode_len + size; + + return 0; +} + +void ff_vk_decode_flush(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoBeginCodingInfoKHR decode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = ctx->empty_session_params, + }; + VkVideoCodingControlInfoKHR decode_ctrl = { + .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, + .flags = VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR, + }; + VkVideoEndCodingInfoKHR decode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkCommandBuffer cmd_buf; + FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool); + ff_vk_exec_start(&ctx->s, exec); + cmd_buf = exec->buf; + + vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); + vk->CmdControlVideoCodingKHR(cmd_buf, &decode_ctrl); + vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); + ff_vk_exec_submit(&ctx->s, exec); +} + +static void host_map_buf_free(void *opaque, uint8_t *data) +{ + FFVulkanContext *ctx = opaque; + FFVkVideoBuffer *buf = (FFVkVideoBuffer *)data; + ff_vk_free_buf(ctx, &buf->buf); + av_free(data); +} + +int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]) +{ + int err; + VkResult ret; + VkCommandBuffer cmd_buf; + FFVkVideoBuffer *sd_buf; + + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Output */ + AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data; + + /* Quirks */ + const int layered_dpb = ctx->layered_dpb; + + VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)vp->session_params->data; + VkVideoBeginCodingInfoKHR decode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = *par, + .referenceSlotCount = vp->decode_info.referenceSlotCount, + .pReferenceSlots = vp->decode_info.pReferenceSlots, + }; + VkVideoEndCodingInfoKHR decode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + AVBufferRef *sd_ref = NULL; + size_t data_size = FFALIGN(vp->slices_size, ctx->common.caps.minBitstreamBufferSizeAlignment); + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->exec_pool); + + if (ctx->exec_pool.nb_queries) { + int64_t prev_sub_res = 0; + ff_vk_exec_wait(&ctx->s, exec); + ret = ff_vk_exec_get_query(&ctx->s, exec, NULL, &prev_sub_res); + if (ret != VK_NOT_READY && ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (ret == VK_SUCCESS) + av_log(avctx, prev_sub_res < 0 ? AV_LOG_ERROR : AV_LOG_DEBUG, + "Result of previous frame decoding: %li\n", prev_sub_res); + } + + if (0) { + size_t req_size; + VkExternalMemoryBufferCreateInfo create_desc = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + .pNext = &ctx->profile_list, + }; + + VkImportMemoryHostPointerInfoEXT import_desc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + }; + + VkMemoryHostPointerPropertiesEXT p_props = { + .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, + }; + + /* Align slices pointer */ + import_desc.pHostPointer = (void *)FFALIGN((uintptr_t)vp->slices, + ctx->s.hprops.minImportedHostPointerAlignment); + + req_size = FFALIGN(data_size, + ctx->s.hprops.minImportedHostPointerAlignment); + + ret = vk->GetMemoryHostPointerPropertiesEXT(ctx->s.hwctx->act_dev, + import_desc.handleType, + import_desc.pHostPointer, + &p_props); + + if (ret == VK_SUCCESS) { + sd_buf = av_mallocz(sizeof(*sd_buf)); + if (!sd_buf) + return AVERROR(ENOMEM); + + err = ff_vk_create_buf(&ctx->s, &sd_buf->buf, req_size, + &create_desc, &import_desc, + VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) { + av_free(sd_buf); + return err; /* This shouldn't error out, unless it's critical */ + } else { + size_t neg_offs = (uint8_t *)import_desc.pHostPointer - vp->slices; + + sd_ref = av_buffer_create((uint8_t *)sd_buf, sizeof(*sd_buf), + host_map_buf_free, &ctx->s, 0); + if (!sd_ref) { + ff_vk_free_buf(&ctx->s, &sd_buf->buf); + av_free(sd_buf); + return AVERROR(ENOMEM); + } + + for (int i = 0; i < vp->nb_slices; i++) + vp->slice_off[i] -= neg_offs; + + sd_buf->mem = vp->slices; + } + } + } + + if (!sd_ref) { + err = ff_vk_video_get_buffer(&ctx->s, &ctx->common, &sd_ref, + VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, + &ctx->profile_list, data_size); + if (err < 0) + return err; + + sd_buf = (FFVkVideoBuffer *)sd_ref->data; + + /* Copy the slices data to the buffer */ + memcpy(sd_buf->mem, vp->slices, vp->slices_size); + } + + /* Flush if needed */ + if (!(sd_buf->buf.flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = sd_buf->buf.mem, + .offset = 0, + .size = FFALIGN(vp->slices_size, + ctx->s.props.properties.limits.nonCoherentAtomSize), + }; + + ret = vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &flush_buf); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", + ff_vk_ret2str(ret)); + av_buffer_unref(&sd_ref); + return AVERROR_EXTERNAL; + } + } + + vp->decode_info.srcBuffer = sd_buf->buf.buf; + vp->decode_info.srcBufferOffset = 0; + vp->decode_info.srcBufferRange = data_size; + + /* Start command buffer recording */ + ff_vk_exec_start(&ctx->s, exec); + cmd_buf = exec->buf; + + /* Slices */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &sd_ref, 1, 0); + if (err < 0) + return err; + + /* Parameters */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->session_params, 1, 0); + if (err < 0) + return err; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + if (err < 0) + return err; + + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, + pic); + if (err < 0) + return err; + + /* Output image - change layout, as it comes from a pool */ + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .srcAccessMask = vkf->access[0], + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = vkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR, + .srcQueueFamilyIndex = vkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = vkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = vp->img_aspect, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, pic, + &img_bar[nb_img_bar], &nb_img_bar); + + /* Reference for the current image, if existing and not layered */ + if (vp->dpb_frame) { + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + if (err < 0) + return err; + } + + if (!layered_dpb) { + /* All references (apart from the current) for non-layered refs */ + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + AVFrame *ref_frame = rpic[i]; + FFVulkanDecodePicture *rvp = rvkp[i]; + AVFrame *ref = rvp->dpb_frame ? rvp->dpb_frame : ref_frame; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + if (err < 0) + return err; + + if (err == 0) { + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, + &rvp->sem, &rvp->sem_value, + ref); + if (err < 0) + return err; + } + + if (!rvp->dpb_frame) { + AVVkFrame *rvkf = (AVVkFrame *)ref->data; + + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .srcAccessMask = rvkf->access[0], + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR | + VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = rvkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, + .srcQueueFamilyIndex = rvkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = rvkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = rvp->img_aspect_ref, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, ref, + &img_bar[nb_img_bar], &nb_img_bar); + } + } + } else if (vp->decode_info.referenceSlotCount || + vp->img_view_out != vp->img_view_ref) { + /* Single barrier for a single layered ref */ + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->layered_frame, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + if (err < 0) + return err; + } + + /* Change image layout */ + vk->CmdPipelineBarrier2KHR(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Start, use parameters, decode and end decoding */ + vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); + + /* Start status query TODO: remove check when radv gets support */ + if (ctx->exec_pool.nb_queries) + vk->CmdBeginQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0, 0); + + vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info); + + /* End status query */ + if (ctx->exec_pool.nb_queries) + vk->CmdEndQuery(cmd_buf, ctx->exec_pool.query_pool, exec->query_idx + 0); + + vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); + + /* End recording and submit for execution */ + return ff_vk_exec_submit(&ctx->s, exec); +} + +void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp) +{ + FFVulkanFunctions *vk; + VkSemaphoreWaitInfo sem_wait; + + // TODO: investigate why this happens + if (!ctx) { + av_freep(&vp->slices); + av_freep(&vp->slice_off); + av_frame_free(&vp->dpb_frame); + return; + } + + vk = &ctx->s.vkfn; + + /* We do not have to lock the frame here because we're not interested + * in the actual current semaphore value, but only that it's later than + * the time we submitted the image for decoding. */ + sem_wait = (VkSemaphoreWaitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .pSemaphores = &vp->sem, + .pValues = &vp->sem_value, + .semaphoreCount = 1, + }; + + if (vp->sem) + vk->WaitSemaphores(ctx->s.hwctx->act_dev, &sem_wait, UINT64_MAX); + + /* Free slices data + * TODO: use a pool in the decode context instead to avoid per-frame allocs. */ + av_freep(&vp->slices); + av_freep(&vp->slice_off); + + /* Destroy image view (out) */ + if (vp->img_view_out != vp->img_view_ref && vp->img_view_out) + vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_out, ctx->s.hwctx->alloc); + + /* Destroy image view (ref, unlayered) */ + if (vp->dpb_vkf && vp->img_view_ref) + vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->img_view_ref, ctx->s.hwctx->alloc); + + av_frame_free(&vp->dpb_frame); +} + +/* Since to even get decoder capabilities, we have to initialize quite a lot, + * this function does initialization and saves it to hwaccel_priv_data if + * available. */ +static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_ref, + int *width_align, int *height_align, + enum AVPixelFormat *pix_fmt, int *dpb_dedicate) +{ + VkResult ret; + int err, max_level, score = INT32_MAX; + const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id]; + AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; + AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data; + AVVulkanDeviceContext *hwctx = device->hwctx; + enum AVPixelFormat context_format = frames->sw_format; + int context_format_was_found = 0; + int base_profile, cur_profile = avctx->profile; + + int dedicated_dpb; + int layered_dpb; + + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanExtensions local_extensions = 0x0; + FFVulkanExtensions *extensions = ctx ? &ctx->s.extensions : &local_extensions; + FFVulkanFunctions local_vk = { 0 }; + FFVulkanFunctions *vk = ctx ? &ctx->s.vkfn : &local_vk; + VkVideoCapabilitiesKHR local_caps = { 0 }; + VkVideoCapabilitiesKHR *caps = ctx ? &ctx->common.caps : &local_caps; + VkVideoDecodeCapabilitiesKHR local_dec_caps = { 0 }; + VkVideoDecodeCapabilitiesKHR *dec_caps = ctx ? &ctx->dec_caps : &local_dec_caps; + VkVideoDecodeUsageInfoKHR local_usage = { 0 }; + VkVideoDecodeUsageInfoKHR *usage = ctx ? &ctx->usage : &local_usage; + VkVideoProfileInfoKHR local_profile = { 0 }; + VkVideoProfileInfoKHR *profile = ctx ? &ctx->profile : &local_profile; + VkVideoProfileListInfoKHR local_profile_list = { 0 }; + VkVideoProfileListInfoKHR *profile_list = ctx ? &ctx->profile_list : &local_profile_list; + + VkVideoDecodeH264ProfileInfoKHR local_h264_profile = { 0 }; + VkVideoDecodeH264ProfileInfoKHR *h264_profile = ctx ? &ctx->h264_profile : &local_h264_profile; + + VkVideoDecodeH264ProfileInfoKHR local_h265_profile = { 0 }; + VkVideoDecodeH264ProfileInfoKHR *h265_profile = ctx ? &ctx->h265_profile : &local_h265_profile; + + VkPhysicalDeviceVideoFormatInfoKHR fmt_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR, + .pNext = profile_list, + }; + VkVideoDecodeH264CapabilitiesKHR h264_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, + }; + VkVideoDecodeH265CapabilitiesKHR h265_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, + }; + VkVideoFormatPropertiesKHR *ret_info; + uint32_t nb_out_fmts = 0; + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + if (!desc) + return AVERROR(EINVAL); + + if (ctx && ctx->init) + return 0; + + if (!vk_codec->decode_op) + return AVERROR(EINVAL); + + *extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions, + hwctx->nb_enabled_dev_extensions); + + if (!(*extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME); + return AVERROR(ENOSYS); + } else if (!vk_codec->decode_extension) { + av_log(avctx, AV_LOG_ERROR, "Unsupported codec for Vulkan decoding: %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } else if (!(vk_codec->decode_extension & *extensions)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } + + err = ff_vk_load_functions(device, vk, *extensions, 1, 1); + if (err < 0) + return err; + +repeat: + if (avctx->codec_id == AV_CODEC_ID_H264) { + base_profile = FF_PROFILE_H264_CONSTRAINED_BASELINE; + dec_caps->pNext = &h264_caps; + usage->pNext = h264_profile; + h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR; + h264_profile->stdProfileIdc = cur_profile; + h264_profile->pictureLayout = avctx->field_order == AV_FIELD_PROGRESSIVE ? + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR : + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR; + } else if (avctx->codec_id == AV_CODEC_ID_H265) { + base_profile = FF_PROFILE_HEVC_MAIN; + dec_caps->pNext = &h265_caps; + usage->pNext = h265_profile; + h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR; + h265_profile->stdProfileIdc = cur_profile; + } + + usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR; + usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR; + + profile->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR; + /* NOTE: NVIDIA's implementation fails if the USAGE hint is inserted. + * Remove this once it's fixed. */ + profile->pNext = usage->pNext; + profile->videoCodecOperation = vk_codec->decode_op; + profile->chromaSubsampling = ff_vk_subsampling_from_av_desc(desc); + profile->lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth); + profile->chromaBitDepth = profile->lumaBitDepth; + + profile_list->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR; + profile_list->profileCount = 1; + profile_list->pProfiles = profile; + + /* Get the capabilities of the decoder for the given profile */ + caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + caps->pNext = dec_caps; + dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR; + /* dec_caps->pNext already filled in */ + + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile, + caps); + if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR && + avctx->flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH && + cur_profile != base_profile) { + cur_profile = base_profile; + av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting " + "again with profile %s\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, avctx->profile), + avcodec_profile_name(avctx->codec_id, base_profile)); + goto repeat; + } else if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " + "%s profile \"%s\" not supported!\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, cur_profile)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " + "format (%s) not supported!\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT || + ret == VK_ERROR_FORMAT_NOT_SUPPORTED) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + return AVERROR_EXTERNAL; + } + + max_level = avctx->codec_id == AV_CODEC_ID_H264 ? h264_caps.maxLevelIdc : + avctx->codec_id == AV_CODEC_ID_H265 ? h265_caps.maxLevelIdc : + 0; + + if (ctx) { + av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile \"%s\":\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, avctx->profile)); + av_log(avctx, AV_LOG_VERBOSE, " Maximum level: %i\n", + max_level); + av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n", + caps->minCodedExtent.width, caps->maxCodedExtent.width); + av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n", + caps->minCodedExtent.height, caps->maxCodedExtent.height); + av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n", + caps->pictureAccessGranularity.width); + av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n", + caps->pictureAccessGranularity.height); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n", + caps->minBitstreamBufferOffsetAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n", + caps->minBitstreamBufferSizeAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n", + caps->maxDpbSlots); + av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n", + caps->maxActiveReferencePictures); + av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n", + CODEC_VER(caps->stdHeaderVersion.specVersion), + CODEC_VER(dec_ext[avctx->codec_id]->specVersion)); + av_log(avctx, AV_LOG_VERBOSE, " Decode modes:%s%s%s\n", + dec_caps->flags ? "" : + " invalid", + dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR ? + " reuse_dst_dpb" : "", + dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR ? + " dedicated_dpb" : ""); + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n", + caps->flags ? "" : + " none", + caps->flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ? + " protected" : "", + caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ? + " separate_references" : ""); + } + + /* Check if decoding is possible with the given parameters */ + if (avctx->coded_width < caps->minCodedExtent.width || + avctx->coded_height < caps->minCodedExtent.height || + avctx->coded_width > caps->maxCodedExtent.width || + avctx->coded_height > caps->maxCodedExtent.height) + return AVERROR(EINVAL); + + if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) && + avctx->level > max_level) + return AVERROR(EINVAL); + + /* Some basic sanity checking */ + if (!(dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR))) { + av_log(avctx, AV_LOG_ERROR, "Buggy driver signals invalid decoding mode: neither " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR nor " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR are set!\n"); + return AVERROR_EXTERNAL; + } else if ((dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) == + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) && + !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR)) { + av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set " + "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n"); + return AVERROR_EXTERNAL; + } + + /* TODO: make dedicated_dpb tunable */ + dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR); + layered_dpb = !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); + + if (dedicated_dpb) { + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + } else { + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + } + + /* Get the format of the images necessary */ + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, NULL); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts); + if (!ret_info) + return AVERROR(ENOMEM); + + for (int i = 0; i < nb_out_fmts; i++) + ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; + + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, ret_info); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + av_free(ret_info); + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + av_free(ret_info); + return AVERROR_EXTERNAL; + } + + if (ctx) { + ctx->dedicated_dpb = dedicated_dpb; + ctx->layered_dpb = layered_dpb; + ctx->init = 1; + } + + *pix_fmt = AV_PIX_FMT_NONE; + + av_log(avctx, AV_LOG_DEBUG, "Pixel format list for decoding:\n"); + for (int i = 0; i < nb_out_fmts; i++) { + int tmp_score; + enum AVPixelFormat tmp = ff_vk_pix_fmt_from_vkfmt(ret_info[i].format, + &tmp_score); + const AVPixFmtDescriptor *tmp_desc = av_pix_fmt_desc_get(tmp); + if (tmp == AV_PIX_FMT_NONE || !tmp_desc) + continue; + + av_log(avctx, AV_LOG_DEBUG, " %i - %s (%i), score %i\n", i, + av_get_pix_fmt_name(tmp), ret_info[i].format, tmp_score); + + if (context_format == tmp || tmp_score < score) { + if (ctx) + ctx->pic_format = ret_info[i].format; + *pix_fmt = tmp; + context_format_was_found |= context_format == tmp; + if (context_format_was_found) + break; + } + } + + if (*pix_fmt == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "No valid pixel format for decoding!\n"); + return AVERROR(EINVAL); + } + + if (width_align) + *width_align = caps->pictureAccessGranularity.width; + if (height_align) + *height_align = caps->pictureAccessGranularity.height; + if (dpb_dedicate) + *dpb_dedicate = dedicated_dpb; + + av_free(ret_info); + + av_log(avctx, AV_LOG_VERBOSE, "Chosen frames format: %s\n", + av_get_pix_fmt_name(*pix_fmt)); + + if (context_format != AV_PIX_FMT_NONE && !context_format_was_found) { + av_log(avctx, AV_LOG_ERROR, "Frames context had a pixel format set which " + "was not available for decoding into!\n"); + return AVERROR(EINVAL); + } + + return *pix_fmt == AV_PIX_FMT_NONE ? AVERROR(EINVAL) : 0; +} + +int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +{ + int err, width_align, height_align, dedicated_dpb; + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; + AVVulkanFramesContext *hwfc = frames_ctx->hwctx; + + err = vulkan_decode_check_init(avctx, hw_frames_ctx, &width_align, &height_align, + &frames_ctx->sw_format, &dedicated_dpb); + if (err < 0) + return err; + + frames_ctx->width = FFALIGN(avctx->coded_width, width_align); + frames_ctx->height = FFALIGN(avctx->coded_height, height_align); + frames_ctx->format = AV_PIX_FMT_VULKAN; + + hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + + if (!dedicated_dpb) + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + + return err; +} + +void ff_vk_decode_free_params(void *opaque, uint8_t *data) +{ + FFVulkanDecodeContext *ctx = opaque; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)data; + vk->DestroyVideoSessionParametersKHR(ctx->s.hwctx->act_dev, *par, + ctx->s.hwctx->alloc); + av_free(par); +} + +int ff_vk_decode_uninit(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Wait on and free execution pool */ + ff_vk_exec_pool_free(s, &ctx->exec_pool); + + /* Destroy layered view */ + if (ctx->layered_view) + vk->DestroyImageView(s->hwctx->act_dev, ctx->layered_view, s->hwctx->alloc); + + /* This also frees all references from this pool */ + av_frame_free(&ctx->layered_frame); + av_buffer_unref(&ctx->dpb_hwfc_ref); + + /* Destroy parameters */ + if (ctx->empty_session_params) + vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, + ctx->empty_session_params, + s->hwctx->alloc); + + ff_vk_video_common_uninit(s, &ctx->common); + + vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler, + s->hwctx->alloc); + + av_buffer_pool_uninit(&ctx->tmp_pool); + + ff_vk_uninit(s); + + return 0; +} + +int ff_vk_decode_init(AVCodecContext *avctx) +{ + int err, qf, cxpos = 0, cypos = 0, nb_q = 0; + VkResult ret; + FFVulkanDecodeContext *ctx = avctx->internal->hwaccel_priv_data; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, + }; + VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params : + avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params : + NULL, + }; + VkVideoSessionCreateInfoKHR session_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR, + }; + VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, + .components = ff_comp_identity_map, + .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY, + .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */ + }; + + err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN); + if (err < 0) + return err; + + s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx); + s->frames = (AVHWFramesContext *)s->frames_ref->data; + s->hwfc = s->frames->hwctx; + + s->device_ref = av_buffer_ref(s->frames->device_ref); + s->device = (AVHWDeviceContext *)s->device_ref->data; + s->hwctx = s->device->hwctx; + + /* Get parameters, capabilities and final pixel/vulkan format */ + err = vulkan_decode_check_init(avctx, s->frames_ref, NULL, NULL, + &ctx->sw_format, NULL); + if (err < 0) + goto fail; + + /* Load all properties */ + err = ff_vk_load_props(s); + if (err < 0) + goto fail; + + /* Create queue context */ + qf = ff_vk_qf_init(s, &ctx->qf_dec, VK_QUEUE_VIDEO_DECODE_BIT_KHR); + + /* Check for support */ + if (!(s->video_props[qf].videoCodecOperations & + ff_vk_codec_map[avctx->codec_id].decode_op)) { + av_log(avctx, AV_LOG_ERROR, "Decoding %s not supported on the given " + "queue family %i!\n", avcodec_get_name(avctx->codec_id), qf); + return AVERROR(EINVAL); + } + + /* TODO: enable when stable and tested. */ + if (s->query_props[qf].queryResultStatusSupport) + nb_q = 1; + + /* Create decode exec context. + * 4 async contexts per thread seems like a good number. */ + err = ff_vk_exec_pool_init(s, &ctx->qf_dec, &ctx->exec_pool, 4*avctx->thread_count, + nb_q, VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR, 0, + &ctx->profile); + if (err < 0) + goto fail; + + session_create.pVideoProfile = &ctx->profile; + session_create.flags = 0x0; + session_create.queueFamilyIndex = s->hwctx->queue_family_decode_index; + session_create.maxCodedExtent = ctx->common.caps.maxCodedExtent; + session_create.maxDpbSlots = ctx->common.caps.maxDpbSlots; + session_create.maxActiveReferencePictures = ctx->common.caps.maxActiveReferencePictures; + session_create.pictureFormat = ctx->pic_format; + session_create.referencePictureFormat = session_create.pictureFormat; + session_create.pStdHeaderVersion = dec_ext[avctx->codec_id]; + + err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); + if (err < 0) + goto fail; + + /* Get sampler */ + av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location); + yuv_sampler_info.xChromaOffset = cxpos >> 7; + yuv_sampler_info.yChromaOffset = cypos >> 7; + yuv_sampler_info.format = ctx->pic_format; + ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info, + s->hwctx->alloc, &ctx->yuv_sampler); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + /* If doing an out-of-place decoding, create a DPB pool */ + if (ctx->dedicated_dpb) { + AVHWFramesContext *dpb_frames; + AVVulkanFramesContext *dpb_hwfc; + + ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->device_ref); + if (!ctx->dpb_hwfc_ref) { + err = AVERROR(ENOMEM); + goto fail; + } + + dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data; + dpb_frames->format = s->frames->format; + dpb_frames->sw_format = s->frames->sw_format; + dpb_frames->width = s->frames->width; + dpb_frames->height = s->frames->height; + + dpb_hwfc = dpb_frames->hwctx; + dpb_hwfc->create_pnext = &ctx->profile_list; + dpb_hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */ + + if (ctx->layered_dpb) + dpb_hwfc->nb_layers = ctx->common.caps.maxDpbSlots; + + err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref); + if (err < 0) + goto fail; + + if (ctx->layered_dpb) { + ctx->layered_frame = vk_get_dpb_pool(ctx); + if (!ctx->layered_frame) { + err = AVERROR(ENOMEM); + goto fail; + } + + err = vk_decode_create_view(ctx, &ctx->layered_view, &ctx->layered_aspect, + (AVVkFrame *)ctx->layered_frame->data); + if (err < 0) + goto fail; + } + } + + session_params_create.videoSession = ctx->common.session; + ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, + s->hwctx->alloc, &ctx->empty_session_params); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ff_vk_decode_flush(avctx); + + av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization sucessful\n"); + + return 0; + +fail: + ff_vk_decode_uninit(avctx); + + return err; +} diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h new file mode 100644 index 0000000000..9f9676bbfa --- /dev/null +++ b/libavcodec/vulkan_decode.h @@ -0,0 +1,163 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_DECODE_H +#define AVCODEC_VULKAN_DECODE_H + +#include "decode.h" +#include "hwconfig.h" +#include "internal.h" + +#include "vulkan_video.h" + +typedef struct FFVulkanDecodeContext { + FFVulkanContext s; + FFVkVideoCommon common; + + int dedicated_dpb; /* Oddity #1 - separate DPB images */ + int layered_dpb; /* Madness #1 - layered DPB images */ + + AVBufferRef *dpb_hwfc_ref; /* Only used for dedicated_dpb */ + + AVFrame *layered_frame; /* Only used for layered_dpb */ + VkImageView layered_view; + VkImageAspectFlags layered_aspect; + + VkVideoDecodeH264ProfileInfoKHR h264_profile; + VkVideoDecodeH264ProfileInfoKHR h265_profile; + VkVideoSessionParametersKHR empty_session_params; + + VkSamplerYcbcrConversion yuv_sampler; + VkVideoDecodeUsageInfoKHR usage; + VkVideoProfileInfoKHR profile; + VkVideoDecodeCapabilitiesKHR dec_caps; + VkVideoProfileListInfoKHR profile_list; + VkFormat pic_format; + enum AVPixelFormat sw_format; + int init; + + AVBufferRef *session_params; + + FFVkQueueFamilyCtx qf_dec; + FFVkExecPool exec_pool; + + AVBufferPool *tmp_pool; /* Pool for temporary data, if needed (HEVC) */ + size_t tmp_pool_ele_size; + + uint16_t last_ref_frames_in_use; +} FFVulkanDecodeContext; + +typedef struct FFVulkanDecodePicture { + AVFrame *dpb_frame; /* Only used for out-of-place decoding. */ + AVVkFrame *dpb_vkf; /* Only used for out-of-place decoding. */ + + VkImageView img_view_ref; /* Image representation view (reference) */ + VkImageView img_view_out; /* Image representation view (output-only) */ + VkImageAspectFlags img_aspect; /* Image plane mask bits */ + VkImageAspectFlags img_aspect_ref; /* Only used for out-of-place decoding */ + + VkSemaphore sem; + uint64_t sem_value; + + /* State */ + int update_params; + AVBufferRef *session_params; + + /* Current picture */ + VkVideoPictureResourceInfoKHR ref; + VkVideoReferenceSlotInfoKHR ref_slot; + + /* Picture refs. H264 has the maximum number of refs (36) of any supported codec. */ + VkVideoPictureResourceInfoKHR refs [36]; + VkVideoReferenceSlotInfoKHR ref_slots[36]; + + /* Main decoding struct */ + AVBufferRef *params_buf; + VkVideoDecodeInfoKHR decode_info; + + /* Slice data */ + uint8_t *slices; + size_t slices_size; + unsigned int slices_size_max; + uint32_t *slice_off; + unsigned int slice_off_max; + uint32_t nb_slices; +} FFVulkanDecodePicture; + +/** + * Initialize decoder. + */ +int ff_vk_decode_init(AVCodecContext *avctx); + +/** + * Initialize hw_frames_ctx with the parameters needed to decode the stream + * using the parameters from avctx. + * + * NOTE: if avctx->internal->hwaccel_priv_data exists, will partially initialize + * the context. + */ +int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + +/** + * Prepare a frame, creates the image view, and sets up the dpb fields. + */ +int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *ctx, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + int alloc_dpb); + +/** + * Add slice data to frame. + */ +int ff_vk_decode_add_slice(FFVulkanDecodePicture *vp, + const uint8_t *data, size_t size, int add_startcode, + uint32_t *nb_slices, const uint32_t **offsets); + +/** + * Decode a frame. + */ +int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]); + +/** + * Free a frame and its state. + */ +void ff_vk_decode_free_frame(FFVulkanDecodeContext *ctx, FFVulkanDecodePicture *vp); + +/** + * Get an FFVkBuffer suitable for decoding from. + */ +int ff_vk_get_decode_buffer(FFVulkanDecodeContext *ctx, AVBufferRef **buf, + void *create_pNext, size_t size); + +/** + * Free VkVideoSessionParametersKHR. + */ +void ff_vk_decode_free_params(void *opaque, uint8_t *data); + +/** + * Flush decoder. + */ +void ff_vk_decode_flush(AVCodecContext *avctx); + +/** + * Free decoder. + */ +int ff_vk_decode_uninit(AVCodecContext *avctx); + +#endif /* AVCODEC_VULKAN_DECODE_H */ -- 2.39.2