From: Jamaika1 via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: Jamaika1 <code@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] Changing-vulkan-file-directory (PR #20417) Message-ID: <175691058908.25.12192413182319126055@463a07221176> (raw) PR #20417 opened by Jamaika1 URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20417 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20417.patch Testing vulkan in msys2 ucrt gcc/mingw. From 5501cb44351dde57d8ad9412f3106d3da3f1c968 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:41:53 +0000 Subject: [PATCH 001/118] Changing vulkan file directory Signed-off-by: Jamaika1 <lukaszcz18@wp.pl> --- libavfilter/vf_avgblur_vulkan.c | 259 -------------------------------- 1 file changed, 259 deletions(-) delete mode 100644 libavfilter/vf_avgblur_vulkan.c diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c deleted file mode 100644 index 156278dd78..0000000000 --- a/libavfilter/vf_avgblur_vulkan.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/random_seed.h" -#include "libavutil/vulkan_spirv.h" -#include "libavutil/opt.h" -#include "vulkan_filter.h" - -#include "filters.h" -#include "video.h" - -typedef struct AvgBlurVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - - /* Push constants / options */ - struct { - float filter_norm[4]; - int32_t filter_len[2]; - } opts; - - int size_x; - int size_y; - int planes; -} AvgBlurVulkanContext; - -static const char blur_kernel[] = { - C(0, void distort(const ivec2 pos, const int idx) ) - C(0, { ) - C(1, vec4 sum = vec4(0); ) - C(1, for (int y = -filter_len.y; y <= filter_len.y; y++) ) - C(1, for (int x = -filter_len.x; x <= filter_len.x; x++) ) - C(2, sum += imageLoad(input_img[idx], pos + ivec2(x, y)); ) - C(0, ) - C(1, imageStore(output_img[idx], pos, sum * filter_norm); ) - C(0, } ) -}; - -static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - AvgBlurVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "avgblur", - 
VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 1, 1, - 0)); - shd = &s->shd; - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc, 2, 0, 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, vec4 filter_norm; ); - GLSLC(1, ivec2 filter_len; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), - VK_SHADER_STAGE_COMPUTE_BIT); - - GLSLD( blur_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, vec4 res; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (!IS_WITHIN(pos, size)) ); - GLSLC(2, return; ); - if (s->planes & (1 << i)) { - GLSLF(1, distort(pos, %i); ,i); - } else { - GLSLF(1, res = imageLoad(input_img[%i], pos); ,i); - GLSLF(1, imageStore(output_img[%i], pos, res); ,i); - } - } - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - s->opts.filter_len[0] = s->size_x - 1; - s->opts.filter_len[1] = s->size_y - 1; - - s->opts.filter_norm[0] = s->opts.filter_len[0]*2 + 1; - s->opts.filter_norm[0] = 1.0/(s->opts.filter_norm[0]*s->opts.filter_norm[0]); 
- s->opts.filter_norm[1] = s->opts.filter_norm[0]; - s->opts.filter_norm[2] = s->opts.filter_norm[0]; - s->opts.filter_norm[3] = s->opts.filter_norm[0]; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - int err; - AVFrame *out = NULL; - AVFilterContext *ctx = link->dst; - AvgBlurVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - err = AVERROR(ENOMEM); - goto fail; - } - - if (!s->initialized) - RET(init_filter(ctx, in)); - - RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, - out, in, VK_NULL_HANDLE, - &s->opts, sizeof(s->opts))); - - err = av_frame_copy_props(out, in); - if (err < 0) - goto fail; - - av_frame_free(&in); - - return ff_filter_frame(outlink, out); - -fail: - av_frame_free(&in); - av_frame_free(&out); - return err; -} - -static void avgblur_vulkan_uninit(AVFilterContext *avctx) -{ - AvgBlurVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -#define OFFSET(x) offsetof(AvgBlurVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) -static const AVOption avgblur_vulkan_options[] = { - { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS }, - { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS }, - { "planes", "Set planes to filter (bitmask)", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS }, - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(avgblur_vulkan); - -static const AVFilterPad avgblur_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = 
&avgblur_vulkan_filter_frame, - .config_props = &ff_vk_filter_config_input, - }, -}; - -static const AVFilterPad avgblur_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_output, - }, -}; - -const FFFilter ff_vf_avgblur_vulkan = { - .p.name = "avgblur_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Apply avgblur mask to input video"), - .p.priv_class = &avgblur_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(AvgBlurVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &avgblur_vulkan_uninit, - FILTER_INPUTS(avgblur_vulkan_inputs), - FILTER_OUTPUTS(avgblur_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 39dcae8b9e6f6dcb149fa45416f07e10f7c0c5e3 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:45:30 +0000 Subject: [PATCH 002/118] Changing vulkan file directory --- libavfilter/vulkan/vf_avgblur_vulkan.c | 259 +++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 libavfilter/vulkan/vf_avgblur_vulkan.c diff --git a/libavfilter/vulkan/vf_avgblur_vulkan.c b/libavfilter/vulkan/vf_avgblur_vulkan.c new file mode 100644 index 0000000000..9f33ec28c1 --- /dev/null +++ b/libavfilter/vulkan/vf_avgblur_vulkan.c @@ -0,0 +1,259 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "libavutil/opt.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +#include "libavfilter/video.h" + +typedef struct AvgBlurVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + + /* Push constants / options */ + struct { + float filter_norm[4]; + int32_t filter_len[2]; + } opts; + + int size_x; + int size_y; + int planes; +} AvgBlurVulkanContext; + +static const char blur_kernel[] = { + C(0, void distort(const ivec2 pos, const int idx) ) + C(0, { ) + C(1, vec4 sum = vec4(0); ) + C(1, for (int y = -filter_len.y; y <= filter_len.y; y++) ) + C(1, for (int x = -filter_len.x; x <= filter_len.x; x++) ) + C(2, sum += imageLoad(input_img[idx], pos + ivec2(x, y)); ) + C(0, ) + C(1, imageStore(output_img[idx], pos, sum * filter_norm); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + AvgBlurVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVulkanShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, 
&s->shd, "avgblur", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 1, 1, + 0)); + shd = &s->shd; + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc, 2, 0, 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec4 filter_norm; ); + GLSLC(1, ivec2 filter_len; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + GLSLD( blur_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, vec4 res; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); + if (s->planes & (1 << i)) { + GLSLF(1, distort(pos, %i); ,i); + } else { + GLSLF(1, res = imageLoad(input_img[%i], pos); ,i); + GLSLF(1, imageStore(output_img[%i], pos, res); ,i); + } + } + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + s->opts.filter_len[0] = s->size_x - 1; + s->opts.filter_len[1] = s->size_y - 1; + + s->opts.filter_norm[0] = s->opts.filter_len[0]*2 + 1; + s->opts.filter_norm[0] = 
1.0/(s->opts.filter_norm[0]*s->opts.filter_norm[0]); + s->opts.filter_norm[1] = s->opts.filter_norm[0]; + s->opts.filter_norm[2] = s->opts.filter_norm[0]; + s->opts.filter_norm[3] = s->opts.filter_norm[0]; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFrame *out = NULL; + AVFilterContext *ctx = link->dst; + AvgBlurVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, + out, in, VK_NULL_HANDLE, + &s->opts, sizeof(s->opts))); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static void avgblur_vulkan_uninit(AVFilterContext *avctx) +{ + AvgBlurVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(AvgBlurVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption avgblur_vulkan_options[] = { + { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS }, + { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, { .i64 = 3 }, 1, 32, .flags = FLAGS }, + { "planes", "Set planes to filter (bitmask)", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(avgblur_vulkan); + +static const AVFilterPad avgblur_vulkan_inputs[] = { + { + .name = "default", + .type = 
AVMEDIA_TYPE_VIDEO, + .filter_frame = &avgblur_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad avgblur_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, +}; + +const FFFilter ff_vf_avgblur_vulkan = { + .p.name = "avgblur_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Apply avgblur mask to input video"), + .p.priv_class = &avgblur_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(AvgBlurVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &avgblur_vulkan_uninit, + FILTER_INPUTS(avgblur_vulkan_inputs), + FILTER_OUTPUTS(avgblur_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From 39c595bcae324d35dcd97de9e85b3b5f653bca21 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:46:28 +0000 Subject: [PATCH 003/118] Changing vulkan file directory --- libavfilter/vf_blackdetect_vulkan.c | 445 ---------------------------- 1 file changed, 445 deletions(-) delete mode 100644 libavfilter/vf_blackdetect_vulkan.c diff --git a/libavfilter/vf_blackdetect_vulkan.c b/libavfilter/vf_blackdetect_vulkan.c deleted file mode 100644 index 279b057148..0000000000 --- a/libavfilter/vf_blackdetect_vulkan.c +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright 2025 (c) Niklas Haas - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <float.h> -#include "libavutil/vulkan_spirv.h" -#include "libavutil/opt.h" -#include "libavutil/timestamp.h" -#include "vulkan_filter.h" - -#include "filters.h" -#include "video.h" - -typedef struct BlackDetectVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - AVBufferPool *sum_buf_pool; - - double black_min_duration_time; - double picture_black_ratio_th; - double pixel_black_th; - int alpha; - - int64_t black_start; -} BlackDetectVulkanContext; - -typedef struct BlackDetectPushData { - float threshold; -} BlackDetectPushData; - -typedef struct BlackDetectBuf { -#define SLICES 16 - uint32_t slice_sum[SLICES]; -} BlackDetectBuf; - -static av_cold int init_filter(AVFilterContext *ctx) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - BlackDetectVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanShader *shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - const int plane = s->alpha ? 
3 : 0; - - const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format); - if (pixdesc->flags & AV_PIX_FMT_FLAG_RGB) { - av_log(ctx, AV_LOG_ERROR, "RGB inputs are not supported\n"); - return AVERROR(ENOTSUP); - } - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "blackdetect", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_KHR_shader_subgroup_ballot" }, 1, - 32, 32, 1, - 0)); - shd = &s->shd; - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, float threshold; ); - GLSLC(0, }; ); - - ff_vk_shader_add_push_const(shd, 0, sizeof(BlackDetectPushData), - VK_SHADER_STAGE_COMPUTE_BIT); - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = av_pix_fmt_count_planes(s->vkctx.input_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, { - .name = "sum_buffer", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "uint slice_sum[];", - } - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); - - GLSLC(0, shared uint wg_sum; ); - GLSLC(0, ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, wg_sum = 0u; ); - GLSLC(1, barrier(); ); - GLSLC(0, ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, if (!IS_WITHIN(pos, imageSize(input_img[%d]))) ,plane); - GLSLC(2, return; ); - GLSLF(1, float value = imageLoad(input_img[%d], pos).x; ,plane); 
- GLSLC(1, uvec4 isblack = subgroupBallot(value <= threshold); ); - GLSLC(1, if (subgroupElect()) ); - GLSLC(2, atomicAdd(wg_sum, subgroupBallotBitCount(isblack)); ); - GLSLC(1, barrier(); ); - GLSLC(1, if (gl_LocalInvocationIndex == 0u) ); - GLSLF(2, atomicAdd(slice_sum[gl_WorkGroupID.x %% %du], wg_sum); ,SLICES); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->black_start = AV_NOPTS_VALUE; - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static void report_black_region(AVFilterContext *ctx, int64_t black_end) -{ - BlackDetectVulkanContext *s = ctx->priv; - const AVFilterLink *inlink = ctx->inputs[0]; - if (s->black_start == AV_NOPTS_VALUE) - return; - - if ((black_end - s->black_start) >= s->black_min_duration_time / av_q2d(inlink->time_base)) { - av_log(ctx, AV_LOG_INFO, - "black_start:%s black_end:%s black_duration:%s\n", - av_ts2timestr(s->black_start, &inlink->time_base), - av_ts2timestr(black_end, &inlink->time_base), - av_ts2timestr(black_end - s->black_start, &inlink->time_base)); - } -} - -static void evaluate(AVFilterLink *link, AVFrame *in, - const BlackDetectBuf *sum) -{ - AVFilterContext *ctx = link->dst; - BlackDetectVulkanContext *s = ctx->priv; - FilterLink *inl = ff_filter_link(link); - uint64_t nb_black_pixels = 0; - double ratio; - - for (int i = 0; i < FF_ARRAY_ELEMS(sum->slice_sum); i++) - nb_black_pixels += sum->slice_sum[i]; - - ratio = (double) nb_black_pixels / (link->w * link->h); - - av_log(ctx, AV_LOG_DEBUG, - "frame:%"PRId64" picture_black_ratio:%f pts:%s t:%s type:%c\n", - inl->frame_count_out, ratio, - av_ts2str(in->pts), av_ts2timestr(in->pts, &in->time_base), - av_get_picture_type_char(in->pict_type)); - - if (ratio >= s->picture_black_ratio_th) 
{ - if (s->black_start == AV_NOPTS_VALUE) { - s->black_start = in->pts; - av_dict_set(&in->metadata, "lavfi.black_start", - av_ts2timestr(in->pts, &in->time_base), 0); - } - } else if (s->black_start != AV_NOPTS_VALUE) { - report_black_region(ctx, in->pts); - av_dict_set(&in->metadata, "lavfi.black_end", - av_ts2timestr(in->pts, &in->time_base), 0); - s->black_start = AV_NOPTS_VALUE; - } -} - -static int blackdetect_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - int err; - AVFilterContext *ctx = link->dst; - BlackDetectVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - - VkImageView in_views[AV_NUM_DATA_POINTERS]; - VkImageMemoryBarrier2 img_bar[4]; - int nb_img_bar = 0; - - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - FFVkExecContext *exec = NULL; - AVBufferRef *sum_buf = NULL; - FFVkBuffer *sum_vk; - - BlackDetectBuf *sum; - BlackDetectPushData push_data; - - if (in->color_range == AVCOL_RANGE_JPEG || s->alpha) { - push_data.threshold = s->pixel_black_th; - } else { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(vkctx->input_format); - const int depth = desc->comp[0].depth; - const int ymin = 16 << (depth - 8); - const int ymax = 235 << (depth - 8); - const int imax = (1 << depth) - 1; - push_data.threshold = (s->pixel_black_th * (ymax - ymin) + ymin) / imax; - } - - if (!s->initialized) - RET(init_filter(ctx)); - - err = ff_vk_get_pooled_buffer(vkctx, &s->sum_buf_pool, &sum_buf, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - NULL, - sizeof(BlackDetectBuf), - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - if (err < 0) - return err; - sum_vk = (FFVkBuffer *)sum_buf->data; - sum = (BlackDetectBuf *) sum_vk->mapped_mem; - - exec = ff_vk_exec_get(vkctx, &s->e); - ff_vk_exec_start(vkctx, exec); - - RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - 
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT)); - - ff_vk_shader_update_img_array(vkctx, exec, &s->shd, in, in_views, 0, 0, - VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - - ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - /* zero sum buffer */ - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = VK_PIPELINE_STAGE_2_NONE, - .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = sum_vk->buf, - .size = sum_vk->size, - .offset = 0, - }, - .bufferMemoryBarrierCount = 1, - }); - - vk->CmdFillBuffer(exec->buf, sum_vk->buf, 0, sum_vk->size, 0x0); - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = sum_vk->buf, - .size = sum_vk->size, - .offset = 0, - }, - .bufferMemoryBarrierCount = 1, - }); - - RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd, 0, 1, 0, - sum_vk, 0, sum_vk->size, - 
VK_FORMAT_UNDEFINED)); - - ff_vk_exec_bind_shader(vkctx, exec, &s->shd); - ff_vk_shader_update_push_const(vkctx, exec, &s->shd, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(push_data), &push_data); - - vk->CmdDispatch(exec->buf, - FFALIGN(in->width, s->shd.lg_size[0]) / s->shd.lg_size[0], - FFALIGN(in->height, s->shd.lg_size[1]) / s->shd.lg_size[1], - s->shd.lg_size[2]); - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_HOST_BIT, - .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .dstAccessMask = VK_ACCESS_HOST_READ_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = sum_vk->buf, - .size = sum_vk->size, - .offset = 0, - }, - .bufferMemoryBarrierCount = 1, - }); - - RET(ff_vk_exec_submit(vkctx, exec)); - ff_vk_exec_wait(vkctx, exec); - evaluate(link, in, sum); - - av_buffer_unref(&sum_buf); - return ff_filter_frame(outlink, in); - -fail: - if (exec) - ff_vk_exec_discard_deps(&s->vkctx, exec); - av_frame_free(&in); - av_buffer_unref(&sum_buf); - return err; -} - -static void blackdetect_vulkan_uninit(AVFilterContext *avctx) -{ - BlackDetectVulkanContext *s = avctx->priv; - AVFilterLink *inlink = avctx->inputs[0]; - FilterLink *inl = ff_filter_link(inlink); - FFVulkanContext *vkctx = &s->vkctx; - - report_black_region(avctx, inl->current_pts); - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - av_buffer_pool_uninit(&s->sum_buf_pool); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -static int config_output(AVFilterLink *outlink) -{ - AVFilterContext *ctx = outlink->src; - BlackDetectVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const 
AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(vkctx->input_format); - - if (s->alpha && !(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) { - av_log(ctx, AV_LOG_ERROR, "Input format %s does not have an alpha channel\n", - av_get_pix_fmt_name(vkctx->input_format)); - return AVERROR(EINVAL); - } - - if (desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_XYZ) || - !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) { - av_log(ctx, AV_LOG_ERROR, "Input format %s is not planar YUV\n", - av_get_pix_fmt_name(vkctx->input_format)); - return AVERROR(EINVAL); - } - - return ff_vk_filter_config_output(outlink); -} - -#define OFFSET(x) offsetof(BlackDetectVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) -static const AVOption blackdetect_vulkan_options[] = { - { "d", "set minimum detected black duration in seconds", OFFSET(black_min_duration_time), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, DBL_MAX, FLAGS }, - { "black_min_duration", "set minimum detected black duration in seconds", OFFSET(black_min_duration_time), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, DBL_MAX, FLAGS }, - { "picture_black_ratio_th", "set the picture black ratio threshold", OFFSET(picture_black_ratio_th), AV_OPT_TYPE_DOUBLE, {.dbl=.98}, 0, 1, FLAGS }, - { "pic_th", "set the picture black ratio threshold", OFFSET(picture_black_ratio_th), AV_OPT_TYPE_DOUBLE, {.dbl=.98}, 0, 1, FLAGS }, - { "pixel_black_th", "set the pixel black threshold", OFFSET(pixel_black_th), AV_OPT_TYPE_DOUBLE, {.dbl=.10}, 0, 1, FLAGS }, - { "pix_th", "set the pixel black threshold", OFFSET(pixel_black_th), AV_OPT_TYPE_DOUBLE, {.dbl=.10}, 0, 1, FLAGS }, - { "alpha", "check alpha instead of luma", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, - { NULL } -}; - -AVFILTER_DEFINE_CLASS(blackdetect_vulkan); - -static const AVFilterPad blackdetect_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &blackdetect_vulkan_filter_frame, - .config_props = &ff_vk_filter_config_input, - }, -}; - 
-static const AVFilterPad blackdetect_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &config_output, - }, -}; - -const FFFilter ff_vf_blackdetect_vulkan = { - .p.name = "blackdetect_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Detect video intervals that are (almost) black."), - .p.priv_class = &blackdetect_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(BlackDetectVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &blackdetect_vulkan_uninit, - FILTER_INPUTS(blackdetect_vulkan_inputs), - FILTER_OUTPUTS(blackdetect_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 32384df38c93bbb7f6aa317482e80325ea0af68b Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:48:05 +0000 Subject: [PATCH 004/118] Changing vulkan file directory --- libavfilter/vulkan/vf_blackdetect_vulkan.c | 445 +++++++++++++++++++++ 1 file changed, 445 insertions(+) create mode 100644 libavfilter/vulkan/vf_blackdetect_vulkan.c diff --git a/libavfilter/vulkan/vf_blackdetect_vulkan.c b/libavfilter/vulkan/vf_blackdetect_vulkan.c new file mode 100644 index 0000000000..b7330c297e --- /dev/null +++ b/libavfilter/vulkan/vf_blackdetect_vulkan.c @@ -0,0 +1,445 @@ +/* + * Copyright 2025 (c) Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <float.h> +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "libavutil/opt.h" +#include "libavutil/timestamp.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +#include "libavfilter/video.h" + +typedef struct BlackDetectVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + AVBufferPool *sum_buf_pool; + + double black_min_duration_time; + double picture_black_ratio_th; + double pixel_black_th; + int alpha; + + int64_t black_start; +} BlackDetectVulkanContext; + +typedef struct BlackDetectPushData { + float threshold; +} BlackDetectPushData; + +typedef struct BlackDetectBuf { +#define SLICES 16 + uint32_t slice_sum[SLICES]; +} BlackDetectBuf; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + BlackDetectVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + const int plane = s->alpha ? 
3 : 0; + + const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format); + if (pixdesc->flags & AV_PIX_FMT_FLAG_RGB) { + av_log(ctx, AV_LOG_ERROR, "RGB inputs are not supported\n"); + return AVERROR(ENOTSUP); + } + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, &s->shd, "blackdetect", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_KHR_shader_subgroup_ballot" }, 1, + 32, 32, 1, + 0)); + shd = &s->shd; + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, float threshold; ); + GLSLC(0, }; ); + + ff_vk_shader_add_push_const(shd, 0, sizeof(BlackDetectPushData), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, { + .name = "sum_buffer", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "uint slice_sum[];", + } + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); + + GLSLC(0, shared uint wg_sum; ); + GLSLC(0, ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, wg_sum = 0u; ); + GLSLC(1, barrier(); ); + GLSLC(0, ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, if (!IS_WITHIN(pos, imageSize(input_img[%d]))) ,plane); + GLSLC(2, return; ); + GLSLF(1, float value = imageLoad(input_img[%d], pos).x; ,plane); 
+ GLSLC(1, uvec4 isblack = subgroupBallot(value <= threshold); ); + GLSLC(1, if (subgroupElect()) ); + GLSLC(2, atomicAdd(wg_sum, subgroupBallotBitCount(isblack)); ); + GLSLC(1, barrier(); ); + GLSLC(1, if (gl_LocalInvocationIndex == 0u) ); + GLSLF(2, atomicAdd(slice_sum[gl_WorkGroupID.x %% %du], wg_sum); ,SLICES); + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->black_start = AV_NOPTS_VALUE; + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static void report_black_region(AVFilterContext *ctx, int64_t black_end) +{ + BlackDetectVulkanContext *s = ctx->priv; + const AVFilterLink *inlink = ctx->inputs[0]; + if (s->black_start == AV_NOPTS_VALUE) + return; + + if ((black_end - s->black_start) >= s->black_min_duration_time / av_q2d(inlink->time_base)) { + av_log(ctx, AV_LOG_INFO, + "black_start:%s black_end:%s black_duration:%s\n", + av_ts2timestr(s->black_start, &inlink->time_base), + av_ts2timestr(black_end, &inlink->time_base), + av_ts2timestr(black_end - s->black_start, &inlink->time_base)); + } +} + +static void evaluate(AVFilterLink *link, AVFrame *in, + const BlackDetectBuf *sum) +{ + AVFilterContext *ctx = link->dst; + BlackDetectVulkanContext *s = ctx->priv; + FilterLink *inl = ff_filter_link(link); + uint64_t nb_black_pixels = 0; + double ratio; + + for (int i = 0; i < FF_ARRAY_ELEMS(sum->slice_sum); i++) + nb_black_pixels += sum->slice_sum[i]; + + ratio = (double) nb_black_pixels / (link->w * link->h); + + av_log(ctx, AV_LOG_DEBUG, + "frame:%"PRId64" picture_black_ratio:%f pts:%s t:%s type:%c\n", + inl->frame_count_out, ratio, + av_ts2str(in->pts), av_ts2timestr(in->pts, &in->time_base), + av_get_picture_type_char(in->pict_type)); + + if (ratio >= s->picture_black_ratio_th) 
{
+        if (s->black_start == AV_NOPTS_VALUE) {
+            s->black_start = in->pts;
+            av_dict_set(&in->metadata, "lavfi.black_start",
+                        av_ts2timestr(in->pts, &in->time_base), 0);
+        }
+    } else if (s->black_start != AV_NOPTS_VALUE) {
+        report_black_region(ctx, in->pts);
+        av_dict_set(&in->metadata, "lavfi.black_end",
+                    av_ts2timestr(in->pts, &in->time_base), 0);
+        s->black_start = AV_NOPTS_VALUE;
+    }
+}
+
+static int blackdetect_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
+{ /* Counts black pixels of `in` on the GPU, lets evaluate() tag it, then forwards it; all error exits go through fail:, which frees `in`. */
+    int err;
+    AVFilterContext *ctx = link->dst;
+    BlackDetectVulkanContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+
+    VkImageView in_views[AV_NUM_DATA_POINTERS];
+    VkImageMemoryBarrier2 img_bar[4];
+    int nb_img_bar = 0;
+
+    FFVulkanContext *vkctx = &s->vkctx;
+    FFVulkanFunctions *vk = &vkctx->vkfn;
+    FFVkExecContext *exec = NULL; /* stays NULL until ff_vk_exec_get(); fail: checks it */
+    AVBufferRef *sum_buf = NULL; /* NULL until the pool hands out a buffer; fail: unrefs it */
+    FFVkBuffer *sum_vk;
+
+    BlackDetectBuf *sum;
+    BlackDetectPushData push_data;
+
+    if (in->color_range == AVCOL_RANGE_JPEG || s->alpha) {
+        push_data.threshold = s->pixel_black_th; /* option value is used unscaled */
+    } else {
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(vkctx->input_format);
+        const int depth = desc->comp[0].depth;
+        const int ymin = 16 << (depth - 8);
+        const int ymax = 235 << (depth - 8);
+        const int imax = (1 << depth) - 1;
+        push_data.threshold = (s->pixel_black_th * (ymax - ymin) + ymin) / imax; /* rescale into [ymin, ymax], then divide by the max code value */
+    }
+
+    if (!s->initialized)
+        RET(init_filter(ctx));
+
+    err = ff_vk_get_pooled_buffer(vkctx, &s->sum_buf_pool, &sum_buf,
+                                  VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+                                  NULL,
+                                  sizeof(BlackDetectBuf),
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+    if (err < 0)
+        goto fail; /* a bare return here skipped av_frame_free(&in) and leaked the input frame */
+    sum_vk = (FFVkBuffer *)sum_buf->data;
+    sum = (BlackDetectBuf *) sum_vk->mapped_mem;
+
+    exec = ff_vk_exec_get(vkctx, &s->e);
+    ff_vk_exec_start(vkctx, exec);
+
+    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+    RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT));
+
+    ff_vk_shader_update_img_array(vkctx, exec, &s->shd, in, in_views, 0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE);
+
+    ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    /* zero sum buffer */
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = VK_PIPELINE_STAGE_2_NONE,
+            .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
+            .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = sum_vk->buf,
+            .size = sum_vk->size,
+            .offset = 0,
+        },
+        .bufferMemoryBarrierCount = 1,
+    });
+
+    vk->CmdFillBuffer(exec->buf, sum_vk->buf, 0, sum_vk->size, 0x0);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { /* transfer write -> compute read/write on the sum buffer, plus the image barriers collected above */
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+        .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
+            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = sum_vk->buf,
+            .size = sum_vk->size,
+            .offset = 0,
+        },
+        .bufferMemoryBarrierCount = 1,
+    });
+
+    RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd, 0, 1, 0,
+                                        sum_vk, 0, sum_vk->size,
+                                        VK_FORMAT_UNDEFINED));
+
+    ff_vk_exec_bind_shader(vkctx, exec, &s->shd);
+    ff_vk_shader_update_push_const(vkctx, exec, &s->shd, VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(push_data), &push_data);
+
+    vk->CmdDispatch(exec->buf,
+                    FFALIGN(in->width, s->shd.lg_size[0]) / s->shd.lg_size[0],
+                    FFALIGN(in->height, s->shd.lg_size[1]) / s->shd.lg_size[1],
+                    s->shd.lg_size[2]);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { /* compute read/write -> host read on the sum buffer */
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+            .dstStageMask = VK_PIPELINE_STAGE_2_HOST_BIT,
+            .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_HOST_READ_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = sum_vk->buf,
+            .size = sum_vk->size,
+            .offset = 0,
+        },
+        .bufferMemoryBarrierCount = 1,
+    });
+
+    RET(ff_vk_exec_submit(vkctx, exec));
+    ff_vk_exec_wait(vkctx, exec);
+    evaluate(link, in, sum); /* called only after ff_vk_exec_wait(); reads the mapped slice sums */
+
+    av_buffer_unref(&sum_buf);
+    return ff_filter_frame(outlink, in);
+
+fail:
+    if (exec)
+        ff_vk_exec_discard_deps(&s->vkctx, exec);
+    av_frame_free(&in);
+    av_buffer_unref(&sum_buf);
+    return err;
+}
+
+static void blackdetect_vulkan_uninit(AVFilterContext *avctx)
+{
+    BlackDetectVulkanContext *s = avctx->priv;
+    AVFilterLink *inlink = avctx->inputs[0];
+    FilterLink *inl = ff_filter_link(inlink); /* NOTE(review): inputs[0] is used unchecked below; confirm uninit never runs on an unlinked filter */
+    FFVulkanContext *vkctx = &s->vkctx;
+
+    report_black_region(avctx, inl->current_pts);
+
+    ff_vk_exec_pool_free(vkctx, &s->e);
+    ff_vk_shader_free(vkctx, &s->shd);
+
+    av_buffer_pool_uninit(&s->sum_buf_pool);
+
+    ff_vk_uninit(&s->vkctx);
+
+    s->initialized = 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    BlackDetectVulkanContext *s = ctx->priv;
+    FFVulkanContext *vkctx = &s->vkctx;
+    const
AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(vkctx->input_format); + + if (s->alpha && !(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) { + av_log(ctx, AV_LOG_ERROR, "Input format %s does not have an alpha channel\n", + av_get_pix_fmt_name(vkctx->input_format)); + return AVERROR(EINVAL); + } + + if (desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_XYZ) || + !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) { + av_log(ctx, AV_LOG_ERROR, "Input format %s is not planar YUV\n", + av_get_pix_fmt_name(vkctx->input_format)); + return AVERROR(EINVAL); + } + + return ff_vk_filter_config_output(outlink); +} + +#define OFFSET(x) offsetof(BlackDetectVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption blackdetect_vulkan_options[] = { + { "d", "set minimum detected black duration in seconds", OFFSET(black_min_duration_time), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, DBL_MAX, FLAGS }, + { "black_min_duration", "set minimum detected black duration in seconds", OFFSET(black_min_duration_time), AV_OPT_TYPE_DOUBLE, {.dbl=2}, 0, DBL_MAX, FLAGS }, + { "picture_black_ratio_th", "set the picture black ratio threshold", OFFSET(picture_black_ratio_th), AV_OPT_TYPE_DOUBLE, {.dbl=.98}, 0, 1, FLAGS }, + { "pic_th", "set the picture black ratio threshold", OFFSET(picture_black_ratio_th), AV_OPT_TYPE_DOUBLE, {.dbl=.98}, 0, 1, FLAGS }, + { "pixel_black_th", "set the pixel black threshold", OFFSET(pixel_black_th), AV_OPT_TYPE_DOUBLE, {.dbl=.10}, 0, 1, FLAGS }, + { "pix_th", "set the pixel black threshold", OFFSET(pixel_black_th), AV_OPT_TYPE_DOUBLE, {.dbl=.10}, 0, 1, FLAGS }, + { "alpha", "check alpha instead of luma", OFFSET(alpha), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(blackdetect_vulkan); + +static const AVFilterPad blackdetect_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &blackdetect_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + 
+static const AVFilterPad blackdetect_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &config_output, + }, +}; + +const FFFilter ff_vf_blackdetect_vulkan = { + .p.name = "blackdetect_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Detect video intervals that are (almost) black."), + .p.priv_class = &blackdetect_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(BlackDetectVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &blackdetect_vulkan_uninit, + FILTER_INPUTS(blackdetect_vulkan_inputs), + FILTER_OUTPUTS(blackdetect_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From eadf958003b170b5409c6580ba7369cb48da4f2d Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:48:41 +0000 Subject: [PATCH 005/118] Changing vulkan file directory --- libavfilter/vf_blend_vulkan.c | 399 ---------------------------------- 1 file changed, 399 deletions(-) delete mode 100644 libavfilter/vf_blend_vulkan.c diff --git a/libavfilter/vf_blend_vulkan.c b/libavfilter/vf_blend_vulkan.c deleted file mode 100644 index 57cf3c696b..0000000000 --- a/libavfilter/vf_blend_vulkan.c +++ /dev/null @@ -1,399 +0,0 @@ -/* - * copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com> - * Copyright (c) Lynne - * - * The blend modes are based on the blend.c. - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/random_seed.h" -#include "libavutil/vulkan_spirv.h" -#include "libavutil/opt.h" -#include "vulkan_filter.h" - -#include "filters.h" -#include "framesync.h" -#include "blend.h" -#include "video.h" - -#define IN_TOP 0 -#define IN_BOTTOM 1 - -typedef struct FilterParamsVulkan { - const char *blend; - const char *blend_func; - double opacity; - enum BlendMode mode; -} FilterParamsVulkan; - -typedef struct BlendVulkanContext { - FFVulkanContext vkctx; - FFFrameSync fs; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - - FilterParamsVulkan params[4]; - double all_opacity; - enum BlendMode all_mode; -} BlendVulkanContext; - -#define DEFINE_BLEND_MODE(MODE, EXPR) \ -static const char blend_##MODE[] = "blend_"#MODE; \ -static const char blend_##MODE##_func[] = { \ - C(0, vec4 blend_##MODE(vec4 top, vec4 bottom, float opacity) { ) \ - C(1, vec4 dst = EXPR; ) \ - C(1, return dst; ) \ - C(0, } ) \ -}; - -#define A top -#define B bottom - -#define FN(EXPR) A + ((EXPR) - A) * opacity - -DEFINE_BLEND_MODE(NORMAL, A * opacity + B * (1.0f - opacity)) -DEFINE_BLEND_MODE(MULTIPLY, FN(1.0f * A * B / 1.0f)) - -static inline void init_blend_func(FilterParamsVulkan *param) -{ -#define CASE(MODE) case BLEND_##MODE: \ - param->blend = blend_##MODE;\ - param->blend_func = blend_##MODE##_func; \ - break; - - switch (param->mode) { - CASE(NORMAL) - CASE(MULTIPLY) - default: param->blend = NULL; break; - } - -#undef CASE -} - -static int config_params(AVFilterContext *avctx) -{ - BlendVulkanContext *s = avctx->priv; - - for (int plane = 0; plane < FF_ARRAY_ELEMS(s->params); plane++) { - FilterParamsVulkan *param = &s->params[plane]; - - if (s->all_mode >= 0) - param->mode = s->all_mode; - if 
(s->all_opacity < 1) - param->opacity = s->all_opacity; - - init_blend_func(param); - if (!param->blend) { - av_log(avctx, AV_LOG_ERROR, - "Currently the blend mode specified is not supported yet.\n"); - return AVERROR(EINVAL); - } - } - - return 0; -} - -static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, - char *res, int res_len, int flags) -{ - int ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags); - if (ret < 0) - return ret; - - return config_params(ctx); -} - -static av_cold int init_filter(AVFilterContext *avctx) -{ - int err = 0; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - BlendVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd = &s->shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(avctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "blend", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 32, 1, - 0)); - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "top_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "bottom_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - 
}, - { - .name = "output_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 3, 0, 0)); - - for (int i = 0, j = 0; i < planes; i++) { - for (j = 0; j < i; j++) - if (s->params[i].blend_func == s->params[j].blend_func) - break; - /* note: the bracket is needed, for GLSLD is a macro with multiple statements. */ - if (j == i) { - GLSLD(s->params[i].blend_func); - } - } - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - GLSLF(2, const vec4 top = imageLoad(top_images[%i], pos); ,i); - GLSLF(2, const vec4 bottom = imageLoad(bottom_images[%i], pos); ,i); - GLSLF(2, const float opacity = %f; ,s->params[i].opacity); - GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend); - GLSLC(0, ); - GLSLF(2, imageStore(output_images[%i], pos, dst); ,i); - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static int blend_frame(FFFrameSync *fs) -{ - int err; - AVFilterContext *avctx = fs->parent; - BlendVulkanContext *s = avctx->priv; - AVFilterLink *outlink = avctx->outputs[0]; - AVFrame *top, *bottom, *out; - - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - err = AVERROR(ENOMEM); - goto fail; - } - - 
RET(ff_framesync_get_frame(fs, IN_TOP, &top, 0)); - RET(ff_framesync_get_frame(fs, IN_BOTTOM, &bottom, 0)); - - RET(av_frame_copy_props(out, top)); - - if (!s->initialized) { - AVHWFramesContext *top_fc = (AVHWFramesContext*)top->hw_frames_ctx->data; - AVHWFramesContext *bottom_fc = (AVHWFramesContext*)bottom->hw_frames_ctx->data; - if (top_fc->sw_format != bottom_fc->sw_format) { - av_log(avctx, AV_LOG_ERROR, - "Currently the sw format of the bottom video need to match the top!\n"); - err = AVERROR(EINVAL); - goto fail; - } - RET(init_filter(avctx)); - } - - RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, - out, (AVFrame *[]){ top, bottom }, 2, - VK_NULL_HANDLE, NULL, 0)); - - return ff_filter_frame(outlink, out); - -fail: - av_frame_free(&out); - return err; -} - -static av_cold int init(AVFilterContext *avctx) -{ - BlendVulkanContext *s = avctx->priv; - - s->fs.on_event = blend_frame; - - return ff_vk_filter_init(avctx); -} - -static av_cold void uninit(AVFilterContext *avctx) -{ - BlendVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - ff_vk_uninit(&s->vkctx); - ff_framesync_uninit(&s->fs); - - s->initialized = 0; -} - -static int config_props_output(AVFilterLink *outlink) -{ - int err; - FilterLink *outl = ff_filter_link(outlink); - AVFilterContext *avctx = outlink->src; - BlendVulkanContext *s = avctx->priv; - AVFilterLink *toplink = avctx->inputs[IN_TOP]; - FilterLink *tl = ff_filter_link(toplink); - AVFilterLink *bottomlink = avctx->inputs[IN_BOTTOM]; - - if (toplink->w != bottomlink->w || toplink->h != bottomlink->h) { - av_log(avctx, AV_LOG_ERROR, "First input link %s parameters " - "(size %dx%d) do not match the corresponding " - "second input link %s parameters (size %dx%d)\n", - avctx->input_pads[IN_TOP].name, toplink->w, toplink->h, - avctx->input_pads[IN_BOTTOM].name, bottomlink->w, bottomlink->h); - return AVERROR(EINVAL); - } - - 
outlink->sample_aspect_ratio = toplink->sample_aspect_ratio; - outl->frame_rate = tl->frame_rate; - - RET(ff_vk_filter_config_output(outlink)); - - RET(ff_framesync_init_dualinput(&s->fs, avctx)); - - RET(ff_framesync_configure(&s->fs)); - outlink->time_base = s->fs.time_base; - - RET(config_params(avctx)); - -fail: - return err; -} - -static int activate(AVFilterContext *avctx) -{ - BlendVulkanContext *s = avctx->priv; - return ff_framesync_activate(&s->fs); -} - -#define OFFSET(x) offsetof(BlendVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) - -static const AVOption blend_vulkan_options[] = { - { "c0_mode", "set component #0 blend mode", OFFSET(params[0].mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, BLEND_NB - 1, FLAGS, .unit = "mode" }, - { "c1_mode", "set component #1 blend mode", OFFSET(params[1].mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, BLEND_NB - 1, FLAGS, .unit = "mode" }, - { "c2_mode", "set component #2 blend mode", OFFSET(params[2].mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, BLEND_NB - 1, FLAGS, .unit = "mode" }, - { "c3_mode", "set component #3 blend mode", OFFSET(params[3].mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, BLEND_NB - 1, FLAGS, .unit = "mode" }, - { "all_mode", "set blend mode for all components", OFFSET(all_mode), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, BLEND_NB - 1, FLAGS, .unit = "mode" }, - { "normal", "", 0, AV_OPT_TYPE_CONST, { .i64 = BLEND_NORMAL }, 0, 0, FLAGS, .unit = "mode" }, - { "multiply", "", 0, AV_OPT_TYPE_CONST, { .i64 = BLEND_MULTIPLY }, 0, 0, FLAGS, .unit = "mode" }, - - { "c0_opacity", "set color component #0 opacity", OFFSET(params[0].opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, - { "c1_opacity", "set color component #1 opacity", OFFSET(params[1].opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, - { "c2_opacity", "set color component #2 opacity", OFFSET(params[2].opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, - { "c3_opacity", "set color component #3 opacity", 
OFFSET(params[3].opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, - { "all_opacity", "set opacity for all color components", OFFSET(all_opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, - - { NULL } -}; - -AVFILTER_DEFINE_CLASS(blend_vulkan); - -static const AVFilterPad blend_vulkan_inputs[] = { - { - .name = "top", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_input, - }, - { - .name = "bottom", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_input, - }, -}; - - -static const AVFilterPad blend_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &config_props_output, - } -}; - -const FFFilter ff_vf_blend_vulkan = { - .p.name = "blend_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Blend two video frames in Vulkan"), - .p.priv_class = &blend_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(BlendVulkanContext), - .init = &init, - .uninit = &uninit, - .activate = &activate, - FILTER_INPUTS(blend_vulkan_inputs), - FILTER_OUTPUTS(blend_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, - .process_command = &process_command, -}; -- 2.49.1 From 58cedfe6764899d191e4f3e42f557023b63eb477 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:49:14 +0000 Subject: [PATCH 006/118] Changing vulkan file directory --- libavfilter/vulkan/vf_blend_vulkan.c | 399 +++++++++++++++++++++++++++ 1 file changed, 399 insertions(+) create mode 100644 libavfilter/vulkan/vf_blend_vulkan.c diff --git a/libavfilter/vulkan/vf_blend_vulkan.c b/libavfilter/vulkan/vf_blend_vulkan.c new file mode 100644 index 0000000000..83e217be02 --- /dev/null +++ b/libavfilter/vulkan/vf_blend_vulkan.c @@ -0,0 +1,399 @@ +/* + * copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com> + * Copyright (c) Lynne + * + * The blend modes are based on the blend.c. + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "libavutil/opt.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +#include "libavfilter/framesync.h" +#include "libavfilter/blend.h" +#include "libavfilter/video.h" + +#define IN_TOP 0 +#define IN_BOTTOM 1 + +typedef struct FilterParamsVulkan { + const char *blend; + const char *blend_func; + double opacity; + enum BlendMode mode; +} FilterParamsVulkan; + +typedef struct BlendVulkanContext { + FFVulkanContext vkctx; + FFFrameSync fs; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + + FilterParamsVulkan params[4]; + double all_opacity; + enum BlendMode all_mode; +} BlendVulkanContext; + +#define DEFINE_BLEND_MODE(MODE, EXPR) \ +static const char blend_##MODE[] = "blend_"#MODE; \ +static const char blend_##MODE##_func[] = { \ + C(0, vec4 blend_##MODE(vec4 top, vec4 bottom, float opacity) { ) \ + C(1, vec4 dst = EXPR; ) \ + C(1, return dst; ) \ + C(0, } ) \ +}; + +#define A top +#define B bottom + +#define FN(EXPR) A + ((EXPR) - A) * opacity + +DEFINE_BLEND_MODE(NORMAL, A * opacity + B * (1.0f - opacity)) +DEFINE_BLEND_MODE(MULTIPLY, FN(1.0f * A * B / 1.0f)) + +static inline void 
init_blend_func(FilterParamsVulkan *param) +{ +#define CASE(MODE) case BLEND_##MODE: \ + param->blend = blend_##MODE;\ + param->blend_func = blend_##MODE##_func; \ + break; + + switch (param->mode) { + CASE(NORMAL) + CASE(MULTIPLY) + default: param->blend = NULL; break; + } + +#undef CASE +} + +static int config_params(AVFilterContext *avctx) +{ + BlendVulkanContext *s = avctx->priv; + + for (int plane = 0; plane < FF_ARRAY_ELEMS(s->params); plane++) { + FilterParamsVulkan *param = &s->params[plane]; + + if (s->all_mode >= 0) + param->mode = s->all_mode; + if (s->all_opacity < 1) + param->opacity = s->all_opacity; + + init_blend_func(param); + if (!param->blend) { + av_log(avctx, AV_LOG_ERROR, + "Currently the blend mode specified is not supported yet.\n"); + return AVERROR(EINVAL); + } + } + + return 0; +} + +static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, + char *res, int res_len, int flags) +{ + int ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags); + if (ret < 0) + return ret; + + return config_params(ctx); +} + +static av_cold int init_filter(AVFilterContext *avctx) +{ + int err = 0; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + BlendVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVulkanShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(avctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, &s->shd, "blend", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 32, 1, + 0)); + + 
desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "top_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "bottom_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "output_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 3, 0, 0)); + + for (int i = 0, j = 0; i < planes; i++) { + for (j = 0; j < i; j++) + if (s->params[i].blend_func == s->params[j].blend_func) + break; + /* note: the bracket is needed, for GLSLD is a macro with multiple statements. 
*/ + if (j == i) { + GLSLD(s->params[i].blend_func); + } + } + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + GLSLF(2, const vec4 top = imageLoad(top_images[%i], pos); ,i); + GLSLF(2, const vec4 bottom = imageLoad(bottom_images[%i], pos); ,i); + GLSLF(2, const float opacity = %f; ,s->params[i].opacity); + GLSLF(2, vec4 dst = %s(top, bottom, opacity); ,s->params[i].blend); + GLSLC(0, ); + GLSLF(2, imageStore(output_images[%i], pos, dst); ,i); + GLSLC(1, } ); + } + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int blend_frame(FFFrameSync *fs) +{ + int err; + AVFilterContext *avctx = fs->parent; + BlendVulkanContext *s = avctx->priv; + AVFilterLink *outlink = avctx->outputs[0]; + AVFrame *top, *bottom, *out; + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + RET(ff_framesync_get_frame(fs, IN_TOP, &top, 0)); + RET(ff_framesync_get_frame(fs, IN_BOTTOM, &bottom, 0)); + + RET(av_frame_copy_props(out, top)); + + if (!s->initialized) { + AVHWFramesContext *top_fc = (AVHWFramesContext*)top->hw_frames_ctx->data; + AVHWFramesContext *bottom_fc = (AVHWFramesContext*)bottom->hw_frames_ctx->data; + if (top_fc->sw_format != bottom_fc->sw_format) { + av_log(avctx, AV_LOG_ERROR, + "Currently the sw format of the bottom video need to match the top!\n"); + err = AVERROR(EINVAL); + goto fail; + } + RET(init_filter(avctx)); + } + + 
RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, + out, (AVFrame *[]){ top, bottom }, 2, + VK_NULL_HANDLE, NULL, 0)); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&out); + return err; +} + +static av_cold int init(AVFilterContext *avctx) +{ + BlendVulkanContext *s = avctx->priv; + + s->fs.on_event = blend_frame; + + return ff_vk_filter_init(avctx); +} + +static av_cold void uninit(AVFilterContext *avctx) +{ + BlendVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + ff_vk_uninit(&s->vkctx); + ff_framesync_uninit(&s->fs); + + s->initialized = 0; +} + +static int config_props_output(AVFilterLink *outlink) +{ + int err; + FilterLink *outl = ff_filter_link(outlink); + AVFilterContext *avctx = outlink->src; + BlendVulkanContext *s = avctx->priv; + AVFilterLink *toplink = avctx->inputs[IN_TOP]; + FilterLink *tl = ff_filter_link(toplink); + AVFilterLink *bottomlink = avctx->inputs[IN_BOTTOM]; + + if (toplink->w != bottomlink->w || toplink->h != bottomlink->h) { + av_log(avctx, AV_LOG_ERROR, "First input link %s parameters " + "(size %dx%d) do not match the corresponding " + "second input link %s parameters (size %dx%d)\n", + avctx->input_pads[IN_TOP].name, toplink->w, toplink->h, + avctx->input_pads[IN_BOTTOM].name, bottomlink->w, bottomlink->h); + return AVERROR(EINVAL); + } + + outlink->sample_aspect_ratio = toplink->sample_aspect_ratio; + outl->frame_rate = tl->frame_rate; + + RET(ff_vk_filter_config_output(outlink)); + + RET(ff_framesync_init_dualinput(&s->fs, avctx)); + + RET(ff_framesync_configure(&s->fs)); + outlink->time_base = s->fs.time_base; + + RET(config_params(avctx)); + +fail: + return err; +} + +static int activate(AVFilterContext *avctx) +{ + BlendVulkanContext *s = avctx->priv; + return ff_framesync_activate(&s->fs); +} + +#define OFFSET(x) offsetof(BlendVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | 
AV_OPT_FLAG_VIDEO_PARAM) + +static const AVOption blend_vulkan_options[] = { + { "c0_mode", "set component #0 blend mode", OFFSET(params[0].mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, BLEND_NB - 1, FLAGS, .unit = "mode" }, + { "c1_mode", "set component #1 blend mode", OFFSET(params[1].mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, BLEND_NB - 1, FLAGS, .unit = "mode" }, + { "c2_mode", "set component #2 blend mode", OFFSET(params[2].mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, BLEND_NB - 1, FLAGS, .unit = "mode" }, + { "c3_mode", "set component #3 blend mode", OFFSET(params[3].mode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, BLEND_NB - 1, FLAGS, .unit = "mode" }, + { "all_mode", "set blend mode for all components", OFFSET(all_mode), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, BLEND_NB - 1, FLAGS, .unit = "mode" }, + { "normal", "", 0, AV_OPT_TYPE_CONST, { .i64 = BLEND_NORMAL }, 0, 0, FLAGS, .unit = "mode" }, + { "multiply", "", 0, AV_OPT_TYPE_CONST, { .i64 = BLEND_MULTIPLY }, 0, 0, FLAGS, .unit = "mode" }, + + { "c0_opacity", "set color component #0 opacity", OFFSET(params[0].opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, + { "c1_opacity", "set color component #1 opacity", OFFSET(params[1].opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, + { "c2_opacity", "set color component #2 opacity", OFFSET(params[2].opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, + { "c3_opacity", "set color component #3 opacity", OFFSET(params[3].opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, + { "all_opacity", "set opacity for all color components", OFFSET(all_opacity), AV_OPT_TYPE_DOUBLE, { .dbl = 1 }, 0, 1, FLAGS }, + + { NULL } +}; + +AVFILTER_DEFINE_CLASS(blend_vulkan); + +static const AVFilterPad blend_vulkan_inputs[] = { + { + .name = "top", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_input, + }, + { + .name = "bottom", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_input, + }, +}; + + +static const AVFilterPad 
blend_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &config_props_output, + } +}; + +const FFFilter ff_vf_blend_vulkan = { + .p.name = "blend_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Blend two video frames in Vulkan"), + .p.priv_class = &blend_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(BlendVulkanContext), + .init = &init, + .uninit = &uninit, + .activate = &activate, + FILTER_INPUTS(blend_vulkan_inputs), + FILTER_OUTPUTS(blend_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, + .process_command = &process_command, +}; -- 2.49.1 From 8dd9511e075f43937fdcc39335adb76150e32497 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:49:57 +0000 Subject: [PATCH 007/118] Changing vulkan file directory --- libavfilter/vf_bwdif_vulkan.c | 337 ---------------------------------- 1 file changed, 337 deletions(-) delete mode 100644 libavfilter/vf_bwdif_vulkan.c diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c deleted file mode 100644 index 549e814886..0000000000 --- a/libavfilter/vf_bwdif_vulkan.c +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Copyright (c) Lynne - * Copyright (C) 2018 Philip Langdale <philipl@overt.org> - * Copyright (C) 2016 Thomas Mundt <loudmax@yahoo.de> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/random_seed.h" -#include "libavutil/opt.h" -#include "libavutil/vulkan_spirv.h" -#include "vulkan_filter.h" -#include "yadif.h" -#include "filters.h" - -typedef struct BWDIFVulkanContext { - YADIFContext yadif; - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; -} BWDIFVulkanContext; - -typedef struct BWDIFParameters { - int parity; - int tff; - int current_field; -} BWDIFParameters; - -extern const char *ff_source_bwdif_comp; - -static av_cold int init_filter(AVFilterContext *ctx) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - BWDIFVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - - RET(ff_vk_shader_init(vkctx, &s->shd, "bwdif", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 1, 64, 1, - 0)); - shd = &s->shd; - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "prev", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "cur", - .type = 
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "next", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "dst", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 4, 0, 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, int parity; ); - GLSLC(1, int tff; ); - GLSLC(1, int current_field; ); - GLSLC(0, }; ); - - ff_vk_shader_add_push_const(&s->shd, 0, sizeof(BWDIFParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - - GLSLD(ff_source_bwdif_comp ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLC(1, bool filter_field = ((pos.y ^ parity) & 1) == 1; ); - GLSLF(1, bool is_intra = filter_field && (current_field == %i); ,YADIF_FIELD_END); - GLSLC(1, bool field_parity = (parity ^ tff) != 0; ); - GLSLC(0, ); - GLSLC(1, size = imageSize(dst[0]); ); - GLSLC(1, if (!IS_WITHIN(pos, size)) { ); - GLSLC(2, return; ); - GLSLC(1, } else if (is_intra) { ); - for (int i = 0; i < planes; i++) { - if (i == 1) { - GLSLF(2, size = imageSize(dst[%i]); ,i); - GLSLC(2, if (!IS_WITHIN(pos, size)) ); - GLSLC(3, return; ); - } - GLSLF(2, process_plane_intra(%i, pos); ,i); - } - GLSLC(1, } else if (filter_field) { ); - for (int i = 0; i < planes; i++) { - if (i == 1) { - GLSLF(2, size = imageSize(dst[%i]); ,i); - GLSLC(2, if (!IS_WITHIN(pos, size)) ); 
- GLSLC(3, return; ); - } - GLSLF(2, process_plane(%i, pos, filter_field, is_intra, field_parity); ,i); - } - GLSLC(1, } else { ); - for (int i = 0; i < planes; i++) { - if (i == 1) { - GLSLF(2, size = imageSize(dst[%i]); ,i); - GLSLC(2, if (!IS_WITHIN(pos, size)) ); - GLSLC(3, return; ); - } - GLSLF(2, imageStore(dst[%i], pos, imageLoad(cur[%i], pos)); ,i, i); - } - GLSLC(1, } ); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static void bwdif_vulkan_filter_frame(AVFilterContext *ctx, AVFrame *dst, - int parity, int tff) -{ - BWDIFVulkanContext *s = ctx->priv; - YADIFContext *y = &s->yadif; - BWDIFParameters params = { - .parity = parity, - .tff = tff, - .current_field = y->current_field, - }; - - ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, dst, - (AVFrame *[]){ y->prev, y->cur, y->next }, 3, - VK_NULL_HANDLE, &params, sizeof(params)); - - if (y->current_field == YADIF_FIELD_END) - y->current_field = YADIF_FIELD_NORMAL; -} - -static void bwdif_vulkan_uninit(AVFilterContext *avctx) -{ - BWDIFVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - ff_vk_uninit(&s->vkctx); - - ff_yadif_uninit(avctx); - - s->initialized = 0; -} - -static int bwdif_vulkan_config_input(AVFilterLink *inlink) -{ - FilterLink *l = ff_filter_link(inlink); - AVHWFramesContext *input_frames; - AVFilterContext *avctx = inlink->dst; - BWDIFVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - if (!l->hw_frames_ctx) { - av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " - "hardware frames context on the input.\n"); - return
AVERROR(EINVAL); - } - - input_frames = (AVHWFramesContext *)l->hw_frames_ctx->data; - if (input_frames->format != AV_PIX_FMT_VULKAN) - return AVERROR(EINVAL); - - /* Extract the device and default output format from the first input. */ - if (avctx->inputs[0] != inlink) - return 0; - - /* Save the ref, without reffing it */ - vkctx->input_frames_ref = l->hw_frames_ctx; - - /* Defaults */ - vkctx->output_format = input_frames->sw_format; - vkctx->output_width = inlink->w; - vkctx->output_height = inlink->h; - - return 0; -} - -static int bwdif_vulkan_config_output(AVFilterLink *outlink) -{ - FilterLink *l = ff_filter_link(outlink); - int err; - AVFilterContext *avctx = outlink->src; - BWDIFVulkanContext *s = avctx->priv; - YADIFContext *y = &s->yadif; - FFVulkanContext *vkctx = &s->vkctx; - - av_buffer_unref(&l->hw_frames_ctx); - - err = ff_vk_filter_init_context(avctx, vkctx, vkctx->input_frames_ref, - vkctx->output_width, vkctx->output_height, - vkctx->output_format); - if (err < 0) - return err; - - /* For logging */ - vkctx->class = y->class; - - l->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref); - if (!l->hw_frames_ctx) - return AVERROR(ENOMEM); - - err = ff_yadif_config_output_common(outlink); - if (err < 0) - return err; - - y->csp = av_pix_fmt_desc_get(vkctx->frames->sw_format); - y->filter = bwdif_vulkan_filter_frame; - - if (AV_CEIL_RSHIFT(outlink->w, y->csp->log2_chroma_w) < 4 || AV_CEIL_RSHIFT(outlink->h, y->csp->log2_chroma_h) < 4) { - av_log(avctx, AV_LOG_ERROR, "Video with planes less than 4 columns or lines is not supported\n"); - return AVERROR(EINVAL); - } - - return init_filter(avctx); -} - -static const AVClass bwdif_vulkan_class = { - .class_name = "bwdif_vulkan", - .item_name = av_default_item_name, - .option = ff_yadif_options, - .version = LIBAVUTIL_VERSION_INT, - .category = AV_CLASS_CATEGORY_FILTER, -}; - -static const AVFilterPad bwdif_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = 
ff_yadif_filter_frame, - .config_props = &bwdif_vulkan_config_input, - }, -}; - -static const AVFilterPad bwdif_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .request_frame = ff_yadif_request_frame, - .config_props = &bwdif_vulkan_config_output, - }, -}; - -const FFFilter ff_vf_bwdif_vulkan = { - .p.name = "bwdif_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Deinterlace Vulkan frames via bwdif"), - .p.priv_class = &bwdif_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE | - AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, - .priv_size = sizeof(BWDIFVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &bwdif_vulkan_uninit, - FILTER_INPUTS(bwdif_vulkan_inputs), - FILTER_OUTPUTS(bwdif_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 868579008423a7861e65f4bd2a7812839f4d1159 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:51:02 +0000 Subject: [PATCH 008/118] Changing vulkan file directory --- libavfilter/vulkan/vf_bwdif_vulkan.c | 337 +++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 libavfilter/vulkan/vf_bwdif_vulkan.c diff --git a/libavfilter/vulkan/vf_bwdif_vulkan.c b/libavfilter/vulkan/vf_bwdif_vulkan.c new file mode 100644 index 0000000000..7164fe69ba --- /dev/null +++ b/libavfilter/vulkan/vf_bwdif_vulkan.c @@ -0,0 +1,337 @@ +/* + * Copyright (c) Lynne + * Copyright (C) 2018 Philip Langdale <philipl@overt.org> + * Copyright (C) 2016 Thomas Mundt <loudmax@yahoo.de> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "vulkan_filter.h" +#include "libavfilter/yadif.h" +#include "libavfilter/filters.h" + +typedef struct BWDIFVulkanContext { + YADIFContext yadif; + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; +} BWDIFVulkanContext; + +typedef struct BWDIFParameters { + int parity; + int tff; + int current_field; +} BWDIFParameters; + +extern const char *ff_source_bwdif_comp; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + BWDIFVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVulkanShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + + RET(ff_vk_shader_init(vkctx, &s->shd, "bwdif", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 1, 64, 1, + 0)); + shd = &s->shd; + + desc = (FFVulkanDescriptorSetBinding []) { + { + 
.name = "prev", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "cur", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "next", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 4, 0, 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, int parity; ); + GLSLC(1, int tff; ); + GLSLC(1, int current_field; ); + GLSLC(0, }; ); + + ff_vk_shader_add_push_const(&s->shd, 0, sizeof(BWDIFParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + + GLSLD(ff_source_bwdif_comp ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLC(1, bool filter_field = ((pos.y ^ parity) & 1) == 1; ); + GLSLF(1, bool is_intra = filter_field && (current_field == %i); ,YADIF_FIELD_END); + GLSLC(1, bool field_parity = (parity ^ tff) != 0; ); + GLSLC(0, ); + GLSLC(1, size = imageSize(dst[0]); ); + GLSLC(1, if (!IS_WITHIN(pos, size)) { ); + GLSLC(2, return; ); + GLSLC(1, } else if (is_intra) { ); + for (int i = 0; i < planes; i++) { + if (i == 1) { + GLSLF(2, size = imageSize(dst[%i]); ,i); + GLSLC(2, if 
(!IS_WITHIN(pos, size)) ); + GLSLC(3, return; ); + } + GLSLF(2, process_plane_intra(%i, pos); ,i); + } + GLSLC(1, } else if (filter_field) { ); + for (int i = 0; i < planes; i++) { + if (i == 1) { + GLSLF(2, size = imageSize(dst[%i]); ,i); + GLSLC(2, if (!IS_WITHIN(pos, size)) ); + GLSLC(3, return; ); + } + GLSLF(2, process_plane(%i, pos, filter_field, is_intra, field_parity); ,i); + } + GLSLC(1, } else { ); + for (int i = 0; i < planes; i++) { + if (i == 1) { + GLSLF(2, size = imageSize(dst[%i]); ,i); + GLSLC(2, if (!IS_WITHIN(pos, size)) ); + GLSLC(3, return; ); + } + GLSLF(2, imageStore(dst[%i], pos, imageLoad(cur[%i], pos)); ,i, i); + } + GLSLC(1, } ); + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static void bwdif_vulkan_filter_frame(AVFilterContext *ctx, AVFrame *dst, + int parity, int tff) +{ + BWDIFVulkanContext *s = ctx->priv; + YADIFContext *y = &s->yadif; + BWDIFParameters params = { + .parity = parity, + .tff = tff, + .current_field = y->current_field, + }; + + ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, dst, + (AVFrame *[]){ y->prev, y->cur, y->next }, 3, + VK_NULL_HANDLE, &params, sizeof(params)); + + if (y->current_field == YADIF_FIELD_END) + y->current_field = YADIF_FIELD_NORMAL; +} + +static void bwdif_vulkan_uninit(AVFilterContext *avctx) +{ + BWDIFVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + ff_vk_uninit(&s->vkctx); + + ff_yadif_uninit(avctx); + + s->initialized = 0; +} + +static int bwdif_vulkan_config_input(AVFilterLink *inlink) +{ + FilterLink *l = ff_filter_link(inlink); + AVHWFramesContext
*input_frames; + AVFilterContext *avctx = inlink->dst; + BWDIFVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + if (!l->hw_frames_ctx) { + av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " + "hardware frames context on the input.\n"); + return AVERROR(EINVAL); + } + + input_frames = (AVHWFramesContext *)l->hw_frames_ctx->data; + if (input_frames->format != AV_PIX_FMT_VULKAN) + return AVERROR(EINVAL); + + /* Extract the device and default output format from the first input. */ + if (avctx->inputs[0] != inlink) + return 0; + + /* Save the ref, without reffing it */ + vkctx->input_frames_ref = l->hw_frames_ctx; + + /* Defaults */ + vkctx->output_format = input_frames->sw_format; + vkctx->output_width = inlink->w; + vkctx->output_height = inlink->h; + + return 0; +} + +static int bwdif_vulkan_config_output(AVFilterLink *outlink) +{ + FilterLink *l = ff_filter_link(outlink); + int err; + AVFilterContext *avctx = outlink->src; + BWDIFVulkanContext *s = avctx->priv; + YADIFContext *y = &s->yadif; + FFVulkanContext *vkctx = &s->vkctx; + + av_buffer_unref(&l->hw_frames_ctx); + + err = ff_vk_filter_init_context(avctx, vkctx, vkctx->input_frames_ref, + vkctx->output_width, vkctx->output_height, + vkctx->output_format); + if (err < 0) + return err; + + /* For logging */ + vkctx->class = y->class; + + l->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref); + if (!l->hw_frames_ctx) + return AVERROR(ENOMEM); + + err = ff_yadif_config_output_common(outlink); + if (err < 0) + return err; + + y->csp = av_pix_fmt_desc_get(vkctx->frames->sw_format); + y->filter = bwdif_vulkan_filter_frame; + + if (AV_CEIL_RSHIFT(outlink->w, y->csp->log2_chroma_w) < 4 || AV_CEIL_RSHIFT(outlink->h, y->csp->log2_chroma_h) < 4) { + av_log(avctx, AV_LOG_ERROR, "Video with planes less than 4 columns or lines is not supported\n"); + return AVERROR(EINVAL); + } + + return init_filter(avctx); +} + +static const AVClass bwdif_vulkan_class = { + .class_name = "bwdif_vulkan", + 
.item_name = av_default_item_name, + .option = ff_yadif_options, + .version = LIBAVUTIL_VERSION_INT, + .category = AV_CLASS_CATEGORY_FILTER, +}; + +static const AVFilterPad bwdif_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = ff_yadif_filter_frame, + .config_props = &bwdif_vulkan_config_input, + }, +}; + +static const AVFilterPad bwdif_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .request_frame = ff_yadif_request_frame, + .config_props = &bwdif_vulkan_config_output, + }, +}; + +const FFFilter ff_vf_bwdif_vulkan = { + .p.name = "bwdif_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Deinterlace Vulkan frames via bwdif"), + .p.priv_class = &bwdif_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE | + AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, + .priv_size = sizeof(BWDIFVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &bwdif_vulkan_uninit, + FILTER_INPUTS(bwdif_vulkan_inputs), + FILTER_OUTPUTS(bwdif_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From bb950737d8c70903138cf874c893d793e8c9225b Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:51:40 +0000 Subject: [PATCH 009/118] Changing vulkan file directory --- libavfilter/vf_chromaber_vulkan.c | 264 ------------------------------ 1 file changed, 264 deletions(-) delete mode 100644 libavfilter/vf_chromaber_vulkan.c diff --git a/libavfilter/vf_chromaber_vulkan.c b/libavfilter/vf_chromaber_vulkan.c deleted file mode 100644 index 65b53afd64..0000000000 --- a/libavfilter/vf_chromaber_vulkan.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/random_seed.h" -#include "libavutil/opt.h" -#include "libavutil/vulkan_spirv.h" -#include "vulkan_filter.h" - -#include "filters.h" -#include "video.h" - -typedef struct ChromaticAberrationVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - VkSampler sampler; - - /* Push constants / options */ - struct { - float dist[2]; - } opts; -} ChromaticAberrationVulkanContext; - -static const char distort_chroma_kernel[] = { - C(0, void distort_rgb(ivec2 size, ivec2 pos) ) - C(0, { ) - C(1, const vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) - C(1, const vec2 o = p * (dist - 1.0f); ) - C(0, ) - C(1, vec4 res; ) - C(1, res.r = texture(input_img[0], ((p - o)/2.0f) + 0.5f).r; ) - C(1, res.g = texture(input_img[0], ((p )/2.0f) + 0.5f).g; ) - C(1, res.b = texture(input_img[0], ((p + o)/2.0f) + 0.5f).b; ) - C(1, res.a = texture(input_img[0], ((p )/2.0f) + 0.5f).a; ) - C(1, imageStore(output_img[0], pos, res); ) - C(0, } ) - C(0, ) - C(0, void distort_chroma(int idx, ivec2 size, ivec2 pos) ) - C(0, { ) - C(1, vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) - C(1, float d = sqrt(p.x*p.x + p.y*p.y); ) - C(1, p *= d / (d*dist); ) - C(1, vec4 res = 
texture(input_img[idx], (p/2.0f) + 0.5f); ) - C(1, imageStore(output_img[idx], pos, res); ) - C(0, } ) -}; - -static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - ChromaticAberrationVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd = &s->shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - /* Normalize options */ - s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f; - s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, VK_FILTER_LINEAR)); - RET(ff_vk_shader_init(vkctx, &s->shd, "chromatic_abberation", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 32, 1, - 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, vec2 dist; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), - VK_SHADER_STAGE_COMPUTE_BIT); - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER(s->sampler), - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - 
RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); - - GLSLD( distort_chroma_kernel ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - if (planes == 1) { - GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); - } else { - GLSLC(1, ivec2 size = imageSize(output_img[0]); ); - GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); - GLSLC(1, vec4 res = texture(input_img[0], npos); ); - GLSLC(1, imageStore(output_img[0], pos, res); ); - for (int i = 1; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (!IS_WITHIN(pos, size)) ); - GLSLC(2, return; ); - GLSLF(1, distort_chroma(%i, size, pos); ,i); - } - } - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - int err; - AVFilterContext *ctx = link->dst; - ChromaticAberrationVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - - AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - err = AVERROR(ENOMEM); - goto fail; - } - - if (!s->initialized) - RET(init_filter(ctx, in)); - - RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in, - s->sampler, &s->opts, sizeof(s->opts))); - - err = av_frame_copy_props(out, in); - if (err < 0) - goto fail; - - av_frame_free(&in); - - return ff_filter_frame(outlink, out); - -fail: - av_frame_free(&in); - av_frame_free(&out); - return err; -} - -static void chromaber_vulkan_uninit(AVFilterContext *avctx) -{ - ChromaticAberrationVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = 
&vkctx->vkfn; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - if (s->sampler) - vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, - vkctx->hwctx->alloc); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -#define OFFSET(x) offsetof(ChromaticAberrationVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) -static const AVOption chromaber_vulkan_options[] = { - { "dist_x", "Set horizontal distortion amount", OFFSET(opts.dist[0]), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, -10.0f, 10.0f, .flags = FLAGS }, - { "dist_y", "Set vertical distortion amount", OFFSET(opts.dist[1]), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, -10.0f, 10.0f, .flags = FLAGS }, - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(chromaber_vulkan); - -static const AVFilterPad chromaber_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &chromaber_vulkan_filter_frame, - .config_props = &ff_vk_filter_config_input, - }, -}; - -static const AVFilterPad chromaber_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_output, - }, -}; - -const FFFilter ff_vf_chromaber_vulkan = { - .p.name = "chromaber_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Offset chroma of input video (chromatic aberration)"), - .p.priv_class = &chromaber_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(ChromaticAberrationVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &chromaber_vulkan_uninit, - FILTER_INPUTS(chromaber_vulkan_inputs), - FILTER_OUTPUTS(chromaber_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 61ce4dae19345bbf9b54792af50ab1cb7bea8b22 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:52:16 +0000 Subject: [PATCH 010/118] Changing vulkan file directory --- libavfilter/vulkan/vf_chromaber_vulkan.c | 264 
+++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 libavfilter/vulkan/vf_chromaber_vulkan.c diff --git a/libavfilter/vulkan/vf_chromaber_vulkan.c b/libavfilter/vulkan/vf_chromaber_vulkan.c new file mode 100644 index 0000000000..6066c9581c --- /dev/null +++ b/libavfilter/vulkan/vf_chromaber_vulkan.c @@ -0,0 +1,264 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +#include "libavfilter/video.h" + +typedef struct ChromaticAberrationVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + VkSampler sampler; + + /* Push constants / options */ + struct { + float dist[2]; + } opts; +} ChromaticAberrationVulkanContext; + +static const char distort_chroma_kernel[] = { + C(0, void distort_rgb(ivec2 size, ivec2 pos) ) + C(0, { ) + C(1, const vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) + C(1, const vec2 o = p * (dist - 1.0f); ) + C(0, ) + C(1, vec4 res; ) + C(1, res.r = texture(input_img[0], ((p - o)/2.0f) + 0.5f).r; ) + C(1, res.g = 
texture(input_img[0], ((p )/2.0f) + 0.5f).g; ) + C(1, res.b = texture(input_img[0], ((p + o)/2.0f) + 0.5f).b; ) + C(1, res.a = texture(input_img[0], ((p )/2.0f) + 0.5f).a; ) + C(1, imageStore(output_img[0], pos, res); ) + C(0, } ) + C(0, ) + C(0, void distort_chroma(int idx, ivec2 size, ivec2 pos) ) + C(0, { ) + C(1, vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) + C(1, float d = sqrt(p.x*p.x + p.y*p.y); ) + C(1, p *= d / (d*dist); ) + C(1, vec4 res = texture(input_img[idx], (p/2.0f) + 0.5f); ) + C(1, imageStore(output_img[idx], pos, res); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + ChromaticAberrationVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVulkanShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + /* Normalize options */ + s->opts.dist[0] = (s->opts.dist[0] / 100.0f) + 1.0f; + s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(vkctx, &s->shd, "chromatic_abberation", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 32, 1, + 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec2 dist; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = 
"input_img", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); + + GLSLD( distort_chroma_kernel ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + if (planes == 1) { + GLSLC(1, distort_rgb(imageSize(output_img[0]), pos); ); + } else { + GLSLC(1, ivec2 size = imageSize(output_img[0]); ); + GLSLC(1, vec2 npos = vec2(pos)/vec2(size); ); + GLSLC(1, vec4 res = texture(input_img[0], npos); ); + GLSLC(1, imageStore(output_img[0], pos, res); ); + for (int i = 1; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); + GLSLF(1, distort_chroma(%i, size, pos); ,i); + } + } + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int chromaber_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFilterContext *ctx = link->dst; + ChromaticAberrationVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + 
RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in, + s->sampler, &s->opts, sizeof(s->opts))); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static void chromaber_vulkan_uninit(AVFilterContext *avctx) +{ + ChromaticAberrationVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(ChromaticAberrationVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption chromaber_vulkan_options[] = { + { "dist_x", "Set horizontal distortion amount", OFFSET(opts.dist[0]), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, -10.0f, 10.0f, .flags = FLAGS }, + { "dist_y", "Set vertical distortion amount", OFFSET(opts.dist[1]), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, -10.0f, 10.0f, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(chromaber_vulkan); + +static const AVFilterPad chromaber_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &chromaber_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad chromaber_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, +}; + +const FFFilter ff_vf_chromaber_vulkan = { + .p.name = "chromaber_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Offset chroma of input video (chromatic aberration)"), + .p.priv_class = &chromaber_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(ChromaticAberrationVulkanContext), + .init = 
&ff_vk_filter_init, + .uninit = &chromaber_vulkan_uninit, + FILTER_INPUTS(chromaber_vulkan_inputs), + FILTER_OUTPUTS(chromaber_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From 2077a171e097c2343f36086b9930f18d93db5ce2 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:53:02 +0000 Subject: [PATCH 011/118] Changing vulkan file directory --- libavfilter/vf_flip_vulkan.c | 297 ----------------------------------- 1 file changed, 297 deletions(-) delete mode 100644 libavfilter/vf_flip_vulkan.c diff --git a/libavfilter/vf_flip_vulkan.c b/libavfilter/vf_flip_vulkan.c deleted file mode 100644 index 3e2aed0fda..0000000000 --- a/libavfilter/vf_flip_vulkan.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com> - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/random_seed.h" -#include "libavutil/opt.h" -#include "libavutil/vulkan_spirv.h" -#include "vulkan_filter.h" - -#include "filters.h" -#include "video.h" - -enum FlipType { - FLIP_VERTICAL, - FLIP_HORIZONTAL, - FLIP_BOTH -}; - -typedef struct FlipVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; -} FlipVulkanContext; - -static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType type) -{ - int err = 0; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - FlipVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd = &s->shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "flip", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 32, 1, - 0)); - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_image", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "output_image", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = 
ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_image[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - switch (type) - { - case FLIP_HORIZONTAL: - GLSLF(2, vec4 res = imageLoad(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i); - break; - case FLIP_VERTICAL: - GLSLF(2, vec4 res = imageLoad(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i); - break; - case FLIP_BOTH: - GLSLF(2, vec4 res = imageLoad(input_image[%i], ivec2(size.xy - pos.xy));, i); - break; - default: - GLSLF(2, vec4 res = imageLoad(input_image[%i], pos); ,i); - break; - } - GLSLF(2, imageStore(output_image[%i], pos, res); ,i); - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static av_cold void flip_vulkan_uninit(AVFilterContext *avctx) -{ - FlipVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type) -{ - int err; - AVFrame *out = NULL; - AVFilterContext *ctx = link->dst; - FlipVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); 
- if (!out) { - err = AVERROR(ENOMEM); - goto fail; - } - - if (!s->initialized) - RET(init_filter(ctx, in, type)); - - RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in, - VK_NULL_HANDLE, NULL, 0)); - - RET(av_frame_copy_props(out, in)); - - av_frame_free(&in); - - return ff_filter_frame(outlink, out); - -fail: - av_frame_free(&in); - av_frame_free(&out); - return err; -} - -static int hflip_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - return filter_frame(link, in, FLIP_HORIZONTAL); -} - -static int vflip_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - return filter_frame(link, in, FLIP_VERTICAL); -} - -static int flip_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - return filter_frame(link, in, FLIP_BOTH); -} - -static const AVFilterPad flip_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_output, - } -}; - -static const AVOption hflip_vulkan_options[] = { - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(hflip_vulkan); - -static const AVFilterPad hflip_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &hflip_vulkan_filter_frame, - .config_props = &ff_vk_filter_config_input, - } -}; - -const FFFilter ff_vf_hflip_vulkan = { - .p.name = "hflip_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Horizontally flip the input video in Vulkan"), - .p.priv_class = &hflip_vulkan_class, - .priv_size = sizeof(FlipVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &flip_vulkan_uninit, - FILTER_INPUTS(hflip_vulkan_inputs), - FILTER_OUTPUTS(flip_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; - -static const AVOption vflip_vulkan_options[] = { - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(vflip_vulkan); - -static const AVFilterPad vflip_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &vflip_vulkan_filter_frame, - 
.config_props = &ff_vk_filter_config_input, - } -}; - -const FFFilter ff_vf_vflip_vulkan = { - .p.name = "vflip_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Vertically flip the input video in Vulkan"), - .p.priv_class = &vflip_vulkan_class, - .priv_size = sizeof(FlipVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &flip_vulkan_uninit, - FILTER_INPUTS(vflip_vulkan_inputs), - FILTER_OUTPUTS(flip_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; - -static const AVOption flip_vulkan_options[] = { - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(flip_vulkan); - -static const AVFilterPad flip_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &flip_vulkan_filter_frame, - .config_props = &ff_vk_filter_config_input, - } -}; - -const FFFilter ff_vf_flip_vulkan = { - .p.name = "flip_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Flip both horizontally and vertically"), - .p.priv_class = &flip_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(FlipVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &flip_vulkan_uninit, - FILTER_INPUTS(flip_vulkan_inputs), - FILTER_OUTPUTS(flip_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 1c29ae6603f0fed130e5ea6d1316baafbe1db735 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:53:26 +0000 Subject: [PATCH 012/118] Changing vulkan file directory --- libavfilter/vulkan/vf_flip_vulkan.c | 297 ++++++++++++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 libavfilter/vulkan/vf_flip_vulkan.c diff --git a/libavfilter/vulkan/vf_flip_vulkan.c b/libavfilter/vulkan/vf_flip_vulkan.c new file mode 100644 index 0000000000..b00a7e5510 --- /dev/null +++ b/libavfilter/vulkan/vf_flip_vulkan.c @@ -0,0 +1,297 @@ +/* + * copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com> + * Copyright 
(c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +#include "libavfilter/video.h" + +enum FlipType { + FLIP_VERTICAL, + FLIP_HORIZONTAL, + FLIP_BOTH +}; + +typedef struct FlipVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; +} FlipVulkanContext; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in, enum FlipType type) +{ + int err = 0; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + FlipVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVulkanShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + 
RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, &s->shd, "flip", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 32, 1, + 0)); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_image", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "output_image", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_image[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + switch (type) + { + case FLIP_HORIZONTAL: + GLSLF(2, vec4 res = imageLoad(input_image[%i], ivec2(size.x - pos.x, pos.y)); ,i); + break; + case FLIP_VERTICAL: + GLSLF(2, vec4 res = imageLoad(input_image[%i], ivec2(pos.x, size.y - pos.y)); ,i); + break; + case FLIP_BOTH: + GLSLF(2, vec4 res = imageLoad(input_image[%i], ivec2(size.xy - pos.xy));, i); + break; + default: + GLSLF(2, vec4 res = imageLoad(input_image[%i], pos); ,i); + break; + } + GLSLF(2, imageStore(output_image[%i], pos, res); ,i); + GLSLC(1, } ); + } + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + 
return err; +} + +static av_cold void flip_vulkan_uninit(AVFilterContext *avctx) +{ + FlipVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +static int filter_frame(AVFilterLink *link, AVFrame *in, enum FlipType type) +{ + int err; + AVFrame *out = NULL; + AVFilterContext *ctx = link->dst; + FlipVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in, type)); + + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in, + VK_NULL_HANDLE, NULL, 0)); + + RET(av_frame_copy_props(out, in)); + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static int hflip_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + return filter_frame(link, in, FLIP_HORIZONTAL); +} + +static int vflip_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + return filter_frame(link, in, FLIP_VERTICAL); +} + +static int flip_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + return filter_frame(link, in, FLIP_BOTH); +} + +static const AVFilterPad flip_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + } +}; + +static const AVOption hflip_vulkan_options[] = { + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(hflip_vulkan); + +static const AVFilterPad hflip_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &hflip_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + } +}; + +const FFFilter ff_vf_hflip_vulkan = { + .p.name = "hflip_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Horizontally flip the input video in Vulkan"), + 
.p.priv_class = &hflip_vulkan_class, + .priv_size = sizeof(FlipVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &flip_vulkan_uninit, + FILTER_INPUTS(hflip_vulkan_inputs), + FILTER_OUTPUTS(flip_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; + +static const AVOption vflip_vulkan_options[] = { + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(vflip_vulkan); + +static const AVFilterPad vflip_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &vflip_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + } +}; + +const FFFilter ff_vf_vflip_vulkan = { + .p.name = "vflip_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Vertically flip the input video in Vulkan"), + .p.priv_class = &vflip_vulkan_class, + .priv_size = sizeof(FlipVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &flip_vulkan_uninit, + FILTER_INPUTS(vflip_vulkan_inputs), + FILTER_OUTPUTS(flip_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; + +static const AVOption flip_vulkan_options[] = { + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(flip_vulkan); + +static const AVFilterPad flip_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &flip_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + } +}; + +const FFFilter ff_vf_flip_vulkan = { + .p.name = "flip_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Flip both horizontally and vertically"), + .p.priv_class = &flip_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(FlipVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &flip_vulkan_uninit, + FILTER_INPUTS(flip_vulkan_inputs), + FILTER_OUTPUTS(flip_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From 02ccafbadab8361156f6d08bca4ab3aeca3a2a1e Mon Sep 17 00:00:00 
2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:53:56 +0000 Subject: [PATCH 013/118] Changing vulkan file directory --- libavfilter/vf_gblur_vulkan.c | 379 ---------------------------------- 1 file changed, 379 deletions(-) delete mode 100644 libavfilter/vf_gblur_vulkan.c diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c deleted file mode 100644 index 1b447e2754..0000000000 --- a/libavfilter/vf_gblur_vulkan.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com> - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/mem.h" -#include "libavutil/random_seed.h" -#include "libavutil/opt.h" -#include "libavutil/vulkan_spirv.h" -#include "vulkan_filter.h" - -#include "filters.h" -#include "video.h" - -#define CGS 32 -#define GBLUR_MAX_KERNEL_SIZE 127 - -typedef struct GBlurVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - VkSampler sampler; - FFVulkanShader shd_hor; - FFVkBuffer params_hor; - FFVulkanShader shd_ver; - FFVkBuffer params_ver; - - int size; - int sizeV; - int planes; - float sigma; - float sigmaV; -} GBlurVulkanContext; - -static const char gblur_func[] = { - C(0, void gblur(const ivec2 pos, const int index) ) - C(0, { ) - C(1, vec4 sum = imageLoad(input_images[index], pos) * kernel[0]; ) - C(0, ) - C(1, for(int i = 1; i < kernel.length(); i++) { ) - C(2, sum += imageLoad(input_images[index], pos + OFFSET) * kernel[i]; ) - C(2, sum += imageLoad(input_images[index], pos - OFFSET) * kernel[i]; ) - C(1, } ) - C(0, ) - C(1, imageStore(output_images[index], pos, sum); ) - C(0, } ) -}; - -static inline float gaussian(float sigma, float x) -{ - return 1.0 / (sqrt(2.0 * M_PI) * sigma) * - exp(-(x * x) / (2.0 * sigma * sigma)); -} - -static inline float gaussian_simpson_integration(float sigma, float a, float b) -{ - return (b - a) * (1.0 / 6.0) * ((gaussian(sigma, a) + - 4.0 * gaussian(sigma, (a + b) * 0.5) + gaussian(sigma, b))); -} - -static void init_gaussian_kernel(float *kernel, float sigma, float kernel_size) -{ - int x; - float sum; - - sum = 0; - for (x = 0; x < kernel_size; x++) { - kernel[x] = gaussian_simpson_integration(sigma, x - 0.5f, x + 0.5f); - if (!x) - sum += kernel[x]; - else - sum += kernel[x] * 2.0; - } - /* Normalized */ - sum = 1.0 / sum; - for 
(x = 0; x < kernel_size; x++) { - kernel[x] *= sum; - } -} - -static inline void init_kernel_size(void *log_ctx, int *out_size) -{ - int size = *out_size; - - if (!(size & 1)) { - av_log(log_ctx, AV_LOG_WARNING, "The kernel size should be odd\n"); - size++; - } - - *out_size = (size >> 1) + 1; -} - -static av_cold void init_gaussian_params(AVFilterContext *ctx) -{ - GBlurVulkanContext *s = ctx->priv; - if (s->sigmaV <= 0) - s->sigmaV = s->sigma; - - init_kernel_size(ctx, &s->size); - - if (s->sizeV <= 0) - s->sizeV = s->size; - else - init_kernel_size(ctx, &s->sizeV); -} - -static int init_gblur_pipeline(GBlurVulkanContext *s, - FFVulkanShader *shd, FFVkBuffer *params_buf, - int ksize, float sigma, FFVkSPIRVCompiler *spv) -{ - int err = 0; - uint8_t *kernel_mapped; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - FFVulkanDescriptorSetBinding buf_desc = { - .name = "data", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .mem_layout = "std430", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float kernel", - .buf_elems = ksize, - }; - - RET(ff_vk_shader_add_descriptor_set(&s->vkctx, shd, &buf_desc, 1, 1, 0)); - - GLSLD( gblur_func ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (!IS_WITHIN(pos, size)) ); - GLSLC(2, return; ); - if (s->planes & (1 << i)) { - GLSLF(1, gblur(pos, %i); ,i); - } else { - GLSLF(1, vec4 res = imageLoad(input_images[%i], pos); ,i); - GLSLF(1, imageStore(output_images[%i], pos, res); ,i); - } - } - GLSLC(0, } ); - - RET(spv->compile_shader(&s->vkctx, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(&s->vkctx, shd, spv_data, spv_len, "main")); - - 
RET(ff_vk_shader_register_exec(&s->vkctx, &s->e, shd)); - - RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffer(&s->vkctx, params_buf, &kernel_mapped, 0)); - - init_gaussian_kernel((float *)kernel_mapped, sigma, ksize); - - RET(ff_vk_unmap_buffer(&s->vkctx, params_buf, 1)); - RET(ff_vk_shader_update_desc_buffer(&s->vkctx, &s->e.contexts[0], shd, 1, 0, 0, - params_buf, 0, params_buf->size, - VK_FORMAT_UNDEFINED)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - return err; -} - -static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) -{ - int err = 0; - GBlurVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - - FFVulkanShader *shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "output_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - init_gaussian_params(ctx); - - { - shd = &s->shd_hor; - 
RET(ff_vk_shader_init(vkctx, shd, "gblur_hor", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 1, 1, - 0)); - - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc, 2, 0, 0)); - - GLSLC(0, #define OFFSET (ivec2(i, 0.0))); - RET(init_gblur_pipeline(s, shd, &s->params_hor, s->size, s->sigma, spv)); - } - - { - shd = &s->shd_ver; - RET(ff_vk_shader_init(vkctx, shd, "gblur_hor", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 1, 32, 1, - 0)); - - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc, 2, 0, 0)); - - GLSLC(0, #define OFFSET (ivec2(0.0, i))); - RET(init_gblur_pipeline(s, shd, &s->params_ver, s->sizeV, s->sigmaV, spv)); - } - - s->initialized = 1; - -fail: - if (spv) - spv->uninit(&spv); - - return err; -} - -static av_cold void gblur_vulkan_uninit(AVFilterContext *avctx) -{ - GBlurVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd_hor); - ff_vk_shader_free(vkctx, &s->shd_ver); - ff_vk_free_buf(vkctx, &s->params_hor); - ff_vk_free_buf(vkctx, &s->params_ver); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - int err; - AVFrame *tmp = NULL, *out = NULL; - AVFilterContext *ctx = link->dst; - GBlurVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - err = AVERROR(ENOMEM); - goto fail; - } - - tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!tmp) { - err = AVERROR(ENOMEM); - goto fail; - } - - if (!s->initialized) - RET(init_filter(ctx, in)); - - RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e, - (FFVulkanShader *[2]){ &s->shd_hor, &s->shd_ver }, - out, tmp, in, VK_NULL_HANDLE, NULL, 0)); - - err = av_frame_copy_props(out, in); - if (err < 0) - goto fail; - - av_frame_free(&in); - av_frame_free(&tmp); - - return ff_filter_frame(outlink, out); - -fail: - av_frame_free(&in); - 
av_frame_free(&tmp); - av_frame_free(&out); - return err; -} - -#define OFFSET(x) offsetof(GBlurVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) -static const AVOption gblur_vulkan_options[] = { - { "sigma", "Set sigma", OFFSET(sigma), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0.01, 1024.0, FLAGS }, - { "sigmaV", "Set vertical sigma", OFFSET(sigmaV), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, 0.0, 1024.0, FLAGS }, - { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, { .i64 = 0xF }, 0, 0xF, FLAGS }, - { "size", "Set kernel size", OFFSET(size), AV_OPT_TYPE_INT, { .i64 = 19 }, 1, GBLUR_MAX_KERNEL_SIZE, FLAGS }, - { "sizeV", "Set vertical kernel size", OFFSET(sizeV), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, GBLUR_MAX_KERNEL_SIZE, FLAGS }, - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(gblur_vulkan); - -static const AVFilterPad gblur_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &gblur_vulkan_filter_frame, - .config_props = &ff_vk_filter_config_input, - } -}; - -static const AVFilterPad gblur_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_output, - } -}; - -const FFFilter ff_vf_gblur_vulkan = { - .p.name = "gblur_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Gaussian Blur in Vulkan"), - .p.priv_class = &gblur_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(GBlurVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &gblur_vulkan_uninit, - FILTER_INPUTS(gblur_vulkan_inputs), - FILTER_OUTPUTS(gblur_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 9d0af8705418ef8b2cfcce6f2134398d4b0d0f13 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:54:35 +0000 Subject: [PATCH 014/118] Changing vulkan file directory --- libavfilter/vulkan/vf_gblur_vulkan.c | 379 +++++++++++++++++++++++++++ 1 file 
changed, 379 insertions(+) create mode 100644 libavfilter/vulkan/vf_gblur_vulkan.c diff --git a/libavfilter/vulkan/vf_gblur_vulkan.c b/libavfilter/vulkan/vf_gblur_vulkan.c new file mode 100644 index 0000000000..168ef01d56 --- /dev/null +++ b/libavfilter/vulkan/vf_gblur_vulkan.c @@ -0,0 +1,379 @@ +/* + * copyright (c) 2021-2022 Wu Jianhua <jianhua.wu@intel.com> + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mem.h" +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +#include "libavfilter/video.h" + +#define CGS 32 +#define GBLUR_MAX_KERNEL_SIZE 127 + +typedef struct GBlurVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + VkSampler sampler; + FFVulkanShader shd_hor; + FFVkBuffer params_hor; + FFVulkanShader shd_ver; + FFVkBuffer params_ver; + + int size; + int sizeV; + int planes; + float sigma; + float sigmaV; +} GBlurVulkanContext; + +static const char gblur_func[] = { + C(0, void gblur(const ivec2 pos, const int index) ) + C(0, { ) + C(1, vec4 sum = imageLoad(input_images[index], pos) * kernel[0]; ) + C(0, ) + 
C(1, for(int i = 1; i < kernel.length(); i++) { ) + C(2, sum += imageLoad(input_images[index], pos + OFFSET) * kernel[i]; ) + C(2, sum += imageLoad(input_images[index], pos - OFFSET) * kernel[i]; ) + C(1, } ) + C(0, ) + C(1, imageStore(output_images[index], pos, sum); ) + C(0, } ) +}; + +static inline float gaussian(float sigma, float x) +{ + return 1.0 / (sqrt(2.0 * M_PI) * sigma) * + exp(-(x * x) / (2.0 * sigma * sigma)); +} + +static inline float gaussian_simpson_integration(float sigma, float a, float b) +{ + return (b - a) * (1.0 / 6.0) * ((gaussian(sigma, a) + + 4.0 * gaussian(sigma, (a + b) * 0.5) + gaussian(sigma, b))); +} + +static void init_gaussian_kernel(float *kernel, float sigma, float kernel_size) +{ + int x; + float sum; + + sum = 0; + for (x = 0; x < kernel_size; x++) { + kernel[x] = gaussian_simpson_integration(sigma, x - 0.5f, x + 0.5f); + if (!x) + sum += kernel[x]; + else + sum += kernel[x] * 2.0; + } + /* Normalized */ + sum = 1.0 / sum; + for (x = 0; x < kernel_size; x++) { + kernel[x] *= sum; + } +} + +static inline void init_kernel_size(void *log_ctx, int *out_size) +{ + int size = *out_size; + + if (!(size & 1)) { + av_log(log_ctx, AV_LOG_WARNING, "The kernel size should be odd\n"); + size++; + } + + *out_size = (size >> 1) + 1; +} + +static av_cold void init_gaussian_params(AVFilterContext *ctx) +{ + GBlurVulkanContext *s = ctx->priv; + if (s->sigmaV <= 0) + s->sigmaV = s->sigma; + + init_kernel_size(ctx, &s->size); + + if (s->sizeV <= 0) + s->sizeV = s->size; + else + init_kernel_size(ctx, &s->sizeV); +} + +static int init_gblur_pipeline(GBlurVulkanContext *s, + FFVulkanShader *shd, FFVkBuffer *params_buf, + int ksize, float sigma, FFVkSPIRVCompiler *spv) +{ + int err = 0; + uint8_t *kernel_mapped; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + FFVulkanDescriptorSetBinding buf_desc = { + .name = "data", + .type = 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .mem_layout = "std430", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float kernel", + .buf_elems = ksize, + }; + + RET(ff_vk_shader_add_descriptor_set(&s->vkctx, shd, &buf_desc, 1, 1, 0)); + + GLSLD( gblur_func ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); + if (s->planes & (1 << i)) { + GLSLF(1, gblur(pos, %i); ,i); + } else { + GLSLF(1, vec4 res = imageLoad(input_images[%i], pos); ,i); + GLSLF(1, imageStore(output_images[%i], pos, res); ,i); + } + } + GLSLC(0, } ); + + RET(spv->compile_shader(&s->vkctx, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(&s->vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(&s->vkctx, &s->e, shd)); + + RET(ff_vk_create_buf(&s->vkctx, params_buf, sizeof(float) * ksize, NULL, NULL, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(&s->vkctx, params_buf, &kernel_mapped, 0)); + + init_gaussian_kernel((float *)kernel_mapped, sigma, ksize); + + RET(ff_vk_unmap_buffer(&s->vkctx, params_buf, 1)); + RET(ff_vk_shader_update_desc_buffer(&s->vkctx, &s->e.contexts[0], shd, 1, 0, 0, + params_buf, 0, params_buf->size, + VK_FORMAT_UNDEFINED)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + return err; +} + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err = 0; + GBlurVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + + FFVulkanShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, 
"Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "output_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + init_gaussian_params(ctx); + + { + shd = &s->shd_hor; + RET(ff_vk_shader_init(vkctx, shd, "gblur_hor", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 1, 1, + 0)); + + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc, 2, 0, 0)); + + GLSLC(0, #define OFFSET (ivec2(i, 0.0))); + RET(init_gblur_pipeline(s, shd, &s->params_hor, s->size, s->sigma, spv)); + } + + { + shd = &s->shd_ver; + RET(ff_vk_shader_init(vkctx, shd, "gblur_hor", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 1, 32, 1, + 0)); + + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc, 2, 0, 0)); + + GLSLC(0, #define OFFSET (ivec2(0.0, i))); + RET(init_gblur_pipeline(s, shd, &s->params_ver, s->sizeV, s->sigmaV, spv)); + } + + s->initialized = 1; + +fail: + if (spv) + spv->uninit(&spv); + + return err; +} + +static av_cold void gblur_vulkan_uninit(AVFilterContext *avctx) +{ + GBlurVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd_hor); + ff_vk_shader_free(vkctx, &s->shd_ver); + ff_vk_free_buf(vkctx, 
&s->params_hor); + ff_vk_free_buf(vkctx, &s->params_ver); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +static int gblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFrame *tmp = NULL, *out = NULL; + AVFilterContext *ctx = link->dst; + GBlurVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + tmp = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!tmp) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + RET(ff_vk_filter_process_2pass(&s->vkctx, &s->e, + (FFVulkanShader *[2]){ &s->shd_hor, &s->shd_ver }, + out, tmp, in, VK_NULL_HANDLE, NULL, 0)); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + av_frame_free(&tmp); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&tmp); + av_frame_free(&out); + return err; +} + +#define OFFSET(x) offsetof(GBlurVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption gblur_vulkan_options[] = { + { "sigma", "Set sigma", OFFSET(sigma), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0.01, 1024.0, FLAGS }, + { "sigmaV", "Set vertical sigma", OFFSET(sigmaV), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, 0.0, 1024.0, FLAGS }, + { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, { .i64 = 0xF }, 0, 0xF, FLAGS }, + { "size", "Set kernel size", OFFSET(size), AV_OPT_TYPE_INT, { .i64 = 19 }, 1, GBLUR_MAX_KERNEL_SIZE, FLAGS }, + { "sizeV", "Set vertical kernel size", OFFSET(sizeV), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, GBLUR_MAX_KERNEL_SIZE, FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(gblur_vulkan); + +static const AVFilterPad gblur_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &gblur_vulkan_filter_frame, + .config_props = 
&ff_vk_filter_config_input, + } +}; + +static const AVFilterPad gblur_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + } +}; + +const FFFilter ff_vf_gblur_vulkan = { + .p.name = "gblur_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Gaussian Blur in Vulkan"), + .p.priv_class = &gblur_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(GBlurVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &gblur_vulkan_uninit, + FILTER_INPUTS(gblur_vulkan_inputs), + FILTER_OUTPUTS(gblur_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From e1e9ccec4903a48739ce0770f3fe5ed271ef02cd Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:54:59 +0000 Subject: [PATCH 015/118] Changing vulkan file directory --- libavfilter/vf_interlace_vulkan.c | 321 ------------------------------ 1 file changed, 321 deletions(-) delete mode 100644 libavfilter/vf_interlace_vulkan.c diff --git a/libavfilter/vf_interlace_vulkan.c b/libavfilter/vf_interlace_vulkan.c deleted file mode 100644 index 7afb30c2d7..0000000000 --- a/libavfilter/vf_interlace_vulkan.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Copyright 2025 (c) Niklas Haas - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/vulkan_spirv.h" -#include "libavutil/opt.h" -#include "vulkan_filter.h" - -#include "tinterlace.h" -#include "filters.h" -#include "video.h" - -typedef struct InterlaceVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - VkSampler sampler; - FFVulkanShader shd; - - int mode; - int lowpass; - - AVFrame *cur; /* first frame in pair */ -} InterlaceVulkanContext; - -static const char lowpass_off[] = { - C(0, vec4 get_line(sampler2D tex, const vec2 pos) ) - C(0, { ) - C(1, return texture(tex, pos); ) - C(0, } ) -}; - -static const char lowpass_lin[] = { - C(0, vec4 get_line(sampler2D tex, const vec2 pos) ) - C(0, { ) - C(1, return 0.50 * texture(tex, pos) + ) - C(1, 0.25 * texture(tex, pos - ivec2(0, 1)) + ) - C(1, 0.25 * texture(tex, pos + ivec2(0, 1)); ) - C(0, } ) -}; - -static const char lowpass_complex[] = { - C(0, vec4 get_line(sampler2D tex, const vec2 pos) ) - C(0, { ) - C(1, return 0.75 * texture(tex, pos) + ) - C(1, 0.25 * texture(tex, pos - ivec2(0, 1)) + ) - C(1, 0.25 * texture(tex, pos + ivec2(0, 1)) + ) - C(1, -0.125 * texture(tex, pos - ivec2(0, 2)) + ) - C(1, -0.125 * texture(tex, pos + ivec2(0, 2)); ) - C(0, } ) -}; - -static av_cold int init_filter(AVFilterContext *ctx) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - InterlaceVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = 
ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, - s->lowpass == VLPF_OFF ? VK_FILTER_NEAREST - : VK_FILTER_LINEAR)); - RET(ff_vk_shader_init(vkctx, &s->shd, "interlace", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 32, 1, - 0)); - shd = &s->shd; - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "top_field", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER(s->sampler), - }, - { - .name = "bot_field", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER(s->sampler), - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc, 3, 0, 0)); - - switch (s->lowpass) { - case VLPF_OFF: - GLSLD(lowpass_off); - break; - case VLPF_LIN: - GLSLD(lowpass_lin); - break; - case VLPF_CMP: - GLSLD(lowpass_complex); - break; - } - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, vec4 res; ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLC(1, const vec2 ipos = pos + vec2(0.5); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (!IS_WITHIN(pos, size)) ); - GLSLC(2, return; ); - GLSLC(1, if (pos.y %% 2 == 0) ); - GLSLF(1, res = get_line(top_field[%i], ipos); ,i); - GLSLC(1, else ); - GLSLF(1, res = get_line(bot_field[%i], ipos); ,i); 
- GLSLF(1, imageStore(output_img[%i], pos, res); ,i); - } - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static int interlace_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - int err; - AVFrame *out = NULL, *input_top, *input_bot; - AVFilterContext *ctx = link->dst; - InterlaceVulkanContext *s = ctx->priv; - const AVFilterLink *inlink = ctx->inputs[0]; - AVFilterLink *outlink = ctx->outputs[0]; - FilterLink *l = ff_filter_link(outlink); - - if (!s->initialized) - RET(init_filter(ctx)); - - /* Need both frames to filter */ - if (!s->cur) { - s->cur = in; - return 0; - } - - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - err = AVERROR(ENOMEM); - goto fail; - } - - if (s->mode == MODE_TFF) { - input_top = s->cur; - input_bot = in; - } else { - input_top = in; - input_bot = s->cur; - } - - RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, - out, (AVFrame *[]){ input_top, input_bot }, 2, - s->sampler, NULL, 0)); - - err = av_frame_copy_props(out, s->cur); - if (err < 0) - goto fail; - - out->flags |= AV_FRAME_FLAG_INTERLACED; - if (s->mode == MODE_TFF) - out->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; - - out->pts = av_rescale_q(out->pts, inlink->time_base, outlink->time_base); - out->duration = av_rescale_q(1, av_inv_q(l->frame_rate), outlink->time_base); - - av_frame_free(&s->cur); - av_frame_free(&in); - - return ff_filter_frame(outlink, out); - -fail: - av_frame_free(&s->cur); - av_frame_free(&in); - av_frame_free(&out); - return err; -} - -static void interlace_vulkan_uninit(AVFilterContext *avctx) -{ - InterlaceVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - 
FFVulkanFunctions *vk = &vkctx->vkfn; - - av_frame_free(&s->cur); - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - if (s->sampler) - vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, - vkctx->hwctx->alloc); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -static int config_out_props(AVFilterLink *outlink) -{ - AVFilterLink *inlink = outlink->src->inputs[0]; - const FilterLink *il = ff_filter_link(inlink); - FilterLink *ol = ff_filter_link(outlink); - - ol->frame_rate = av_mul_q(il->frame_rate, av_make_q(1, 2)); - outlink->time_base = av_mul_q(inlink->time_base, av_make_q(2, 1)); - return ff_vk_filter_config_output(outlink); -} - -#define OFFSET(x) offsetof(InterlaceVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) -static const AVOption interlace_vulkan_options[] = { - { "scan", "scanning mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_TFF}, 0, 1, FLAGS, .unit = "mode"}, - { "tff", "top field first", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_TFF}, INT_MIN, INT_MAX, FLAGS, .unit = "mode"}, - { "bff", "bottom field first", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_BFF}, INT_MIN, INT_MAX, FLAGS, .unit = "mode"}, - { "lowpass", "set vertical low-pass filter", OFFSET(lowpass), AV_OPT_TYPE_INT, {.i64 = VLPF_LIN}, 0, 2, FLAGS, .unit = "lowpass" }, - { "off", "disable vertical low-pass filter", 0, AV_OPT_TYPE_CONST, {.i64 = VLPF_OFF}, INT_MIN, INT_MAX, FLAGS, .unit = "lowpass" }, - { "linear", "linear vertical low-pass filter", 0, AV_OPT_TYPE_CONST, {.i64 = VLPF_LIN}, INT_MIN, INT_MAX, FLAGS, .unit = "lowpass" }, - { "complex", "complex vertical low-pass filter", 0, AV_OPT_TYPE_CONST, {.i64 = VLPF_CMP}, INT_MIN, INT_MAX, FLAGS, .unit = "lowpass" }, - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(interlace_vulkan); - -static const AVFilterPad interlace_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &interlace_vulkan_filter_frame, - .config_props = 
&ff_vk_filter_config_input, - }, -}; - -static const AVFilterPad interlace_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &config_out_props, - }, -}; - -const FFFilter ff_vf_interlace_vulkan = { - .p.name = "interlace_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Convert progressive video into interlaced."), - .p.priv_class = &interlace_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(InterlaceVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &interlace_vulkan_uninit, - FILTER_INPUTS(interlace_vulkan_inputs), - FILTER_OUTPUTS(interlace_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From b7f37d411ca2afb5aacc5514ecf208a5567c9c43 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:56:39 +0000 Subject: [PATCH 016/118] Changing vulkan file directory --- libavfilter/vulkan/vf_interlace_vulkan.c | 321 +++++++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100644 libavfilter/vulkan/vf_interlace_vulkan.c diff --git a/libavfilter/vulkan/vf_interlace_vulkan.c b/libavfilter/vulkan/vf_interlace_vulkan.c new file mode 100644 index 0000000000..948497a7f0 --- /dev/null +++ b/libavfilter/vulkan/vf_interlace_vulkan.c @@ -0,0 +1,321 @@ +/* + * Copyright 2025 (c) Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "libavutil/opt.h" +#include "vulkan_filter.h" + +#include "libavfilter/tinterlace.h" +#include "libavfilter/filters.h" +#include "libavfilter/video.h" + +typedef struct InterlaceVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + VkSampler sampler; + FFVulkanShader shd; + + int mode; + int lowpass; + + AVFrame *cur; /* first frame in pair */ +} InterlaceVulkanContext; + +static const char lowpass_off[] = { + C(0, vec4 get_line(sampler2D tex, const vec2 pos) ) + C(0, { ) + C(1, return texture(tex, pos); ) + C(0, } ) +}; + +static const char lowpass_lin[] = { + C(0, vec4 get_line(sampler2D tex, const vec2 pos) ) + C(0, { ) + C(1, return 0.50 * texture(tex, pos) + ) + C(1, 0.25 * texture(tex, pos - ivec2(0, 1)) + ) + C(1, 0.25 * texture(tex, pos + ivec2(0, 1)); ) + C(0, } ) +}; + +static const char lowpass_complex[] = { + C(0, vec4 get_line(sampler2D tex, const vec2 pos) ) + C(0, { ) + C(1, return 0.75 * texture(tex, pos) + ) + C(1, 0.25 * texture(tex, pos - ivec2(0, 1)) + ) + C(1, 0.25 * texture(tex, pos + ivec2(0, 1)) + ) + C(1, -0.125 * texture(tex, pos - ivec2(0, 2)) + ) + C(1, -0.125 * texture(tex, pos + ivec2(0, 2)); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + InterlaceVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVulkanShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + 
return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, + s->lowpass == VLPF_OFF ? VK_FILTER_NEAREST + : VK_FILTER_LINEAR)); + RET(ff_vk_shader_init(vkctx, &s->shd, "interlace", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 32, 1, + 0)); + shd = &s->shd; + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "top_field", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "bot_field", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc, 3, 0, 0)); + + switch (s->lowpass) { + case VLPF_OFF: + GLSLD(lowpass_off); + break; + case VLPF_LIN: + GLSLD(lowpass_lin); + break; + case VLPF_CMP: + GLSLD(lowpass_complex); + break; + } + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, vec4 res; ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLC(1, const vec2 ipos = pos + vec2(0.5); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); + GLSLC(1, if (pos.y %% 2 == 0) ); + GLSLF(1, res = get_line(top_field[%i], ipos); ,i); + GLSLC(1, else ); + GLSLF(1, 
res = get_line(bot_field[%i], ipos); ,i); + GLSLF(1, imageStore(output_img[%i], pos, res); ,i); + } + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int interlace_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFrame *out = NULL, *input_top, *input_bot; + AVFilterContext *ctx = link->dst; + InterlaceVulkanContext *s = ctx->priv; + const AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + FilterLink *l = ff_filter_link(outlink); + + if (!s->initialized) + RET(init_filter(ctx)); + + /* Need both frames to filter */ + if (!s->cur) { + s->cur = in; + return 0; + } + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (s->mode == MODE_TFF) { + input_top = s->cur; + input_bot = in; + } else { + input_top = in; + input_bot = s->cur; + } + + RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, + out, (AVFrame *[]){ input_top, input_bot }, 2, + s->sampler, NULL, 0)); + + err = av_frame_copy_props(out, s->cur); + if (err < 0) + goto fail; + + out->flags |= AV_FRAME_FLAG_INTERLACED; + if (s->mode == MODE_TFF) + out->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; + + out->pts = av_rescale_q(out->pts, inlink->time_base, outlink->time_base); + out->duration = av_rescale_q(1, av_inv_q(l->frame_rate), outlink->time_base); + + av_frame_free(&s->cur); + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&s->cur); + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static void interlace_vulkan_uninit(AVFilterContext *avctx) +{ + InterlaceVulkanContext *s = avctx->priv; + 
FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + av_frame_free(&s->cur); + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +static int config_out_props(AVFilterLink *outlink) +{ + AVFilterLink *inlink = outlink->src->inputs[0]; + const FilterLink *il = ff_filter_link(inlink); + FilterLink *ol = ff_filter_link(outlink); + + ol->frame_rate = av_mul_q(il->frame_rate, av_make_q(1, 2)); + outlink->time_base = av_mul_q(inlink->time_base, av_make_q(2, 1)); + return ff_vk_filter_config_output(outlink); +} + +#define OFFSET(x) offsetof(InterlaceVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption interlace_vulkan_options[] = { + { "scan", "scanning mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_TFF}, 0, 1, FLAGS, .unit = "mode"}, + { "tff", "top field first", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_TFF}, INT_MIN, INT_MAX, FLAGS, .unit = "mode"}, + { "bff", "bottom field first", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_BFF}, INT_MIN, INT_MAX, FLAGS, .unit = "mode"}, + { "lowpass", "set vertical low-pass filter", OFFSET(lowpass), AV_OPT_TYPE_INT, {.i64 = VLPF_LIN}, 0, 2, FLAGS, .unit = "lowpass" }, + { "off", "disable vertical low-pass filter", 0, AV_OPT_TYPE_CONST, {.i64 = VLPF_OFF}, INT_MIN, INT_MAX, FLAGS, .unit = "lowpass" }, + { "linear", "linear vertical low-pass filter", 0, AV_OPT_TYPE_CONST, {.i64 = VLPF_LIN}, INT_MIN, INT_MAX, FLAGS, .unit = "lowpass" }, + { "complex", "complex vertical low-pass filter", 0, AV_OPT_TYPE_CONST, {.i64 = VLPF_CMP}, INT_MIN, INT_MAX, FLAGS, .unit = "lowpass" }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(interlace_vulkan); + +static const AVFilterPad interlace_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = 
&interlace_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad interlace_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &config_out_props, + }, +}; + +const FFFilter ff_vf_interlace_vulkan = { + .p.name = "interlace_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Convert progressive video into interlaced."), + .p.priv_class = &interlace_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(InterlaceVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &interlace_vulkan_uninit, + FILTER_INPUTS(interlace_vulkan_inputs), + FILTER_OUTPUTS(interlace_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From 37420dbbb910355e3424882ce34cd419213db9c1 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:57:37 +0000 Subject: [PATCH 017/118] Changing vulkan file directory --- libavfilter/vf_nlmeans_vulkan.c | 1120 ------------------------------- 1 file changed, 1120 deletions(-) delete mode 100644 libavfilter/vf_nlmeans_vulkan.c diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c deleted file mode 100644 index 22a2a73eae..0000000000 --- a/libavfilter/vf_nlmeans_vulkan.c +++ /dev/null @@ -1,1120 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/mem.h" -#include "libavutil/random_seed.h" -#include "libavutil/vulkan_spirv.h" -#include "libavutil/opt.h" -#include "vulkan_filter.h" - -#include "filters.h" -#include "video.h" - -#define TYPE_NAME "vec4" -#define TYPE_ELEMS 4 -#define TYPE_SIZE (TYPE_ELEMS*4) - -typedef struct NLMeansVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - - AVBufferPool *integral_buf_pool; - AVBufferPool *ws_buf_pool; - - FFVkBuffer xyoffsets_buf; - - int pl_weights_rows; - FFVulkanShader shd_weights; - FFVulkanShader shd_denoise; - - int *xoffsets; - int *yoffsets; - int nb_offsets; - float strength[4]; - int patch[4]; - - struct nlmeans_opts { - int r; - double s; - double sc[4]; - int p; - int pc[4]; - int t; - } opts; -} NLMeansVulkanContext; - -static void insert_first(FFVulkanShader *shd, int r, const char *off, int horiz, int plane, int comp) -{ - GLSLF(4, s1 = imageLoad(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i]; - ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp); - - GLSLF(4, s2[0] = imageLoad(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i + %s))[%i]; - ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp); - GLSLF(4, s2[1] = imageLoad(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i + %s))[%i]; - ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp); - GLSLF(4, s2[2] = imageLoad(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i + %s))[%i]; - ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp); - GLSLF(4, s2[3] = imageLoad(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i + %s))[%i]; - ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? 
r : 0, !horiz ? off : "0", comp); - - GLSLC(4, s2 = (s1 - s2) * (s1 - s2); ); -} - -static void insert_horizontal_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp) -{ - GLSLF(1, pos.y = int(gl_GlobalInvocationID.x) * %i; ,nb_rows); - if (!first) - GLSLC(1, barrier(); ); - GLSLC(0, ); - GLSLF(1, if (pos.y < height[%i]) { ,plane); - GLSLC(2, #pragma unroll(1) ); - GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); - GLSLC(3, prefix_sum = DTYPE(0); ); - GLSLC(3, offset = int_stride * uint64_t(pos.y + r); ); - GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); ); - GLSLC(0, ); - GLSLF(3, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane); - if (first) - insert_first(shd, 0, "r", 0, plane, comp); - else - GLSLC(4, s2 = dst.v[pos.x]; ); - GLSLC(4, dst.v[pos.x] = s2 + prefix_sum; ); - GLSLC(4, prefix_sum += s2; ); - GLSLC(3, } ); - GLSLC(2, } ); - GLSLC(1, } ); - GLSLC(0, ); -} - -static void insert_vertical_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp) -{ - GLSLF(1, pos.x = int(gl_GlobalInvocationID.x) * %i; ,nb_rows); - GLSLC(1, #pragma unroll(1) ); - GLSLF(1, for (r = 0; r < %i; r++) ,nb_rows); - GLSLC(2, psum[r] = DTYPE(0); ); - GLSLC(0, ); - if (!first) - GLSLC(1, barrier(); ); - GLSLC(0, ); - GLSLF(1, if (pos.x < width[%i]) { ,plane); - GLSLF(2, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane); - GLSLC(3, offset = int_stride * uint64_t(pos.y); ); - GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); ); - GLSLC(0, ); - GLSLC(3, #pragma unroll(1) ); - GLSLF(3, for (r = 0; r < %i; r++) { ,nb_rows); - if (first) - insert_first(shd, 0, "r", 1, plane, comp); - else - GLSLC(4, s2 = dst.v[pos.x + r]; ); - GLSLC(4, dst.v[pos.x + r] = s2 + psum[r]; ); - GLSLC(4, psum[r] += s2; ); - GLSLC(3, } ); - GLSLC(2, } ); - GLSLC(1, } ); - GLSLC(0, ); -} - -static void insert_weights_pass(FFVulkanShader *shd, int nb_rows, int vert, - int t, int dst_comp, int plane, int comp) -{ - GLSLF(1, p = patch_size[%i]; 
,dst_comp); - GLSLC(0, ); - GLSLC(1, barrier(); ); - GLSLC(0, ); - if (!vert) { - GLSLF(1, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane); - GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) ,nb_rows, plane); - GLSLC(3, break; ); - GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); - GLSLF(3, pos.x = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows); - } else { - GLSLF(1, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane); - GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i]) ,nb_rows, plane); - GLSLC(3, break; ); - GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); - GLSLF(3, pos.y = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows); - } - GLSLC(0, ); - GLSLC(3, a = DTYPE(0); ); - GLSLC(3, b = DTYPE(0); ); - GLSLC(3, c = DTYPE(0); ); - GLSLC(3, d = DTYPE(0); ); - GLSLC(0, ); - GLSLC(3, lt = ((pos.x - p) < 0) || ((pos.y - p) < 0); ); - GLSLC(0, ); - GLSLF(3, src[0] = imageLoad(input_img[%i], pos + offs[0])[%i]; ,plane, comp); - GLSLF(3, src[1] = imageLoad(input_img[%i], pos + offs[1])[%i]; ,plane, comp); - GLSLF(3, src[2] = imageLoad(input_img[%i], pos + offs[2])[%i]; ,plane, comp); - GLSLF(3, src[3] = imageLoad(input_img[%i], pos + offs[3])[%i]; ,plane, comp); - GLSLC(0, ); - GLSLC(3, if (lt == false) { ); - GLSLC(3, offset = int_stride * uint64_t(pos.y - p); ); - GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); ); - GLSLC(4, a = dst.v[pos.x - p]; ); - GLSLC(4, c = dst.v[pos.x + p]; ); - GLSLC(3, offset = int_stride * uint64_t(pos.y + p); ); - GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); ); - GLSLC(4, b = dst.v[pos.x - p]; ); - GLSLC(4, d = dst.v[pos.x + p]; ); - GLSLC(3, } ); - GLSLC(0, ); - GLSLC(3, patch_diff = d + a - b - c; ); - GLSLF(3, w = exp(patch_diff * strength[%i]); ,dst_comp); - GLSLC(3, w_sum = w[0] + w[1] + w[2] + w[3]; ); - GLSLC(3, sum = dot(w, src*255); ); - GLSLC(0, ); - if (t > 1) { - GLSLF(3, atomicAdd(weights_%i[pos.y*ws_stride[%i] + pos.x], w_sum); ,dst_comp, dst_comp); - GLSLF(3, 
atomicAdd(sums_%i[pos.y*ws_stride[%i] + pos.x], sum); ,dst_comp, dst_comp); - } else { - GLSLF(3, weights_%i[pos.y*ws_stride[%i] + pos.x] += w_sum; ,dst_comp, dst_comp); - GLSLF(3, sums_%i[pos.y*ws_stride[%i] + pos.x] += sum; ,dst_comp, dst_comp); - } - GLSLC(2, } ); - GLSLC(1, } ); -} - -typedef struct HorizontalPushData { - uint32_t width[4]; - uint32_t height[4]; - uint32_t ws_stride[4]; - int32_t patch_size[4]; - float strength[4]; - VkDeviceAddress integral_base; - uint64_t integral_size; - uint64_t int_stride; - uint32_t xyoffs_start; -} HorizontalPushData; - -static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, - FFVulkanShader *shd, - FFVkSPIRVCompiler *spv, - int width, int height, int t, - const AVPixFmtDescriptor *desc, - int planes, int *nb_rows) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - FFVulkanDescriptorSetBinding *desc_set; - int max_dim = FFMAX(width, height); - uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0]; - int wg_size, wg_rows; - - /* Round the max workgroup size to the previous power of two */ - wg_size = max_wg; - wg_rows = 1; - - if (max_wg > max_dim) { - wg_size = max_dim; - } else if (max_wg < max_dim) { - /* Make it fit */ - while (wg_size*wg_rows < max_dim) - wg_rows++; - } - - RET(ff_vk_shader_init(vkctx, shd, "nlmeans_weights", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - wg_size, 1, 1, - 0)); - - *nb_rows = wg_rows; - - if (t > 1) - GLSLC(0, #extension GL_EXT_shader_atomic_float : require ); - GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require ); - GLSLC(0, ); - GLSLF(0, #define DTYPE %s ,TYPE_NAME); - GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE); - GLSLC(0, ); - GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { ); - GLSLC(1, DTYPE v[]; ); - GLSLC(0, }; ); - GLSLC(0, ); - GLSLC(0, layout(push_constant, std430) uniform 
pushConstants { ); - GLSLC(1, uvec4 width; ); - GLSLC(1, uvec4 height; ); - GLSLC(1, uvec4 ws_stride; ); - GLSLC(1, ivec4 patch_size; ); - GLSLC(1, vec4 strength; ); - GLSLC(1, DataBuffer integral_base; ); - GLSLC(1, uint64_t integral_size; ); - GLSLC(1, uint64_t int_stride; ); - GLSLC(1, uint xyoffs_start; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_shader_add_push_const(shd, 0, sizeof(HorizontalPushData), - VK_SHADER_STAGE_COMPUTE_BIT); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "weights_buffer_0", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights_0[];", - }, - { - .name = "sums_buffer_0", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float sums_0[];", - }, - { - .name = "weights_buffer_1", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights_1[];", - }, - { - .name = "sums_buffer_1", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float sums_1[];", - }, - { - .name = "weights_buffer_2", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights_2[];", - }, - { - .name = "sums_buffer_2", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float sums_2[];", - }, - { - .name = "weights_buffer_3", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights_3[];", - }, - { - .name = "sums_buffer_3", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, 
- .buf_content = "float sums_3[];", - }, - }; - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1 + 2*desc->nb_components, 0, 0)); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "xyoffsets_buffer", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "ivec2 xyoffsets[];", - }, - }; - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0)); - - GLSLC(0, ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, uint64_t offset; ); - GLSLC(1, DataBuffer dst; ); - GLSLC(1, float s1; ); - GLSLC(1, DTYPE s2; ); - GLSLC(1, DTYPE prefix_sum; ); - GLSLF(1, DTYPE psum[%i]; ,*nb_rows); - GLSLC(1, int r; ); - GLSLC(1, ivec2 pos; ); - GLSLC(1, int p; ); - GLSLC(0, ); - GLSLC(1, DataBuffer integral_data; ); - GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS); - GLSLC(0, ); - GLSLC(1, int invoc_idx = int(gl_WorkGroupID.z); ); - GLSLC(0, ); - GLSLC(1, offset = integral_size * invoc_idx; ); - GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); ); - for (int i = 0; i < TYPE_ELEMS; i++) - GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i); - GLSLC(0, ); - GLSLC(1, DTYPE a; ); - GLSLC(1, DTYPE b; ); - GLSLC(1, DTYPE c; ); - GLSLC(1, DTYPE d; ); - GLSLC(0, ); - GLSLC(1, DTYPE patch_diff; ); - if (TYPE_ELEMS == 4) { - GLSLC(1, vec4 src; ); - GLSLC(1, vec4 w; ); - } else { - GLSLC(1, vec4 src[4]; ); - GLSLC(1, vec4 w[4]; ); - } - GLSLC(1, float w_sum; ); - GLSLC(1, float sum; ); - GLSLC(0, ); - GLSLC(1, bool lt; ); - GLSLC(1, bool gt; ); - GLSLC(0, ); - - for (int i = 0; i < desc->nb_components; i++) { - int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8); - if (width >= height) { - insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off); - insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off); - insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off); - } else { - 
insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off); - insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off); - insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off); - } - } - - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, exec, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - -typedef struct DenoisePushData { - uint32_t ws_stride[4]; -} DenoisePushData; - -static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, - FFVulkanShader *shd, FFVkSPIRVCompiler *spv, - const AVPixFmtDescriptor *desc, int planes) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - FFVulkanDescriptorSetBinding *desc_set; - - RET(ff_vk_shader_init(vkctx, shd, "nlmeans_denoise", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 32, 32, 1, - 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, uvec4 ws_stride; ); - GLSLC(0, }; ); - - ff_vk_shader_add_push_const(shd, 0, sizeof(DenoisePushData), - VK_SHADER_STAGE_COMPUTE_BIT); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0)); - - desc_set = 
(FFVulkanDescriptorSetBinding []) { - { - .name = "weights_buffer_0", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights_0[];", - }, - { - .name = "sums_buffer_0", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float sums_0[];", - }, - { - .name = "weights_buffer_1", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights_1[];", - }, - { - .name = "sums_buffer_1", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float sums_1[];", - }, - { - .name = "weights_buffer_2", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights_2[];", - }, - { - .name = "sums_buffer_2", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float sums_2[];", - }, - { - .name = "weights_buffer_3", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float weights_3[];", - }, - { - .name = "sums_buffer_3", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "float sums_3[];", - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2*desc->nb_components, 0, 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); ); - GLSLC(0, ); - GLSLC(1, float w_sum; ); - GLSLC(1, float sum; ); - GLSLC(1, vec4 src; ); - GLSLC(1, vec4 r; ); - GLSLC(0, ); - GLSLC(1, size = 
imageSize(output_img[plane]); ); - GLSLC(1, if (!IS_WITHIN(pos, size)) ); - GLSLC(2, return; ); - GLSLC(0, ); - GLSLC(1, src = imageLoad(input_img[plane], pos); ); - GLSLC(0, ); - for (int c = 0; c < desc->nb_components; c++) { - int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8); - GLSLF(1, if (plane == %i) { ,desc->comp[c].plane); - GLSLF(2, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c); - GLSLF(2, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c); - GLSLF(2, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off); - GLSLC(1, } ); - GLSLC(0, ); - } - GLSLC(1, imageStore(output_img[plane], pos, r); ); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, exec, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - -static av_cold int init_filter(AVFilterContext *ctx) -{ - int rad, err; - int xcnt = 0, ycnt = 0; - NLMeansVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVkSPIRVCompiler *spv = NULL; - int *offsets_buf; - int offsets_dispatched = 0, nb_dispatches = 0; - - const AVPixFmtDescriptor *desc; - desc = av_pix_fmt_desc_get(vkctx->output_format); - if (!desc) - return AVERROR(EINVAL); - - if (!(s->opts.r & 1)) { - s->opts.r |= 1; - av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i", - s->opts.r); - } - - if (!(s->opts.p & 1)) { - s->opts.p |= 1; - av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i", - s->opts.p); - } - - for (int i = 0; i < 4; i++) { - double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s; - int ps = (s->opts.pc[i] ? 
s->opts.pc[i] : s->opts.p); - str = 10.0f*str; - str *= -str; - str = 255.0*255.0 / str; - s->strength[i] = str; - if (!(ps & 1)) { - ps |= 1; - av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i", - ps); - } - s->patch[i] = ps / 2; - } - - rad = s->opts.r/2; - s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1; - s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets)); - s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets)); - s->nb_offsets = 0; - - for (int x = -rad; x <= rad; x++) { - for (int y = -rad; y <= rad; y++) { - if (!x && !y) - continue; - - s->xoffsets[xcnt++] = x; - s->yoffsets[ycnt++] = y; - s->nb_offsets++; - } - } - - RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)); - RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0)); - - for (int i = 0; i < 2*s->nb_offsets; i += 2) { - offsets_buf[i + 0] = s->xoffsets[i >> 1]; - offsets_buf[i + 1] = s->yoffsets[i >> 1]; - } - - RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1)); - - s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS)); - if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) { - av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, " - "disabling dispatch parallelism\n"); - s->opts.t = 1; - } - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, 1, 0, 0, 0, NULL)); - - RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, - spv, s->vkctx.output_width, 
s->vkctx.output_height, - s->opts.t, desc, planes, &s->pl_weights_rows)); - - RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, - spv, desc, planes)); - - RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_weights, - 1, 0, 0, - &s->xyoffsets_buf, 0, s->xyoffsets_buf.size, - VK_FORMAT_UNDEFINED)); - - do { - int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t); - wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]); - offsets_dispatched += wg_invoc * TYPE_ELEMS; - nb_dispatches++; - } while (offsets_dispatched < s->nb_offsets); - - av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n", - s->nb_offsets, nb_dispatches); - - s->initialized = 1; - -fail: - if (spv) - spv->uninit(&spv); - - return err; -} - -static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec, - FFVkBuffer *ws_vk, uint32_t ws_stride[4]) -{ - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - VkBufferMemoryBarrier2 buf_bar[8]; - int nb_buf_bar = 0; - - DenoisePushData pd = { - { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, - }; - - /* Denoise pass pipeline */ - ff_vk_exec_bind_shader(vkctx, exec, &s->shd_denoise); - - /* Push data */ - ff_vk_shader_update_push_const(vkctx, exec, &s->shd_denoise, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); - - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = ws_vk->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = ws_vk->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = ws_vk->buf, - .size = ws_vk->size, - .offset = 0, - }; - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = 
buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - ws_vk->stage = buf_bar[0].dstStageMask; - ws_vk->access = buf_bar[0].dstAccessMask; - - /* End of denoise pass */ - vk->CmdDispatch(exec->buf, - FFALIGN(vkctx->output_width, s->shd_denoise.lg_size[0])/s->shd_denoise.lg_size[0], - FFALIGN(vkctx->output_height, s->shd_denoise.lg_size[1])/s->shd_denoise.lg_size[1], - av_pix_fmt_count_planes(s->vkctx.output_format)); - - return 0; -} - -static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - int err; - AVFrame *out = NULL; - AVFilterContext *ctx = link->dst; - NLMeansVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - - const AVPixFmtDescriptor *desc; - int plane_widths[4]; - int plane_heights[4]; - - int offsets_dispatched = 0; - - /* Integral */ - AVBufferRef *integral_buf = NULL; - FFVkBuffer *integral_vk; - size_t int_stride; - size_t int_size; - - /* Weights/sums */ - AVBufferRef *ws_buf = NULL; - FFVkBuffer *ws_vk; - VkDeviceSize weights_offs[4]; - VkDeviceSize sums_offs[4]; - uint32_t ws_stride[4]; - size_t ws_size[4]; - size_t ws_total_size = 0; - - FFVkExecContext *exec; - VkImageView in_views[AV_NUM_DATA_POINTERS]; - VkImageView out_views[AV_NUM_DATA_POINTERS]; - VkImageMemoryBarrier2 img_bar[8]; - int nb_img_bar = 0; - VkBufferMemoryBarrier2 buf_bar[8]; - int nb_buf_bar = 0; - - if (!s->initialized) - RET(init_filter(ctx)); - - desc = av_pix_fmt_desc_get(vkctx->output_format); - if (!desc) - return AVERROR(EINVAL); - - /* Integral image */ - int_stride = s->shd_weights.lg_size[0]*s->pl_weights_rows*TYPE_SIZE; - int_size = s->shd_weights.lg_size[0]*s->pl_weights_rows*int_stride; - - /* Plane dimensions */ - for (int i = 0; i < desc->nb_components; i++) { - plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w); - plane_heights[i] = !i || (i == 3) ? 
vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_w); - plane_widths[i] = FFALIGN(plane_widths[i], s->shd_denoise.lg_size[0]); - plane_heights[i] = FFALIGN(plane_heights[i], s->shd_denoise.lg_size[1]); - - ws_stride[i] = plane_widths[i]; - ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float); - ws_total_size += ws_size[i]; - } - - /* Buffers */ - err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, - s->opts.t * int_size, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (err < 0) - return err; - integral_vk = (FFVkBuffer *)integral_buf->data; - - err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, - ws_total_size * 2, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (err < 0) - return err; - ws_vk = (FFVkBuffer *)ws_buf->data; - - weights_offs[0] = 0; - sums_offs[0] = ws_total_size; - for (int i = 1; i < desc->nb_components; i++) { - weights_offs[i] = weights_offs[i - 1] + ws_size[i - 1]; - sums_offs[i] = sums_offs[i - 1] + ws_size[i - 1]; - } - - /* Output frame */ - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - err = AVERROR(ENOMEM); - goto fail; - } - - /* Execution context */ - exec = ff_vk_exec_get(&s->vkctx, &s->e); - ff_vk_exec_start(vkctx, exec); - - /* Dependencies */ - RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - - RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0)); - integral_buf = NULL; - - RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0)); - ws_buf = NULL; - - /* Input frame prep */ - 
RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT)); - ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - /* Output frame prep */ - RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT)); - ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - nb_buf_bar = 0; - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = ws_vk->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .srcAccessMask = ws_vk->access, - .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = ws_vk->buf, - .size = ws_vk->size, - .offset = 0, - }; - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = integral_vk->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = integral_vk->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = integral_vk->buf, - .size = integral_vk->size, - .offset = 0, - }; - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - ws_vk->stage = buf_bar[0].dstStageMask; - ws_vk->access = buf_bar[0].dstAccessMask; - integral_vk->stage = 
buf_bar[1].dstStageMask; - integral_vk->access = buf_bar[1].dstAccessMask; - - /* Buffer zeroing */ - vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0); - - nb_buf_bar = 0; - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = ws_vk->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = ws_vk->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = ws_vk->buf, - .size = ws_vk->size, - .offset = 0, - }; - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - ws_vk->stage = buf_bar[0].dstStageMask; - ws_vk->access = buf_bar[0].dstAccessMask; - - /* Update weights descriptors */ - ff_vk_shader_update_img_array(vkctx, exec, &s->shd_weights, in, in_views, 0, 0, - VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - for (int i = 0; i < desc->nb_components; i++) { - RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 0, 0, - ws_vk, weights_offs[i], ws_size[i], - VK_FORMAT_UNDEFINED)); - RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 1, 0, - ws_vk, sums_offs[i], ws_size[i], - VK_FORMAT_UNDEFINED)); - } - - /* Update denoise descriptors */ - ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, in, in_views, 0, 0, - VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, out, out_views, 0, 1, - VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - for (int i = 0; i < desc->nb_components; i++) { - RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 0, 0, - ws_vk, weights_offs[i], ws_size[i], - VK_FORMAT_UNDEFINED)); - 
RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 1, 0, - ws_vk, sums_offs[i], ws_size[i], - VK_FORMAT_UNDEFINED)); - } - - /* Weights pipeline */ - ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights); - - do { - int wg_invoc; - HorizontalPushData pd = { - { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] }, - { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] }, - { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, - { s->patch[0], s->patch[1], s->patch[2], s->patch[3] }, - { s->strength[0], s->strength[1], s->strength[2], s->strength[2], }, - integral_vk->address, - (uint64_t)int_size, - (uint64_t)int_stride, - offsets_dispatched, - }; - - /* Push data */ - ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); - - if (offsets_dispatched) { - nb_buf_bar = 0; - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = integral_vk->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = integral_vk->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = integral_vk->buf, - .size = integral_vk->size, - .offset = 0, - }; - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - integral_vk->stage = buf_bar[1].dstStageMask; - integral_vk->access = buf_bar[1].dstAccessMask; - } - - wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t); - wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]); - - /* End of horizontal pass */ - vk->CmdDispatch(exec->buf, 1, 1, wg_invoc); - - offsets_dispatched += 
wg_invoc * TYPE_ELEMS; - } while (offsets_dispatched < s->nb_offsets); - - RET(denoise_pass(s, exec, ws_vk, ws_stride)); - - err = ff_vk_exec_submit(vkctx, exec); - if (err < 0) - return err; - - err = av_frame_copy_props(out, in); - if (err < 0) - goto fail; - - av_frame_free(&in); - - return ff_filter_frame(outlink, out); - -fail: - av_buffer_unref(&integral_buf); - av_buffer_unref(&ws_buf); - av_frame_free(&in); - av_frame_free(&out); - return err; -} - -static void nlmeans_vulkan_uninit(AVFilterContext *avctx) -{ - NLMeansVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd_weights); - ff_vk_shader_free(vkctx, &s->shd_denoise); - - av_buffer_pool_uninit(&s->integral_buf_pool); - av_buffer_pool_uninit(&s->ws_buf_pool); - - ff_vk_uninit(&s->vkctx); - - av_freep(&s->xoffsets); - av_freep(&s->yoffsets); - - s->initialized = 0; -} - -#define OFFSET(x) offsetof(NLMeansVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) -static const AVOption nlmeans_vulkan_options[] = { - { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, - { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS }, - { "r", "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS }, - { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS }, - - { "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, - { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, - { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, - { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, 
{ .dbl = 1.0 }, 1.0, 100.0, FLAGS }, - - { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, - { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, - { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, - { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, - - { NULL } -}; - -AVFILTER_DEFINE_CLASS(nlmeans_vulkan); - -static const AVFilterPad nlmeans_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &nlmeans_vulkan_filter_frame, - .config_props = &ff_vk_filter_config_input, - }, -}; - -static const AVFilterPad nlmeans_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_output, - }, -}; - -const FFFilter ff_vf_nlmeans_vulkan = { - .p.name = "nlmeans_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"), - .p.priv_class = &nlmeans_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(NLMeansVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &nlmeans_vulkan_uninit, - FILTER_INPUTS(nlmeans_vulkan_inputs), - FILTER_OUTPUTS(nlmeans_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From ccafec6505a2853ff27071328806911f6529971c Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:58:11 +0000 Subject: [PATCH 018/118] Changing vulkan file directory --- libavfilter/vulkan/vf_nlmeans_vulkan.c | 1120 ++++++++++++++++++++++++ 1 file changed, 1120 insertions(+) create mode 100644 libavfilter/vulkan/vf_nlmeans_vulkan.c diff --git a/libavfilter/vulkan/vf_nlmeans_vulkan.c b/libavfilter/vulkan/vf_nlmeans_vulkan.c new file mode 100644 index 0000000000..6c86206829 --- /dev/null +++ 
b/libavfilter/vulkan/vf_nlmeans_vulkan.c @@ -0,0 +1,1120 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mem.h" +#include "libavutil/random_seed.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "libavutil/opt.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +#include "libavfilter/video.h" + +#define TYPE_NAME "vec4" +#define TYPE_ELEMS 4 +#define TYPE_SIZE (TYPE_ELEMS*4) + +typedef struct NLMeansVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + + AVBufferPool *integral_buf_pool; + AVBufferPool *ws_buf_pool; + + FFVkBuffer xyoffsets_buf; + + int pl_weights_rows; + FFVulkanShader shd_weights; + FFVulkanShader shd_denoise; + + int *xoffsets; + int *yoffsets; + int nb_offsets; + float strength[4]; + int patch[4]; + + struct nlmeans_opts { + int r; + double s; + double sc[4]; + int p; + int pc[4]; + int t; + } opts; +} NLMeansVulkanContext; + +static void insert_first(FFVulkanShader *shd, int r, const char *off, int horiz, int plane, int comp) +{ + GLSLF(4, s1 = imageLoad(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i]; + ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? 
off : "0", comp); + + GLSLF(4, s2[0] = imageLoad(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i + %s))[%i]; + ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp); + GLSLF(4, s2[1] = imageLoad(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i + %s))[%i]; + ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp); + GLSLF(4, s2[2] = imageLoad(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i + %s))[%i]; + ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp); + GLSLF(4, s2[3] = imageLoad(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i + %s))[%i]; + ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? off : "0", comp); + + GLSLC(4, s2 = (s1 - s2) * (s1 - s2); ); +} + +static void insert_horizontal_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp) +{ + GLSLF(1, pos.y = int(gl_GlobalInvocationID.x) * %i; ,nb_rows); + if (!first) + GLSLC(1, barrier(); ); + GLSLC(0, ); + GLSLF(1, if (pos.y < height[%i]) { ,plane); + GLSLC(2, #pragma unroll(1) ); + GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); + GLSLC(3, prefix_sum = DTYPE(0); ); + GLSLC(3, offset = int_stride * uint64_t(pos.y + r); ); + GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); ); + GLSLC(0, ); + GLSLF(3, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane); + if (first) + insert_first(shd, 0, "r", 0, plane, comp); + else + GLSLC(4, s2 = dst.v[pos.x]; ); + GLSLC(4, dst.v[pos.x] = s2 + prefix_sum; ); + GLSLC(4, prefix_sum += s2; ); + GLSLC(3, } ); + GLSLC(2, } ); + GLSLC(1, } ); + GLSLC(0, ); +} + +static void insert_vertical_pass(FFVulkanShader *shd, int nb_rows, int first, int plane, int comp) +{ + GLSLF(1, pos.x = int(gl_GlobalInvocationID.x) * %i; ,nb_rows); + GLSLC(1, #pragma unroll(1) ); + GLSLF(1, for (r = 0; r < %i; r++) ,nb_rows); + GLSLC(2, psum[r] = DTYPE(0); ); + GLSLC(0, ); + if (!first) + GLSLC(1, barrier(); ); + GLSLC(0, ); + GLSLF(1, if (pos.x < width[%i]) { 
,plane); + GLSLF(2, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane); + GLSLC(3, offset = int_stride * uint64_t(pos.y); ); + GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); ); + GLSLC(0, ); + GLSLC(3, #pragma unroll(1) ); + GLSLF(3, for (r = 0; r < %i; r++) { ,nb_rows); + if (first) + insert_first(shd, 0, "r", 1, plane, comp); + else + GLSLC(4, s2 = dst.v[pos.x + r]; ); + GLSLC(4, dst.v[pos.x + r] = s2 + psum[r]; ); + GLSLC(4, psum[r] += s2; ); + GLSLC(3, } ); + GLSLC(2, } ); + GLSLC(1, } ); + GLSLC(0, ); +} + +static void insert_weights_pass(FFVulkanShader *shd, int nb_rows, int vert, + int t, int dst_comp, int plane, int comp) +{ + GLSLF(1, p = patch_size[%i]; ,dst_comp); + GLSLC(0, ); + GLSLC(1, barrier(); ); + GLSLC(0, ); + if (!vert) { + GLSLF(1, for (pos.y = 0; pos.y < height[%i]; pos.y++) { ,plane); + GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) ,nb_rows, plane); + GLSLC(3, break; ); + GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); + GLSLF(3, pos.x = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows); + } else { + GLSLF(1, for (pos.x = 0; pos.x < width[%i]; pos.x++) { ,plane); + GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i]) ,nb_rows, plane); + GLSLC(3, break; ); + GLSLF(2, for (r = 0; r < %i; r++) { ,nb_rows); + GLSLF(3, pos.y = int(gl_GlobalInvocationID.x) * %i + r; ,nb_rows); + } + GLSLC(0, ); + GLSLC(3, a = DTYPE(0); ); + GLSLC(3, b = DTYPE(0); ); + GLSLC(3, c = DTYPE(0); ); + GLSLC(3, d = DTYPE(0); ); + GLSLC(0, ); + GLSLC(3, lt = ((pos.x - p) < 0) || ((pos.y - p) < 0); ); + GLSLC(0, ); + GLSLF(3, src[0] = imageLoad(input_img[%i], pos + offs[0])[%i]; ,plane, comp); + GLSLF(3, src[1] = imageLoad(input_img[%i], pos + offs[1])[%i]; ,plane, comp); + GLSLF(3, src[2] = imageLoad(input_img[%i], pos + offs[2])[%i]; ,plane, comp); + GLSLF(3, src[3] = imageLoad(input_img[%i], pos + offs[3])[%i]; ,plane, comp); + GLSLC(0, ); + GLSLC(3, if (lt == false) { ); + GLSLC(3, offset = int_stride * uint64_t(pos.y - p); ); + GLSLC(3, dst = 
DataBuffer(uint64_t(integral_data) + offset); ); + GLSLC(4, a = dst.v[pos.x - p]; ); + GLSLC(4, c = dst.v[pos.x + p]; ); + GLSLC(3, offset = int_stride * uint64_t(pos.y + p); ); + GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset); ); + GLSLC(4, b = dst.v[pos.x - p]; ); + GLSLC(4, d = dst.v[pos.x + p]; ); + GLSLC(3, } ); + GLSLC(0, ); + GLSLC(3, patch_diff = d + a - b - c; ); + GLSLF(3, w = exp(patch_diff * strength[%i]); ,dst_comp); + GLSLC(3, w_sum = w[0] + w[1] + w[2] + w[3]; ); + GLSLC(3, sum = dot(w, src*255); ); + GLSLC(0, ); + if (t > 1) { + GLSLF(3, atomicAdd(weights_%i[pos.y*ws_stride[%i] + pos.x], w_sum); ,dst_comp, dst_comp); + GLSLF(3, atomicAdd(sums_%i[pos.y*ws_stride[%i] + pos.x], sum); ,dst_comp, dst_comp); + } else { + GLSLF(3, weights_%i[pos.y*ws_stride[%i] + pos.x] += w_sum; ,dst_comp, dst_comp); + GLSLF(3, sums_%i[pos.y*ws_stride[%i] + pos.x] += sum; ,dst_comp, dst_comp); + } + GLSLC(2, } ); + GLSLC(1, } ); +} + +typedef struct HorizontalPushData { + uint32_t width[4]; + uint32_t height[4]; + uint32_t ws_stride[4]; + int32_t patch_size[4]; + float strength[4]; + VkDeviceAddress integral_base; + uint64_t integral_size; + uint64_t int_stride; + uint32_t xyoffs_start; +} HorizontalPushData; + +static av_cold int init_weights_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, + FFVulkanShader *shd, + FFVkSPIRVCompiler *spv, + int width, int height, int t, + const AVPixFmtDescriptor *desc, + int planes, int *nb_rows) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + FFVulkanDescriptorSetBinding *desc_set; + int max_dim = FFMAX(width, height); + uint32_t max_wg = vkctx->props.properties.limits.maxComputeWorkGroupSize[0]; + int wg_size, wg_rows; + + /* Round the max workgroup size to the previous power of two */ + wg_size = max_wg; + wg_rows = 1; + + if (max_wg > max_dim) { + wg_size = max_dim; + } else if (max_wg < max_dim) { + /* Make it fit */ + while (wg_size*wg_rows < max_dim) + wg_rows++; + } + + 
RET(ff_vk_shader_init(vkctx, shd, "nlmeans_weights", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + wg_size, 1, 1, + 0)); + + *nb_rows = wg_rows; + + if (t > 1) + GLSLC(0, #extension GL_EXT_shader_atomic_float : require ); + GLSLC(0, #extension GL_ARB_gpu_shader_int64 : require ); + GLSLC(0, ); + GLSLF(0, #define DTYPE %s ,TYPE_NAME); + GLSLF(0, #define T_ALIGN %i ,TYPE_SIZE); + GLSLC(0, ); + GLSLC(0, layout(buffer_reference, buffer_reference_align = T_ALIGN) buffer DataBuffer { ); + GLSLC(1, DTYPE v[]; ); + GLSLC(0, }; ); + GLSLC(0, ); + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, uvec4 width; ); + GLSLC(1, uvec4 height; ); + GLSLC(1, uvec4 ws_stride; ); + GLSLC(1, ivec4 patch_size; ); + GLSLC(1, vec4 strength; ); + GLSLC(1, DataBuffer integral_base; ); + GLSLC(1, uint64_t integral_size; ); + GLSLC(1, uint64_t int_stride; ); + GLSLC(1, uint xyoffs_start; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_shader_add_push_const(shd, 0, sizeof(HorizontalPushData), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "weights_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_0[];", + }, + { + .name = "sums_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_0[];", + }, + { + .name = "weights_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_1[];", + }, + { + .name = "sums_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = 
VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_1[];", + }, + { + .name = "weights_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_2[];", + }, + { + .name = "sums_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_2[];", + }, + { + .name = "weights_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_3[];", + }, + { + .name = "sums_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_3[];", + }, + }; + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1 + 2*desc->nb_components, 0, 0)); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "xyoffsets_buffer", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "ivec2 xyoffsets[];", + }, + }; + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 1, 1, 0)); + + GLSLC(0, ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, uint64_t offset; ); + GLSLC(1, DataBuffer dst; ); + GLSLC(1, float s1; ); + GLSLC(1, DTYPE s2; ); + GLSLC(1, DTYPE prefix_sum; ); + GLSLF(1, DTYPE psum[%i]; ,*nb_rows); + GLSLC(1, int r; ); + GLSLC(1, ivec2 pos; ); + GLSLC(1, int p; ); + GLSLC(0, ); + GLSLC(1, DataBuffer integral_data; ); + GLSLF(1, ivec2 offs[%i]; ,TYPE_ELEMS); + GLSLC(0, ); + GLSLC(1, int invoc_idx = int(gl_WorkGroupID.z); ); + GLSLC(0, ); + GLSLC(1, offset = integral_size * invoc_idx; ); + GLSLC(1, integral_data = DataBuffer(uint64_t(integral_base) + offset); ); + for (int i = 0; i < TYPE_ELEMS; i++) + GLSLF(1, offs[%i] = xyoffsets[xyoffs_start + %i*invoc_idx + %i]; ,i,TYPE_ELEMS,i); + GLSLC(0, ); + GLSLC(1, DTYPE a; ); + GLSLC(1, DTYPE b; ); + GLSLC(1, DTYPE c; ); + GLSLC(1, DTYPE d; ); + GLSLC(0, ); + GLSLC(1, DTYPE 
patch_diff; ); + if (TYPE_ELEMS == 4) { + GLSLC(1, vec4 src; ); + GLSLC(1, vec4 w; ); + } else { + GLSLC(1, vec4 src[4]; ); + GLSLC(1, vec4 w[4]; ); + } + GLSLC(1, float w_sum; ); + GLSLC(1, float sum; ); + GLSLC(0, ); + GLSLC(1, bool lt; ); + GLSLC(1, bool gt; ); + GLSLC(0, ); + + for (int i = 0; i < desc->nb_components; i++) { + int off = desc->comp[i].offset / (FFALIGN(desc->comp[i].depth, 8)/8); + if (width >= height) { + insert_horizontal_pass(shd, *nb_rows, 1, desc->comp[i].plane, off); + insert_vertical_pass(shd, *nb_rows, 0, desc->comp[i].plane, off); + insert_weights_pass(shd, *nb_rows, 0, t, i, desc->comp[i].plane, off); + } else { + insert_vertical_pass(shd, *nb_rows, 1, desc->comp[i].plane, off); + insert_horizontal_pass(shd, *nb_rows, 0, desc->comp[i].plane, off); + insert_weights_pass(shd, *nb_rows, 1, t, i, desc->comp[i].plane, off); + } + } + + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, exec, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +typedef struct DenoisePushData { + uint32_t ws_stride[4]; +} DenoisePushData; + +static av_cold int init_denoise_pipeline(FFVulkanContext *vkctx, FFVkExecPool *exec, + FFVulkanShader *shd, FFVkSPIRVCompiler *spv, + const AVPixFmtDescriptor *desc, int planes) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + FFVulkanDescriptorSetBinding *desc_set; + + RET(ff_vk_shader_init(vkctx, shd, "nlmeans_denoise", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + 32, 32, 1, + 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, uvec4 ws_stride; ); + GLSLC(0, }; ); + + ff_vk_shader_add_push_const(shd, 0, sizeof(DenoisePushData), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc_set = 
(FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(vkctx->input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2, 0, 0)); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "weights_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_0[];", + }, + { + .name = "sums_buffer_0", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_0[];", + }, + { + .name = "weights_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_1[];", + }, + { + .name = "sums_buffer_1", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_1[];", + }, + { + .name = "weights_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_2[];", + }, + { + .name = "sums_buffer_2", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_2[];", + }, + { + .name = "weights_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float weights_3[];", + 
}, + { + .name = "sums_buffer_3", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "float sums_3[];", + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc_set, 2*desc->nb_components, 0, 0)); + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLC(1, const uint plane = uint(gl_WorkGroupID.z); ); + GLSLC(0, ); + GLSLC(1, float w_sum; ); + GLSLC(1, float sum; ); + GLSLC(1, vec4 src; ); + GLSLC(1, vec4 r; ); + GLSLC(0, ); + GLSLC(1, size = imageSize(output_img[plane]); ); + GLSLC(1, if (!IS_WITHIN(pos, size)) ); + GLSLC(2, return; ); + GLSLC(0, ); + GLSLC(1, src = imageLoad(input_img[plane], pos); ); + GLSLC(0, ); + for (int c = 0; c < desc->nb_components; c++) { + int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8); + GLSLF(1, if (plane == %i) { ,desc->comp[c].plane); + GLSLF(2, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c); + GLSLF(2, sum = sums_%i[pos.y*ws_stride[%i] + pos.x]; ,c, c); + GLSLF(2, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255; ,off, off); + GLSLC(1, } ); + GLSLC(0, ); + } + GLSLC(1, imageStore(output_img[plane], pos, r); ); + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, exec, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int rad, err; + int xcnt = 0, ycnt = 0; + NLMeansVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVkSPIRVCompiler *spv = NULL; + int *offsets_buf; + int offsets_dispatched = 0, nb_dispatches = 0; + + const AVPixFmtDescriptor *desc; + desc = 
av_pix_fmt_desc_get(vkctx->output_format); + if (!desc) + return AVERROR(EINVAL); + + if (!(s->opts.r & 1)) { + s->opts.r |= 1; + av_log(ctx, AV_LOG_WARNING, "Research size should be odd, setting to %i", + s->opts.r); + } + + if (!(s->opts.p & 1)) { + s->opts.p |= 1; + av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i", + s->opts.p); + } + + for (int i = 0; i < 4; i++) { + double str = (s->opts.sc[i] > 1.0) ? s->opts.sc[i] : s->opts.s; + int ps = (s->opts.pc[i] ? s->opts.pc[i] : s->opts.p); + str = 10.0f*str; + str *= -str; + str = 255.0*255.0 / str; + s->strength[i] = str; + if (!(ps & 1)) { + ps |= 1; + av_log(ctx, AV_LOG_WARNING, "Patch size should be odd, setting to %i", + ps); + } + s->patch[i] = ps / 2; + } + + rad = s->opts.r/2; + s->nb_offsets = (2*rad + 1)*(2*rad + 1) - 1; + s->xoffsets = av_malloc(s->nb_offsets*sizeof(*s->xoffsets)); + s->yoffsets = av_malloc(s->nb_offsets*sizeof(*s->yoffsets)); + s->nb_offsets = 0; + + for (int x = -rad; x <= rad; x++) { + for (int y = -rad; y <= rad; y++) { + if (!x && !y) + continue; + + s->xoffsets[xcnt++] = x; + s->yoffsets[ycnt++] = y; + s->nb_offsets++; + } + } + + RET(ff_vk_create_buf(&s->vkctx, &s->xyoffsets_buf, 2*s->nb_offsets*sizeof(int32_t), NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)); + RET(ff_vk_map_buffer(&s->vkctx, &s->xyoffsets_buf, (uint8_t **)&offsets_buf, 0)); + + for (int i = 0; i < 2*s->nb_offsets; i += 2) { + offsets_buf[i + 0] = s->xoffsets[i >> 1]; + offsets_buf[i + 1] = s->yoffsets[i >> 1]; + } + + RET(ff_vk_unmap_buffer(&s->vkctx, &s->xyoffsets_buf, 1)); + + s->opts.t = FFMIN(s->opts.t, (FFALIGN(s->nb_offsets, TYPE_ELEMS) / TYPE_ELEMS)); + if (!vkctx->atomic_float_feats.shaderBufferFloat32AtomicAdd) { + av_log(ctx, AV_LOG_WARNING, "Device doesn't support atomic float adds, " + "disabling dispatch parallelism\n"); + s->opts.t = 1; + } + + spv = 
ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, 1, 0, 0, 0, NULL)); + + RET(init_weights_pipeline(vkctx, &s->e, &s->shd_weights, + spv, s->vkctx.output_width, s->vkctx.output_height, + s->opts.t, desc, planes, &s->pl_weights_rows)); + + RET(init_denoise_pipeline(vkctx, &s->e, &s->shd_denoise, + spv, desc, planes)); + + RET(ff_vk_shader_update_desc_buffer(vkctx, &s->e.contexts[0], &s->shd_weights, + 1, 0, 0, + &s->xyoffsets_buf, 0, s->xyoffsets_buf.size, + VK_FORMAT_UNDEFINED)); + + do { + int wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t); + wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]); + offsets_dispatched += wg_invoc * TYPE_ELEMS; + nb_dispatches++; + } while (offsets_dispatched < s->nb_offsets); + + av_log(ctx, AV_LOG_VERBOSE, "Filter initialized, %i x/y offsets, %i dispatches\n", + s->nb_offsets, nb_dispatches); + + s->initialized = 1; + +fail: + if (spv) + spv->uninit(&spv); + + return err; +} + +static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec, + FFVkBuffer *ws_vk, uint32_t ws_stride[4]) +{ + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + DenoisePushData pd = { + { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, + }; + + /* Denoise pass pipeline */ + ff_vk_exec_bind_shader(vkctx, exec, &s->shd_denoise); + + /* Push data */ + ff_vk_shader_update_push_const(vkctx, exec, &s->shd_denoise, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + 
.srcStageMask = ws_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = ws_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = ws_vk->buf, + .size = ws_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + ws_vk->stage = buf_bar[0].dstStageMask; + ws_vk->access = buf_bar[0].dstAccessMask; + + /* End of denoise pass */ + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, s->shd_denoise.lg_size[0])/s->shd_denoise.lg_size[0], + FFALIGN(vkctx->output_height, s->shd_denoise.lg_size[1])/s->shd_denoise.lg_size[1], + av_pix_fmt_count_planes(s->vkctx.output_format)); + + return 0; +} + +static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFrame *out = NULL; + AVFilterContext *ctx = link->dst; + NLMeansVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + const AVPixFmtDescriptor *desc; + int plane_widths[4]; + int plane_heights[4]; + + int offsets_dispatched = 0; + + /* Integral */ + AVBufferRef *integral_buf = NULL; + FFVkBuffer *integral_vk; + size_t int_stride; + size_t int_size; + + /* Weights/sums */ + AVBufferRef *ws_buf = NULL; + FFVkBuffer *ws_vk; + VkDeviceSize weights_offs[4]; + VkDeviceSize sums_offs[4]; + uint32_t ws_stride[4]; + size_t ws_size[4]; + size_t ws_total_size = 0; + + FFVkExecContext *exec; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[8]; + int nb_img_bar = 0; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + if (!s->initialized) + RET(init_filter(ctx)); + + desc = 
av_pix_fmt_desc_get(vkctx->output_format); + if (!desc) + return AVERROR(EINVAL); + + /* Integral image */ + int_stride = s->shd_weights.lg_size[0]*s->pl_weights_rows*TYPE_SIZE; + int_size = s->shd_weights.lg_size[0]*s->pl_weights_rows*int_stride; + + /* Plane dimensions */ + for (int i = 0; i < desc->nb_components; i++) { + plane_widths[i] = !i || (i == 3) ? vkctx->output_width : AV_CEIL_RSHIFT(vkctx->output_width, desc->log2_chroma_w); + plane_heights[i] = !i || (i == 3) ? vkctx->output_height : AV_CEIL_RSHIFT(vkctx->output_height, desc->log2_chroma_w); + plane_widths[i] = FFALIGN(plane_widths[i], s->shd_denoise.lg_size[0]); + plane_heights[i] = FFALIGN(plane_heights[i], s->shd_denoise.lg_size[1]); + + ws_stride[i] = plane_widths[i]; + ws_size[i] = ws_stride[i] * plane_heights[i] * sizeof(float); + ws_total_size += ws_size[i]; + } + + /* Buffers */ + err = ff_vk_get_pooled_buffer(&s->vkctx, &s->integral_buf_pool, &integral_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, + s->opts.t * int_size, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + integral_vk = (FFVkBuffer *)integral_buf->data; + + err = ff_vk_get_pooled_buffer(&s->vkctx, &s->ws_buf_pool, &ws_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, + ws_total_size * 2, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + ws_vk = (FFVkBuffer *)ws_buf->data; + + weights_offs[0] = 0; + sums_offs[0] = ws_total_size; + for (int i = 1; i < desc->nb_components; i++) { + weights_offs[i] = weights_offs[i - 1] + ws_size[i - 1]; + sums_offs[i] = sums_offs[i - 1] + ws_size[i - 1]; + } + + /* Output frame */ + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Execution context */ + exec = ff_vk_exec_get(&s->vkctx, &s->e); + ff_vk_exec_start(vkctx, exec); + + /* Dependencies */ + 
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + RET(ff_vk_exec_add_dep_buf(vkctx, exec, &integral_buf, 1, 0)); + integral_buf = NULL; + + RET(ff_vk_exec_add_dep_buf(vkctx, exec, &ws_buf, 1, 0)); + ws_buf = NULL; + + /* Input frame prep */ + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT)); + ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + /* Output frame prep */ + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT)); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + nb_buf_bar = 0; + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = ws_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = ws_vk->access, + .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = ws_vk->buf, + .size = ws_vk->size, + .offset = 0, + }; + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = integral_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = integral_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 
+ .buffer = integral_vk->buf, + .size = integral_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + ws_vk->stage = buf_bar[0].dstStageMask; + ws_vk->access = buf_bar[0].dstAccessMask; + integral_vk->stage = buf_bar[1].dstStageMask; + integral_vk->access = buf_bar[1].dstAccessMask; + + /* Buffer zeroing */ + vk->CmdFillBuffer(exec->buf, ws_vk->buf, 0, ws_vk->size, 0x0); + + nb_buf_bar = 0; + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = ws_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = ws_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = ws_vk->buf, + .size = ws_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + ws_vk->stage = buf_bar[0].dstStageMask; + ws_vk->access = buf_bar[0].dstAccessMask; + + /* Update weights descriptors */ + ff_vk_shader_update_img_array(vkctx, exec, &s->shd_weights, in, in_views, 0, 0, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + for (int i = 0; i < desc->nb_components; i++) { + RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 0, 0, + ws_vk, weights_offs[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_weights, 0, 1 + i*2 + 1, 0, + ws_vk, sums_offs[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + } + + /* Update denoise descriptors */ + 
ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, in, in_views, 0, 0, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + ff_vk_shader_update_img_array(vkctx, exec, &s->shd_denoise, out, out_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + for (int i = 0; i < desc->nb_components; i++) { + RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 0, 0, + ws_vk, weights_offs[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd_denoise, 1, i*2 + 1, 0, + ws_vk, sums_offs[i], ws_size[i], + VK_FORMAT_UNDEFINED)); + } + + /* Weights pipeline */ + ff_vk_exec_bind_shader(vkctx, exec, &s->shd_weights); + + do { + int wg_invoc; + HorizontalPushData pd = { + { plane_widths[0], plane_widths[1], plane_widths[2], plane_widths[3] }, + { plane_heights[0], plane_heights[1], plane_heights[2], plane_heights[3] }, + { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, + { s->patch[0], s->patch[1], s->patch[2], s->patch[3] }, + { s->strength[0], s->strength[1], s->strength[2], s->strength[2], }, + integral_vk->address, + (uint64_t)int_size, + (uint64_t)int_stride, + offsets_dispatched, + }; + + /* Push data */ + ff_vk_shader_update_push_const(vkctx, exec, &s->shd_weights, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + if (offsets_dispatched) { + nb_buf_bar = 0; + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = integral_vk->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = integral_vk->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = integral_vk->buf, + .size = integral_vk->size, + .offset = 0, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + 
.pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + integral_vk->stage = buf_bar[1].dstStageMask; /* NOTE(review): the barrier just recorded was stored at buf_bar[0] (nb_buf_bar was reset to 0 before a single entry was pushed); index 1 is left over from the earlier two-entry batch - confirm [0] was intended */ + integral_vk->access = buf_bar[1].dstAccessMask; /* NOTE(review): same index question as the line above */ + } + + wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t); + wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]); + + /* End of horizontal pass */ + vk->CmdDispatch(exec->buf, 1, 1, wg_invoc); + + offsets_dispatched += wg_invoc * TYPE_ELEMS; + } while (offsets_dispatched < s->nb_offsets); + + RET(denoise_pass(s, exec, ws_vk, ws_stride)); + + err = ff_vk_exec_submit(vkctx, exec); + if (err < 0) + return err; /* NOTE(review): returns without going through fail:, so in and out are not freed on this path - confirm who owns them after a failed submit */ + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_buffer_unref(&integral_buf); + av_buffer_unref(&ws_buf); + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static void nlmeans_vulkan_uninit(AVFilterContext *avctx) /* Filter uninit callback: frees the exec pool, both shaders, the two buffer pools, the Vulkan context and the x/y offset arrays, then clears the initialized flag. */ +{ + NLMeansVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd_weights); + ff_vk_shader_free(vkctx, &s->shd_denoise); + + av_buffer_pool_uninit(&s->integral_buf_pool); + av_buffer_pool_uninit(&s->ws_buf_pool); + + ff_vk_uninit(&s->vkctx); + + av_freep(&s->xoffsets); + av_freep(&s->yoffsets); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(NLMeansVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption nlmeans_vulkan_options[] = { + { "s", "denoising strength for all components", OFFSET(opts.s), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "p", "patch size for all components", OFFSET(opts.p), AV_OPT_TYPE_INT, { .i64 = 3*2+1 }, 0, 99, FLAGS }, + { "r", "research window radius", OFFSET(opts.r), AV_OPT_TYPE_INT, { .i64 = 7*2+1 }, 0, 99, FLAGS }, + { "t", "parallelism", OFFSET(opts.t), AV_OPT_TYPE_INT, { .i64 = 36 }, 1, 168, FLAGS }, + +
{ "s1", "denoising strength for component 1", OFFSET(opts.sc[0]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "s2", "denoising strength for component 2", OFFSET(opts.sc[1]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "s3", "denoising strength for component 3", OFFSET(opts.sc[2]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + { "s4", "denoising strength for component 4", OFFSET(opts.sc[3]), AV_OPT_TYPE_DOUBLE, { .dbl = 1.0 }, 1.0, 100.0, FLAGS }, + + { "p1", "patch size for component 1", OFFSET(opts.pc[0]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + { "p2", "patch size for component 2", OFFSET(opts.pc[1]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + { "p3", "patch size for component 3", OFFSET(opts.pc[2]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + { "p4", "patch size for component 4", OFFSET(opts.pc[3]), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 99, FLAGS }, + + { NULL } +}; + +AVFILTER_DEFINE_CLASS(nlmeans_vulkan); + +static const AVFilterPad nlmeans_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &nlmeans_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad nlmeans_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, +}; + +const FFFilter ff_vf_nlmeans_vulkan = { + .p.name = "nlmeans_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Non-local means denoiser (Vulkan)"), + .p.priv_class = &nlmeans_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(NLMeansVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &nlmeans_vulkan_uninit, + FILTER_INPUTS(nlmeans_vulkan_inputs), + FILTER_OUTPUTS(nlmeans_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From dce5b25f4dc18874a41e6a56777ae09d2f459553 Mon Sep 17 00:00:00 2001 From: Jamaika1 
<lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:58:31 +0000 Subject: [PATCH 019/118] Changing vulkan file directory --- libavfilter/vf_overlay_vulkan.c | 347 -------------------------------- 1 file changed, 347 deletions(-) delete mode 100644 libavfilter/vf_overlay_vulkan.c diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c deleted file mode 100644 index 1f9eed8e08..0000000000 --- a/libavfilter/vf_overlay_vulkan.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/random_seed.h" -#include "libavutil/opt.h" -#include "libavutil/vulkan_spirv.h" -#include "vulkan_filter.h" - -#include "filters.h" -#include "framesync.h" -#include "video.h" - -typedef struct OverlayVulkanContext { - FFVulkanContext vkctx; - FFFrameSync fs; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - - /* Push constants / options */ - struct { - int32_t o_offset[2*3]; - int32_t o_size[2*3]; - } opts; - - int overlay_x; - int overlay_y; - int overlay_w; - int overlay_h; -} OverlayVulkanContext; - -static const char overlay_noalpha[] = { - C(0, void overlay_noalpha(int i, ivec2 pos) ) - C(0, { ) - C(1, if ((o_offset[i].x <= pos.x) && (o_offset[i].y <= pos.y) && - (pos.x < (o_offset[i].x + o_size[i].x)) && - (pos.y < (o_offset[i].y + o_size[i].y))) { ) - C(2, vec4 res = imageLoad(overlay_img[i], pos - o_offset[i]); ) - C(2, imageStore(output_img[i], pos, res); ) - C(1, } else { ) - C(2, vec4 res = imageLoad(main_img[i], pos); ) - C(2, imageStore(output_img[i], pos, res); ) - C(1, } ) - C(0, } ) -}; - -static const char overlay_alpha[] = { - C(0, void overlay_alpha_opaque(int i, ivec2 pos) ) - C(0, { ) - C(1, vec4 res = imageLoad(main_img[i], pos); ) - C(1, if ((o_offset[i].x <= pos.x) && (o_offset[i].y <= pos.y) && - (pos.x < (o_offset[i].x + o_size[i].x)) && - (pos.y < (o_offset[i].y + o_size[i].y))) { ) - C(2, vec4 ovr = imageLoad(overlay_img[i], pos - o_offset[i]); ) - C(2, res = ovr * ovr.a + res * (1.0f - ovr.a); ) - C(2, res.a = 1.0f; ) - C(2, imageStore(output_img[i], pos, res); ) - C(1, } ) - C(1, imageStore(output_img[i], pos, res); ) - C(0, } ) -}; - -static av_cold int init_filter(AVFilterContext *ctx) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void 
*spv_opaque = NULL; - OverlayVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; - const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format); - FFVulkanShader *shd = &s->shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "overlay", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 32, 1, - 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, ivec2 o_offset[3]; ); - GLSLC(1, ivec2 o_size[3]; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), - VK_SHADER_STAGE_COMPUTE_BIT); - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "main_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "overlay_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems 
= planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 3, 0, 0)); - - GLSLD( overlay_noalpha ); - GLSLD( overlay_alpha ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, int planes = %i; ,planes); - GLSLC(1, for (int i = 0; i < planes; i++) { ); - if (ialpha) - GLSLC(2, overlay_alpha_opaque(i, pos); ); - else - GLSLC(2, overlay_noalpha(i, pos); ); - GLSLC(1, } ); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->opts.o_offset[0] = s->overlay_x; - s->opts.o_offset[1] = s->overlay_y; - s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; - s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; - s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; - s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; - - s->opts.o_size[0] = s->overlay_w; - s->opts.o_size[1] = s->overlay_h; - s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; - s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; - s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; - s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static int overlay_vulkan_blend(FFFrameSync *fs) -{ - int err; - AVFilterContext *ctx = fs->parent; - OverlayVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - AVFrame *input_main, *input_overlay, *out; - - err = ff_framesync_get_frame(fs, 0, &input_main, 0); - if (err < 0) - goto fail; - err = ff_framesync_get_frame(fs, 1, &input_overlay, 0); - if (err < 0) - goto fail; - - if 
(!input_main || !input_overlay) - return 0; - - if (!s->initialized) { - AVHWFramesContext *main_fc = (AVHWFramesContext*)input_main->hw_frames_ctx->data; - AVHWFramesContext *overlay_fc = (AVHWFramesContext*)input_overlay->hw_frames_ctx->data; - if (main_fc->sw_format != overlay_fc->sw_format) { - av_log(ctx, AV_LOG_ERROR, "Mismatching sw formats!\n"); - return AVERROR(EINVAL); - } - - s->overlay_w = input_overlay->width; - s->overlay_h = input_overlay->height; - - RET(init_filter(ctx)); - } - - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - err = AVERROR(ENOMEM); - goto fail; - } - - RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, - out, (AVFrame *[]){ input_main, input_overlay }, 2, - VK_NULL_HANDLE, &s->opts, sizeof(s->opts))); - - err = av_frame_copy_props(out, input_main); - if (err < 0) - goto fail; - - return ff_filter_frame(outlink, out); - -fail: - av_frame_free(&out); - return err; -} - -static int overlay_vulkan_config_output(AVFilterLink *outlink) -{ - int err; - AVFilterContext *avctx = outlink->src; - OverlayVulkanContext *s = avctx->priv; - - err = ff_vk_filter_config_output(outlink); - if (err < 0) - return err; - - err = ff_framesync_init_dualinput(&s->fs, avctx); - if (err < 0) - return err; - - return ff_framesync_configure(&s->fs); -} - -static int overlay_vulkan_activate(AVFilterContext *avctx) -{ - OverlayVulkanContext *s = avctx->priv; - - return ff_framesync_activate(&s->fs); -} - -static av_cold int overlay_vulkan_init(AVFilterContext *avctx) -{ - OverlayVulkanContext *s = avctx->priv; - - s->fs.on_event = &overlay_vulkan_blend; - - return ff_vk_filter_init(avctx); -} - -static void overlay_vulkan_uninit(AVFilterContext *avctx) -{ - OverlayVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - ff_vk_uninit(&s->vkctx); - ff_framesync_uninit(&s->fs); - - s->initialized = 0; -} - -#define OFFSET(x) 
offsetof(OverlayVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) -static const AVOption overlay_vulkan_options[] = { - { "x", "Set horizontal offset", OFFSET(overlay_x), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, - { "y", "Set vertical offset", OFFSET(overlay_y), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(overlay_vulkan); - -static const AVFilterPad overlay_vulkan_inputs[] = { - { - .name = "main", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_input, - }, - { - .name = "overlay", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_input, - }, -}; - -static const AVFilterPad overlay_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &overlay_vulkan_config_output, - }, -}; - -const FFFilter ff_vf_overlay_vulkan = { - .p.name = "overlay_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Overlay a source on top of another"), - .p.priv_class = &overlay_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(OverlayVulkanContext), - .init = &overlay_vulkan_init, - .uninit = &overlay_vulkan_uninit, - .activate = &overlay_vulkan_activate, - FILTER_INPUTS(overlay_vulkan_inputs), - FILTER_OUTPUTS(overlay_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 70bc9416a459885aa3a6bc8d5072126bc34ddb2a Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:59:04 +0000 Subject: [PATCH 020/118] Changing vulkan file directory --- libavfilter/vulkan/vf_overlay_vulkan.c | 347 +++++++++++++++++++++++++ 1 file changed, 347 insertions(+) create mode 100644 libavfilter/vulkan/vf_overlay_vulkan.c diff --git a/libavfilter/vulkan/vf_overlay_vulkan.c b/libavfilter/vulkan/vf_overlay_vulkan.c new file mode 100644 index 0000000000..05fc06c84e --- /dev/null +++ 
b/libavfilter/vulkan/vf_overlay_vulkan.c @@ -0,0 +1,347 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +#include "libavfilter/framesync.h" +#include "libavfilter/video.h" + +typedef struct OverlayVulkanContext { + FFVulkanContext vkctx; + FFFrameSync fs; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + + /* Push constants / options */ + struct { + int32_t o_offset[2*3]; + int32_t o_size[2*3]; + } opts; + + int overlay_x; + int overlay_y; + int overlay_w; + int overlay_h; +} OverlayVulkanContext; + +static const char overlay_noalpha[] = { + C(0, void overlay_noalpha(int i, ivec2 pos) ) + C(0, { ) + C(1, if ((o_offset[i].x <= pos.x) && (o_offset[i].y <= pos.y) && + (pos.x < (o_offset[i].x + o_size[i].x)) && + (pos.y < (o_offset[i].y + o_size[i].y))) { ) + C(2, vec4 res = imageLoad(overlay_img[i], pos - o_offset[i]); ) + C(2, imageStore(output_img[i], pos, res); ) + C(1, } else { ) + C(2, vec4 res = imageLoad(main_img[i], pos); ) + C(2, imageStore(output_img[i], pos, res); ) + C(1, } ) + C(0, } ) 
+}; + +static const char overlay_alpha[] = { + C(0, void overlay_alpha_opaque(int i, ivec2 pos) ) + C(0, { ) + C(1, vec4 res = imageLoad(main_img[i], pos); ) + C(1, if ((o_offset[i].x <= pos.x) && (o_offset[i].y <= pos.y) && + (pos.x < (o_offset[i].x + o_size[i].x)) && + (pos.y < (o_offset[i].y + o_size[i].y))) { ) + C(2, vec4 ovr = imageLoad(overlay_img[i], pos - o_offset[i]); ) + C(2, res = ovr * ovr.a + res * (1.0f - ovr.a); ) + C(2, res.a = 1.0f; ) + C(2, imageStore(output_img[i], pos, res); ) + C(1, } ) + C(1, imageStore(output_img[i], pos, res); ) + C(0, } ) +}; + +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + OverlayVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(s->vkctx.output_format); + FFVulkanShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, &s->shd, "overlay", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 32, 1, + 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, ivec2 o_offset[3]; ); + GLSLC(1, ivec2 o_size[3]; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "main_img", + .type = 
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "overlay_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 3, 0, 0)); + + GLSLD( overlay_noalpha ); + GLSLD( overlay_alpha ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, int planes = %i; ,planes); + GLSLC(1, for (int i = 0; i < planes; i++) { ); + if (ialpha) + GLSLC(2, overlay_alpha_opaque(i, pos); ); + else + GLSLC(2, overlay_noalpha(i, pos); ); + GLSLC(1, } ); + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->opts.o_offset[0] = s->overlay_x; + s->opts.o_offset[1] = s->overlay_y; + s->opts.o_offset[2] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; + s->opts.o_offset[3] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; + s->opts.o_offset[4] = s->opts.o_offset[0] >> pix_desc->log2_chroma_w; + s->opts.o_offset[5] = s->opts.o_offset[1] >> pix_desc->log2_chroma_h; + + s->opts.o_size[0] = s->overlay_w; + s->opts.o_size[1] = s->overlay_h; + s->opts.o_size[2] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; + s->opts.o_size[3] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; + 
s->opts.o_size[4] = s->opts.o_size[0] >> pix_desc->log2_chroma_w; + s->opts.o_size[5] = s->opts.o_size[1] >> pix_desc->log2_chroma_h; + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int overlay_vulkan_blend(FFFrameSync *fs) /* Frame-sync event handler: fetches the main (input 0) and overlay (input 1) frames, builds the shader on first use, runs it into a new output frame and forwards that frame. Returns 0 when either input frame is missing, a negative AVERROR on failure, otherwise the result of ff_filter_frame(). */ +{ + int err; + AVFilterContext *ctx = fs->parent; + OverlayVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *input_main, *input_overlay, *out = NULL; /* must start as NULL: the fail path frees out, and it is reachable before out is allocated */ + + err = ff_framesync_get_frame(fs, 0, &input_main, 0); + if (err < 0) + goto fail; + err = ff_framesync_get_frame(fs, 1, &input_overlay, 0); + if (err < 0) + goto fail; + + if (!input_main || !input_overlay) + return 0; + + if (!s->initialized) { + AVHWFramesContext *main_fc = (AVHWFramesContext*)input_main->hw_frames_ctx->data; + AVHWFramesContext *overlay_fc = (AVHWFramesContext*)input_overlay->hw_frames_ctx->data; + if (main_fc->sw_format != overlay_fc->sw_format) { + av_log(ctx, AV_LOG_ERROR, "Mismatching sw formats!\n"); + return AVERROR(EINVAL); + } + + s->overlay_w = input_overlay->width; + s->overlay_h = input_overlay->height; + + RET(init_filter(ctx)); + } + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, + out, (AVFrame *[]){ input_main, input_overlay }, 2, + VK_NULL_HANDLE, &s->opts, sizeof(s->opts))); + + err = av_frame_copy_props(out, input_main); + if (err < 0) + goto fail; + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&out); + return err; +} + +static int overlay_vulkan_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + OverlayVulkanContext *s = avctx->priv; + + err = ff_vk_filter_config_output(outlink); + if (err < 0) + return err; + + err = ff_framesync_init_dualinput(&s->fs, avctx); + if (err < 0) + return err; + + return ff_framesync_configure(&s->fs); +} +
+static int overlay_vulkan_activate(AVFilterContext *avctx) /* .activate callback: forwards to the frame-sync helper and returns its result */ +{ + OverlayVulkanContext *s = avctx->priv; + + return ff_framesync_activate(&s->fs); +} + +static av_cold int overlay_vulkan_init(AVFilterContext *avctx) /* .init callback: installs overlay_vulkan_blend as the frame-sync event handler, then runs the shared Vulkan filter init */ +{ + OverlayVulkanContext *s = avctx->priv; + + s->fs.on_event = &overlay_vulkan_blend; + + return ff_vk_filter_init(avctx); +} + +static void overlay_vulkan_uninit(AVFilterContext *avctx) /* .uninit callback: frees the exec pool and the shader, tears down the Vulkan context and the frame sync, then clears the initialized flag */ +{ + OverlayVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + ff_vk_uninit(&s->vkctx); + ff_framesync_uninit(&s->fs); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(OverlayVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption overlay_vulkan_options[] = { /* user options: overlay position, each limited to 0..INT_MAX with a default of 0 */ + { "x", "Set horizontal offset", OFFSET(overlay_x), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, + { "y", "Set vertical offset", OFFSET(overlay_y), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(overlay_vulkan); + +static const AVFilterPad overlay_vulkan_inputs[] = { /* "main" is listed first and "overlay" second; overlay_vulkan_blend reads frame-sync input 0 as main and 1 as overlay (assumes frame-sync inputs follow pad order - confirm) */ + { + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_input, + }, + { + .name = "overlay", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad overlay_vulkan_outputs[] = { /* single output; its config callback also initializes and configures the frame sync */ + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &overlay_vulkan_config_output, + }, +}; + +const FFFilter ff_vf_overlay_vulkan = { + .p.name = "overlay_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Overlay a source on top of another"), + .p.priv_class = &overlay_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(OverlayVulkanContext), + .init = &overlay_vulkan_init, + .uninit = &overlay_vulkan_uninit, + .activate = &overlay_vulkan_activate, + FILTER_INPUTS(overlay_vulkan_inputs), +
FILTER_OUTPUTS(overlay_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From 8457b1cdc40e7fa03280de856b6c6ea526331b7e Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 12:59:52 +0000 Subject: [PATCH 021/118] Changing vulkan file directory --- libavfilter/vf_scale_vulkan.c | 508 ---------------------------------- 1 file changed, 508 deletions(-) delete mode 100644 libavfilter/vf_scale_vulkan.c diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c deleted file mode 100644 index c23cfe262f..0000000000 --- a/libavfilter/vf_scale_vulkan.c +++ /dev/null @@ -1,508 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/random_seed.h" -#include "libavutil/opt.h" -#include "libavutil/vulkan_spirv.h" -#include "vulkan_filter.h" -#include "scale_eval.h" -#include "filters.h" -#include "colorspace.h" -#include "video.h" - -extern const char *ff_source_debayer_comp; - -enum ScalerFunc { - F_BILINEAR = 0, - F_NEAREST, - - F_NB, -}; - -enum DebayerFunc { - DB_BILINEAR = 0, - DB_BILINEAR_HQ, - - DB_NB, -}; - -typedef struct ScaleVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - VkSampler sampler; - - /* Push constants / options */ - struct { - float yuv_matrix[4][4]; - int crop_x; - int crop_y; - int crop_w; - int crop_h; - } opts; - - char *out_format_string; - char *w_expr; - char *h_expr; - - enum ScalerFunc scaler; - enum AVColorRange out_range; - enum DebayerFunc debayer; -} ScaleVulkanContext; - -static const char scale_bilinear[] = { - C(0, vec4 scale_bilinear(int idx, ivec2 pos, vec2 crop_range, vec2 crop_off)) - C(0, { ) - C(1, vec2 npos = (vec2(pos) + 0.5f) / imageSize(output_img[idx]); ) - C(1, npos *= crop_range; /* Reduce the range */ ) - C(1, npos += crop_off; /* Offset the start */ ) - C(1, return texture(input_img[idx], npos); ) - C(0, } ) -}; - -static const char rgb2yuv[] = { - C(0, vec4 rgb2yuv(vec4 src, int fullrange) ) - C(0, { ) - C(1, src *= yuv_matrix; ) - C(1, if (fullrange == 1) { ) - C(2, src += vec4(0.0, 0.5, 0.5, 0.0); ) - C(1, } else { ) - C(2, src *= vec4(219.0 / 255.0, 224.0 / 255.0, 224.0 / 255.0, 1.0); ) - C(2, src += vec4(16.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0, 0.0); ) - C(1, } ) - C(1, return src; ) - C(0, } ) -}; - -static const char write_nv12[] = { - C(0, void write_nv12(vec4 src, ivec2 pos) ) - C(0, { ) - C(1, 
imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0)); ) - C(1, pos /= ivec2(2); ) - C(1, imageStore(output_img[1], pos, vec4(src.g, src.b, 0.0, 0.0)); ) - C(0, } ) -}; - -static const char write_420[] = { - C(0, void write_420(vec4 src, ivec2 pos) ) - C(0, { ) - C(1, imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0)); ) - C(1, pos /= ivec2(2); ) - C(1, imageStore(output_img[1], pos, vec4(src.g, 0.0, 0.0, 0.0)); ) - C(1, imageStore(output_img[2], pos, vec4(src.b, 0.0, 0.0, 0.0)); ) - C(0, } ) -}; - -static const char write_444[] = { - C(0, void write_444(vec4 src, ivec2 pos) ) - C(0, { ) - C(1, imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0)); ) - C(1, imageStore(output_img[1], pos, vec4(src.g, 0.0, 0.0, 0.0)); ) - C(1, imageStore(output_img[2], pos, vec4(src.b, 0.0, 0.0, 0.0)); ) - C(0, } ) -}; - -static int init_scale_shader(AVFilterContext *ctx, FFVulkanShader *shd, - FFVulkanDescriptorSetBinding *desc, AVFrame *in) -{ - ScaleVulkanContext *s = ctx->priv; - GLSLD( scale_bilinear ); - - if (s->vkctx.output_format != s->vkctx.input_format) { - GLSLD( rgb2yuv ); - } - - switch (s->vkctx.output_format) { - case AV_PIX_FMT_NV12: GLSLD(write_nv12); break; - case AV_PIX_FMT_YUV420P: GLSLD( write_420); break; - case AV_PIX_FMT_YUV444P: GLSLD( write_444); break; - default: break; - } - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height); - GLSLC(1, vec2 c_r = vec2(crop_w, crop_h) / in_d; ); - GLSLC(1, vec2 c_o = vec2(crop_x, crop_y) / in_d; ); - GLSLC(0, ); - - if (s->vkctx.output_format == s->vkctx.input_format) { - for (int i = 0; i < desc[1].elems; i++) { - GLSLF(1, size = imageSize(output_img[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - switch (s->scaler) { - case F_NEAREST: - case F_BILINEAR: - GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i); - GLSLF(2, imageStore(output_img[%i], pos, 
res); ,i); - break; - }; - GLSLC(1, } ); - } - } else { - GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); ); - GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG); - switch (s->vkctx.output_format) { - case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break; - case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break; - case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break; - default: return AVERROR(EINVAL); - } - } - - GLSLC(0, } ); - - if (s->vkctx.output_format != s->vkctx.input_format) { - const AVLumaCoefficients *lcoeffs; - double tmp_mat[3][3]; - - lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace); - if (!lcoeffs) { - av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n"); - return AVERROR(EINVAL); - } - - ff_fill_rgb2yuv_table(lcoeffs, tmp_mat); - - for (int y = 0; y < 3; y++) - for (int x = 0; x < 3; x++) - s->opts.yuv_matrix[x][y] = tmp_mat[x][y]; - s->opts.yuv_matrix[3][3] = 1.0; - } - - return 0; -} - -static int init_debayer_shader(ScaleVulkanContext *s, FFVulkanShader *shd, - FFVulkanDescriptorSetBinding *desc, AVFrame *in) -{ - GLSLD(ff_source_debayer_comp); - - GLSLC(0, void main(void)); - GLSLC(0, { ); - if (s->debayer == DB_BILINEAR) - GLSLC(1, debayer_bilinear();); - else if (s->debayer == DB_BILINEAR_HQ) - GLSLC(1, debayer_bilinear_hq();); - GLSLC(0, } ); - - shd->lg_size[0] <<= 1; - shd->lg_size[1] <<= 1; - - return 0; -} - -static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - VkFilter sampler_mode; - ScaleVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanShader *shd = &s->shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - int debayer = s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16; - int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format); - - switch (s->scaler) { - case F_NEAREST: - sampler_mode = VK_FILTER_NEAREST; - break; - case 
F_BILINEAR: - sampler_mode = VK_FILTER_LINEAR; - break; - }; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - - if (!debayer) - RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode)); - - RET(ff_vk_shader_init(vkctx, &s->shd, "scale", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 32, 1, - 0)); - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_img", - .type = debayer ? - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE : - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .mem_layout = debayer ? - ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT) : - NULL, - .mem_quali = "readonly", - .dimensions = 2, - .elems = in_planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER(s->sampler), - }, - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = av_pix_fmt_count_planes(s->vkctx.output_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, mat4 yuv_matrix; ); - GLSLC(1, int crop_x; ); - GLSLC(1, int crop_y; ); - GLSLC(1, int crop_w; ); - GLSLC(1, int crop_h; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), - VK_SHADER_STAGE_COMPUTE_BIT); - - if (debayer) - err = init_debayer_shader(s, shd, desc, in); - else - err = init_scale_shader(ctx, shd, desc, in); - if (err < 0) - goto fail; - - RET(spv->compile_shader(vkctx, spv, shd, 
&spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - int err; - AVFilterContext *ctx = link->dst; - ScaleVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - - AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - err = AVERROR(ENOMEM); - goto fail; - } - - if (!s->initialized) - RET(init_filter(ctx, in)); - - s->opts.crop_x = in->crop_left; - s->opts.crop_y = in->crop_top; - s->opts.crop_w = in->width - (in->crop_left + in->crop_right); - s->opts.crop_h = in->height - (in->crop_top + in->crop_bottom); - - RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in, - s->sampler, &s->opts, sizeof(s->opts))); - - err = av_frame_copy_props(out, in); - if (err < 0) - goto fail; - - if (out->width != in->width || out->height != in->height) { - av_frame_side_data_remove_by_props(&out->side_data, &out->nb_side_data, - AV_SIDE_DATA_PROP_SIZE_DEPENDENT); - } - - if (s->out_range != AVCOL_RANGE_UNSPECIFIED) - out->color_range = s->out_range; - if (s->vkctx.output_format != s->vkctx.input_format) - out->chroma_location = AVCHROMA_LOC_TOPLEFT; - - av_frame_free(&in); - - return ff_filter_frame(outlink, out); - -fail: - av_frame_free(&in); - av_frame_free(&out); - return err; -} - -static int scale_vulkan_config_output(AVFilterLink *outlink) -{ - int err; - AVFilterContext *avctx = outlink->src; - ScaleVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - AVFilterLink *inlink = outlink->src->inputs[0]; - - err = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink, - &vkctx->output_width, - &vkctx->output_height); - if (err < 0) - return err; - - 
ff_scale_adjust_dimensions(inlink, &vkctx->output_width, &vkctx->output_height, 0, 1, 1.f); - - outlink->w = vkctx->output_width; - outlink->h = vkctx->output_height; - - if (s->out_format_string) { - s->vkctx.output_format = av_get_pix_fmt(s->out_format_string); - if (s->vkctx.output_format == AV_PIX_FMT_NONE) { - av_log(avctx, AV_LOG_ERROR, "Invalid output format.\n"); - return AVERROR(EINVAL); - } - } else { - s->vkctx.output_format = s->vkctx.input_format; - } - - if (s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16) { - if (s->vkctx.output_format == s->vkctx.input_format) { - s->vkctx.output_format = AV_PIX_FMT_RGBA64; - } else if (!ff_vk_mt_is_np_rgb(s->vkctx.output_format)) { - av_log(avctx, AV_LOG_ERROR, "Unsupported output format for debayer\n"); - return AVERROR(EINVAL); - } - if (inlink->w != outlink->w || inlink->w != outlink->w) { - av_log(avctx, AV_LOG_ERROR, "Scaling is not supported with debayering\n"); - return AVERROR_PATCHWELCOME; - } - } else if (s->vkctx.output_format != s->vkctx.input_format) { - if (!ff_vk_mt_is_np_rgb(s->vkctx.input_format)) { - av_log(avctx, AV_LOG_ERROR, "Unsupported input format for conversion\n"); - return AVERROR(EINVAL); - } - if (s->vkctx.output_format != AV_PIX_FMT_NV12 && - s->vkctx.output_format != AV_PIX_FMT_YUV420P && - s->vkctx.output_format != AV_PIX_FMT_YUV444P) { - av_log(avctx, AV_LOG_ERROR, "Unsupported output format\n"); - return AVERROR(EINVAL); - } - } else if (s->out_range != AVCOL_RANGE_UNSPECIFIED) { - av_log(avctx, AV_LOG_ERROR, "Cannot change range without converting format\n"); - return AVERROR(EINVAL); - } - - return ff_vk_filter_config_output(outlink); -} - -static void scale_vulkan_uninit(AVFilterContext *avctx) -{ - ScaleVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - if (s->sampler) - vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, - 
vkctx->hwctx->alloc); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -#define OFFSET(x) offsetof(ScaleVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) -static const AVOption scale_vulkan_options[] = { - { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, - { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, - { "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB, .flags = FLAGS, .unit = "scaler" }, - { "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "scaler" }, - { "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, .unit = "scaler" }, - { "debayer", "Debayer algorithm to use", OFFSET(debayer), AV_OPT_TYPE_INT, {.i64 = DB_BILINEAR_HQ}, 0, DB_NB, .flags = FLAGS, .unit = "debayer" }, - { "bilinear", "Bilinear debayering (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = DB_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "debayer" }, - { "bilinear_hq", "Bilinear debayering (high quality)", 0, AV_OPT_TYPE_CONST, {.i64 = DB_BILINEAR_HQ}, 0, 0, .flags = FLAGS, .unit = "debayer" }, - { "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, - { "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, .unit = "range" }, - { "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, - { "limited", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, - { "jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, - { "mpeg", "Limited 
range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, - { "tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, - { "pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(scale_vulkan); - -static const AVFilterPad scale_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &scale_vulkan_filter_frame, - .config_props = &ff_vk_filter_config_input, - }, -}; - -static const AVFilterPad scale_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &scale_vulkan_config_output, - }, -}; - -const FFFilter ff_vf_scale_vulkan = { - .p.name = "scale_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Scale Vulkan frames"), - .p.priv_class = &scale_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(ScaleVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &scale_vulkan_uninit, - FILTER_INPUTS(scale_vulkan_inputs), - FILTER_OUTPUTS(scale_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 3d6bcf11ca8f29611ecd22fc1709bc5efd211eb0 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:00:26 +0000 Subject: [PATCH 022/118] Changing vulkan file directory --- libavfilter/vulkan/vf_scale_vulkan.c | 508 +++++++++++++++++++++++++++ 1 file changed, 508 insertions(+) create mode 100644 libavfilter/vulkan/vf_scale_vulkan.c diff --git a/libavfilter/vulkan/vf_scale_vulkan.c b/libavfilter/vulkan/vf_scale_vulkan.c new file mode 100644 index 0000000000..f7e1ed7408 --- /dev/null +++ b/libavfilter/vulkan/vf_scale_vulkan.c @@ -0,0 +1,508 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. 
/* Sampling functions selectable through the "scaler" option. */
enum ScalerFunc {
    F_BILINEAR = 0,
    F_NEAREST,

    F_NB, /* enumerator count; no switch in this file handles it as a mode */
};

/* Debayering algorithms selectable through the "debayer" option. */
enum DebayerFunc {
    DB_BILINEAR = 0,
    DB_BILINEAR_HQ,

    DB_NB, /* enumerator count, not an algorithm */
};

/* Private context of the scale_vulkan filter. */
typedef struct ScaleVulkanContext {
    /* Shared Vulkan filter state; this file reads/writes its input_format,
     * output_format, output_width and output_height fields.
     * NOTE(review): presumably has to stay the first member because the
     * generic ff_vk_filter_init() is installed as .init - confirm. */
    FFVulkanContext vkctx;

    int initialized;               /* set to 1 at the end of a successful init_filter() */
    FFVkExecPool e;                /* execution pool the shader is registered with */
    AVVulkanDeviceQueueFamily *qf; /* compute queue family returned by ff_vk_qf_find() */
    FFVulkanShader shd;            /* the single compute shader ("scale") */
    VkSampler sampler;             /* created only when the input is not Bayer */

    /* Push constants / options */
    /* The whole struct is handed over as push-constant data with
     * sizeof(s->opts); the member order mirrors the GLSL pushConstants block
     * emitted in init_filter() (mat4 followed by four ints). */
    struct {
        float yuv_matrix[4][4];    /* RGB->YUV matrix, filled in init_scale_shader() */
        int crop_x;                /* in->crop_left, refreshed for every frame */
        int crop_y;                /* in->crop_top */
        int crop_w;                /* in->width  minus left and right crop */
        int crop_h;                /* in->height minus top and bottom crop */
    } opts;

    char *out_format_string;       /* "format" option; NULL keeps the input format */
    char *w_expr;                  /* "w" option, expression string (default "iw") */
    char *h_expr;                  /* "h" option, expression string (default "ih") */

    enum ScalerFunc scaler;        /* "scaler" option */
    enum AVColorRange out_range;   /* "out_range" option */
    enum DebayerFunc debayer;      /* "debayer" option */
} ScaleVulkanContext;
/*
 * GLSL helper: converts one RGB sample to YUV.
 *
 * The sample is multiplied by the yuv_matrix push constant. With
 * fullrange == 1 only a 0.5 offset is added to the 2nd and 3rd components;
 * otherwise the components are scaled by 219/255, 224/255, 224/255 and
 * offset by 16/255, 128/255, 128/255. The 4th component is passed through
 * unscaled in both branches.
 *
 * NOTE(review): C(N, S) is defined outside this file; it presumably turns S
 * into a string literal at indentation level N - confirm.
 */
static const char rgb2yuv[] = {
    C(0, vec4 rgb2yuv(vec4 src, int fullrange)                                  )
    C(0, {                                                                      )
    C(1,     src *= yuv_matrix;                                                 )
    C(1,     if (fullrange == 1) {                                              )
    C(2,         src += vec4(0.0, 0.5, 0.5, 0.0);                               )
    C(1,     } else {                                                           )
    C(2,         src *= vec4(219.0 / 255.0, 224.0 / 255.0, 224.0 / 255.0, 1.0); )
    C(2,         src += vec4(16.0 / 255.0, 128.0 / 255.0, 128.0 / 255.0, 0.0);  )
    C(1,     }                                                                  )
    C(1,     return src;                                                        )
    C(0, }                                                                      )
};

/*
 * GLSL helper: stores one converted sample into a two-plane layout.
 *
 * src.r goes to output_img[0] at pos; pos is then halved and (src.g, src.b)
 * is stored to output_img[1]. Every invocation stores its own chroma value
 * at pos / 2, so neighbouring positions target the same chroma texel and no
 * averaging is done here.
 */
static const char write_nv12[] = {
    C(0, void write_nv12(vec4 src, ivec2 pos)                                   )
    C(0, {                                                                      )
    C(1,     imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0));        )
    C(1,     pos /= ivec2(2);                                                   )
    C(1,     imageStore(output_img[1], pos, vec4(src.g, src.b, 0.0, 0.0));      )
    C(0, }                                                                      )
};

/*
 * GLSL helper: like write_nv12, but the two chroma components go to separate
 * images (src.g to output_img[1], src.b to output_img[2]) at the halved
 * position.
 */
static const char write_420[] = {
    C(0, void write_420(vec4 src, ivec2 pos)                                    )
    C(0, {                                                                      )
    C(1,     imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0));        )
    C(1,     pos /= ivec2(2);                                                   )
    C(1,     imageStore(output_img[1], pos, vec4(src.g, 0.0, 0.0, 0.0));        )
    C(1,     imageStore(output_img[2], pos, vec4(src.b, 0.0, 0.0, 0.0));        )
    C(0, }                                                                      )
};

/*
 * GLSL helper: stores the three components to three images at the same,
 * unmodified position.
 */
static const char write_444[] = {
    C(0, void write_444(vec4 src, ivec2 pos)                                    )
    C(0, {                                                                      )
    C(1,     imageStore(output_img[0], pos, vec4(src.r, 0.0, 0.0, 0.0));        )
    C(1,     imageStore(output_img[1], pos, vec4(src.g, 0.0, 0.0, 0.0));        )
    C(1,     imageStore(output_img[2], pos, vec4(src.b, 0.0, 0.0, 0.0));        )
    C(0, }                                                                      )
};
ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, vec2 in_d = vec2(%i, %i); ,in->width, in->height); + GLSLC(1, vec2 c_r = vec2(crop_w, crop_h) / in_d; ); + GLSLC(1, vec2 c_o = vec2(crop_x, crop_y) / in_d; ); + GLSLC(0, ); + + if (s->vkctx.output_format == s->vkctx.input_format) { + for (int i = 0; i < desc[1].elems; i++) { + GLSLF(1, size = imageSize(output_img[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + switch (s->scaler) { + case F_NEAREST: + case F_BILINEAR: + GLSLF(2, vec4 res = scale_bilinear(%i, pos, c_r, c_o); ,i); + GLSLF(2, imageStore(output_img[%i], pos, res); ,i); + break; + }; + GLSLC(1, } ); + } + } else { + GLSLC(1, vec4 res = scale_bilinear(0, pos, c_r, c_o); ); + GLSLF(1, res = rgb2yuv(res, %i); ,s->out_range == AVCOL_RANGE_JPEG); + switch (s->vkctx.output_format) { + case AV_PIX_FMT_NV12: GLSLC(1, write_nv12(res, pos); ); break; + case AV_PIX_FMT_YUV420P: GLSLC(1, write_420(res, pos); ); break; + case AV_PIX_FMT_YUV444P: GLSLC(1, write_444(res, pos); ); break; + default: return AVERROR(EINVAL); + } + } + + GLSLC(0, } ); + + if (s->vkctx.output_format != s->vkctx.input_format) { + const AVLumaCoefficients *lcoeffs; + double tmp_mat[3][3]; + + lcoeffs = av_csp_luma_coeffs_from_avcsp(in->colorspace); + if (!lcoeffs) { + av_log(ctx, AV_LOG_ERROR, "Unsupported colorspace\n"); + return AVERROR(EINVAL); + } + + ff_fill_rgb2yuv_table(lcoeffs, tmp_mat); + + for (int y = 0; y < 3; y++) + for (int x = 0; x < 3; x++) + s->opts.yuv_matrix[x][y] = tmp_mat[x][y]; + s->opts.yuv_matrix[3][3] = 1.0; + } + + return 0; +} + +static int init_debayer_shader(ScaleVulkanContext *s, FFVulkanShader *shd, + FFVulkanDescriptorSetBinding *desc, AVFrame *in) +{ + GLSLD(ff_source_debayer_comp); + + GLSLC(0, void main(void)); + GLSLC(0, { ); + if (s->debayer == DB_BILINEAR) + GLSLC(1, debayer_bilinear();); + else if (s->debayer == DB_BILINEAR_HQ) + GLSLC(1, debayer_bilinear_hq();); + GLSLC(0, } ); + + shd->lg_size[0] <<= 1; + shd->lg_size[1] <<= 1; + + return 0; +} + 
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + VkFilter sampler_mode; + ScaleVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + int debayer = s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16; + int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format); + + switch (s->scaler) { + case F_NEAREST: + sampler_mode = VK_FILTER_NEAREST; + break; + case F_BILINEAR: + sampler_mode = VK_FILTER_LINEAR; + break; + }; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + + if (!debayer) + RET(ff_vk_init_sampler(vkctx, &s->sampler, 0, sampler_mode)); + + RET(ff_vk_shader_init(vkctx, &s->shd, "scale", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 32, 1, + 0)); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_img", + .type = debayer ? + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE : + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .mem_layout = debayer ? 
+ ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT) : + NULL, + .mem_quali = "readonly", + .dimensions = 2, + .elems = in_planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.output_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, mat4 yuv_matrix; ); + GLSLC(1, int crop_x; ); + GLSLC(1, int crop_y; ); + GLSLC(1, int crop_w; ); + GLSLC(1, int crop_h; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + if (debayer) + err = init_debayer_shader(s, shd, desc, in); + else + err = init_scale_shader(ctx, shd, desc, in); + if (err < 0) + goto fail; + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFilterContext *ctx = link->dst; + ScaleVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + s->opts.crop_x = in->crop_left; + s->opts.crop_y = in->crop_top; + s->opts.crop_w = in->width - (in->crop_left + in->crop_right); + s->opts.crop_h = in->height - 
(in->crop_top + in->crop_bottom); + + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in, + s->sampler, &s->opts, sizeof(s->opts))); + + err = av_frame_copy_props(out, in); + if (err < 0) + goto fail; + + if (out->width != in->width || out->height != in->height) { + av_frame_side_data_remove_by_props(&out->side_data, &out->nb_side_data, + AV_SIDE_DATA_PROP_SIZE_DEPENDENT); + } + + if (s->out_range != AVCOL_RANGE_UNSPECIFIED) + out->color_range = s->out_range; + if (s->vkctx.output_format != s->vkctx.input_format) + out->chroma_location = AVCHROMA_LOC_TOPLEFT; + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static int scale_vulkan_config_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + ScaleVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + AVFilterLink *inlink = outlink->src->inputs[0]; + + err = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink, + &vkctx->output_width, + &vkctx->output_height); + if (err < 0) + return err; + + ff_scale_adjust_dimensions(inlink, &vkctx->output_width, &vkctx->output_height, 0, 1, 1.f); + + outlink->w = vkctx->output_width; + outlink->h = vkctx->output_height; + + if (s->out_format_string) { + s->vkctx.output_format = av_get_pix_fmt(s->out_format_string); + if (s->vkctx.output_format == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "Invalid output format.\n"); + return AVERROR(EINVAL); + } + } else { + s->vkctx.output_format = s->vkctx.input_format; + } + + if (s->vkctx.input_format == AV_PIX_FMT_BAYER_RGGB16) { + if (s->vkctx.output_format == s->vkctx.input_format) { + s->vkctx.output_format = AV_PIX_FMT_RGBA64; + } else if (!ff_vk_mt_is_np_rgb(s->vkctx.output_format)) { + av_log(avctx, AV_LOG_ERROR, "Unsupported output format for debayer\n"); + return AVERROR(EINVAL); + } + if (inlink->w != outlink->w || inlink->w != outlink->w) { + av_log(avctx, 
AV_LOG_ERROR, "Scaling is not supported with debayering\n"); + return AVERROR_PATCHWELCOME; + } + } else if (s->vkctx.output_format != s->vkctx.input_format) { + if (!ff_vk_mt_is_np_rgb(s->vkctx.input_format)) { + av_log(avctx, AV_LOG_ERROR, "Unsupported input format for conversion\n"); + return AVERROR(EINVAL); + } + if (s->vkctx.output_format != AV_PIX_FMT_NV12 && + s->vkctx.output_format != AV_PIX_FMT_YUV420P && + s->vkctx.output_format != AV_PIX_FMT_YUV444P) { + av_log(avctx, AV_LOG_ERROR, "Unsupported output format\n"); + return AVERROR(EINVAL); + } + } else if (s->out_range != AVCOL_RANGE_UNSPECIFIED) { + av_log(avctx, AV_LOG_ERROR, "Cannot change range without converting format\n"); + return AVERROR(EINVAL); + } + + return ff_vk_filter_config_output(outlink); +} + +static void scale_vulkan_uninit(AVFilterContext *avctx) +{ + ScaleVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(ScaleVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption scale_vulkan_options[] = { + { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, + { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, + { "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB, .flags = FLAGS, .unit = "scaler" }, + { "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "scaler" }, + { "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, .unit = "scaler" }, + { "debayer", "Debayer 
algorithm to use", OFFSET(debayer), AV_OPT_TYPE_INT, {.i64 = DB_BILINEAR_HQ}, 0, DB_NB, .flags = FLAGS, .unit = "debayer" }, + { "bilinear", "Bilinear debayering (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = DB_BILINEAR}, 0, 0, .flags = FLAGS, .unit = "debayer" }, + { "bilinear_hq", "Bilinear debayering (high quality)", 0, AV_OPT_TYPE_CONST, {.i64 = DB_BILINEAR_HQ}, 0, 0, .flags = FLAGS, .unit = "debayer" }, + { "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, + { "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, .unit = "range" }, + { "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, + { "limited", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, + { "jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, + { "mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, + { "tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, + { "pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(scale_vulkan); + +static const AVFilterPad scale_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &scale_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad scale_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &scale_vulkan_config_output, + }, +}; + +const FFFilter ff_vf_scale_vulkan = { + .p.name = "scale_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Scale Vulkan frames"), + 
.p.priv_class = &scale_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(ScaleVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &scale_vulkan_uninit, + FILTER_INPUTS(scale_vulkan_inputs), + FILTER_OUTPUTS(scale_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From 2095c9abde4bded42306b9cb8315a3b875448d08 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:01:11 +0000 Subject: [PATCH 023/118] Changing vulkan file directory --- libavfilter/vf_scdet_vulkan.c | 412 ---------------------------------- 1 file changed, 412 deletions(-) delete mode 100644 libavfilter/vf_scdet_vulkan.c diff --git a/libavfilter/vf_scdet_vulkan.c b/libavfilter/vf_scdet_vulkan.c deleted file mode 100644 index ee2bf248a2..0000000000 --- a/libavfilter/vf_scdet_vulkan.c +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Copyright 2025 (c) Niklas Haas - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/avassert.h" -#include "libavutil/vulkan_spirv.h" -#include "libavutil/opt.h" -#include "libavutil/timestamp.h" -#include "vulkan_filter.h" - -#include "filters.h" - -typedef struct SceneDetectVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - AVBufferPool *det_buf_pool; - - double threshold; - int sc_pass; - - int nb_planes; - double prev_mafd; - AVFrame *prev; - AVFrame *cur; -} SceneDetectVulkanContext; - -typedef struct SceneDetectBuf { -#define SLICES 16 - uint32_t frame_sad[SLICES]; -} SceneDetectBuf; - -static av_cold int init_filter(AVFilterContext *ctx) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - SceneDetectVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanShader *shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format); - const int lumaonly = !(pixdesc->flags & AV_PIX_FMT_FLAG_RGB) && - (pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR); - s->nb_planes = lumaonly ? 
1 : av_pix_fmt_count_planes(s->vkctx.input_format); - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "scdet", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_KHR_shader_subgroup_arithmetic" }, 1, - 32, 32, 1, - 0)); - shd = &s->shd; - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "prev_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_UINT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = av_pix_fmt_count_planes(s->vkctx.input_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, { - .name = "cur_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_UINT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = av_pix_fmt_count_planes(s->vkctx.input_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, { - .name = "sad_buffer", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "uint frame_sad[];", - } - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 3, 0, 0)); - - GLSLC(0, shared uint wg_sum; ); - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLF(1, const uint slice = gl_WorkGroupID.x %% %u; ,SLICES); - GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLC(1, wg_sum = 0; ); - GLSLC(1, barrier(); ); - for (int i = 0; i < s->nb_planes; i++) { - GLSLF(1, if (IS_WITHIN(pos, imageSize(cur_img[%d]))) { ,i); - GLSLF(2, uvec4 prev = imageLoad(prev_img[%d], pos); ,i); - GLSLF(2, uvec4 cur = imageLoad(cur_img[%d], pos); ,i); - 
GLSLC(2, uvec4 sad = abs(ivec4(cur) - ivec4(prev)); ); - GLSLC(2, uint sum = subgroupAdd(sad.x + sad.y + sad.z); ); - GLSLC(2, if (subgroupElect()) ); - GLSLC(3, atomicAdd(wg_sum, sum); ); - GLSLC(1, } ); - } - GLSLC(1, barrier(); ); - GLSLC(1, if (gl_LocalInvocationIndex == 0) ); - GLSLC(2, atomicAdd(frame_sad[slice], wg_sum); ); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static double evaluate(AVFilterContext *ctx, const SceneDetectBuf *buf) -{ - SceneDetectVulkanContext *s = ctx->priv; - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->vkctx.input_format); - const AVFilterLink *inlink = ctx->inputs[0]; - uint64_t count; - double mafd, diff; - - uint64_t sad = 0; - for (int i = 0; i < SLICES; i++) - sad += buf->frame_sad[i]; - - av_assert2(s->nb_planes == 1 || !(desc->log2_chroma_w || desc->log2_chroma_h)); - count = s->nb_planes * inlink->w * inlink->h; - mafd = (double) sad * 100.0 / count / (1ULL << desc->comp[0].depth); - diff = fabs(mafd - s->prev_mafd); - s->prev_mafd = mafd; - - return av_clipf(FFMIN(mafd, diff), 0.0, 100.0); -} - -static int scdet_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) -{ - int err; - AVFilterContext *ctx = link->dst; - SceneDetectVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - - VkImageView prev_views[AV_NUM_DATA_POINTERS]; - VkImageView cur_views[AV_NUM_DATA_POINTERS]; - VkImageMemoryBarrier2 img_bar[8]; - int nb_img_bar = 0; - - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - FFVkExecContext *exec = NULL; - AVBufferRef *buf = NULL; - FFVkBuffer *buf_vk; - - SceneDetectBuf *sad; - double score = 0.0; - char str[64]; - - if 
(!s->initialized) - RET(init_filter(ctx)); - - av_frame_free(&s->prev); - s->prev = s->cur; - s->cur = av_frame_clone(in); - if (!s->prev) - goto done; - - RET(ff_vk_get_pooled_buffer(vkctx, &s->det_buf_pool, &buf, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - NULL, - sizeof(SceneDetectBuf), - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); - buf_vk = (FFVkBuffer *)buf->data; - sad = (SceneDetectBuf *) buf_vk->mapped_mem; - - exec = ff_vk_exec_get(vkctx, &s->e); - ff_vk_exec_start(vkctx, exec); - - RET(ff_vk_exec_add_dep_frame(vkctx, exec, s->prev, - VK_PIPELINE_STAGE_2_NONE, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(vkctx, exec, prev_views, s->prev, FF_VK_REP_UINT)); - - ff_vk_shader_update_img_array(vkctx, exec, &s->shd, s->prev, prev_views, 0, 0, - VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - - ff_vk_frame_barrier(vkctx, exec, s->prev, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - RET(ff_vk_exec_add_dep_frame(vkctx, exec, s->cur, - VK_PIPELINE_STAGE_2_NONE, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(vkctx, exec, cur_views, s->cur, FF_VK_REP_UINT)); - - ff_vk_shader_update_img_array(vkctx, exec, &s->shd, s->cur, cur_views, 0, 1, - VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); - - ff_vk_frame_barrier(vkctx, exec, s->cur, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - /* zero buffer */ - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = 
VK_PIPELINE_STAGE_2_NONE, - .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = buf_vk->buf, - .size = buf_vk->size, - .offset = 0, - }, - .bufferMemoryBarrierCount = 1, - }); - - vk->CmdFillBuffer(exec->buf, buf_vk->buf, 0, buf_vk->size, 0x0); - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = buf_vk->buf, - .size = buf_vk->size, - .offset = 0, - }, - .bufferMemoryBarrierCount = 1, - }); - - RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd, 0, 2, 0, - buf_vk, 0, buf_vk->size, - VK_FORMAT_UNDEFINED)); - - ff_vk_exec_bind_shader(vkctx, exec, &s->shd); - - vk->CmdDispatch(exec->buf, - FFALIGN(in->width, s->shd.lg_size[0]) / s->shd.lg_size[0], - FFALIGN(in->height, s->shd.lg_size[1]) / s->shd.lg_size[1], - s->shd.lg_size[2]); - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .dstStageMask = VK_PIPELINE_STAGE_2_HOST_BIT, - .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .dstAccessMask = VK_ACCESS_HOST_READ_BIT, - .srcQueueFamilyIndex 
= VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = buf_vk->buf, - .size = buf_vk->size, - .offset = 0, - }, - .bufferMemoryBarrierCount = 1, - }); - - RET(ff_vk_exec_submit(vkctx, exec)); - ff_vk_exec_wait(vkctx, exec); - score = evaluate(ctx, sad); - -done: - snprintf(str, sizeof(str), "%0.3f", s->prev_mafd); - av_dict_set(&in->metadata, "lavfi.scd.mafd", str, 0); - snprintf(str, sizeof(str), "%0.3f", score); - av_dict_set(&in->metadata, "lavfi.scd.score", str, 0); - - if (score >= s->threshold) { - const char *pts = av_ts2timestr(in->pts, &link->time_base); - av_dict_set(&in->metadata, "lavfi.scd.time", pts, 0); - av_log(ctx, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time: %s\n", - score, pts); - } - - av_buffer_unref(&buf); - if (!s->sc_pass || score >= s->threshold) - return ff_filter_frame(outlink, in); - else { - av_frame_free(&in); - return 0; - } - -fail: - if (exec) - ff_vk_exec_discard_deps(&s->vkctx, exec); - av_frame_free(&in); - av_buffer_unref(&buf); - return err; -} - -static void scdet_vulkan_uninit(AVFilterContext *avctx) -{ - SceneDetectVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - av_frame_free(&s->prev); - av_frame_free(&s->cur); - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - av_buffer_pool_uninit(&s->det_buf_pool); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -#define OFFSET(x) offsetof(SceneDetectVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) -static const AVOption scdet_vulkan_options[] = { - { "threshold", "set scene change detect threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., FLAGS }, - { "t", "set scene change detect threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., FLAGS }, - { "sc_pass", "Set the flag to pass scene change frames", OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, - { "s", "Set the flag to pass scene 
change frames", OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, - { NULL } -}; - -AVFILTER_DEFINE_CLASS(scdet_vulkan); - -static const AVFilterPad scdet_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &scdet_vulkan_filter_frame, - .config_props = &ff_vk_filter_config_input, - }, -}; - -static const AVFilterPad scdet_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &ff_vk_filter_config_output, - }, -}; - -const FFFilter ff_vf_scdet_vulkan = { - .p.name = "scdet_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Detect video scene change"), - .p.priv_class = &scdet_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(SceneDetectVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &scdet_vulkan_uninit, - FILTER_INPUTS(scdet_vulkan_inputs), - FILTER_OUTPUTS(scdet_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 9599b10a1242b06e66a0b0c595f69db4ee44fb78 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:02:05 +0000 Subject: [PATCH 024/118] Changing vulkan file directory --- libavfilter/vulkan/vf_scdet_vulkan.c | 412 +++++++++++++++++++++++++++ 1 file changed, 412 insertions(+) create mode 100644 libavfilter/vulkan/vf_scdet_vulkan.c diff --git a/libavfilter/vulkan/vf_scdet_vulkan.c b/libavfilter/vulkan/vf_scdet_vulkan.c new file mode 100644 index 0000000000..7b13a2936b --- /dev/null +++ b/libavfilter/vulkan/vf_scdet_vulkan.c @@ -0,0 +1,412 @@ +/* + * Copyright 2025 (c) Niklas Haas + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "libavutil/opt.h" +#include "libavutil/timestamp.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +/* Private state of one scdet_vulkan filter instance. */ +typedef struct SceneDetectVulkanContext { + FFVulkanContext vkctx; + + int initialized; /* set to 1 at the end of a successful init_filter() */ + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; /* compute queue family returned by ff_vk_qf_find() */ + FFVulkanShader shd; /* the "scdet" compute shader */ + AVBufferPool *det_buf_pool; /* pool of SceneDetectBuf-sized result buffers */ + + double threshold; /* "threshold" / "t" option */ + int sc_pass; /* "sc_pass" / "s" option */ + + int nb_planes; /* planes the shader compares: 1 for planar non-RGB formats, otherwise all planes */ + double prev_mafd; /* mafd stored by the previous evaluate() call */ + AVFrame *prev; /* clone of the previous input frame */ + AVFrame *cur; /* clone of the frame currently being filtered */ +} SceneDetectVulkanContext; +/* Result buffer filled by the shader: each workgroup adds its sum into slot gl_WorkGroupID.x % SLICES. */ +typedef struct SceneDetectBuf { +#define SLICES 16 + uint32_t frame_sad[SLICES]; +} SceneDetectBuf; +/* Builds the "scdet" compute shader and its execution pool; called from the filter_frame callback while s->initialized is 0. Returns AVERROR_EXTERNAL if the SPIR-V compiler cannot be created, otherwise the status left in err. */ +static av_cold int init_filter(AVFilterContext *ctx) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + SceneDetectVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanShader *shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + /* planar non-RGB input is compared on its first plane only */ + const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->vkctx.input_format); + const int lumaonly = !(pixdesc->flags & AV_PIX_FMT_FLAG_RGB) && + (pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR); + s->nb_planes = lumaonly ?
1 : av_pix_fmt_count_planes(s->vkctx.input_format); + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, &s->shd, "scdet", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_KHR_shader_subgroup_arithmetic" }, 1, + 32, 32, 1, + 0)); + shd = &s->shd; + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "prev_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_UINT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, { + .name = "cur_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_UINT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = av_pix_fmt_count_planes(s->vkctx.input_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, { + .name = "sad_buffer", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "uint frame_sad[];", + } + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 3, 0, 0)); + + GLSLC(0, shared uint wg_sum; ); + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLF(1, const uint slice = gl_WorkGroupID.x %% %u; ,SLICES); + GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLC(1, wg_sum = 0; ); + GLSLC(1, barrier(); ); + for (int i = 0; i < s->nb_planes; i++) { + GLSLF(1, if (IS_WITHIN(pos, imageSize(cur_img[%d]))) { ,i); + GLSLF(2, uvec4 prev = imageLoad(prev_img[%d], pos); ,i); + GLSLF(2, uvec4 cur = imageLoad(cur_img[%d], pos); ,i); + 
GLSLC(2, uvec4 sad = abs(ivec4(cur) - ivec4(prev)); ); + GLSLC(2, uint sum = subgroupAdd(sad.x + sad.y + sad.z); ); + GLSLC(2, if (subgroupElect()) ); + GLSLC(3, atomicAdd(wg_sum, sum); ); + GLSLC(1, } ); + } + GLSLC(1, barrier(); ); + GLSLC(1, if (gl_LocalInvocationIndex == 0) ); + GLSLC(2, atomicAdd(frame_sad[slice], wg_sum); ); + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, &s->shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, &s->shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} +/* Turns the SLICES partial sums read back from the GPU into a scene-change score. mafd is the total absolute difference as a percentage of (1 << depth of component 0), averaged over nb_planes * w * h samples; the result is min(mafd, |mafd - prev_mafd|) clipped to [0, 100]. Stores mafd in s->prev_mafd. */ +static double evaluate(AVFilterContext *ctx, const SceneDetectBuf *buf) +{ + SceneDetectVulkanContext *s = ctx->priv; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->vkctx.input_format); + const AVFilterLink *inlink = ctx->inputs[0]; + uint64_t count; + double mafd, diff; + + uint64_t sad = 0; + for (int i = 0; i < SLICES; i++) + sad += buf->frame_sad[i]; + + av_assert2(s->nb_planes == 1 || !(desc->log2_chroma_w || desc->log2_chroma_h)); + count = (uint64_t) s->nb_planes * inlink->w * inlink->h; /* widen before multiplying: the int product can overflow for large multi-plane frames */ + mafd = (double) sad * 100.0 / count / (1ULL << desc->comp[0].depth); + diff = fabs(mafd - s->prev_mafd); + s->prev_mafd = mafd; + + return av_clipf(FFMIN(mafd, diff), 0.0, 100.0); +} +/* filter_frame callback: compares the incoming frame with the previous one on the GPU, stores lavfi.scd.* metadata on the frame, then forwards it (or frees it when sc_pass is set and the score is below the threshold). */ +static int scdet_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) +{ + int err; + AVFilterContext *ctx = link->dst; + SceneDetectVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + VkImageView prev_views[AV_NUM_DATA_POINTERS]; + VkImageView cur_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[8]; + int nb_img_bar = 0; + + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + FFVkExecContext *exec = NULL; + AVBufferRef *buf = NULL; + FFVkBuffer *buf_vk; + + SceneDetectBuf *sad; + double score = 0.0; + char str[64]; + + if
(!s->initialized) + RET(init_filter(ctx)); + + av_frame_free(&s->prev); + s->prev = s->cur; + s->cur = av_frame_clone(in); + if (!s->cur) { + /* av_frame_clone() returns NULL on allocation failure; bail out before the Vulkan helpers below receive a NULL frame */ + err = AVERROR(ENOMEM); + goto fail; + } + if (!s->prev) + goto done; /* first frame: nothing to compare against yet */ + + RET(ff_vk_get_pooled_buffer(vkctx, &s->det_buf_pool, &buf, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + NULL, + sizeof(SceneDetectBuf), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); + buf_vk = (FFVkBuffer *)buf->data; + sad = (SceneDetectBuf *) buf_vk->mapped_mem; + + exec = ff_vk_exec_get(vkctx, &s->e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, s->prev, + VK_PIPELINE_STAGE_2_NONE, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, prev_views, s->prev, FF_VK_REP_UINT)); + + ff_vk_shader_update_img_array(vkctx, exec, &s->shd, s->prev, prev_views, 0, 0, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + + ff_vk_frame_barrier(vkctx, exec, s->prev, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, s->cur, + VK_PIPELINE_STAGE_2_NONE, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, cur_views, s->cur, FF_VK_REP_UINT)); + + ff_vk_shader_update_img_array(vkctx, exec, &s->shd, s->cur, cur_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + + ff_vk_frame_barrier(vkctx, exec, s->cur, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + /* zero buffer */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask =
VK_PIPELINE_STAGE_2_NONE, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = buf_vk->buf, + .size = buf_vk->size, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1, + }); + + vk->CmdFillBuffer(exec->buf, buf_vk->buf, 0, buf_vk->size, 0x0); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = buf_vk->buf, + .size = buf_vk->size, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1, + }); + + RET(ff_vk_shader_update_desc_buffer(&s->vkctx, exec, &s->shd, 0, 2, 0, + buf_vk, 0, buf_vk->size, + VK_FORMAT_UNDEFINED)); + + ff_vk_exec_bind_shader(vkctx, exec, &s->shd); + + vk->CmdDispatch(exec->buf, + FFALIGN(in->width, s->shd.lg_size[0]) / s->shd.lg_size[0], + FFALIGN(in->height, s->shd.lg_size[1]) / s->shd.lg_size[1], + s->shd.lg_size[2]); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_HOST_BIT, + .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .dstAccessMask = VK_ACCESS_HOST_READ_BIT, + .srcQueueFamilyIndex 
= VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = buf_vk->buf, + .size = buf_vk->size, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1, + }); + + RET(ff_vk_exec_submit(vkctx, exec)); + ff_vk_exec_wait(vkctx, exec); + score = evaluate(ctx, sad); +/* also reached directly, with score still 0.0, when there is no previous frame to compare with */ +done: + snprintf(str, sizeof(str), "%0.3f", s->prev_mafd); + av_dict_set(&in->metadata, "lavfi.scd.mafd", str, 0); + snprintf(str, sizeof(str), "%0.3f", score); + av_dict_set(&in->metadata, "lavfi.scd.score", str, 0); + + if (score >= s->threshold) { + const char *pts = av_ts2timestr(in->pts, &link->time_base); + av_dict_set(&in->metadata, "lavfi.scd.time", pts, 0); + av_log(ctx, AV_LOG_INFO, "lavfi.scd.score: %.3f, lavfi.scd.time: %s\n", + score, pts); + } + /* with sc_pass set, frames scoring below the threshold are freed instead of forwarded */ + av_buffer_unref(&buf); + if (!s->sc_pass || score >= s->threshold) + return ff_filter_frame(outlink, in); + else { + av_frame_free(&in); + return 0; + } + +fail: + if (exec) + ff_vk_exec_discard_deps(&s->vkctx, exec); + av_frame_free(&in); + av_buffer_unref(&buf); + return err; +} +/* Frees the cached frames, the execution pool, the shader, the result-buffer pool and the Vulkan context, then clears the initialized flag. */ +static void scdet_vulkan_uninit(AVFilterContext *avctx) +{ + SceneDetectVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + av_frame_free(&s->prev); + av_frame_free(&s->cur); + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + av_buffer_pool_uninit(&s->det_buf_pool); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} +/* "t" and "s" are short aliases: they use the same field offsets as "threshold" and "sc_pass" */ +#define OFFSET(x) offsetof(SceneDetectVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption scdet_vulkan_options[] = { + { "threshold", "set scene change detect threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., FLAGS }, + { "t", "set scene change detect threshold", OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl = 10.}, 0, 100., FLAGS }, + { "sc_pass", "Set the flag to pass scene change frames", OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, + { "s", "Set the flag to pass scene
change frames", OFFSET(sc_pass), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(scdet_vulkan); + +static const AVFilterPad scdet_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &scdet_vulkan_filter_frame, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad scdet_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_vk_filter_config_output, + }, +}; + +const FFFilter ff_vf_scdet_vulkan = { + .p.name = "scdet_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Detect video scene change"), + .p.priv_class = &scdet_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(SceneDetectVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &scdet_vulkan_uninit, + FILTER_INPUTS(scdet_vulkan_inputs), + FILTER_OUTPUTS(scdet_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From de8026b88035e5dc927e1a3c48b17ed03188e914 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:02:44 +0000 Subject: [PATCH 025/118] Changing vulkan file directory --- libavfilter/vf_transpose_vulkan.c | 275 ------------------------------ 1 file changed, 275 deletions(-) delete mode 100644 libavfilter/vf_transpose_vulkan.c diff --git a/libavfilter/vf_transpose_vulkan.c b/libavfilter/vf_transpose_vulkan.c deleted file mode 100644 index 3fe2d11cb2..0000000000 --- a/libavfilter/vf_transpose_vulkan.c +++ /dev/null @@ -1,275 +0,0 @@ -/* - * copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com> - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/random_seed.h" -#include "libavutil/opt.h" -#include "libavutil/vulkan_spirv.h" -#include "vulkan_filter.h" - -#include "filters.h" -#include "transpose.h" -#include "video.h" - -typedef struct TransposeVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - - int dir; - int passthrough; -} TransposeVulkanContext; - -static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - TransposeVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd = &s->shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "transpose", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 1, 1, - 0)); - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "input_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, 
FF_VK_REP_FLOAT), - .mem_quali = "readonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "output_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 size; ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - for (int i = 0; i < planes; i++) { - GLSLC(0, ); - GLSLF(1, size = imageSize(output_images[%i]); ,i); - GLSLC(1, if (IS_WITHIN(pos, size)) { ); - if (s->dir == TRANSPOSE_CCLOCK) - GLSLF(2, vec4 res = imageLoad(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i); - else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) { - GLSLF(2, vec4 res = imageLoad(input_images[%i], ivec2(size.yx - pos.yx)); ,i); - if (s->dir == TRANSPOSE_CLOCK) - GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); ); - } else - GLSLF(2, vec4 res = imageLoad(input_images[%i], pos.yx); ,i); - GLSLF(2, imageStore(output_images[%i], pos, res); ,i); - GLSLC(1, } ); - } - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static int filter_frame(AVFilterLink *inlink, AVFrame *in) -{ - int err; - AVFrame *out = NULL; - AVFilterContext *ctx = inlink->dst; - TransposeVulkanContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - - if (s->passthrough) - return ff_filter_frame(outlink, in); - - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - err = 
AVERROR(ENOMEM); - goto fail; - } - - if (!s->initialized) - RET(init_filter(ctx, in)); - - RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in, - VK_NULL_HANDLE, NULL, 0)); - - RET(av_frame_copy_props(out, in)); - - if (in->sample_aspect_ratio.num) - out->sample_aspect_ratio = in->sample_aspect_ratio; - else { - out->sample_aspect_ratio.num = in->sample_aspect_ratio.den; - out->sample_aspect_ratio.den = in->sample_aspect_ratio.num; - } - - av_frame_free(&in); - - return ff_filter_frame(outlink, out); - -fail: - av_frame_free(&in); - av_frame_free(&out); - return err; -} - -static av_cold void transpose_vulkan_uninit(AVFilterContext *avctx) -{ - TransposeVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -static int config_props_output(AVFilterLink *outlink) -{ - FilterLink *outl = ff_filter_link(outlink); - AVFilterContext *avctx = outlink->src; - TransposeVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - AVFilterLink *inlink = avctx->inputs[0]; - FilterLink *inl = ff_filter_link(inlink); - - if ((inlink->w >= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) || - (inlink->w <= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) { - av_log(avctx, AV_LOG_VERBOSE, - "w:%d h:%d -> w:%d h:%d (passthrough mode)\n", - inlink->w, inlink->h, inlink->w, inlink->h); - outl->hw_frames_ctx = av_buffer_ref(inl->hw_frames_ctx); - return outl->hw_frames_ctx ? 
0 : AVERROR(ENOMEM); - } else { - s->passthrough = TRANSPOSE_PT_TYPE_NONE; - } - - vkctx->output_width = inlink->h; - vkctx->output_height = inlink->w; - - if (inlink->sample_aspect_ratio.num) - outlink->sample_aspect_ratio = av_div_q((AVRational) { 1, 1 }, - inlink->sample_aspect_ratio); - else - outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; - - return ff_vk_filter_config_output(outlink); -} - -#define OFFSET(x) offsetof(TransposeVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) - -static const AVOption transpose_vulkan_options[] = { - { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 7, FLAGS, .unit = "dir" }, - { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, - { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "dir" }, - { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "dir" }, - { "clock_flip", "rotate clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, - - { "passthrough", "do not apply transposition if the input matches the specified geometry", - OFFSET(passthrough), AV_OPT_TYPE_INT, {.i64=TRANSPOSE_PT_TYPE_NONE}, 0, INT_MAX, FLAGS, .unit = "passthrough" }, - { "none", "always apply transposition", 0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_NONE}, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, - { "portrait", "preserve portrait geometry", 0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_PORTRAIT}, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, - { "landscape", "preserve landscape geometry", 0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_LANDSCAPE}, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, - - { NULL } -}; - -AVFILTER_DEFINE_CLASS(transpose_vulkan); - -static 
const AVFilterPad transpose_vulkan_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .filter_frame = &filter_frame, - .config_props = &ff_vk_filter_config_input, - } -}; - -static const AVFilterPad transpose_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &config_props_output, - } -}; - -const FFFilter ff_vf_transpose_vulkan = { - .p.name = "transpose_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Transpose Vulkan Filter"), - .p.priv_class = &transpose_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(TransposeVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &transpose_vulkan_uninit, - FILTER_INPUTS(transpose_vulkan_inputs), - FILTER_OUTPUTS(transpose_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 42f2951e33ed10a48ab58529fcb69710693761f6 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:03:22 +0000 Subject: [PATCH 026/118] Changing vulkan file directory --- libavfilter/vulkan/vf_transpose_vulkan.c | 275 +++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 libavfilter/vulkan/vf_transpose_vulkan.c diff --git a/libavfilter/vulkan/vf_transpose_vulkan.c b/libavfilter/vulkan/vf_transpose_vulkan.c new file mode 100644 index 0000000000..1162c78588 --- /dev/null +++ b/libavfilter/vulkan/vf_transpose_vulkan.c @@ -0,0 +1,275 @@ +/* + * copyright (c) 2021 Wu Jianhua <jianhua.wu@intel.com> + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "vulkan_filter.h" + +#include "libavfilter/filters.h" +#include "libavfilter/transpose.h" +#include "libavfilter/video.h" + +typedef struct TransposeVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + + int dir; + int passthrough; +} TransposeVulkanContext; + +static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + TransposeVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVulkanShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, &s->shd, "transpose", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 1, 1, + 0)); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "input_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + 
.mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "output_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 2, 0, 0)); + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 size; ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + for (int i = 0; i < planes; i++) { + GLSLC(0, ); + GLSLF(1, size = imageSize(output_images[%i]); ,i); + GLSLC(1, if (IS_WITHIN(pos, size)) { ); + if (s->dir == TRANSPOSE_CCLOCK) + GLSLF(2, vec4 res = imageLoad(input_images[%i], ivec2(size.y - pos.y, pos.x)); ,i); + else if (s->dir == TRANSPOSE_CLOCK_FLIP || s->dir == TRANSPOSE_CLOCK) { + GLSLF(2, vec4 res = imageLoad(input_images[%i], ivec2(size.yx - pos.yx)); ,i); + if (s->dir == TRANSPOSE_CLOCK) + GLSLC(2, pos = ivec2(pos.x, size.y - pos.y); ); + } else + GLSLF(2, vec4 res = imageLoad(input_images[%i], pos.yx); ,i); + GLSLF(2, imageStore(output_images[%i], pos, res); ,i); + GLSLC(1, } ); + } + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + int err; + AVFrame *out = NULL; + AVFilterContext *ctx = inlink->dst; + TransposeVulkanContext *s = ctx->priv; + AVFilterLink *outlink = ctx->outputs[0]; + + if (s->passthrough) + return ff_filter_frame(outlink, in); + + out = 
ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) + RET(init_filter(ctx, in)); + + RET(ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, out, in, + VK_NULL_HANDLE, NULL, 0)); + + RET(av_frame_copy_props(out, in)); + + if (in->sample_aspect_ratio.num) + out->sample_aspect_ratio = in->sample_aspect_ratio; + else { + out->sample_aspect_ratio.num = in->sample_aspect_ratio.den; + out->sample_aspect_ratio.den = in->sample_aspect_ratio.num; + } + + av_frame_free(&in); + + return ff_filter_frame(outlink, out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return err; +} + +static av_cold void transpose_vulkan_uninit(AVFilterContext *avctx) +{ + TransposeVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +static int config_props_output(AVFilterLink *outlink) +{ + FilterLink *outl = ff_filter_link(outlink); + AVFilterContext *avctx = outlink->src; + TransposeVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + AVFilterLink *inlink = avctx->inputs[0]; + FilterLink *inl = ff_filter_link(inlink); + + if ((inlink->w >= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) || + (inlink->w <= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) { + av_log(avctx, AV_LOG_VERBOSE, + "w:%d h:%d -> w:%d h:%d (passthrough mode)\n", + inlink->w, inlink->h, inlink->w, inlink->h); + outl->hw_frames_ctx = av_buffer_ref(inl->hw_frames_ctx); + return outl->hw_frames_ctx ? 
0 : AVERROR(ENOMEM); + } else { + s->passthrough = TRANSPOSE_PT_TYPE_NONE; + } + + vkctx->output_width = inlink->h; + vkctx->output_height = inlink->w; + + if (inlink->sample_aspect_ratio.num) + outlink->sample_aspect_ratio = av_div_q((AVRational) { 1, 1 }, + inlink->sample_aspect_ratio); + else + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + + return ff_vk_filter_config_output(outlink); +} + +#define OFFSET(x) offsetof(TransposeVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + +static const AVOption transpose_vulkan_options[] = { + { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 7, FLAGS, .unit = "dir" }, + { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, + { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK }, .flags=FLAGS, .unit = "dir" }, + { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK }, .flags=FLAGS, .unit = "dir" }, + { "clock_flip", "rotate clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP }, .flags=FLAGS, .unit = "dir" }, + + { "passthrough", "do not apply transposition if the input matches the specified geometry", + OFFSET(passthrough), AV_OPT_TYPE_INT, {.i64=TRANSPOSE_PT_TYPE_NONE}, 0, INT_MAX, FLAGS, .unit = "passthrough" }, + { "none", "always apply transposition", 0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_NONE}, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, + { "portrait", "preserve portrait geometry", 0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_PORTRAIT}, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, + { "landscape", "preserve landscape geometry", 0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_LANDSCAPE}, INT_MIN, INT_MAX, FLAGS, .unit = "passthrough" }, + + { NULL } +}; + +AVFILTER_DEFINE_CLASS(transpose_vulkan); + +static 
const AVFilterPad transpose_vulkan_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = &filter_frame, + .config_props = &ff_vk_filter_config_input, + } +}; + +static const AVFilterPad transpose_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &config_props_output, + } +}; + +const FFFilter ff_vf_transpose_vulkan = { + .p.name = "transpose_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Transpose Vulkan Filter"), + .p.priv_class = &transpose_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(TransposeVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &transpose_vulkan_uninit, + FILTER_INPUTS(transpose_vulkan_inputs), + FILTER_OUTPUTS(transpose_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From d41036fd0ca230358cf26875df57e716f3306197 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:04:00 +0000 Subject: [PATCH 027/118] Changing vulkan file directory --- libavfilter/vf_xfade_vulkan.c | 725 ---------------------------------- 1 file changed, 725 deletions(-) delete mode 100644 libavfilter/vf_xfade_vulkan.c diff --git a/libavfilter/vf_xfade_vulkan.c b/libavfilter/vf_xfade_vulkan.c deleted file mode 100644 index 58e8797733..0000000000 --- a/libavfilter/vf_xfade_vulkan.c +++ /dev/null @@ -1,725 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/avassert.h" -#include "libavutil/random_seed.h" -#include "libavutil/opt.h" -#include "libavutil/vulkan_spirv.h" -#include "vulkan_filter.h" -#include "filters.h" -#include "video.h" - -#define IN_A 0 -#define IN_B 1 -#define IN_NB 2 - -typedef struct XFadeParameters { - float progress; -} XFadeParameters; - -typedef struct XFadeVulkanContext { - FFVulkanContext vkctx; - - int transition; - int64_t duration; - int64_t offset; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - VkSampler sampler; - - // PTS when the fade should start (in IN_A timebase) - int64_t start_pts; - - // PTS offset between IN_A and IN_B - int64_t inputs_offset_pts; - - // Duration of the transition - int64_t duration_pts; - - // Current PTS of the first input (IN_A) - int64_t pts; - - // If frames are currently just passed through - // unmodified, like before and after the actual - // transition. 
- int passthrough; - - int status[IN_NB]; -} XFadeVulkanContext; - -enum XFadeTransitions { - FADE, - WIPELEFT, - WIPERIGHT, - WIPEUP, - WIPEDOWN, - SLIDEDOWN, - SLIDEUP, - SLIDELEFT, - SLIDERIGHT, - CIRCLEOPEN, - CIRCLECLOSE, - DISSOLVE, - PIXELIZE, - WIPETL, - WIPETR, - WIPEBL, - WIPEBR, - NB_TRANSITIONS, -}; - -static const char transition_fade[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, mix(a, b, progress)); ) - C(0, } ) -}; - -static const char transition_wipeleft[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, ivec2 size = imageSize(output_images[idx]); ) - C(1, int s = int(size.x * (1.0 - progress)); ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, pos.x > s ? b : a); ) - C(0, } ) -}; - -static const char transition_wiperight[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, ivec2 size = imageSize(output_images[idx]); ) - C(1, int s = int(size.x * progress); ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, pos.x > s ? a : b); ) - C(0, } ) -}; - -static const char transition_wipeup[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, ivec2 size = imageSize(output_images[idx]); ) - C(1, int s = int(size.y * (1.0 - progress)); ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, pos.y > s ? 
b : a); ) - C(0, } ) -}; - -static const char transition_wipedown[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, ivec2 size = imageSize(output_images[idx]); ) - C(1, int s = int(size.y * progress); ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, pos.y > s ? a : b); ) - C(0, } ) -}; - -#define SHADER_SLIDE_COMMON \ - C(0, void slide(int idx, ivec2 pos, float progress, ivec2 direction) ) \ - C(0, { ) \ - C(1, ivec2 size = imageSize(output_images[idx]); ) \ - C(1, ivec2 pi = ivec2(progress * size); ) \ - C(1, ivec2 p = pos + pi * direction; ) \ - C(1, ivec2 f = p % size; ) \ - C(1, f = f + size * ivec2(f.x < 0, f.y < 0); ) \ - C(1, vec4 a = texture(a_images[idx], f); ) \ - C(1, vec4 b = texture(b_images[idx], f); ) \ - C(1, vec4 r = (p.y >= 0 && p.x >= 0 && size.y > p.y && size.x > p.x) ? a : b; ) \ - C(1, imageStore(output_images[idx], pos, r); ) \ - C(0, } ) - -static const char transition_slidedown[] = { - SHADER_SLIDE_COMMON - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, slide(idx, pos, progress, ivec2(0, -1)); ) - C(0, } ) -}; - -static const char transition_slideup[] = { - SHADER_SLIDE_COMMON - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, slide(idx, pos, progress, ivec2(0, +1)); ) - C(0, } ) -}; - -static const char transition_slideleft[] = { - SHADER_SLIDE_COMMON - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, slide(idx, pos, progress, ivec2(+1, 0)); ) - C(0, } ) -}; - -static const char transition_slideright[] = { - SHADER_SLIDE_COMMON - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, slide(idx, pos, progress, ivec2(-1, 0)); ) - C(0, } ) -}; - -#define SHADER_CIRCLE_COMMON \ - C(0, void circle(int idx, ivec2 pos, float progress, bool open) ) \ - C(0, { ) \ - C(1, const ivec2 half_size = 
imageSize(output_images[idx]) / 2; ) \ - C(1, const float z = dot(half_size, half_size); ) \ - C(1, float p = ((open ? (1.0 - progress) : progress) - 0.5) * 3.0; ) \ - C(1, ivec2 dsize = pos - half_size; ) \ - C(1, float sm = dot(dsize, dsize) / z + p; ) \ - C(1, vec4 a = texture(a_images[idx], pos); ) \ - C(1, vec4 b = texture(b_images[idx], pos); ) \ - C(1, imageStore(output_images[idx], pos, \ - mix(open ? b : a, open ? a : b, \ - smoothstep(0.f, 1.f, sm))); ) \ - C(0, } ) - -static const char transition_circleopen[] = { - SHADER_CIRCLE_COMMON - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, circle(idx, pos, progress, true); ) - C(0, } ) -}; - -static const char transition_circleclose[] = { - SHADER_CIRCLE_COMMON - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, circle(idx, pos, progress, false); ) - C(0, } ) -}; - -#define SHADER_FRAND_FUNC \ - C(0, float frand(vec2 v) ) \ - C(0, { ) \ - C(1, return fract(sin(dot(v, vec2(12.9898, 78.233))) * 43758.545); ) \ - C(0, } ) - -static const char transition_dissolve[] = { - SHADER_FRAND_FUNC - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, float sm = frand(pos) * 2.0 + (1.0 - progress) * 2.0 - 1.5; ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, sm >= 0.5 ? a : b); ) - C(0, } ) -}; - -static const char transition_pixelize[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, ivec2 size = imageSize(output_images[idx]); ) - C(1, float d = min(progress, 1.0 - progress); ) - C(1, float dist = ceil(d * 50.0) / 50.0; ) - C(1, float sq = 2.0 * dist * min(size.x, size.y) / 20.0; ) - C(1, float sx = dist > 0.0 ? min((floor(pos.x / sq) + 0.5) * sq, size.x - 1) : pos.x; ) - C(1, float sy = dist > 0.0 ? 
min((floor(pos.y / sq) + 0.5) * sq, size.y - 1) : pos.y; ) - C(1, vec4 a = texture(a_images[idx], vec2(sx, sy)); ) - C(1, vec4 b = texture(b_images[idx], vec2(sx, sy)); ) - C(1, imageStore(output_images[idx], pos, mix(a, b, progress)); ) - C(0, } ) -}; - -static const char transition_wipetl[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, ivec2 size = imageSize(output_images[idx]); ) - C(1, float zw = size.x * (1.0 - progress); ) - C(1, float zh = size.y * (1.0 - progress); ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, (pos.y <= zh && pos.x <= zw) ? a : b); ) - C(0, } ) -}; - -static const char transition_wipetr[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, ivec2 size = imageSize(output_images[idx]); ) - C(1, float zw = size.x * (progress); ) - C(1, float zh = size.y * (1.0 - progress); ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, (pos.y <= zh && pos.x > zw) ? a : b); ) - C(0, } ) -}; - -static const char transition_wipebl[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, ivec2 size = imageSize(output_images[idx]); ) - C(1, float zw = size.x * (1.0 - progress); ) - C(1, float zh = size.y * (progress); ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, (pos.y > zh && pos.x <= zw) ? 
a : b); ) - C(0, } ) -}; - -static const char transition_wipebr[] = { - C(0, void transition(int idx, ivec2 pos, float progress) ) - C(0, { ) - C(1, ivec2 size = imageSize(output_images[idx]); ) - C(1, float zw = size.x * (progress); ) - C(1, float zh = size.y * (progress); ) - C(1, vec4 a = texture(a_images[idx], pos); ) - C(1, vec4 b = texture(b_images[idx], pos); ) - C(1, imageStore(output_images[idx], pos, (pos.y > zh && pos.x > zw) ? a : b); ) - C(0, } ) -}; - -static const char* transitions_map[NB_TRANSITIONS] = { - [FADE] = transition_fade, - [WIPELEFT] = transition_wipeleft, - [WIPERIGHT] = transition_wiperight, - [WIPEUP] = transition_wipeup, - [WIPEDOWN] = transition_wipedown, - [SLIDEDOWN] = transition_slidedown, - [SLIDEUP] = transition_slideup, - [SLIDELEFT] = transition_slideleft, - [SLIDERIGHT] = transition_slideright, - [CIRCLEOPEN] = transition_circleopen, - [CIRCLECLOSE] = transition_circleclose, - [DISSOLVE] = transition_dissolve, - [PIXELIZE] = transition_pixelize, - [WIPETL] = transition_wipetl, - [WIPETR] = transition_wipetr, - [WIPEBL] = transition_wipebl, - [WIPEBR] = transition_wipebr, -}; - -static av_cold int init_vulkan(AVFilterContext *avctx) -{ - int err = 0; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - XFadeVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd = &s->shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(avctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); 
- RET(ff_vk_shader_init(vkctx, &s->shd, "xfade", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 32, 1, - 0)); - - desc = (FFVulkanDescriptorSetBinding []) { - { - .name = "a_images", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER(s->sampler), - }, - { - .name = "b_images", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER(s->sampler), - }, - { - .name = "output_images", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 3, 0, 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, float progress; ); - GLSLC(0, }; ); - - ff_vk_shader_add_push_const(&s->shd, 0, sizeof(XFadeParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - - // Add the right transition type function to the shader - GLSLD(transitions_map[s->transition]); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - GLSLF(1, int planes = %i; ,planes); - GLSLC(1, for (int i = 0; i < planes; i++) { ); - GLSLC(2, transition(i, pos, progress); ); - GLSLC(1, } ); - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static int xfade_frame(AVFilterContext *avctx, AVFrame *frame_a, AVFrame *frame_b) -{ - int err; - AVFilterLink *outlink = avctx->outputs[0]; - 
XFadeVulkanContext *s = avctx->priv; - float progress; - - AVFrame *output = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!output) { - err = AVERROR(ENOMEM); - goto fail; - } - - if (!s->initialized) { - AVHWFramesContext *a_fc = (AVHWFramesContext*)frame_a->hw_frames_ctx->data; - AVHWFramesContext *b_fc = (AVHWFramesContext*)frame_b->hw_frames_ctx->data; - if (a_fc->sw_format != b_fc->sw_format) { - av_log(avctx, AV_LOG_ERROR, - "Currently the sw format of the first input needs to match the second!\n"); - return AVERROR(EINVAL); - } - RET(init_vulkan(avctx)); - } - - RET(av_frame_copy_props(output, frame_a)); - output->pts = s->pts; - - progress = av_clipf((float)(s->pts - s->start_pts) / s->duration_pts, - 0.f, 1.f); - - RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, output, - (AVFrame *[]){ frame_a, frame_b }, 2, s->sampler, - &(XFadeParameters){ progress }, sizeof(XFadeParameters))); - - return ff_filter_frame(outlink, output); - -fail: - av_frame_free(&output); - return err; -} - -static int config_props_output(AVFilterLink *outlink) -{ - int err; - AVFilterContext *avctx = outlink->src; - XFadeVulkanContext *s = avctx->priv; - AVFilterLink *inlink_a = avctx->inputs[IN_A]; - AVFilterLink *inlink_b = avctx->inputs[IN_B]; - FilterLink *il = ff_filter_link(inlink_a); - FilterLink *ol = ff_filter_link(outlink); - - if (inlink_a->w != inlink_b->w || inlink_a->h != inlink_b->h) { - av_log(avctx, AV_LOG_ERROR, "First input link %s parameters " - "(size %dx%d) do not match the corresponding " - "second input link %s parameters (size %dx%d)\n", - avctx->input_pads[IN_A].name, inlink_a->w, inlink_a->h, - avctx->input_pads[IN_B].name, inlink_b->w, inlink_b->h); - return AVERROR(EINVAL); - } - - if (inlink_a->time_base.num != inlink_b->time_base.num || - inlink_a->time_base.den != inlink_b->time_base.den) { - av_log(avctx, AV_LOG_ERROR, "First input link %s timebase " - "(%d/%d) does not match the corresponding " - "second input link %s timebase 
(%d/%d)\n", - avctx->input_pads[IN_A].name, inlink_a->time_base.num, inlink_a->time_base.den, - avctx->input_pads[IN_B].name, inlink_b->time_base.num, inlink_b->time_base.den); - return AVERROR(EINVAL); - } - - s->start_pts = s->inputs_offset_pts = AV_NOPTS_VALUE; - - outlink->time_base = inlink_a->time_base; - ol->frame_rate = il->frame_rate; - outlink->sample_aspect_ratio = inlink_a->sample_aspect_ratio; - - if (s->duration) - s->duration_pts = av_rescale_q(s->duration, AV_TIME_BASE_Q, inlink_a->time_base); - RET(ff_vk_filter_config_output(outlink)); - -fail: - return err; -} - -static int forward_frame(XFadeVulkanContext *s, - AVFilterLink *inlink, AVFilterLink *outlink) -{ - int64_t status_pts; - int ret = 0, status; - AVFrame *frame = NULL; - - ret = ff_inlink_consume_frame(inlink, &frame); - if (ret < 0) - return ret; - - if (ret > 0) { - // If we do not have an offset yet, it's because we - // never got a first input. Just offset to 0 - if (s->inputs_offset_pts == AV_NOPTS_VALUE) - s->inputs_offset_pts = -frame->pts; - - // We got a frame, nothing to do other than adjusting the timestamp - frame->pts += s->inputs_offset_pts; - return ff_filter_frame(outlink, frame); - } - - // Forward status with our timestamp - if (ff_inlink_acknowledge_status(inlink, &status, &status_pts)) { - if (s->inputs_offset_pts == AV_NOPTS_VALUE) - s->inputs_offset_pts = -status_pts; - - ff_outlink_set_status(outlink, status, status_pts + s->inputs_offset_pts); - return 0; - } - - // No frame available, request one if needed - if (ff_outlink_frame_wanted(outlink)) - ff_inlink_request_frame(inlink); - - return 0; -} - -static int activate(AVFilterContext *avctx) -{ - XFadeVulkanContext *s = avctx->priv; - AVFilterLink *in_a = avctx->inputs[IN_A]; - AVFilterLink *in_b = avctx->inputs[IN_B]; - AVFilterLink *outlink = avctx->outputs[0]; - int64_t status_pts; - - FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); - - // Check if we already transitioned or IN_A ended prematurely, - // in 
which case just forward the frames from IN_B with adjusted - // timestamps until EOF. - if (s->status[IN_A] && !s->status[IN_B]) - return forward_frame(s, in_b, outlink); - - // We did not finish transitioning yet and the first stream - // did not end either, so check if there are more frames to consume. - if (ff_inlink_check_available_frame(in_a)) { - AVFrame *peeked_frame = ff_inlink_peek_frame(in_a, 0); - s->pts = peeked_frame->pts; - - if (s->start_pts == AV_NOPTS_VALUE) - s->start_pts = - s->pts + av_rescale_q(s->offset, AV_TIME_BASE_Q, in_a->time_base); - - // Check if we are not yet transitioning, in which case - // just request and forward the input frame. - if (s->start_pts > s->pts) { - AVFrame *frame_a = NULL; - s->passthrough = 1; - ff_inlink_consume_frame(in_a, &frame_a); - return ff_filter_frame(outlink, frame_a); - } - s->passthrough = 0; - - // We are transitioning, so we need a frame from IN_B - if (ff_inlink_check_available_frame(in_b)) { - int ret; - AVFrame *frame_a = NULL, *frame_b = NULL; - ff_inlink_consume_frame(avctx->inputs[IN_A], &frame_a); - ff_inlink_consume_frame(avctx->inputs[IN_B], &frame_b); - - // Calculate PTS offset to first input - if (s->inputs_offset_pts == AV_NOPTS_VALUE) - s->inputs_offset_pts = s->pts - frame_b->pts; - - // Check if we finished transitioning, in which case we - // report back EOF to IN_A as it is no longer needed. - if (s->pts - s->start_pts > s->duration_pts) { - s->status[IN_A] = AVERROR_EOF; - ff_inlink_set_status(in_a, AVERROR_EOF); - s->passthrough = 1; - } - ret = xfade_frame(avctx, frame_a, frame_b); - av_frame_free(&frame_a); - av_frame_free(&frame_b); - return ret; - } - - // We did not get a frame from IN_B, check its status. - if (ff_inlink_acknowledge_status(in_b, &s->status[IN_B], &status_pts)) { - // We should transition, but IN_B is EOF so just report EOF output now. 
- ff_outlink_set_status(outlink, s->status[IN_B], s->pts); - return 0; - } - - // We did not get a frame for IN_B but no EOF either, so just request more. - if (ff_outlink_frame_wanted(outlink)) { - ff_inlink_request_frame(in_b); - return 0; - } - } - - // We did not get a frame from IN_A, check its status. - if (ff_inlink_acknowledge_status(in_a, &s->status[IN_A], &status_pts)) { - // No more frames from IN_A, do not report EOF though, we will just - // forward the IN_B frames in the next activate calls. - s->passthrough = 1; - ff_filter_set_ready(avctx, 100); - return 0; - } - - // We have no frames yet from IN_A and no EOF, so request some. - if (ff_outlink_frame_wanted(outlink)) { - ff_inlink_request_frame(in_a); - return 0; - } - - return FFERROR_NOT_READY; -} - -static av_cold void uninit(AVFilterContext *avctx) -{ - XFadeVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - if (s->sampler) - vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, - vkctx->hwctx->alloc); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h) -{ - XFadeVulkanContext *s = inlink->dst->priv; - - return s->passthrough ? 
- ff_null_get_video_buffer (inlink, w, h) : - ff_default_get_video_buffer(inlink, w, h); -} - -#define OFFSET(x) offsetof(XFadeVulkanContext, x) -#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) - -static const AVOption xfade_vulkan_options[] = { - { "transition", "set cross fade transition", OFFSET(transition), AV_OPT_TYPE_INT, {.i64=FADE}, 0, NB_TRANSITIONS-1, FLAGS, .unit = "transition" }, - { "fade", "fade transition", 0, AV_OPT_TYPE_CONST, {.i64=FADE}, 0, 0, FLAGS, .unit = "transition" }, - { "wipeleft", "wipe left transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPELEFT}, 0, 0, FLAGS, .unit = "transition" }, - { "wiperight", "wipe right transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPERIGHT}, 0, 0, FLAGS, .unit = "transition" }, - { "wipeup", "wipe up transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPEUP}, 0, 0, FLAGS, .unit = "transition" }, - { "wipedown", "wipe down transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPEDOWN}, 0, 0, FLAGS, .unit = "transition" }, - { "slidedown", "slide down transition", 0, AV_OPT_TYPE_CONST, {.i64=SLIDEDOWN}, 0, 0, FLAGS, .unit = "transition" }, - { "slideup", "slide up transition", 0, AV_OPT_TYPE_CONST, {.i64=SLIDEUP}, 0, 0, FLAGS, .unit = "transition" }, - { "slideleft", "slide left transition", 0, AV_OPT_TYPE_CONST, {.i64=SLIDELEFT}, 0, 0, FLAGS, .unit = "transition" }, - { "slideright", "slide right transition", 0, AV_OPT_TYPE_CONST, {.i64=SLIDERIGHT}, 0, 0, FLAGS, .unit = "transition" }, - { "circleopen", "circleopen transition", 0, AV_OPT_TYPE_CONST, {.i64=CIRCLEOPEN}, 0, 0, FLAGS, .unit = "transition" }, - { "circleclose", "circleclose transition", 0, AV_OPT_TYPE_CONST, {.i64=CIRCLECLOSE}, 0, 0, FLAGS, .unit = "transition" }, - { "dissolve", "dissolve transition", 0, AV_OPT_TYPE_CONST, {.i64=DISSOLVE}, 0, 0, FLAGS, .unit = "transition" }, - { "pixelize", "pixelize transition", 0, AV_OPT_TYPE_CONST, {.i64=PIXELIZE}, 0, 0, FLAGS, .unit = "transition" }, - { "wipetl", "wipe top left transition", 0, AV_OPT_TYPE_CONST, 
{.i64=WIPETL}, 0, 0, FLAGS, .unit = "transition" }, - { "wipetr", "wipe top right transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPETR}, 0, 0, FLAGS, .unit = "transition" }, - { "wipebl", "wipe bottom left transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPEBL}, 0, 0, FLAGS, .unit = "transition" }, - { "wipebr", "wipe bottom right transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPEBR}, 0, 0, FLAGS, .unit = "transition" }, - { "duration", "set cross fade duration", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64=1000000}, 0, 60000000, FLAGS }, - { "offset", "set cross fade start relative to first input stream", OFFSET(offset), AV_OPT_TYPE_DURATION, {.i64=0}, INT64_MIN, INT64_MAX, FLAGS }, - { NULL } -}; - -AVFILTER_DEFINE_CLASS(xfade_vulkan); - -static const AVFilterPad xfade_vulkan_inputs[] = { - { - .name = "main", - .type = AVMEDIA_TYPE_VIDEO, - .get_buffer.video = &get_video_buffer, - .config_props = &ff_vk_filter_config_input, - }, - { - .name = "xfade", - .type = AVMEDIA_TYPE_VIDEO, - .get_buffer.video = &get_video_buffer, - .config_props = &ff_vk_filter_config_input, - }, -}; - -static const AVFilterPad xfade_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = &config_props_output, - }, -}; - -const FFFilter ff_vf_xfade_vulkan = { - .p.name = "xfade_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Cross fade one video with another video."), - .p.priv_class = &xfade_vulkan_class, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .priv_size = sizeof(XFadeVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &uninit, - .activate = &activate, - FILTER_INPUTS(xfade_vulkan_inputs), - FILTER_OUTPUTS(xfade_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 8de5912697bec07602562495fac65ed63b2b9e7c Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:04:41 +0000 Subject: [PATCH 028/118] Changing vulkan file directory --- 
libavfilter/vulkan/vf_xfade_vulkan.c | 725 +++++++++++++++++++++++++++ 1 file changed, 725 insertions(+) create mode 100644 libavfilter/vulkan/vf_xfade_vulkan.c diff --git a/libavfilter/vulkan/vf_xfade_vulkan.c b/libavfilter/vulkan/vf_xfade_vulkan.c new file mode 100644 index 0000000000..b125ae4d12 --- /dev/null +++ b/libavfilter/vulkan/vf_xfade_vulkan.c @@ -0,0 +1,725 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/random_seed.h" +#include "libavutil/opt.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "vulkan_filter.h" +#include "libavfilter/filters.h" +#include "libavfilter/video.h" + +#define IN_A 0 +#define IN_B 1 +#define IN_NB 2 + +typedef struct XFadeParameters { + float progress; +} XFadeParameters; + +typedef struct XFadeVulkanContext { + FFVulkanContext vkctx; + + int transition; + int64_t duration; + int64_t offset; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + VkSampler sampler; + + // PTS when the fade should start (in IN_A timebase) + int64_t start_pts; + + // PTS offset between IN_A and IN_B + int64_t inputs_offset_pts; + + // Duration of the transition + int64_t duration_pts; + + // Current PTS of the first 
input (IN_A) + int64_t pts; + + // If frames are currently just passed through + // unmodified, like before and after the actual + // transition. + int passthrough; + + int status[IN_NB]; +} XFadeVulkanContext; + +enum XFadeTransitions { + FADE, + WIPELEFT, + WIPERIGHT, + WIPEUP, + WIPEDOWN, + SLIDEDOWN, + SLIDEUP, + SLIDELEFT, + SLIDERIGHT, + CIRCLEOPEN, + CIRCLECLOSE, + DISSOLVE, + PIXELIZE, + WIPETL, + WIPETR, + WIPEBL, + WIPEBR, + NB_TRANSITIONS, +}; + +static const char transition_fade[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, mix(a, b, progress)); ) + C(0, } ) +}; + +static const char transition_wipeleft[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, ivec2 size = imageSize(output_images[idx]); ) + C(1, int s = int(size.x * (1.0 - progress)); ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, pos.x > s ? b : a); ) + C(0, } ) +}; + +static const char transition_wiperight[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, ivec2 size = imageSize(output_images[idx]); ) + C(1, int s = int(size.x * progress); ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, pos.x > s ? a : b); ) + C(0, } ) +}; + +static const char transition_wipeup[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, ivec2 size = imageSize(output_images[idx]); ) + C(1, int s = int(size.y * (1.0 - progress)); ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, pos.y > s ? 
b : a); ) + C(0, } ) +}; + +static const char transition_wipedown[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, ivec2 size = imageSize(output_images[idx]); ) + C(1, int s = int(size.y * progress); ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, pos.y > s ? a : b); ) + C(0, } ) +}; + +#define SHADER_SLIDE_COMMON \ + C(0, void slide(int idx, ivec2 pos, float progress, ivec2 direction) ) \ + C(0, { ) \ + C(1, ivec2 size = imageSize(output_images[idx]); ) \ + C(1, ivec2 pi = ivec2(progress * size); ) \ + C(1, ivec2 p = pos + pi * direction; ) \ + C(1, ivec2 f = p % size; ) \ + C(1, f = f + size * ivec2(f.x < 0, f.y < 0); ) \ + C(1, vec4 a = texture(a_images[idx], f); ) \ + C(1, vec4 b = texture(b_images[idx], f); ) \ + C(1, vec4 r = (p.y >= 0 && p.x >= 0 && size.y > p.y && size.x > p.x) ? a : b; ) \ + C(1, imageStore(output_images[idx], pos, r); ) \ + C(0, } ) + +static const char transition_slidedown[] = { + SHADER_SLIDE_COMMON + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, slide(idx, pos, progress, ivec2(0, -1)); ) + C(0, } ) +}; + +static const char transition_slideup[] = { + SHADER_SLIDE_COMMON + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, slide(idx, pos, progress, ivec2(0, +1)); ) + C(0, } ) +}; + +static const char transition_slideleft[] = { + SHADER_SLIDE_COMMON + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, slide(idx, pos, progress, ivec2(+1, 0)); ) + C(0, } ) +}; + +static const char transition_slideright[] = { + SHADER_SLIDE_COMMON + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, slide(idx, pos, progress, ivec2(-1, 0)); ) + C(0, } ) +}; + +#define SHADER_CIRCLE_COMMON \ + C(0, void circle(int idx, ivec2 pos, float progress, bool open) ) \ + C(0, { ) \ + C(1, const ivec2 half_size = 
imageSize(output_images[idx]) / 2; ) \ + C(1, const float z = dot(half_size, half_size); ) \ + C(1, float p = ((open ? (1.0 - progress) : progress) - 0.5) * 3.0; ) \ + C(1, ivec2 dsize = pos - half_size; ) \ + C(1, float sm = dot(dsize, dsize) / z + p; ) \ + C(1, vec4 a = texture(a_images[idx], pos); ) \ + C(1, vec4 b = texture(b_images[idx], pos); ) \ + C(1, imageStore(output_images[idx], pos, \ + mix(open ? b : a, open ? a : b, \ + smoothstep(0.f, 1.f, sm))); ) \ + C(0, } ) + +static const char transition_circleopen[] = { + SHADER_CIRCLE_COMMON + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, circle(idx, pos, progress, true); ) + C(0, } ) +}; + +static const char transition_circleclose[] = { + SHADER_CIRCLE_COMMON + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, circle(idx, pos, progress, false); ) + C(0, } ) +}; + +#define SHADER_FRAND_FUNC \ + C(0, float frand(vec2 v) ) \ + C(0, { ) \ + C(1, return fract(sin(dot(v, vec2(12.9898, 78.233))) * 43758.545); ) \ + C(0, } ) + +static const char transition_dissolve[] = { + SHADER_FRAND_FUNC + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, float sm = frand(pos) * 2.0 + (1.0 - progress) * 2.0 - 1.5; ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, sm >= 0.5 ? a : b); ) + C(0, } ) +}; + +static const char transition_pixelize[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, ivec2 size = imageSize(output_images[idx]); ) + C(1, float d = min(progress, 1.0 - progress); ) + C(1, float dist = ceil(d * 50.0) / 50.0; ) + C(1, float sq = 2.0 * dist * min(size.x, size.y) / 20.0; ) + C(1, float sx = dist > 0.0 ? min((floor(pos.x / sq) + 0.5) * sq, size.x - 1) : pos.x; ) + C(1, float sy = dist > 0.0 ? 
min((floor(pos.y / sq) + 0.5) * sq, size.y - 1) : pos.y; ) + C(1, vec4 a = texture(a_images[idx], vec2(sx, sy)); ) + C(1, vec4 b = texture(b_images[idx], vec2(sx, sy)); ) + C(1, imageStore(output_images[idx], pos, mix(a, b, progress)); ) + C(0, } ) +}; + +static const char transition_wipetl[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, ivec2 size = imageSize(output_images[idx]); ) + C(1, float zw = size.x * (1.0 - progress); ) + C(1, float zh = size.y * (1.0 - progress); ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, (pos.y <= zh && pos.x <= zw) ? a : b); ) + C(0, } ) +}; + +static const char transition_wipetr[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, ivec2 size = imageSize(output_images[idx]); ) + C(1, float zw = size.x * (progress); ) + C(1, float zh = size.y * (1.0 - progress); ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, (pos.y <= zh && pos.x > zw) ? a : b); ) + C(0, } ) +}; + +static const char transition_wipebl[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, ivec2 size = imageSize(output_images[idx]); ) + C(1, float zw = size.x * (1.0 - progress); ) + C(1, float zh = size.y * (progress); ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, (pos.y > zh && pos.x <= zw) ? 
a : b); ) + C(0, } ) +}; + +static const char transition_wipebr[] = { + C(0, void transition(int idx, ivec2 pos, float progress) ) + C(0, { ) + C(1, ivec2 size = imageSize(output_images[idx]); ) + C(1, float zw = size.x * (progress); ) + C(1, float zh = size.y * (progress); ) + C(1, vec4 a = texture(a_images[idx], pos); ) + C(1, vec4 b = texture(b_images[idx], pos); ) + C(1, imageStore(output_images[idx], pos, (pos.y > zh && pos.x > zw) ? a : b); ) + C(0, } ) +}; + +static const char* transitions_map[NB_TRANSITIONS] = { + [FADE] = transition_fade, + [WIPELEFT] = transition_wipeleft, + [WIPERIGHT] = transition_wiperight, + [WIPEUP] = transition_wipeup, + [WIPEDOWN] = transition_wipedown, + [SLIDEDOWN] = transition_slidedown, + [SLIDEUP] = transition_slideup, + [SLIDELEFT] = transition_slideleft, + [SLIDERIGHT] = transition_slideright, + [CIRCLEOPEN] = transition_circleopen, + [CIRCLECLOSE] = transition_circleclose, + [DISSOLVE] = transition_dissolve, + [PIXELIZE] = transition_pixelize, + [WIPETL] = transition_wipetl, + [WIPETR] = transition_wipetr, + [WIPEBL] = transition_wipebl, + [WIPEBR] = transition_wipebr, +}; + +static av_cold int init_vulkan(AVFilterContext *avctx) +{ + int err = 0; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + XFadeVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVulkanShader *shd = &s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(avctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); 
+ RET(ff_vk_shader_init(vkctx, &s->shd, "xfade", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 32, 1, + 0)); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "a_images", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "b_images", + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .samplers = DUP_SAMPLER(s->sampler), + }, + { + .name = "output_images", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc, 3, 0, 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, float progress; ); + GLSLC(0, }; ); + + ff_vk_shader_add_push_const(&s->shd, 0, sizeof(XFadeParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + + // Add the right transition type function to the shader + GLSLD(transitions_map[s->transition]); + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + GLSLF(1, int planes = %i; ,planes); + GLSLC(1, for (int i = 0; i < planes; i++) { ); + GLSLC(2, transition(i, pos, progress); ); + GLSLC(1, } ); + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int xfade_frame(AVFilterContext *avctx, AVFrame *frame_a, AVFrame *frame_b) +{ + int err; + AVFilterLink *outlink = avctx->outputs[0]; + 
XFadeVulkanContext *s = avctx->priv; + float progress; + + AVFrame *output = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!output) { + err = AVERROR(ENOMEM); + goto fail; + } + + if (!s->initialized) { + AVHWFramesContext *a_fc = (AVHWFramesContext*)frame_a->hw_frames_ctx->data; + AVHWFramesContext *b_fc = (AVHWFramesContext*)frame_b->hw_frames_ctx->data; + if (a_fc->sw_format != b_fc->sw_format) { + av_log(avctx, AV_LOG_ERROR, + "Currently the sw format of the first input needs to match the second!\n"); + return AVERROR(EINVAL); + } + RET(init_vulkan(avctx)); + } + + RET(av_frame_copy_props(output, frame_a)); + output->pts = s->pts; + + progress = av_clipf((float)(s->pts - s->start_pts) / s->duration_pts, + 0.f, 1.f); + + RET(ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, output, + (AVFrame *[]){ frame_a, frame_b }, 2, s->sampler, + &(XFadeParameters){ progress }, sizeof(XFadeParameters))); + + return ff_filter_frame(outlink, output); + +fail: + av_frame_free(&output); + return err; +} + +static int config_props_output(AVFilterLink *outlink) +{ + int err; + AVFilterContext *avctx = outlink->src; + XFadeVulkanContext *s = avctx->priv; + AVFilterLink *inlink_a = avctx->inputs[IN_A]; + AVFilterLink *inlink_b = avctx->inputs[IN_B]; + FilterLink *il = ff_filter_link(inlink_a); + FilterLink *ol = ff_filter_link(outlink); + + if (inlink_a->w != inlink_b->w || inlink_a->h != inlink_b->h) { + av_log(avctx, AV_LOG_ERROR, "First input link %s parameters " + "(size %dx%d) do not match the corresponding " + "second input link %s parameters (size %dx%d)\n", + avctx->input_pads[IN_A].name, inlink_a->w, inlink_a->h, + avctx->input_pads[IN_B].name, inlink_b->w, inlink_b->h); + return AVERROR(EINVAL); + } + + if (inlink_a->time_base.num != inlink_b->time_base.num || + inlink_a->time_base.den != inlink_b->time_base.den) { + av_log(avctx, AV_LOG_ERROR, "First input link %s timebase " + "(%d/%d) does not match the corresponding " + "second input link %s timebase 
(%d/%d)\n", + avctx->input_pads[IN_A].name, inlink_a->time_base.num, inlink_a->time_base.den, + avctx->input_pads[IN_B].name, inlink_b->time_base.num, inlink_b->time_base.den); + return AVERROR(EINVAL); + } + + s->start_pts = s->inputs_offset_pts = AV_NOPTS_VALUE; + + outlink->time_base = inlink_a->time_base; + ol->frame_rate = il->frame_rate; + outlink->sample_aspect_ratio = inlink_a->sample_aspect_ratio; + + if (s->duration) + s->duration_pts = av_rescale_q(s->duration, AV_TIME_BASE_Q, inlink_a->time_base); + RET(ff_vk_filter_config_output(outlink)); + +fail: + return err; +} + +static int forward_frame(XFadeVulkanContext *s, + AVFilterLink *inlink, AVFilterLink *outlink) +{ + int64_t status_pts; + int ret = 0, status; + AVFrame *frame = NULL; + + ret = ff_inlink_consume_frame(inlink, &frame); + if (ret < 0) + return ret; + + if (ret > 0) { + // If we do not have an offset yet, it's because we + // never got a first input. Just offset to 0 + if (s->inputs_offset_pts == AV_NOPTS_VALUE) + s->inputs_offset_pts = -frame->pts; + + // We got a frame, nothing to do other than adjusting the timestamp + frame->pts += s->inputs_offset_pts; + return ff_filter_frame(outlink, frame); + } + + // Forward status with our timestamp + if (ff_inlink_acknowledge_status(inlink, &status, &status_pts)) { + if (s->inputs_offset_pts == AV_NOPTS_VALUE) + s->inputs_offset_pts = -status_pts; + + ff_outlink_set_status(outlink, status, status_pts + s->inputs_offset_pts); + return 0; + } + + // No frame available, request one if needed + if (ff_outlink_frame_wanted(outlink)) + ff_inlink_request_frame(inlink); + + return 0; +} + +static int activate(AVFilterContext *avctx) +{ + XFadeVulkanContext *s = avctx->priv; + AVFilterLink *in_a = avctx->inputs[IN_A]; + AVFilterLink *in_b = avctx->inputs[IN_B]; + AVFilterLink *outlink = avctx->outputs[0]; + int64_t status_pts; + + FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); + + // Check if we already transitioned or IN_A ended prematurely, + // in 
which case just forward the frames from IN_B with adjusted + // timestamps until EOF. + if (s->status[IN_A] && !s->status[IN_B]) + return forward_frame(s, in_b, outlink); + + // We did not finish transitioning yet and the first stream + // did not end either, so check if there are more frames to consume. + if (ff_inlink_check_available_frame(in_a)) { + AVFrame *peeked_frame = ff_inlink_peek_frame(in_a, 0); + s->pts = peeked_frame->pts; + + if (s->start_pts == AV_NOPTS_VALUE) + s->start_pts = + s->pts + av_rescale_q(s->offset, AV_TIME_BASE_Q, in_a->time_base); + + // Check if we are not yet transitioning, in which case + // just request and forward the input frame. + if (s->start_pts > s->pts) { + AVFrame *frame_a = NULL; + s->passthrough = 1; + ff_inlink_consume_frame(in_a, &frame_a); + return ff_filter_frame(outlink, frame_a); + } + s->passthrough = 0; + + // We are transitioning, so we need a frame from IN_B + if (ff_inlink_check_available_frame(in_b)) { + int ret; + AVFrame *frame_a = NULL, *frame_b = NULL; + ff_inlink_consume_frame(avctx->inputs[IN_A], &frame_a); + ff_inlink_consume_frame(avctx->inputs[IN_B], &frame_b); + + // Calculate PTS offset to first input + if (s->inputs_offset_pts == AV_NOPTS_VALUE) + s->inputs_offset_pts = s->pts - frame_b->pts; + + // Check if we finished transitioning, in which case we + // report back EOF to IN_A as it is no longer needed. + if (s->pts - s->start_pts > s->duration_pts) { + s->status[IN_A] = AVERROR_EOF; + ff_inlink_set_status(in_a, AVERROR_EOF); + s->passthrough = 1; + } + ret = xfade_frame(avctx, frame_a, frame_b); + av_frame_free(&frame_a); + av_frame_free(&frame_b); + return ret; + } + + // We did not get a frame from IN_B, check its status. + if (ff_inlink_acknowledge_status(in_b, &s->status[IN_B], &status_pts)) { + // We should transition, but IN_B is EOF so just report EOF output now. 
+ ff_outlink_set_status(outlink, s->status[IN_B], s->pts); + return 0; + } + + // We did not get a frame for IN_B but no EOF either, so just request more. + if (ff_outlink_frame_wanted(outlink)) { + ff_inlink_request_frame(in_b); + return 0; + } + } + + // We did not get a frame from IN_A, check its status. + if (ff_inlink_acknowledge_status(in_a, &s->status[IN_A], &status_pts)) { + // No more frames from IN_A, do not report EOF though, we will just + // forward the IN_B frames in the next activate calls. + s->passthrough = 1; + ff_filter_set_ready(avctx, 100); + return 0; + } + + // We have no frames yet from IN_A and no EOF, so request some. + if (ff_outlink_frame_wanted(outlink)) { + ff_inlink_request_frame(in_a); + return 0; + } + + return FFERROR_NOT_READY; +} + +static av_cold void uninit(AVFilterContext *avctx) +{ + XFadeVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + if (s->sampler) + vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, + vkctx->hwctx->alloc); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h) +{ + XFadeVulkanContext *s = inlink->dst->priv; + + return s->passthrough ? 
+ ff_null_get_video_buffer (inlink, w, h) : + ff_default_get_video_buffer(inlink, w, h); +} + +#define OFFSET(x) offsetof(XFadeVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + +static const AVOption xfade_vulkan_options[] = { + { "transition", "set cross fade transition", OFFSET(transition), AV_OPT_TYPE_INT, {.i64=FADE}, 0, NB_TRANSITIONS-1, FLAGS, .unit = "transition" }, + { "fade", "fade transition", 0, AV_OPT_TYPE_CONST, {.i64=FADE}, 0, 0, FLAGS, .unit = "transition" }, + { "wipeleft", "wipe left transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPELEFT}, 0, 0, FLAGS, .unit = "transition" }, + { "wiperight", "wipe right transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPERIGHT}, 0, 0, FLAGS, .unit = "transition" }, + { "wipeup", "wipe up transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPEUP}, 0, 0, FLAGS, .unit = "transition" }, + { "wipedown", "wipe down transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPEDOWN}, 0, 0, FLAGS, .unit = "transition" }, + { "slidedown", "slide down transition", 0, AV_OPT_TYPE_CONST, {.i64=SLIDEDOWN}, 0, 0, FLAGS, .unit = "transition" }, + { "slideup", "slide up transition", 0, AV_OPT_TYPE_CONST, {.i64=SLIDEUP}, 0, 0, FLAGS, .unit = "transition" }, + { "slideleft", "slide left transition", 0, AV_OPT_TYPE_CONST, {.i64=SLIDELEFT}, 0, 0, FLAGS, .unit = "transition" }, + { "slideright", "slide right transition", 0, AV_OPT_TYPE_CONST, {.i64=SLIDERIGHT}, 0, 0, FLAGS, .unit = "transition" }, + { "circleopen", "circleopen transition", 0, AV_OPT_TYPE_CONST, {.i64=CIRCLEOPEN}, 0, 0, FLAGS, .unit = "transition" }, + { "circleclose", "circleclose transition", 0, AV_OPT_TYPE_CONST, {.i64=CIRCLECLOSE}, 0, 0, FLAGS, .unit = "transition" }, + { "dissolve", "dissolve transition", 0, AV_OPT_TYPE_CONST, {.i64=DISSOLVE}, 0, 0, FLAGS, .unit = "transition" }, + { "pixelize", "pixelize transition", 0, AV_OPT_TYPE_CONST, {.i64=PIXELIZE}, 0, 0, FLAGS, .unit = "transition" }, + { "wipetl", "wipe top left transition", 0, AV_OPT_TYPE_CONST, 
{.i64=WIPETL}, 0, 0, FLAGS, .unit = "transition" }, + { "wipetr", "wipe top right transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPETR}, 0, 0, FLAGS, .unit = "transition" }, + { "wipebl", "wipe bottom left transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPEBL}, 0, 0, FLAGS, .unit = "transition" }, + { "wipebr", "wipe bottom right transition", 0, AV_OPT_TYPE_CONST, {.i64=WIPEBR}, 0, 0, FLAGS, .unit = "transition" }, + { "duration", "set cross fade duration", OFFSET(duration), AV_OPT_TYPE_DURATION, {.i64=1000000}, 0, 60000000, FLAGS }, + { "offset", "set cross fade start relative to first input stream", OFFSET(offset), AV_OPT_TYPE_DURATION, {.i64=0}, INT64_MIN, INT64_MAX, FLAGS }, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(xfade_vulkan); + +static const AVFilterPad xfade_vulkan_inputs[] = { + { + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .get_buffer.video = &get_video_buffer, + .config_props = &ff_vk_filter_config_input, + }, + { + .name = "xfade", + .type = AVMEDIA_TYPE_VIDEO, + .get_buffer.video = &get_video_buffer, + .config_props = &ff_vk_filter_config_input, + }, +}; + +static const AVFilterPad xfade_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &config_props_output, + }, +}; + +const FFFilter ff_vf_xfade_vulkan = { + .p.name = "xfade_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Cross fade one video with another video."), + .p.priv_class = &xfade_vulkan_class, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .priv_size = sizeof(XFadeVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &uninit, + .activate = &activate, + FILTER_INPUTS(xfade_vulkan_inputs), + FILTER_OUTPUTS(xfade_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From 3da68608435db38690f37502a661121950d0abe3 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:05:04 +0000 Subject: [PATCH 029/118] Changing vulkan file directory --- 
libavfilter/vsrc_testsrc_vulkan.c | 380 ------------------------------ 1 file changed, 380 deletions(-) delete mode 100644 libavfilter/vsrc_testsrc_vulkan.c diff --git a/libavfilter/vsrc_testsrc_vulkan.c b/libavfilter/vsrc_testsrc_vulkan.c deleted file mode 100644 index cb3c787213..0000000000 --- a/libavfilter/vsrc_testsrc_vulkan.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/random_seed.h" -#include "libavutil/csp.h" -#include "libavutil/opt.h" -#include "libavutil/vulkan_spirv.h" -#include "vulkan_filter.h" -#include "filters.h" -#include "colorspace.h" -#include "video.h" - -enum TestSrcVulkanMode { - TESTSRC_COLOR, -}; - -typedef struct TestSrcVulkanPushData { - float color_comp[4]; -} TestSrcVulkanPushData; - -typedef struct TestSrcVulkanContext { - FFVulkanContext vkctx; - - int initialized; - FFVkExecPool e; - AVVulkanDeviceQueueFamily *qf; - FFVulkanShader shd; - - /* Only used by color_vulkan */ - uint8_t color_rgba[4]; - - TestSrcVulkanPushData opts; - - int w, h; - int pw, ph; - char *out_format_string; - enum AVColorRange out_range; - unsigned int nb_frame; - AVRational time_base, frame_rate; - int64_t pts; - int64_t duration; ///< duration expressed 
in microseconds - AVRational sar; ///< sample aspect ratio - int draw_once; ///< draw only the first frame, always put out the same picture - int draw_once_reset; ///< draw only the first frame or in case of reset - AVFrame *picref; ///< cached reference containing the painted picture -} TestSrcVulkanContext; - -static av_cold int init_filter(AVFilterContext *ctx, enum TestSrcVulkanMode mode) -{ - int err; - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - TestSrcVulkanContext *s = ctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); - FFVulkanShader *shd = &s->shd; - FFVkSPIRVCompiler *spv; - FFVulkanDescriptorSetBinding *desc_set; - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->vkctx.output_format); - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); - if (!s->qf) { - av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); - err = AVERROR(ENOTSUP); - goto fail; - } - - RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_shader_init(vkctx, &s->shd, "scale", - VK_SHADER_STAGE_COMPUTE_BIT, - NULL, 0, - 32, 32, 1, - 0)); - - GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); - GLSLC(1, vec4 color_comp; ); - GLSLC(0, }; ); - GLSLC(0, ); - - ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), - VK_SHADER_STAGE_COMPUTE_BIT); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "output_img", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), - .mem_quali = "writeonly", - .dimensions = 2, - .elems = planes, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - - RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc_set, 1, 0, 0)); - - GLSLC(0, void main() ); - GLSLC(0, { ); - GLSLC(1, 
ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); - if (mode == TESTSRC_COLOR) { - double rgb2yuv[3][3]; - double rgbad[4]; - double yuvad[4]; - - enum AVColorSpace csp; - const AVLumaCoefficients *luma = NULL; - - s->draw_once = 1; - - if (desc->flags & AV_PIX_FMT_FLAG_RGB) - csp = AVCOL_SPC_RGB; - else - csp = AVCOL_SPC_SMPTE170M; - - if (!(desc->flags & AV_PIX_FMT_FLAG_RGB) && !(luma = av_csp_luma_coeffs_from_avcsp(csp))) - return AVERROR(EINVAL); - else if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) - ff_fill_rgb2yuv_table(luma, rgb2yuv); - - for (int i = 0; i < 4; i++) - rgbad[i] = s->color_rgba[i] / 255.0; - - if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) - ff_matrix_mul_3x3_vec(yuvad, rgbad, rgb2yuv); - else - memcpy(yuvad, rgbad, sizeof(rgbad)); - - yuvad[3] = rgbad[3]; - - if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) { - for (int i = 0; i < 3; i++) { - int chroma = (!(desc->flags & AV_PIX_FMT_FLAG_RGB) && i > 0); - if (s->out_range == AVCOL_RANGE_MPEG) { - yuvad[i] *= (chroma ? 224.0 : 219.0) / 255.0; - yuvad[i] += (chroma ? 
128.0 : 16.0) / 255.0; - } else if (chroma) { - yuvad[i] += 0.5; - } - } - } - - /* Ensure we place the alpha appropriately for gray formats */ - if (desc->nb_components <= 2) - yuvad[1] = yuvad[3]; - - for (int i = 0; i < 4; i++) - s->opts.color_comp[i] = yuvad[i]; - - GLSLC(1, vec4 r; ); - GLSLC(0, ); - for (int i = 0, c_off = 0; i < planes; i++) { - for (int c = 0; c < desc->nb_components; c++) { - if (desc->comp[c].plane == i) { - int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8); - GLSLF(1, r[%i] = color_comp[%i]; ,off, c_off++); - } - } - GLSLF(1, imageStore(output_img[%i], pos, r); ,i); - GLSLC(0, ); - } - } - GLSLC(0, } ); - - RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); - - s->initialized = 1; - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - if (spv) - spv->uninit(&spv); - - return err; -} - -static int testsrc_vulkan_activate(AVFilterContext *ctx) -{ - int err; - AVFilterLink *outlink = ctx->outputs[0]; - TestSrcVulkanContext *s = ctx->priv; - AVFrame *frame; - - if (!s->initialized) { - enum TestSrcVulkanMode mode = TESTSRC_COLOR; - err = init_filter(ctx, mode); - if (err < 0) - return err; - } - - if (!ff_outlink_frame_wanted(outlink)) - return FFERROR_NOT_READY; - if (s->duration >= 0 && - av_rescale_q(s->pts, s->time_base, AV_TIME_BASE_Q) >= s->duration) { - ff_outlink_set_status(outlink, AVERROR_EOF, s->pts); - return 0; - } - - if (s->draw_once) { - if (s->draw_once_reset) { - av_frame_free(&s->picref); - s->draw_once_reset = 0; - } - if (!s->picref) { - s->picref = ff_get_video_buffer(outlink, s->w, s->h); - if (!s->picref) - return AVERROR(ENOMEM); - - err = ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, s->picref, NULL, - VK_NULL_HANDLE, &s->opts, sizeof(s->opts)); - if (err < 0) - return err; - } - frame = av_frame_clone(s->picref); - } 
else { - frame = ff_get_video_buffer(outlink, s->w, s->h); - } - - if (!frame) - return AVERROR(ENOMEM); - - frame->pts = s->pts; - frame->duration = 1; - frame->flags = AV_FRAME_FLAG_KEY; - frame->pict_type = AV_PICTURE_TYPE_I; - frame->sample_aspect_ratio = s->sar; - if (!s->draw_once) { - err = ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, frame, NULL, - VK_NULL_HANDLE, &s->opts, sizeof(s->opts)); - if (err < 0) { - av_frame_free(&frame); - return err; - } - } - - s->pts++; - s->nb_frame++; - - return ff_filter_frame(outlink, frame); -} - -static int testsrc_vulkan_config_props(AVFilterLink *outlink) -{ - int err; - FilterLink *l = ff_filter_link(outlink); - TestSrcVulkanContext *s = outlink->src->priv; - FFVulkanContext *vkctx = &s->vkctx; - - if (!s->out_format_string) { - vkctx->output_format = AV_PIX_FMT_YUV444P; - } else { - vkctx->output_format = av_get_pix_fmt(s->out_format_string); - if (vkctx->output_format == AV_PIX_FMT_NONE) { - av_log(vkctx, AV_LOG_ERROR, "Invalid output format.\n"); - return AVERROR(EINVAL); - } - } - - err = ff_vk_filter_init_context(outlink->src, vkctx, NULL, - s->w, s->h, vkctx->output_format); - if (err < 0) - return err; - - l->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref); - if (!l->hw_frames_ctx) - return AVERROR(ENOMEM); - - s->time_base = av_inv_q(s->frame_rate); - s->nb_frame = 0; - s->pts = 0; - - s->vkctx.output_width = s->w; - s->vkctx.output_height = s->h; - outlink->w = s->w; - outlink->h = s->h; - outlink->sample_aspect_ratio = s->sar; - l->frame_rate = s->frame_rate; - outlink->time_base = s->time_base; - - return 0; -} - -static void testsrc_vulkan_uninit(AVFilterContext *avctx) -{ - TestSrcVulkanContext *s = avctx->priv; - FFVulkanContext *vkctx = &s->vkctx; - - av_frame_free(&s->picref); - - ff_vk_exec_pool_free(vkctx, &s->e); - ff_vk_shader_free(vkctx, &s->shd); - - ff_vk_uninit(&s->vkctx); - - s->initialized = 0; -} - -#define OFFSET(x) offsetof(TestSrcVulkanContext, x) -#define FLAGS 
(AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) - -#define COMMON_OPTS \ - { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, \ - { "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, \ - \ - { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "60" }, 0, INT_MAX, FLAGS }, \ - { "r", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "60" }, 0, INT_MAX, FLAGS }, \ - \ - { "duration", "set video duration", OFFSET(duration), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT64_MAX, FLAGS }, \ - { "d", "set video duration", OFFSET(duration), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT64_MAX, FLAGS }, \ - \ - { "sar", "set video sample aspect ratio", OFFSET(sar), AV_OPT_TYPE_RATIONAL, { .dbl = 1 }, 0, INT_MAX, FLAGS }, \ - \ - { "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, - -static const AVOption color_vulkan_options[] = { - { "color", "set color", OFFSET(color_rgba), AV_OPT_TYPE_COLOR, {.str = "black"}, 0, 0, FLAGS }, - { "c", "set color", OFFSET(color_rgba), AV_OPT_TYPE_COLOR, {.str = "black"}, 0, 0, FLAGS }, - COMMON_OPTS - { "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, .unit = "range" }, - { "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, - { "limited", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, - { "jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, - { "mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, - { "tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = 
AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, - { "pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, - { NULL }, -}; - -AVFILTER_DEFINE_CLASS(color_vulkan); - -static const AVFilterPad testsrc_vulkan_outputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, - .config_props = testsrc_vulkan_config_props, - }, -}; - -const FFFilter ff_vsrc_color_vulkan = { - .p.name = "color_vulkan", - .p.description = NULL_IF_CONFIG_SMALL("Generate a constant color (Vulkan)"), - .p.inputs = NULL, - .p.flags = AVFILTER_FLAG_HWDEVICE, - .p.priv_class = &color_vulkan_class, - .priv_size = sizeof(TestSrcVulkanContext), - .init = &ff_vk_filter_init, - .uninit = &testsrc_vulkan_uninit, - .activate = testsrc_vulkan_activate, - FILTER_OUTPUTS(testsrc_vulkan_outputs), - FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), - .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, -}; -- 2.49.1 From 6595f8ef44db685d10f38768882f2c8870b4bb74 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:05:37 +0000 Subject: [PATCH 030/118] Changing vulkan file directory --- libavfilter/vulkan/vsrc_testsrc_vulkan.c | 380 +++++++++++++++++++++++ 1 file changed, 380 insertions(+) create mode 100644 libavfilter/vulkan/vsrc_testsrc_vulkan.c diff --git a/libavfilter/vulkan/vsrc_testsrc_vulkan.c b/libavfilter/vulkan/vsrc_testsrc_vulkan.c new file mode 100644 index 0000000000..56b57a1e9f --- /dev/null +++ b/libavfilter/vulkan/vsrc_testsrc_vulkan.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/random_seed.h" +#include "libavutil/csp.h" +#include "libavutil/opt.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "vulkan_filter.h" +#include "libavfilter/filters.h" +#include "libavfilter/colorspace.h" +#include "libavfilter/video.h" + +enum TestSrcVulkanMode { + TESTSRC_COLOR, +}; + +typedef struct TestSrcVulkanPushData { + float color_comp[4]; +} TestSrcVulkanPushData; + +typedef struct TestSrcVulkanContext { + FFVulkanContext vkctx; + + int initialized; + FFVkExecPool e; + AVVulkanDeviceQueueFamily *qf; + FFVulkanShader shd; + + /* Only used by color_vulkan */ + uint8_t color_rgba[4]; + + TestSrcVulkanPushData opts; + + int w, h; + int pw, ph; + char *out_format_string; + enum AVColorRange out_range; + unsigned int nb_frame; + AVRational time_base, frame_rate; + int64_t pts; + int64_t duration; ///< duration expressed in microseconds + AVRational sar; ///< sample aspect ratio + int draw_once; ///< draw only the first frame, always put out the same picture + int draw_once_reset; ///< draw only the first frame or in case of reset + AVFrame *picref; ///< cached reference containing the painted picture +} TestSrcVulkanContext; + +static av_cold int init_filter(AVFilterContext *ctx, enum TestSrcVulkanMode mode) +{ + int err; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + TestSrcVulkanContext *s = ctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); + FFVulkanShader *shd = 
&s->shd; + FFVkSPIRVCompiler *spv; + FFVulkanDescriptorSetBinding *desc_set; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->vkctx.output_format); + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(ctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + s->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!s->qf) { + av_log(ctx, AV_LOG_ERROR, "Device has no compute queues\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); + RET(ff_vk_shader_init(vkctx, &s->shd, "scale", + VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, + 32, 32, 1, + 0)); + + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); + GLSLC(1, vec4 color_comp; ); + GLSLC(0, }; ); + GLSLC(0, ); + + ff_vk_shader_add_push_const(&s->shd, 0, sizeof(s->opts), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "output_img", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format, FF_VK_REP_FLOAT), + .mem_quali = "writeonly", + .dimensions = 2, + .elems = planes, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + + RET(ff_vk_shader_add_descriptor_set(vkctx, &s->shd, desc_set, 1, 0, 0)); + + GLSLC(0, void main() ); + GLSLC(0, { ); + GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); + if (mode == TESTSRC_COLOR) { + double rgb2yuv[3][3]; + double rgbad[4]; + double yuvad[4]; + + enum AVColorSpace csp; + const AVLumaCoefficients *luma = NULL; + + s->draw_once = 1; + + if (desc->flags & AV_PIX_FMT_FLAG_RGB) + csp = AVCOL_SPC_RGB; + else + csp = AVCOL_SPC_SMPTE170M; + + if (!(desc->flags & AV_PIX_FMT_FLAG_RGB) && !(luma = av_csp_luma_coeffs_from_avcsp(csp))) + return AVERROR(EINVAL); + else if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) + ff_fill_rgb2yuv_table(luma, rgb2yuv); + + for (int i = 0; i < 4; i++) + rgbad[i] = s->color_rgba[i] / 255.0; + + if (!(desc->flags & 
AV_PIX_FMT_FLAG_RGB)) + ff_matrix_mul_3x3_vec(yuvad, rgbad, rgb2yuv); + else + memcpy(yuvad, rgbad, sizeof(rgbad)); + + yuvad[3] = rgbad[3]; + + if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) { + for (int i = 0; i < 3; i++) { + int chroma = (!(desc->flags & AV_PIX_FMT_FLAG_RGB) && i > 0); + if (s->out_range == AVCOL_RANGE_MPEG) { + yuvad[i] *= (chroma ? 224.0 : 219.0) / 255.0; + yuvad[i] += (chroma ? 128.0 : 16.0) / 255.0; + } else if (chroma) { + yuvad[i] += 0.5; + } + } + } + + /* Ensure we place the alpha appropriately for gray formats */ + if (desc->nb_components <= 2) + yuvad[1] = yuvad[3]; + + for (int i = 0; i < 4; i++) + s->opts.color_comp[i] = yuvad[i]; + + GLSLC(1, vec4 r; ); + GLSLC(0, ); + for (int i = 0, c_off = 0; i < planes; i++) { + for (int c = 0; c < desc->nb_components; c++) { + if (desc->comp[c].plane == i) { + int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8); + GLSLF(1, r[%i] = color_comp[%i]; ,off, c_off++); + } + } + GLSLF(1, imageStore(output_img[%i], pos, r); ,i); + GLSLC(0, ); + } + } + GLSLC(0, } ); + + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(vkctx, &s->e, &s->shd)); + + s->initialized = 1; + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + if (spv) + spv->uninit(&spv); + + return err; +} + +static int testsrc_vulkan_activate(AVFilterContext *ctx) +{ + int err; + AVFilterLink *outlink = ctx->outputs[0]; + TestSrcVulkanContext *s = ctx->priv; + AVFrame *frame; + + if (!s->initialized) { + enum TestSrcVulkanMode mode = TESTSRC_COLOR; + err = init_filter(ctx, mode); + if (err < 0) + return err; + } + + if (!ff_outlink_frame_wanted(outlink)) + return FFERROR_NOT_READY; + if (s->duration >= 0 && + av_rescale_q(s->pts, s->time_base, AV_TIME_BASE_Q) >= s->duration) { + ff_outlink_set_status(outlink, AVERROR_EOF, s->pts); + return 0; + } + + if (s->draw_once) { + if 
(s->draw_once_reset) { + av_frame_free(&s->picref); + s->draw_once_reset = 0; + } + if (!s->picref) { + s->picref = ff_get_video_buffer(outlink, s->w, s->h); + if (!s->picref) + return AVERROR(ENOMEM); + + err = ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, s->picref, NULL, + VK_NULL_HANDLE, &s->opts, sizeof(s->opts)); + if (err < 0) + return err; + } + frame = av_frame_clone(s->picref); + } else { + frame = ff_get_video_buffer(outlink, s->w, s->h); + } + + if (!frame) + return AVERROR(ENOMEM); + + frame->pts = s->pts; + frame->duration = 1; + frame->flags = AV_FRAME_FLAG_KEY; + frame->pict_type = AV_PICTURE_TYPE_I; + frame->sample_aspect_ratio = s->sar; + if (!s->draw_once) { + err = ff_vk_filter_process_simple(&s->vkctx, &s->e, &s->shd, frame, NULL, + VK_NULL_HANDLE, &s->opts, sizeof(s->opts)); + if (err < 0) { + av_frame_free(&frame); + return err; + } + } + + s->pts++; + s->nb_frame++; + + return ff_filter_frame(outlink, frame); +} + +static int testsrc_vulkan_config_props(AVFilterLink *outlink) +{ + int err; + FilterLink *l = ff_filter_link(outlink); + TestSrcVulkanContext *s = outlink->src->priv; + FFVulkanContext *vkctx = &s->vkctx; + + if (!s->out_format_string) { + vkctx->output_format = AV_PIX_FMT_YUV444P; + } else { + vkctx->output_format = av_get_pix_fmt(s->out_format_string); + if (vkctx->output_format == AV_PIX_FMT_NONE) { + av_log(vkctx, AV_LOG_ERROR, "Invalid output format.\n"); + return AVERROR(EINVAL); + } + } + + err = ff_vk_filter_init_context(outlink->src, vkctx, NULL, + s->w, s->h, vkctx->output_format); + if (err < 0) + return err; + + l->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref); + if (!l->hw_frames_ctx) + return AVERROR(ENOMEM); + + s->time_base = av_inv_q(s->frame_rate); + s->nb_frame = 0; + s->pts = 0; + + s->vkctx.output_width = s->w; + s->vkctx.output_height = s->h; + outlink->w = s->w; + outlink->h = s->h; + outlink->sample_aspect_ratio = s->sar; + l->frame_rate = s->frame_rate; + outlink->time_base = s->time_base; + + 
return 0; +} + +static void testsrc_vulkan_uninit(AVFilterContext *avctx) +{ + TestSrcVulkanContext *s = avctx->priv; + FFVulkanContext *vkctx = &s->vkctx; + + av_frame_free(&s->picref); + + ff_vk_exec_pool_free(vkctx, &s->e); + ff_vk_shader_free(vkctx, &s->shd); + + ff_vk_uninit(&s->vkctx); + + s->initialized = 0; +} + +#define OFFSET(x) offsetof(TestSrcVulkanContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + +#define COMMON_OPTS \ + { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, \ + { "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, \ + \ + { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "60" }, 0, INT_MAX, FLAGS }, \ + { "r", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "60" }, 0, INT_MAX, FLAGS }, \ + \ + { "duration", "set video duration", OFFSET(duration), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT64_MAX, FLAGS }, \ + { "d", "set video duration", OFFSET(duration), AV_OPT_TYPE_DURATION, { .i64 = -1 }, -1, INT64_MAX, FLAGS }, \ + \ + { "sar", "set video sample aspect ratio", OFFSET(sar), AV_OPT_TYPE_RATIONAL, { .dbl = 1 }, 0, INT_MAX, FLAGS }, \ + \ + { "format", "Output video format (software format of hardware frames)", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, + +static const AVOption color_vulkan_options[] = { + { "color", "set color", OFFSET(color_rgba), AV_OPT_TYPE_COLOR, {.str = "black"}, 0, 0, FLAGS }, + { "c", "set color", OFFSET(color_rgba), AV_OPT_TYPE_COLOR, {.str = "black"}, 0, 0, FLAGS }, + COMMON_OPTS + { "out_range", "Output colour range (from 0 to 2) (default 0)", OFFSET(out_range), AV_OPT_TYPE_INT, {.i64 = AVCOL_RANGE_UNSPECIFIED}, AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, .flags = FLAGS, .unit = "range" }, + { "full", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, + { 
"limited", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, + { "jpeg", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, + { "mpeg", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, + { "tv", "Limited range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, .unit = "range" }, + { "pc", "Full range", 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, .unit = "range" }, + { NULL }, +}; + +AVFILTER_DEFINE_CLASS(color_vulkan); + +static const AVFilterPad testsrc_vulkan_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = testsrc_vulkan_config_props, + }, +}; + +const FFFilter ff_vsrc_color_vulkan = { + .p.name = "color_vulkan", + .p.description = NULL_IF_CONFIG_SMALL("Generate a constant color (Vulkan)"), + .p.inputs = NULL, + .p.flags = AVFILTER_FLAG_HWDEVICE, + .p.priv_class = &color_vulkan_class, + .priv_size = sizeof(TestSrcVulkanContext), + .init = &ff_vk_filter_init, + .uninit = &testsrc_vulkan_uninit, + .activate = testsrc_vulkan_activate, + FILTER_OUTPUTS(testsrc_vulkan_outputs), + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VULKAN), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.49.1 From 9dea04c66a3b8047af0573e3ce8d71d72855353b Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:07:12 +0000 Subject: [PATCH 031/118] Changing vulkan file directory --- libavfilter/vulkan_filter.c | 483 ------------------------------------ 1 file changed, 483 deletions(-) delete mode 100644 libavfilter/vulkan_filter.c diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c deleted file mode 100644 index a9f47741ed..0000000000 --- a/libavfilter/vulkan_filter.c +++ /dev/null @@ -1,483 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "filters.h" -#include "vulkan_filter.h" -#include "libavutil/vulkan_loader.h" - -int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, - AVBufferRef *frames_ref, - int width, int height, enum AVPixelFormat sw_format) -{ - int err; - AVHWFramesContext *frames_ctx; - AVHWDeviceContext *device_ctx; - AVVulkanFramesContext *vk_frames; - AVVulkanDeviceContext *vk_dev; - AVBufferRef *device_ref = avctx->hw_device_ctx; - - /* Check if context is reusable as-is */ - if (frames_ref) { - int no_storage = 0; - FFVulkanFunctions *vk; - VkImageUsageFlagBits usage_req; - const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format); - - frames_ctx = (AVHWFramesContext *)frames_ref->data; - device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; - vk_frames = frames_ctx->hwctx; - vk_dev = device_ctx->hwctx; - - /* Width and height mismatch */ - if (width != frames_ctx->width || - height != frames_ctx->height) - goto skip; - - /* Format mismatch */ - if (sw_format != frames_ctx->sw_format) - goto skip; - - /* Don't let linear through. 
*/ - if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) - goto skip; - - s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, - vk_dev->nb_enabled_dev_extensions); - - /* More advanced format checks */ - err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); - if (err < 0) - return err; - vk = &s->vkfn; - - /* Usage mismatch */ - usage_req = VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT; - - /* If format supports hardware encoding, make sure - * the context includes it. */ - if (vk_frames->format[1] == VK_FORMAT_UNDEFINED && - (s->extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | - FF_VK_EXT_VIDEO_MAINTENANCE_1))) { - VkFormatProperties3 fprops = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3, - }; - VkFormatProperties2 prop = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - .pNext = &fprops, - }; - vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, - vk_frames->format[0], - &prop); - if (fprops.optimalTilingFeatures & VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR) - usage_req |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; - } - - if ((vk_frames->usage & usage_req) != usage_req) - goto skip; - - /* Check if the subformats can do storage */ - for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) { - VkFormatProperties2 prop = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - }; - vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i], - &prop); - no_storage |= !(prop.formatProperties.optimalTilingFeatures & - VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); - } - - /* Check if it's usable */ - if (no_storage) { -skip: - av_log(avctx, AV_LOG_VERBOSE, "Cannot reuse context, creating a new one\n"); - device_ref = frames_ctx->device_ref; - frames_ref = NULL; - } else { - av_log(avctx, AV_LOG_VERBOSE, "Reusing existing frames context\n"); - frames_ref = av_buffer_ref(frames_ref); - if (!frames_ref) - return AVERROR(ENOMEM); - } - } - - if (!frames_ref) { - if (!device_ref) { - av_log(avctx, AV_LOG_ERROR, - "Vulkan 
filtering requires a device context!\n"); - return AVERROR(EINVAL); - } - - frames_ref = av_hwframe_ctx_alloc(device_ref); - - frames_ctx = (AVHWFramesContext *)frames_ref->data; - frames_ctx->format = AV_PIX_FMT_VULKAN; - frames_ctx->sw_format = sw_format; - frames_ctx->width = width; - frames_ctx->height = height; - - vk_frames = frames_ctx->hwctx; - vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; - vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT; - - err = av_hwframe_ctx_init(frames_ref); - if (err < 0) { - av_buffer_unref(&frames_ref); - return err; - } - - device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; - vk_dev = device_ctx->hwctx; - } - - s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, - vk_dev->nb_enabled_dev_extensions); - s->extensions |= ff_vk_extensions_to_mask(vk_dev->enabled_inst_extensions, - vk_dev->nb_enabled_inst_extensions); - - err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); - if (err < 0) { - av_buffer_unref(&frames_ref); - return err; - } - - s->frames_ref = frames_ref; - s->frames = frames_ctx; - s->hwfc = vk_frames; - s->device = device_ctx; - s->hwctx = device_ctx->hwctx; - - err = ff_vk_load_props(s); - if (err < 0) - av_buffer_unref(&s->frames_ref); - - return err; -} - -int ff_vk_filter_config_input(AVFilterLink *inlink) -{ - FilterLink *l = ff_filter_link(inlink); - AVHWFramesContext *input_frames; - AVFilterContext *avctx = inlink->dst; - FFVulkanContext *s = inlink->dst->priv; - - if (!l->hw_frames_ctx) { - av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " - "hardware frames context on the input.\n"); - return AVERROR(EINVAL); - } - - input_frames = (AVHWFramesContext *)l->hw_frames_ctx->data; - if (input_frames->format != AV_PIX_FMT_VULKAN) - return AVERROR(EINVAL); - - /* Extract the device and default output format from the first input. 
*/ - if (avctx->inputs[0] != inlink) - return 0; - - /* Save the ref, without reffing it */ - s->input_frames_ref = l->hw_frames_ctx; - - /* Defaults */ - s->input_format = input_frames->sw_format; - s->output_format = input_frames->sw_format; - s->output_width = inlink->w; - s->output_height = inlink->h; - - return 0; -} - -int ff_vk_filter_config_output(AVFilterLink *outlink) -{ - int err; - FilterLink *l = ff_filter_link(outlink); - FFVulkanContext *s = outlink->src->priv; - - av_buffer_unref(&l->hw_frames_ctx); - - err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref, - s->output_width, s->output_height, - s->output_format); - if (err < 0) - return err; - - l->hw_frames_ctx = av_buffer_ref(s->frames_ref); - if (!l->hw_frames_ctx) - return AVERROR(ENOMEM); - - outlink->w = s->output_width; - outlink->h = s->output_height; - - return err; -} - -int ff_vk_filter_init(AVFilterContext *avctx) -{ - FFVulkanContext *s = avctx->priv; - - s->output_format = AV_PIX_FMT_NONE; - - return 0; -} - -int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, - FFVulkanShader *shd, AVFrame *out_f, AVFrame *in_f, - VkSampler sampler, void *push_src, size_t push_size) -{ - int err = 0; - FFVulkanFunctions *vk = &vkctx->vkfn; - VkImageView in_views[AV_NUM_DATA_POINTERS]; - VkImageView out_views[AV_NUM_DATA_POINTERS]; - VkImageMemoryBarrier2 img_bar[37]; - int nb_img_bar = 0; - VkImageLayout in_layout = sampler != VK_NULL_HANDLE ? 
- VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : - VK_IMAGE_LAYOUT_GENERAL; - - /* Update descriptors and init the exec context */ - FFVkExecContext *exec = ff_vk_exec_get(vkctx, e); - ff_vk_exec_start(vkctx, exec); - - RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f, FF_VK_REP_FLOAT)); - ff_vk_shader_update_img_array(vkctx, exec, shd, out_f, out_views, 0, !!in_f, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - if (in_f) { - RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f, FF_VK_REP_FLOAT)); - ff_vk_shader_update_img_array(vkctx, exec, shd, in_f, in_views, 0, 0, - in_layout, - sampler); - } - - /* Bind pipeline, update push data */ - ff_vk_exec_bind_shader(vkctx, exec, shd); - if (push_src) - ff_vk_shader_update_push_const(vkctx, exec, shd, VK_SHADER_STAGE_COMPUTE_BIT, - 0, push_size, push_src); - - /* Add data sync barriers */ - ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - if (in_f) - ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - in_layout, - VK_QUEUE_FAMILY_IGNORED); - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - - vk->CmdDispatch(exec->buf, - FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0], - FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1], - shd->lg_size[2]); - - return ff_vk_exec_submit(vkctx, exec); 
-fail: - ff_vk_exec_discard_deps(vkctx, exec); - return err; -} - -int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, - FFVulkanShader *shd_list[2], - AVFrame *out, AVFrame *tmp, AVFrame *in, - VkSampler sampler, void *push_src, size_t push_size) -{ - int err = 0; - FFVulkanFunctions *vk = &vkctx->vkfn; - VkImageView in_views[AV_NUM_DATA_POINTERS]; - VkImageView tmp_views[AV_NUM_DATA_POINTERS]; - VkImageView out_views[AV_NUM_DATA_POINTERS]; - VkImageMemoryBarrier2 img_bar[37]; - int nb_img_bar = 0; - VkImageLayout in_layout = sampler != VK_NULL_HANDLE ? - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : - VK_IMAGE_LAYOUT_GENERAL; - - /* Update descriptors and init the exec context */ - FFVkExecContext *exec = ff_vk_exec_get(vkctx, e); - ff_vk_exec_start(vkctx, exec); - - RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - - RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT)); - RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp, FF_VK_REP_FLOAT)); - RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT)); - - ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - in_layout, - VK_QUEUE_FAMILY_IGNORED); - ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, - 
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - - for (int i = 0; i < 2; i++) { - FFVulkanShader *shd = shd_list[i]; - AVFrame *src_f = !i ? in : tmp; - AVFrame *dst_f = !i ? tmp : out; - VkImageView *src_views = !i ? in_views : tmp_views; - VkImageView *dst_views = !i ? tmp_views : out_views; - - ff_vk_shader_update_img_array(vkctx, exec, shd, src_f, src_views, 0, 0, - !i ? in_layout : - VK_IMAGE_LAYOUT_GENERAL, - sampler); - ff_vk_shader_update_img_array(vkctx, exec, shd, dst_f, dst_views, 0, 1, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - - /* Bind pipeline, update push data */ - ff_vk_exec_bind_shader(vkctx, exec, shd); - if (push_src) - ff_vk_shader_update_push_const(vkctx, exec, shd, VK_SHADER_STAGE_COMPUTE_BIT, - 0, push_size, push_src); - - vk->CmdDispatch(exec->buf, - FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0], - FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1], - shd->lg_size[2]); - } - - return ff_vk_exec_submit(vkctx, exec); -fail: - ff_vk_exec_discard_deps(vkctx, exec); - return err; -} - -int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, - FFVulkanShader *shd, - AVFrame *out, AVFrame *in[], int nb_in, - VkSampler sampler, void *push_src, size_t push_size) -{ - int err = 0; - FFVulkanFunctions *vk = &vkctx->vkfn; - VkImageView in_views[16][AV_NUM_DATA_POINTERS]; - VkImageView out_views[AV_NUM_DATA_POINTERS]; - VkImageMemoryBarrier2 img_bar[128]; - int nb_img_bar = 0; - VkImageLayout in_layout = sampler != VK_NULL_HANDLE ? 
- VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : - VK_IMAGE_LAYOUT_GENERAL; - - /* Update descriptors and init the exec context */ - FFVkExecContext *exec = ff_vk_exec_get(vkctx, e); - ff_vk_exec_start(vkctx, exec); - - /* Add deps and create temporary imageviews */ - RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT)); - for (int i = 0; i < nb_in; i++) { - RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i], - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i], FF_VK_REP_FLOAT)); - } - - /* Update descriptor sets */ - ff_vk_shader_update_img_array(vkctx, exec, shd, out, out_views, 0, nb_in, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - for (int i = 0; i < nb_in; i++) - ff_vk_shader_update_img_array(vkctx, exec, shd, in[i], in_views[i], 0, i, - in_layout, - sampler); - - /* Bind pipeline, update push data */ - ff_vk_exec_bind_shader(vkctx, exec, shd); - if (push_src) - ff_vk_shader_update_push_const(vkctx, exec, shd, VK_SHADER_STAGE_COMPUTE_BIT, - 0, push_size, push_src); - - /* Add data sync barriers */ - ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - for (int i = 0; i < nb_in; i++) - ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - in_layout, - VK_QUEUE_FAMILY_IGNORED); - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - - vk->CmdDispatch(exec->buf, - FFALIGN(vkctx->output_width, 
shd->lg_size[0])/shd->lg_size[0], - FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1], - shd->lg_size[2]); - - return ff_vk_exec_submit(vkctx, exec); -fail: - ff_vk_exec_discard_deps(vkctx, exec); - return err; -} -- 2.49.1 From b436dc72a98892a37880208ddaa213d4f3f6eacc Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:07:37 +0000 Subject: [PATCH 032/118] Changing vulkan file directory --- libavfilter/vulkan/vulkan_filter.c | 483 +++++++++++++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 libavfilter/vulkan/vulkan_filter.c diff --git a/libavfilter/vulkan/vulkan_filter.c b/libavfilter/vulkan/vulkan_filter.c new file mode 100644 index 0000000000..2fc37cc4c0 --- /dev/null +++ b/libavfilter/vulkan/vulkan_filter.c @@ -0,0 +1,483 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavfilter/filters.h" +#include "vulkan_filter.h" +#include "../../libavutil/vulkan/vulkan_loader.h" + +int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, + AVBufferRef *frames_ref, + int width, int height, enum AVPixelFormat sw_format) +{ + int err; + AVHWFramesContext *frames_ctx; + AVHWDeviceContext *device_ctx; + AVVulkanFramesContext *vk_frames; + AVVulkanDeviceContext *vk_dev; + AVBufferRef *device_ref = avctx->hw_device_ctx; + + /* Check if context is reusable as-is */ + if (frames_ref) { + int no_storage = 0; + FFVulkanFunctions *vk; + VkImageUsageFlagBits usage_req; + const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format); + + frames_ctx = (AVHWFramesContext *)frames_ref->data; + device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; + vk_frames = frames_ctx->hwctx; + vk_dev = device_ctx->hwctx; + + /* Width and height mismatch */ + if (width != frames_ctx->width || + height != frames_ctx->height) + goto skip; + + /* Format mismatch */ + if (sw_format != frames_ctx->sw_format) + goto skip; + + /* Don't let linear through. */ + if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) + goto skip; + + s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, + vk_dev->nb_enabled_dev_extensions); + + /* More advanced format checks */ + err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); + if (err < 0) + return err; + vk = &s->vkfn; + + /* Usage mismatch */ + usage_req = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT; + + /* If format supports hardware encoding, make sure + * the context includes it. 
*/ + if (vk_frames->format[1] == VK_FORMAT_UNDEFINED && + (s->extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | + FF_VK_EXT_VIDEO_MAINTENANCE_1))) { + VkFormatProperties3 fprops = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3, + }; + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + .pNext = &fprops, + }; + vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, + vk_frames->format[0], + &prop); + if (fprops.optimalTilingFeatures & VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR) + usage_req |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + } + + if ((vk_frames->usage & usage_req) != usage_req) + goto skip; + + /* Check if the subformats can do storage */ + for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) { + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + }; + vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i], + &prop); + no_storage |= !(prop.formatProperties.optimalTilingFeatures & + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + } + + /* Check if it's usable */ + if (no_storage) { +skip: + av_log(avctx, AV_LOG_VERBOSE, "Cannot reuse context, creating a new one\n"); + device_ref = frames_ctx->device_ref; + frames_ref = NULL; + } else { + av_log(avctx, AV_LOG_VERBOSE, "Reusing existing frames context\n"); + frames_ref = av_buffer_ref(frames_ref); + if (!frames_ref) + return AVERROR(ENOMEM); + } + } + + if (!frames_ref) { + if (!device_ref) { + av_log(avctx, AV_LOG_ERROR, + "Vulkan filtering requires a device context!\n"); + return AVERROR(EINVAL); + } + + frames_ref = av_hwframe_ctx_alloc(device_ref); + + frames_ctx = (AVHWFramesContext *)frames_ref->data; + frames_ctx->format = AV_PIX_FMT_VULKAN; + frames_ctx->sw_format = sw_format; + frames_ctx->width = width; + frames_ctx->height = height; + + vk_frames = frames_ctx->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + + 
err = av_hwframe_ctx_init(frames_ref); + if (err < 0) { + av_buffer_unref(&frames_ref); + return err; + } + + device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; + vk_dev = device_ctx->hwctx; + } + + s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, + vk_dev->nb_enabled_dev_extensions); + s->extensions |= ff_vk_extensions_to_mask(vk_dev->enabled_inst_extensions, + vk_dev->nb_enabled_inst_extensions); + + err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); + if (err < 0) { + av_buffer_unref(&frames_ref); + return err; + } + + s->frames_ref = frames_ref; + s->frames = frames_ctx; + s->hwfc = vk_frames; + s->device = device_ctx; + s->hwctx = device_ctx->hwctx; + + err = ff_vk_load_props(s); + if (err < 0) + av_buffer_unref(&s->frames_ref); + + return err; +} + +int ff_vk_filter_config_input(AVFilterLink *inlink) +{ + FilterLink *l = ff_filter_link(inlink); + AVHWFramesContext *input_frames; + AVFilterContext *avctx = inlink->dst; + FFVulkanContext *s = inlink->dst->priv; + + if (!l->hw_frames_ctx) { + av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " + "hardware frames context on the input.\n"); + return AVERROR(EINVAL); + } + + input_frames = (AVHWFramesContext *)l->hw_frames_ctx->data; + if (input_frames->format != AV_PIX_FMT_VULKAN) + return AVERROR(EINVAL); + + /* Extract the device and default output format from the first input. 
*/ + if (avctx->inputs[0] != inlink) + return 0; + + /* Save the ref, without reffing it */ + s->input_frames_ref = l->hw_frames_ctx; + + /* Defaults */ + s->input_format = input_frames->sw_format; + s->output_format = input_frames->sw_format; + s->output_width = inlink->w; + s->output_height = inlink->h; + + return 0; +} + +int ff_vk_filter_config_output(AVFilterLink *outlink) +{ + int err; + FilterLink *l = ff_filter_link(outlink); + FFVulkanContext *s = outlink->src->priv; + + av_buffer_unref(&l->hw_frames_ctx); + + err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref, + s->output_width, s->output_height, + s->output_format); + if (err < 0) + return err; + + l->hw_frames_ctx = av_buffer_ref(s->frames_ref); + if (!l->hw_frames_ctx) + return AVERROR(ENOMEM); + + outlink->w = s->output_width; + outlink->h = s->output_height; + + return err; +} + +int ff_vk_filter_init(AVFilterContext *avctx) +{ + FFVulkanContext *s = avctx->priv; + + s->output_format = AV_PIX_FMT_NONE; + + return 0; +} + +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanShader *shd, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + VkImageLayout in_layout = sampler != VK_NULL_HANDLE ? 
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : + VK_IMAGE_LAYOUT_GENERAL; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(vkctx, e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f, FF_VK_REP_FLOAT)); + ff_vk_shader_update_img_array(vkctx, exec, shd, out_f, out_views, 0, !!in_f, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + if (in_f) { + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f, FF_VK_REP_FLOAT)); + ff_vk_shader_update_img_array(vkctx, exec, shd, in_f, in_views, 0, 0, + in_layout, + sampler); + } + + /* Bind pipeline, update push data */ + ff_vk_exec_bind_shader(vkctx, exec, shd); + if (push_src) + ff_vk_shader_update_push_const(vkctx, exec, shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + /* Add data sync barriers */ + ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + if (in_f) + ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + in_layout, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0], + FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1], + shd->lg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); 
+fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; +} + +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanShader *shd_list[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView tmp_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + VkImageLayout in_layout = sampler != VK_NULL_HANDLE ? + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : + VK_IMAGE_LAYOUT_GENERAL; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(vkctx, e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in, FF_VK_REP_FLOAT)); + RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp, FF_VK_REP_FLOAT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT)); + + ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + in_layout, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + 
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + for (int i = 0; i < 2; i++) { + FFVulkanShader *shd = shd_list[i]; + AVFrame *src_f = !i ? in : tmp; + AVFrame *dst_f = !i ? tmp : out; + VkImageView *src_views = !i ? in_views : tmp_views; + VkImageView *dst_views = !i ? tmp_views : out_views; + + ff_vk_shader_update_img_array(vkctx, exec, shd, src_f, src_views, 0, 0, + !i ? in_layout : + VK_IMAGE_LAYOUT_GENERAL, + sampler); + ff_vk_shader_update_img_array(vkctx, exec, shd, dst_f, dst_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + /* Bind pipeline, update push data */ + ff_vk_exec_bind_shader(vkctx, exec, shd); + if (push_src) + ff_vk_shader_update_push_const(vkctx, exec, shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, shd->lg_size[0])/shd->lg_size[0], + FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1], + shd->lg_size[2]); + } + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; +} + +int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanShader *shd, + AVFrame *out, AVFrame *in[], int nb_in, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[16][AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[128]; + int nb_img_bar = 0; + VkImageLayout in_layout = sampler != VK_NULL_HANDLE ? 
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : + VK_IMAGE_LAYOUT_GENERAL; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(vkctx, e); + ff_vk_exec_start(vkctx, exec); + + /* Add deps and create temporary imageviews */ + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out, FF_VK_REP_FLOAT)); + for (int i = 0; i < nb_in; i++) { + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i], + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i], FF_VK_REP_FLOAT)); + } + + /* Update descriptor sets */ + ff_vk_shader_update_img_array(vkctx, exec, shd, out, out_views, 0, nb_in, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + for (int i = 0; i < nb_in; i++) + ff_vk_shader_update_img_array(vkctx, exec, shd, in[i], in_views[i], 0, i, + in_layout, + sampler); + + /* Bind pipeline, update push data */ + ff_vk_exec_bind_shader(vkctx, exec, shd); + if (push_src) + ff_vk_shader_update_push_const(vkctx, exec, shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + /* Add data sync barriers */ + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + for (int i = 0; i < nb_in; i++) + ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + in_layout, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, 
shd->lg_size[0])/shd->lg_size[0], + FFALIGN(vkctx->output_height, shd->lg_size[1])/shd->lg_size[1], + shd->lg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; +} -- 2.49.1 From d225975ae3b900213f891173d1d5ae82b81ae3c2 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:08:16 +0000 Subject: [PATCH 033/118] Changing vulkan file directory --- libavfilter/vulkan_filter.h | 65 ------------------------------------- 1 file changed, 65 deletions(-) delete mode 100644 libavfilter/vulkan_filter.h diff --git a/libavfilter/vulkan_filter.h b/libavfilter/vulkan_filter.h deleted file mode 100644 index 6ed9c4de39..0000000000 --- a/libavfilter/vulkan_filter.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVFILTER_VULKAN_FILTER_H -#define AVFILTER_VULKAN_FILTER_H - -#include "avfilter.h" - -#include "libavutil/vulkan.h" - -/** - * General lavfi IO functions - */ -int ff_vk_filter_init (AVFilterContext *avctx); -int ff_vk_filter_config_input (AVFilterLink *inlink); -int ff_vk_filter_config_output(AVFilterLink *outlink); - -/** - * Can be called manually, if not using ff_vk_filter_config_output. 
- */ -int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, - AVBufferRef *frames_ref, - int width, int height, enum AVPixelFormat sw_format); - -/** - * Submit a compute shader with a zero/one input and single out for execution. - */ -int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, - FFVulkanShader *shd, AVFrame *out_f, AVFrame *in_f, - VkSampler sampler, void *push_src, size_t push_size); - -/** - * Submit a compute shader with a single in and single out with 2 stages. - */ -int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, - FFVulkanShader *shd_list[2], - AVFrame *out, AVFrame *tmp, AVFrame *in, - VkSampler sampler, void *push_src, size_t push_size); - -/** - * Up to 16 inputs, one output - */ -int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, - FFVulkanShader *shd, - AVFrame *out, AVFrame *in[], int nb_in, - VkSampler sampler, void *push_src, size_t push_size); - -#endif /* AVFILTER_VULKAN_FILTER_H */ -- 2.49.1 From 1306b83df47bacd1744cb698cbc86f390b951bd3 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:09:03 +0000 Subject: [PATCH 034/118] Changing vulkan file directory --- libavfilter/vulkan/vulkan_filter.h | 65 ++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 libavfilter/vulkan/vulkan_filter.h diff --git a/libavfilter/vulkan/vulkan_filter.h b/libavfilter/vulkan/vulkan_filter.h new file mode 100644 index 0000000000..045f0e6d73 --- /dev/null +++ b/libavfilter/vulkan/vulkan_filter.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VULKAN_FILTER_H +#define AVFILTER_VULKAN_FILTER_H + +#include "libavfilter/avfilter.h" + +#include "../../libavutil/vulkan/vulkan.h" + +/** + * General lavfi IO functions + */ +int ff_vk_filter_init (AVFilterContext *avctx); +int ff_vk_filter_config_input (AVFilterLink *inlink); +int ff_vk_filter_config_output(AVFilterLink *outlink); + +/** + * Can be called manually, if not using ff_vk_filter_config_output. + */ +int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, + AVBufferRef *frames_ref, + int width, int height, enum AVPixelFormat sw_format); + +/** + * Submit a compute shader with a zero/one input and single out for execution. + */ +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanShader *shd, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Submit a compute shader with a single in and single out with 2 stages. 
+ */ +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanShader *shd_list[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Up to 16 inputs, one output + */ +int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanShader *shd, + AVFrame *out, AVFrame *in[], int nb_in, + VkSampler sampler, void *push_src, size_t push_size); + +#endif /* AVFILTER_VULKAN_FILTER_H */ -- 2.49.1 From 198667bb79f9e3ea36e9d72620fa5a2c4c42b099 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:09:39 +0000 Subject: [PATCH 035/118] Changing vulkan file directory --- libavfilter/vulkan/vulkan_source.c | 188 +++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 libavfilter/vulkan/vulkan_source.c diff --git a/libavfilter/vulkan/vulkan_source.c b/libavfilter/vulkan/vulkan_source.c new file mode 100644 index 0000000000..16e709980a --- /dev/null +++ b/libavfilter/vulkan/vulkan_source.c @@ -0,0 +1,188 @@ +#include <stddef.h> +#include "vulkan_source.h" + +const char *ff_source_bwdif_comp = "\ +const vec4 coef_lf[2] = { vec4(4309), vec4(213), };\n\ +const vec4 coef_hf[3] = { vec4(5570), vec4(3801), vec4(1016) };\n\ +const vec4 coef_sp[2] = { vec4(5077), vec4(981), };\n\ +\n\ +vec4 process_intra(vec4 cur[4])\n\ +{\n\ + return (coef_sp[0]*(cur[1] + cur[2]) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13);\n\ +}\n\ +\n\ +void process_plane_intra(int idx, ivec2 pos)\n\ +{\n\ + vec4 dcur[4];\n\ + dcur[0] = imageLoad(cur[idx], pos - ivec2(0, 3));\n\ + dcur[1] = imageLoad(cur[idx], pos - ivec2(0, 1));\n\ + dcur[2] = imageLoad(cur[idx], pos + ivec2(0, 1));\n\ + dcur[3] = imageLoad(cur[idx], pos + ivec2(0, 3));\n\ + imageStore(dst[idx], pos, process_intra(dcur));\n\ +}\n\ +\n\ +vec4 process_line(vec4 prev2[5], vec4 prev1[2], vec4 cur[4], vec4 next1[2], vec4 next2[5])\n\ +{\n\ + vec4 fc = cur[1];\n\ + vec4 fe = cur[2];\n\ + vec4 fs = 
prev2[2] + next2[2];\n\ + vec4 fd = fs / 2;\n\ +\n\ + vec4 temp_diff[3];\n\ + temp_diff[0] = abs(prev2[2] - next2[2]);\n\ + temp_diff[1] = (abs(prev1[0] - fc) + abs(prev1[1] - fe)) / 2;\n\ + temp_diff[2] = (abs(next1[0] - fc) + abs(next1[1] - fe)) / 2;\n\ + vec4 diff = max(temp_diff[0] / 2, max(temp_diff[1], temp_diff[2]));\n\ + bvec4 diff_mask = equal(diff, vec4(0));\n\ +\n\ + vec4 fbs = prev2[1] + next2[1];\n\ + vec4 ffs = prev2[3] + next2[3];\n\ + vec4 fb = (fbs / 2) - fc;\n\ + vec4 ff = (ffs / 2) - fe;\n\ + vec4 dc = fd - fc;\n\ + vec4 de = fd - fe;\n\ + vec4 mmax = max(de, max(dc, min(fb, ff)));\n\ + vec4 mmin = min(de, min(dc, max(fb, ff)));\n\ + diff = max(diff, max(mmin, -mmax));\n\ +\n\ + vec4 interpolate_all = (((coef_hf[0]*(fs) - coef_hf[1]*(fbs + ffs) +\n\ + coef_hf[2]*(prev2[0] + next2[0] + prev2[4] + next2[4])) / 4) +\n\ + coef_lf[0]*(fc + fe) - coef_lf[1]*(cur[0] + cur[3])) / (1 << 13);\n\ + vec4 interpolate_cur = (coef_sp[0]*(fc + fe) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13);\n\ +\n\ + bvec4 interpolate_cnd1 = greaterThan(abs(fc - fe), temp_diff[0]);\n\ + vec4 interpol = mix(interpolate_cur, interpolate_all, interpolate_cnd1);\n\ + interpol = clamp(interpol, fd - diff, fd + diff);\n\ + return mix(interpol, fd, diff_mask);\n\ +}\n\ +\n\ +void process_plane(int idx, const ivec2 pos, bool filter_field,\n\ + bool is_intra, bool field_parity)\n\ +{\n\ + vec4 dcur[4];\n\ + vec4 prev1[2];\n\ + vec4 next1[2];\n\ + vec4 prev2[5];\n\ + vec4 next2[5];\n\ +\n\ + dcur[0] = imageLoad(cur[idx], pos - ivec2(0, 3));\n\ + dcur[1] = imageLoad(cur[idx], pos - ivec2(0, 1));\n\ + dcur[2] = imageLoad(cur[idx], pos + ivec2(0, 1));\n\ + dcur[3] = imageLoad(cur[idx], pos + ivec2(0, 3));\n\ +\n\ + prev1[0] = imageLoad(prev[idx], pos - ivec2(0, 1));\n\ + prev1[1] = imageLoad(prev[idx], pos + ivec2(0, 1));\n\ +\n\ + next1[0] = imageLoad(next[idx], pos - ivec2(0, 1));\n\ + next1[1] = imageLoad(next[idx], pos + ivec2(0, 1));\n\ +\n\ + if (field_parity) {\n\ + prev2[0] = 
imageLoad(prev[idx], pos - ivec2(0, 4));\n\ + prev2[1] = imageLoad(prev[idx], pos - ivec2(0, 2));\n\ + prev2[2] = imageLoad(prev[idx], pos);\n\ + prev2[3] = imageLoad(prev[idx], pos + ivec2(0, 2));\n\ + prev2[4] = imageLoad(prev[idx], pos + ivec2(0, 4));\n\ +\n\ + next2[0] = imageLoad(cur[idx], pos - ivec2(0, 4));\n\ + next2[1] = imageLoad(cur[idx], pos - ivec2(0, 2));\n\ + next2[2] = imageLoad(cur[idx], pos);\n\ + next2[3] = imageLoad(cur[idx], pos + ivec2(0, 2));\n\ + next2[4] = imageLoad(cur[idx], pos + ivec2(0, 4));\n\ + } else {\n\ + prev2[0] = imageLoad(cur[idx], pos - ivec2(0, 4));\n\ + prev2[1] = imageLoad(cur[idx], pos - ivec2(0, 2));\n\ + prev2[2] = imageLoad(cur[idx], pos);\n\ + prev2[3] = imageLoad(cur[idx], pos + ivec2(0, 2));\n\ + prev2[4] = imageLoad(cur[idx], pos + ivec2(0, 4));\n\ +\n\ + next2[0] = imageLoad(next[idx], pos - ivec2(0, 4));\n\ + next2[1] = imageLoad(next[idx], pos - ivec2(0, 2));\n\ + next2[2] = imageLoad(next[idx], pos);\n\ + next2[3] = imageLoad(next[idx], pos + ivec2(0, 2));\n\ + next2[4] = imageLoad(next[idx], pos + ivec2(0, 4));\n\ + }\n\ +\n\ + imageStore(dst[idx], pos, process_line(prev2, prev1, dcur, next1, next2));\n\ +}"; + +const char *ff_source_debayer_comp = "\ +#define LD(xo, yo) \\\n\ + (imageLoad(input_img[0], pos + ivec2((xo), (yo))).r)\n\ +\n\ +void debayer_bilinear(void)\n\ +{\n\ + ivec2 pos = ivec2(gl_GlobalInvocationID.xy) << 1;\n\ +\n\ + /* R basis */\n\ + vec4 tl = vec4(LD(0, 0),\n\ + (LD(1, 0) + LD(-1, 0) + LD(0, 1) + LD(0, -1)) / 4.0f,\n\ + (LD(-1, -1) + LD(1, 1) + LD(-1, 1) + LD(1, -1)) / 4.0f,\n\ + 1.0f);\n\ + imageStore(output_img[0], pos, tl);\n\ +\n\ + /* G1 basis */\n\ + vec4 tr = vec4((LD(2, 0) + LD(0, 0)) / 2.0f,\n\ + LD(1, 0),\n\ + (LD(1, 1) + LD(1, -1)) / 2.0f,\n\ + 1.0f);\n\ + imageStore(output_img[0], pos + ivec2(1, 0), tr);\n\ +\n\ + /* G2 basis */\n\ + vec4 bl = vec4((LD(0, 2) + LD(0, 0)) / 2.0f,\n\ + LD(0, 1),\n\ + (LD(1, 1) + LD(-1, 1)) / 2.0f,\n\ + 1.0f);\n\ + imageStore(output_img[0], pos + 
ivec2(0, 1), bl);\n\ +\n\ + /* B basis */\n\ + vec4 br = vec4((LD(0, 0) + LD(2, 2) + LD(0, 2) + LD(2, 0)) / 4.0f,\n\ + (LD(2, 1) + LD(0, 1) + LD(1, 2) + LD(1, 0)) / 4.0f,\n\ + LD(1, 1),\n\ + 1.0f);\n\ + imageStore(output_img[0], pos + ivec2(1, 1), br);\n\ +}\n\ +\n\ +void debayer_bilinear_hq(void)\n\ +{\n\ + ivec2 pos = ivec2(gl_GlobalInvocationID.xy) << 1;\n\ +\n\ + /* R basis */\n\ + vec4 tl = vec4(LD(0, 0),\n\ + (4.0f*LD(0, 0) + 2.0f*(LD(0, -1) + LD(0, 1) + LD(-1, 0) + LD(1, 0)) -\n\ + (LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 8.0f,\n\ + (12.0f*LD(0, 0) + 4.0f*(LD(-1, -1) + LD(-1, 1) + LD(1, -1) + LD(1, 1)) -\n\ + 3.0f*(LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 16.0f,\n\ + 1.0f);\n\ + imageStore(output_img[0], pos, tl);\n\ +\n\ + /* G1 basis */\n\ + vec4 tr = vec4((10.0f*LD(1, 0) + 8.0f*(LD(0, 0) + LD(2, 0)) -\n\ + 2.0f*(LD(0, -1) + LD(2, 1) + LD(0, 1) + LD(2, -1) + LD(-1, 0) + LD(3, 0)) +\n\ + LD(1, -2) + LD(1, 2)) / 16.0f,\n\ + LD(1, 0),\n\ + (10.0f*LD(1, 0) + 8.0f*(LD(1, -1) + LD(1, 1)) -\n\ + 2.0f*(LD(0, -1) + LD(0, 1) + LD(2, -1) + LD(2, 1) + LD(1, -2) + LD(1, 2)) +\n\ + LD(-1, 0) + LD(3, 0)) / 16.0f,\n\ + 1.0f);\n\ + imageStore(output_img[0], pos + ivec2(1, 0), tr);\n\ +\n\ + /* G2 basis */\n\ + vec4 bl = vec4((10.0f*LD(0, 1) + 8.0f*(LD(0, 0) + LD(0, 2)) -\n\ + 2.0f*(LD(-1, 0) + LD(-1, 2) + LD(1, 0) + LD(1, 2) + LD(0, -1) + LD(0, 3)) +\n\ + LD(-2, 1) + LD(2, 1)) / 16.0f,\n\ + LD(0, 1),\n\ + (10.0f*LD(0, 1) + 8.0f*(LD(-1, 1) + LD(1, 1)) -\n\ + 2.0f*(LD(-1, 0) + LD(1, 2) + LD(-1, 2) + LD(1, 0) + LD(-2, 1) + LD(2, 1)) +\n\ + LD(0, -1) + LD(0, 3)) / 16.0f,\n\ + 1.0f);\n\ + imageStore(output_img[0], pos + ivec2(0, 1), bl);\n\ +\n\ + /* B basis */\n\ + vec4 br = vec4((12.0f*LD(1, 1) + 4.0f*(LD(0, 0) + LD(0, 2) + LD(2, 0) + LD(2, 2)) -\n\ + 3.0f*(LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 16.0f,\n\ + (4.0f*LD(1, 1) + 2.0f*(LD(1, 0) + LD(1, 2) + LD(0, 1) + LD(2, 1)) -\n\ + (LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 8.0f,\n\ + LD(1, 1),\n\ + 
1.0f);\n\ + imageStore(output_img[0], pos + ivec2(1, 1), br);\n\ +}"; -- 2.49.1 From 9dcc0332b7e6f28920bf3e5d26d1b96b5c930999 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:10:18 +0000 Subject: [PATCH 036/118] Changing vulkan file directory --- libavfilter/vulkan/vulkan_source.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 libavfilter/vulkan/vulkan_source.h diff --git a/libavfilter/vulkan/vulkan_source.h b/libavfilter/vulkan/vulkan_source.h new file mode 100644 index 0000000000..b756654852 --- /dev/null +++ b/libavfilter/vulkan/vulkan_source.h @@ -0,0 +1,25 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VULKAN_SOURCE_H +#define AVFILTER_VULKAN_SOURCE_H + +extern const char *ff_source_bwdif_comp; +extern const char *ff_source_debayer_comp; + +#endif /* AVFILTER_VULKAN_SOURCE_H */ -- 2.49.1 From 10cb6227192a78e1fea6461fe7cd06ba96c424eb Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:10:47 +0000 Subject: [PATCH 037/118] Changing vulkan file directory --- libavfilter/vulkan/bwdif.comp | 122 ---------------------------------- 1 file changed, 122 deletions(-) delete mode 100644 libavfilter/vulkan/bwdif.comp diff --git a/libavfilter/vulkan/bwdif.comp b/libavfilter/vulkan/bwdif.comp deleted file mode 100644 index 5152464823..0000000000 --- a/libavfilter/vulkan/bwdif.comp +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -const vec4 coef_lf[2] = { vec4(4309), vec4(213), }; -const vec4 coef_hf[3] = { vec4(5570), vec4(3801), vec4(1016) }; -const vec4 coef_sp[2] = { vec4(5077), vec4(981), }; - -vec4 process_intra(vec4 cur[4]) -{ - return (coef_sp[0]*(cur[1] + cur[2]) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13); -} - -void process_plane_intra(int idx, ivec2 pos) -{ - vec4 dcur[4]; - dcur[0] = imageLoad(cur[idx], pos - ivec2(0, 3)); - dcur[1] = imageLoad(cur[idx], pos - ivec2(0, 1)); - dcur[2] = imageLoad(cur[idx], pos + ivec2(0, 1)); - dcur[3] = imageLoad(cur[idx], pos + ivec2(0, 3)); - imageStore(dst[idx], pos, process_intra(dcur)); -} - -vec4 process_line(vec4 prev2[5], vec4 prev1[2], vec4 cur[4], vec4 next1[2], vec4 next2[5]) -{ - vec4 fc = cur[1]; - vec4 fe = cur[2]; - vec4 fs = prev2[2] + next2[2]; - vec4 fd = fs / 2; - - vec4 temp_diff[3]; - temp_diff[0] = abs(prev2[2] - next2[2]); - temp_diff[1] = (abs(prev1[0] - fc) + abs(prev1[1] - fe)) / 2; - temp_diff[2] = (abs(next1[0] - fc) + abs(next1[1] - fe)) / 2; - vec4 diff = max(temp_diff[0] / 2, max(temp_diff[1], temp_diff[2])); - bvec4 diff_mask = equal(diff, vec4(0)); - - vec4 fbs = prev2[1] + next2[1]; - vec4 ffs = prev2[3] + next2[3]; - vec4 fb = (fbs / 2) - fc; - vec4 ff = (ffs / 2) - fe; - vec4 dc = fd - fc; - vec4 de = fd - fe; - vec4 mmax = max(de, max(dc, min(fb, ff))); - vec4 mmin = min(de, min(dc, max(fb, ff))); - diff = max(diff, max(mmin, -mmax)); - - vec4 interpolate_all = (((coef_hf[0]*(fs) - coef_hf[1]*(fbs + ffs) + - coef_hf[2]*(prev2[0] + next2[0] + prev2[4] + next2[4])) / 4) + - coef_lf[0]*(fc + fe) - coef_lf[1]*(cur[0] + cur[3])) / (1 << 13); - vec4 interpolate_cur = (coef_sp[0]*(fc + fe) - coef_sp[1]*(cur[0] + cur[3])) / (1 << 13); - - bvec4 interpolate_cnd1 = greaterThan(abs(fc - fe), 
temp_diff[0]); - vec4 interpol = mix(interpolate_cur, interpolate_all, interpolate_cnd1); - interpol = clamp(interpol, fd - diff, fd + diff); - return mix(interpol, fd, diff_mask); -} - -void process_plane(int idx, const ivec2 pos, bool filter_field, - bool is_intra, bool field_parity) -{ - vec4 dcur[4]; - vec4 prev1[2]; - vec4 next1[2]; - vec4 prev2[5]; - vec4 next2[5]; - - dcur[0] = imageLoad(cur[idx], pos - ivec2(0, 3)); - dcur[1] = imageLoad(cur[idx], pos - ivec2(0, 1)); - dcur[2] = imageLoad(cur[idx], pos + ivec2(0, 1)); - dcur[3] = imageLoad(cur[idx], pos + ivec2(0, 3)); - - prev1[0] = imageLoad(prev[idx], pos - ivec2(0, 1)); - prev1[1] = imageLoad(prev[idx], pos + ivec2(0, 1)); - - next1[0] = imageLoad(next[idx], pos - ivec2(0, 1)); - next1[1] = imageLoad(next[idx], pos + ivec2(0, 1)); - - if (field_parity) { - prev2[0] = imageLoad(prev[idx], pos - ivec2(0, 4)); - prev2[1] = imageLoad(prev[idx], pos - ivec2(0, 2)); - prev2[2] = imageLoad(prev[idx], pos); - prev2[3] = imageLoad(prev[idx], pos + ivec2(0, 2)); - prev2[4] = imageLoad(prev[idx], pos + ivec2(0, 4)); - - next2[0] = imageLoad(cur[idx], pos - ivec2(0, 4)); - next2[1] = imageLoad(cur[idx], pos - ivec2(0, 2)); - next2[2] = imageLoad(cur[idx], pos); - next2[3] = imageLoad(cur[idx], pos + ivec2(0, 2)); - next2[4] = imageLoad(cur[idx], pos + ivec2(0, 4)); - } else { - prev2[0] = imageLoad(cur[idx], pos - ivec2(0, 4)); - prev2[1] = imageLoad(cur[idx], pos - ivec2(0, 2)); - prev2[2] = imageLoad(cur[idx], pos); - prev2[3] = imageLoad(cur[idx], pos + ivec2(0, 2)); - prev2[4] = imageLoad(cur[idx], pos + ivec2(0, 4)); - - next2[0] = imageLoad(next[idx], pos - ivec2(0, 4)); - next2[1] = imageLoad(next[idx], pos - ivec2(0, 2)); - next2[2] = imageLoad(next[idx], pos); - next2[3] = imageLoad(next[idx], pos + ivec2(0, 2)); - next2[4] = imageLoad(next[idx], pos + ivec2(0, 4)); - } - - imageStore(dst[idx], pos, process_line(prev2, prev1, dcur, next1, next2)); -} -- 2.49.1 From c38b0cbce5614088c0f599da96ed798b2e67b9e3 
Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:10:58 +0000 Subject: [PATCH 038/118] Changing vulkan file directory --- libavfilter/vulkan/debayer.comp | 102 -------------------------------- 1 file changed, 102 deletions(-) delete mode 100644 libavfilter/vulkan/debayer.comp diff --git a/libavfilter/vulkan/debayer.comp b/libavfilter/vulkan/debayer.comp deleted file mode 100644 index c86c2f5eec..0000000000 --- a/libavfilter/vulkan/debayer.comp +++ /dev/null @@ -1,102 +0,0 @@ -/* - * - * Copyright (c) 2025 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#define LD(xo, yo) \ - (imageLoad(input_img[0], pos + ivec2((xo), (yo))).r) - -void debayer_bilinear(void) -{ - ivec2 pos = ivec2(gl_GlobalInvocationID.xy) << 1; - - /* R basis */ - vec4 tl = vec4(LD(0, 0), - (LD(1, 0) + LD(-1, 0) + LD(0, 1) + LD(0, -1)) / 4.0f, - (LD(-1, -1) + LD(1, 1) + LD(-1, 1) + LD(1, -1)) / 4.0f, - 1.0f); - imageStore(output_img[0], pos, tl); - - /* G1 basis */ - vec4 tr = vec4((LD(2, 0) + LD(0, 0)) / 2.0f, - LD(1, 0), - (LD(1, 1) + LD(1, -1)) / 2.0f, - 1.0f); - imageStore(output_img[0], pos + ivec2(1, 0), tr); - - /* G2 basis */ - vec4 bl = vec4((LD(0, 2) + LD(0, 0)) / 2.0f, - LD(0, 1), - (LD(1, 1) + LD(-1, 1)) / 2.0f, - 1.0f); - imageStore(output_img[0], pos + ivec2(0, 1), bl); - - /* B basis */ - vec4 br = vec4((LD(0, 0) + LD(2, 2) + LD(0, 2) + LD(2, 0)) / 4.0f, - (LD(2, 1) + LD(0, 1) + LD(1, 2) + LD(1, 0)) / 4.0f, - LD(1, 1), - 1.0f); - imageStore(output_img[0], pos + ivec2(1, 1), br); -} - -void debayer_bilinear_hq(void) -{ - ivec2 pos = ivec2(gl_GlobalInvocationID.xy) << 1; - - /* R basis */ - vec4 tl = vec4(LD(0, 0), - (4.0f*LD(0, 0) + 2.0f*(LD(0, -1) + LD(0, 1) + LD(-1, 0) + LD(1, 0)) - - (LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 8.0f, - (12.0f*LD(0, 0) + 4.0f*(LD(-1, -1) + LD(-1, 1) + LD(1, -1) + LD(1, 1)) - - 3.0f*(LD(0, -2) + LD(0, 2) + LD(-2, 0) + LD(2, 0))) / 16.0f, - 1.0f); - imageStore(output_img[0], pos, tl); - - /* G1 basis */ - vec4 tr = vec4((10.0f*LD(1, 0) + 8.0f*(LD(0, 0) + LD(2, 0)) - - 2.0f*(LD(0, -1) + LD(2, 1) + LD(0, 1) + LD(2, -1) + LD(-1, 0) + LD(3, 0)) + - LD(1, -2) + LD(1, 2)) / 16.0f, - LD(1, 0), - (10.0f*LD(1, 0) + 8.0f*(LD(1, -1) + LD(1, 1)) - - 2.0f*(LD(0, -1) + LD(0, 1) + LD(2, -1) + LD(2, 1) + LD(1, -2) + LD(1, 2)) + - LD(-1, 0) + LD(3, 0)) / 16.0f, - 1.0f); - 
imageStore(output_img[0], pos + ivec2(1, 0), tr); - - - /* G2 basis */ - vec4 bl = vec4((10.0f*LD(0, 1) + 8.0f*(LD(0, 0) + LD(0, 2)) - - 2.0f*(LD(-1, 0) + LD(-1, 2) + LD(1, 0) + LD(1, 2) + LD(0, -1) + LD(0, 3)) + - LD(-2, 1) + LD(2, 1)) / 16.0f, - LD(0, 1), - (10.0f*LD(0, 1) + 8.0f*(LD(-1, 1) + LD(1, 1)) - - 2.0f*(LD(-1, 0) + LD(1, 2) + LD(-1, 2) + LD(1, 0) + LD(-2, 1) + LD(2, 1)) + - LD(0, -1) + LD(0, 3)) / 16.0f, - 1.0f); - imageStore(output_img[0], pos + ivec2(0, 1), bl); - - /* B basis */ - vec4 br = vec4((12.0f*LD(1, 1) + 4.0f*(LD(0, 0) + LD(0, 2) + LD(2, 0) + LD(2, 2)) - - 3.0f*(LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 16.0f, - (4.0f*LD(1, 1) + 2.0f*(LD(1, 0) + LD(1, 2) + LD(0, 1) + LD(2, 1)) - - (LD(1, -1) + LD(1, 3) + LD(-1, 1) + LD(3, 1))) / 8.0f, - LD(1, 1), - 1.0f); - imageStore(output_img[0], pos + ivec2(1, 1), br); -} -- 2.49.1 From 205ac1c6183973e11f82feeef9743777dc5c9b94 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:14:07 +0000 Subject: [PATCH 039/118] Changing vulkan file directory --- libavutil/hwcontext_vulkan.c | 4790 ---------------------------------- 1 file changed, 4790 deletions(-) delete mode 100644 libavutil/hwcontext_vulkan.c diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c deleted file mode 100644 index 47f894f75f..0000000000 --- a/libavutil/hwcontext_vulkan.c +++ /dev/null @@ -1,4790 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#define VK_NO_PROTOTYPES -#define VK_ENABLE_BETA_EXTENSIONS - -#ifdef _WIN32 -#include <windows.h> /* Included to prevent conflicts with CreateSemaphore */ -#include <versionhelpers.h> -#include "compat/w32dlfcn.h" -#else -#include <dlfcn.h> -#include <unistd.h> -#endif - -#include "thread.h" - -#include "config.h" -#include "pixdesc.h" -#include "avstring.h" -#include "imgutils.h" -#include "hwcontext.h" -#include "hwcontext_internal.h" -#include "hwcontext_vulkan.h" -#include "mem.h" - -#include "vulkan.h" -#include "vulkan_loader.h" - -#if CONFIG_VAAPI -#include "hwcontext_vaapi.h" -#endif - -#if CONFIG_LIBDRM -#if CONFIG_VAAPI -#include <va/va_drmcommon.h> -#endif -#ifdef __linux__ -#include <sys/sysmacros.h> -#endif -#include <sys/stat.h> -#include <xf86drm.h> -#include <drm_fourcc.h> -#include "hwcontext_drm.h" -#endif - -#if HAVE_LINUX_DMA_BUF_H -#include <sys/ioctl.h> -#include <linux/dma-buf.h> -#endif - -#if CONFIG_CUDA -#include "hwcontext_cuda_internal.h" -#include "cuda_check.h" -#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) -#endif - -typedef struct VulkanDeviceFeatures { - VkPhysicalDeviceFeatures2 device; - - VkPhysicalDeviceVulkan11Features vulkan_1_1; - VkPhysicalDeviceVulkan12Features vulkan_1_2; - VkPhysicalDeviceVulkan13Features vulkan_1_3; - VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore; - VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR subgroup_rotate; - VkPhysicalDeviceHostImageCopyFeaturesEXT host_image_copy; - -#ifdef VK_KHR_shader_expect_assume - VkPhysicalDeviceShaderExpectAssumeFeaturesKHR expect_assume; -#endif - - VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maintenance_1; -#ifdef VK_KHR_video_maintenance2 - VkPhysicalDeviceVideoMaintenance2FeaturesKHR video_maintenance_2; -#endif 
-#ifdef VK_KHR_video_decode_vp9 - VkPhysicalDeviceVideoDecodeVP9FeaturesKHR vp9_decode; -#endif -#ifdef VK_KHR_video_encode_av1 - VkPhysicalDeviceVideoEncodeAV1FeaturesKHR av1_encode; -#endif - - VkPhysicalDeviceShaderObjectFeaturesEXT shader_object; - VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperative_matrix; - VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptor_buffer; - VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float; - -#ifdef VK_KHR_shader_relaxed_extended_instruction - VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR relaxed_extended_instruction; -#endif -} VulkanDeviceFeatures; - -typedef struct VulkanDevicePriv { - /** - * The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it. - */ - AVVulkanDeviceContext p; - - /* Vulkan library and loader functions */ - void *libvulkan; - - FFVulkanContext vkctx; - AVVulkanDeviceQueueFamily *compute_qf; - AVVulkanDeviceQueueFamily *transfer_qf; - - /* Properties */ - VkPhysicalDeviceProperties2 props; - VkPhysicalDeviceMemoryProperties mprops; - VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; - VkPhysicalDeviceDriverProperties dprops; - - /* Opaque FD external semaphore properties */ - VkExternalSemaphoreProperties ext_sem_props_opaque; - - /* Enabled features */ - VulkanDeviceFeatures feats; - - /* Queues */ - pthread_mutex_t **qf_mutex; - uint32_t nb_tot_qfs; - uint32_t img_qfs[64]; - uint32_t nb_img_qfs; - - /* Debug callback */ - VkDebugUtilsMessengerEXT debug_ctx; - - /* Settings */ - int use_linear_images; - - /* Option to allocate all image planes in a single allocation */ - int contiguous_planes; - - /* Disable multiplane images */ - int disable_multiplane; - - /* Disable host image transfer */ - int disable_host_transfer; - - /* Prefer memcpy over dynamic host pointer imports */ - int avoid_host_import; - - /* Maximum queues */ - int limit_queues; -} VulkanDevicePriv; - -typedef struct VulkanFramesPriv { - /** - * The public AVVulkanFramesContext. 
See hwcontext_vulkan.h for it. - */ - AVVulkanFramesContext p; - - /* Image conversions */ - FFVkExecPool compute_exec; - - /* Image transfers */ - FFVkExecPool upload_exec; - FFVkExecPool download_exec; - - /* Temporary buffer pools */ - AVBufferPool *tmp; - - /* Modifier info list to free at uninit */ - VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; - - /* Properties for DRM modifier for each plane in the image */ - VkDrmFormatModifierPropertiesEXT drm_format_modifier_properties[5]; -} VulkanFramesPriv; - -typedef struct AVVkFrameInternal { - pthread_mutex_t update_mutex; - -#if CONFIG_CUDA - /* Importing external memory into cuda is really expensive so we keep the - * memory imported all the time */ - AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */ - CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS]; - CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS]; - CUarray cu_array[AV_NUM_DATA_POINTERS]; - CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS]; -#ifdef _WIN32 - HANDLE ext_mem_handle[AV_NUM_DATA_POINTERS]; - HANDLE ext_sem_handle[AV_NUM_DATA_POINTERS]; -#endif -#endif -} AVVkFrameInternal; - -/* Initialize all structs in VulkanDeviceFeatures */ -static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *feats) -{ - VulkanDevicePriv *p = ctx->hwctx; - FFVulkanContext *s = &p->vkctx; - - feats->device = (VkPhysicalDeviceFeatures2) { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, - }; - - FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_1, FF_VK_EXT_NO_FLAG, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES); - FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_2, FF_VK_EXT_NO_FLAG, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES); - FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_3, FF_VK_EXT_NO_FLAG, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES); - - FF_VK_STRUCT_EXT(s, &feats->device, &feats->timeline_semaphore, FF_VK_EXT_PORTABILITY_SUBSET, - 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES); - FF_VK_STRUCT_EXT(s, &feats->device, &feats->subgroup_rotate, FF_VK_EXT_SUBGROUP_ROTATE, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_ROTATE_FEATURES_KHR); - FF_VK_STRUCT_EXT(s, &feats->device, &feats->host_image_copy, FF_VK_EXT_HOST_IMAGE_COPY, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT); - -#ifdef VK_KHR_shader_expect_assume - FF_VK_STRUCT_EXT(s, &feats->device, &feats->expect_assume, FF_VK_EXT_EXPECT_ASSUME, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_EXPECT_ASSUME_FEATURES_KHR); -#endif - - FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_1, FF_VK_EXT_VIDEO_MAINTENANCE_1, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR); -#ifdef VK_KHR_video_maintenance2 - FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_2, FF_VK_EXT_VIDEO_MAINTENANCE_2, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_2_FEATURES_KHR); -#endif -#ifdef VK_KHR_video_decode_vp9 - FF_VK_STRUCT_EXT(s, &feats->device, &feats->vp9_decode, FF_VK_EXT_VIDEO_DECODE_VP9, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR); -#endif -#ifdef VK_KHR_video_encode_av1 - FF_VK_STRUCT_EXT(s, &feats->device, &feats->av1_encode, FF_VK_EXT_VIDEO_ENCODE_AV1, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_AV1_FEATURES_KHR); -#endif - - FF_VK_STRUCT_EXT(s, &feats->device, &feats->shader_object, FF_VK_EXT_SHADER_OBJECT, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT); - FF_VK_STRUCT_EXT(s, &feats->device, &feats->cooperative_matrix, FF_VK_EXT_COOP_MATRIX, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR); - FF_VK_STRUCT_EXT(s, &feats->device, &feats->descriptor_buffer, FF_VK_EXT_DESCRIPTOR_BUFFER, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT); - FF_VK_STRUCT_EXT(s, &feats->device, &feats->atomic_float, FF_VK_EXT_ATOMIC_FLOAT, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT); - -#ifdef 
VK_KHR_shader_relaxed_extended_instruction - FF_VK_STRUCT_EXT(s, &feats->device, &feats->relaxed_extended_instruction, FF_VK_EXT_RELAXED_EXTENDED_INSTR, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR); -#endif -} - -/* Copy all needed device features */ -static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceFeatures *src) -{ -#define COPY_VAL(VAL) \ - do { \ - dst->VAL = src->VAL; \ - } while (0) \ - - COPY_VAL(device.features.shaderImageGatherExtended); - COPY_VAL(device.features.shaderStorageImageReadWithoutFormat); - COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat); - COPY_VAL(device.features.fragmentStoresAndAtomics); - COPY_VAL(device.features.vertexPipelineStoresAndAtomics); - COPY_VAL(device.features.shaderInt64); - COPY_VAL(device.features.shaderInt16); - COPY_VAL(device.features.shaderFloat64); - COPY_VAL(device.features.shaderStorageImageReadWithoutFormat); - COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat); - - COPY_VAL(vulkan_1_1.samplerYcbcrConversion); - COPY_VAL(vulkan_1_1.storagePushConstant16); - COPY_VAL(vulkan_1_1.storageBuffer16BitAccess); - COPY_VAL(vulkan_1_1.uniformAndStorageBuffer16BitAccess); - - COPY_VAL(vulkan_1_2.timelineSemaphore); - COPY_VAL(vulkan_1_2.scalarBlockLayout); - COPY_VAL(vulkan_1_2.bufferDeviceAddress); - COPY_VAL(vulkan_1_2.hostQueryReset); - COPY_VAL(vulkan_1_2.storagePushConstant8); - COPY_VAL(vulkan_1_2.shaderInt8); - COPY_VAL(vulkan_1_2.storageBuffer8BitAccess); - COPY_VAL(vulkan_1_2.uniformAndStorageBuffer8BitAccess); - COPY_VAL(vulkan_1_2.shaderFloat16); - COPY_VAL(vulkan_1_2.shaderBufferInt64Atomics); - COPY_VAL(vulkan_1_2.shaderSharedInt64Atomics); - COPY_VAL(vulkan_1_2.vulkanMemoryModel); - COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope); - COPY_VAL(vulkan_1_2.uniformBufferStandardLayout); - - COPY_VAL(vulkan_1_3.dynamicRendering); - COPY_VAL(vulkan_1_3.maintenance4); - COPY_VAL(vulkan_1_3.synchronization2); - 
COPY_VAL(vulkan_1_3.computeFullSubgroups); - COPY_VAL(vulkan_1_3.subgroupSizeControl); - COPY_VAL(vulkan_1_3.shaderZeroInitializeWorkgroupMemory); - COPY_VAL(vulkan_1_3.dynamicRendering); - - COPY_VAL(timeline_semaphore.timelineSemaphore); - COPY_VAL(subgroup_rotate.shaderSubgroupRotate); - COPY_VAL(host_image_copy.hostImageCopy); - - COPY_VAL(video_maintenance_1.videoMaintenance1); -#ifdef VK_KHR_video_maintenance2 - COPY_VAL(video_maintenance_2.videoMaintenance2); -#endif - -#ifdef VK_KHR_video_decode_vp9 - COPY_VAL(vp9_decode.videoDecodeVP9); -#endif - -#ifdef VK_KHR_video_encode_av1 - COPY_VAL(av1_encode.videoEncodeAV1); -#endif - - COPY_VAL(shader_object.shaderObject); - - COPY_VAL(cooperative_matrix.cooperativeMatrix); - - COPY_VAL(descriptor_buffer.descriptorBuffer); - COPY_VAL(descriptor_buffer.descriptorBufferPushDescriptors); - - COPY_VAL(atomic_float.shaderBufferFloat32Atomics); - COPY_VAL(atomic_float.shaderBufferFloat32AtomicAdd); - -#ifdef VK_KHR_shader_relaxed_extended_instruction - COPY_VAL(relaxed_extended_instruction.shaderRelaxedExtendedInstruction); -#endif - -#ifdef VK_KHR_shader_expect_assume - COPY_VAL(expect_assume.shaderExpectAssume); -#endif - -#undef COPY_VAL -} - -#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) -#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) - -static const struct FFVkFormatEntry { - VkFormat vkf; - enum AVPixelFormat pixfmt; - VkImageAspectFlags aspect; - int vk_planes; - int nb_images; - int nb_images_fallback; - const VkFormat fallback[5]; -} vk_formats_list[] = { - /* Gray formats */ - { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY12, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, 
AV_PIX_FMT_GRAY14, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R32_UINT, AV_PIX_FMT_GRAY32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_UINT } }, - { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } }, - - /* RGB formats */ - { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, - { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, - { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } }, - { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } }, - { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } }, - { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, - { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, - { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, - { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, - { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, - { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } }, - { VK_FORMAT_A2B10G10R10_UNORM_PACK32, AV_PIX_FMT_X2BGR10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2B10G10R10_UNORM_PACK32 } }, - { VK_FORMAT_R32G32B32_SFLOAT, AV_PIX_FMT_RGBF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_SFLOAT 
} }, - { VK_FORMAT_R32G32B32A32_SFLOAT, AV_PIX_FMT_RGBAF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_SFLOAT } }, - { VK_FORMAT_R32G32B32_UINT, AV_PIX_FMT_RGB96, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_UINT } }, - { VK_FORMAT_R32G32B32A32_UINT, AV_PIX_FMT_RGBA128, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_UINT } }, - - /* Planar RGB */ - { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRP, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP10, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP12, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP14, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP16, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, - - /* Planar RGB + Alpha */ - { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP14, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { 
VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R32_UINT, AV_PIX_FMT_GBRAP32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT } }, - { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, - - /* Bayer */ - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_BAYER_RGGB16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, - - /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */ - { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - - /* Two-plane 422 YUV at 8, 10 and 16 bits */ - { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - - /* Two-plane 444 YUV at 8, 10 and 16 bits */ - { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { 
VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, - { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, - - /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */ - { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, 
AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - /* Single plane 422 at 8, 10, 12 and 16 bits */ - { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, - { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, - { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, - { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, - { VK_FORMAT_G16B16G16R16_422_UNORM, AV_PIX_FMT_Y216, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, - - /* Planar YUVA 420 at 8, 10 and 16 bits */ - { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA420P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA420P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA420P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - /* Planar YUVA 422 at 8, 10, 12 and 16 bits */ - { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA422P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 
4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - /* Planar YUVA 444 at 8, 10, 12 and 16 bits */ - { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA444P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, - - /* Single plane 444 at 8, 10, 12 and 16 bits */ - { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_UYVA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, - { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_XV30, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, - { VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, - { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, -}; -static const int nb_vk_formats_list = FF_ARRAY_ELEMS(vk_formats_list); - -const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) -{ - for (int i = 0; i < nb_vk_formats_list; i++) - if (vk_formats_list[i].pixfmt == p) - return 
vk_formats_list[i].fallback; - return NULL; -} - -static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p) -{ - for (int i = 0; i < nb_vk_formats_list; i++) - if (vk_formats_list[i].pixfmt == p) - return &vk_formats_list[i]; - return NULL; -} - -static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, - VkImageTiling tiling, - VkFormat fmts[AV_NUM_DATA_POINTERS], /* Output format list */ - int *nb_images, /* Output number of images */ - VkImageAspectFlags *aspect, /* Output aspect */ - VkImageUsageFlags *supported_usage, /* Output supported usage */ - int disable_multiplane, int need_storage) -{ - VulkanDevicePriv *priv = dev_ctx->hwctx; - AVVulkanDeviceContext *hwctx = &priv->p; - FFVulkanFunctions *vk = &priv->vkctx.vkfn; - - const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | - VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | - VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; - - for (int i = 0; i < nb_vk_formats_list; i++) { - if (vk_formats_list[i].pixfmt == p) { - VkFormatProperties3 fprops = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3, - }; - VkFormatProperties2 prop = { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - .pNext = &fprops, - }; - VkFormatFeatureFlagBits2 feats_primary, feats_secondary; - int basics_primary = 0, basics_secondary = 0; - int storage_primary = 0, storage_secondary = 0; - - vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, - vk_formats_list[i].vkf, - &prop); - - feats_primary = tiling == VK_IMAGE_TILING_LINEAR ? - fprops.linearTilingFeatures : fprops.optimalTilingFeatures; - basics_primary = (feats_primary & basic_flags) == basic_flags; - storage_primary = !!(feats_primary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); - - if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) { - vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, - vk_formats_list[i].fallback[0], - &prop); - feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ? 
- fprops.linearTilingFeatures : fprops.optimalTilingFeatures; - basics_secondary = (feats_secondary & basic_flags) == basic_flags; - storage_secondary = !!(feats_secondary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); - } else { - basics_secondary = basics_primary; - storage_secondary = storage_primary; - } - - if (basics_primary && - !(disable_multiplane && vk_formats_list[i].vk_planes > 1) && - (!need_storage || (need_storage && (storage_primary | storage_secondary)))) { - if (fmts) { - if (vk_formats_list[i].nb_images > 1) { - for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++) - fmts[j] = vk_formats_list[i].fallback[j]; - } else { - fmts[0] = vk_formats_list[i].vkf; - } - } - if (nb_images) - *nb_images = 1; - if (aspect) - *aspect = vk_formats_list[i].aspect; - if (supported_usage) - *supported_usage = ff_vk_map_feats_to_usage(feats_primary) | - ((need_storage && (storage_primary | storage_secondary)) ? - VK_IMAGE_USAGE_STORAGE_BIT : 0); - return 0; - } else if (basics_secondary && - (!need_storage || (need_storage && storage_secondary))) { - if (fmts) { - for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++) - fmts[j] = vk_formats_list[i].fallback[j]; - } - if (nb_images) - *nb_images = vk_formats_list[i].nb_images_fallback; - if (aspect) - *aspect = vk_formats_list[i].aspect; - if (supported_usage) - *supported_usage = ff_vk_map_feats_to_usage(feats_secondary); - return 0; - } else { - return AVERROR(ENOTSUP); - } - } - } - - return AVERROR(EINVAL); -} - -#if CONFIG_VULKAN_STATIC -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, - const char *pName); -#endif - -static int load_libvulkan(AVHWDeviceContext *ctx) -{ - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - -#if CONFIG_VULKAN_STATIC - hwctx->get_proc_addr = vkGetInstanceProcAddr; -#else - static const char *lib_names[] = { -#if defined(_WIN32) - "vulkan-1.dll", -#elif defined(__APPLE__) - "libvulkan.dylib", - 
"libvulkan.1.dylib", - "libMoltenVK.dylib", -#else - "libvulkan.so.1", - "libvulkan.so", -#endif - }; - - for (int i = 0; i < FF_ARRAY_ELEMS(lib_names); i++) { - p->libvulkan = dlopen(lib_names[i], RTLD_NOW | RTLD_LOCAL); - if (p->libvulkan) - break; - } - - if (!p->libvulkan) { - av_log(ctx, AV_LOG_ERROR, "Unable to open the libvulkan library!\n"); - return AVERROR_UNKNOWN; - } - - hwctx->get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(p->libvulkan, "vkGetInstanceProcAddr"); -#endif /* CONFIG_VULKAN_STATIC */ - - return 0; -} - -typedef struct VulkanOptExtension { - const char *name; - FFVulkanExtensions flag; -} VulkanOptExtension; - -static const VulkanOptExtension optional_instance_exts[] = { - { VK_EXT_LAYER_SETTINGS_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, -#ifdef __APPLE__ - { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, -#endif -}; - -static const VulkanOptExtension optional_device_exts[] = { - /* Misc or required by other extensions */ - { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_PORTABILITY_SUBSET }, - { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR }, - { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER }, - { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, - { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, - { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX }, - { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT }, - { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE }, -#ifdef VK_KHR_shader_expect_assume - { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME }, -#endif - { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 }, -#ifdef VK_KHR_video_maintenance2 - { VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_2 }, -#endif - - /* Imports/exports */ - { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, - { 
VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_DMABUF_MEMORY }, - { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS }, - { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_SEM }, - { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_HOST_MEMORY }, -#ifdef _WIN32 - { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, - { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, -#endif - - /* Video encoding/decoding */ - { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, - { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, - { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, - { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, - { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, - { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, - { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, -#ifdef VK_KHR_video_decode_vp9 - { VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_VP9 }, -#endif -#ifdef VK_KHR_video_encode_av1 - { VK_KHR_VIDEO_ENCODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_AV1 }, -#endif - { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 }, -}; - -static VkBool32 VKAPI_CALL vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, - VkDebugUtilsMessageTypeFlagsEXT messageType, - const VkDebugUtilsMessengerCallbackDataEXT *data, - void *priv) -{ - int l; - AVHWDeviceContext *ctx = priv; - - /* Ignore false positives */ - switch (data->messageIdNumber) { - case 0x086974c1: /* BestPractices-vkCreateCommandPool-command-buffer-reset */ - case 0xfd92477a: /* BestPractices-vkAllocateMemory-small-allocation */ - case 0x618ab1e7: /* VUID-VkImageViewCreateInfo-usage-02275 */ - case 0x30f4ac70: /* VUID-VkImageCreateInfo-pNext-06811 */ - return VK_FALSE; 
- default: - break; - } - - switch (severity) { - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l = AV_LOG_ERROR; break; - default: l = AV_LOG_DEBUG; break; - } - - av_log(ctx, l, "%s\n", data->pMessage); - for (int i = 0; i < data->cmdBufLabelCount; i++) - av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName); - - return VK_FALSE; -} - -#define ADD_VAL_TO_LIST(list, count, val) \ - do { \ - list = av_realloc_array(list, sizeof(*list), ++count); \ - if (!list) { \ - err = AVERROR(ENOMEM); \ - goto fail; \ - } \ - list[count - 1] = av_strdup(val); \ - if (!list[count - 1]) { \ - err = AVERROR(ENOMEM); \ - goto fail; \ - } \ - } while(0) - -#define RELEASE_PROPS(props, count) \ - if (props) { \ - for (int i = 0; i < count; i++) \ - av_free((void *)((props)[i])); \ - av_free((void *)props); \ - } - -enum FFVulkanDebugMode { - FF_VULKAN_DEBUG_NONE = 0, - /* Standard GPU-assisted validation */ - FF_VULKAN_DEBUG_VALIDATE = 1, - /* Passes printfs in shaders to the debug callback */ - FF_VULKAN_DEBUG_PRINTF = 2, - /* Enables extra printouts */ - FF_VULKAN_DEBUG_PRACTICES = 3, - /* Disables validation but keeps shader debug info and optimizations */ - FF_VULKAN_DEBUG_PROFILE = 4, - - FF_VULKAN_DEBUG_NB, -}; - -static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, - const char * const **dst, uint32_t *num, - enum FFVulkanDebugMode debug_mode) -{ - const char *tstr; - const char **extension_names = NULL; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - int err = 0, found, extensions_found = 0; - - const char *mod; - int optional_exts_num; - uint32_t sup_ext_count; - char *user_exts_str = NULL; - AVDictionaryEntry 
*user_exts; - VkExtensionProperties *sup_ext; - const VulkanOptExtension *optional_exts; - - if (!dev) { - mod = "instance"; - optional_exts = optional_instance_exts; - optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts); - user_exts = av_dict_get(opts, "instance_extensions", NULL, 0); - if (user_exts) { - user_exts_str = av_strdup(user_exts->value); - if (!user_exts_str) { - err = AVERROR(ENOMEM); - goto fail; - } - } - vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL); - sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); - if (!sup_ext) - return AVERROR(ENOMEM); - vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext); - } else { - mod = "device"; - optional_exts = optional_device_exts; - optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts); - user_exts = av_dict_get(opts, "device_extensions", NULL, 0); - if (user_exts) { - user_exts_str = av_strdup(user_exts->value); - if (!user_exts_str) { - err = AVERROR(ENOMEM); - goto fail; - } - } - vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, - &sup_ext_count, NULL); - sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); - if (!sup_ext) - return AVERROR(ENOMEM); - vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, - &sup_ext_count, sup_ext); - } - - for (int i = 0; i < optional_exts_num; i++) { - tstr = optional_exts[i].name; - found = 0; - - /* Intel has had a bad descriptor buffer implementation for a while */ - if (p->dprops.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA && - !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) - continue; - - if (dev && - ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) || - (debug_mode == FF_VULKAN_DEBUG_PRINTF) || - (debug_mode == FF_VULKAN_DEBUG_PRACTICES)) && - !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) { - continue; - } - - for (int j = 0; j < sup_ext_count; j++) { - if (!strcmp(tstr, sup_ext[j].extensionName)) { - found = 1; - break; - } - } - if (!found) 
- continue; - - av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); - p->vkctx.extensions |= optional_exts[i].flag; - ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); - } - - if (!dev && - ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) || - (debug_mode == FF_VULKAN_DEBUG_PRINTF) || - (debug_mode == FF_VULKAN_DEBUG_PRACTICES))) { - tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; - found = 0; - for (int j = 0; j < sup_ext_count; j++) { - if (!strcmp(tstr, sup_ext[j].extensionName)) { - found = 1; - break; - } - } - if (found) { - av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); - ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); - } else { - av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n", - tstr); - err = AVERROR(EINVAL); - goto fail; - } - } - -#ifdef VK_KHR_shader_relaxed_extended_instruction - if (((debug_mode == FF_VULKAN_DEBUG_PRINTF) || - (debug_mode == FF_VULKAN_DEBUG_PROFILE)) && dev) { - tstr = VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_EXTENSION_NAME; - found = 0; - for (int j = 0; j < sup_ext_count; j++) { - if (!strcmp(tstr, sup_ext[j].extensionName)) { - found = 1; - break; - } - } - if (found) { - av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); - ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); - } else { - av_log(ctx, AV_LOG_ERROR, "Debug_printf/profile enabled, but extension \"%s\" not found!\n", - tstr); - err = AVERROR(EINVAL); - goto fail; - } - } -#endif - - if (user_exts_str) { - char *save, *token = av_strtok(user_exts_str, "+", &save); - while (token) { - found = 0; - for (int j = 0; j < sup_ext_count; j++) { - if (!strcmp(token, sup_ext[j].extensionName)) { - found = 1; - break; - } - } - if (found) { - av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token); - ADD_VAL_TO_LIST(extension_names, extensions_found, token); - } else { - av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n", - mod, token); - } - token = 
av_strtok(NULL, "+", &save); - } - } - - *dst = extension_names; - *num = extensions_found; - - av_free(user_exts_str); - av_free(sup_ext); - return 0; - -fail: - RELEASE_PROPS(extension_names, extensions_found); - av_free(user_exts_str); - av_free(sup_ext); - return err; -} - -static int check_layers(AVHWDeviceContext *ctx, AVDictionary *opts, - const char * const **dst, uint32_t *num, - enum FFVulkanDebugMode *debug_mode) -{ - int err = 0; - VulkanDevicePriv *priv = ctx->hwctx; - FFVulkanFunctions *vk = &priv->vkctx.vkfn; - - static const char layer_standard_validation[] = { "VK_LAYER_KHRONOS_validation" }; - int layer_standard_validation_found = 0; - - uint32_t sup_layer_count; - VkLayerProperties *sup_layers; - - AVDictionaryEntry *user_layers = av_dict_get(opts, "layers", NULL, 0); - char *user_layers_str = NULL; - char *save, *token; - - const char **enabled_layers = NULL; - uint32_t enabled_layers_count = 0; - - AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0); - enum FFVulkanDebugMode mode; - - *debug_mode = mode = FF_VULKAN_DEBUG_NONE; - - /* Get a list of all layers */ - vk->EnumerateInstanceLayerProperties(&sup_layer_count, NULL); - sup_layers = av_malloc_array(sup_layer_count, sizeof(VkLayerProperties)); - if (!sup_layers) - return AVERROR(ENOMEM); - vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers); - - av_log(ctx, AV_LOG_VERBOSE, "Supported layers:\n"); - for (int i = 0; i < sup_layer_count; i++) - av_log(ctx, AV_LOG_VERBOSE, "\t%s\n", sup_layers[i].layerName); - - /* If no user layers or debug layers are given, return */ - if (!debug_opt && !user_layers) - goto end; - - /* Check for any properly supported validation layer */ - if (debug_opt) { - if (!strcmp(debug_opt->value, "profile")) { - mode = FF_VULKAN_DEBUG_PROFILE; - } else if (!strcmp(debug_opt->value, "printf")) { - mode = FF_VULKAN_DEBUG_PRINTF; - } else if (!strcmp(debug_opt->value, "validate")) { - mode = FF_VULKAN_DEBUG_VALIDATE; - } else if 
(!strcmp(debug_opt->value, "practices")) { - mode = FF_VULKAN_DEBUG_PRACTICES; - } else { - char *end_ptr = NULL; - int idx = strtol(debug_opt->value, &end_ptr, 10); - if (end_ptr == debug_opt->value || end_ptr[0] != '\0' || - idx < 0 || idx >= FF_VULKAN_DEBUG_NB) { - av_log(ctx, AV_LOG_ERROR, "Invalid debugging mode \"%s\"\n", - debug_opt->value); - err = AVERROR(EINVAL); - goto end; - } - mode = idx; - } - } - - /* If mode is VALIDATE or PRINTF, try to find the standard validation layer extension */ - if ((mode == FF_VULKAN_DEBUG_VALIDATE) || - (mode == FF_VULKAN_DEBUG_PRINTF) || - (mode == FF_VULKAN_DEBUG_PRACTICES)) { - for (int i = 0; i < sup_layer_count; i++) { - if (!strcmp(layer_standard_validation, sup_layers[i].layerName)) { - av_log(ctx, AV_LOG_VERBOSE, "Standard validation layer %s is enabled\n", - layer_standard_validation); - ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, layer_standard_validation); - *debug_mode = mode; - layer_standard_validation_found = 1; - break; - } - } - if (!layer_standard_validation_found) { - av_log(ctx, AV_LOG_ERROR, - "Validation Layer \"%s\" not supported\n", layer_standard_validation); - err = AVERROR(ENOTSUP); - goto end; - } - } else if (mode == FF_VULKAN_DEBUG_PROFILE) { - *debug_mode = mode; - } - - /* Process any custom layers enabled */ - if (user_layers) { - int found; - - user_layers_str = av_strdup(user_layers->value); - if (!user_layers_str) { - err = AVERROR(ENOMEM); - goto fail; - } - - token = av_strtok(user_layers_str, "+", &save); - while (token) { - found = 0; - - /* If debug=1/2 was specified as an option, skip this layer */ - if (!strcmp(layer_standard_validation, token) && layer_standard_validation_found) { - token = av_strtok(NULL, "+", &save); - break; - } - - /* Try to find the layer in the list of supported layers */ - for (int j = 0; j < sup_layer_count; j++) { - if (!strcmp(token, sup_layers[j].layerName)) { - found = 1; - break; - } - } - - if (found) { - av_log(ctx, AV_LOG_VERBOSE, 
"Using layer: %s\n", token); - ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token); - - /* If debug was not set as an option, force it */ - if (!strcmp(layer_standard_validation, token)) - *debug_mode = FF_VULKAN_DEBUG_VALIDATE; - } else { - av_log(ctx, AV_LOG_ERROR, - "Layer \"%s\" not supported\n", token); - err = AVERROR(EINVAL); - goto end; - } - - token = av_strtok(NULL, "+", &save); - } - } - -fail: -end: - av_free(sup_layers); - av_free(user_layers_str); - - if (err < 0) { - RELEASE_PROPS(enabled_layers, enabled_layers_count); - } else { - *dst = enabled_layers; - *num = enabled_layers_count; - } - - return err; -} - -/* Creates a VkInstance */ -static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts, - enum FFVulkanDebugMode *debug_mode) -{ - int err = 0; - VkResult ret; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - VkApplicationInfo application_info = { - .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, - .pApplicationName = "ffmpeg", - .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, - LIBAVUTIL_VERSION_MINOR, - LIBAVUTIL_VERSION_MICRO), - .pEngineName = "libavutil", - .apiVersion = VK_API_VERSION_1_3, - .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, - LIBAVUTIL_VERSION_MINOR, - LIBAVUTIL_VERSION_MICRO), - }; - VkValidationFeaturesEXT validation_features = { - .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, - }; - VkInstanceCreateInfo inst_props = { - .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, - .pApplicationInfo = &application_info, - }; - - if (!hwctx->get_proc_addr) { - err = load_libvulkan(ctx); - if (err < 0) - return err; - } - - err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 0, 0); - if (err < 0) { - av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n"); - return err; - } - - err = check_layers(ctx, opts, &inst_props.ppEnabledLayerNames, - &inst_props.enabledLayerCount, debug_mode); - if 
(err) - goto fail; - - /* Check for present/missing extensions */ - err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames, - &inst_props.enabledExtensionCount, *debug_mode); - hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames; - hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount; - if (err < 0) - goto fail; - - /* Enable debug features if needed */ - if (*debug_mode == FF_VULKAN_DEBUG_VALIDATE) { - static const VkValidationFeatureEnableEXT feat_list_validate[] = { - VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, - }; - validation_features.pEnabledValidationFeatures = feat_list_validate; - validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_validate); - inst_props.pNext = &validation_features; - } else if (*debug_mode == FF_VULKAN_DEBUG_PRINTF) { - static const VkValidationFeatureEnableEXT feat_list_debug[] = { - VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, - VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT, - }; - validation_features.pEnabledValidationFeatures = feat_list_debug; - validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_debug); - inst_props.pNext = &validation_features; - } else if (*debug_mode == FF_VULKAN_DEBUG_PRACTICES) { - static const VkValidationFeatureEnableEXT feat_list_practices[] = { - VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, - VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, - }; - validation_features.pEnabledValidationFeatures = feat_list_practices; - validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_practices); - inst_props.pNext = &validation_features; - } - -#ifdef __APPLE__ - for (int i = 0; i < inst_props.enabledExtensionCount; i++) { - if 
(!strcmp(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, - inst_props.ppEnabledExtensionNames[i])) { - inst_props.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; - break; - } - } -#endif - - /* Try to create the instance */ - ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst); - - /* Check for errors */ - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 0); - if (err < 0) { - av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n"); - goto fail; - } - - /* Setup debugging callback if needed */ - if ((*debug_mode == FF_VULKAN_DEBUG_VALIDATE) || - (*debug_mode == FF_VULKAN_DEBUG_PRINTF) || - (*debug_mode == FF_VULKAN_DEBUG_PRACTICES)) { - VkDebugUtilsMessengerCreateInfoEXT dbg = { - .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, - .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, - .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, - .pfnUserCallback = vk_dbg_callback, - .pUserData = ctx, - }; - - vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg, - hwctx->alloc, &p->debug_ctx); - } - - err = 0; - -fail: - RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount); - return err; -} - -typedef struct VulkanDeviceSelection { - uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */ - int has_uuid; - uint32_t drm_major; /* Will use this second unless !has_drm */ - uint32_t drm_minor; /* Will use this second unless !has_drm */ - uint32_t has_drm; /* has drm node info */ - const char *name; /* Will use this third unless NULL */ - uint32_t 
pci_device; /* Will use this fourth unless 0x0 */ - uint32_t vendor_id; /* Last resort to find something deterministic */ - int index; /* Finally fall back to index */ -} VulkanDeviceSelection; - -static const char *vk_dev_type(enum VkPhysicalDeviceType type) -{ - switch (type) { - case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated"; - case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete"; - case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual"; - case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software"; - default: return "unknown"; - } -} - -/* Finds a device */ -static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) -{ - int err = 0, choice = -1; - uint32_t num; - VkResult ret; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - VkPhysicalDevice *devices = NULL; - VkPhysicalDeviceIDProperties *idp = NULL; - VkPhysicalDeviceProperties2 *prop = NULL; - VkPhysicalDeviceDriverProperties *driver_prop = NULL; - VkPhysicalDeviceDrmPropertiesEXT *drm_prop = NULL; - - ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL); - if (ret != VK_SUCCESS || !num) { - av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", ff_vk_ret2str(ret)); - return AVERROR(ENODEV); - } - - devices = av_malloc_array(num, sizeof(VkPhysicalDevice)); - if (!devices) - return AVERROR(ENOMEM); - - ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR(ENODEV); - goto end; - } - - prop = av_calloc(num, sizeof(*prop)); - if (!prop) { - err = AVERROR(ENOMEM); - goto end; - } - - idp = av_calloc(num, sizeof(*idp)); - if (!idp) { - err = AVERROR(ENOMEM); - goto end; - } - - driver_prop = av_calloc(num, sizeof(*driver_prop)); - if (!driver_prop) { - err = AVERROR(ENOMEM); - goto end; - } - - if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) { - drm_prop = 
av_calloc(num, sizeof(*drm_prop)); - if (!drm_prop) { - err = AVERROR(ENOMEM); - goto end; - } - } - - av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n"); - for (int i = 0; i < num; i++) { - if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) { - drm_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT; - driver_prop[i].pNext = &drm_prop[i]; - } - driver_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; - idp[i].pNext = &driver_prop[i]; - idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; - prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - prop[i].pNext = &idp[i]; - - vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]); - av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i, - prop[i].properties.deviceName, - vk_dev_type(prop[i].properties.deviceType), - prop[i].properties.deviceID); - } - - if (select->has_uuid) { - for (int i = 0; i < num; i++) { - if (!strncmp(idp[i].deviceUUID, select->uuid, VK_UUID_SIZE)) { - choice = i; - goto end; - } - } - av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n"); - err = AVERROR(ENODEV); - goto end; - } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) { - for (int i = 0; i < num; i++) { - if ((select->drm_major == drm_prop[i].primaryMajor && - select->drm_minor == drm_prop[i].primaryMinor) || - (select->drm_major == drm_prop[i].renderMajor && - select->drm_minor == drm_prop[i].renderMinor)) { - choice = i; - goto end; - } - } - av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n", - select->drm_major, select->drm_minor); - err = AVERROR(ENODEV); - goto end; - } else if (select->name) { - av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name); - for (int i = 0; i < num; i++) { - if (strstr(prop[i].properties.deviceName, select->name)) { - choice = i; - goto end; - } - } - av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n", - select->name); - err = AVERROR(ENODEV); - goto end; - 
} else if (select->pci_device) { - av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device); - for (int i = 0; i < num; i++) { - if (select->pci_device == prop[i].properties.deviceID) { - choice = i; - goto end; - } - } - av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n", - select->pci_device); - err = AVERROR(EINVAL); - goto end; - } else if (select->vendor_id) { - av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id); - for (int i = 0; i < num; i++) { - if (select->vendor_id == prop[i].properties.vendorID) { - choice = i; - goto end; - } - } - av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n", - select->vendor_id); - err = AVERROR(ENODEV); - goto end; - } else { - if (select->index < num) { - choice = select->index; - goto end; - } - av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n", - select->index); - err = AVERROR(ENODEV); - goto end; - } - -end: - if (choice > -1) { - av_log(ctx, AV_LOG_VERBOSE, "Device %d selected: %s (%s) (0x%x)\n", - choice, prop[choice].properties.deviceName, - vk_dev_type(prop[choice].properties.deviceType), - prop[choice].properties.deviceID); - hwctx->phys_dev = devices[choice]; - p->props = prop[choice]; - p->props.pNext = NULL; - p->dprops = driver_prop[choice]; - p->dprops.pNext = NULL; - } - - av_free(devices); - av_free(prop); - av_free(idp); - av_free(drm_prop); - av_free(driver_prop); - - return err; -} - -/* Picks the least used qf with the fewest unneeded flags, or -1 if none found */ -static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf, - VkQueueFlagBits flags) -{ - int index = -1; - uint32_t min_score = UINT32_MAX; - - for (int i = 0; i < num_qf; i++) { - VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags; - - /* Per the spec, reporting transfer caps is optional for these 2 types */ - if ((flags & VK_QUEUE_TRANSFER_BIT) && - (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))) - 
qflags |= VK_QUEUE_TRANSFER_BIT; - - if (qflags & flags) { - uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits; - if (score < min_score) { - index = i; - min_score = score; - } - } - } - - if (index > -1) - qf[index].queueFamilyProperties.timestampValidBits++; - - return index; -} - -static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf, - VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf, - VkVideoCodecOperationFlagBitsKHR flags) -{ - int index = -1; - uint32_t min_score = UINT32_MAX; - - for (int i = 0; i < num_qf; i++) { - const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags; - const VkQueueFlagBits vflags = qf_vid[i].videoCodecOperations; - - if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR))) - continue; - - if (vflags & flags) { - uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits; - if (score < min_score) { - index = i; - min_score = score; - } - } - } - - if (index > -1) - qf[index].queueFamilyProperties.timestampValidBits++; - - return index; -} - -static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) -{ - uint32_t num; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - - VkQueueFamilyProperties2 *qf = NULL; - VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL; - - /* First get the number of queue families */ - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL); - if (!num) { - av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); - return AVERROR_EXTERNAL; - } - - /* Then allocate memory */ - qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2)); - if (!qf) - return AVERROR(ENOMEM); - - qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR)); - if (!qf_vid) - return AVERROR(ENOMEM); - - for (uint32_t i = 0; i < num; i++) { - qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) { - .sType = 
VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, - }; - qf[i] = (VkQueueFamilyProperties2) { - .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, - .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? &qf_vid[i] : NULL, - }; - } - - /* Finally retrieve the queue families */ - vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf); - - av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n"); - for (int i = 0; i < num; i++) { - av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s%s (queues: %i)\n", i, - ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "", - ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "", - ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "", - ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "", - ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "", - ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "", - ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_OPTICAL_FLOW_BIT_NV) ? " optical_flow" : "", - ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "", - qf[i].queueFamilyProperties.queueCount); - - /* We use this field to keep a score of how many times we've used that - * queue family in order to make better choices. */ - qf[i].queueFamilyProperties.timestampValidBits = 0; - } - - hwctx->nb_qf = 0; - - /* Pick each queue family to use. 
*/ -#define PICK_QF(type, vid_op) \ - do { \ - uint32_t i; \ - uint32_t idx; \ - \ - if (vid_op) \ - idx = pick_video_queue_family(qf, qf_vid, num, vid_op); \ - else \ - idx = pick_queue_family(qf, num, type); \ - \ - if (idx == -1) \ - continue; \ - \ - for (i = 0; i < hwctx->nb_qf; i++) { \ - if (hwctx->qf[i].idx == idx) { \ - hwctx->qf[i].flags |= type; \ - hwctx->qf[i].video_caps |= vid_op; \ - break; \ - } \ - } \ - if (i == hwctx->nb_qf) { \ - hwctx->qf[i].idx = idx; \ - hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \ - if (p->limit_queues || \ - p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) { \ - int max = p->limit_queues; \ - if (type == VK_QUEUE_GRAPHICS_BIT) \ - hwctx->qf[i].num = FFMIN(hwctx->qf[i].num, \ - max ? max : 1); \ - else if (max) \ - hwctx->qf[i].num = FFMIN(hwctx->qf[i].num, max); \ - } \ - hwctx->qf[i].flags = type; \ - hwctx->qf[i].video_caps = vid_op; \ - hwctx->nb_qf++; \ - } \ - } while (0) - - PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); - PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); - PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); - - PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR); - PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR); - - PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR); - PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR); - -#ifdef VK_KHR_video_decode_vp9 - PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR); -#endif - -#ifdef VK_KHR_video_encode_av1 - PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); -#endif - PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); - - av_free(qf); - av_free(qf_vid); - -#undef PICK_QF - - cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf, - 
sizeof(VkDeviceQueueCreateInfo)); - if (!cd->pQueueCreateInfos) - return AVERROR(ENOMEM); - - for (uint32_t i = 0; i < hwctx->nb_qf; i++) { - int dup = 0; - float *weights = NULL; - VkDeviceQueueCreateInfo *pc; - for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) { - if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) { - dup = 1; - break; - } - } - if (dup) - continue; - - weights = av_malloc_array(hwctx->qf[i].num, sizeof(float)); - if (!weights) { - for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) - av_free((void *)cd->pQueueCreateInfos[i].pQueuePriorities); - av_free((void *)cd->pQueueCreateInfos); - return AVERROR(ENOMEM); - } - - for (uint32_t j = 0; j < hwctx->qf[i].num; j++) - weights[j] = 1.0; - - pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos; - pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .queueFamilyIndex = hwctx->qf[i].idx, - .queueCount = hwctx->qf[i].num, - .pQueuePriorities = weights, - }; - } - -#if FF_API_VULKAN_FIXED_QUEUES -FF_DISABLE_DEPRECATION_WARNINGS - /* Setup deprecated fields */ - hwctx->queue_family_index = -1; - hwctx->queue_family_comp_index = -1; - hwctx->queue_family_tx_index = -1; - hwctx->queue_family_encode_index = -1; - hwctx->queue_family_decode_index = -1; - -#define SET_OLD_QF(field, nb_field, type) \ - do { \ - if (field < 0 && hwctx->qf[i].flags & type) { \ - field = hwctx->qf[i].idx; \ - nb_field = hwctx->qf[i].num; \ - } \ - } while (0) - - for (uint32_t i = 0; i < hwctx->nb_qf; i++) { - SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT); - SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT); - SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT); - SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); - SET_OLD_QF(hwctx->queue_family_decode_index, 
hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR); - } - -#undef SET_OLD_QF -FF_ENABLE_DEPRECATION_WARNINGS -#endif - - return 0; -} - -/* Only resources created by vulkan_device_create should be released here, - * resources created by vulkan_device_init should be released by - * vulkan_device_uninit, to make sure we don't free user provided resources, - * and there is no leak. - */ -static void vulkan_device_free(AVHWDeviceContext *ctx) -{ - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - - if (hwctx->act_dev) - vk->DestroyDevice(hwctx->act_dev, hwctx->alloc); - - if (p->debug_ctx) - vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx, - hwctx->alloc); - - if (hwctx->inst) - vk->DestroyInstance(hwctx->inst, hwctx->alloc); - - if (p->libvulkan) - dlclose(p->libvulkan); - - RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions); - RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions); -} - -static void vulkan_device_uninit(AVHWDeviceContext *ctx) -{ - VulkanDevicePriv *p = ctx->hwctx; - - for (uint32_t i = 0; i < p->nb_tot_qfs; i++) { - pthread_mutex_destroy(p->qf_mutex[i]); - av_freep(&p->qf_mutex[i]); - } - av_freep(&p->qf_mutex); - - ff_vk_uninit(&p->vkctx); -} - -static int vulkan_device_has_rebar(AVHWDeviceContext *ctx) -{ - VulkanDevicePriv *p = ctx->hwctx; - VkDeviceSize max_vram = 0, max_visible_vram = 0; - - /* Get device memory properties */ - for (int i = 0; i < p->mprops.memoryTypeCount; i++) { - const VkMemoryType type = p->mprops.memoryTypes[i]; - const VkMemoryHeap heap = p->mprops.memoryHeaps[type.heapIndex]; - if (!(type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) - continue; - max_vram = FFMAX(max_vram, heap.size); - if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - max_visible_vram = FFMAX(max_visible_vram, heap.size); - } - - return max_vram - max_visible_vram < 1024; /* 1 kB tolerance */ 
-} - -static int vulkan_device_create_internal(AVHWDeviceContext *ctx, - VulkanDeviceSelection *dev_select, - int disable_multiplane, - AVDictionary *opts, int flags) -{ - int err = 0; - VkResult ret; - AVDictionaryEntry *opt_d; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - enum FFVulkanDebugMode debug_mode = FF_VULKAN_DEBUG_NONE; - VulkanDeviceFeatures supported_feats = { 0 }; - VkDeviceCreateInfo dev_info = { - .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - }; - - /* Create an instance if not given one */ - if ((err = create_instance(ctx, opts, &debug_mode))) - goto end; - - /* Find a physical device (if not given one) */ - if ((err = find_device(ctx, dev_select))) - goto end; - - /* Find and enable extensions for the physical device */ - if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames, - &dev_info.enabledExtensionCount, debug_mode))) { - for (int i = 0; i < dev_info.queueCreateInfoCount; i++) - av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities); - av_free((void *)dev_info.pQueueCreateInfos); - goto end; - } - - /* Get supported memory types */ - vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); - - /* Get all supported features for the physical device */ - device_features_init(ctx, &supported_feats); - vk->GetPhysicalDeviceFeatures2(hwctx->phys_dev, &supported_feats.device); - - /* Copy all needed features from those supported and activate them */ - device_features_init(ctx, &p->feats); - device_features_copy_needed(&p->feats, &supported_feats); - dev_info.pNext = p->feats.device.pNext; - dev_info.pEnabledFeatures = &p->feats.device.features; - - /* Setup enabled queue families */ - if ((err = setup_queue_families(ctx, &dev_info))) - goto end; - - /* Finally create the device */ - ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc, - &hwctx->act_dev); - - for (int i = 0; i < dev_info.queueCreateInfoCount; i++) - 
av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities); - av_free((void *)dev_info.pQueueCreateInfos); - - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n", - ff_vk_ret2str(ret)); - for (int i = 0; i < dev_info.enabledExtensionCount; i++) - av_free((void *)dev_info.ppEnabledExtensionNames[i]); - av_free((void *)dev_info.ppEnabledExtensionNames); - err = AVERROR_EXTERNAL; - goto end; - } - - /* Tiled images setting, use them by default */ - opt_d = av_dict_get(opts, "linear_images", NULL, 0); - if (opt_d) - p->use_linear_images = strtol(opt_d->value, NULL, 10); - - /* Limit queues to a given number if needed */ - opt_d = av_dict_get(opts, "limit_queues", NULL, 0); - if (opt_d) - p->limit_queues = strtol(opt_d->value, NULL, 10); - - /* The disable_multiplane argument takes precedent over the option */ - p->disable_multiplane = disable_multiplane; - if (!p->disable_multiplane) { - opt_d = av_dict_get(opts, "disable_multiplane", NULL, 0); - if (opt_d) - p->disable_multiplane = strtol(opt_d->value, NULL, 10); - } - - /* Disable host pointer imports (by default on nvidia) */ - p->avoid_host_import = p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY; - opt_d = av_dict_get(opts, "avoid_host_import", NULL, 0); - if (opt_d) - p->avoid_host_import = strtol(opt_d->value, NULL, 10); - - /* Set the public device feature struct and its pNext chain */ - hwctx->device_features = p->feats.device; - - /* Set the list of all active extensions */ - hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames; - hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount; - - /* The extension lists need to be freed */ - ctx->free = vulkan_device_free; - -end: - return err; -} - -static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index) -{ - VulkanDevicePriv *p = ctx->hwctx; - pthread_mutex_lock(&p->qf_mutex[queue_family][index]); -} - -static void unlock_queue(AVHWDeviceContext *ctx, uint32_t 
queue_family, uint32_t index) -{ - VulkanDevicePriv *p = ctx->hwctx; - pthread_mutex_unlock(&p->qf_mutex[queue_family][index]); -} - -static int vulkan_device_init(AVHWDeviceContext *ctx) -{ - int err = 0; - uint32_t qf_num; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - VkQueueFamilyProperties2 *qf; - VkQueueFamilyVideoPropertiesKHR *qf_vid; - VkPhysicalDeviceExternalSemaphoreInfo ext_sem_props_info; - int graph_index, comp_index, tx_index, enc_index, dec_index; - - /* Set device extension flags */ - for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) { - for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) { - if (!strcmp(hwctx->enabled_dev_extensions[i], - optional_device_exts[j].name)) { - p->vkctx.extensions |= optional_device_exts[j].flag; - break; - } - } - } - - err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 1); - if (err < 0) { - av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n"); - return err; - } - - p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - p->props.pNext = &p->hprops; - p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT; - p->hprops.pNext = &p->dprops; - p->dprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; - - vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props); - av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", - p->props.properties.deviceName); - av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n"); - av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %"PRIu64"\n", - p->props.properties.limits.optimalBufferCopyRowPitchAlignment); - av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n", - p->props.properties.limits.minMemoryMapAlignment); - av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n", - p->props.properties.limits.nonCoherentAtomSize); - if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) - av_log(ctx, 
AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n", - p->hprops.minImportedHostPointerAlignment); - - vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL); - if (!qf_num) { - av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); - return AVERROR_EXTERNAL; - } - - ext_sem_props_info = (VkPhysicalDeviceExternalSemaphoreInfo) { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, - }; - - /* Opaque FD semaphore properties */ - ext_sem_props_info.handleType = -#ifdef _WIN32 - IsWindows8OrGreater() - ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT - : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT; -#else - VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; -#endif - p->ext_sem_props_opaque.sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES; - vk->GetPhysicalDeviceExternalSemaphoreProperties(hwctx->phys_dev, - &ext_sem_props_info, - &p->ext_sem_props_opaque); - - qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2)); - if (!qf) - return AVERROR(ENOMEM); - - qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR)); - if (!qf_vid) { - av_free(qf); - return AVERROR(ENOMEM); - } - - for (uint32_t i = 0; i < qf_num; i++) { - qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) { - .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, - }; - qf[i] = (VkQueueFamilyProperties2) { - .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, - .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? 
&qf_vid[i] : NULL, - }; - } - - vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf); - - p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex)); - if (!p->qf_mutex) { - err = AVERROR(ENOMEM); - goto end; - } - p->nb_tot_qfs = qf_num; - - for (uint32_t i = 0; i < qf_num; i++) { - p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount, - sizeof(**p->qf_mutex)); - if (!p->qf_mutex[i]) { - err = AVERROR(ENOMEM); - goto end; - } - for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) { - err = pthread_mutex_init(&p->qf_mutex[i][j], NULL); - if (err != 0) { - av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n", - av_err2str(err)); - err = AVERROR(err); - goto end; - } - } - } - -#if FF_API_VULKAN_FIXED_QUEUES -FF_DISABLE_DEPRECATION_WARNINGS - graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1; - comp_index = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1; - tx_index = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1; - dec_index = hwctx->nb_decode_queues ? hwctx->queue_family_decode_index : -1; - enc_index = hwctx->nb_encode_queues ? hwctx->queue_family_encode_index : -1; - -#define CHECK_QUEUE(type, required, fidx, ctx_qf, qc) \ - do { \ - if (ctx_qf < 0 && required) { \ - av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \ - " in the context!\n", type); \ - err = AVERROR(EINVAL); \ - goto end; \ - } else if (fidx < 0 || ctx_qf < 0) { \ - break; \ - } else if (ctx_qf >= qf_num) { \ - av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \ - type, ctx_qf, qf_num); \ - err = AVERROR(EINVAL); \ - goto end; \ - } \ - \ - av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \ - " for%s%s%s%s%s\n", \ - ctx_qf, qc, \ - ctx_qf == graph_index ? " graphics" : "", \ - ctx_qf == comp_index ? " compute" : "", \ - ctx_qf == tx_index ? " transfers" : "", \ - ctx_qf == enc_index ? 
" encode" : "", \ - ctx_qf == dec_index ? " decode" : ""); \ - graph_index = (ctx_qf == graph_index) ? -1 : graph_index; \ - comp_index = (ctx_qf == comp_index) ? -1 : comp_index; \ - tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \ - enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \ - dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \ - } while (0) - - CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues); - CHECK_QUEUE("compute", 1, comp_index, hwctx->queue_family_comp_index, hwctx->nb_comp_queues); - CHECK_QUEUE("upload", 1, tx_index, hwctx->queue_family_tx_index, hwctx->nb_tx_queues); - CHECK_QUEUE("decode", 0, dec_index, hwctx->queue_family_decode_index, hwctx->nb_decode_queues); - CHECK_QUEUE("encode", 0, enc_index, hwctx->queue_family_encode_index, hwctx->nb_encode_queues); - -#undef CHECK_QUEUE - - /* Update the new queue family fields. If non-zero already, - * it means API users have set it. */ - if (!hwctx->nb_qf) { -#define ADD_QUEUE(ctx_qf, qc, flag) \ - do { \ - if (ctx_qf != -1) { \ - hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \ - .idx = ctx_qf, \ - .num = qc, \ - .flags = flag, \ - }; \ - } \ - } while (0) - - ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT); - ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT); - ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT); - ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR); - ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); -#undef ADD_QUEUE - } -FF_ENABLE_DEPRECATION_WARNINGS -#endif - - for (int i = 0; i < hwctx->nb_qf; i++) { - if (!hwctx->qf[i].video_caps && - hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR | - VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) { - hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations; - } - } - - 
/* Setup array for pQueueFamilyIndices with used queue families */ - p->nb_img_qfs = 0; - for (int i = 0; i < hwctx->nb_qf; i++) { - int seen = 0; - /* Make sure each entry is unique - * (VUID-VkBufferCreateInfo-sharingMode-01419) */ - for (int j = (i - 1); j >= 0; j--) { - if (hwctx->qf[i].idx == hwctx->qf[j].idx) { - seen = 1; - break; - } - } - if (!seen) - p->img_qfs[p->nb_img_qfs++] = hwctx->qf[i].idx; - } - - if (!hwctx->lock_queue) - hwctx->lock_queue = lock_queue; - if (!hwctx->unlock_queue) - hwctx->unlock_queue = unlock_queue; - - /* Re-query device capabilities, in case the device was created externally */ - vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); - - p->vkctx.device = ctx; - p->vkctx.hwctx = hwctx; - - ff_vk_load_props(&p->vkctx); - p->compute_qf = ff_vk_qf_find(&p->vkctx, VK_QUEUE_COMPUTE_BIT, 0); - p->transfer_qf = ff_vk_qf_find(&p->vkctx, VK_QUEUE_TRANSFER_BIT, 0); - - /* Re-query device capabilities, in case the device was created externally */ - vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); - - /* Only use host image transfers if ReBAR is enabled */ - p->disable_host_transfer = !vulkan_device_has_rebar(ctx); - -end: - av_free(qf_vid); - av_free(qf); - return err; -} - -static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device, - AVDictionary *opts, int flags) -{ - VulkanDeviceSelection dev_select = { 0 }; - if (device && device[0]) { - char *end = NULL; - dev_select.index = strtol(device, &end, 10); - if (end == device) { - dev_select.index = 0; - dev_select.name = device; - } - } - - return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags); -} - -static int vulkan_device_derive(AVHWDeviceContext *ctx, - AVHWDeviceContext *src_ctx, - AVDictionary *opts, int flags) -{ - av_unused VulkanDeviceSelection dev_select = { 0 }; - - /* If there's only one device on the system, then even if its not covered - * by the following checks (e.g. 
non-PCIe ARM GPU), having an empty - * dev_select will mean it'll get picked. */ - switch(src_ctx->type) { -#if CONFIG_VAAPI - case AV_HWDEVICE_TYPE_VAAPI: { - AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; - VADisplay dpy = src_hwctx->display; -#if VA_CHECK_VERSION(1, 15, 0) - VAStatus vas; - VADisplayAttribute attr = { - .type = VADisplayPCIID, - }; -#endif - const char *vendor; - -#if VA_CHECK_VERSION(1, 15, 0) - vas = vaGetDisplayAttributes(dpy, &attr, 1); - if (vas == VA_STATUS_SUCCESS && attr.flags != VA_DISPLAY_ATTRIB_NOT_SUPPORTED) - dev_select.pci_device = (attr.value & 0xFFFF); -#endif - - if (!dev_select.pci_device) { - vendor = vaQueryVendorString(dpy); - if (!vendor) { - av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n"); - return AVERROR_EXTERNAL; - } - - if (strstr(vendor, "AMD")) - dev_select.vendor_id = 0x1002; - } - - return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags); - } -#endif -#if CONFIG_LIBDRM - case AV_HWDEVICE_TYPE_DRM: { - int err; - struct stat drm_node_info; - drmDevice *drm_dev_info; - AVDRMDeviceContext *src_hwctx = src_ctx->hwctx; - - err = fstat(src_hwctx->fd, &drm_node_info); - if (err) { - av_log(ctx, AV_LOG_ERROR, "Unable to get node info from DRM fd: %s!\n", - av_err2str(AVERROR(errno))); - return AVERROR_EXTERNAL; - } - - dev_select.drm_major = major(drm_node_info.st_dev); - dev_select.drm_minor = minor(drm_node_info.st_dev); - dev_select.has_drm = 1; - - err = drmGetDevice(src_hwctx->fd, &drm_dev_info); - if (err) { - av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd: %s!\n", - av_err2str(AVERROR(errno))); - return AVERROR_EXTERNAL; - } - - if (drm_dev_info->bustype == DRM_BUS_PCI) - dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id; - - drmFreeDevice(&drm_dev_info); - - return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags); - } -#endif -#if CONFIG_CUDA - case AV_HWDEVICE_TYPE_CUDA: { - AVHWDeviceContext *cuda_cu = src_ctx; - 
AVCUDADeviceContext *src_hwctx = src_ctx->hwctx; - AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal; - CudaFunctions *cu = cu_internal->cuda_dl; - - int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid, - cu_internal->cuda_device)); - if (ret < 0) { - av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n"); - return AVERROR_EXTERNAL; - } - - dev_select.has_uuid = 1; - - /* - * CUDA is not able to import multiplane images, so always derive a - * Vulkan device with multiplane disabled. - */ - return vulkan_device_create_internal(ctx, &dev_select, 1, opts, flags); - } -#endif - default: - return AVERROR(ENOSYS); - } -} - -static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, - const void *hwconfig, - AVHWFramesConstraints *constraints) -{ - int count = 0; - VulkanDevicePriv *p = ctx->hwctx; - - for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { - count += vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, - p->use_linear_images ? VK_IMAGE_TILING_LINEAR : - VK_IMAGE_TILING_OPTIMAL, - NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0; - } - - constraints->valid_sw_formats = av_malloc_array(count + 1, - sizeof(enum AVPixelFormat)); - if (!constraints->valid_sw_formats) - return AVERROR(ENOMEM); - - count = 0; - for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { - if (vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, - p->use_linear_images ? 
VK_IMAGE_TILING_LINEAR : - VK_IMAGE_TILING_OPTIMAL, - NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0) { - constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt; - } - } - - constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE; - - constraints->min_width = 1; - constraints->min_height = 1; - constraints->max_width = p->props.properties.limits.maxImageDimension2D; - constraints->max_height = p->props.properties.limits.maxImageDimension2D; - - constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat)); - if (!constraints->valid_hw_formats) - return AVERROR(ENOMEM); - - constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN; - constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; - - return 0; -} - -static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, const void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) -{ - VkResult ret; - int index = -1; - VulkanDevicePriv *p = ctx->hwctx; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - AVVulkanDeviceContext *dev_hwctx = &p->p; - VkMemoryAllocateInfo alloc_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = alloc_extension, - .allocationSize = req->size, - }; - - /* The vulkan spec requires memory types to be sorted in the "optimal" - * order, so the first matching type we find will be the best/fastest one */ - for (int i = 0; i < p->mprops.memoryTypeCount; i++) { - const VkMemoryType *type = &p->mprops.memoryTypes[i]; - - /* The memory type must be supported by the requirements (bitfield) */ - if (!(req->memoryTypeBits & (1 << i))) - continue; - - /* The memory type flags must include our properties */ - if ((type->propertyFlags & req_flags) != req_flags) - continue; - - /* The memory type must be large enough */ - if (req->size > p->mprops.memoryHeaps[type->heapIndex].size) - continue; - - /* Found a suitable memory type */ - index = i; - break; - } - - if (index < 0) { - av_log(ctx, 
AV_LOG_ERROR, "No memory type found for flags 0x%x\n", - req_flags); - return AVERROR(EINVAL); - } - - alloc_info.memoryTypeIndex = index; - - ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info, - dev_hwctx->alloc, mem); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", - ff_vk_ret2str(ret)); - return AVERROR(ENOMEM); - } - - *mem_flags |= p->mprops.memoryTypes[index].propertyFlags; - - return 0; -} - -static void vulkan_free_internal(AVVkFrame *f) -{ - av_unused AVVkFrameInternal *internal = f->internal; - -#if CONFIG_CUDA - if (internal->cuda_fc_ref) { - AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data; - int planes = av_pix_fmt_count_planes(cuda_fc->sw_format); - AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; - AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; - AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; - CudaFunctions *cu = cu_internal->cuda_dl; - - for (int i = 0; i < planes; i++) { - if (internal->cu_sem[i]) - CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i])); - if (internal->cu_mma[i]) - CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i])); - if (internal->ext_mem[i]) - CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i])); -#ifdef _WIN32 - if (internal->ext_sem_handle[i]) - CloseHandle(internal->ext_sem_handle[i]); - if (internal->ext_mem_handle[i]) - CloseHandle(internal->ext_mem_handle[i]); -#endif - } - - av_buffer_unref(&internal->cuda_fc_ref); - } -#endif - - pthread_mutex_destroy(&internal->update_mutex); - av_freep(&f->internal); -} - -static void vulkan_frame_free(AVHWFramesContext *hwfc, AVVkFrame *f) -{ - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - int nb_images = ff_vk_count_images(f); - int nb_sems = 0; - - while (nb_sems < FF_ARRAY_ELEMS(f->sem) && f->sem[nb_sems]) - nb_sems++; - - if (nb_sems) { - VkSemaphoreWaitInfo sem_wait = { - .sType 
= VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, - .flags = 0x0, - .pSemaphores = f->sem, - .pValues = f->sem_value, - .semaphoreCount = nb_sems, - }; - - vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); - } - - vulkan_free_internal(f); - - for (int i = 0; i < nb_images; i++) { - vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); - vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); - } - - av_free(f); -} - -static void vulkan_frame_free_cb(void *opaque, uint8_t *data) -{ - vulkan_frame_free(opaque, (AVVkFrame*)data); -} - -static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, - void *alloc_pnext, size_t alloc_pnext_stride) -{ - int img_cnt = 0, err; - VkResult ret; - AVHWDeviceContext *ctx = hwfc->device_ctx; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; - - while (f->img[img_cnt]) { - int use_ded_mem; - VkImageMemoryRequirementsInfo2 req_desc = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, - .image = f->img[img_cnt], - }; - VkMemoryDedicatedAllocateInfo ded_alloc = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride), - }; - VkMemoryDedicatedRequirements ded_req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, - }; - VkMemoryRequirements2 req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, - .pNext = &ded_req, - }; - - vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req); - - if (f->tiling == VK_IMAGE_TILING_LINEAR) - req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size, - p->props.properties.limits.minMemoryMapAlignment); - - /* In case the implementation prefers/requires dedicated allocation */ - use_ded_mem = ded_req.prefersDedicatedAllocation | - ded_req.requiresDedicatedAllocation; 
- if (use_ded_mem) - ded_alloc.image = f->img[img_cnt]; - - /* Allocate memory */ - if ((err = alloc_mem(ctx, &req.memoryRequirements, - f->tiling == VK_IMAGE_TILING_LINEAR ? - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, - &f->flags, &f->mem[img_cnt]))) - return err; - - f->size[img_cnt] = req.memoryRequirements.size; - bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; - bind_info[img_cnt].image = f->img[img_cnt]; - bind_info[img_cnt].memory = f->mem[img_cnt]; - - img_cnt++; - } - - /* Bind the allocated memory to the images */ - ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} - -enum PrepMode { - PREP_MODE_GENERAL, - PREP_MODE_WRITE, - PREP_MODE_EXTERNAL_EXPORT, - PREP_MODE_EXTERNAL_IMPORT, - PREP_MODE_DECODING_DST, - PREP_MODE_DECODING_DPB, - PREP_MODE_ENCODING_DPB, -}; - -static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx, - AVVkFrame *frame, enum PrepMode pmode) -{ - int err; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; - int nb_img_bar = 0; - - uint32_t dst_qf = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; - VkImageLayout new_layout; - VkAccessFlags2 new_access; - VkPipelineStageFlagBits2 src_stage = VK_PIPELINE_STAGE_2_NONE; - - /* This is dirty - but it works. The vulkan.c dependency system doesn't - * free non-refcounted frames, and non-refcounted hardware frames cannot - * happen anywhere outside of here. 
*/ - AVBufferRef tmp_ref = { - .data = (uint8_t *)hwfc, - }; - AVFrame tmp_frame = { - .data[0] = (uint8_t *)frame, - .hw_frames_ctx = &tmp_ref, - }; - - VkCommandBuffer cmd_buf; - FFVkExecContext *exec = ff_vk_exec_get(&p->vkctx, ectx); - cmd_buf = exec->buf; - ff_vk_exec_start(&p->vkctx, exec); - - err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, &tmp_frame, - VK_PIPELINE_STAGE_2_NONE, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); - if (err < 0) - return err; - - switch (pmode) { - case PREP_MODE_GENERAL: - new_layout = VK_IMAGE_LAYOUT_GENERAL; - new_access = VK_ACCESS_TRANSFER_WRITE_BIT; - break; - case PREP_MODE_WRITE: - new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - new_access = VK_ACCESS_TRANSFER_WRITE_BIT; - break; - case PREP_MODE_EXTERNAL_IMPORT: - new_layout = VK_IMAGE_LAYOUT_GENERAL; - new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; - break; - case PREP_MODE_EXTERNAL_EXPORT: - new_layout = VK_IMAGE_LAYOUT_GENERAL; - new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; - dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; - src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; - break; - case PREP_MODE_DECODING_DST: - new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; - new_access = VK_ACCESS_TRANSFER_WRITE_BIT; - break; - case PREP_MODE_DECODING_DPB: - new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR; - new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; - break; - case PREP_MODE_ENCODING_DPB: - new_layout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR; - new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; - break; - } - - ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar, - src_stage, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - new_access, new_layout, dst_qf); - - vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - - err = ff_vk_exec_submit(&p->vkctx, 
exec); - if (err < 0) - return err; - - /* We can do this because there are no real dependencies */ - ff_vk_exec_discard_deps(&p->vkctx, exec); - - return 0; -} - -static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format, - int frame_w, int frame_h, int plane) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); - - /* Currently always true unless gray + alpha support is added */ - if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB || - !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) { - *w = frame_w; - *h = frame_h; - return; - } - - *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w); - *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h); -} - -static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, - VkImageTiling tiling, VkImageUsageFlagBits usage, - VkImageCreateFlags flags, int nb_layers, - void *create_pnext) -{ - int err; - VkResult ret; - AVVulkanFramesContext *hwfc_vk = hwfc->hwctx; - AVHWDeviceContext *ctx = hwfc->device_ctx; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - AVVkFrame *f; - - VkSemaphoreTypeCreateInfo sem_type_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, - .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, - .initialValue = 0, - }; - VkSemaphoreCreateInfo sem_spawn = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = &sem_type_info, - }; - - VkExportSemaphoreCreateInfo ext_sem_info_opaque = { - .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, -#ifdef _WIN32 - .handleTypes = IsWindows8OrGreater() - ? 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT - : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, -#else - .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, -#endif - }; - - /* Check if exporting is supported before chaining any structs */ - if (p->ext_sem_props_opaque.externalSemaphoreFeatures & VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT) { - if (p->vkctx.extensions & (FF_VK_EXT_EXTERNAL_WIN32_SEM | FF_VK_EXT_EXTERNAL_FD_SEM)) - ff_vk_link_struct(&sem_type_info, &ext_sem_info_opaque); - } - - f = av_vk_frame_alloc(); - if (!f) { - av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); - return AVERROR(ENOMEM); - } - - // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled) - - /* Create the images */ - for (int i = 0; (hwfc_vk->format[i] != VK_FORMAT_UNDEFINED); i++) { - VkImageCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = create_pnext, - .imageType = VK_IMAGE_TYPE_2D, - .format = hwfc_vk->format[i], - .extent.depth = 1, - .mipLevels = 1, - .arrayLayers = nb_layers, - .flags = flags, - .tiling = tiling, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .usage = usage, - .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = p->img_qfs, - .queueFamilyIndexCount = p->nb_img_qfs, - .sharingMode = p->nb_img_qfs > 1 ? 
VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, - }; - - get_plane_wh(&create_info.extent.width, &create_info.extent.height, - hwfc->sw_format, hwfc->width, hwfc->height, i); - - ret = vk->CreateImage(hwctx->act_dev, &create_info, - hwctx->alloc, &f->img[i]); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR(EINVAL); - goto fail; - } - - /* Create semaphore */ - ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[i]); - if (ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; - f->layout[i] = create_info.initialLayout; - f->access[i] = 0x0; - f->sem_value[i] = 0; - } - - f->flags = 0x0; - f->tiling = tiling; - - *frame = f; - return 0; - -fail: - vulkan_frame_free(hwfc, f); - return err; -} - -/* Checks if an export flag is enabled, and if it is ORs it with *iexp */ -static void try_export_flags(AVHWFramesContext *hwfc, - VkExternalMemoryHandleTypeFlags *comp_handle_types, - VkExternalMemoryHandleTypeFlagBits *iexp, - VkExternalMemoryHandleTypeFlagBits exp) -{ - VkResult ret; - AVVulkanFramesContext *hwctx = hwfc->hwctx; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - AVVulkanDeviceContext *dev_hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - - const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info = - ff_vk_find_struct(hwctx->create_pnext, - VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); - int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info; - int nb_mods; - - VkExternalImageFormatProperties eprops = { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, - }; - VkImageFormatProperties2 props = { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, - .pNext = &eprops, - }; - 
VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, - .pNext = NULL, - .pQueueFamilyIndices = p->img_qfs, - .queueFamilyIndexCount = p->nb_img_qfs, - .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, - }; - VkPhysicalDeviceExternalImageFormatInfo enext = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, - .handleType = exp, - .pNext = has_mods ? &phy_dev_mod_info : NULL, - }; - VkPhysicalDeviceImageFormatInfo2 pinfo = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, - .pNext = !exp ? NULL : &enext, - .format = vk_find_format_entry(hwfc->sw_format)->vkf, - .type = VK_IMAGE_TYPE_2D, - .tiling = hwctx->tiling, - .usage = hwctx->usage, - .flags = VK_IMAGE_CREATE_ALIAS_BIT, - }; - - nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1; - for (int i = 0; i < nb_mods; i++) { - if (has_mods) - phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[i]; - - ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev, - &pinfo, &props); - - if (ret == VK_SUCCESS) { - *iexp |= exp; - *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes; - } - } -} - -static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) -{ - int err; - AVVkFrame *f; - AVBufferRef *avbuf = NULL; - AVHWFramesContext *hwfc = opaque; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - VulkanFramesPriv *fp = hwfc->hwctx; - AVVulkanFramesContext *hwctx = &fp->p; - VkExternalMemoryHandleTypeFlags e = 0x0; - VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; - - VkExternalMemoryImageCreateInfo eiinfo = { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, - .pNext = hwctx->create_pnext, - }; - -#ifdef _WIN32 - if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) - try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater() - ? 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT - : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT); -#else - if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) - try_export_flags(hwfc, &eiinfo.handleTypes, &e, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); - - if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_DMABUF_MEMORY && - hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) - try_export_flags(hwfc, &eiinfo.handleTypes, &e, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); -#endif - - for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) { - eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; - eminfo[i].pNext = hwctx->alloc_pnext[i]; - eminfo[i].handleTypes = e; - } - - err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, - hwctx->nb_layers, - eiinfo.handleTypes ? &eiinfo : hwctx->create_pnext); - if (err) - return NULL; - - err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo)); - if (err) - goto fail; - - if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && - !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) - err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB); - else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR) - err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST); - else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) - err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_ENCODING_DPB); - else if (hwctx->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) - err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE); - else - err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_GENERAL); - if (err) - goto fail; - - avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame), - vulkan_frame_free_cb, hwfc, 0); - if (!avbuf) - goto fail; - - return avbuf; - -fail: - vulkan_frame_free(hwfc, f); - return NULL; -} - -static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) -{ - 
pthread_mutex_lock(&vkf->internal->update_mutex); -} - -static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) -{ - pthread_mutex_unlock(&vkf->internal->update_mutex); -} - -static void vulkan_frames_uninit(AVHWFramesContext *hwfc) -{ - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - VulkanFramesPriv *fp = hwfc->hwctx; - - if (fp->modifier_info) { - if (fp->modifier_info->pDrmFormatModifiers) - av_freep(&fp->modifier_info->pDrmFormatModifiers); - av_freep(&fp->modifier_info); - } - - ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec); - ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec); - ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec); - - av_buffer_pool_uninit(&fp->tmp); -} - -static int vulkan_frames_init(AVHWFramesContext *hwfc) -{ - int err; - AVVkFrame *f; - VulkanFramesPriv *fp = hwfc->hwctx; - AVVulkanFramesContext *hwctx = &fp->p; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - AVVulkanDeviceContext *dev_hwctx = &p->p; - VkImageUsageFlagBits supported_usage; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - const struct FFVkFormatEntry *fmt; - int disable_multiplane = p->disable_multiplane || - (hwctx->flags & AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE); - - /* Defaults */ - if (!hwctx->nb_layers) - hwctx->nb_layers = 1; - - /* VK_IMAGE_TILING_OPTIMAL == 0, can't check for it really */ - if (p->use_linear_images && - (hwctx->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)) - hwctx->tiling = VK_IMAGE_TILING_LINEAR; - - - fmt = vk_find_format_entry(hwfc->sw_format); - if (!fmt) { - av_log(hwfc, AV_LOG_ERROR, "Unsupported pixel format: %s!\n", - av_get_pix_fmt_name(hwfc->sw_format)); - return AVERROR(EINVAL); - } - - if (hwctx->format[0] != VK_FORMAT_UNDEFINED) { - if (hwctx->format[0] != fmt->vkf) { - for (int i = 0; i < fmt->nb_images_fallback; i++) { - if (hwctx->format[i] != fmt->fallback[i]) { - av_log(hwfc, AV_LOG_ERROR, "Incompatible Vulkan format given " - "for the current sw_format %s!\n", - av_get_pix_fmt_name(hwfc->sw_format)); - return 
AVERROR(EINVAL); - } - } - } - - /* Check if the sw_format itself is supported */ - err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, - hwctx->tiling, NULL, - NULL, NULL, &supported_usage, 0, - !hwctx->usage || - (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT)); - if (err < 0) { - av_log(hwfc, AV_LOG_ERROR, "Unsupported sw format: %s!\n", - av_get_pix_fmt_name(hwfc->sw_format)); - return AVERROR(EINVAL); - } - } else { - err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, - hwctx->tiling, hwctx->format, NULL, - NULL, &supported_usage, - disable_multiplane, - !hwctx->usage || - (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT)); - if (err < 0) - return err; - } - - /* Nvidia is violating the spec because they thought no one would use this. */ - if (p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY && - (((fmt->nb_images == 1) && (fmt->vk_planes > 1)) || - (av_pix_fmt_desc_get(hwfc->sw_format)->nb_components == 1))) - supported_usage &= ~VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT; - - /* Image usage flags */ - if (!hwctx->usage) { - hwctx->usage = supported_usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT); - - if ((p->vkctx.extensions & FF_VK_EXT_HOST_IMAGE_COPY) && !p->disable_host_transfer) - hwctx->usage |= supported_usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT; - - /* Enables encoding of images, if supported by format and extensions */ - if ((supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) && - (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | - FF_VK_EXT_VIDEO_MAINTENANCE_1))) - hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; - } - - /* Image creation flags. - * Only fill them in automatically if the image is not going to be used as - * a DPB-only image, and we have SAMPLED/STORAGE bits set. 
*/ - if (!hwctx->img_flags) { - int is_lone_dpb = ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) || - ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && - !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))); - int sampleable = hwctx->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT); - hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; - if (sampleable && !is_lone_dpb) { - hwctx->img_flags |= VK_IMAGE_CREATE_ALIAS_BIT; - if ((fmt->vk_planes > 1) && (hwctx->format[0] == fmt->vkf)) - hwctx->img_flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; - } - } - - /* If the image has an ENCODE_SRC usage, and the maintenance1 - * extension is supported, check if it has a profile list. - * If there's no profile list, or it has no encode operations, - * then allow creating the image with no specific profile. */ - if ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) && - (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | - FF_VK_EXT_VIDEO_MAINTENANCE_1))) { - const VkVideoProfileListInfoKHR *pl; - pl = ff_vk_find_struct(hwctx->create_pnext, VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); - if (!pl) { - hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR; - } else { - uint32_t i; - for (i = 0; i < pl->profileCount; i++) { - /* Video ops start at exactly 0x00010000 */ - if (pl->pProfiles[i].videoCodecOperation & 0xFFFF0000) - break; - } - if (i == pl->profileCount) - hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR; - } - } - - if (!hwctx->lock_frame) - hwctx->lock_frame = lock_frame; - - if (!hwctx->unlock_frame) - hwctx->unlock_frame = unlock_frame; - - err = ff_vk_exec_pool_init(&p->vkctx, p->compute_qf, &fp->compute_exec, - p->compute_qf->num, 0, 0, 0, NULL); - if (err) - return err; - - err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, &fp->upload_exec, - p->transfer_qf->num*2, 0, 0, 0, NULL); - if (err) - return err; - - err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, 
&fp->download_exec, - p->transfer_qf->num, 0, 0, 0, NULL); - if (err) - return err; - - /* Test to see if allocation will fail */ - err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, - hwctx->nb_layers, hwctx->create_pnext); - if (err) - return err; - - /* Collect `VkDrmFormatModifierPropertiesEXT` for each plane. Required for DRM export. */ - if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS && hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { - VkImageDrmFormatModifierPropertiesEXT drm_mod = { - .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, - }; - err = vk->GetImageDrmFormatModifierPropertiesEXT(dev_hwctx->act_dev, f->img[0], - &drm_mod); - if (err != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to get image DRM format modifier properties"); - vulkan_frame_free(hwfc, f); - return AVERROR_EXTERNAL; - } - for (int i = 0; i < fmt->vk_planes; ++i) { - VkDrmFormatModifierPropertiesListEXT modp; - VkFormatProperties2 fmtp; - VkDrmFormatModifierPropertiesEXT *mod_props = NULL; - - modp = (VkDrmFormatModifierPropertiesListEXT) { - .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, - }; - fmtp = (VkFormatProperties2) { - .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, - .pNext = &modp, - }; - - /* query drmFormatModifierCount by keeping pDrmFormatModifierProperties NULL */ - vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt->fallback[i], &fmtp); - - modp.pDrmFormatModifierProperties = - av_calloc(modp.drmFormatModifierCount, sizeof(*modp.pDrmFormatModifierProperties)); - if (!modp.pDrmFormatModifierProperties) { - vulkan_frame_free(hwfc, f); - return AVERROR(ENOMEM); - } - vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt->fallback[i], &fmtp); - - for (uint32_t i = 0; i < modp.drmFormatModifierCount; ++i) { - VkDrmFormatModifierPropertiesEXT *m = &modp.pDrmFormatModifierProperties[i]; - if (m->drmFormatModifier == drm_mod.drmFormatModifier) { - mod_props = m; 
- break; - } - } - - if (mod_props == NULL) { - av_log(hwfc, AV_LOG_ERROR, "No DRM format modifier properties found for modifier 0x%016"PRIx64"\n", - drm_mod.drmFormatModifier); - av_free(modp.pDrmFormatModifierProperties); - vulkan_frame_free(hwfc, f); - return AVERROR_EXTERNAL; - } - - fp->drm_format_modifier_properties[i] = *mod_props; - av_free(modp.pDrmFormatModifierProperties); - } - } - - vulkan_frame_free(hwfc, f); - - /* If user did not specify a pool, hwfc->pool will be set to the internal one - * in hwcontext.c just after this gets called */ - if (!hwfc->pool) { - ffhwframesctx(hwfc)->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame), - hwfc, vulkan_pool_alloc, - NULL); - if (!ffhwframesctx(hwfc)->pool_internal) - return AVERROR(ENOMEM); - } - - return 0; -} - -static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame) -{ - frame->buf[0] = av_buffer_pool_get(hwfc->pool); - if (!frame->buf[0]) - return AVERROR(ENOMEM); - - frame->data[0] = frame->buf[0]->data; - frame->format = AV_PIX_FMT_VULKAN; - frame->width = hwfc->width; - frame->height = hwfc->height; - - return 0; -} - -static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc, - enum AVHWFrameTransferDirection dir, - enum AVPixelFormat **formats) -{ - enum AVPixelFormat *fmts; - int n = 2; - -#if CONFIG_CUDA - n++; -#endif - fmts = av_malloc_array(n, sizeof(*fmts)); - if (!fmts) - return AVERROR(ENOMEM); - - n = 0; - fmts[n++] = hwfc->sw_format; -#if CONFIG_CUDA - fmts[n++] = AV_PIX_FMT_CUDA; -#endif - fmts[n++] = AV_PIX_FMT_NONE; - - *formats = fmts; - return 0; -} - -#if CONFIG_LIBDRM -static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) -{ - vulkan_frame_free(hwfc, hwmap->priv); -} - -static const struct { - uint32_t drm_fourcc; - VkFormat vk_format; -} vulkan_drm_format_map[] = { - { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM }, - { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM }, - { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM }, - { DRM_FORMAT_RG88, 
VK_FORMAT_R8G8_UNORM }, - { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM }, - { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM }, - { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM }, - { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM }, - { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM }, - { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM }, - { DRM_FORMAT_ARGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 }, - { DRM_FORMAT_ABGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 }, - { DRM_FORMAT_XRGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 }, - { DRM_FORMAT_XBGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 }, - - // All these DRM_FORMATs were added in the same libdrm commit. -#ifdef DRM_FORMAT_XYUV8888 - { DRM_FORMAT_XYUV8888, VK_FORMAT_R8G8B8A8_UNORM }, - { DRM_FORMAT_XVYU2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 } , - { DRM_FORMAT_XVYU12_16161616, VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16 } , - { DRM_FORMAT_XVYU16161616, VK_FORMAT_R16G16B16A16_UNORM } , -#endif -}; - -static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc) -{ - for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) - if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc) - return vulkan_drm_format_map[i].vk_format; - return VK_FORMAT_UNDEFINED; -} - -static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame, - const AVFrame *src, int flags) -{ - int err = 0; - VkResult ret; - AVVkFrame *f; - int bind_counts = 0; - AVHWDeviceContext *ctx = hwfc->device_ctx; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; - VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES]; - VkBindImagePlaneMemoryInfo plane_info[AV_DRM_MAX_PLANES]; - - for (int i = 0; i < desc->nb_layers; i++) { - if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) { - av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n", - 
desc->layers[i].format); - return AVERROR(EINVAL); - } - } - - if (!(f = av_vk_frame_alloc())) { - av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); - err = AVERROR(ENOMEM); - goto fail; - } - - f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; - - for (int i = 0; i < desc->nb_layers; i++) { - const int planes = desc->layers[i].nb_planes; - - /* Semaphore */ - VkSemaphoreTypeCreateInfo sem_type_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, - .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, - .initialValue = 0, - }; - VkSemaphoreCreateInfo sem_spawn = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = &sem_type_info, - }; - - /* Image creation */ - VkSubresourceLayout ext_img_layouts[AV_DRM_MAX_PLANES]; - VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = { - .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT, - .drmFormatModifier = desc->objects[0].format_modifier, - .drmFormatModifierPlaneCount = planes, - .pPlaneLayouts = (const VkSubresourceLayout *)&ext_img_layouts, - }; - VkExternalMemoryImageCreateInfo ext_img_spec = { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, - .pNext = &ext_img_mod_spec, - .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, - }; - VkImageCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = &ext_img_spec, - .imageType = VK_IMAGE_TYPE_2D, - .format = drm_to_vulkan_fmt(desc->layers[i].format), - .extent.depth = 1, - .mipLevels = 1, - .arrayLayers = 1, - .flags = 0x0, - .tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ - .usage = 0x0, /* filled in below */ - .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = p->img_qfs, - .queueFamilyIndexCount = p->nb_img_qfs, - .sharingMode = p->nb_img_qfs > 1 ? 
VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, - }; - - /* Image format verification */ - VkExternalImageFormatProperties ext_props = { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, - }; - VkImageFormatProperties2 props_ret = { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, - .pNext = &ext_props, - }; - VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, - .drmFormatModifier = ext_img_mod_spec.drmFormatModifier, - .pQueueFamilyIndices = create_info.pQueueFamilyIndices, - .queueFamilyIndexCount = create_info.queueFamilyIndexCount, - .sharingMode = create_info.sharingMode, - }; - VkPhysicalDeviceExternalImageFormatInfo props_ext = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, - .pNext = &props_drm_mod, - .handleType = ext_img_spec.handleTypes, - }; - VkPhysicalDeviceImageFormatInfo2 fmt_props; - - if (flags & AV_HWFRAME_MAP_READ) - create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT; - if (flags & AV_HWFRAME_MAP_WRITE) - create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT; - - fmt_props = (VkPhysicalDeviceImageFormatInfo2) { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, - .pNext = &props_ext, - .format = create_info.format, - .type = create_info.imageType, - .tiling = create_info.tiling, - .usage = create_info.usage, - .flags = create_info.flags, - }; - - /* Check if importing is possible for this combination of parameters */ - ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->phys_dev, - &fmt_props, &props_ret); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - /* Set the image width/height */ - get_plane_wh(&create_info.extent.width, &create_info.extent.height, - hwfc->sw_format, src->width, 
src->height, i); - - /* Set the subresource layout based on the layer properties */ - for (int j = 0; j < planes; j++) { - ext_img_layouts[j].offset = desc->layers[i].planes[j].offset; - ext_img_layouts[j].rowPitch = desc->layers[i].planes[j].pitch; - ext_img_layouts[j].size = 0; /* The specs say so for all 3 */ - ext_img_layouts[j].arrayPitch = 0; - ext_img_layouts[j].depthPitch = 0; - } - - /* Create image */ - ret = vk->CreateImage(hwctx->act_dev, &create_info, - hwctx->alloc, &f->img[i]); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR(EINVAL); - goto fail; - } - - ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[i]); - if (ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - f->queue_family[i] = VK_QUEUE_FAMILY_EXTERNAL; - f->layout[i] = create_info.initialLayout; - f->access[i] = 0x0; - f->sem_value[i] = 0; - } - - for (int i = 0; i < desc->nb_layers; i++) { - /* Memory requirements */ - VkImageMemoryRequirementsInfo2 req_desc = { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, - .image = f->img[i], - }; - VkMemoryDedicatedRequirements ded_req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, - }; - VkMemoryRequirements2 req2 = { - .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, - .pNext = &ded_req, - }; - - /* Allocation/importing */ - VkMemoryFdPropertiesKHR fdmp = { - .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, - }; - /* This assumes that a layer will never be constructed from multiple - * objects. If that was to happen in the real world, this code would - * need to import each plane separately. 
- */ - VkImportMemoryFdInfoKHR idesc = { - .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - .fd = dup(desc->objects[desc->layers[i].planes[0].object_index].fd), - .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, - }; - VkMemoryDedicatedAllocateInfo ded_alloc = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = &idesc, - .image = req_desc.image, - }; - - /* Get object properties */ - ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, - idesc.fd, &fdmp); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - close(idesc.fd); - goto fail; - } - - vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2); - - /* Only a single bit must be set, not a range, and it must match */ - req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits; - - err = alloc_mem(ctx, &req2.memoryRequirements, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - (ded_req.prefersDedicatedAllocation || - ded_req.requiresDedicatedAllocation) ? - &ded_alloc : ded_alloc.pNext, - &f->flags, &f->mem[i]); - if (err) { - close(idesc.fd); - return err; - } - - f->size[i] = req2.memoryRequirements.size; - } - - for (int i = 0; i < desc->nb_layers; i++) { - const int planes = desc->layers[i].nb_planes; - for (int j = 0; j < planes; j++) { - VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT : - j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT : - VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT; - - plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO; - plane_info[bind_counts].pNext = NULL; - plane_info[bind_counts].planeAspect = aspect; - - bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; - bind_info[bind_counts].pNext = planes > 1 ? 
&plane_info[bind_counts] : NULL; - bind_info[bind_counts].image = f->img[i]; - bind_info[bind_counts].memory = f->mem[i]; - - /* Offset is already signalled via pPlaneLayouts above */ - bind_info[bind_counts].memoryOffset = 0; - - bind_counts++; - } - } - - /* Bind the allocated memory to the images */ - ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - *frame = f; - - return 0; - -fail: - vulkan_frame_free(hwfc, f); - - return err; -} - -static int vulkan_map_from_drm_frame_sync(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src, int flags) -{ - int err; - VkResult ret; - AVHWDeviceContext *ctx = hwfc->device_ctx; - VulkanDevicePriv *p = ctx->hwctx; - VulkanFramesPriv *fp = hwfc->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - - const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; - -#ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE - if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM) { - VkCommandBuffer cmd_buf; - FFVkExecContext *exec; - VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; - VkSemaphore drm_sync_sem[AV_DRM_MAX_PLANES] = { 0 }; - int nb_img_bar = 0; - - for (int i = 0; i < desc->nb_objects; i++) { - VkSemaphoreTypeCreateInfo sem_type_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, - .semaphoreType = VK_SEMAPHORE_TYPE_BINARY, - }; - VkSemaphoreCreateInfo sem_spawn = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = &sem_type_info, - }; - VkImportSemaphoreFdInfoKHR import_info; - struct dma_buf_export_sync_file implicit_fd_info = { - .flags = DMA_BUF_SYNC_READ, - .fd = -1, - }; - - if (ioctl(desc->objects[i].fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, - &implicit_fd_info)) { - err = AVERROR(errno); - av_log(hwctx, AV_LOG_ERROR, "Failed to retrieve implicit DRM sync file: %s\n", - 
av_err2str(err)); - for (; i >= 0; i--) - vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc); - return err; - } - - ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &drm_sync_sem[i]); - if (ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - for (; i >= 0; i--) - vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc); - return err; - } - - import_info = (VkImportSemaphoreFdInfoKHR) { - .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, - .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, - .semaphore = drm_sync_sem[i], - .fd = implicit_fd_info.fd, - }; - - ret = vk->ImportSemaphoreFdKHR(hwctx->act_dev, &import_info); - if (ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to import semaphore: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - for (; i >= 0; i--) - vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc); - return err; - } - } - - exec = ff_vk_exec_get(&p->vkctx, &fp->compute_exec); - cmd_buf = exec->buf; - - ff_vk_exec_start(&p->vkctx, exec); - - /* Ownership of semaphores is passed */ - err = ff_vk_exec_add_dep_bool_sem(&p->vkctx, exec, - drm_sync_sem, desc->nb_objects, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, 1); - if (err < 0) - return err; - - err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, dst, - VK_PIPELINE_STAGE_2_NONE, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); - if (err < 0) - return err; - - ff_vk_frame_barrier(&p->vkctx, exec, dst, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_NONE, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - ((flags & AV_HWFRAME_MAP_READ) ? - VK_ACCESS_2_SHADER_SAMPLED_READ_BIT : 0x0) | - ((flags & AV_HWFRAME_MAP_WRITE) ? - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT : 0x0), - VK_IMAGE_LAYOUT_GENERAL, - p->nb_img_qfs > 1 ? 
VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]); - - vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - - err = ff_vk_exec_submit(&p->vkctx, exec); - if (err < 0) - return err; - } else -#endif - { - AVVkFrame *f = (AVVkFrame *)dst->data[0]; - av_log(hwctx, AV_LOG_WARNING, "No support for synchronization when importing DMA-BUFs, " - "image may be corrupted.\n"); - err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_IMPORT); - if (err) - return err; - } - - return 0; -} - -static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src, int flags) -{ - int err = 0; - AVVkFrame *f; - - if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src, flags))) - return err; - - /* The unmapping function will free this */ - dst->data[0] = (uint8_t *)f; - dst->width = src->width; - dst->height = src->height; - - err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, - &vulkan_unmap_from_drm, f); - if (err < 0) - goto fail; - - err = vulkan_map_from_drm_frame_sync(hwfc, dst, src, flags); - if (err < 0) - return err; - - av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n"); - - return 0; - -fail: - vulkan_frame_free(hwfc->device_ctx->hwctx, f); - dst->data[0] = NULL; - return err; -} - -#if CONFIG_VAAPI -static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc, - AVFrame *dst, const AVFrame *src, - int flags) -{ - int err; - AVFrame *tmp = av_frame_alloc(); - AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; - AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx; - VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3]; - - if (!tmp) - return AVERROR(ENOMEM); - - /* We have to sync since like the previous comment said, no semaphores */ - vaSyncSurface(vaapi_ctx->display, surface_id); - - tmp->format = AV_PIX_FMT_DRM_PRIME; - - err = av_hwframe_map(tmp, src, 
flags); - if (err < 0) - goto fail; - - err = vulkan_map_from_drm(dst_fc, dst, tmp, flags); - if (err < 0) - goto fail; - - err = ff_hwframe_map_replace(dst, src); - -fail: - av_frame_free(&tmp); - return err; -} -#endif -#endif - -#if CONFIG_CUDA -static int export_mem_to_cuda(AVHWDeviceContext *ctx, - AVHWDeviceContext *cuda_cu, CudaFunctions *cu, - AVVkFrameInternal *dst_int, int idx, - VkDeviceMemory mem, size_t size) -{ - VkResult ret; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - -#ifdef _WIN32 - CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { - .type = IsWindows8OrGreater() - ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 - : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT, - .size = size, - }; - VkMemoryGetWin32HandleInfoKHR export_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, - .memory = mem, - .handleType = IsWindows8OrGreater() - ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT - : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, - }; - - ret = vk->GetMemoryWin32HandleKHR(hwctx->act_dev, &export_info, - &ext_desc.handle.win32.handle); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - dst_int->ext_mem_handle[idx] = ext_desc.handle.win32.handle; -#else - CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { - .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, - .size = size, - }; - VkMemoryGetFdInfoKHR export_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, - .memory = mem, - .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, - }; - - ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info, - &ext_desc.handle.fd); - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } -#endif - - ret = 
CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[idx], &ext_desc)); - if (ret < 0) { -#ifndef _WIN32 - close(ext_desc.handle.fd); -#endif - return AVERROR_EXTERNAL; - } - - return 0; -} - -static int export_sem_to_cuda(AVHWDeviceContext *ctx, - AVHWDeviceContext *cuda_cu, CudaFunctions *cu, - AVVkFrameInternal *dst_int, int idx, - VkSemaphore sem) -{ - VkResult ret; - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - -#ifdef _WIN32 - VkSemaphoreGetWin32HandleInfoKHR sem_export = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR, - .semaphore = sem, - .handleType = IsWindows8OrGreater() - ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT - : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, - }; - CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { - .type = 10 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 */, - }; -#else - VkSemaphoreGetFdInfoKHR sem_export = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, - .semaphore = sem, - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, - }; - CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { - .type = 9 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD */, - }; -#endif - -#ifdef _WIN32 - ret = vk->GetSemaphoreWin32HandleKHR(hwctx->act_dev, &sem_export, - &ext_sem_desc.handle.win32.handle); -#else - ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export, - &ext_sem_desc.handle.fd); -#endif - if (ret != VK_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } -#ifdef _WIN32 - dst_int->ext_sem_handle[idx] = ext_sem_desc.handle.win32.handle; -#endif - - ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[idx], - &ext_sem_desc)); - if (ret < 0) { -#ifndef _WIN32 - close(ext_sem_desc.handle.fd); -#endif - return AVERROR_EXTERNAL; - } - - return 0; -} - -static int 
vulkan_export_to_cuda(AVHWFramesContext *hwfc, - AVBufferRef *cuda_hwfc, - const AVFrame *frame) -{ - int err; - VkResult ret; - AVVkFrame *dst_f; - AVVkFrameInternal *dst_int; - AVHWDeviceContext *ctx = hwfc->device_ctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); - VulkanDevicePriv *p = ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - int nb_images; - - AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data; - AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; - AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; - AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; - CudaFunctions *cu = cu_internal->cuda_dl; - CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 : - CU_AD_FORMAT_UNSIGNED_INT8; - - dst_f = (AVVkFrame *)frame->data[0]; - dst_int = dst_f->internal; - - if (!dst_int->cuda_fc_ref) { - size_t offsets[3] = { 0 }; - - dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc); - if (!dst_int->cuda_fc_ref) - return AVERROR(ENOMEM); - - nb_images = ff_vk_count_images(dst_f); - for (int i = 0; i < nb_images; i++) { - err = export_mem_to_cuda(ctx, cuda_cu, cu, dst_int, i, - dst_f->mem[i], dst_f->size[i]); - if (err < 0) - goto fail; - - err = export_sem_to_cuda(ctx, cuda_cu, cu, dst_int, i, - dst_f->sem[i]); - if (err < 0) - goto fail; - } - - if (nb_images != planes) { - for (int i = 0; i < planes; i++) { - VkImageSubresource subres = { - .aspectMask = i == 2 ? VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT : - i == 1 ? 
VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT : - VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT - }; - VkSubresourceLayout layout = { 0 }; - vk->GetImageSubresourceLayout(hwctx->act_dev, dst_f->img[FFMIN(i, nb_images - 1)], - &subres, &layout); - offsets[i] = layout.offset; - } - } - - for (int i = 0; i < planes; i++) { - CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { - .offset = offsets[i], - .arrayDesc = { - .Depth = 0, - .Format = cufmt, - .NumChannels = 1 + ((planes == 2) && i), - .Flags = 0, - }, - .numLevels = 1, - }; - int p_w, p_h; - - get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i); - tex_desc.arrayDesc.Width = p_w; - tex_desc.arrayDesc.Height = p_h; - - ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i], - dst_int->ext_mem[FFMIN(i, nb_images - 1)], - &tex_desc)); - if (ret < 0) { - err = AVERROR_EXTERNAL; - goto fail; - } - - ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i], - dst_int->cu_mma[i], 0)); - if (ret < 0) { - err = AVERROR_EXTERNAL; - goto fail; - } - - } - } - - return 0; - -fail: - vulkan_free_internal(dst_f); - return err; -} - -static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, - AVFrame *dst, const AVFrame *src) -{ - int err; - CUcontext dummy; - AVVkFrame *dst_f; - AVVkFrameInternal *dst_int; - VulkanFramesPriv *fp = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); - - AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; - AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; - AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; - AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; - CudaFunctions *cu = cu_internal->cuda_dl; - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; - CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; - - dst_f = (AVVkFrame *)dst->data[0]; - - err = 
prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT); - if (err < 0) - return err; - - err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); - if (err < 0) - return err; - - err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst); - if (err < 0) { - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - return err; - } - - dst_int = dst_f->internal; - - for (int i = 0; i < planes; i++) { - s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0; - s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1; - } - - err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, - planes, cuda_dev->stream)); - if (err < 0) - goto fail; - - for (int i = 0; i < planes; i++) { - CUDA_MEMCPY2D cpy = { - .srcMemoryType = CU_MEMORYTYPE_DEVICE, - .srcDevice = (CUdeviceptr)src->data[i], - .srcPitch = src->linesize[i], - .srcY = 0, - - .dstMemoryType = CU_MEMORYTYPE_ARRAY, - .dstArray = dst_int->cu_array[i], - }; - - int p_w, p_h; - get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i); - - cpy.WidthInBytes = p_w * desc->comp[i].step; - cpy.Height = p_h; - - err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); - if (err < 0) - goto fail; - } - - err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, - planes, cuda_dev->stream)); - if (err < 0) - goto fail; - - for (int i = 0; i < planes; i++) - dst_f->sem_value[i]++; - - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - - av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n"); - - return err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT); - -fail: - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - vulkan_free_internal(dst_f); - av_buffer_unref(&dst->buf[0]); - return err; -} -#endif - -static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src, int flags) -{ - av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - - switch (src->format) { -#if CONFIG_LIBDRM -#if CONFIG_VAAPI - case AV_PIX_FMT_VAAPI: - if 
(p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) - return vulkan_map_from_vaapi(hwfc, dst, src, flags); - else - return AVERROR(ENOSYS); -#endif - case AV_PIX_FMT_DRM_PRIME: - if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) - return vulkan_map_from_drm(hwfc, dst, src, flags); - else - return AVERROR(ENOSYS); -#endif - default: - return AVERROR(ENOSYS); - } -} - -#if CONFIG_LIBDRM -typedef struct VulkanDRMMapping { - AVDRMFrameDescriptor drm_desc; - AVVkFrame *source; -} VulkanDRMMapping; - -static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) -{ - AVDRMFrameDescriptor *drm_desc = hwmap->priv; - - for (int i = 0; i < drm_desc->nb_objects; i++) - close(drm_desc->objects[i].fd); - - av_free(drm_desc); -} - -static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt) -{ - for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) - if (vulkan_drm_format_map[i].vk_format == vkfmt) - return vulkan_drm_format_map[i].drm_fourcc; - return DRM_FORMAT_INVALID; -} - -#define MAX_MEMORY_PLANES 4 -static VkImageAspectFlags plane_index_to_aspect(int plane) { - if (plane == 0) return VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT; - if (plane == 1) return VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT; - if (plane == 2) return VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT; - if (plane == 3) return VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT; - - av_assert2 (0 && "Invalid plane index"); - return VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT; -} - -static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src, int flags) -{ - int err = 0; - VkResult ret; - AVVkFrame *f = (AVVkFrame *)src->data[0]; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - VulkanFramesPriv *fp = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); - VkImageDrmFormatModifierPropertiesEXT drm_mod = { - .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, - 
}; - VkSemaphoreWaitInfo wait_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, - .flags = 0x0, - .semaphoreCount = planes, - }; - - AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc)); - if (!drm_desc) - return AVERROR(ENOMEM); - - err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_EXPORT); - if (err < 0) - goto end; - - /* Wait for the operation to finish so we can cleanly export it. */ - wait_info.pSemaphores = f->sem; - wait_info.pValues = f->sem_value; - - vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX); - - err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc); - if (err < 0) - goto end; - - ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0], - &drm_mod); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n"); - err = AVERROR_EXTERNAL; - goto end; - } - - for (int i = 0; (i < planes) && (f->mem[i]); i++) { - VkMemoryGetFdInfoKHR export_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, - .memory = f->mem[i], - .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, - }; - - ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info, - &drm_desc->objects[i].fd); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n"); - err = AVERROR_EXTERNAL; - goto end; - } - - drm_desc->nb_objects++; - drm_desc->objects[i].size = f->size[i]; - drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier; - } - - drm_desc->nb_layers = planes; - for (int i = 0; i < drm_desc->nb_layers; i++) { - VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i]; - - drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt); - drm_desc->layers[i].nb_planes = fp->drm_format_modifier_properties[i].drmFormatModifierPlaneCount; - - if (drm_desc->layers[i].nb_planes > MAX_MEMORY_PLANES) { - av_log(hwfc, AV_LOG_ERROR, "Too many memory planes for DRM format!\n"); - err = 
AVERROR_EXTERNAL; - goto end; - } - - for (int j = 0; j < drm_desc->layers[i].nb_planes; j++) { - VkSubresourceLayout layout; - VkImageSubresource sub = { - .aspectMask = plane_index_to_aspect(j), - }; - - drm_desc->layers[i].planes[j].object_index = FFMIN(i, drm_desc->nb_objects - 1); - - vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); - drm_desc->layers[i].planes[j].offset = layout.offset; - drm_desc->layers[i].planes[j].pitch = layout.rowPitch; - } - - if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) { - av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n"); - err = AVERROR_PATCHWELCOME; - goto end; - } - - - if (f->tiling == VK_IMAGE_TILING_OPTIMAL) - continue; - - } - - dst->width = src->width; - dst->height = src->height; - dst->data[0] = (uint8_t *)drm_desc; - - av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n"); - - return 0; - -end: - av_free(drm_desc); - return err; -} - -#if CONFIG_VAAPI -static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src, int flags) -{ - int err; - AVFrame *tmp = av_frame_alloc(); - if (!tmp) - return AVERROR(ENOMEM); - - tmp->format = AV_PIX_FMT_DRM_PRIME; - - err = vulkan_map_to_drm(hwfc, tmp, src, flags); - if (err < 0) - goto fail; - - err = av_hwframe_map(dst, tmp, flags); - if (err < 0) - goto fail; - - err = ff_hwframe_map_replace(dst, src); - -fail: - av_frame_free(&tmp); - return err; -} -#endif -#endif - -static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src, int flags) -{ - av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - - switch (dst->format) { -#if CONFIG_LIBDRM - case AV_PIX_FMT_DRM_PRIME: - if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) - return vulkan_map_to_drm(hwfc, dst, src, flags); - else - return AVERROR(ENOSYS); -#if CONFIG_VAAPI - case AV_PIX_FMT_VAAPI: - if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) - return vulkan_map_to_vaapi(hwfc, dst, src, 
flags); - else - return AVERROR(ENOSYS); -#endif -#endif - default: - break; - } - return AVERROR(ENOSYS); -} - -static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf, - AVFrame *swf, VkBufferImageCopy *region, - int planes, int upload) -{ - VkResult ret; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - AVVulkanDeviceContext *hwctx = &p->p; - - FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data; - - const VkMappedMemoryRange flush_info = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = vkbuf->mem, - .size = VK_WHOLE_SIZE, - }; - - if (!upload && !(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1, - &flush_info); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } - - if (upload) { - for (int i = 0; i < planes; i++) - av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset, - region[i].bufferRowLength, - swf->data[i], - swf->linesize[i], - swf->linesize[i], - region[i].imageExtent.height); - } else { - for (int i = 0; i < planes; i++) - av_image_copy_plane(swf->data[i], - swf->linesize[i], - vkbuf->mapped_mem + region[i].bufferOffset, - region[i].bufferRowLength, - swf->linesize[i], - region[i].imageExtent.height); - } - - if (upload && !(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1, - &flush_info); - if (ret != VK_SUCCESS) { - av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } - - return 0; -} - -static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst, - AVFrame *swf, VkBufferImageCopy *region, int upload) -{ - int err; - uint32_t p_w, p_h; - VulkanFramesPriv *fp = hwfc->hwctx; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - const int planes = 
av_pix_fmt_count_planes(swf->format); - VkBufferUsageFlags buf_usage = upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT : - VK_BUFFER_USAGE_TRANSFER_DST_BIT; - - size_t buf_offset = 0; - for (int i = 0; i < planes; i++) { - get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); - - region[i] = (VkBufferImageCopy) { - .bufferOffset = buf_offset, - .bufferRowLength = FFALIGN(swf->linesize[i], - p->props.properties.limits.optimalBufferCopyRowPitchAlignment), - .bufferImageHeight = p_h, - .imageSubresource.layerCount = 1, - .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, - /* Rest of the fields adjusted/filled in later */ - }; - - buf_offset += FFALIGN(p_h*region[i].bufferRowLength, - p->props.properties.limits.optimalBufferCopyOffsetAlignment); - } - - err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst, buf_usage, - NULL, buf_offset, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT); - if (err < 0) - return err; - - return 0; -} - -static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs, - AVFrame *swf, VkBufferImageCopy *region, int upload) -{ - int err; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - - int nb_src_bufs; - const int planes = av_pix_fmt_count_planes(swf->format); - VkBufferUsageFlags buf_usage = upload ? 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT : - VK_BUFFER_USAGE_TRANSFER_DST_BIT; - - /* We can't host map images with negative strides */ - for (int i = 0; i < planes; i++) - if (swf->linesize[i] < 0) - return AVERROR(EINVAL); - - /* Count the number of buffers in the software frame */ - nb_src_bufs = 0; - while (swf->buf[nb_src_bufs]) - nb_src_bufs++; - - /* Single buffer contains all planes */ - if (nb_src_bufs == 1) { - err = ff_vk_host_map_buffer(&p->vkctx, &dst[0], - swf->data[0], swf->buf[0], - buf_usage); - if (err < 0) - return err; - (*nb_bufs)++; - - for (int i = 0; i < planes; i++) - region[i].bufferOffset = ((FFVkBuffer *)dst[0]->data)->virtual_offset + - swf->data[i] - swf->data[0]; - } else if (nb_src_bufs == planes) { /* One buffer per plane */ - for (int i = 0; i < planes; i++) { - err = ff_vk_host_map_buffer(&p->vkctx, &dst[i], - swf->data[i], swf->buf[i], - buf_usage); - if (err < 0) - goto fail; - (*nb_bufs)++; - - region[i].bufferOffset = ((FFVkBuffer *)dst[i]->data)->virtual_offset; - } - } else { - /* Weird layout (3 planes, 2 buffers), patch welcome, fallback to copy */ - return AVERROR_PATCHWELCOME; - } - - return 0; - -fail: - for (int i = 0; i < (*nb_bufs); i++) - av_buffer_unref(&dst[i]); - return err; -} - -static int vulkan_transfer_host(AVHWFramesContext *hwfc, AVFrame *hwf, - AVFrame *swf, int upload) -{ - VulkanFramesPriv *fp = hwfc->hwctx; - AVVulkanFramesContext *hwfc_vk = &fp->p; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - AVVulkanDeviceContext *hwctx = &p->p; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - - AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0]; - const int planes = av_pix_fmt_count_planes(swf->format); - const int nb_images = ff_vk_count_images(hwf_vk); - - VkSemaphoreWaitInfo sem_wait; - VkHostImageLayoutTransitionInfoEXT layout_ch_info[AV_NUM_DATA_POINTERS]; - int nb_layout_ch = 0; - - hwfc_vk->lock_frame(hwfc, hwf_vk); - - for (int i = 0; i < nb_images; i++) { - int compat = 0; - for (int j = 0; j < 
p->vkctx.host_image_props.copySrcLayoutCount; j++) { - if (hwf_vk->layout[i] == p->vkctx.host_image_props.pCopySrcLayouts[j]) { - compat = 1; - break; - } - } - if (compat) - continue; - - layout_ch_info[nb_layout_ch] = (VkHostImageLayoutTransitionInfoEXT) { - .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO_EXT, - .image = hwf_vk->img[i], - .oldLayout = hwf_vk->layout[i], - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1, - }, - }; - - hwf_vk->layout[i] = layout_ch_info[nb_layout_ch].newLayout; - nb_layout_ch++; - } - - sem_wait = (VkSemaphoreWaitInfo) { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, - .pSemaphores = hwf_vk->sem, - .pValues = hwf_vk->sem_value, - .semaphoreCount = nb_images, - }; - - vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); - - if (nb_layout_ch) - vk->TransitionImageLayoutEXT(hwctx->act_dev, - nb_layout_ch, layout_ch_info); - - if (upload) { - VkMemoryToImageCopyEXT region_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT, - .imageSubresource = { - .layerCount = 1, - }, - }; - VkCopyMemoryToImageInfoEXT copy_info = { - .sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT, - .flags = VK_HOST_IMAGE_COPY_MEMCPY_EXT, - .regionCount = 1, - .pRegions = &region_info, - }; - for (int i = 0; i < planes; i++) { - int img_idx = FFMIN(i, (nb_images - 1)); - uint32_t p_w, p_h; - get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); - - region_info.pHostPointer = swf->data[i]; - region_info.imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i); - region_info.imageExtent = (VkExtent3D){ p_w, p_h, 1 }; - copy_info.dstImage = hwf_vk->img[img_idx]; - copy_info.dstImageLayout = hwf_vk->layout[img_idx]; - - vk->CopyMemoryToImageEXT(hwctx->act_dev, &copy_info); - } - } else { - VkImageToMemoryCopyEXT region_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT, - .imageSubresource = { - .layerCount = 1, - }, - 
}; - VkCopyImageToMemoryInfoEXT copy_info = { - .sType = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_MEMORY_INFO_EXT, - .flags = VK_HOST_IMAGE_COPY_MEMCPY_EXT, - .regionCount = 1, - .pRegions = &region_info, - }; - for (int i = 0; i < planes; i++) { - int img_idx = FFMIN(i, (nb_images - 1)); - uint32_t p_w, p_h; - get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); - - region_info.pHostPointer = swf->data[i]; - region_info.imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i); - region_info.imageExtent = (VkExtent3D){ p_w, p_h, 1 }; - copy_info.srcImage = hwf_vk->img[img_idx]; - copy_info.srcImageLayout = hwf_vk->layout[img_idx]; - - vk->CopyImageToMemoryEXT(hwctx->act_dev, &copy_info); - } - } - - hwfc_vk->unlock_frame(hwfc, hwf_vk); - - return 0; -} - -static int vulkan_transfer_frame(AVHWFramesContext *hwfc, - AVFrame *swf, AVFrame *hwf, - int upload) -{ - int err; - VulkanFramesPriv *fp = hwfc->hwctx; - AVVulkanFramesContext *hwctx = &fp->p; - VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - FFVulkanFunctions *vk = &p->vkctx.vkfn; - - int host_mapped = 0; - - AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0]; - VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane - - const int planes = av_pix_fmt_count_planes(swf->format); - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format); - const int nb_images = ff_vk_count_images(hwf_vk); - - VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; - int nb_img_bar = 0; - - AVBufferRef *bufs[AV_NUM_DATA_POINTERS]; - int nb_bufs = 0; - - VkCommandBuffer cmd_buf; - FFVkExecContext *exec; - - /* Sanity checking */ - if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { - av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); - return AVERROR(EINVAL); - } - - if (swf->width > hwfc->width || swf->height > hwfc->height) - return AVERROR(EINVAL); - - if (hwctx->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT) - return vulkan_transfer_host(hwfc, hwf, swf, upload); - - 
for (int i = 0; i < av_pix_fmt_count_planes(swf->format); i++) { - uint32_t p_w, p_h; - get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); - - /* Buffer region for this plane */ - region[i] = (VkBufferImageCopy) { - .bufferOffset = 0, - .bufferRowLength = swf->linesize[i], - .bufferImageHeight = p_h, - .imageSubresource.layerCount = 1, - .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, - /* Rest of the fields adjusted/filled in later */ - }; - } - - /* Setup buffers first */ - if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY && !p->avoid_host_import) { - err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload); - if (err >= 0) - host_mapped = 1; - } - - if (!host_mapped) { - err = get_plane_buf(hwfc, &bufs[0], swf, region, upload); - if (err < 0) - goto end; - nb_bufs = 1; - - if (upload) { - err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1); - if (err < 0) - goto end; - } - } - - exec = ff_vk_exec_get(&p->vkctx, &fp->upload_exec); - cmd_buf = exec->buf; - - ff_vk_exec_start(&p->vkctx, exec); - - /* Prep destination Vulkan frame */ - err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_TRANSFER_BIT); - if (err < 0) - goto end; - - /* No need to declare buf deps for synchronous transfers (downloads) */ - if (upload) { - /* Add the software frame backing the buffers if we're host mapping */ - if (host_mapped) { - err = ff_vk_exec_add_dep_sw_frame(&p->vkctx, exec, swf); - if (err < 0) { - ff_vk_exec_discard_deps(&p->vkctx, exec); - goto end; - } - } - - /* Add the buffers as a dependency */ - err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1); - if (err < 0) { - ff_vk_exec_discard_deps(&p->vkctx, exec); - goto end; - } - } - - ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, - upload ? VK_ACCESS_TRANSFER_WRITE_BIT : - VK_ACCESS_TRANSFER_READ_BIT, - upload ? 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]); - - vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - - for (int i = 0; i < planes; i++) { - int buf_idx = FFMIN(i, (nb_bufs - 1)); - int img_idx = FFMIN(i, (nb_images - 1)); - FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data; - - uint32_t orig_stride = region[i].bufferRowLength; - region[i].bufferRowLength /= desc->comp[i].step; - region[i].imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i); - - if (upload) - vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, - hwf_vk->img[img_idx], - img_bar[img_idx].newLayout, - 1, &region[i]); - else - vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx], - img_bar[img_idx].newLayout, - vkbuf->buf, - 1, &region[i]); - - region[i].bufferRowLength = orig_stride; - } - - err = ff_vk_exec_submit(&p->vkctx, exec); - if (err < 0) { - ff_vk_exec_discard_deps(&p->vkctx, exec); - } else if (!upload) { - ff_vk_exec_wait(&p->vkctx, exec); - if (!host_mapped) - err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0); - } - -end: - for (int i = 0; i < nb_bufs; i++) - av_buffer_unref(&bufs[i]); - - return err; -} - -static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src) -{ - av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - - switch (src->format) { -#if CONFIG_CUDA - case AV_PIX_FMT_CUDA: -#ifdef _WIN32 - if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && - (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) -#else - if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && - (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) -#endif - return vulkan_transfer_data_to_cuda(hwfc, dst, src); -#endif - default: - if (src->hw_frames_ctx) - return AVERROR(ENOSYS); - else - return 
vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1); - } -} - -#if CONFIG_CUDA -static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src) -{ - int err; - CUcontext dummy; - AVVkFrame *dst_f; - AVVkFrameInternal *dst_int; - VulkanFramesPriv *fp = hwfc->hwctx; - const int planes = av_pix_fmt_count_planes(hwfc->sw_format); - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); - int nb_images; - - AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data; - AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; - AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; - AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; - CudaFunctions *cu = cu_internal->cuda_dl; - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; - CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; - - dst_f = (AVVkFrame *)src->data[0]; - nb_images = ff_vk_count_images(dst_f); - - err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT); - if (err < 0) - return err; - - err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); - if (err < 0) - return err; - - err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src); - if (err < 0) { - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - return err; - } - - dst_int = dst_f->internal; - - for (int i = 0; i < planes; i++) { - s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0; - s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1; - } - - err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, - nb_images, cuda_dev->stream)); - if (err < 0) - goto fail; - - for (int i = 0; i < planes; i++) { - CUDA_MEMCPY2D cpy = { - .dstMemoryType = CU_MEMORYTYPE_DEVICE, - .dstDevice = (CUdeviceptr)dst->data[i], - .dstPitch = dst->linesize[i], - .dstY = 0, - - .srcMemoryType = CU_MEMORYTYPE_ARRAY, - .srcArray = dst_int->cu_array[i], - }; - - int w, h; - get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, 
hwfc->height, i); - - cpy.WidthInBytes = w * desc->comp[i].step; - cpy.Height = h; - - err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); - if (err < 0) - goto fail; - } - - err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, - nb_images, cuda_dev->stream)); - if (err < 0) - goto fail; - - for (int i = 0; i < planes; i++) - dst_f->sem_value[i]++; - - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - - av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n"); - - return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT); - -fail: - CHECK_CU(cu->cuCtxPopCurrent(&dummy)); - vulkan_free_internal(dst_f); - av_buffer_unref(&dst->buf[0]); - return err; -} -#endif - -static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, - const AVFrame *src) -{ - av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; - - switch (dst->format) { -#if CONFIG_CUDA - case AV_PIX_FMT_CUDA: -#ifdef _WIN32 - if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && - (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) -#else - if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && - (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) -#endif - return vulkan_transfer_data_to_cuda(hwfc, dst, src); -#endif - default: - if (dst->hw_frames_ctx) - return AVERROR(ENOSYS); - else - return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0); - } -} - -static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc, - AVHWFramesContext *src_fc, int flags) -{ - return vulkan_frames_init(dst_fc); -} - -AVVkFrame *av_vk_frame_alloc(void) -{ - int err; - AVVkFrame *f = av_mallocz(sizeof(AVVkFrame)); - if (!f) - return NULL; - - f->internal = av_mallocz(sizeof(*f->internal)); - if (!f->internal) { - av_free(f); - return NULL; - } - - err = pthread_mutex_init(&f->internal->update_mutex, NULL); - if (err != 0) { - av_free(f->internal); - av_free(f); - return NULL; - } - - return f; -} - -const HWContextType 
ff_hwcontext_type_vulkan = { - .type = AV_HWDEVICE_TYPE_VULKAN, - .name = "Vulkan", - - .device_hwctx_size = sizeof(VulkanDevicePriv), - .frames_hwctx_size = sizeof(VulkanFramesPriv), - - .device_init = &vulkan_device_init, - .device_uninit = &vulkan_device_uninit, - .device_create = &vulkan_device_create, - .device_derive = &vulkan_device_derive, - - .frames_get_constraints = &vulkan_frames_get_constraints, - .frames_init = vulkan_frames_init, - .frames_get_buffer = vulkan_get_buffer, - .frames_uninit = vulkan_frames_uninit, - - .transfer_get_formats = vulkan_transfer_get_formats, - .transfer_data_to = vulkan_transfer_data_to, - .transfer_data_from = vulkan_transfer_data_from, - - .map_to = vulkan_map_to, - .map_from = vulkan_map_from, - .frames_derive_to = &vulkan_frames_derive_to, - - .pix_fmts = (const enum AVPixelFormat []) { - AV_PIX_FMT_VULKAN, - AV_PIX_FMT_NONE - }, -}; -- 2.49.1 From 93ee15398bfca4b5e884145d2a1673a632196ad3 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:15:26 +0000 Subject: [PATCH 040/118] Changing vulkan file directory --- libavutil/vulkan/hwcontext_vulkan.c | 4790 +++++++++++++++++++++++++++ 1 file changed, 4790 insertions(+) create mode 100644 libavutil/vulkan/hwcontext_vulkan.c diff --git a/libavutil/vulkan/hwcontext_vulkan.c b/libavutil/vulkan/hwcontext_vulkan.c new file mode 100644 index 0000000000..858bbaba12 --- /dev/null +++ b/libavutil/vulkan/hwcontext_vulkan.c @@ -0,0 +1,4790 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define VK_NO_PROTOTYPES +#define VK_ENABLE_BETA_EXTENSIONS + +#ifdef _WIN32 +#include <windows.h> /* Included to prevent conflicts with CreateSemaphore */ +#include <versionhelpers.h> +#include "compat/w32dlfcn.h" +#else +#include <dlfcn.h> +#include <unistd.h> +#endif + +#include "libavutil/thread.h" + +#include "libavutil/config.h" +#include "libavutil/pixdesc.h" +#include "libavutil/avstring.h" +#include "libavutil/imgutils.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_internal.h" +#include "hwcontext_vulkan.h" +#include "libavutil/mem.h" + +#include "vulkan.h" +#include "vulkan_loader.h" + +#if CONFIG_VAAPI +#include "hwcontext_vaapi.h" +#endif + +#if CONFIG_LIBDRM +#if CONFIG_VAAPI +#include <va/va_drmcommon.h> +#endif +#ifdef __linux__ +#include <sys/sysmacros.h> +#endif +#include <sys/stat.h> +#include <xf86drm.h> +#include <drm_fourcc.h> +#include "hwcontext_drm.h" +#endif + +#if HAVE_LINUX_DMA_BUF_H +#include <sys/ioctl.h> +#include <linux/dma-buf.h> +#endif + +#if CONFIG_CUDA +#include "hwcontext_cuda_internal.h" +#include "cuda_check.h" +#define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) +#endif + +typedef struct VulkanDeviceFeatures { + VkPhysicalDeviceFeatures2 device; + + VkPhysicalDeviceVulkan11Features vulkan_1_1; + VkPhysicalDeviceVulkan12Features vulkan_1_2; + VkPhysicalDeviceVulkan13Features vulkan_1_3; + VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore; + VkPhysicalDeviceShaderSubgroupRotateFeaturesKHR subgroup_rotate; + VkPhysicalDeviceHostImageCopyFeaturesEXT host_image_copy; + +#ifdef VK_KHR_shader_expect_assume + VkPhysicalDeviceShaderExpectAssumeFeaturesKHR expect_assume; +#endif + + 
VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maintenance_1; +#ifdef VK_KHR_video_maintenance2 + VkPhysicalDeviceVideoMaintenance2FeaturesKHR video_maintenance_2; +#endif +#ifdef VK_KHR_video_decode_vp9 + VkPhysicalDeviceVideoDecodeVP9FeaturesKHR vp9_decode; +#endif +#ifdef VK_KHR_video_encode_av1 + VkPhysicalDeviceVideoEncodeAV1FeaturesKHR av1_encode; +#endif + + VkPhysicalDeviceShaderObjectFeaturesEXT shader_object; + VkPhysicalDeviceCooperativeMatrixFeaturesKHR cooperative_matrix; + VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptor_buffer; + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float; + +#ifdef VK_KHR_shader_relaxed_extended_instruction + VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR relaxed_extended_instruction; +#endif +} VulkanDeviceFeatures; + +typedef struct VulkanDevicePriv { + /** + * The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it. + */ + AVVulkanDeviceContext p; + + /* Vulkan library and loader functions */ + void *libvulkan; + + FFVulkanContext vkctx; + AVVulkanDeviceQueueFamily *compute_qf; + AVVulkanDeviceQueueFamily *transfer_qf; + + /* Properties */ + VkPhysicalDeviceProperties2 props; + VkPhysicalDeviceMemoryProperties mprops; + VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; + VkPhysicalDeviceDriverProperties dprops; + + /* Opaque FD external semaphore properties */ + VkExternalSemaphoreProperties ext_sem_props_opaque; + + /* Enabled features */ + VulkanDeviceFeatures feats; + + /* Queues */ + pthread_mutex_t **qf_mutex; + uint32_t nb_tot_qfs; + uint32_t img_qfs[64]; + uint32_t nb_img_qfs; + + /* Debug callback */ + VkDebugUtilsMessengerEXT debug_ctx; + + /* Settings */ + int use_linear_images; + + /* Option to allocate all image planes in a single allocation */ + int contiguous_planes; + + /* Disable multiplane images */ + int disable_multiplane; + + /* Disable host image transfer */ + int disable_host_transfer; + + /* Prefer memcpy over dynamic host pointer imports */ + int 
avoid_host_import; + + /* Maximum queues */ + int limit_queues; +} VulkanDevicePriv; + +typedef struct VulkanFramesPriv { + /** + * The public AVVulkanFramesContext. See hwcontext_vulkan.h for it. + */ + AVVulkanFramesContext p; + + /* Image conversions */ + FFVkExecPool compute_exec; + + /* Image transfers */ + FFVkExecPool upload_exec; + FFVkExecPool download_exec; + + /* Temporary buffer pools */ + AVBufferPool *tmp; + + /* Modifier info list to free at uninit */ + VkImageDrmFormatModifierListCreateInfoEXT *modifier_info; + + /* Properties for DRM modifier for each plane in the image */ + VkDrmFormatModifierPropertiesEXT drm_format_modifier_properties[5]; +} VulkanFramesPriv; + +typedef struct AVVkFrameInternal { + pthread_mutex_t update_mutex; + +#if CONFIG_CUDA + /* Importing external memory into cuda is really expensive so we keep the + * memory imported all the time */ + AVBufferRef *cuda_fc_ref; /* Need to keep it around for uninit */ + CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS]; + CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS]; + CUarray cu_array[AV_NUM_DATA_POINTERS]; + CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS]; +#ifdef _WIN32 + HANDLE ext_mem_handle[AV_NUM_DATA_POINTERS]; + HANDLE ext_sem_handle[AV_NUM_DATA_POINTERS]; +#endif +#endif +} AVVkFrameInternal; + +/* Initialize all structs in VulkanDeviceFeatures */ +static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *feats) +{ + VulkanDevicePriv *p = ctx->hwctx; + FFVulkanContext *s = &p->vkctx; + + feats->device = (VkPhysicalDeviceFeatures2) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + }; + + FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_1, FF_VK_EXT_NO_FLAG, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES); + FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_2, FF_VK_EXT_NO_FLAG, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES); + FF_VK_STRUCT_EXT(s, &feats->device, &feats->vulkan_1_3, FF_VK_EXT_NO_FLAG, + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES); + + FF_VK_STRUCT_EXT(s, &feats->device, &feats->timeline_semaphore, FF_VK_EXT_PORTABILITY_SUBSET, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES); + FF_VK_STRUCT_EXT(s, &feats->device, &feats->subgroup_rotate, FF_VK_EXT_SUBGROUP_ROTATE, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_ROTATE_FEATURES_KHR); + FF_VK_STRUCT_EXT(s, &feats->device, &feats->host_image_copy, FF_VK_EXT_HOST_IMAGE_COPY, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT); + +#ifdef VK_KHR_shader_expect_assume + FF_VK_STRUCT_EXT(s, &feats->device, &feats->expect_assume, FF_VK_EXT_EXPECT_ASSUME, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_EXPECT_ASSUME_FEATURES_KHR); +#endif + + FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_1, FF_VK_EXT_VIDEO_MAINTENANCE_1, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR); +#ifdef VK_KHR_video_maintenance2 + FF_VK_STRUCT_EXT(s, &feats->device, &feats->video_maintenance_2, FF_VK_EXT_VIDEO_MAINTENANCE_2, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_2_FEATURES_KHR); +#endif +#ifdef VK_KHR_video_decode_vp9 + FF_VK_STRUCT_EXT(s, &feats->device, &feats->vp9_decode, FF_VK_EXT_VIDEO_DECODE_VP9, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_DECODE_VP9_FEATURES_KHR); +#endif +#ifdef VK_KHR_video_encode_av1 + FF_VK_STRUCT_EXT(s, &feats->device, &feats->av1_encode, FF_VK_EXT_VIDEO_ENCODE_AV1, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_AV1_FEATURES_KHR); +#endif + + FF_VK_STRUCT_EXT(s, &feats->device, &feats->shader_object, FF_VK_EXT_SHADER_OBJECT, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT); + FF_VK_STRUCT_EXT(s, &feats->device, &feats->cooperative_matrix, FF_VK_EXT_COOP_MATRIX, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR); + FF_VK_STRUCT_EXT(s, &feats->device, &feats->descriptor_buffer, FF_VK_EXT_DESCRIPTOR_BUFFER, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT); + 
FF_VK_STRUCT_EXT(s, &feats->device, &feats->atomic_float, FF_VK_EXT_ATOMIC_FLOAT, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT); + +#ifdef VK_KHR_shader_relaxed_extended_instruction + FF_VK_STRUCT_EXT(s, &feats->device, &feats->relaxed_extended_instruction, FF_VK_EXT_RELAXED_EXTENDED_INSTR, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR); +#endif +} + +/* Copy all needed device features */ +static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceFeatures *src) +{ +#define COPY_VAL(VAL) \ + do { \ + dst->VAL = src->VAL; \ + } while (0) \ + + COPY_VAL(device.features.shaderImageGatherExtended); + COPY_VAL(device.features.shaderStorageImageReadWithoutFormat); + COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat); + COPY_VAL(device.features.fragmentStoresAndAtomics); + COPY_VAL(device.features.vertexPipelineStoresAndAtomics); + COPY_VAL(device.features.shaderInt64); + COPY_VAL(device.features.shaderInt16); + COPY_VAL(device.features.shaderFloat64); + COPY_VAL(device.features.shaderStorageImageReadWithoutFormat); + COPY_VAL(device.features.shaderStorageImageWriteWithoutFormat); + + COPY_VAL(vulkan_1_1.samplerYcbcrConversion); + COPY_VAL(vulkan_1_1.storagePushConstant16); + COPY_VAL(vulkan_1_1.storageBuffer16BitAccess); + COPY_VAL(vulkan_1_1.uniformAndStorageBuffer16BitAccess); + + COPY_VAL(vulkan_1_2.timelineSemaphore); + COPY_VAL(vulkan_1_2.scalarBlockLayout); + COPY_VAL(vulkan_1_2.bufferDeviceAddress); + COPY_VAL(vulkan_1_2.hostQueryReset); + COPY_VAL(vulkan_1_2.storagePushConstant8); + COPY_VAL(vulkan_1_2.shaderInt8); + COPY_VAL(vulkan_1_2.storageBuffer8BitAccess); + COPY_VAL(vulkan_1_2.uniformAndStorageBuffer8BitAccess); + COPY_VAL(vulkan_1_2.shaderFloat16); + COPY_VAL(vulkan_1_2.shaderBufferInt64Atomics); + COPY_VAL(vulkan_1_2.shaderSharedInt64Atomics); + COPY_VAL(vulkan_1_2.vulkanMemoryModel); + COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope); + 
COPY_VAL(vulkan_1_2.uniformBufferStandardLayout); + + COPY_VAL(vulkan_1_3.dynamicRendering); + COPY_VAL(vulkan_1_3.maintenance4); + COPY_VAL(vulkan_1_3.synchronization2); + COPY_VAL(vulkan_1_3.computeFullSubgroups); + COPY_VAL(vulkan_1_3.subgroupSizeControl); + COPY_VAL(vulkan_1_3.shaderZeroInitializeWorkgroupMemory); + COPY_VAL(vulkan_1_3.dynamicRendering); + + COPY_VAL(timeline_semaphore.timelineSemaphore); + COPY_VAL(subgroup_rotate.shaderSubgroupRotate); + COPY_VAL(host_image_copy.hostImageCopy); + + COPY_VAL(video_maintenance_1.videoMaintenance1); +#ifdef VK_KHR_video_maintenance2 + COPY_VAL(video_maintenance_2.videoMaintenance2); +#endif + +#ifdef VK_KHR_video_decode_vp9 + COPY_VAL(vp9_decode.videoDecodeVP9); +#endif + +#ifdef VK_KHR_video_encode_av1 + COPY_VAL(av1_encode.videoEncodeAV1); +#endif + + COPY_VAL(shader_object.shaderObject); + + COPY_VAL(cooperative_matrix.cooperativeMatrix); + + COPY_VAL(descriptor_buffer.descriptorBuffer); + COPY_VAL(descriptor_buffer.descriptorBufferPushDescriptors); + + COPY_VAL(atomic_float.shaderBufferFloat32Atomics); + COPY_VAL(atomic_float.shaderBufferFloat32AtomicAdd); + +#ifdef VK_KHR_shader_relaxed_extended_instruction + COPY_VAL(relaxed_extended_instruction.shaderRelaxedExtendedInstruction); +#endif + +#ifdef VK_KHR_shader_expect_assume + COPY_VAL(expect_assume.shaderExpectAssume); +#endif + +#undef COPY_VAL +} + +#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) +#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) + +static const struct FFVkFormatEntry { + VkFormat vkf; + enum AVPixelFormat pixfmt; + VkImageAspectFlags aspect; + int vk_planes; + int nb_images; + int nb_images_fallback; + const VkFormat fallback[5]; +} vk_formats_list[] = { + /* Gray formats */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY10, 
VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY12, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY14, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R32_UINT, AV_PIX_FMT_GRAY32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_UINT } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32_SFLOAT } }, + + /* RGB formats */ + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8_UNORM } }, + { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8_UNORM } }, + { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16_UNORM } }, + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R5G6B5_UNORM_PACK16 } }, + { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B5G6R5_UNORM_PACK16 } }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2R10G10B10_UNORM_PACK32 } }, + { VK_FORMAT_A2B10G10R10_UNORM_PACK32, AV_PIX_FMT_X2BGR10, 
VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_A2B10G10R10_UNORM_PACK32 } }, + { VK_FORMAT_R32G32B32_SFLOAT, AV_PIX_FMT_RGBF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_SFLOAT } }, + { VK_FORMAT_R32G32B32A32_SFLOAT, AV_PIX_FMT_RGBAF32, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_SFLOAT } }, + { VK_FORMAT_R32G32B32_UINT, AV_PIX_FMT_RGB96, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32_UINT } }, + { VK_FORMAT_R32G32B32A32_UINT, AV_PIX_FMT_RGBA128, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R32G32B32A32_UINT } }, + + /* Planar RGB */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRP, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP10, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP12, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP14, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRP16, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT, 3, 3, 3, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + + /* Planar RGB + Alpha */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, 
VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP14, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R32_UINT, AV_PIX_FMT_GBRAP32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32_UINT } }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } }, + + /* Bayer */ + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_BAYER_RGGB16, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16_UNORM } }, + + /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Two-plane 422 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, 
ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Two-plane 444 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8G8_UNORM } }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE, 2, 1, 2, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16G16_UNORM } }, + + /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } 
}, + { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE, 3, 1, 3, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + /* Single plane 422 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R8G8B8A8_UNORM } }, + { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_G16B16G16R16_422_UNORM, AV_PIX_FMT_Y216, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + + /* Planar YUVA 420 at 8, 10 and 16 bits */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA420P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA420P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA420P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + /* Planar YUVA 422 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA422P, 
VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA422P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + /* Planar YUVA 444 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_YUVA444P, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P10, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P12, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_YUVA444P16, VK_IMAGE_ASPECT_COLOR_BIT, 4, 4, 4, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } }, + + /* Single plane 444 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_UYVA, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_B8G8R8A8_UNORM } }, + { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_XV30, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_XV48, VK_IMAGE_ASPECT_COLOR_BIT, 1, 1, 1, { VK_FORMAT_R16G16B16A16_UNORM } }, +}; +static const int nb_vk_formats_list = 
FF_ARRAY_ELEMS(vk_formats_list); + +/* Return the vk_formats_list entry whose pixfmt equals p, or NULL when the table has no such entry. */ +static const struct FFVkFormatEntry *vk_find_format_entry(enum AVPixelFormat p) +{ + for (int i = 0; i < nb_vk_formats_list; i++) + if (vk_formats_list[i].pixfmt == p) + return &vk_formats_list[i]; + return NULL; +} + +/* Return the fallback VkFormat array of the table entry for p, or NULL when p is not in vk_formats_list. */ +const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p) +{ + const struct FFVkFormatEntry *entry = vk_find_format_entry(p); + + return entry ? entry->fallback : NULL; +} + +/* + * Find p in vk_formats_list and query the device's format features for the + * requested tiling, first for the entry's native format and then, if it + * differs, for fallback[0]. Each output pointer may be NULL, in which case it + * is not written. + * + * Returns 0 on success, AVERROR(ENOTSUP) if the entry exists but neither + * variant passes the feature checks, AVERROR(EINVAL) if p is not in the table. + */ +static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p, + VkImageTiling tiling, + VkFormat fmts[AV_NUM_DATA_POINTERS], /* Output format list */ + int *nb_images, /* Output number of images */ + VkImageAspectFlags *aspect, /* Output aspect */ + VkImageUsageFlags *supported_usage, /* Output supported usage */ + int disable_multiplane, int need_storage) +{ + VulkanDevicePriv *priv = dev_ctx->hwctx; + AVVulkanDeviceContext *hwctx = &priv->p; + FFVulkanFunctions *vk = &priv->vkctx.vkfn; + + const VkFormatFeatureFlagBits2 basic_flags = VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; + + for (int i = 0; i < nb_vk_formats_list; i++) { + if (vk_formats_list[i].pixfmt == p) { + VkFormatProperties3 fprops = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3, + }; + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + .pNext = &fprops, + }; + VkFormatFeatureFlagBits2 feats_primary, feats_secondary; + int basics_primary = 0, basics_secondary = 0; + int storage_primary = 0, storage_secondary = 0; + + vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, + vk_formats_list[i].vkf, + &prop); + + feats_primary = tiling == VK_IMAGE_TILING_LINEAR ? 
+ fprops.linearTilingFeatures : fprops.optimalTilingFeatures; + basics_primary = (feats_primary & basic_flags) == basic_flags; + storage_primary = !!(feats_primary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + + if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) { + vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev, + vk_formats_list[i].fallback[0], + &prop); + feats_secondary = tiling == VK_IMAGE_TILING_LINEAR ? + fprops.linearTilingFeatures : fprops.optimalTilingFeatures; + basics_secondary = (feats_secondary & basic_flags) == basic_flags; + storage_secondary = !!(feats_secondary & VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + } else { + basics_secondary = basics_primary; + storage_secondary = storage_primary; + } + + if (basics_primary && + !(disable_multiplane && vk_formats_list[i].vk_planes > 1) && + (!need_storage || (need_storage && (storage_primary | storage_secondary)))) { + if (fmts) { + if (vk_formats_list[i].nb_images > 1) { + for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++) + fmts[j] = vk_formats_list[i].fallback[j]; + } else { + fmts[0] = vk_formats_list[i].vkf; + } + } + if (nb_images) + *nb_images = 1; + if (aspect) + *aspect = vk_formats_list[i].aspect; + if (supported_usage) + *supported_usage = ff_vk_map_feats_to_usage(feats_primary) | + ((need_storage && (storage_primary | storage_secondary)) ? 
+ VK_IMAGE_USAGE_STORAGE_BIT : 0); + return 0; + } else if (basics_secondary && + (!need_storage || (need_storage && storage_secondary))) { + if (fmts) { + for (int j = 0; j < vk_formats_list[i].nb_images_fallback; j++) + fmts[j] = vk_formats_list[i].fallback[j]; + } + if (nb_images) + *nb_images = vk_formats_list[i].nb_images_fallback; + if (aspect) + *aspect = vk_formats_list[i].aspect; + if (supported_usage) + *supported_usage = ff_vk_map_feats_to_usage(feats_secondary); + return 0; + } else { + return AVERROR(ENOTSUP); + } + } + } + + return AVERROR(EINVAL); +} + +#if CONFIG_VULKAN_STATIC +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr(VkInstance instance, + const char *pName); +#endif + +/* + * Set hwctx->get_proc_addr: the linked-in vkGetInstanceProcAddr when + * CONFIG_VULKAN_STATIC is set, otherwise the symbol resolved from the first + * library in lib_names that dlopen() can load (handle kept in p->libvulkan). + * + * Returns 0 on success, AVERROR_UNKNOWN if no library could be opened or the + * opened library does not export vkGetInstanceProcAddr. + */ +static int load_libvulkan(AVHWDeviceContext *ctx) +{ + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + +#if CONFIG_VULKAN_STATIC + hwctx->get_proc_addr = vkGetInstanceProcAddr; +#else + static const char *lib_names[] = { +#if defined(_WIN32) + "vulkan-1.dll", +#elif defined(__APPLE__) + "libvulkan.dylib", + "libvulkan.1.dylib", + "libMoltenVK.dylib", +#else + "libvulkan.so.1", + "libvulkan.so", +#endif + }; + + for (int i = 0; i < FF_ARRAY_ELEMS(lib_names); i++) { + p->libvulkan = dlopen(lib_names[i], RTLD_NOW | RTLD_LOCAL); + if (p->libvulkan) + break; + } + + if (!p->libvulkan) { + av_log(ctx, AV_LOG_ERROR, "Unable to open the libvulkan library!\n"); + return AVERROR_UNKNOWN; + } + + hwctx->get_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(p->libvulkan, "vkGetInstanceProcAddr"); + if (!hwctx->get_proc_addr) { + /* Without the loader entry point nothing else can be resolved; drop the handle and clear it so it cannot be closed twice. */ + av_log(ctx, AV_LOG_ERROR, "Unable to find vkGetInstanceProcAddr in the libvulkan library!\n"); + dlclose(p->libvulkan); + p->libvulkan = NULL; + return AVERROR_UNKNOWN; + } +#endif /* CONFIG_VULKAN_STATIC */ + + return 0; +} + +typedef struct VulkanOptExtension { + const char *name; + FFVulkanExtensions flag; +} VulkanOptExtension; + +static const VulkanOptExtension optional_instance_exts[] = { + { VK_EXT_LAYER_SETTINGS_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, +#ifdef __APPLE__ + { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, +#endif +}; + +static const VulkanOptExtension optional_device_exts[] = { + /* Misc 
or required by other extensions */ + { VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_PORTABILITY_SUBSET }, + { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR }, + { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER }, + { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, + { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, + { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX }, + { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT }, + { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE }, +#ifdef VK_KHR_shader_expect_assume + { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME }, +#endif + { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 }, +#ifdef VK_KHR_video_maintenance2 + { VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_2 }, +#endif + + /* Imports/exports */ + { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, + { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_DMABUF_MEMORY }, + { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS }, + { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_SEM }, + { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_HOST_MEMORY }, +#ifdef _WIN32 + { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, + { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, +#endif + + /* Video encoding/decoding */ + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, + { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, + { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, + { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, + { 
/*
 * Append a duplicated copy of the string val to the growable string array
 * list, which currently holds count entries.
 *
 * Requires an `int err` variable and a `fail` label in the calling scope.
 * On allocation failure err is set to AVERROR(ENOMEM) and control jumps to
 * fail with list and count still describing the entries added so far, so the
 * caller's cleanup can release them. The reallocation goes through a
 * temporary because av_realloc_array() leaves the old block allocated when it
 * fails; assigning its result straight to list would lose that block.
 */
#define ADD_VAL_TO_LIST(list, count, val)                                      \
    do {                                                                       \
        void *new_list_ = av_realloc_array(list, (count) + 1, sizeof(*list)); \
        if (!new_list_) {                                                      \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        list = new_list_;                                                      \
        list[count] = av_strdup(val);                                          \
        if (!list[count]) {                                                    \
            err = AVERROR(ENOMEM);                                             \
            goto fail;                                                         \
        }                                                                      \
        count++;                                                               \
    } while(0)
RELEASE_PROPS(props, count) \ + if (props) { \ + for (int i = 0; i < count; i++) \ + av_free((void *)((props)[i])); \ + av_free((void *)props); \ + } + +enum FFVulkanDebugMode { + FF_VULKAN_DEBUG_NONE = 0, + /* Standard GPU-assisted validation */ + FF_VULKAN_DEBUG_VALIDATE = 1, + /* Passes printfs in shaders to the debug callback */ + FF_VULKAN_DEBUG_PRINTF = 2, + /* Enables extra printouts */ + FF_VULKAN_DEBUG_PRACTICES = 3, + /* Disables validation but keeps shader debug info and optimizations */ + FF_VULKAN_DEBUG_PROFILE = 4, + + FF_VULKAN_DEBUG_NB, +}; + +static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, + const char * const **dst, uint32_t *num, + enum FFVulkanDebugMode debug_mode) +{ + const char *tstr; + const char **extension_names = NULL; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + int err = 0, found, extensions_found = 0; + + const char *mod; + int optional_exts_num; + uint32_t sup_ext_count; + char *user_exts_str = NULL; + AVDictionaryEntry *user_exts; + VkExtensionProperties *sup_ext; + const VulkanOptExtension *optional_exts; + + if (!dev) { + mod = "instance"; + optional_exts = optional_instance_exts; + optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts); + user_exts = av_dict_get(opts, "instance_extensions", NULL, 0); + if (user_exts) { + user_exts_str = av_strdup(user_exts->value); + if (!user_exts_str) { + err = AVERROR(ENOMEM); + goto fail; + } + } + vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL); + sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); + if (!sup_ext) + return AVERROR(ENOMEM); + vk->EnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext); + } else { + mod = "device"; + optional_exts = optional_device_exts; + optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts); + user_exts = av_dict_get(opts, "device_extensions", NULL, 0); + if (user_exts) { + user_exts_str = 
av_strdup(user_exts->value); + if (!user_exts_str) { + err = AVERROR(ENOMEM); + goto fail; + } + } + vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, + &sup_ext_count, NULL); + sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties)); + if (!sup_ext) + return AVERROR(ENOMEM); + vk->EnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL, + &sup_ext_count, sup_ext); + } + + for (int i = 0; i < optional_exts_num; i++) { + tstr = optional_exts[i].name; + found = 0; + + /* Intel has had a bad descriptor buffer implementation for a while */ + if (p->dprops.driverID == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA && + !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) + continue; + + if (dev && + ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) || + (debug_mode == FF_VULKAN_DEBUG_PRINTF) || + (debug_mode == FF_VULKAN_DEBUG_PRACTICES)) && + !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) { + continue; + } + + for (int j = 0; j < sup_ext_count; j++) { + if (!strcmp(tstr, sup_ext[j].extensionName)) { + found = 1; + break; + } + } + if (!found) + continue; + + av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); + p->vkctx.extensions |= optional_exts[i].flag; + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); + } + + if (!dev && + ((debug_mode == FF_VULKAN_DEBUG_VALIDATE) || + (debug_mode == FF_VULKAN_DEBUG_PRINTF) || + (debug_mode == FF_VULKAN_DEBUG_PRACTICES))) { + tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME; + found = 0; + for (int j = 0; j < sup_ext_count; j++) { + if (!strcmp(tstr, sup_ext[j].extensionName)) { + found = 1; + break; + } + } + if (found) { + av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); + } else { + av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n", + tstr); + err = AVERROR(EINVAL); + goto fail; + } + } + +#ifdef VK_KHR_shader_relaxed_extended_instruction + if (((debug_mode == FF_VULKAN_DEBUG_PRINTF) || + (debug_mode 
== FF_VULKAN_DEBUG_PROFILE)) && dev) { + tstr = VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_EXTENSION_NAME; + found = 0; + for (int j = 0; j < sup_ext_count; j++) { + if (!strcmp(tstr, sup_ext[j].extensionName)) { + found = 1; + break; + } + } + if (found) { + av_log(ctx, AV_LOG_VERBOSE, "Using %s extension %s\n", mod, tstr); + ADD_VAL_TO_LIST(extension_names, extensions_found, tstr); + } else { + av_log(ctx, AV_LOG_ERROR, "Debug_printf/profile enabled, but extension \"%s\" not found!\n", + tstr); + err = AVERROR(EINVAL); + goto fail; + } + } +#endif + + if (user_exts_str) { + char *save, *token = av_strtok(user_exts_str, "+", &save); + while (token) { + found = 0; + for (int j = 0; j < sup_ext_count; j++) { + if (!strcmp(token, sup_ext[j].extensionName)) { + found = 1; + break; + } + } + if (found) { + av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, token); + ADD_VAL_TO_LIST(extension_names, extensions_found, token); + } else { + av_log(ctx, AV_LOG_WARNING, "%s extension \"%s\" not found, excluding.\n", + mod, token); + } + token = av_strtok(NULL, "+", &save); + } + } + + *dst = extension_names; + *num = extensions_found; + + av_free(user_exts_str); + av_free(sup_ext); + return 0; + +fail: + RELEASE_PROPS(extension_names, extensions_found); + av_free(user_exts_str); + av_free(sup_ext); + return err; +} + +static int check_layers(AVHWDeviceContext *ctx, AVDictionary *opts, + const char * const **dst, uint32_t *num, + enum FFVulkanDebugMode *debug_mode) +{ + int err = 0; + VulkanDevicePriv *priv = ctx->hwctx; + FFVulkanFunctions *vk = &priv->vkctx.vkfn; + + static const char layer_standard_validation[] = { "VK_LAYER_KHRONOS_validation" }; + int layer_standard_validation_found = 0; + + uint32_t sup_layer_count; + VkLayerProperties *sup_layers; + + AVDictionaryEntry *user_layers = av_dict_get(opts, "layers", NULL, 0); + char *user_layers_str = NULL; + char *save, *token; + + const char **enabled_layers = NULL; + uint32_t enabled_layers_count = 0; + + 
AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0); + enum FFVulkanDebugMode mode; + + *debug_mode = mode = FF_VULKAN_DEBUG_NONE; + + /* Get a list of all layers */ + vk->EnumerateInstanceLayerProperties(&sup_layer_count, NULL); + sup_layers = av_malloc_array(sup_layer_count, sizeof(VkLayerProperties)); + if (!sup_layers) + return AVERROR(ENOMEM); + vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers); + + av_log(ctx, AV_LOG_VERBOSE, "Supported layers:\n"); + for (int i = 0; i < sup_layer_count; i++) + av_log(ctx, AV_LOG_VERBOSE, "\t%s\n", sup_layers[i].layerName); + + /* If no user layers or debug layers are given, return */ + if (!debug_opt && !user_layers) + goto end; + + /* Check for any properly supported validation layer */ + if (debug_opt) { + if (!strcmp(debug_opt->value, "profile")) { + mode = FF_VULKAN_DEBUG_PROFILE; + } else if (!strcmp(debug_opt->value, "printf")) { + mode = FF_VULKAN_DEBUG_PRINTF; + } else if (!strcmp(debug_opt->value, "validate")) { + mode = FF_VULKAN_DEBUG_VALIDATE; + } else if (!strcmp(debug_opt->value, "practices")) { + mode = FF_VULKAN_DEBUG_PRACTICES; + } else { + char *end_ptr = NULL; + int idx = strtol(debug_opt->value, &end_ptr, 10); + if (end_ptr == debug_opt->value || end_ptr[0] != '\0' || + idx < 0 || idx >= FF_VULKAN_DEBUG_NB) { + av_log(ctx, AV_LOG_ERROR, "Invalid debugging mode \"%s\"\n", + debug_opt->value); + err = AVERROR(EINVAL); + goto end; + } + mode = idx; + } + } + + /* If mode is VALIDATE or PRINTF, try to find the standard validation layer extension */ + if ((mode == FF_VULKAN_DEBUG_VALIDATE) || + (mode == FF_VULKAN_DEBUG_PRINTF) || + (mode == FF_VULKAN_DEBUG_PRACTICES)) { + for (int i = 0; i < sup_layer_count; i++) { + if (!strcmp(layer_standard_validation, sup_layers[i].layerName)) { + av_log(ctx, AV_LOG_VERBOSE, "Standard validation layer %s is enabled\n", + layer_standard_validation); + ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, layer_standard_validation); + 
*debug_mode = mode; + layer_standard_validation_found = 1; + break; + } + } + if (!layer_standard_validation_found) { + av_log(ctx, AV_LOG_ERROR, + "Validation Layer \"%s\" not supported\n", layer_standard_validation); + err = AVERROR(ENOTSUP); + goto end; + } + } else if (mode == FF_VULKAN_DEBUG_PROFILE) { + *debug_mode = mode; + } + + /* Process any custom layers enabled */ + if (user_layers) { + int found; + + user_layers_str = av_strdup(user_layers->value); + if (!user_layers_str) { + err = AVERROR(ENOMEM); + goto fail; + } + + token = av_strtok(user_layers_str, "+", &save); + while (token) { + found = 0; + + /* If debug=1/2 was specified as an option, skip this layer */ + if (!strcmp(layer_standard_validation, token) && layer_standard_validation_found) { + token = av_strtok(NULL, "+", &save); + break; + } + + /* Try to find the layer in the list of supported layers */ + for (int j = 0; j < sup_layer_count; j++) { + if (!strcmp(token, sup_layers[j].layerName)) { + found = 1; + break; + } + } + + if (found) { + av_log(ctx, AV_LOG_VERBOSE, "Using layer: %s\n", token); + ADD_VAL_TO_LIST(enabled_layers, enabled_layers_count, token); + + /* If debug was not set as an option, force it */ + if (!strcmp(layer_standard_validation, token)) + *debug_mode = FF_VULKAN_DEBUG_VALIDATE; + } else { + av_log(ctx, AV_LOG_ERROR, + "Layer \"%s\" not supported\n", token); + err = AVERROR(EINVAL); + goto end; + } + + token = av_strtok(NULL, "+", &save); + } + } + +fail: +end: + av_free(sup_layers); + av_free(user_layers_str); + + if (err < 0) { + RELEASE_PROPS(enabled_layers, enabled_layers_count); + } else { + *dst = enabled_layers; + *num = enabled_layers_count; + } + + return err; +} + +/* Creates a VkInstance */ +static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts, + enum FFVulkanDebugMode *debug_mode) +{ + int err = 0; + VkResult ret; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + 
VkApplicationInfo application_info = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "ffmpeg", + .applicationVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, + LIBAVUTIL_VERSION_MINOR, + LIBAVUTIL_VERSION_MICRO), + .pEngineName = "libavutil", + .apiVersion = VK_API_VERSION_1_3, + .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR, + LIBAVUTIL_VERSION_MINOR, + LIBAVUTIL_VERSION_MICRO), + }; + VkValidationFeaturesEXT validation_features = { + .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, + }; + VkInstanceCreateInfo inst_props = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &application_info, + }; + + if (!hwctx->get_proc_addr) { + err = load_libvulkan(ctx); + if (err < 0) + return err; + } + + err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 0, 0); + if (err < 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to load instance enumeration functions!\n"); + return err; + } + + err = check_layers(ctx, opts, &inst_props.ppEnabledLayerNames, + &inst_props.enabledLayerCount, debug_mode); + if (err) + goto fail; + + /* Check for present/missing extensions */ + err = check_extensions(ctx, 0, opts, &inst_props.ppEnabledExtensionNames, + &inst_props.enabledExtensionCount, *debug_mode); + hwctx->enabled_inst_extensions = inst_props.ppEnabledExtensionNames; + hwctx->nb_enabled_inst_extensions = inst_props.enabledExtensionCount; + if (err < 0) + goto fail; + + /* Enable debug features if needed */ + if (*debug_mode == FF_VULKAN_DEBUG_VALIDATE) { + static const VkValidationFeatureEnableEXT feat_list_validate[] = { + VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, + }; + validation_features.pEnabledValidationFeatures = feat_list_validate; + validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_validate); + inst_props.pNext = &validation_features; + } else if 
(*debug_mode == FF_VULKAN_DEBUG_PRINTF) { + static const VkValidationFeatureEnableEXT feat_list_debug[] = { + VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, + VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, + VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT, + }; + validation_features.pEnabledValidationFeatures = feat_list_debug; + validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_debug); + inst_props.pNext = &validation_features; + } else if (*debug_mode == FF_VULKAN_DEBUG_PRACTICES) { + static const VkValidationFeatureEnableEXT feat_list_practices[] = { + VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, + VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, + }; + validation_features.pEnabledValidationFeatures = feat_list_practices; + validation_features.enabledValidationFeatureCount = FF_ARRAY_ELEMS(feat_list_practices); + inst_props.pNext = &validation_features; + } + +#ifdef __APPLE__ + for (int i = 0; i < inst_props.enabledExtensionCount; i++) { + if (!strcmp(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, + inst_props.ppEnabledExtensionNames[i])) { + inst_props.flags |= VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; + break; + } + } +#endif + + /* Try to create the instance */ + ret = vk->CreateInstance(&inst_props, hwctx->alloc, &hwctx->inst); + + /* Check for errors */ + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 0); + if (err < 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to load instance functions!\n"); + goto fail; + } + + /* Setup debugging callback if needed */ + if ((*debug_mode == FF_VULKAN_DEBUG_VALIDATE) || + (*debug_mode == FF_VULKAN_DEBUG_PRINTF) || + (*debug_mode == FF_VULKAN_DEBUG_PRACTICES)) { + VkDebugUtilsMessengerCreateInfoEXT dbg = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + 
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = vk_dbg_callback, + .pUserData = ctx, + }; + + vk->CreateDebugUtilsMessengerEXT(hwctx->inst, &dbg, + hwctx->alloc, &p->debug_ctx); + } + + err = 0; + +fail: + RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount); + return err; +} + +typedef struct VulkanDeviceSelection { + uint8_t uuid[VK_UUID_SIZE]; /* Will use this first unless !has_uuid */ + int has_uuid; + uint32_t drm_major; /* Will use this second unless !has_drm */ + uint32_t drm_minor; /* Will use this second unless !has_drm */ + uint32_t has_drm; /* has drm node info */ + const char *name; /* Will use this third unless NULL */ + uint32_t pci_device; /* Will use this fourth unless 0x0 */ + uint32_t vendor_id; /* Last resort to find something deterministic */ + int index; /* Finally fall back to index */ +} VulkanDeviceSelection; + +static const char *vk_dev_type(enum VkPhysicalDeviceType type) +{ + switch (type) { + case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: return "integrated"; + case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: return "discrete"; + case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: return "virtual"; + case VK_PHYSICAL_DEVICE_TYPE_CPU: return "software"; + default: return "unknown"; + } +} + +/* Finds a device */ +static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select) +{ + int err = 0, choice = -1; + uint32_t num; + VkResult ret; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + VkPhysicalDevice *devices = NULL; + VkPhysicalDeviceIDProperties *idp = NULL; + VkPhysicalDeviceProperties2 *prop = 
NULL; + VkPhysicalDeviceDriverProperties *driver_prop = NULL; + VkPhysicalDeviceDrmPropertiesEXT *drm_prop = NULL; + + ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, NULL); + if (ret != VK_SUCCESS || !num) { + av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", ff_vk_ret2str(ret)); + return AVERROR(ENODEV); + } + + devices = av_malloc_array(num, sizeof(VkPhysicalDevice)); + if (!devices) + return AVERROR(ENOMEM); + + ret = vk->EnumeratePhysicalDevices(hwctx->inst, &num, devices); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR(ENODEV); + goto end; + } + + prop = av_calloc(num, sizeof(*prop)); + if (!prop) { + err = AVERROR(ENOMEM); + goto end; + } + + idp = av_calloc(num, sizeof(*idp)); + if (!idp) { + err = AVERROR(ENOMEM); + goto end; + } + + driver_prop = av_calloc(num, sizeof(*driver_prop)); + if (!driver_prop) { + err = AVERROR(ENOMEM); + goto end; + } + + if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) { + drm_prop = av_calloc(num, sizeof(*drm_prop)); + if (!drm_prop) { + err = AVERROR(ENOMEM); + goto end; + } + } + + av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n"); + for (int i = 0; i < num; i++) { + if (p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) { + drm_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT; + driver_prop[i].pNext = &drm_prop[i]; + } + driver_prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + idp[i].pNext = &driver_prop[i]; + idp[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; + prop[i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + prop[i].pNext = &idp[i]; + + vk->GetPhysicalDeviceProperties2(devices[i], &prop[i]); + av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i, + prop[i].properties.deviceName, + vk_dev_type(prop[i].properties.deviceType), + prop[i].properties.deviceID); + } + + if (select->has_uuid) { + for (int i = 0; i < num; i++) { + if (!strncmp(idp[i].deviceUUID, select->uuid, 
VK_UUID_SIZE)) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device by given UUID!\n"); + err = AVERROR(ENODEV); + goto end; + } else if ((p->vkctx.extensions & FF_VK_EXT_DEVICE_DRM) && select->has_drm) { + for (int i = 0; i < num; i++) { + if ((select->drm_major == drm_prop[i].primaryMajor && + select->drm_minor == drm_prop[i].primaryMinor) || + (select->drm_major == drm_prop[i].renderMajor && + select->drm_minor == drm_prop[i].renderMinor)) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device by given DRM node numbers %i:%i!\n", + select->drm_major, select->drm_minor); + err = AVERROR(ENODEV); + goto end; + } else if (select->name) { + av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name); + for (int i = 0; i < num; i++) { + if (strstr(prop[i].properties.deviceName, select->name)) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n", + select->name); + err = AVERROR(ENODEV); + goto end; + } else if (select->pci_device) { + av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device); + for (int i = 0; i < num; i++) { + if (select->pci_device == prop[i].properties.deviceID) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n", + select->pci_device); + err = AVERROR(EINVAL); + goto end; + } else if (select->vendor_id) { + av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id); + for (int i = 0; i < num; i++) { + if (select->vendor_id == prop[i].properties.vendorID) { + choice = i; + goto end; + } + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n", + select->vendor_id); + err = AVERROR(ENODEV); + goto end; + } else { + if (select->index < num) { + choice = select->index; + goto end; + } + av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n", + select->index); + err = AVERROR(ENODEV); + goto end; + } + +end: + if 
(choice > -1) { + av_log(ctx, AV_LOG_VERBOSE, "Device %d selected: %s (%s) (0x%x)\n", + choice, prop[choice].properties.deviceName, + vk_dev_type(prop[choice].properties.deviceType), + prop[choice].properties.deviceID); + hwctx->phys_dev = devices[choice]; + p->props = prop[choice]; + p->props.pNext = NULL; + p->dprops = driver_prop[choice]; + p->dprops.pNext = NULL; + } + + av_free(devices); + av_free(prop); + av_free(idp); + av_free(drm_prop); + av_free(driver_prop); + + return err; +} + +/* Picks the least used qf with the fewest unneeded flags, or -1 if none found */ +static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf, + VkQueueFlagBits flags) +{ + int index = -1; + uint32_t min_score = UINT32_MAX; + + for (int i = 0; i < num_qf; i++) { + VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags; + + /* Per the spec, reporting transfer caps is optional for these 2 types */ + if ((flags & VK_QUEUE_TRANSFER_BIT) && + (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))) + qflags |= VK_QUEUE_TRANSFER_BIT; + + if (qflags & flags) { + uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits; + if (score < min_score) { + index = i; + min_score = score; + } + } + } + + if (index > -1) + qf[index].queueFamilyProperties.timestampValidBits++; + + return index; +} + +static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf, + VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf, + VkVideoCodecOperationFlagBitsKHR flags) +{ + int index = -1; + uint32_t min_score = UINT32_MAX; + + for (int i = 0; i < num_qf; i++) { + const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags; + const VkQueueFlagBits vflags = qf_vid[i].videoCodecOperations; + + if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR))) + continue; + + if (vflags & flags) { + uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits; + if (score < min_score) { + 
index = i; + min_score = score; + } + } + } + + if (index > -1) + qf[index].queueFamilyProperties.timestampValidBits++; + + return index; +} + +static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd) +{ + uint32_t num; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + + VkQueueFamilyProperties2 *qf = NULL; + VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL; + + /* First get the number of queue families */ + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL); + if (!num) { + av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); + return AVERROR_EXTERNAL; + } + + /* Then allocate memory */ + qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2)); + if (!qf) + return AVERROR(ENOMEM); + + qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR)); + if (!qf_vid) + return AVERROR(ENOMEM); + + for (uint32_t i = 0; i < num; i++) { + qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, + }; + qf[i] = (VkQueueFamilyProperties2) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, + .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? &qf_vid[i] : NULL, + }; + } + + /* Finally retrieve the queue families */ + vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf); + + av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n"); + for (int i = 0; i < num; i++) { + av_log(ctx, AV_LOG_VERBOSE, " %i:%s%s%s%s%s%s%s%s (queues: %i)\n", i, + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? 
" decode" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_OPTICAL_FLOW_BIT_NV) ? " optical_flow" : "", + ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "", + qf[i].queueFamilyProperties.queueCount); + + /* We use this field to keep a score of how many times we've used that + * queue family in order to make better choices. */ + qf[i].queueFamilyProperties.timestampValidBits = 0; + } + + hwctx->nb_qf = 0; + + /* Pick each queue family to use. */ +#define PICK_QF(type, vid_op) \ + do { \ + uint32_t i; \ + uint32_t idx; \ + \ + if (vid_op) \ + idx = pick_video_queue_family(qf, qf_vid, num, vid_op); \ + else \ + idx = pick_queue_family(qf, num, type); \ + \ + if (idx == -1) \ + continue; \ + \ + for (i = 0; i < hwctx->nb_qf; i++) { \ + if (hwctx->qf[i].idx == idx) { \ + hwctx->qf[i].flags |= type; \ + hwctx->qf[i].video_caps |= vid_op; \ + break; \ + } \ + } \ + if (i == hwctx->nb_qf) { \ + hwctx->qf[i].idx = idx; \ + hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \ + if (p->limit_queues || \ + p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY) { \ + int max = p->limit_queues; \ + if (type == VK_QUEUE_GRAPHICS_BIT) \ + hwctx->qf[i].num = FFMIN(hwctx->qf[i].num, \ + max ? 
max : 1); \ + else if (max) \ + hwctx->qf[i].num = FFMIN(hwctx->qf[i].num, max); \ + } \ + hwctx->qf[i].flags = type; \ + hwctx->qf[i].video_caps = vid_op; \ + hwctx->nb_qf++; \ + } \ + } while (0) + + PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); + PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); + PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR); + + PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR); + PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR); + + PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR); + PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR); + +#ifdef VK_KHR_video_decode_vp9 + PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR); +#endif + +#ifdef VK_KHR_video_encode_av1 + PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR); +#endif + PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR); + + av_free(qf); + av_free(qf_vid); + +#undef PICK_QF + + cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf, + sizeof(VkDeviceQueueCreateInfo)); + if (!cd->pQueueCreateInfos) + return AVERROR(ENOMEM); + + for (uint32_t i = 0; i < hwctx->nb_qf; i++) { + int dup = 0; + float *weights = NULL; + VkDeviceQueueCreateInfo *pc; + for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) { + if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) { + dup = 1; + break; + } + } + if (dup) + continue; + + weights = av_malloc_array(hwctx->qf[i].num, sizeof(float)); + if (!weights) { + for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) + av_free((void *)cd->pQueueCreateInfos[i].pQueuePriorities); + av_free((void *)cd->pQueueCreateInfos); + return AVERROR(ENOMEM); + } + + for (uint32_t j = 0; j < hwctx->qf[i].num; j++) + weights[j] = 1.0; + + pc = (VkDeviceQueueCreateInfo 
*)cd->pQueueCreateInfos; + pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueFamilyIndex = hwctx->qf[i].idx, + .queueCount = hwctx->qf[i].num, + .pQueuePriorities = weights, + }; + } + +#if FF_API_VULKAN_FIXED_QUEUES +FF_DISABLE_DEPRECATION_WARNINGS + /* Setup deprecated fields */ + hwctx->queue_family_index = -1; + hwctx->queue_family_comp_index = -1; + hwctx->queue_family_tx_index = -1; + hwctx->queue_family_encode_index = -1; + hwctx->queue_family_decode_index = -1; + +#define SET_OLD_QF(field, nb_field, type) \ + do { \ + if (field < 0 && hwctx->qf[i].flags & type) { \ + field = hwctx->qf[i].idx; \ + nb_field = hwctx->qf[i].num; \ + } \ + } while (0) + + for (uint32_t i = 0; i < hwctx->nb_qf; i++) { + SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT); + SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT); + SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT); + SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); + SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR); + } + +#undef SET_OLD_QF +FF_ENABLE_DEPRECATION_WARNINGS +#endif + + return 0; +} + +/* Only resources created by vulkan_device_create should be released here, + * resources created by vulkan_device_init should be released by + * vulkan_device_uninit, to make sure we don't free user provided resources, + * and there is no leak. 
+ */ +static void vulkan_device_free(AVHWDeviceContext *ctx) +{ + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + + if (hwctx->act_dev) + vk->DestroyDevice(hwctx->act_dev, hwctx->alloc); + + if (p->debug_ctx) + vk->DestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx, + hwctx->alloc); + + if (hwctx->inst) + vk->DestroyInstance(hwctx->inst, hwctx->alloc); + + if (p->libvulkan) + dlclose(p->libvulkan); + + RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions); + RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions); +} + +static void vulkan_device_uninit(AVHWDeviceContext *ctx) +{ + VulkanDevicePriv *p = ctx->hwctx; + + for (uint32_t i = 0; i < p->nb_tot_qfs; i++) { + pthread_mutex_destroy(p->qf_mutex[i]); + av_freep(&p->qf_mutex[i]); + } + av_freep(&p->qf_mutex); + + ff_vk_uninit(&p->vkctx); +} + +static int vulkan_device_has_rebar(AVHWDeviceContext *ctx) +{ + VulkanDevicePriv *p = ctx->hwctx; + VkDeviceSize max_vram = 0, max_visible_vram = 0; + + /* Get device memory properties */ + for (int i = 0; i < p->mprops.memoryTypeCount; i++) { + const VkMemoryType type = p->mprops.memoryTypes[i]; + const VkMemoryHeap heap = p->mprops.memoryHeaps[type.heapIndex]; + if (!(type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) + continue; + max_vram = FFMAX(max_vram, heap.size); + if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + max_visible_vram = FFMAX(max_visible_vram, heap.size); + } + + return max_vram - max_visible_vram < 1024; /* 1 kB tolerance */ +} + +static int vulkan_device_create_internal(AVHWDeviceContext *ctx, + VulkanDeviceSelection *dev_select, + int disable_multiplane, + AVDictionary *opts, int flags) +{ + int err = 0; + VkResult ret; + AVDictionaryEntry *opt_d; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + enum FFVulkanDebugMode debug_mode = 
FF_VULKAN_DEBUG_NONE; + VulkanDeviceFeatures supported_feats = { 0 }; + VkDeviceCreateInfo dev_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + }; + + /* Create an instance if not given one */ + if ((err = create_instance(ctx, opts, &debug_mode))) + goto end; + + /* Find a physical device (if not given one) */ + if ((err = find_device(ctx, dev_select))) + goto end; + + /* Find and enable extensions for the physical device */ + if ((err = check_extensions(ctx, 1, opts, &dev_info.ppEnabledExtensionNames, + &dev_info.enabledExtensionCount, debug_mode))) { + for (int i = 0; i < dev_info.queueCreateInfoCount; i++) + av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities); + av_free((void *)dev_info.pQueueCreateInfos); + goto end; + } + + /* Get supported memory types */ + vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); + + /* Get all supported features for the physical device */ + device_features_init(ctx, &supported_feats); + vk->GetPhysicalDeviceFeatures2(hwctx->phys_dev, &supported_feats.device); + + /* Copy all needed features from those supported and activate them */ + device_features_init(ctx, &p->feats); + device_features_copy_needed(&p->feats, &supported_feats); + dev_info.pNext = p->feats.device.pNext; + dev_info.pEnabledFeatures = &p->feats.device.features; + + /* Setup enabled queue families */ + if ((err = setup_queue_families(ctx, &dev_info))) + goto end; + + /* Finally create the device */ + ret = vk->CreateDevice(hwctx->phys_dev, &dev_info, hwctx->alloc, + &hwctx->act_dev); + + for (int i = 0; i < dev_info.queueCreateInfoCount; i++) + av_free((void *)dev_info.pQueueCreateInfos[i].pQueuePriorities); + av_free((void *)dev_info.pQueueCreateInfos); + + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n", + ff_vk_ret2str(ret)); + for (int i = 0; i < dev_info.enabledExtensionCount; i++) + av_free((void *)dev_info.ppEnabledExtensionNames[i]); + av_free((void 
*)dev_info.ppEnabledExtensionNames); + err = AVERROR_EXTERNAL; + goto end; + } + + /* Tiled images setting, use them by default */ + opt_d = av_dict_get(opts, "linear_images", NULL, 0); + if (opt_d) + p->use_linear_images = strtol(opt_d->value, NULL, 10); + + /* Limit queues to a given number if needed */ + opt_d = av_dict_get(opts, "limit_queues", NULL, 0); + if (opt_d) + p->limit_queues = strtol(opt_d->value, NULL, 10); + + /* The disable_multiplane argument takes precedent over the option */ + p->disable_multiplane = disable_multiplane; + if (!p->disable_multiplane) { + opt_d = av_dict_get(opts, "disable_multiplane", NULL, 0); + if (opt_d) + p->disable_multiplane = strtol(opt_d->value, NULL, 10); + } + + /* Disable host pointer imports (by default on nvidia) */ + p->avoid_host_import = p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY; + opt_d = av_dict_get(opts, "avoid_host_import", NULL, 0); + if (opt_d) + p->avoid_host_import = strtol(opt_d->value, NULL, 10); + + /* Set the public device feature struct and its pNext chain */ + hwctx->device_features = p->feats.device; + + /* Set the list of all active extensions */ + hwctx->enabled_dev_extensions = dev_info.ppEnabledExtensionNames; + hwctx->nb_enabled_dev_extensions = dev_info.enabledExtensionCount; + + /* The extension lists need to be freed */ + ctx->free = vulkan_device_free; + +end: + return err; +} + +static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index) +{ + VulkanDevicePriv *p = ctx->hwctx; + pthread_mutex_lock(&p->qf_mutex[queue_family][index]); +} + +static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index) +{ + VulkanDevicePriv *p = ctx->hwctx; + pthread_mutex_unlock(&p->qf_mutex[queue_family][index]); +} + +static int vulkan_device_init(AVHWDeviceContext *ctx) +{ + int err = 0; + uint32_t qf_num; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + 
VkQueueFamilyProperties2 *qf; + VkQueueFamilyVideoPropertiesKHR *qf_vid; + VkPhysicalDeviceExternalSemaphoreInfo ext_sem_props_info; + int graph_index, comp_index, tx_index, enc_index, dec_index; + + /* Set device extension flags */ + for (int i = 0; i < hwctx->nb_enabled_dev_extensions; i++) { + for (int j = 0; j < FF_ARRAY_ELEMS(optional_device_exts); j++) { + if (!strcmp(hwctx->enabled_dev_extensions[i], + optional_device_exts[j].name)) { + p->vkctx.extensions |= optional_device_exts[j].flag; + break; + } + } + } + + err = ff_vk_load_functions(ctx, vk, p->vkctx.extensions, 1, 1); + if (err < 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to load functions!\n"); + return err; + } + + p->props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + p->props.pNext = &p->hprops; + p->hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT; + p->hprops.pNext = &p->dprops; + p->dprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + + vk->GetPhysicalDeviceProperties2(hwctx->phys_dev, &p->props); + av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", + p->props.properties.deviceName); + av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n"); + av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %"PRIu64"\n", + p->props.properties.limits.optimalBufferCopyRowPitchAlignment); + av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"SIZE_SPECIFIER"\n", + p->props.properties.limits.minMemoryMapAlignment); + av_log(ctx, AV_LOG_VERBOSE, " nonCoherentAtomSize: %"PRIu64"\n", + p->props.properties.limits.nonCoherentAtomSize); + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) + av_log(ctx, AV_LOG_VERBOSE, " minImportedHostPointerAlignment: %"PRIu64"\n", + p->hprops.minImportedHostPointerAlignment); + + vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL); + if (!qf_num) { + av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n"); + return AVERROR_EXTERNAL; + } + + ext_sem_props_info = 
(VkPhysicalDeviceExternalSemaphoreInfo) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO, + }; + + /* Opaque FD semaphore properties */ + ext_sem_props_info.handleType = +#ifdef _WIN32 + IsWindows8OrGreater() + ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT + : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT; +#else + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; +#endif + p->ext_sem_props_opaque.sType = VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES; + vk->GetPhysicalDeviceExternalSemaphoreProperties(hwctx->phys_dev, + &ext_sem_props_info, + &p->ext_sem_props_opaque); + + qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2)); + if (!qf) + return AVERROR(ENOMEM); + + qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR)); + if (!qf_vid) { + av_free(qf); + return AVERROR(ENOMEM); + } + + for (uint32_t i = 0; i < qf_num; i++) { + qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, + }; + qf[i] = (VkQueueFamilyProperties2) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, + .pNext = p->vkctx.extensions & FF_VK_EXT_VIDEO_QUEUE ? 
&qf_vid[i] : NULL, + }; + } + + vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf); + + p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex)); + if (!p->qf_mutex) { + err = AVERROR(ENOMEM); + goto end; + } + p->nb_tot_qfs = qf_num; + + for (uint32_t i = 0; i < qf_num; i++) { + p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount, + sizeof(**p->qf_mutex)); + if (!p->qf_mutex[i]) { + err = AVERROR(ENOMEM); + goto end; + } + for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) { + err = pthread_mutex_init(&p->qf_mutex[i][j], NULL); + if (err != 0) { + av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n", + av_err2str(err)); + err = AVERROR(err); + goto end; + } + } + } + +#if FF_API_VULKAN_FIXED_QUEUES +FF_DISABLE_DEPRECATION_WARNINGS + graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1; + comp_index = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1; + tx_index = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1; + dec_index = hwctx->nb_decode_queues ? hwctx->queue_family_decode_index : -1; + enc_index = hwctx->nb_encode_queues ? hwctx->queue_family_encode_index : -1; + +#define CHECK_QUEUE(type, required, fidx, ctx_qf, qc) \ + do { \ + if (ctx_qf < 0 && required) { \ + av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \ + " in the context!\n", type); \ + err = AVERROR(EINVAL); \ + goto end; \ + } else if (fidx < 0 || ctx_qf < 0) { \ + break; \ + } else if (ctx_qf >= qf_num) { \ + av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \ + type, ctx_qf, qf_num); \ + err = AVERROR(EINVAL); \ + goto end; \ + } \ + \ + av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \ + " for%s%s%s%s%s\n", \ + ctx_qf, qc, \ + ctx_qf == graph_index ? " graphics" : "", \ + ctx_qf == comp_index ? " compute" : "", \ + ctx_qf == tx_index ? " transfers" : "", \ + ctx_qf == enc_index ? 
" encode" : "", \ + ctx_qf == dec_index ? " decode" : ""); \ + graph_index = (ctx_qf == graph_index) ? -1 : graph_index; \ + comp_index = (ctx_qf == comp_index) ? -1 : comp_index; \ + tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \ + enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \ + dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \ + } while (0) + + CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues); + CHECK_QUEUE("compute", 1, comp_index, hwctx->queue_family_comp_index, hwctx->nb_comp_queues); + CHECK_QUEUE("upload", 1, tx_index, hwctx->queue_family_tx_index, hwctx->nb_tx_queues); + CHECK_QUEUE("decode", 0, dec_index, hwctx->queue_family_decode_index, hwctx->nb_decode_queues); + CHECK_QUEUE("encode", 0, enc_index, hwctx->queue_family_encode_index, hwctx->nb_encode_queues); + +#undef CHECK_QUEUE + + /* Update the new queue family fields. If non-zero already, + * it means API users have set it. */ + if (!hwctx->nb_qf) { +#define ADD_QUEUE(ctx_qf, qc, flag) \ + do { \ + if (ctx_qf != -1) { \ + hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \ + .idx = ctx_qf, \ + .num = qc, \ + .flags = flag, \ + }; \ + } \ + } while (0) + + ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT); + ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT); + ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT); + ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR); + ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR); +#undef ADD_QUEUE + } +FF_ENABLE_DEPRECATION_WARNINGS +#endif + + for (int i = 0; i < hwctx->nb_qf; i++) { + if (!hwctx->qf[i].video_caps && + hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR | + VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) { + hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations; + } + } + + 
/* Setup array for pQueueFamilyIndices with used queue families */ + p->nb_img_qfs = 0; + for (int i = 0; i < hwctx->nb_qf; i++) { + int seen = 0; + /* Make sure each entry is unique + * (VUID-VkBufferCreateInfo-sharingMode-01419) */ + for (int j = (i - 1); j >= 0; j--) { + if (hwctx->qf[i].idx == hwctx->qf[j].idx) { + seen = 1; + break; + } + } + if (!seen) + p->img_qfs[p->nb_img_qfs++] = hwctx->qf[i].idx; + } + + if (!hwctx->lock_queue) + hwctx->lock_queue = lock_queue; + if (!hwctx->unlock_queue) + hwctx->unlock_queue = unlock_queue; + + /* Re-query device capabilities, in case the device was created externally */ + vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); + + p->vkctx.device = ctx; + p->vkctx.hwctx = hwctx; + + ff_vk_load_props(&p->vkctx); + p->compute_qf = ff_vk_qf_find(&p->vkctx, VK_QUEUE_COMPUTE_BIT, 0); + p->transfer_qf = ff_vk_qf_find(&p->vkctx, VK_QUEUE_TRANSFER_BIT, 0); + + /* Re-query device capabilities, in case the device was created externally */ + vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops); + + /* Only use host image transfers if ReBAR is enabled */ + p->disable_host_transfer = !vulkan_device_has_rebar(ctx); + +end: + av_free(qf_vid); + av_free(qf); + return err; +} + +static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device, + AVDictionary *opts, int flags) +{ + VulkanDeviceSelection dev_select = { 0 }; + if (device && device[0]) { + char *end = NULL; + dev_select.index = strtol(device, &end, 10); + if (end == device) { + dev_select.index = 0; + dev_select.name = device; + } + } + + return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags); +} + +static int vulkan_device_derive(AVHWDeviceContext *ctx, + AVHWDeviceContext *src_ctx, + AVDictionary *opts, int flags) +{ + av_unused VulkanDeviceSelection dev_select = { 0 }; + + /* If there's only one device on the system, then even if its not covered + * by the following checks (e.g. 
non-PCIe ARM GPU), having an empty + * dev_select will mean it'll get picked. */ + switch(src_ctx->type) { +#if CONFIG_VAAPI + case AV_HWDEVICE_TYPE_VAAPI: { + AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx; + VADisplay dpy = src_hwctx->display; +#if VA_CHECK_VERSION(1, 15, 0) + VAStatus vas; + VADisplayAttribute attr = { + .type = VADisplayPCIID, + }; +#endif + const char *vendor; + +#if VA_CHECK_VERSION(1, 15, 0) + vas = vaGetDisplayAttributes(dpy, &attr, 1); + if (vas == VA_STATUS_SUCCESS && attr.flags != VA_DISPLAY_ATTRIB_NOT_SUPPORTED) + dev_select.pci_device = (attr.value & 0xFFFF); +#endif + + if (!dev_select.pci_device) { + vendor = vaQueryVendorString(dpy); + if (!vendor) { + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from VAAPI!\n"); + return AVERROR_EXTERNAL; + } + + if (strstr(vendor, "AMD")) + dev_select.vendor_id = 0x1002; + } + + return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags); + } +#endif +#if CONFIG_LIBDRM + case AV_HWDEVICE_TYPE_DRM: { + int err; + struct stat drm_node_info; + drmDevice *drm_dev_info; + AVDRMDeviceContext *src_hwctx = src_ctx->hwctx; + + err = fstat(src_hwctx->fd, &drm_node_info); + if (err) { + av_log(ctx, AV_LOG_ERROR, "Unable to get node info from DRM fd: %s!\n", + av_err2str(AVERROR(errno))); + return AVERROR_EXTERNAL; + } + + dev_select.drm_major = major(drm_node_info.st_dev); + dev_select.drm_minor = minor(drm_node_info.st_dev); + dev_select.has_drm = 1; + + err = drmGetDevice(src_hwctx->fd, &drm_dev_info); + if (err) { + av_log(ctx, AV_LOG_ERROR, "Unable to get device info from DRM fd: %s!\n", + av_err2str(AVERROR(errno))); + return AVERROR_EXTERNAL; + } + + if (drm_dev_info->bustype == DRM_BUS_PCI) + dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id; + + drmFreeDevice(&drm_dev_info); + + return vulkan_device_create_internal(ctx, &dev_select, 0, opts, flags); + } +#endif +#if CONFIG_CUDA + case AV_HWDEVICE_TYPE_CUDA: { + AVHWDeviceContext *cuda_cu = src_ctx; + 
AVCUDADeviceContext *src_hwctx = src_ctx->hwctx; + AVCUDADeviceContextInternal *cu_internal = src_hwctx->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + + int ret = CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid, + cu_internal->cuda_device)); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Unable to get UUID from CUDA!\n"); + return AVERROR_EXTERNAL; + } + + dev_select.has_uuid = 1; + + /* + * CUDA is not able to import multiplane images, so always derive a + * Vulkan device with multiplane disabled. + */ + return vulkan_device_create_internal(ctx, &dev_select, 1, opts, flags); + } +#endif + default: + return AVERROR(ENOSYS); + } +} + +static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx, + const void *hwconfig, + AVHWFramesConstraints *constraints) +{ + int count = 0; + VulkanDevicePriv *p = ctx->hwctx; + + for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { + count += vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, + p->use_linear_images ? VK_IMAGE_TILING_LINEAR : + VK_IMAGE_TILING_OPTIMAL, + NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0; + } + + constraints->valid_sw_formats = av_malloc_array(count + 1, + sizeof(enum AVPixelFormat)); + if (!constraints->valid_sw_formats) + return AVERROR(ENOMEM); + + count = 0; + for (enum AVPixelFormat i = 0; i < nb_vk_formats_list; i++) { + if (vkfmt_from_pixfmt2(ctx, vk_formats_list[i].pixfmt, + p->use_linear_images ? 
VK_IMAGE_TILING_LINEAR : + VK_IMAGE_TILING_OPTIMAL, + NULL, NULL, NULL, NULL, p->disable_multiplane, 1) >= 0) { + constraints->valid_sw_formats[count++] = vk_formats_list[i].pixfmt; + } + } + + constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE; + + constraints->min_width = 1; + constraints->min_height = 1; + constraints->max_width = p->props.properties.limits.maxImageDimension2D; + constraints->max_height = p->props.properties.limits.maxImageDimension2D; + + constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat)); + if (!constraints->valid_hw_formats) + return AVERROR(ENOMEM); + + constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN; + constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE; + + return 0; +} + +static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, const void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) +{ + VkResult ret; + int index = -1; + VulkanDevicePriv *p = ctx->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + AVVulkanDeviceContext *dev_hwctx = &p->p; + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = alloc_extension, + .allocationSize = req->size, + }; + + /* The vulkan spec requires memory types to be sorted in the "optimal" + * order, so the first matching type we find will be the best/fastest one */ + for (int i = 0; i < p->mprops.memoryTypeCount; i++) { + const VkMemoryType *type = &p->mprops.memoryTypes[i]; + + /* The memory type must be supported by the requirements (bitfield) */ + if (!(req->memoryTypeBits & (1 << i))) + continue; + + /* The memory type flags must include our properties */ + if ((type->propertyFlags & req_flags) != req_flags) + continue; + + /* The memory type must be large enough */ + if (req->size > p->mprops.memoryHeaps[type->heapIndex].size) + continue; + + /* Found a suitable memory type */ + index = i; + break; + } + + if (index < 0) { + av_log(ctx, 
AV_LOG_ERROR, "No memory type found for flags 0x%x\n", + req_flags); + return AVERROR(EINVAL); + } + + alloc_info.memoryTypeIndex = index; + + ret = vk->AllocateMemory(dev_hwctx->act_dev, &alloc_info, + dev_hwctx->alloc, mem); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR(ENOMEM); + } + + *mem_flags |= p->mprops.memoryTypes[index].propertyFlags; + + return 0; +} + +static void vulkan_free_internal(AVVkFrame *f) +{ + av_unused AVVkFrameInternal *internal = f->internal; + +#if CONFIG_CUDA + if (internal->cuda_fc_ref) { + AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data; + int planes = av_pix_fmt_count_planes(cuda_fc->sw_format); + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + + for (int i = 0; i < planes; i++) { + if (internal->cu_sem[i]) + CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i])); + if (internal->cu_mma[i]) + CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i])); + if (internal->ext_mem[i]) + CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[i])); +#ifdef _WIN32 + if (internal->ext_sem_handle[i]) + CloseHandle(internal->ext_sem_handle[i]); + if (internal->ext_mem_handle[i]) + CloseHandle(internal->ext_mem_handle[i]); +#endif + } + + av_buffer_unref(&internal->cuda_fc_ref); + } +#endif + + pthread_mutex_destroy(&internal->update_mutex); + av_freep(&f->internal); +} + +static void vulkan_frame_free(AVHWFramesContext *hwfc, AVVkFrame *f) +{ + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + int nb_images = ff_vk_count_images(f); + int nb_sems = 0; + + while (nb_sems < FF_ARRAY_ELEMS(f->sem) && f->sem[nb_sems]) + nb_sems++; + + if (nb_sems) { + VkSemaphoreWaitInfo sem_wait = { + .sType 
= VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .flags = 0x0, + .pSemaphores = f->sem, + .pValues = f->sem_value, + .semaphoreCount = nb_sems, + }; + + vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); + } + + vulkan_free_internal(f); + + for (int i = 0; i < nb_images; i++) { + vk->DestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); + vk->FreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + vk->DestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); + } + + av_free(f); +} + +static void vulkan_frame_free_cb(void *opaque, uint8_t *data) +{ + vulkan_frame_free(opaque, (AVVkFrame*)data); +} + +static int alloc_bind_mem(AVHWFramesContext *hwfc, AVVkFrame *f, + void *alloc_pnext, size_t alloc_pnext_stride) +{ + int img_cnt = 0, err; + VkResult ret; + AVHWDeviceContext *ctx = hwfc->device_ctx; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } }; + + while (f->img[img_cnt]) { + int use_ded_mem; + VkImageMemoryRequirementsInfo2 req_desc = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, + .image = f->img[img_cnt], + }; + VkMemoryDedicatedAllocateInfo ded_alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = (void *)(((uint8_t *)alloc_pnext) + img_cnt*alloc_pnext_stride), + }; + VkMemoryDedicatedRequirements ded_req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + }; + VkMemoryRequirements2 req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = &ded_req, + }; + + vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req); + + if (f->tiling == VK_IMAGE_TILING_LINEAR) + req.memoryRequirements.size = FFALIGN(req.memoryRequirements.size, + p->props.properties.limits.minMemoryMapAlignment); + + /* In case the implementation prefers/requires dedicated allocation */ + use_ded_mem = ded_req.prefersDedicatedAllocation | + ded_req.requiresDedicatedAllocation; 
+ if (use_ded_mem) + ded_alloc.image = f->img[img_cnt]; + + /* Allocate memory */ + if ((err = alloc_mem(ctx, &req.memoryRequirements, + f->tiling == VK_IMAGE_TILING_LINEAR ? + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, + &f->flags, &f->mem[img_cnt]))) + return err; + + f->size[img_cnt] = req.memoryRequirements.size; + bind_info[img_cnt].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + bind_info[img_cnt].image = f->img[img_cnt]; + bind_info[img_cnt].memory = f->mem[img_cnt]; + + img_cnt++; + } + + /* Bind the allocated memory to the images */ + ret = vk->BindImageMemory2(hwctx->act_dev, img_cnt, bind_info); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +enum PrepMode { + PREP_MODE_GENERAL, + PREP_MODE_WRITE, + PREP_MODE_EXTERNAL_EXPORT, + PREP_MODE_EXTERNAL_IMPORT, + PREP_MODE_DECODING_DST, + PREP_MODE_DECODING_DPB, + PREP_MODE_ENCODING_DPB, +}; + +static int prepare_frame(AVHWFramesContext *hwfc, FFVkExecPool *ectx, + AVVkFrame *frame, enum PrepMode pmode) +{ + int err; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + int nb_img_bar = 0; + + uint32_t dst_qf = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; + VkImageLayout new_layout; + VkAccessFlags2 new_access; + VkPipelineStageFlagBits2 src_stage = VK_PIPELINE_STAGE_2_NONE; + + /* This is dirty - but it works. The vulkan.c dependency system doesn't + * free non-refcounted frames, and non-refcounted hardware frames cannot + * happen anywhere outside of here. 
*/ + AVBufferRef tmp_ref = { + .data = (uint8_t *)hwfc, + }; + AVFrame tmp_frame = { + .data[0] = (uint8_t *)frame, + .hw_frames_ctx = &tmp_ref, + }; + + VkCommandBuffer cmd_buf; + FFVkExecContext *exec = ff_vk_exec_get(&p->vkctx, ectx); + cmd_buf = exec->buf; + ff_vk_exec_start(&p->vkctx, exec); + + err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, &tmp_frame, + VK_PIPELINE_STAGE_2_NONE, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); + if (err < 0) + return err; + + switch (pmode) { + case PREP_MODE_GENERAL: + new_layout = VK_IMAGE_LAYOUT_GENERAL; + new_access = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + case PREP_MODE_WRITE: + new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + new_access = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + case PREP_MODE_EXTERNAL_IMPORT: + new_layout = VK_IMAGE_LAYOUT_GENERAL; + new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + break; + case PREP_MODE_EXTERNAL_EXPORT: + new_layout = VK_IMAGE_LAYOUT_GENERAL; + new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR; + src_stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + break; + case PREP_MODE_DECODING_DST: + new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR; + new_access = VK_ACCESS_TRANSFER_WRITE_BIT; + break; + case PREP_MODE_DECODING_DPB: + new_layout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR; + new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + break; + case PREP_MODE_ENCODING_DPB: + new_layout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_DPB_KHR; + new_access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; + break; + } + + ff_vk_frame_barrier(&p->vkctx, exec, &tmp_frame, img_bar, &nb_img_bar, + src_stage, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + new_access, new_layout, dst_qf); + + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + err = ff_vk_exec_submit(&p->vkctx, 
exec); + if (err < 0) + return err; + + /* We can do this because there are no real dependencies */ + ff_vk_exec_discard_deps(&p->vkctx, exec); + + return 0; +} + +static inline void get_plane_wh(uint32_t *w, uint32_t *h, enum AVPixelFormat format, + int frame_w, int frame_h, int plane) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format); + + /* Currently always true unless gray + alpha support is added */ + if (!plane || (plane == 3) || desc->flags & AV_PIX_FMT_FLAG_RGB || + !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) { + *w = frame_w; + *h = frame_h; + return; + } + + *w = AV_CEIL_RSHIFT(frame_w, desc->log2_chroma_w); + *h = AV_CEIL_RSHIFT(frame_h, desc->log2_chroma_h); +} + +static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, + VkImageTiling tiling, VkImageUsageFlagBits usage, + VkImageCreateFlags flags, int nb_layers, + void *create_pnext) +{ + int err; + VkResult ret; + AVVulkanFramesContext *hwfc_vk = hwfc->hwctx; + AVHWDeviceContext *ctx = hwfc->device_ctx; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + AVVkFrame *f; + + VkSemaphoreTypeCreateInfo sem_type_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = 0, + }; + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &sem_type_info, + }; + + VkExportSemaphoreCreateInfo ext_sem_info_opaque = { + .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, +#ifdef _WIN32 + .handleTypes = IsWindows8OrGreater() + ? 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT + : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, +#else + .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, +#endif + }; + + /* Check if exporting is supported before chaining any structs */ + if (p->ext_sem_props_opaque.externalSemaphoreFeatures & VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT) { + if (p->vkctx.extensions & (FF_VK_EXT_EXTERNAL_WIN32_SEM | FF_VK_EXT_EXTERNAL_FD_SEM)) + ff_vk_link_struct(&sem_type_info, &ext_sem_info_opaque); + } + + f = av_vk_frame_alloc(); + if (!f) { + av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); + return AVERROR(ENOMEM); + } + + // TODO: check width and height for alignment in case of multiplanar (must be mod-2 if subsampled) + + /* Create the images */ + for (int i = 0; (hwfc_vk->format[i] != VK_FORMAT_UNDEFINED); i++) { + VkImageCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = create_pnext, + .imageType = VK_IMAGE_TYPE_2D, + .format = hwfc_vk->format[i], + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = nb_layers, + .flags = flags, + .tiling = tiling, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = usage, + .samples = VK_SAMPLE_COUNT_1_BIT, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? 
VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, + }; + + get_plane_wh(&create_info.extent.width, &create_info.extent.height, + hwfc->sw_format, hwfc->width, hwfc->height, i); + + ret = vk->CreateImage(hwctx->act_dev, &create_info, + hwctx->alloc, &f->img[i]); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR(EINVAL); + goto fail; + } + + /* Create semaphore */ + ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem[i]); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]; + f->layout[i] = create_info.initialLayout; + f->access[i] = 0x0; + f->sem_value[i] = 0; + } + + f->flags = 0x0; + f->tiling = tiling; + + *frame = f; + return 0; + +fail: + vulkan_frame_free(hwfc, f); + return err; +} + +/* Checks if an export flag is enabled, and if it is ORs it with *iexp */ +static void try_export_flags(AVHWFramesContext *hwfc, + VkExternalMemoryHandleTypeFlags *comp_handle_types, + VkExternalMemoryHandleTypeFlagBits *iexp, + VkExternalMemoryHandleTypeFlagBits exp) +{ + VkResult ret; + AVVulkanFramesContext *hwctx = hwfc->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + AVVulkanDeviceContext *dev_hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + + const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info = + ff_vk_find_struct(hwctx->create_pnext, + VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); + int has_mods = hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info; + int nb_mods; + + VkExternalImageFormatProperties eprops = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, + }; + VkImageFormatProperties2 props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + .pNext = &eprops, + }; + 
VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, + .pNext = NULL, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, + }; + VkPhysicalDeviceExternalImageFormatInfo enext = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, + .handleType = exp, + .pNext = has_mods ? &phy_dev_mod_info : NULL, + }; + VkPhysicalDeviceImageFormatInfo2 pinfo = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .pNext = !exp ? NULL : &enext, + .format = vk_find_format_entry(hwfc->sw_format)->vkf, + .type = VK_IMAGE_TYPE_2D, + .tiling = hwctx->tiling, + .usage = hwctx->usage, + .flags = VK_IMAGE_CREATE_ALIAS_BIT, + }; + + nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1; + for (int i = 0; i < nb_mods; i++) { + if (has_mods) + phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[i]; + + ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev, + &pinfo, &props); + + if (ret == VK_SUCCESS) { + *iexp |= exp; + *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes; + } + } +} + +static AVBufferRef *vulkan_pool_alloc(void *opaque, size_t size) +{ + int err; + AVVkFrame *f; + AVBufferRef *avbuf = NULL; + AVHWFramesContext *hwfc = opaque; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + VulkanFramesPriv *fp = hwfc->hwctx; + AVVulkanFramesContext *hwctx = &fp->p; + VkExternalMemoryHandleTypeFlags e = 0x0; + VkExportMemoryAllocateInfo eminfo[AV_NUM_DATA_POINTERS]; + + VkExternalMemoryImageCreateInfo eiinfo = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = hwctx->create_pnext, + }; + +#ifdef _WIN32 + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) + try_export_flags(hwfc, &eiinfo.handleTypes, &e, IsWindows8OrGreater() + ? 
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT + : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT); +#else + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) + try_export_flags(hwfc, &eiinfo.handleTypes, &e, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT); + + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_DMABUF_MEMORY && + hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) + try_export_flags(hwfc, &eiinfo.handleTypes, &e, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); +#endif + + for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) { + eminfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + eminfo[i].pNext = hwctx->alloc_pnext[i]; + eminfo[i].handleTypes = e; + } + + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, + hwctx->nb_layers, + eiinfo.handleTypes ? &eiinfo : hwctx->create_pnext); + if (err) + return NULL; + + err = alloc_bind_mem(hwfc, f, eminfo, sizeof(*eminfo)); + if (err) + goto fail; + + if ( (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && + !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DPB); + else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR) + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_DECODING_DST); + else if (hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_ENCODING_DPB); + else if (hwctx->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_WRITE); + else + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_GENERAL); + if (err) + goto fail; + + avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame), + vulkan_frame_free_cb, hwfc, 0); + if (!avbuf) + goto fail; + + return avbuf; + +fail: + vulkan_frame_free(hwfc, f); + return NULL; +} + +static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) +{ + 
pthread_mutex_lock(&vkf->internal->update_mutex); +} + +static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf) +{ + pthread_mutex_unlock(&vkf->internal->update_mutex); +} + +static void vulkan_frames_uninit(AVHWFramesContext *hwfc) +{ + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + VulkanFramesPriv *fp = hwfc->hwctx; + + if (fp->modifier_info) { + if (fp->modifier_info->pDrmFormatModifiers) + av_freep(&fp->modifier_info->pDrmFormatModifiers); + av_freep(&fp->modifier_info); + } + + ff_vk_exec_pool_free(&p->vkctx, &fp->compute_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->upload_exec); + ff_vk_exec_pool_free(&p->vkctx, &fp->download_exec); + + av_buffer_pool_uninit(&fp->tmp); +} + +static int vulkan_frames_init(AVHWFramesContext *hwfc) +{ + int err; + AVVkFrame *f; + VulkanFramesPriv *fp = hwfc->hwctx; + AVVulkanFramesContext *hwctx = &fp->p; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + AVVulkanDeviceContext *dev_hwctx = &p->p; + VkImageUsageFlagBits supported_usage; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + const struct FFVkFormatEntry *fmt; + int disable_multiplane = p->disable_multiplane || + (hwctx->flags & AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE); + + /* Defaults */ + if (!hwctx->nb_layers) + hwctx->nb_layers = 1; + + /* VK_IMAGE_TILING_OPTIMAL == 0, can't check for it really */ + if (p->use_linear_images && + (hwctx->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)) + hwctx->tiling = VK_IMAGE_TILING_LINEAR; + + + fmt = vk_find_format_entry(hwfc->sw_format); + if (!fmt) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported pixel format: %s!\n", + av_get_pix_fmt_name(hwfc->sw_format)); + return AVERROR(EINVAL); + } + + if (hwctx->format[0] != VK_FORMAT_UNDEFINED) { + if (hwctx->format[0] != fmt->vkf) { + for (int i = 0; i < fmt->nb_images_fallback; i++) { + if (hwctx->format[i] != fmt->fallback[i]) { + av_log(hwfc, AV_LOG_ERROR, "Incompatible Vulkan format given " + "for the current sw_format %s!\n", + av_get_pix_fmt_name(hwfc->sw_format)); + return 
AVERROR(EINVAL); + } + } + } + + /* Check if the sw_format itself is supported */ + err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, + hwctx->tiling, NULL, + NULL, NULL, &supported_usage, 0, + !hwctx->usage || + (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT)); + if (err < 0) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported sw format: %s!\n", + av_get_pix_fmt_name(hwfc->sw_format)); + return AVERROR(EINVAL); + } + } else { + err = vkfmt_from_pixfmt2(hwfc->device_ctx, hwfc->sw_format, + hwctx->tiling, hwctx->format, NULL, + NULL, &supported_usage, + disable_multiplane, + !hwctx->usage || + (hwctx->usage & VK_IMAGE_USAGE_STORAGE_BIT)); + if (err < 0) + return err; + } + + /* Nvidia is violating the spec because they thought no one would use this. */ + if (p->dprops.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY && + (((fmt->nb_images == 1) && (fmt->vk_planes > 1)) || + (av_pix_fmt_desc_get(hwfc->sw_format)->nb_components == 1))) + supported_usage &= ~VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT; + + /* Image usage flags */ + if (!hwctx->usage) { + hwctx->usage = supported_usage & (VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT); + + if ((p->vkctx.extensions & FF_VK_EXT_HOST_IMAGE_COPY) && !p->disable_host_transfer) + hwctx->usage |= supported_usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT; + + /* Enables encoding of images, if supported by format and extensions */ + if ((supported_usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) && + (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | + FF_VK_EXT_VIDEO_MAINTENANCE_1))) + hwctx->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + } + + /* Image creation flags. + * Only fill them in automatically if the image is not going to be used as + * a DPB-only image, and we have SAMPLED/STORAGE bits set. 
*/ + if (!hwctx->img_flags) { + int is_lone_dpb = ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR) || + ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) && + !(hwctx->usage & VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))); + int sampleable = hwctx->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT); + hwctx->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + if (sampleable && !is_lone_dpb) { + hwctx->img_flags |= VK_IMAGE_CREATE_ALIAS_BIT; + if ((fmt->vk_planes > 1) && (hwctx->format[0] == fmt->vkf)) + hwctx->img_flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT; + } + } + + /* If the image has an ENCODE_SRC usage, and the maintenance1 + * extension is supported, check if it has a profile list. + * If there's no profile list, or it has no encode operations, + * then allow creating the image with no specific profile. */ + if ((hwctx->usage & VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR) && + (p->vkctx.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | + FF_VK_EXT_VIDEO_MAINTENANCE_1))) { + const VkVideoProfileListInfoKHR *pl; + pl = ff_vk_find_struct(hwctx->create_pnext, VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); + if (!pl) { + hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR; + } else { + uint32_t i; + for (i = 0; i < pl->profileCount; i++) { + /* Video ops start at exactly 0x00010000 */ + if (pl->pProfiles[i].videoCodecOperation & 0xFFFF0000) + break; + } + if (i == pl->profileCount) + hwctx->img_flags |= VK_IMAGE_CREATE_VIDEO_PROFILE_INDEPENDENT_BIT_KHR; + } + } + + if (!hwctx->lock_frame) + hwctx->lock_frame = lock_frame; + + if (!hwctx->unlock_frame) + hwctx->unlock_frame = unlock_frame; + + err = ff_vk_exec_pool_init(&p->vkctx, p->compute_qf, &fp->compute_exec, + p->compute_qf->num, 0, 0, 0, NULL); + if (err) + return err; + + err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, &fp->upload_exec, + p->transfer_qf->num*2, 0, 0, 0, NULL); + if (err) + return err; + + err = ff_vk_exec_pool_init(&p->vkctx, p->transfer_qf, 
&fp->download_exec, + p->transfer_qf->num, 0, 0, 0, NULL); + if (err) + return err; + + /* Test to see if allocation will fail */ + err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage, hwctx->img_flags, + hwctx->nb_layers, hwctx->create_pnext); + if (err) + return err; + + /* Collect `VkDrmFormatModifierPropertiesEXT` for each plane. Required for DRM export. */ + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS && hwctx->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { + VkImageDrmFormatModifierPropertiesEXT drm_mod = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, + }; + err = vk->GetImageDrmFormatModifierPropertiesEXT(dev_hwctx->act_dev, f->img[0], + &drm_mod); + if (err != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to get image DRM format modifier properties"); + vulkan_frame_free(hwfc, f); + return AVERROR_EXTERNAL; + } + for (int i = 0; i < fmt->vk_planes; ++i) { + VkDrmFormatModifierPropertiesListEXT modp; + VkFormatProperties2 fmtp; + VkDrmFormatModifierPropertiesEXT *mod_props = NULL; + + modp = (VkDrmFormatModifierPropertiesListEXT) { + .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, + }; + fmtp = (VkFormatProperties2) { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + .pNext = &modp, + }; + + /* query drmFormatModifierCount by keeping pDrmFormatModifierProperties NULL */ + vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt->fallback[i], &fmtp); + + modp.pDrmFormatModifierProperties = + av_calloc(modp.drmFormatModifierCount, sizeof(*modp.pDrmFormatModifierProperties)); + if (!modp.pDrmFormatModifierProperties) { + vulkan_frame_free(hwfc, f); + return AVERROR(ENOMEM); + } + vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->phys_dev, fmt->fallback[i], &fmtp); + + for (uint32_t i = 0; i < modp.drmFormatModifierCount; ++i) { + VkDrmFormatModifierPropertiesEXT *m = &modp.pDrmFormatModifierProperties[i]; + if (m->drmFormatModifier == drm_mod.drmFormatModifier) { + mod_props = m; 
+ break; + } + } + + if (mod_props == NULL) { + av_log(hwfc, AV_LOG_ERROR, "No DRM format modifier properties found for modifier 0x%016"PRIx64"\n", + drm_mod.drmFormatModifier); + av_free(modp.pDrmFormatModifierProperties); + vulkan_frame_free(hwfc, f); + return AVERROR_EXTERNAL; + } + + fp->drm_format_modifier_properties[i] = *mod_props; + av_free(modp.pDrmFormatModifierProperties); + } + } + + vulkan_frame_free(hwfc, f); + + /* If user did not specify a pool, hwfc->pool will be set to the internal one + * in hwcontext.c just after this gets called */ + if (!hwfc->pool) { + ffhwframesctx(hwfc)->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame), + hwfc, vulkan_pool_alloc, + NULL); + if (!ffhwframesctx(hwfc)->pool_internal) + return AVERROR(ENOMEM); + } + + return 0; +} + +static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame) +{ + frame->buf[0] = av_buffer_pool_get(hwfc->pool); + if (!frame->buf[0]) + return AVERROR(ENOMEM); + + frame->data[0] = frame->buf[0]->data; + frame->format = AV_PIX_FMT_VULKAN; + frame->width = hwfc->width; + frame->height = hwfc->height; + + return 0; +} + +static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc, + enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) +{ + enum AVPixelFormat *fmts; + int n = 2; + +#if CONFIG_CUDA + n++; +#endif + fmts = av_malloc_array(n, sizeof(*fmts)); + if (!fmts) + return AVERROR(ENOMEM); + + n = 0; + fmts[n++] = hwfc->sw_format; +#if CONFIG_CUDA + fmts[n++] = AV_PIX_FMT_CUDA; +#endif + fmts[n++] = AV_PIX_FMT_NONE; + + *formats = fmts; + return 0; +} + +#if CONFIG_LIBDRM +static void vulkan_unmap_from_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) +{ + vulkan_frame_free(hwfc, hwmap->priv); +} + +static const struct { + uint32_t drm_fourcc; + VkFormat vk_format; +} vulkan_drm_format_map[] = { + { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM }, + { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM }, + { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM }, + { DRM_FORMAT_RG88, 
VK_FORMAT_R8G8_UNORM }, + { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM }, + { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM }, + { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM }, + { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM }, + { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM }, + { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM }, + { DRM_FORMAT_ARGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 }, + { DRM_FORMAT_ABGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 }, + { DRM_FORMAT_XRGB2101010, VK_FORMAT_A2B10G10R10_UNORM_PACK32 }, + { DRM_FORMAT_XBGR2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 }, + + // All these DRM_FORMATs were added in the same libdrm commit. +#ifdef DRM_FORMAT_XYUV8888 + { DRM_FORMAT_XYUV8888, VK_FORMAT_R8G8B8A8_UNORM }, + { DRM_FORMAT_XVYU2101010, VK_FORMAT_A2R10G10B10_UNORM_PACK32 } , + { DRM_FORMAT_XVYU12_16161616, VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16 } , + { DRM_FORMAT_XVYU16161616, VK_FORMAT_R16G16B16A16_UNORM } , +#endif +}; + +static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) + if (vulkan_drm_format_map[i].drm_fourcc == drm_fourcc) + return vulkan_drm_format_map[i].vk_format; + return VK_FORMAT_UNDEFINED; +} + +static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **frame, + const AVFrame *src, int flags) +{ + int err = 0; + VkResult ret; + AVVkFrame *f; + int bind_counts = 0; + AVHWDeviceContext *ctx = hwfc->device_ctx; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; + VkBindImageMemoryInfo bind_info[AV_DRM_MAX_PLANES]; + VkBindImagePlaneMemoryInfo plane_info[AV_DRM_MAX_PLANES]; + + for (int i = 0; i < desc->nb_layers; i++) { + if (drm_to_vulkan_fmt(desc->layers[i].format) == VK_FORMAT_UNDEFINED) { + av_log(ctx, AV_LOG_ERROR, "Unsupported DMABUF layer format %#08x!\n", + 
desc->layers[i].format); + return AVERROR(EINVAL); + } + } + + if (!(f = av_vk_frame_alloc())) { + av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n"); + err = AVERROR(ENOMEM); + goto fail; + } + + f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; + + for (int i = 0; i < desc->nb_layers; i++) { + const int planes = desc->layers[i].nb_planes; + + /* Semaphore */ + VkSemaphoreTypeCreateInfo sem_type_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = 0, + }; + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &sem_type_info, + }; + + /* Image creation */ + VkSubresourceLayout ext_img_layouts[AV_DRM_MAX_PLANES]; + VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT, + .drmFormatModifier = desc->objects[0].format_modifier, + .drmFormatModifierPlaneCount = planes, + .pPlaneLayouts = (const VkSubresourceLayout *)&ext_img_layouts, + }; + VkExternalMemoryImageCreateInfo ext_img_spec = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = &ext_img_mod_spec, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + VkImageCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = &ext_img_spec, + .imageType = VK_IMAGE_TYPE_2D, + .format = drm_to_vulkan_fmt(desc->layers[i].format), + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .flags = 0x0, + .tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, /* specs say so */ + .usage = 0x0, /* filled in below */ + .samples = VK_SAMPLE_COUNT_1_BIT, + .pQueueFamilyIndices = p->img_qfs, + .queueFamilyIndexCount = p->nb_img_qfs, + .sharingMode = p->nb_img_qfs > 1 ? 
VK_SHARING_MODE_CONCURRENT : + VK_SHARING_MODE_EXCLUSIVE, + }; + + /* Image format verification */ + VkExternalImageFormatProperties ext_props = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR, + }; + VkImageFormatProperties2 props_ret = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + .pNext = &ext_props, + }; + VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, + .drmFormatModifier = ext_img_mod_spec.drmFormatModifier, + .pQueueFamilyIndices = create_info.pQueueFamilyIndices, + .queueFamilyIndexCount = create_info.queueFamilyIndexCount, + .sharingMode = create_info.sharingMode, + }; + VkPhysicalDeviceExternalImageFormatInfo props_ext = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, + .pNext = &props_drm_mod, + .handleType = ext_img_spec.handleTypes, + }; + VkPhysicalDeviceImageFormatInfo2 fmt_props; + + if (flags & AV_HWFRAME_MAP_READ) + create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + if (flags & AV_HWFRAME_MAP_WRITE) + create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + fmt_props = (VkPhysicalDeviceImageFormatInfo2) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .pNext = &props_ext, + .format = create_info.format, + .type = create_info.imageType, + .tiling = create_info.tiling, + .usage = create_info.usage, + .flags = create_info.flags, + }; + + /* Check if importing is possible for this combination of parameters */ + ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->phys_dev, + &fmt_props, &props_ret); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Cannot map DRM frame to Vulkan: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Set the image width/height */ + get_plane_wh(&create_info.extent.width, &create_info.extent.height, + hwfc->sw_format, src->width, 
src->height, i); + + /* Set the subresource layout based on the layer properties */ + for (int j = 0; j < planes; j++) { + ext_img_layouts[j].offset = desc->layers[i].planes[j].offset; + ext_img_layouts[j].rowPitch = desc->layers[i].planes[j].pitch; + ext_img_layouts[j].size = 0; /* The specs say so for all 3 */ + ext_img_layouts[j].arrayPitch = 0; + ext_img_layouts[j].depthPitch = 0; + } + + /* Create image */ + ret = vk->CreateImage(hwctx->act_dev, &create_info, + hwctx->alloc, &f->img[i]); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR(EINVAL); + goto fail; + } + + ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem[i]); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + f->queue_family[i] = VK_QUEUE_FAMILY_EXTERNAL; + f->layout[i] = create_info.initialLayout; + f->access[i] = 0x0; + f->sem_value[i] = 0; + } + + for (int i = 0; i < desc->nb_layers; i++) { + /* Memory requirements */ + VkImageMemoryRequirementsInfo2 req_desc = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, + .image = f->img[i], + }; + VkMemoryDedicatedRequirements ded_req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + }; + VkMemoryRequirements2 req2 = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = &ded_req, + }; + + /* Allocation/importing */ + VkMemoryFdPropertiesKHR fdmp = { + .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR, + }; + /* This assumes that a layer will never be constructed from multiple + * objects. If that was to happen in the real world, this code would + * need to import each plane separately. 
+ */ + VkImportMemoryFdInfoKHR idesc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, + .fd = dup(desc->objects[desc->layers[i].planes[0].object_index].fd), + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + VkMemoryDedicatedAllocateInfo ded_alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = &idesc, + .image = req_desc.image, + }; + + /* Get object properties */ + ret = vk->GetMemoryFdPropertiesKHR(hwctx->act_dev, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + idesc.fd, &fdmp); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to get FD properties: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + close(idesc.fd); + goto fail; + } + + vk->GetImageMemoryRequirements2(hwctx->act_dev, &req_desc, &req2); + + /* Only a single bit must be set, not a range, and it must match */ + req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits; + + err = alloc_mem(ctx, &req2.memoryRequirements, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + (ded_req.prefersDedicatedAllocation || + ded_req.requiresDedicatedAllocation) ? + &ded_alloc : ded_alloc.pNext, + &f->flags, &f->mem[i]); + if (err) { + close(idesc.fd); + return err; + } + + f->size[i] = req2.memoryRequirements.size; + } + + for (int i = 0; i < desc->nb_layers; i++) { + const int planes = desc->layers[i].nb_planes; + for (int j = 0; j < planes; j++) { + VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT : + j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT : + VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT; + + plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO; + plane_info[bind_counts].pNext = NULL; + plane_info[bind_counts].planeAspect = aspect; + + bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO; + bind_info[bind_counts].pNext = planes > 1 ? 
&plane_info[bind_counts] : NULL; + bind_info[bind_counts].image = f->img[i]; + bind_info[bind_counts].memory = f->mem[i]; + + /* Offset is already signalled via pPlaneLayouts above */ + bind_info[bind_counts].memoryOffset = 0; + + bind_counts++; + } + } + + /* Bind the allocated memory to the images */ + ret = vk->BindImageMemory2(hwctx->act_dev, bind_counts, bind_info); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + *frame = f; + + return 0; + +fail: + vulkan_frame_free(hwfc, f); + + return err; +} + +static int vulkan_map_from_drm_frame_sync(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err; + VkResult ret; + AVHWDeviceContext *ctx = hwfc->device_ctx; + VulkanDevicePriv *p = ctx->hwctx; + VulkanFramesPriv *fp = hwfc->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + + const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0]; + +#ifdef DMA_BUF_IOCTL_EXPORT_SYNC_FILE + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM) { + VkCommandBuffer cmd_buf; + FFVkExecContext *exec; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + VkSemaphore drm_sync_sem[AV_DRM_MAX_PLANES] = { 0 }; + int nb_img_bar = 0; + + for (int i = 0; i < desc->nb_objects; i++) { + VkSemaphoreTypeCreateInfo sem_type_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .semaphoreType = VK_SEMAPHORE_TYPE_BINARY, + }; + VkSemaphoreCreateInfo sem_spawn = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &sem_type_info, + }; + VkImportSemaphoreFdInfoKHR import_info; + struct dma_buf_export_sync_file implicit_fd_info = { + .flags = DMA_BUF_SYNC_READ, + .fd = -1, + }; + + if (ioctl(desc->objects[i].fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, + &implicit_fd_info)) { + err = AVERROR(errno); + av_log(hwctx, AV_LOG_ERROR, "Failed to retrieve implicit DRM sync file: %s\n", + 
av_err2str(err)); + for (; i >= 0; i--) + vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc); + return err; + } + + ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &drm_sync_sem[i]); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + for (; i >= 0; i--) + vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc); + return err; + } + + import_info = (VkImportSemaphoreFdInfoKHR) { + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, + .semaphore = drm_sync_sem[i], + .fd = implicit_fd_info.fd, + }; + + ret = vk->ImportSemaphoreFdKHR(hwctx->act_dev, &import_info); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to import semaphore: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + for (; i >= 0; i--) + vk->DestroySemaphore(hwctx->act_dev, drm_sync_sem[i], hwctx->alloc); + return err; + } + } + + exec = ff_vk_exec_get(&p->vkctx, &fp->compute_exec); + cmd_buf = exec->buf; + + ff_vk_exec_start(&p->vkctx, exec); + + /* Ownership of semaphores is passed */ + err = ff_vk_exec_add_dep_bool_sem(&p->vkctx, exec, + drm_sync_sem, desc->nb_objects, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, 1); + if (err < 0) + return err; + + err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, dst, + VK_PIPELINE_STAGE_2_NONE, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT); + if (err < 0) + return err; + + ff_vk_frame_barrier(&p->vkctx, exec, dst, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_NONE, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + ((flags & AV_HWFRAME_MAP_READ) ? + VK_ACCESS_2_SHADER_SAMPLED_READ_BIT : 0x0) | + ((flags & AV_HWFRAME_MAP_WRITE) ? + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT : 0x0), + VK_IMAGE_LAYOUT_GENERAL, + p->nb_img_qfs > 1 ? 
VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]); + + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + err = ff_vk_exec_submit(&p->vkctx, exec); + if (err < 0) + return err; + } else +#endif + { + AVVkFrame *f = (AVVkFrame *)dst->data[0]; + av_log(hwctx, AV_LOG_WARNING, "No support for synchronization when importing DMA-BUFs, " + "image may be corrupted.\n"); + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_IMPORT); + if (err) + return err; + } + + return 0; +} + +static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err = 0; + AVVkFrame *f; + + if ((err = vulkan_map_from_drm_frame_desc(hwfc, &f, src, flags))) + return err; + + /* The unmapping function will free this */ + dst->data[0] = (uint8_t *)f; + dst->width = src->width; + dst->height = src->height; + + err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src, + &vulkan_unmap_from_drm, f); + if (err < 0) + goto fail; + + err = vulkan_map_from_drm_frame_sync(hwfc, dst, src, flags); + if (err < 0) + return err; + + av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n"); + + return 0; + +fail: + vulkan_frame_free(hwfc->device_ctx->hwctx, f); + dst->data[0] = NULL; + return err; +} + +#if CONFIG_VAAPI +static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc, + AVFrame *dst, const AVFrame *src, + int flags) +{ + int err; + AVFrame *tmp = av_frame_alloc(); + AVHWFramesContext *vaapi_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; + AVVAAPIDeviceContext *vaapi_ctx = vaapi_fc->device_ctx->hwctx; + VASurfaceID surface_id = (VASurfaceID)(uintptr_t)src->data[3]; + + if (!tmp) + return AVERROR(ENOMEM); + + /* We have to sync since like the previous comment said, no semaphores */ + vaSyncSurface(vaapi_ctx->display, surface_id); + + tmp->format = AV_PIX_FMT_DRM_PRIME; + + err = av_hwframe_map(tmp, src, 
flags); + if (err < 0) + goto fail; + + err = vulkan_map_from_drm(dst_fc, dst, tmp, flags); + if (err < 0) + goto fail; + + err = ff_hwframe_map_replace(dst, src); + +fail: + av_frame_free(&tmp); + return err; +} +#endif +#endif + +#if CONFIG_CUDA +static int export_mem_to_cuda(AVHWDeviceContext *ctx, + AVHWDeviceContext *cuda_cu, CudaFunctions *cu, + AVVkFrameInternal *dst_int, int idx, + VkDeviceMemory mem, size_t size) +{ + VkResult ret; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + +#ifdef _WIN32 + CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { + .type = IsWindows8OrGreater() + ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 + : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT, + .size = size, + }; + VkMemoryGetWin32HandleInfoKHR export_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, + .memory = mem, + .handleType = IsWindows8OrGreater() + ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT + : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, + }; + + ret = vk->GetMemoryWin32HandleKHR(hwctx->act_dev, &export_info, + &ext_desc.handle.win32.handle); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Unable to export the image as a Win32 Handle: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + dst_int->ext_mem_handle[idx] = ext_desc.handle.win32.handle; +#else + CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { + .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, + .size = size, + }; + VkMemoryGetFdInfoKHR export_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .memory = mem, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + }; + + ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info, + &ext_desc.handle.fd); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Unable to export the image as a FD: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } +#endif + + ret = 
CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[idx], &ext_desc)); + if (ret < 0) { +#ifndef _WIN32 + close(ext_desc.handle.fd); +#endif + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int export_sem_to_cuda(AVHWDeviceContext *ctx, + AVHWDeviceContext *cuda_cu, CudaFunctions *cu, + AVVkFrameInternal *dst_int, int idx, + VkSemaphore sem) +{ + VkResult ret; + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + +#ifdef _WIN32 + VkSemaphoreGetWin32HandleInfoKHR sem_export = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR, + .semaphore = sem, + .handleType = IsWindows8OrGreater() + ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT + : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, + }; + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { + .type = 10 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 */, + }; +#else + VkSemaphoreGetFdInfoKHR sem_export = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .semaphore = sem, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { + .type = 9 /* TODO: CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD */, + }; +#endif + +#ifdef _WIN32 + ret = vk->GetSemaphoreWin32HandleKHR(hwctx->act_dev, &sem_export, + &ext_sem_desc.handle.win32.handle); +#else + ret = vk->GetSemaphoreFdKHR(hwctx->act_dev, &sem_export, + &ext_sem_desc.handle.fd); +#endif + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } +#ifdef _WIN32 + dst_int->ext_sem_handle[idx] = ext_sem_desc.handle.win32.handle; +#endif + + ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[idx], + &ext_sem_desc)); + if (ret < 0) { +#ifndef _WIN32 + close(ext_sem_desc.handle.fd); +#endif + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int 
vulkan_export_to_cuda(AVHWFramesContext *hwfc, + AVBufferRef *cuda_hwfc, + const AVFrame *frame) +{ + int err; + VkResult ret; + AVVkFrame *dst_f; + AVVkFrameInternal *dst_int; + AVHWDeviceContext *ctx = hwfc->device_ctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + VulkanDevicePriv *p = ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + int nb_images; + + AVHWFramesContext *cuda_fc = (AVHWFramesContext*)cuda_hwfc->data; + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + CUarray_format cufmt = desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 : + CU_AD_FORMAT_UNSIGNED_INT8; + + dst_f = (AVVkFrame *)frame->data[0]; + dst_int = dst_f->internal; + + if (!dst_int->cuda_fc_ref) { + size_t offsets[3] = { 0 }; + + dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc); + if (!dst_int->cuda_fc_ref) + return AVERROR(ENOMEM); + + nb_images = ff_vk_count_images(dst_f); + for (int i = 0; i < nb_images; i++) { + err = export_mem_to_cuda(ctx, cuda_cu, cu, dst_int, i, + dst_f->mem[i], dst_f->size[i]); + if (err < 0) + goto fail; + + err = export_sem_to_cuda(ctx, cuda_cu, cu, dst_int, i, + dst_f->sem[i]); + if (err < 0) + goto fail; + } + + if (nb_images != planes) { + for (int i = 0; i < planes; i++) { + VkImageSubresource subres = { + .aspectMask = i == 2 ? VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT : + i == 1 ? 
VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT : + VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT + }; + VkSubresourceLayout layout = { 0 }; + vk->GetImageSubresourceLayout(hwctx->act_dev, dst_f->img[FFMIN(i, nb_images - 1)], + &subres, &layout); + offsets[i] = layout.offset; + } + } + + for (int i = 0; i < planes; i++) { + CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { + .offset = offsets[i], + .arrayDesc = { + .Depth = 0, + .Format = cufmt, + .NumChannels = 1 + ((planes == 2) && i), + .Flags = 0, + }, + .numLevels = 1, + }; + int p_w, p_h; + + get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i); + tex_desc.arrayDesc.Width = p_w; + tex_desc.arrayDesc.Height = p_h; + + ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[i], + dst_int->ext_mem[FFMIN(i, nb_images - 1)], + &tex_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[i], + dst_int->cu_mma[i], 0)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; + } + + } + } + + return 0; + +fail: + vulkan_free_internal(dst_f); + return err; +} + +static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, + AVFrame *dst, const AVFrame *src) +{ + int err; + CUcontext dummy; + AVVkFrame *dst_f; + AVVkFrameInternal *dst_int; + VulkanFramesPriv *fp = hwfc->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + + AVHWFramesContext *cuda_fc = (AVHWFramesContext*)src->hw_frames_ctx->data; + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; + + dst_f = (AVVkFrame *)dst->data[0]; + + err = 
prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT); + if (err < 0) + return err; + + err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); + if (err < 0) + return err; + + err = vulkan_export_to_cuda(hwfc, src->hw_frames_ctx, dst); + if (err < 0) { + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return err; + } + + dst_int = dst_f->internal; + + for (int i = 0; i < planes; i++) { + s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0; + s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1; + } + + err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, + planes, cuda_dev->stream)); + if (err < 0) + goto fail; + + for (int i = 0; i < planes; i++) { + CUDA_MEMCPY2D cpy = { + .srcMemoryType = CU_MEMORYTYPE_DEVICE, + .srcDevice = (CUdeviceptr)src->data[i], + .srcPitch = src->linesize[i], + .srcY = 0, + + .dstMemoryType = CU_MEMORYTYPE_ARRAY, + .dstArray = dst_int->cu_array[i], + }; + + int p_w, p_h; + get_plane_wh(&p_w, &p_h, hwfc->sw_format, hwfc->width, hwfc->height, i); + + cpy.WidthInBytes = p_w * desc->comp[i].step; + cpy.Height = p_h; + + err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); + if (err < 0) + goto fail; + } + + err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, + planes, cuda_dev->stream)); + if (err < 0) + goto fail; + + for (int i = 0; i < planes; i++) + dst_f->sem_value[i]++; + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + + av_log(hwfc, AV_LOG_VERBOSE, "Transferred CUDA image to Vulkan!\n"); + + return err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT); + +fail: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + vulkan_free_internal(dst_f); + av_buffer_unref(&dst->buf[0]); + return err; +} +#endif + +static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + + switch (src->format) { +#if CONFIG_LIBDRM +#if CONFIG_VAAPI + case AV_PIX_FMT_VAAPI: + if 
(p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) + return vulkan_map_from_vaapi(hwfc, dst, src, flags); + else + return AVERROR(ENOSYS); +#endif + case AV_PIX_FMT_DRM_PRIME: + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) + return vulkan_map_from_drm(hwfc, dst, src, flags); + else + return AVERROR(ENOSYS); +#endif + default: + return AVERROR(ENOSYS); + } +} + +#if CONFIG_LIBDRM +typedef struct VulkanDRMMapping { + AVDRMFrameDescriptor drm_desc; + AVVkFrame *source; +} VulkanDRMMapping; + +static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) +{ + AVDRMFrameDescriptor *drm_desc = hwmap->priv; + + for (int i = 0; i < drm_desc->nb_objects; i++) + close(drm_desc->objects[i].fd); + + av_free(drm_desc); +} + +static inline uint32_t vulkan_fmt_to_drm(VkFormat vkfmt) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(vulkan_drm_format_map); i++) + if (vulkan_drm_format_map[i].vk_format == vkfmt) + return vulkan_drm_format_map[i].drm_fourcc; + return DRM_FORMAT_INVALID; +} + +#define MAX_MEMORY_PLANES 4 +static VkImageAspectFlags plane_index_to_aspect(int plane) { + if (plane == 0) return VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT; + if (plane == 1) return VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT; + if (plane == 2) return VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT; + if (plane == 3) return VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT; + + av_assert2 (0 && "Invalid plane index"); + return VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT; +} + +static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err = 0; + VkResult ret; + AVVkFrame *f = (AVVkFrame *)src->data[0]; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + VulkanFramesPriv *fp = hwfc->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + VkImageDrmFormatModifierPropertiesEXT drm_mod = { + .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT, + 
}; + VkSemaphoreWaitInfo wait_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .flags = 0x0, + .semaphoreCount = planes, + }; + + AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc)); + if (!drm_desc) + return AVERROR(ENOMEM); + + err = prepare_frame(hwfc, &fp->compute_exec, f, PREP_MODE_EXTERNAL_EXPORT); + if (err < 0) + goto end; + + /* Wait for the operation to finish so we can cleanly export it. */ + wait_info.pSemaphores = f->sem; + wait_info.pValues = f->sem_value; + + vk->WaitSemaphores(hwctx->act_dev, &wait_info, UINT64_MAX); + + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &vulkan_unmap_to_drm, drm_desc); + if (err < 0) + goto end; + + ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img[0], + &drm_mod); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n"); + err = AVERROR_EXTERNAL; + goto end; + } + + for (int i = 0; (i < planes) && (f->mem[i]); i++) { + VkMemoryGetFdInfoKHR export_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .memory = f->mem[i], + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + + ret = vk->GetMemoryFdKHR(hwctx->act_dev, &export_info, + &drm_desc->objects[i].fd); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n"); + err = AVERROR_EXTERNAL; + goto end; + } + + drm_desc->nb_objects++; + drm_desc->objects[i].size = f->size[i]; + drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier; + } + + drm_desc->nb_layers = planes; + for (int i = 0; i < drm_desc->nb_layers; i++) { + VkFormat plane_vkfmt = av_vkfmt_from_pixfmt(hwfc->sw_format)[i]; + + drm_desc->layers[i].format = vulkan_fmt_to_drm(plane_vkfmt); + drm_desc->layers[i].nb_planes = fp->drm_format_modifier_properties[i].drmFormatModifierPlaneCount; + + if (drm_desc->layers[i].nb_planes > MAX_MEMORY_PLANES) { + av_log(hwfc, AV_LOG_ERROR, "Too many memory planes for DRM format!\n"); + err = 
AVERROR_EXTERNAL; + goto end; + } + + for (int j = 0; j < drm_desc->layers[i].nb_planes; j++) { + VkSubresourceLayout layout; + VkImageSubresource sub = { + .aspectMask = plane_index_to_aspect(j), + }; + + drm_desc->layers[i].planes[j].object_index = FFMIN(i, drm_desc->nb_objects - 1); + + vk->GetImageSubresourceLayout(hwctx->act_dev, f->img[i], &sub, &layout); + drm_desc->layers[i].planes[j].offset = layout.offset; + drm_desc->layers[i].planes[j].pitch = layout.rowPitch; + } + + if (drm_desc->layers[i].format == DRM_FORMAT_INVALID) { + av_log(hwfc, AV_LOG_ERROR, "Cannot map to DRM layer, unsupported!\n"); + err = AVERROR_PATCHWELCOME; + goto end; + } + + + if (f->tiling == VK_IMAGE_TILING_OPTIMAL) + continue; + + } + + dst->width = src->width; + dst->height = src->height; + dst->data[0] = (uint8_t *)drm_desc; + + av_log(hwfc, AV_LOG_VERBOSE, "Mapped AVVkFrame to a DRM object!\n"); + + return 0; + +end: + av_free(drm_desc); + return err; +} + +#if CONFIG_VAAPI +static int vulkan_map_to_vaapi(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + int err; + AVFrame *tmp = av_frame_alloc(); + if (!tmp) + return AVERROR(ENOMEM); + + tmp->format = AV_PIX_FMT_DRM_PRIME; + + err = vulkan_map_to_drm(hwfc, tmp, src, flags); + if (err < 0) + goto fail; + + err = av_hwframe_map(dst, tmp, flags); + if (err < 0) + goto fail; + + err = ff_hwframe_map_replace(dst, src); + +fail: + av_frame_free(&tmp); + return err; +} +#endif +#endif + +static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src, int flags) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + + switch (dst->format) { +#if CONFIG_LIBDRM + case AV_PIX_FMT_DRM_PRIME: + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) + return vulkan_map_to_drm(hwfc, dst, src, flags); + else + return AVERROR(ENOSYS); +#if CONFIG_VAAPI + case AV_PIX_FMT_VAAPI: + if (p->vkctx.extensions & FF_VK_EXT_DRM_MODIFIER_FLAGS) + return vulkan_map_to_vaapi(hwfc, dst, src, 
flags); + else + return AVERROR(ENOSYS); +#endif +#endif + default: + break; + } + return AVERROR(ENOSYS); +} + +static int copy_buffer_data(AVHWFramesContext *hwfc, AVBufferRef *buf, + AVFrame *swf, VkBufferImageCopy *region, + int planes, int upload) +{ + VkResult ret; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + AVVulkanDeviceContext *hwctx = &p->p; + + FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data; + + const VkMappedMemoryRange flush_info = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = vkbuf->mem, + .size = VK_WHOLE_SIZE, + }; + + if (!upload && !(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1, + &flush_info); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + if (upload) { + for (int i = 0; i < planes; i++) + av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset, + region[i].bufferRowLength, + swf->data[i], + swf->linesize[i], + swf->linesize[i], + region[i].imageExtent.height); + } else { + for (int i = 0; i < planes; i++) + av_image_copy_plane(swf->data[i], + swf->linesize[i], + vkbuf->mapped_mem + region[i].bufferOffset, + region[i].bufferRowLength, + swf->linesize[i], + region[i].imageExtent.height); + } + + if (upload && !(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1, + &flush_info); + if (ret != VK_SUCCESS) { + av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + return 0; +} + +static int get_plane_buf(AVHWFramesContext *hwfc, AVBufferRef **dst, + AVFrame *swf, VkBufferImageCopy *region, int upload) +{ + int err; + uint32_t p_w, p_h; + VulkanFramesPriv *fp = hwfc->hwctx; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + const int planes = 
av_pix_fmt_count_planes(swf->format); + VkBufferUsageFlags buf_usage = upload ? VK_BUFFER_USAGE_TRANSFER_SRC_BIT : + VK_BUFFER_USAGE_TRANSFER_DST_BIT; + + size_t buf_offset = 0; + for (int i = 0; i < planes; i++) { + get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + + region[i] = (VkBufferImageCopy) { + .bufferOffset = buf_offset, + .bufferRowLength = FFALIGN(swf->linesize[i], + p->props.properties.limits.optimalBufferCopyRowPitchAlignment), + .bufferImageHeight = p_h, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, + /* Rest of the fields adjusted/filled in later */ + }; + + buf_offset += FFALIGN(p_h*region[i].bufferRowLength, + p->props.properties.limits.optimalBufferCopyOffsetAlignment); + } + + err = ff_vk_get_pooled_buffer(&p->vkctx, &fp->tmp, dst, buf_usage, + NULL, buf_offset, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + if (err < 0) + return err; + + return 0; +} + +static int host_map_frame(AVHWFramesContext *hwfc, AVBufferRef **dst, int *nb_bufs, + AVFrame *swf, VkBufferImageCopy *region, int upload) +{ + int err; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + + int nb_src_bufs; + const int planes = av_pix_fmt_count_planes(swf->format); + VkBufferUsageFlags buf_usage = upload ? 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT : + VK_BUFFER_USAGE_TRANSFER_DST_BIT; + + /* We can't host map images with negative strides */ + for (int i = 0; i < planes; i++) + if (swf->linesize[i] < 0) + return AVERROR(EINVAL); + + /* Count the number of buffers in the software frame */ + nb_src_bufs = 0; + while (swf->buf[nb_src_bufs]) + nb_src_bufs++; + + /* Single buffer contains all planes */ + if (nb_src_bufs == 1) { + err = ff_vk_host_map_buffer(&p->vkctx, &dst[0], + swf->data[0], swf->buf[0], + buf_usage); + if (err < 0) + return err; + (*nb_bufs)++; + + for (int i = 0; i < planes; i++) + region[i].bufferOffset = ((FFVkBuffer *)dst[0]->data)->virtual_offset + + swf->data[i] - swf->data[0]; + } else if (nb_src_bufs == planes) { /* One buffer per plane */ + for (int i = 0; i < planes; i++) { + err = ff_vk_host_map_buffer(&p->vkctx, &dst[i], + swf->data[i], swf->buf[i], + buf_usage); + if (err < 0) + goto fail; + (*nb_bufs)++; + + region[i].bufferOffset = ((FFVkBuffer *)dst[i]->data)->virtual_offset; + } + } else { + /* Weird layout (3 planes, 2 buffers), patch welcome, fallback to copy */ + return AVERROR_PATCHWELCOME; + } + + return 0; + +fail: + for (int i = 0; i < (*nb_bufs); i++) + av_buffer_unref(&dst[i]); + return err; +} + +static int vulkan_transfer_host(AVHWFramesContext *hwfc, AVFrame *hwf, + AVFrame *swf, int upload) +{ + VulkanFramesPriv *fp = hwfc->hwctx; + AVVulkanFramesContext *hwfc_vk = &fp->p; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + AVVulkanDeviceContext *hwctx = &p->p; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + + AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0]; + const int planes = av_pix_fmt_count_planes(swf->format); + const int nb_images = ff_vk_count_images(hwf_vk); + + VkSemaphoreWaitInfo sem_wait; + VkHostImageLayoutTransitionInfoEXT layout_ch_info[AV_NUM_DATA_POINTERS]; + int nb_layout_ch = 0; + + hwfc_vk->lock_frame(hwfc, hwf_vk); + + for (int i = 0; i < nb_images; i++) { + int compat = 0; + for (int j = 0; j < 
p->vkctx.host_image_props.copySrcLayoutCount; j++) { + if (hwf_vk->layout[i] == p->vkctx.host_image_props.pCopySrcLayouts[j]) { + compat = 1; + break; + } + } + if (compat) + continue; + + layout_ch_info[nb_layout_ch] = (VkHostImageLayoutTransitionInfoEXT) { + .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO_EXT, + .image = hwf_vk->img[i], + .oldLayout = hwf_vk->layout[i], + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + }, + }; + + hwf_vk->layout[i] = layout_ch_info[nb_layout_ch].newLayout; + nb_layout_ch++; + } + + sem_wait = (VkSemaphoreWaitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .pSemaphores = hwf_vk->sem, + .pValues = hwf_vk->sem_value, + .semaphoreCount = nb_images, + }; + + vk->WaitSemaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); + + if (nb_layout_ch) + vk->TransitionImageLayoutEXT(hwctx->act_dev, + nb_layout_ch, layout_ch_info); + + if (upload) { + VkMemoryToImageCopyEXT region_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT, + .imageSubresource = { + .layerCount = 1, + }, + }; + VkCopyMemoryToImageInfoEXT copy_info = { + .sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO_EXT, + .flags = VK_HOST_IMAGE_COPY_MEMCPY_EXT, + .regionCount = 1, + .pRegions = &region_info, + }; + for (int i = 0; i < planes; i++) { + int img_idx = FFMIN(i, (nb_images - 1)); + uint32_t p_w, p_h; + get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + + region_info.pHostPointer = swf->data[i]; + region_info.imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i); + region_info.imageExtent = (VkExtent3D){ p_w, p_h, 1 }; + copy_info.dstImage = hwf_vk->img[img_idx]; + copy_info.dstImageLayout = hwf_vk->layout[img_idx]; + + vk->CopyMemoryToImageEXT(hwctx->act_dev, &copy_info); + } + } else { + VkImageToMemoryCopyEXT region_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY_EXT, + .imageSubresource = { + .layerCount = 1, + }, + 
}; + VkCopyImageToMemoryInfoEXT copy_info = { + .sType = VK_STRUCTURE_TYPE_COPY_IMAGE_TO_MEMORY_INFO_EXT, + .flags = VK_HOST_IMAGE_COPY_MEMCPY_EXT, + .regionCount = 1, + .pRegions = &region_info, + }; + for (int i = 0; i < planes; i++) { + int img_idx = FFMIN(i, (nb_images - 1)); + uint32_t p_w, p_h; + get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + + region_info.pHostPointer = swf->data[i]; + region_info.imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i); + region_info.imageExtent = (VkExtent3D){ p_w, p_h, 1 }; + copy_info.srcImage = hwf_vk->img[img_idx]; + copy_info.srcImageLayout = hwf_vk->layout[img_idx]; + + vk->CopyImageToMemoryEXT(hwctx->act_dev, &copy_info); + } + } + + hwfc_vk->unlock_frame(hwfc, hwf_vk); + + return 0; +} + +static int vulkan_transfer_frame(AVHWFramesContext *hwfc, + AVFrame *swf, AVFrame *hwf, + int upload) +{ + int err; + VulkanFramesPriv *fp = hwfc->hwctx; + AVVulkanFramesContext *hwctx = &fp->p; + VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + FFVulkanFunctions *vk = &p->vkctx.vkfn; + + int host_mapped = 0; + + AVVkFrame *hwf_vk = (AVVkFrame *)hwf->data[0]; + VkBufferImageCopy region[AV_NUM_DATA_POINTERS]; // always one per plane + + const int planes = av_pix_fmt_count_planes(swf->format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(swf->format); + const int nb_images = ff_vk_count_images(hwf_vk); + + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + int nb_img_bar = 0; + + AVBufferRef *bufs[AV_NUM_DATA_POINTERS]; + int nb_bufs = 0; + + VkCommandBuffer cmd_buf; + FFVkExecContext *exec; + + /* Sanity checking */ + if ((swf->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(swf->format))) { + av_log(hwfc, AV_LOG_ERROR, "Unsupported software frame pixel format!\n"); + return AVERROR(EINVAL); + } + + if (swf->width > hwfc->width || swf->height > hwfc->height) + return AVERROR(EINVAL); + + if (hwctx->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT) + return vulkan_transfer_host(hwfc, hwf, swf, upload); + + 
for (int i = 0; i < av_pix_fmt_count_planes(swf->format); i++) { + uint32_t p_w, p_h; + get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height, i); + + /* Buffer region for this plane */ + region[i] = (VkBufferImageCopy) { + .bufferOffset = 0, + .bufferRowLength = swf->linesize[i], + .bufferImageHeight = p_h, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ p_w, p_h, 1 }, + /* Rest of the fields adjusted/filled in later */ + }; + } + + /* Setup buffers first */ + if (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY && !p->avoid_host_import) { + err = host_map_frame(hwfc, bufs, &nb_bufs, swf, region, upload); + if (err >= 0) + host_mapped = 1; + } + + if (!host_mapped) { + err = get_plane_buf(hwfc, &bufs[0], swf, region, upload); + if (err < 0) + goto end; + nb_bufs = 1; + + if (upload) { + err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 1); + if (err < 0) + goto end; + } + } + + exec = ff_vk_exec_get(&p->vkctx, &fp->upload_exec); + cmd_buf = exec->buf; + + ff_vk_exec_start(&p->vkctx, exec); + + /* Prep destination Vulkan frame */ + err = ff_vk_exec_add_dep_frame(&p->vkctx, exec, hwf, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_TRANSFER_BIT); + if (err < 0) + goto end; + + /* No need to declare buf deps for synchronous transfers (downloads) */ + if (upload) { + /* Add the software frame backing the buffers if we're host mapping */ + if (host_mapped) { + err = ff_vk_exec_add_dep_sw_frame(&p->vkctx, exec, swf); + if (err < 0) { + ff_vk_exec_discard_deps(&p->vkctx, exec); + goto end; + } + } + + /* Add the buffers as a dependency */ + err = ff_vk_exec_add_dep_buf(&p->vkctx, exec, bufs, nb_bufs, 1); + if (err < 0) { + ff_vk_exec_discard_deps(&p->vkctx, exec); + goto end; + } + } + + ff_vk_frame_barrier(&p->vkctx, exec, hwf, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR, + upload ? VK_ACCESS_TRANSFER_WRITE_BIT : + VK_ACCESS_TRANSFER_READ_BIT, + upload ? 
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0]); + + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + for (int i = 0; i < planes; i++) { + int buf_idx = FFMIN(i, (nb_bufs - 1)); + int img_idx = FFMIN(i, (nb_images - 1)); + FFVkBuffer *vkbuf = (FFVkBuffer *)bufs[buf_idx]->data; + + uint32_t orig_stride = region[i].bufferRowLength; + region[i].bufferRowLength /= desc->comp[i].step; + region[i].imageSubresource.aspectMask = ff_vk_aspect_flag(hwf, i); + + if (upload) + vk->CmdCopyBufferToImage(cmd_buf, vkbuf->buf, + hwf_vk->img[img_idx], + img_bar[img_idx].newLayout, + 1, &region[i]); + else + vk->CmdCopyImageToBuffer(cmd_buf, hwf_vk->img[img_idx], + img_bar[img_idx].newLayout, + vkbuf->buf, + 1, &region[i]); + + region[i].bufferRowLength = orig_stride; + } + + err = ff_vk_exec_submit(&p->vkctx, exec); + if (err < 0) { + ff_vk_exec_discard_deps(&p->vkctx, exec); + } else if (!upload) { + ff_vk_exec_wait(&p->vkctx, exec); + if (!host_mapped) + err = copy_buffer_data(hwfc, bufs[0], swf, region, planes, 0); + } + +end: + for (int i = 0; i < nb_bufs; i++) + av_buffer_unref(&bufs[i]); + + return err; +} + +static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + + switch (src->format) { +#if CONFIG_CUDA + case AV_PIX_FMT_CUDA: +#ifdef _WIN32 + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) +#else + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) +#endif + return vulkan_transfer_data_from_cuda(hwfc, dst, src); +#endif + default: + if (src->hw_frames_ctx) + return AVERROR(ENOSYS); + else + return 
vulkan_transfer_frame(hwfc, (AVFrame *)src, dst, 1); + } +} + +#if CONFIG_CUDA +static int vulkan_transfer_data_to_cuda(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + int err; + CUcontext dummy; + AVVkFrame *dst_f; + AVVkFrameInternal *dst_int; + VulkanFramesPriv *fp = hwfc->hwctx; + const int planes = av_pix_fmt_count_planes(hwfc->sw_format); + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + int nb_images; + + AVHWFramesContext *cuda_fc = (AVHWFramesContext*)dst->hw_frames_ctx->data; + AVHWDeviceContext *cuda_cu = cuda_fc->device_ctx; + AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; + AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; + CudaFunctions *cu = cu_internal->cuda_dl; + CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; + + dst_f = (AVVkFrame *)src->data[0]; + nb_images = ff_vk_count_images(dst_f); + + err = prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_EXPORT); + if (err < 0) + return err; + + err = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); + if (err < 0) + return err; + + err = vulkan_export_to_cuda(hwfc, dst->hw_frames_ctx, src); + if (err < 0) { + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return err; + } + + dst_int = dst_f->internal; + + for (int i = 0; i < planes; i++) { + s_w_par[i].params.fence.value = dst_f->sem_value[i] + 0; + s_s_par[i].params.fence.value = dst_f->sem_value[i] + 1; + } + + err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, + nb_images, cuda_dev->stream)); + if (err < 0) + goto fail; + + for (int i = 0; i < planes; i++) { + CUDA_MEMCPY2D cpy = { + .dstMemoryType = CU_MEMORYTYPE_DEVICE, + .dstDevice = (CUdeviceptr)dst->data[i], + .dstPitch = dst->linesize[i], + .dstY = 0, + + .srcMemoryType = CU_MEMORYTYPE_ARRAY, + .srcArray = dst_int->cu_array[i], + }; + + int w, h; + get_plane_wh(&w, &h, hwfc->sw_format, hwfc->width, 
hwfc->height, i); + + cpy.WidthInBytes = w * desc->comp[i].step; + cpy.Height = h; + + err = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream)); + if (err < 0) + goto fail; + } + + err = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, + nb_images, cuda_dev->stream)); + if (err < 0) + goto fail; + + for (int i = 0; i < planes; i++) + dst_f->sem_value[i]++; + + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + + av_log(hwfc, AV_LOG_VERBOSE, "Transferred Vulkan image to CUDA!\n"); + + return prepare_frame(hwfc, &fp->upload_exec, dst_f, PREP_MODE_EXTERNAL_IMPORT); + +fail: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + vulkan_free_internal(dst_f); + av_buffer_unref(&dst->buf[0]); + return err; +} +#endif + +static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst, + const AVFrame *src) +{ + av_unused VulkanDevicePriv *p = hwfc->device_ctx->hwctx; + + switch (dst->format) { +#if CONFIG_CUDA + case AV_PIX_FMT_CUDA: +#ifdef _WIN32 + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM)) +#else + if ((p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_MEMORY) && + (p->vkctx.extensions & FF_VK_EXT_EXTERNAL_FD_SEM)) +#endif + return vulkan_transfer_data_to_cuda(hwfc, dst, src); +#endif + default: + if (dst->hw_frames_ctx) + return AVERROR(ENOSYS); + else + return vulkan_transfer_frame(hwfc, dst, (AVFrame *)src, 0); + } +} + +static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc, + AVHWFramesContext *src_fc, int flags) +{ + return vulkan_frames_init(dst_fc); +} + +AVVkFrame *av_vk_frame_alloc(void) +{ + int err; + AVVkFrame *f = av_mallocz(sizeof(AVVkFrame)); + if (!f) + return NULL; + + f->internal = av_mallocz(sizeof(*f->internal)); + if (!f->internal) { + av_free(f); + return NULL; + } + + err = pthread_mutex_init(&f->internal->update_mutex, NULL); + if (err != 0) { + av_free(f->internal); + av_free(f); + return NULL; + } + + return f; +} + +const HWContextType 
ff_hwcontext_type_vulkan = { + .type = AV_HWDEVICE_TYPE_VULKAN, + .name = "Vulkan", + + .device_hwctx_size = sizeof(VulkanDevicePriv), + .frames_hwctx_size = sizeof(VulkanFramesPriv), + + .device_init = &vulkan_device_init, + .device_uninit = &vulkan_device_uninit, + .device_create = &vulkan_device_create, + .device_derive = &vulkan_device_derive, + + .frames_get_constraints = &vulkan_frames_get_constraints, + .frames_init = vulkan_frames_init, + .frames_get_buffer = vulkan_get_buffer, + .frames_uninit = vulkan_frames_uninit, + + .transfer_get_formats = vulkan_transfer_get_formats, + .transfer_data_to = vulkan_transfer_data_to, + .transfer_data_from = vulkan_transfer_data_from, + + .map_to = vulkan_map_to, + .map_from = vulkan_map_from, + .frames_derive_to = &vulkan_frames_derive_to, + + .pix_fmts = (const enum AVPixelFormat []) { + AV_PIX_FMT_VULKAN, + AV_PIX_FMT_NONE + }, +}; -- 2.49.1 From ac1c988371e59a65551c506829bcb8107dd0ef8a Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:15:54 +0000 Subject: [PATCH 041/118] Changing vulkan file directory --- libavutil/hwcontext_vulkan.h | 377 ----------------------------------- 1 file changed, 377 deletions(-) delete mode 100644 libavutil/hwcontext_vulkan.h diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h deleted file mode 100644 index 15cf515668..0000000000 --- a/libavutil/hwcontext_vulkan.h +++ /dev/null @@ -1,377 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVUTIL_HWCONTEXT_VULKAN_H -#define AVUTIL_HWCONTEXT_VULKAN_H - -#if defined(_WIN32) && !defined(VK_USE_PLATFORM_WIN32_KHR) -#define VK_USE_PLATFORM_WIN32_KHR -#endif -#include <vulkan/vulkan.h> - -#include "pixfmt.h" -#include "frame.h" -#include "hwcontext.h" - -typedef struct AVVkFrame AVVkFrame; - -typedef struct AVVulkanDeviceQueueFamily { - /* Queue family index */ - int idx; - /* Number of queues in the queue family in use */ - int num; - /* Queue family capabilities. Must be non-zero. - * Flags may be removed to indicate the queue family may not be used - * for a given purpose. */ - VkQueueFlagBits flags; - /* Vulkan implementations are allowed to list multiple video queues - * which differ in what they can encode or decode. */ - VkVideoCodecOperationFlagBitsKHR video_caps; -} AVVulkanDeviceQueueFamily; - -/** - * @file - * API-specific header for AV_HWDEVICE_TYPE_VULKAN. - * - * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs - * with the data pointer set to an AVVkFrame. - */ - -/** - * Main Vulkan context, allocated as AVHWDeviceContext.hwctx. - * All of these can be set before init to change what the context uses - */ -typedef struct AVVulkanDeviceContext { - /** - * Custom memory allocator, else NULL - */ - const VkAllocationCallbacks *alloc; - - /** - * Pointer to a vkGetInstanceProcAddr loading function. - * If unset, will dynamically load and use libvulkan. - */ - PFN_vkGetInstanceProcAddr get_proc_addr; - - /** - * Vulkan instance. Must be at least version 1.3. 
- */ - VkInstance inst; - - /** - * Physical device - */ - VkPhysicalDevice phys_dev; - - /** - * Active device - */ - VkDevice act_dev; - - /** - * This structure should be set to the set of features that present and enabled - * during device creation. When a device is created by FFmpeg, it will default to - * enabling all that are present of the shaderImageGatherExtended, - * fragmentStoresAndAtomics, shaderInt64 and vertexPipelineStoresAndAtomics features. - */ - VkPhysicalDeviceFeatures2 device_features; - - /** - * Enabled instance extensions. - * If supplying your own device context, set this to an array of strings, with - * each entry containing the specified Vulkan extension string to enable. - * Duplicates are possible and accepted. - * If no extensions are enabled, set these fields to NULL, and 0 respectively. - */ - const char * const *enabled_inst_extensions; - int nb_enabled_inst_extensions; - - /** - * Enabled device extensions. By default, VK_KHR_external_memory_fd, - * VK_EXT_external_memory_dma_buf, VK_EXT_image_drm_format_modifier, - * VK_KHR_external_semaphore_fd and VK_EXT_external_memory_host are enabled if found. - * If supplying your own device context, these fields takes the same format as - * the above fields, with the same conditions that duplicates are possible - * and accepted, and that NULL and 0 respectively means no extensions are enabled. - */ - const char * const *enabled_dev_extensions; - int nb_enabled_dev_extensions; - -#if FF_API_VULKAN_FIXED_QUEUES - /** - * Queue family index for graphics operations, and the number of queues - * enabled for it. If unavailable, will be set to -1. Not required. - * av_hwdevice_create() will attempt to find a dedicated queue for each - * queue family, or pick the one with the least unrelated flags set. - * Queue indices here may overlap if a queue has to share capabilities. 
- */ - attribute_deprecated - int queue_family_index; - attribute_deprecated - int nb_graphics_queues; - - /** - * Queue family index for transfer operations and the number of queues - * enabled. Required. - */ - attribute_deprecated - int queue_family_tx_index; - attribute_deprecated - int nb_tx_queues; - - /** - * Queue family index for compute operations and the number of queues - * enabled. Required. - */ - attribute_deprecated - int queue_family_comp_index; - attribute_deprecated - int nb_comp_queues; - - /** - * Queue family index for video encode ops, and the amount of queues enabled. - * If the device doesn't support such, queue_family_encode_index will be -1. - * Not required. - */ - attribute_deprecated - int queue_family_encode_index; - attribute_deprecated - int nb_encode_queues; - - /** - * Queue family index for video decode ops, and the amount of queues enabled. - * If the device doesn't support such, queue_family_decode_index will be -1. - * Not required. - */ - attribute_deprecated - int queue_family_decode_index; - attribute_deprecated - int nb_decode_queues; -#endif - - /** - * Locks a queue, preventing other threads from submitting any command - * buffers to this queue. - * If set to NULL, will be set to lavu-internal functions that utilize a - * mutex. - */ - void (*lock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index); - - /** - * Similar to lock_queue(), unlocks a queue. Must only be called after locking. - */ - void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index); - - /** - * Queue families used. Must be preferentially ordered. List may contain - * duplicates. - * - * For compatibility reasons, all the enabled queue families listed above - * (queue_family_(tx/comp/encode/decode)_index) must also be included in - * this list until they're removed after deprecation. 
- */ - AVVulkanDeviceQueueFamily qf[64]; - int nb_qf; -} AVVulkanDeviceContext; - -/** - * Defines the behaviour of frame allocation. - */ -typedef enum AVVkFrameFlags { - /* Unless this flag is set, autodetected flags will be OR'd based on the - * device and tiling during av_hwframe_ctx_init(). */ - AV_VK_FRAME_FLAG_NONE = (1ULL << 0), - - /* Disables multiplane images. - * This is required to export/import images from CUDA. */ - AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE = (1ULL << 2), -} AVVkFrameFlags; - -/** - * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options - */ -typedef struct AVVulkanFramesContext { - /** - * Controls the tiling of allocated frames. - * If left as VK_IMAGE_TILING_OPTIMAL (0), will use optimal tiling. - * Can be set to VK_IMAGE_TILING_LINEAR to force linear images, - * or VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT to force DMABUF-backed - * images. - * @note Imported frames from other APIs ignore this. - */ - VkImageTiling tiling; - - /** - * Defines extra usage of output frames. If non-zero, all flags MUST be - * supported by the VkFormat. Otherwise, will use supported flags amongst: - * - VK_IMAGE_USAGE_SAMPLED_BIT - * - VK_IMAGE_USAGE_STORAGE_BIT - * - VK_IMAGE_USAGE_TRANSFER_SRC_BIT - * - VK_IMAGE_USAGE_TRANSFER_DST_BIT - */ - VkImageUsageFlagBits usage; - - /** - * Extension data for image creation. - * If DRM tiling is used, a VkImageDrmFormatModifierListCreateInfoEXT structure - * can be added to specify the exact modifier to use. - * - * Additional structures may be added at av_hwframe_ctx_init() time, - * which will be freed automatically on uninit(), so users must only free - * any structures they've allocated themselves. - */ - void *create_pnext; - - /** - * Extension data for memory allocation. Must have as many entries as - * the number of planes of the sw_format. 
- * This will be chained to VkExportMemoryAllocateInfo, which is used - * to make all pool images exportable to other APIs if the necessary - * extensions are present in enabled_dev_extensions. - */ - void *alloc_pnext[AV_NUM_DATA_POINTERS]; - - /** - * A combination of AVVkFrameFlags. Unless AV_VK_FRAME_FLAG_NONE is set, - * autodetected flags will be OR'd based on the device and tiling during - * av_hwframe_ctx_init(). - */ - AVVkFrameFlags flags; - - /** - * Flags to set during image creation. If unset, defaults to - * VK_IMAGE_CREATE_ALIAS_BIT. - */ - VkImageCreateFlags img_flags; - - /** - * Vulkan format for each image. MUST be compatible with the pixel format. - * If unset, will be automatically set. - * There are at most two compatible formats for a frame - a multiplane - * format, and a single-plane multi-image format. - */ - VkFormat format[AV_NUM_DATA_POINTERS]; - - /** - * Number of layers each image will have. - */ - int nb_layers; - - /** - * Locks a frame, preventing other threads from changing frame properties. - * Users SHOULD only ever lock just before command submission in order - * to get accurate frame properties, and unlock immediately after command - * submission without waiting for it to finish. - * - * If unset, will be set to lavu-internal functions that utilize a mutex. - */ - void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); - - /** - * Similar to lock_frame(), unlocks a frame. Must only be called after locking. - */ - void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); -} AVVulkanFramesContext; - -/* - * Frame structure. - * - * @note the size of this structure is not part of the ABI, to allocate - * you must use @av_vk_frame_alloc(). - */ -struct AVVkFrame { - /** - * Vulkan images to which the memory is bound to. - * May be one for multiplane formats, or multiple. - */ - VkImage img[AV_NUM_DATA_POINTERS]; - - /** - * Tiling for the frame. 
- */ - VkImageTiling tiling; - - /** - * Memory backing the images. Either one, or as many as there are planes - * in the sw_format. - * In case of having multiple VkImages, but one memory, the offset field - * will indicate the bound offset for each image. - */ - VkDeviceMemory mem[AV_NUM_DATA_POINTERS]; - size_t size[AV_NUM_DATA_POINTERS]; - - /** - * OR'd flags for all memory allocated - */ - VkMemoryPropertyFlagBits flags; - - /** - * Updated after every barrier. One per VkImage. - */ - VkAccessFlagBits access[AV_NUM_DATA_POINTERS]; - VkImageLayout layout[AV_NUM_DATA_POINTERS]; - - /** - * Synchronization timeline semaphores, one for each VkImage. - * Must not be freed manually. Must be waited on at every submission using - * the value in sem_value, and must be signalled at every submission, - * using an incremented value. - */ - VkSemaphore sem[AV_NUM_DATA_POINTERS]; - - /** - * Up to date semaphore value at which each image becomes accessible. - * One per VkImage. - * Clients must wait on this value when submitting a command queue, - * and increment it when signalling. - */ - uint64_t sem_value[AV_NUM_DATA_POINTERS]; - - /** - * Internal data. - */ - struct AVVkFrameInternal *internal; - - /** - * Describes the binding offset of each image to the VkDeviceMemory. - * One per VkImage. - */ - ptrdiff_t offset[AV_NUM_DATA_POINTERS]; - - /** - * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if - * the image was allocated with the CONCURRENT concurrency option. - * One per VkImage. - */ - uint32_t queue_family[AV_NUM_DATA_POINTERS]; -}; - -/** - * Allocates a single AVVkFrame and initializes everything as 0. - * @note Must be freed via av_free() - */ -AVVkFrame *av_vk_frame_alloc(void); - -/** - * Returns the optimal per-plane Vulkan format for a given sw_format, - * one for each plane. - * Returns NULL on unsupported formats. 
- */ -const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p); - -#endif /* AVUTIL_HWCONTEXT_VULKAN_H */ -- 2.49.1 From 120ebdcf6a643d4dbc3fa597c7c48e52aa6f1194 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:16:23 +0000 Subject: [PATCH 042/118] Changing vulkan file directory --- libavutil/vulkan/hwcontext_vulkan.h | 377 ++++++++++++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 libavutil/vulkan/hwcontext_vulkan.h diff --git a/libavutil/vulkan/hwcontext_vulkan.h b/libavutil/vulkan/hwcontext_vulkan.h new file mode 100644 index 0000000000..11a01b9565 --- /dev/null +++ b/libavutil/vulkan/hwcontext_vulkan.h @@ -0,0 +1,377 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_HWCONTEXT_VULKAN_H +#define AVUTIL_HWCONTEXT_VULKAN_H + +#if defined(_WIN32) && !defined(VK_USE_PLATFORM_WIN32_KHR) +#define VK_USE_PLATFORM_WIN32_KHR +#endif +#include <vulkan/vulkan.h> + +#include "libavutil/pixfmt.h" +#include "libavutil/frame.h" +#include "libavutil/hwcontext.h" + +typedef struct AVVkFrame AVVkFrame; + +typedef struct AVVulkanDeviceQueueFamily { + /* Queue family index */ + int idx; + /* Number of queues in the queue family in use */ + int num; + /* Queue family capabilities. 
Must be non-zero. + * Flags may be removed to indicate the queue family may not be used + * for a given purpose. */ + VkQueueFlagBits flags; + /* Vulkan implementations are allowed to list multiple video queues + * which differ in what they can encode or decode. */ + VkVideoCodecOperationFlagBitsKHR video_caps; +} AVVulkanDeviceQueueFamily; + +/** + * @file + * API-specific header for AV_HWDEVICE_TYPE_VULKAN. + * + * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs + * with the data pointer set to an AVVkFrame. + */ + +/** + * Main Vulkan context, allocated as AVHWDeviceContext.hwctx. + * All of these can be set before init to change what the context uses + */ +typedef struct AVVulkanDeviceContext { + /** + * Custom memory allocator, else NULL + */ + const VkAllocationCallbacks *alloc; + + /** + * Pointer to a vkGetInstanceProcAddr loading function. + * If unset, will dynamically load and use libvulkan. + */ + PFN_vkGetInstanceProcAddr get_proc_addr; + + /** + * Vulkan instance. Must be at least version 1.3. + */ + VkInstance inst; + + /** + * Physical device + */ + VkPhysicalDevice phys_dev; + + /** + * Active device + */ + VkDevice act_dev; + + /** + * This structure should be set to the set of features that are present and enabled + * during device creation. When a device is created by FFmpeg, it will default to + * enabling all that are present of the shaderImageGatherExtended, + * fragmentStoresAndAtomics, shaderInt64 and vertexPipelineStoresAndAtomics features. + */ + VkPhysicalDeviceFeatures2 device_features; + + /** + * Enabled instance extensions. + * If supplying your own device context, set this to an array of strings, with + * each entry containing the specified Vulkan extension string to enable. + * Duplicates are possible and accepted. + * If no extensions are enabled, set these fields to NULL, and 0 respectively.
+ */ + const char * const *enabled_inst_extensions; + int nb_enabled_inst_extensions; + + /** + * Enabled device extensions. By default, VK_KHR_external_memory_fd, + * VK_EXT_external_memory_dma_buf, VK_EXT_image_drm_format_modifier, + * VK_KHR_external_semaphore_fd and VK_EXT_external_memory_host are enabled if found. + * If supplying your own device context, these fields take the same format as + * the above fields, with the same conditions that duplicates are possible + * and accepted, and that NULL and 0 respectively mean no extensions are enabled. + */ + const char * const *enabled_dev_extensions; + int nb_enabled_dev_extensions; + +#if FF_API_VULKAN_FIXED_QUEUES + /** + * Queue family index for graphics operations, and the number of queues + * enabled for it. If unavailable, will be set to -1. Not required. + * av_hwdevice_create() will attempt to find a dedicated queue for each + * queue family, or pick the one with the least unrelated flags set. + * Queue indices here may overlap if a queue has to share capabilities. + */ + attribute_deprecated + int queue_family_index; + attribute_deprecated + int nb_graphics_queues; + + /** + * Queue family index for transfer operations and the number of queues + * enabled. Required. + */ + attribute_deprecated + int queue_family_tx_index; + attribute_deprecated + int nb_tx_queues; + + /** + * Queue family index for compute operations and the number of queues + * enabled. Required. + */ + attribute_deprecated + int queue_family_comp_index; + attribute_deprecated + int nb_comp_queues; + + /** + * Queue family index for video encode ops, and the amount of queues enabled. + * If the device doesn't support such, queue_family_encode_index will be -1. + * Not required. + */ + attribute_deprecated + int queue_family_encode_index; + attribute_deprecated + int nb_encode_queues; + + /** + * Queue family index for video decode ops, and the amount of queues enabled.
+ * If the device doesn't support such, queue_family_decode_index will be -1. + * Not required. + */ + attribute_deprecated + int queue_family_decode_index; + attribute_deprecated + int nb_decode_queues; +#endif + + /** + * Locks a queue, preventing other threads from submitting any command + * buffers to this queue. + * If set to NULL, will be set to lavu-internal functions that utilize a + * mutex. + */ + void (*lock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index); + + /** + * Similar to lock_queue(), unlocks a queue. Must only be called after locking. + */ + void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index); + + /** + * Queue families used. Must be preferentially ordered. List may contain + * duplicates. + * + * For compatibility reasons, all the enabled queue families listed above + * (queue_family_(tx/comp/encode/decode)_index) must also be included in + * this list until they're removed after deprecation. + */ + AVVulkanDeviceQueueFamily qf[64]; + int nb_qf; +} AVVulkanDeviceContext; + +/** + * Defines the behaviour of frame allocation. + */ +typedef enum AVVkFrameFlags { + /* Unless this flag is set, autodetected flags will be OR'd based on the + * device and tiling during av_hwframe_ctx_init(). */ + AV_VK_FRAME_FLAG_NONE = (1ULL << 0), + + /* Disables multiplane images. + * This is required to export/import images from CUDA. */ + AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE = (1ULL << 2), +} AVVkFrameFlags; + +/** + * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options + */ +typedef struct AVVulkanFramesContext { + /** + * Controls the tiling of allocated frames. + * If left as VK_IMAGE_TILING_OPTIMAL (0), will use optimal tiling. + * Can be set to VK_IMAGE_TILING_LINEAR to force linear images, + * or VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT to force DMABUF-backed + * images. + * @note Imported frames from other APIs ignore this. 
+ */ + VkImageTiling tiling; + + /** + * Defines extra usage of output frames. If non-zero, all flags MUST be + * supported by the VkFormat. Otherwise, will use supported flags amongst: + * - VK_IMAGE_USAGE_SAMPLED_BIT + * - VK_IMAGE_USAGE_STORAGE_BIT + * - VK_IMAGE_USAGE_TRANSFER_SRC_BIT + * - VK_IMAGE_USAGE_TRANSFER_DST_BIT + */ + VkImageUsageFlagBits usage; + + /** + * Extension data for image creation. + * If DRM tiling is used, a VkImageDrmFormatModifierListCreateInfoEXT structure + * can be added to specify the exact modifier to use. + * + * Additional structures may be added at av_hwframe_ctx_init() time, + * which will be freed automatically on uninit(), so users must only free + * any structures they've allocated themselves. + */ + void *create_pnext; + + /** + * Extension data for memory allocation. Must have as many entries as + * the number of planes of the sw_format. + * This will be chained to VkExportMemoryAllocateInfo, which is used + * to make all pool images exportable to other APIs if the necessary + * extensions are present in enabled_dev_extensions. + */ + void *alloc_pnext[AV_NUM_DATA_POINTERS]; + + /** + * A combination of AVVkFrameFlags. Unless AV_VK_FRAME_FLAG_NONE is set, + * autodetected flags will be OR'd based on the device and tiling during + * av_hwframe_ctx_init(). + */ + AVVkFrameFlags flags; + + /** + * Flags to set during image creation. If unset, defaults to + * VK_IMAGE_CREATE_ALIAS_BIT. + */ + VkImageCreateFlags img_flags; + + /** + * Vulkan format for each image. MUST be compatible with the pixel format. + * If unset, will be automatically set. + * There are at most two compatible formats for a frame - a multiplane + * format, and a single-plane multi-image format. + */ + VkFormat format[AV_NUM_DATA_POINTERS]; + + /** + * Number of layers each image will have. + */ + int nb_layers; + + /** + * Locks a frame, preventing other threads from changing frame properties. 
+ * Users SHOULD only ever lock just before command submission in order + * to get accurate frame properties, and unlock immediately after command + * submission without waiting for it to finish. + * + * If unset, will be set to lavu-internal functions that utilize a mutex. + */ + void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); + + /** + * Similar to lock_frame(), unlocks a frame. Must only be called after locking. + */ + void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf); +} AVVulkanFramesContext; + +/* + * Frame structure. + * + * @note the size of this structure is not part of the ABI, to allocate + * you must use @av_vk_frame_alloc(). + */ +struct AVVkFrame { + /** + * Vulkan images to which the memory is bound to. + * May be one for multiplane formats, or multiple. + */ + VkImage img[AV_NUM_DATA_POINTERS]; + + /** + * Tiling for the frame. + */ + VkImageTiling tiling; + + /** + * Memory backing the images. Either one, or as many as there are planes + * in the sw_format. + * In case of having multiple VkImages, but one memory, the offset field + * will indicate the bound offset for each image. + */ + VkDeviceMemory mem[AV_NUM_DATA_POINTERS]; + size_t size[AV_NUM_DATA_POINTERS]; + + /** + * OR'd flags for all memory allocated + */ + VkMemoryPropertyFlagBits flags; + + /** + * Updated after every barrier. One per VkImage. + */ + VkAccessFlagBits access[AV_NUM_DATA_POINTERS]; + VkImageLayout layout[AV_NUM_DATA_POINTERS]; + + /** + * Synchronization timeline semaphores, one for each VkImage. + * Must not be freed manually. Must be waited on at every submission using + * the value in sem_value, and must be signalled at every submission, + * using an incremented value. + */ + VkSemaphore sem[AV_NUM_DATA_POINTERS]; + + /** + * Up to date semaphore value at which each image becomes accessible. + * One per VkImage. + * Clients must wait on this value when submitting a command queue, + * and increment it when signalling. 
+ */ + uint64_t sem_value[AV_NUM_DATA_POINTERS]; + + /** + * Internal data. + */ + struct AVVkFrameInternal *internal; + + /** + * Describes the binding offset of each image to the VkDeviceMemory. + * One per VkImage. + */ + ptrdiff_t offset[AV_NUM_DATA_POINTERS]; + + /** + * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if + * the image was allocated with the CONCURRENT concurrency option. + * One per VkImage. + */ + uint32_t queue_family[AV_NUM_DATA_POINTERS]; +}; + +/** + * Allocates a single AVVkFrame and initializes everything as 0. + * @note Must be freed via av_free() + */ +AVVkFrame *av_vk_frame_alloc(void); + +/** + * Returns the optimal per-plane Vulkan format for a given sw_format, + * one for each plane. + * Returns NULL on unsupported formats. + */ +const VkFormat *av_vkfmt_from_pixfmt(enum AVPixelFormat p); + +#endif /* AVUTIL_HWCONTEXT_VULKAN_H */ -- 2.49.1 From c56d89f5dae247d832327c68cf301251c0f2dbef Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:16:52 +0000 Subject: [PATCH 043/118] Changing vulkan file directory --- libavutil/vulkan.c | 3029 -------------------------------------------- 1 file changed, 3029 deletions(-) delete mode 100644 libavutil/vulkan.c diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c deleted file mode 100644 index ef755ad6f7..0000000000 --- a/libavutil/vulkan.c +++ /dev/null @@ -1,3029 +0,0 @@ -/* - * Copyright (c) Lynne - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "avassert.h" -#include "mem.h" - -#include "vulkan.h" -#include "libavutil/vulkan_loader.h" - -const VkComponentMapping ff_comp_identity_map = { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, -}; - -/* Converts return values to strings */ -const char *ff_vk_ret2str(VkResult res) -{ -#define CASE(VAL) case VAL: return #VAL - switch (res) { - CASE(VK_SUCCESS); - CASE(VK_NOT_READY); - CASE(VK_TIMEOUT); - CASE(VK_EVENT_SET); - CASE(VK_EVENT_RESET); - CASE(VK_INCOMPLETE); - CASE(VK_ERROR_OUT_OF_HOST_MEMORY); - CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); - CASE(VK_ERROR_INITIALIZATION_FAILED); - CASE(VK_ERROR_DEVICE_LOST); - CASE(VK_ERROR_MEMORY_MAP_FAILED); - CASE(VK_ERROR_LAYER_NOT_PRESENT); - CASE(VK_ERROR_EXTENSION_NOT_PRESENT); - CASE(VK_ERROR_FEATURE_NOT_PRESENT); - CASE(VK_ERROR_INCOMPATIBLE_DRIVER); - CASE(VK_ERROR_TOO_MANY_OBJECTS); - CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); - CASE(VK_ERROR_FRAGMENTED_POOL); - CASE(VK_ERROR_UNKNOWN); - CASE(VK_ERROR_OUT_OF_POOL_MEMORY); - CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); - CASE(VK_ERROR_FRAGMENTATION); - CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS); - CASE(VK_PIPELINE_COMPILE_REQUIRED); - CASE(VK_ERROR_SURFACE_LOST_KHR); - CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); - CASE(VK_SUBOPTIMAL_KHR); - CASE(VK_ERROR_OUT_OF_DATE_KHR); - CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); - CASE(VK_ERROR_VALIDATION_FAILED_EXT); - CASE(VK_ERROR_INVALID_SHADER_NV); - CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR); - CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR); - CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR); - CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR); - 
CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR); - CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); - CASE(VK_ERROR_NOT_PERMITTED_KHR); - CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); - CASE(VK_THREAD_IDLE_KHR); - CASE(VK_THREAD_DONE_KHR); - CASE(VK_OPERATION_DEFERRED_KHR); - CASE(VK_OPERATION_NOT_DEFERRED_KHR); - default: return "Unknown error"; - } -#undef CASE -} - -/* Malitia pura, Khronos */ -#define FN_MAP_TO(dst_t, dst_name, src_t, src_name) \ - dst_t ff_vk_map_ ##src_name## _to_ ##dst_name(src_t src) \ - { \ - dst_t dst = 0x0; \ - MAP_TO(VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT, \ - VK_IMAGE_USAGE_SAMPLED_BIT); \ - MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT, \ - VK_IMAGE_USAGE_TRANSFER_SRC_BIT); \ - MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT, \ - VK_IMAGE_USAGE_TRANSFER_DST_BIT); \ - MAP_TO(VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT, \ - VK_IMAGE_USAGE_STORAGE_BIT); \ - MAP_TO(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT, \ - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); \ - MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \ - VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); \ - MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR, \ - VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR); \ - MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR, \ - VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR); \ - MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR, \ - VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); \ - MAP_TO(VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT, \ - VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT); \ - return dst; \ - } - -#define MAP_TO(flag1, flag2) if (src & flag2) dst |= flag1; -FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage) -#undef MAP_TO -#define MAP_TO(flag1, flag2) if (src & flag1) dst |= flag2; -FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats) -#undef MAP_TO -#undef FN_MAP_TO - -static void load_enabled_qfs(FFVulkanContext *s) -{ - s->nb_qfs = 0; - for (int i = 0; i < s->hwctx->nb_qf; i++) { - /* Skip duplicates */ - int 
skip = 0; - for (int j = 0; j < s->nb_qfs; j++) { - if (s->qfs[j] == s->hwctx->qf[i].idx) { - skip = 1; - break; - } - } - if (skip) - continue; - - s->qfs[s->nb_qfs++] = s->hwctx->qf[i].idx; - } -} - -int ff_vk_load_props(FFVulkanContext *s) -{ - FFVulkanFunctions *vk = &s->vkfn; - - s->props = (VkPhysicalDeviceProperties2) { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, - }; - - FF_VK_STRUCT_EXT(s, &s->props, &s->props_11, FF_VK_EXT_NO_FLAG, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES); - FF_VK_STRUCT_EXT(s, &s->props, &s->driver_props, FF_VK_EXT_NO_FLAG, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES); - FF_VK_STRUCT_EXT(s, &s->props, &s->subgroup_props, FF_VK_EXT_NO_FLAG, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES); - - FF_VK_STRUCT_EXT(s, &s->props, &s->push_desc_props, FF_VK_EXT_PUSH_DESCRIPTOR, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR); - FF_VK_STRUCT_EXT(s, &s->props, &s->hprops, FF_VK_EXT_EXTERNAL_HOST_MEMORY, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT); - FF_VK_STRUCT_EXT(s, &s->props, &s->coop_matrix_props, FF_VK_EXT_COOP_MATRIX, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR); - FF_VK_STRUCT_EXT(s, &s->props, &s->desc_buf_props, FF_VK_EXT_DESCRIPTOR_BUFFER, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT); - FF_VK_STRUCT_EXT(s, &s->props, &s->optical_flow_props, FF_VK_EXT_OPTICAL_FLOW, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV); - FF_VK_STRUCT_EXT(s, &s->props, &s->host_image_props, FF_VK_EXT_HOST_IMAGE_COPY, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_PROPERTIES_EXT); - - s->feats = (VkPhysicalDeviceFeatures2) { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, - }; - - FF_VK_STRUCT_EXT(s, &s->feats, &s->feats_12, FF_VK_EXT_NO_FLAG, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES); - FF_VK_STRUCT_EXT(s, &s->feats, &s->atomic_float_feats, 
FF_VK_EXT_ATOMIC_FLOAT, - VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT); - - /* Try allocating 1024 layouts */ - s->host_image_copy_layouts = av_malloc(sizeof(*s->host_image_copy_layouts)*1024); - s->host_image_props.pCopySrcLayouts = s->host_image_copy_layouts; - s->host_image_props.copySrcLayoutCount = 512; - s->host_image_props.pCopyDstLayouts = s->host_image_copy_layouts + 512; - s->host_image_props.copyDstLayoutCount = 512; - - vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); - - /* Check if we had enough memory for all layouts */ - if (s->host_image_props.copySrcLayoutCount == 512 || - s->host_image_props.copyDstLayoutCount == 512) { - VkImageLayout *new_array; - size_t new_size; - s->host_image_props.pCopySrcLayouts = - s->host_image_props.pCopyDstLayouts = NULL; - s->host_image_props.copySrcLayoutCount = - s->host_image_props.copyDstLayoutCount = 0; - vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); - - new_size = s->host_image_props.copySrcLayoutCount + - s->host_image_props.copyDstLayoutCount; - new_size *= sizeof(*s->host_image_copy_layouts); - new_array = av_realloc(s->host_image_copy_layouts, new_size); - if (!new_array) - return AVERROR(ENOMEM); - - s->host_image_copy_layouts = new_array; - s->host_image_props.pCopySrcLayouts = new_array; - s->host_image_props.pCopyDstLayouts = new_array + s->host_image_props.copySrcLayoutCount; - vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); - } - - vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); - vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats); - - load_enabled_qfs(s); - - if (s->qf_props) - return 0; - - vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL); - - s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props)); - if (!s->qf_props) - return AVERROR(ENOMEM); - - s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props)); - if (!s->qf_props) { - 
av_freep(&s->qf_props); - return AVERROR(ENOMEM); - } - - s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props)); - if (!s->video_props) { - av_freep(&s->qf_props); - av_freep(&s->query_props); - return AVERROR(ENOMEM); - } - - for (uint32_t i = 0; i < s->tot_nb_qfs; i++) { - s->qf_props[i] = (VkQueueFamilyProperties2) { - .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, - }; - - FF_VK_STRUCT_EXT(s, &s->qf_props[i], &s->query_props[i], FF_VK_EXT_NO_FLAG, - VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR); - FF_VK_STRUCT_EXT(s, &s->qf_props[i], &s->video_props[i], FF_VK_EXT_VIDEO_QUEUE, - VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR); - } - - vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props); - - if (s->extensions & FF_VK_EXT_COOP_MATRIX) { - vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev, - &s->coop_mat_props_nb, NULL); - - if (s->coop_mat_props_nb) { - s->coop_mat_props = av_malloc_array(s->coop_mat_props_nb, - sizeof(VkCooperativeMatrixPropertiesKHR)); - for (int i = 0; i < s->coop_mat_props_nb; i++) { - s->coop_mat_props[i] = (VkCooperativeMatrixPropertiesKHR) { - .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, - }; - } - - vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev, - &s->coop_mat_props_nb, - s->coop_mat_props); - } - } - - return 0; -} - -AVVulkanDeviceQueueFamily *ff_vk_qf_find(FFVulkanContext *s, - VkQueueFlagBits dev_family, - VkVideoCodecOperationFlagBitsKHR vid_ops) -{ - for (int i = 0; i < s->hwctx->nb_qf; i++) { - if ((s->hwctx->qf[i].flags & dev_family) && - (s->hwctx->qf[i].video_caps & vid_ops) == vid_ops) { - return &s->hwctx->qf[i]; - } - } - return NULL; -} - -void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool) -{ - FFVulkanFunctions *vk = &s->vkfn; - - for (int i = 0; i < pool->pool_size; i++) { - FFVkExecContext *e = &pool->contexts[i]; - - if (e->fence) { - if (e->had_submission) - 
vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); - vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); - } - - ff_vk_exec_discard_deps(s, e); - - av_free(e->frame_deps); - av_free(e->sw_frame_deps); - av_free(e->buf_deps); - av_free(e->queue_family_dst); - av_free(e->layout_dst); - av_free(e->access_dst); - av_free(e->frame_update); - av_free(e->frame_locked); - av_free(e->sem_sig); - av_free(e->sem_sig_val_dst); - av_free(e->sem_wait); - } - - /* Free shader-specific data */ - for (int i = 0; i < pool->nb_reg_shd; i++) { - FFVulkanShaderData *sd = &pool->reg_shd[i]; - - if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - for (int j = 0; j < sd->nb_descriptor_sets; j++) { - FFVulkanDescriptorSetData *set_data = &sd->desc_set_buf[j]; - if (set_data->buf.mem) - ff_vk_unmap_buffer(s, &set_data->buf, 0); - ff_vk_free_buf(s, &set_data->buf); - } - } - - if (sd->desc_pool) - vk->DestroyDescriptorPool(s->hwctx->act_dev, sd->desc_pool, - s->hwctx->alloc); - - av_freep(&sd->desc_set_buf); - av_freep(&sd->desc_bind); - av_freep(&sd->desc_sets); - } - - av_freep(&pool->reg_shd); - - for (int i = 0; i < pool->pool_size; i++) { - if (pool->cmd_buf_pools[i]) - vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pools[i], - 1, &pool->cmd_bufs[i]); - - if (pool->cmd_buf_pools[i]) - vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pools[i], s->hwctx->alloc); - } - if (pool->query_pool) - vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc); - - av_free(pool->query_data); - av_free(pool->cmd_buf_pools); - av_free(pool->cmd_bufs); - av_free(pool->contexts); -} - -int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, - FFVkExecPool *pool, int nb_contexts, - int nb_queries, VkQueryType query_type, int query_64bit, - const void *query_create_pnext) -{ - int err; - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - VkCommandPoolCreateInfo cqueue_create; - VkCommandBufferAllocateInfo 
cbuf_create; - - const VkQueryPoolVideoEncodeFeedbackCreateInfoKHR *ef = NULL; - - atomic_init(&pool->idx, 0); - - if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { - ef = ff_vk_find_struct(query_create_pnext, - VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR); - if (!ef) - return AVERROR(EINVAL); - } - - /* Allocate space for command buffer pools */ - pool->cmd_buf_pools = av_malloc(nb_contexts*sizeof(*pool->cmd_buf_pools)); - if (!pool->cmd_buf_pools) { - err = AVERROR(ENOMEM); - goto fail; - } - - /* Allocate space for command buffers */ - pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs)); - if (!pool->cmd_bufs) { - err = AVERROR(ENOMEM); - goto fail; - } - - for (int i = 0; i < nb_contexts; i++) { - /* Create command pool */ - cqueue_create = (VkCommandPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | - VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = qf->idx, - }; - - ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, - s->hwctx->alloc, &pool->cmd_buf_pools[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - /* Allocate command buffer */ - cbuf_create = (VkCommandBufferAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandPool = pool->cmd_buf_pools[i], - .commandBufferCount = 1, - }; - ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, - &pool->cmd_bufs[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - } - - /* Query pool */ - if (nb_queries) { - VkQueryPoolCreateInfo query_pool_info = { - .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, - .pNext = query_create_pnext, - .queryType = query_type, - 
.queryCount = nb_queries*nb_contexts, - }; - ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, - s->hwctx->alloc, &pool->query_pool); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - pool->nb_queries = nb_queries; - pool->query_status_stride = 1 + 1; /* One result, one status by default */ - pool->query_results = nb_queries; - pool->query_statuses = nb_queries; - - /* Video encode queries produce two results per query */ - if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { - int nb_results = av_popcount(ef->encodeFeedbackFlags); - pool->query_status_stride = nb_results + 1; - pool->query_results *= nb_results; - } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { - pool->query_status_stride = 1; - pool->query_results = 0; - } - - pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4); - - /* Allocate space for the query data */ - pool->query_data = av_calloc(nb_contexts, pool->qd_size); - if (!pool->query_data) { - err = AVERROR(ENOMEM); - goto fail; - } - } - - /* Allocate space for the contexts */ - pool->contexts = av_calloc(nb_contexts, sizeof(*pool->contexts)); - if (!pool->contexts) { - err = AVERROR(ENOMEM); - goto fail; - } - - pool->pool_size = nb_contexts; - - /* Init contexts */ - for (int i = 0; i < pool->pool_size; i++) { - FFVkExecContext *e = &pool->contexts[i]; - VkFenceCreateInfo fence_create = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - .flags = VK_FENCE_CREATE_SIGNALED_BIT, - }; - - /* Fence */ - ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc, - &e->fence); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - e->idx = i; - e->parent = pool; - - /* Query data */ - e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i; - e->query_idx = 
nb_queries*i; - - /* Command buffer */ - e->buf = pool->cmd_bufs[i]; - - /* Queue index distribution */ - e->qi = i % qf->num; - e->qf = qf->idx; - vk->GetDeviceQueue(s->hwctx->act_dev, qf->idx, e->qi, &e->queue); - } - - return 0; - -fail: - ff_vk_exec_pool_free(s, pool); - return err; -} - -VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, - void **data, VkQueryResultFlagBits flags) -{ - FFVulkanFunctions *vk = &s->vkfn; - const FFVkExecPool *pool = e->parent; - VkQueryResultFlags qf = flags & ~(VK_QUERY_RESULT_64_BIT | - VK_QUERY_RESULT_WITH_STATUS_BIT_KHR); - - if (!e->query_data) { - av_log(s, AV_LOG_ERROR, "Requested a query with a NULL query_data pointer!\n"); - return VK_INCOMPLETE; - } - - qf |= pool->query_64bit ? - VK_QUERY_RESULT_64_BIT : 0x0; - qf |= pool->query_statuses ? - VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; - - if (data) - *data = e->query_data; - - return vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool, - e->query_idx, - pool->nb_queries, - pool->qd_size, e->query_data, - pool->qd_size, qf); -} - -FFVkExecContext *ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool) -{ - return &pool->contexts[atomic_fetch_add(&pool->idx, 1) % pool->pool_size]; -} - -void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e) -{ - FFVulkanFunctions *vk = &s->vkfn; - vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); - ff_vk_exec_discard_deps(s, e); -} - -int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - const FFVkExecPool *pool = e->parent; - - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - - /* Wait for the fence to be signalled */ - vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &e->fence); - - /* Discard queue dependencies */ - ff_vk_exec_discard_deps(s, e); 
- - ret = vk->BeginCommandBuffer(e->buf, &cmd_start); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - if (pool->nb_queries) - vk->CmdResetQueryPool(e->buf, pool->query_pool, - e->query_idx, pool->nb_queries); - - return 0; -} - -void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e) -{ - for (int j = 0; j < e->nb_buf_deps; j++) - av_buffer_unref(&e->buf_deps[j]); - e->nb_buf_deps = 0; - - for (int j = 0; j < e->nb_sw_frame_deps; j++) - av_frame_free(&e->sw_frame_deps[j]); - e->nb_sw_frame_deps = 0; - - for (int j = 0; j < e->nb_frame_deps; j++) { - AVFrame *f = e->frame_deps[j]; - if (e->frame_locked[j]) { - AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; - AVVulkanFramesContext *vkfc = hwfc->hwctx; - AVVkFrame *vkf = (AVVkFrame *)f->data[0]; - vkfc->unlock_frame(hwfc, vkf); - e->frame_locked[j] = 0; - } - e->frame_update[j] = 0; - } - e->nb_frame_deps = 0; - - e->sem_wait_cnt = 0; - e->sem_sig_cnt = 0; - e->sem_sig_val_dst_cnt = 0; -} - -int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps, int ref) -{ - AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size, - (e->nb_buf_deps + nb_deps) * sizeof(*dst)); - if (!dst) { - ff_vk_exec_discard_deps(s, e); - return AVERROR(ENOMEM); - } - - e->buf_deps = dst; - - for (int i = 0; i < nb_deps; i++) { - if (!deps[i]) - continue; - - e->buf_deps[e->nb_buf_deps] = ref ? 
av_buffer_ref(deps[i]) : deps[i]; - if (!e->buf_deps[e->nb_buf_deps]) { - ff_vk_exec_discard_deps(s, e); - return AVERROR(ENOMEM); - } - e->nb_buf_deps++; - } - - return 0; -} - -int ff_vk_exec_add_dep_sw_frame(FFVulkanContext *s, FFVkExecContext *e, - AVFrame *f) -{ - AVFrame **dst = av_fast_realloc(e->sw_frame_deps, &e->sw_frame_deps_alloc_size, - (e->nb_sw_frame_deps + 1) * sizeof(*dst)); - if (!dst) { - ff_vk_exec_discard_deps(s, e); - return AVERROR(ENOMEM); - } - - e->sw_frame_deps = dst; - - e->sw_frame_deps[e->nb_sw_frame_deps] = av_frame_clone(f); - if (!e->sw_frame_deps[e->nb_sw_frame_deps]) { - ff_vk_exec_discard_deps(s, e); - return AVERROR(ENOMEM); - } - - e->nb_sw_frame_deps++; - - return 0; -} - -#define ARR_REALLOC(str, arr, alloc_s, cnt) \ - do { \ - arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \ - if (!arr) { \ - ff_vk_exec_discard_deps(s, e); \ - return AVERROR(ENOMEM); \ - } \ - str->arr = arr; \ - } while (0) - -typedef struct TempSyncCtx { - int nb_sem; - VkSemaphore sem[]; -} TempSyncCtx; - -static void destroy_tmp_semaphores(void *opaque, uint8_t *data) -{ - FFVulkanContext *s = opaque; - FFVulkanFunctions *vk = &s->vkfn; - TempSyncCtx *ts = (TempSyncCtx *)data; - - for (int i = 0; i < ts->nb_sem; i++) - vk->DestroySemaphore(s->hwctx->act_dev, ts->sem[i], s->hwctx->alloc); - - av_free(ts); -} - -int ff_vk_exec_add_dep_wait_sem(FFVulkanContext *s, FFVkExecContext *e, - VkSemaphore sem, uint64_t val, - VkPipelineStageFlagBits2 stage) -{ - VkSemaphoreSubmitInfo *sem_wait; - ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); - - e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = sem, - .value = val, - .stageMask = stage, - }; - - return 0; -} - -int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e, - VkSemaphore *sem, int nb, - VkPipelineStageFlagBits2 stage, - int wait) -{ - int err; - size_t buf_size; - AVBufferRef 
*buf; - TempSyncCtx *ts; - FFVulkanFunctions *vk = &s->vkfn; - - /* Do not transfer ownership if we're signalling a binary semaphore, - * since we're probably exporting it. */ - if (!wait) { - for (int i = 0; i < nb; i++) { - VkSemaphoreSubmitInfo *sem_sig; - ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); - - e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = sem[i], - .stageMask = stage, - }; - } - - return 0; - } - - buf_size = sizeof(*ts) + sizeof(VkSemaphore)*nb; - ts = av_mallocz(buf_size); - if (!ts) { - err = AVERROR(ENOMEM); - goto fail; - } - - memcpy(ts->sem, sem, nb*sizeof(*sem)); - ts->nb_sem = nb; - - buf = av_buffer_create((uint8_t *)ts, buf_size, destroy_tmp_semaphores, s, 0); - if (!buf) { - av_free(ts); - err = AVERROR(ENOMEM); - goto fail; - } - - err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); - if (err < 0) { - av_buffer_unref(&buf); - return err; - } - - for (int i = 0; i < nb; i++) { - err = ff_vk_exec_add_dep_wait_sem(s, e, sem[i], 0, stage); - if (err < 0) - return err; - } - - return 0; - -fail: - for (int i = 0; i < nb; i++) - vk->DestroySemaphore(s->hwctx->act_dev, sem[i], s->hwctx->alloc); - - return err; -} - -int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, - VkPipelineStageFlagBits2 wait_stage, - VkPipelineStageFlagBits2 signal_stage) -{ - uint8_t *frame_locked; - uint8_t *frame_update; - AVFrame **frame_deps; - AVBufferRef **buf_deps; - VkImageLayout *layout_dst; - uint32_t *queue_family_dst; - VkAccessFlagBits *access_dst; - - AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; - AVVulkanFramesContext *vkfc = hwfc->hwctx; - AVVkFrame *vkf = (AVVkFrame *)f->data[0]; - int nb_images = ff_vk_count_images(vkf); - - /* Don't add duplicates */ - for (int i = 0; i < e->nb_frame_deps; i++) - if (e->frame_deps[i]->data[0] == f->data[0]) - return 1; - - ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, 
e->nb_frame_deps); - ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps); - ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps); - - ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps); - ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps); - ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps); - - /* prepare_frame in hwcontext_vulkan.c uses the regular frame management - * code but has no frame yet, and it doesn't need to actually store a ref - * to the frame. */ - if (f->buf[0]) { - ARR_REALLOC(e, buf_deps, &e->buf_deps_alloc_size, e->nb_buf_deps); - e->buf_deps[e->nb_buf_deps] = av_buffer_ref(f->buf[0]); - if (!e->buf_deps[e->nb_buf_deps]) { - ff_vk_exec_discard_deps(s, e); - return AVERROR(ENOMEM); - } - e->nb_buf_deps++; - } - - e->frame_deps[e->nb_frame_deps] = f; - - vkfc->lock_frame(hwfc, vkf); - e->frame_locked[e->nb_frame_deps] = 1; - e->frame_update[e->nb_frame_deps] = 0; - e->nb_frame_deps++; - - for (int i = 0; i < nb_images; i++) { - VkSemaphoreSubmitInfo *sem_wait; - VkSemaphoreSubmitInfo *sem_sig; - uint64_t **sem_sig_val_dst; - - ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); - ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); - ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); - - e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = vkf->sem[i], - .value = vkf->sem_value[i], - .stageMask = wait_stage, - }; - - e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .semaphore = vkf->sem[i], - .value = vkf->sem_value[i] + 1, - .stageMask = signal_stage, - }; - - e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i]; - e->sem_sig_val_dst_cnt++; - } - - return 0; -} - -void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, 
- VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar) -{ - int i; - for (i = 0; i < e->nb_frame_deps; i++) - if (e->frame_deps[i]->data[0] == f->data[0]) - break; - av_assert0(i < e->nb_frame_deps); - - /* Don't update duplicates */ - if (nb_img_bar && !e->frame_update[i]) - (*nb_img_bar)++; - - e->queue_family_dst[i] = bar->dstQueueFamilyIndex; - e->access_dst[i] = bar->dstAccessMask; - e->layout_dst[i] = bar->newLayout; - e->frame_update[i] = 1; -} - -int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, - VkSemaphore *dst, uint64_t *dst_val, - AVFrame *f) -{ - uint64_t **sem_sig_val_dst; - AVVkFrame *vkf = (AVVkFrame *)f->data[0]; - - /* Reject unknown frames */ - int i; - for (i = 0; i < e->nb_frame_deps; i++) - if (e->frame_deps[i]->data[0] == f->data[0]) - break; - if (i == e->nb_frame_deps) - return AVERROR(EINVAL); - - ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); - - *dst = vkf->sem[0]; - *dst_val = vkf->sem_value[0]; - - e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val; - e->sem_sig_val_dst_cnt++; - - return 0; -} - -int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, - .commandBuffer = e->buf, - }; - VkSubmitInfo2 submit_info = (VkSubmitInfo2) { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, - .pCommandBufferInfos = &cmd_buf_info, - .commandBufferInfoCount = 1, - .pWaitSemaphoreInfos = e->sem_wait, - .waitSemaphoreInfoCount = e->sem_wait_cnt, - .pSignalSemaphoreInfos = e->sem_sig, - .signalSemaphoreInfoCount = e->sem_sig_cnt, - }; - - ret = vk->EndCommandBuffer(e->buf); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", - ff_vk_ret2str(ret)); - ff_vk_exec_discard_deps(s, e); - return AVERROR_EXTERNAL; - } - - s->hwctx->lock_queue(s->device, e->qf, e->qi); - ret = 
vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence); - s->hwctx->unlock_queue(s->device, e->qf, e->qi); - - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", - ff_vk_ret2str(ret)); - ff_vk_exec_discard_deps(s, e); - return AVERROR_EXTERNAL; - } - - for (int i = 0; i < e->sem_sig_val_dst_cnt; i++) - *e->sem_sig_val_dst[i] += 1; - - /* Unlock all frames */ - for (int j = 0; j < e->nb_frame_deps; j++) { - if (e->frame_locked[j]) { - AVFrame *f = e->frame_deps[j]; - AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; - AVVulkanFramesContext *vkfc = hwfc->hwctx; - AVVkFrame *vkf = (AVVkFrame *)f->data[0]; - - if (e->frame_update[j]) { - int nb_images = ff_vk_count_images(vkf); - for (int i = 0; i < nb_images; i++) { - vkf->layout[i] = e->layout_dst[j]; - vkf->access[i] = e->access_dst[j]; - vkf->queue_family[i] = e->queue_family_dst[j]; - } - } - vkfc->unlock_frame(hwfc, vkf); - e->frame_locked[j] = 0; - } - } - - e->had_submission = 1; - - return 0; -} - -int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) -{ - VkResult ret; - int index = -1; - FFVulkanFunctions *vk = &s->vkfn; - - VkMemoryAllocateInfo alloc_info = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = alloc_extension, - }; - - alloc_info.allocationSize = req->size; - - /* The vulkan spec requires memory types to be sorted in the "optimal" - * order, so the first matching type we find will be the best/fastest one */ - for (int i = 0; i < s->mprops.memoryTypeCount; i++) { - /* The memory type must be supported by the requirements (bitfield) */ - if (!(req->memoryTypeBits & (1 << i))) - continue; - - /* The memory type flags must include our properties */ - if ((req_flags != UINT32_MAX) && - ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)) - continue; - - /* Found a suitable 
memory type */ - index = i; - break; - } - - if (index < 0) { - av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", - req_flags); - return AVERROR(EINVAL); - } - - alloc_info.memoryTypeIndex = index; - - ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info, - s->hwctx->alloc, mem); - if (ret != VK_SUCCESS) - return AVERROR(ENOMEM); - - if (mem_flags) - *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; - - return 0; -} - -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, - void *pNext, void *alloc_pNext, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) -{ - int err; - VkResult ret; - int use_ded_mem; - FFVulkanFunctions *vk = &s->vkfn; - - /* Buffer usage flags corresponding to buffer descriptor types */ - const VkBufferUsageFlags desc_usage = - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; - - if ((s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) && (usage & desc_usage)) - usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; - - VkBufferCreateInfo buf_spawn = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = pNext, - .usage = usage, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .size = flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ? 
- FFALIGN(size, s->props.properties.limits.minMemoryMapAlignment) : - size, - }; - - VkMemoryAllocateFlagsInfo alloc_flags = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, - .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, - }; - VkBufferMemoryRequirementsInfo2 req_desc = { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, - }; - VkMemoryDedicatedAllocateInfo ded_alloc = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = alloc_pNext, - }; - VkMemoryDedicatedRequirements ded_req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, - }; - VkMemoryRequirements2 req = { - .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, - .pNext = &ded_req, - }; - - av_log(s, AV_LOG_DEBUG, "Creating a buffer of %"SIZE_SPECIFIER" bytes, " - "usage: 0x%x, flags: 0x%x\n", - size, usage, flags); - - ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - req_desc.buffer = buf->buf; - - vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req); - - /* In case the implementation prefers/requires dedicated allocation */ - use_ded_mem = ded_req.prefersDedicatedAllocation | - ded_req.requiresDedicatedAllocation; - if (use_ded_mem) { - ded_alloc.buffer = buf->buf; - ded_alloc.pNext = alloc_pNext; - alloc_pNext = &ded_alloc; - } - - if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { - alloc_flags.pNext = alloc_pNext; - alloc_pNext = &alloc_flags; - } - - err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext, - &buf->flags, &buf->mem); - if (err) - return err; - - ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { - 
VkBufferDeviceAddressInfo address_info = { - .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, - .buffer = buf->buf, - }; - buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info); - } - - buf->size = size; - - return 0; -} - -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], - int nb_buffers, int invalidate) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange inval_list[64]; - int inval_count = 0; - - for (int i = 0; i < nb_buffers; i++) { - void *dst; - ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0, - VK_WHOLE_SIZE, 0, &dst); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - mem[i] = buf[i]->mapped_mem = dst; - } - - if (!invalidate) - return 0; - - for (int i = 0; i < nb_buffers; i++) { - const VkMappedMemoryRange ival_buf = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i]->mem, - .size = VK_WHOLE_SIZE, - }; - if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) - continue; - inval_list[inval_count++] = ival_buf; - } - - if (inval_count) { - ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count, - inval_list); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } - - return 0; -} - -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, - int flush) -{ - int err = 0; - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange flush_list[64]; - int flush_count = 0; - - if (flush) { - for (int i = 0; i < nb_buffers; i++) { - const VkMappedMemoryRange flush_buf = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i]->mem, - .size = VK_WHOLE_SIZE, - }; - - av_assert0(!buf[i]->host_ref); - if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) - continue; - flush_list[flush_count++] = flush_buf; - } 
- } - - if (flush_count) { - ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count, - flush_list); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ - } - } - - for (int i = 0; i < nb_buffers; i++) { - vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem); - buf[i]->mapped_mem = NULL; - } - - return err; -} - -void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) -{ - FFVulkanFunctions *vk = &s->vkfn; - - if (!buf || !s->hwctx) - return; - - if (buf->mapped_mem && !buf->host_ref) - ff_vk_unmap_buffer(s, buf, 0); - if (buf->buf != VK_NULL_HANDLE) - vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); - if (buf->mem != VK_NULL_HANDLE) - vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); - if (buf->host_ref) - av_buffer_unref(&buf->host_ref); - - buf->buf = VK_NULL_HANDLE; - buf->mem = VK_NULL_HANDLE; - buf->mapped_mem = NULL; -} - -static void free_data_buf(void *opaque, uint8_t *data) -{ - FFVulkanContext *ctx = opaque; - FFVkBuffer *buf = (FFVkBuffer *)data; - ff_vk_free_buf(ctx, buf); - av_free(data); -} - -static AVBufferRef *alloc_data_buf(void *opaque, size_t size) -{ - AVBufferRef *ref; - uint8_t *buf = av_mallocz(size); - if (!buf) - return NULL; - - ref = av_buffer_create(buf, size, free_data_buf, opaque, 0); - if (!ref) - av_free(buf); - return ref; -} - -int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, - AVBufferRef **buf, VkBufferUsageFlags usage, - void *create_pNext, size_t size, - VkMemoryPropertyFlagBits mem_props) -{ - int err; - AVBufferRef *ref; - FFVkBuffer *data; - - *buf = NULL; - - if (!(*buf_pool)) { - *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx, - alloc_data_buf, NULL); - if (!(*buf_pool)) - return AVERROR(ENOMEM); - } - - *buf = ref = av_buffer_pool_get(*buf_pool); - if (!ref) - return AVERROR(ENOMEM); - - data = (FFVkBuffer *)ref->data; - 
data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; - data->access = VK_ACCESS_2_NONE; - - if (data->size >= size) - return 0; - - ff_vk_free_buf(ctx, data); - memset(data, 0, sizeof(*data)); - - err = ff_vk_create_buf(ctx, data, size, - create_pNext, NULL, usage, - mem_props); - if (err < 0) { - av_buffer_unref(&ref); - *buf = NULL; - return err; - } - - if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { - err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0); - if (err < 0) { - av_buffer_unref(&ref); - *buf = NULL; - return err; - } - } - - return 0; -} - -static int create_mapped_buffer(FFVulkanContext *s, - FFVkBuffer *vkb, VkBufferUsageFlags usage, - size_t size, - VkExternalMemoryBufferCreateInfo *create_desc, - VkImportMemoryHostPointerInfoEXT *import_desc, - VkMemoryHostPointerPropertiesEXT props) -{ - int err; - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - VkBufferCreateInfo buf_spawn = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = create_desc, - .usage = usage, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .size = size, - }; - VkMemoryRequirements req = { - .size = size, - .alignment = s->hprops.minImportedHostPointerAlignment, - .memoryTypeBits = props.memoryTypeBits, - }; - - err = ff_vk_alloc_mem(s, &req, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - import_desc, &vkb->flags, &vkb->mem); - if (err < 0) - return err; - - ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &vkb->buf); - if (ret != VK_SUCCESS) { - vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc); - return AVERROR_EXTERNAL; - } - - ret = vk->BindBufferMemory(s->hwctx->act_dev, vkb->buf, vkb->mem, 0); - if (ret != VK_SUCCESS) { - vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc); - vk->DestroyBuffer(s->hwctx->act_dev, vkb->buf, s->hwctx->alloc); - return AVERROR_EXTERNAL; - } - - return 0; -} - -static void destroy_avvkbuf(void *opaque, uint8_t *data) -{ - FFVulkanContext *s = opaque; - FFVkBuffer *buf = (FFVkBuffer *)data; 
- ff_vk_free_buf(s, buf); - av_free(buf); -} - -int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst, - uint8_t *src_data, const AVBufferRef *src_buf, - VkBufferUsageFlags usage) -{ - int err; - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - VkExternalMemoryBufferCreateInfo create_desc = { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, - .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, - }; - VkMemoryAllocateFlagsInfo alloc_flags = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, - .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, - }; - VkImportMemoryHostPointerInfoEXT import_desc = { - .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, - .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, - .pNext = usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? &alloc_flags : NULL, - }; - VkMemoryHostPointerPropertiesEXT props; - - AVBufferRef *ref; - FFVkBuffer *vkb; - size_t offs; - size_t buffer_size; - - *dst = NULL; - - /* Get the previous point at which mapping was possible and use it */ - offs = (uintptr_t)src_data % s->hprops.minImportedHostPointerAlignment; - import_desc.pHostPointer = src_data - offs; - - props = (VkMemoryHostPointerPropertiesEXT) { - VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, - }; - ret = vk->GetMemoryHostPointerPropertiesEXT(s->hwctx->act_dev, - import_desc.handleType, - import_desc.pHostPointer, - &props); - if (!(ret == VK_SUCCESS && props.memoryTypeBits)) - return AVERROR(EINVAL); - - /* Ref the source buffer */ - ref = av_buffer_ref(src_buf); - if (!ref) - return AVERROR(ENOMEM); - - /* Add the offset at the start, which gets ignored */ - const ptrdiff_t src_offset = src_data - src_buf->data; - buffer_size = offs + (src_buf->size - src_offset); - buffer_size = FFALIGN(buffer_size, s->props.properties.limits.minMemoryMapAlignment); - buffer_size = FFALIGN(buffer_size, s->hprops.minImportedHostPointerAlignment); - - /* Create a buffer 
struct */ - vkb = av_mallocz(sizeof(*vkb)); - if (!vkb) { - av_buffer_unref(&ref); - return AVERROR(ENOMEM); - } - - err = create_mapped_buffer(s, vkb, usage, - buffer_size, &create_desc, &import_desc, - props); - if (err < 0) { - av_buffer_unref(&ref); - av_free(vkb); - return err; - } - - if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { - VkBufferDeviceAddressInfo address_info = { - .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, - .buffer = vkb->buf, - }; - vkb->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info); - } - - vkb->host_ref = ref; - vkb->virtual_offset = offs; - vkb->address += offs; - vkb->mapped_mem = src_data; - vkb->size = buffer_size - offs; - vkb->flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - - /* Create a ref */ - *dst = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), - destroy_avvkbuf, s, 0); - if (!(*dst)) { - destroy_avvkbuf(s, (uint8_t *)vkb); - *dst = NULL; - return AVERROR(ENOMEM); - } - - return 0; -} - -int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, - VkShaderStageFlagBits stage) -{ - VkPushConstantRange *pc; - - shd->push_consts = av_realloc_array(shd->push_consts, - sizeof(*shd->push_consts), - shd->push_consts_num + 1); - if (!shd->push_consts) - return AVERROR(ENOMEM); - - pc = &shd->push_consts[shd->push_consts_num++]; - memset(pc, 0, sizeof(*pc)); - - pc->stageFlags = stage; - pc->offset = offset; - pc->size = size; - - return 0; -} - -int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, - int unnorm_coords, VkFilter filt) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - VkSamplerCreateInfo sampler_info = { - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .magFilter = filt, - .minFilter = sampler_info.magFilter, - .mipmapMode = unnorm_coords ? 
VK_SAMPLER_MIPMAP_MODE_NEAREST : - VK_SAMPLER_MIPMAP_MODE_LINEAR, - .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .addressModeV = sampler_info.addressModeU, - .addressModeW = sampler_info.addressModeU, - .anisotropyEnable = VK_FALSE, - .compareOp = VK_COMPARE_OP_NEVER, - .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, - .unnormalizedCoordinates = unnorm_coords, - }; - - ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, - s->hwctx->alloc, sampler); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} - -VkImageAspectFlags ff_vk_aspect_flag(AVFrame *f, int p) -{ - AVVkFrame *vkf = (AVVkFrame *)f->data[0]; - AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; - int nb_images = ff_vk_count_images(vkf); - int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); - - static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_PLANE_0_BIT, - VK_IMAGE_ASPECT_PLANE_1_BIT, - VK_IMAGE_ASPECT_PLANE_2_BIT, }; - - if (ff_vk_mt_is_np_rgb(hwfc->sw_format) || (nb_planes == nb_images)) - return VK_IMAGE_ASPECT_COLOR_BIT; - - return plane_aspect[p]; -} - -int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) -{ - if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA || - pix_fmt == AV_PIX_FMT_RGBA || pix_fmt == AV_PIX_FMT_RGB24 || - pix_fmt == AV_PIX_FMT_BGR24 || pix_fmt == AV_PIX_FMT_RGB48 || - pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 || - pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0 || - pix_fmt == AV_PIX_FMT_0BGR || pix_fmt == AV_PIX_FMT_RGB0 || - pix_fmt == AV_PIX_FMT_GBRP10 || pix_fmt == AV_PIX_FMT_GBRP12 || - pix_fmt == AV_PIX_FMT_GBRP14 || pix_fmt == AV_PIX_FMT_GBRP16 || - pix_fmt == AV_PIX_FMT_GBRAP || pix_fmt == AV_PIX_FMT_GBRAP10 || - pix_fmt == AV_PIX_FMT_GBRAP12 || pix_fmt == AV_PIX_FMT_GBRAP14 || - pix_fmt == AV_PIX_FMT_GBRAP16 || pix_fmt == AV_PIX_FMT_GBRAP32 || - pix_fmt == 
AV_PIX_FMT_GBRPF32 || pix_fmt == AV_PIX_FMT_GBRAPF32 || - pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10 || - pix_fmt == AV_PIX_FMT_RGBAF32 || pix_fmt == AV_PIX_FMT_RGBF32 || - pix_fmt == AV_PIX_FMT_RGBA128 || pix_fmt == AV_PIX_FMT_RGB96 || - pix_fmt == AV_PIX_FMT_GBRP || pix_fmt == AV_PIX_FMT_BAYER_RGGB16) - return 1; - return 0; -} - -void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4], int inv) -{ - switch (pix_fmt) { - case AV_PIX_FMT_GBRP: - case AV_PIX_FMT_GBRAP: - case AV_PIX_FMT_GBRAP10: - case AV_PIX_FMT_GBRAP12: - case AV_PIX_FMT_GBRAP14: - case AV_PIX_FMT_GBRAP16: - case AV_PIX_FMT_GBRP10: - case AV_PIX_FMT_GBRP12: - case AV_PIX_FMT_GBRP14: - case AV_PIX_FMT_GBRP16: - case AV_PIX_FMT_GBRPF32: - case AV_PIX_FMT_GBRAP32: - case AV_PIX_FMT_GBRAPF32: - lut[0] = 1; - lut[1] = 2; - lut[2] = 0; - lut[3] = 3; - break; - default: - lut[0] = 0; - lut[1] = 1; - lut[2] = 2; - lut[3] = 3; - break; - } - - if (inv) { - int lut_tmp[4] = { lut[0], lut[1], lut[2], lut[3] }; - for (int i = 0; i < 4; i++) - lut[lut_tmp[i]] = i; - } - - return; -} - -const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, - enum FFVkShaderRepFormat rep_fmt) -{ - switch (pix_fmt) { - case AV_PIX_FMT_RGBA: - case AV_PIX_FMT_BGRA: - case AV_PIX_FMT_RGB24: - case AV_PIX_FMT_BGR24: - case AV_PIX_FMT_BGR0: - case AV_PIX_FMT_RGB0: - case AV_PIX_FMT_RGB565: - case AV_PIX_FMT_BGR565: - case AV_PIX_FMT_UYVA: - case AV_PIX_FMT_YUYV422: - case AV_PIX_FMT_UYVY422: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "rgba8ui", - [FF_VK_REP_FLOAT] = "rgba8", - [FF_VK_REP_INT] = "rgba8i", - [FF_VK_REP_UINT] = "rgba8ui", - }; - return rep_tab[rep_fmt]; - } - case AV_PIX_FMT_X2RGB10: - case AV_PIX_FMT_X2BGR10: - case AV_PIX_FMT_Y210: - case AV_PIX_FMT_XV30: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "rgb10_a2ui", - [FF_VK_REP_FLOAT] = "rgb10_a2", - [FF_VK_REP_INT] = NULL, - [FF_VK_REP_UINT] = "rgb10_a2ui", - }; - return rep_tab[rep_fmt]; - } - case AV_PIX_FMT_RGB48: - 
case AV_PIX_FMT_RGBA64: - case AV_PIX_FMT_Y212: - case AV_PIX_FMT_Y216: - case AV_PIX_FMT_XV36: - case AV_PIX_FMT_XV48: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "rgba16ui", - [FF_VK_REP_FLOAT] = "rgba16", - [FF_VK_REP_INT] = "rgba16i", - [FF_VK_REP_UINT] = "rgba16ui", - }; - return rep_tab[rep_fmt]; - } - case AV_PIX_FMT_RGBF32: - case AV_PIX_FMT_RGBAF32: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "rgba32f", - [FF_VK_REP_FLOAT] = "rgba32f", - [FF_VK_REP_INT] = "rgba32i", - [FF_VK_REP_UINT] = "rgba32ui", - }; - return rep_tab[rep_fmt]; - } - case AV_PIX_FMT_RGB96: - case AV_PIX_FMT_RGBA128: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "rgba32ui", - [FF_VK_REP_FLOAT] = NULL, - [FF_VK_REP_INT] = "rgba32i", - [FF_VK_REP_UINT] = "rgba32ui", - }; - return rep_tab[rep_fmt]; - } - case AV_PIX_FMT_GBRP: - case AV_PIX_FMT_GRAY8: - case AV_PIX_FMT_GBRAP: - case AV_PIX_FMT_YUV420P: - case AV_PIX_FMT_YUV422P: - case AV_PIX_FMT_YUV444P: - case AV_PIX_FMT_YUVA420P: - case AV_PIX_FMT_YUVA422P: - case AV_PIX_FMT_YUVA444P: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "r8ui", - [FF_VK_REP_FLOAT] = "r8", - [FF_VK_REP_INT] = "r8i", - [FF_VK_REP_UINT] = "r8ui", - }; - return rep_tab[rep_fmt]; - }; - case AV_PIX_FMT_GRAY10: - case AV_PIX_FMT_GRAY12: - case AV_PIX_FMT_GRAY14: - case AV_PIX_FMT_GRAY16: - case AV_PIX_FMT_GBRAP10: - case AV_PIX_FMT_GBRAP12: - case AV_PIX_FMT_GBRAP14: - case AV_PIX_FMT_GBRAP16: - case AV_PIX_FMT_GBRP10: - case AV_PIX_FMT_GBRP12: - case AV_PIX_FMT_GBRP14: - case AV_PIX_FMT_GBRP16: - case AV_PIX_FMT_YUV420P10: - case AV_PIX_FMT_YUV420P12: - case AV_PIX_FMT_YUV420P16: - case AV_PIX_FMT_YUV422P10: - case AV_PIX_FMT_YUV422P12: - case AV_PIX_FMT_YUV422P16: - case AV_PIX_FMT_YUV444P10: - case AV_PIX_FMT_YUV444P12: - case AV_PIX_FMT_YUV444P16: - case AV_PIX_FMT_YUVA420P10: - case AV_PIX_FMT_YUVA420P16: - case AV_PIX_FMT_YUVA422P10: - case AV_PIX_FMT_YUVA422P12: - case AV_PIX_FMT_YUVA422P16: - case AV_PIX_FMT_YUVA444P10: - case 
AV_PIX_FMT_YUVA444P12: - case AV_PIX_FMT_YUVA444P16: - case AV_PIX_FMT_BAYER_RGGB16: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "r16ui", - [FF_VK_REP_FLOAT] = "r16f", - [FF_VK_REP_INT] = "r16i", - [FF_VK_REP_UINT] = "r16ui", - }; - return rep_tab[rep_fmt]; - }; - case AV_PIX_FMT_GRAY32: - case AV_PIX_FMT_GRAYF32: - case AV_PIX_FMT_GBRPF32: - case AV_PIX_FMT_GBRAPF32: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "r32f", - [FF_VK_REP_FLOAT] = "r32f", - [FF_VK_REP_INT] = "r32i", - [FF_VK_REP_UINT] = "r32ui", - }; - return rep_tab[rep_fmt]; - }; - case AV_PIX_FMT_GBRAP32: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "r32ui", - [FF_VK_REP_FLOAT] = NULL, - [FF_VK_REP_INT] = "r32i", - [FF_VK_REP_UINT] = "r32ui", - }; - return rep_tab[rep_fmt]; - }; - case AV_PIX_FMT_NV12: - case AV_PIX_FMT_NV16: - case AV_PIX_FMT_NV24: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "rg8ui", - [FF_VK_REP_FLOAT] = "rg8", - [FF_VK_REP_INT] = "rg8i", - [FF_VK_REP_UINT] = "rg8ui", - }; - return rep_tab[rep_fmt]; - }; - case AV_PIX_FMT_P010: - case AV_PIX_FMT_P210: - case AV_PIX_FMT_P410: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "rgb10_a2ui", - [FF_VK_REP_FLOAT] = "rgb10_a2", - [FF_VK_REP_INT] = NULL, - [FF_VK_REP_UINT] = "rgb10_a2ui", - }; - return rep_tab[rep_fmt]; - }; - case AV_PIX_FMT_P012: - case AV_PIX_FMT_P016: - case AV_PIX_FMT_P212: - case AV_PIX_FMT_P216: - case AV_PIX_FMT_P412: - case AV_PIX_FMT_P416: { - const char *rep_tab[] = { - [FF_VK_REP_NATIVE] = "rg16ui", - [FF_VK_REP_FLOAT] = "rg16", - [FF_VK_REP_INT] = "rg16i", - [FF_VK_REP_UINT] = "rg16ui", - }; - return rep_tab[rep_fmt]; - }; - default: - return "rgba32f"; - } -} - -typedef struct ImageViewCtx { - int nb_views; - VkImageView views[]; -} ImageViewCtx; - -static void destroy_imageviews(void *opaque, uint8_t *data) -{ - FFVulkanContext *s = opaque; - FFVulkanFunctions *vk = &s->vkfn; - ImageViewCtx *iv = (ImageViewCtx *)data; - - for (int i = 0; i < iv->nb_views; i++) - 
vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); - - av_free(iv); -} - -static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt) -{ -#define REPS_FMT(fmt) \ - [FF_VK_REP_NATIVE] = fmt ## _UINT, \ - [FF_VK_REP_FLOAT] = fmt ## _UNORM, \ - [FF_VK_REP_INT] = fmt ## _SINT, \ - [FF_VK_REP_UINT] = fmt ## _UINT, - -#define REPS_FMT_PACK(fmt, num) \ - [FF_VK_REP_NATIVE] = fmt ## _UINT_PACK ## num, \ - [FF_VK_REP_FLOAT] = fmt ## _UNORM_PACK ## num, \ - [FF_VK_REP_INT] = fmt ## _SINT_PACK ## num, \ - [FF_VK_REP_UINT] = fmt ## _UINT_PACK ## num, - - const VkFormat fmts_map[][4] = { - { REPS_FMT_PACK(VK_FORMAT_A2B10G10R10, 32) }, - { REPS_FMT_PACK(VK_FORMAT_A2R10G10B10, 32) }, - { - VK_FORMAT_B5G6R5_UNORM_PACK16, - VK_FORMAT_B5G6R5_UNORM_PACK16, - VK_FORMAT_UNDEFINED, - VK_FORMAT_UNDEFINED, - }, - { - VK_FORMAT_R5G6B5_UNORM_PACK16, - VK_FORMAT_R5G6B5_UNORM_PACK16, - VK_FORMAT_UNDEFINED, - VK_FORMAT_UNDEFINED, - }, - { REPS_FMT(VK_FORMAT_B8G8R8) }, - { REPS_FMT(VK_FORMAT_B8G8R8A8) }, - { REPS_FMT(VK_FORMAT_R8) }, - { REPS_FMT(VK_FORMAT_R8G8) }, - { REPS_FMT(VK_FORMAT_R8G8B8) }, - { REPS_FMT(VK_FORMAT_R8G8B8A8) }, - { REPS_FMT(VK_FORMAT_R16) }, - { REPS_FMT(VK_FORMAT_R16G16) }, - { REPS_FMT(VK_FORMAT_R16G16B16) }, - { REPS_FMT(VK_FORMAT_R16G16B16A16) }, - { - VK_FORMAT_R32_UINT, - VK_FORMAT_R32_SFLOAT, - VK_FORMAT_R32_SINT, - VK_FORMAT_R32_UINT, - }, - { - VK_FORMAT_R32G32B32_SFLOAT, - VK_FORMAT_R32G32B32_SFLOAT, - VK_FORMAT_UNDEFINED, - VK_FORMAT_UNDEFINED, - }, - { - VK_FORMAT_R32G32B32A32_SFLOAT, - VK_FORMAT_R32G32B32A32_SFLOAT, - VK_FORMAT_UNDEFINED, - VK_FORMAT_UNDEFINED, - }, - { - VK_FORMAT_R32G32B32_UINT, - VK_FORMAT_UNDEFINED, - VK_FORMAT_R32G32B32_SINT, - VK_FORMAT_R32G32B32_UINT, - }, - { - VK_FORMAT_R32G32B32A32_UINT, - VK_FORMAT_UNDEFINED, - VK_FORMAT_R32G32B32A32_SINT, - VK_FORMAT_R32G32B32A32_UINT, - }, - }; -#undef REPS_FMT_PACK -#undef REPS_FMT - - if (fmt == VK_FORMAT_UNDEFINED) - return VK_FORMAT_UNDEFINED; - - for 
(int i = 0; i < FF_ARRAY_ELEMS(fmts_map); i++) { - if (fmts_map[i][FF_VK_REP_NATIVE] == fmt || - fmts_map[i][FF_VK_REP_FLOAT] == fmt || - fmts_map[i][FF_VK_REP_INT] == fmt || - fmts_map[i][FF_VK_REP_UINT] == fmt) - return fmts_map[i][rep_fmt]; - } - - return VK_FORMAT_UNDEFINED; -} - -int ff_vk_create_imageview(FFVulkanContext *s, - VkImageView *img_view, VkImageAspectFlags *aspect, - AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; - AVVulkanFramesContext *vkfc = hwfc->hwctx; - const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); - AVVkFrame *vkf = (AVVkFrame *)f->data[0]; - const int nb_images = ff_vk_count_images(vkf); - - VkImageViewUsageCreateInfo view_usage_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, - .usage = vkfc->usage & - (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR | - VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)), - }; - VkImageViewCreateInfo view_create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = &view_usage_info, - .image = vkf->img[FFMIN(plane, nb_images - 1)], - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = map_fmt_to_rep(rep_fmts[plane], rep_fmt), - .components = ff_comp_identity_map, - .subresourceRange = { - .aspectMask = ff_vk_aspect_flag(f, plane), - .levelCount = 1, - .layerCount = 1, - }, - }; - if (view_create_info.format == VK_FORMAT_UNDEFINED) { - av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation " - "of format %i and mode %i\n", - rep_fmts[plane], rep_fmt); - return AVERROR(EINVAL); - } - - ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, - s->hwctx->alloc, img_view); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - *aspect = view_create_info.subresourceRange.aspectMask; - - return 0; -} - -int 
ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, - VkImageView views[AV_NUM_DATA_POINTERS], - AVFrame *f, enum FFVkShaderRepFormat rep_fmt) -{ - int err; - VkResult ret; - AVBufferRef *buf; - FFVulkanFunctions *vk = &s->vkfn; - AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; - AVVulkanFramesContext *vkfc = hwfc->hwctx; - const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); - AVVkFrame *vkf = (AVVkFrame *)f->data[0]; - const int nb_images = ff_vk_count_images(vkf); - const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); - - ImageViewCtx *iv; - const size_t buf_size = sizeof(*iv) + nb_planes*sizeof(VkImageView); - iv = av_mallocz(buf_size); - if (!iv) - return AVERROR(ENOMEM); - - for (int i = 0; i < nb_planes; i++) { - VkImageViewUsageCreateInfo view_usage_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, - .usage = vkfc->usage & - (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR | - VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)), - }; - VkImageViewCreateInfo view_create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = &view_usage_info, - .image = vkf->img[FFMIN(i, nb_images - 1)], - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = map_fmt_to_rep(rep_fmts[i], rep_fmt), - .components = ff_comp_identity_map, - .subresourceRange = { - .aspectMask = ff_vk_aspect_flag(f, i), - .levelCount = 1, - .layerCount = 1, - }, - }; - if (view_create_info.format == VK_FORMAT_UNDEFINED) { - av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation " - "of format %i and mode %i\n", - rep_fmts[i], rep_fmt); - err = AVERROR(EINVAL); - goto fail; - } - - ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, - s->hwctx->alloc, &iv->views[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } - - iv->nb_views++; - } - - buf = av_buffer_create((uint8_t *)iv, buf_size, 
destroy_imageviews, s, 0); - if (!buf) { - err = AVERROR(ENOMEM); - goto fail; - } - - /* Add to queue dependencies */ - err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); - if (err < 0) - av_buffer_unref(&buf); - - memcpy(views, iv->views, nb_planes*sizeof(*views)); - - return err; - -fail: - for (int i = 0; i < iv->nb_views; i++) - vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); - av_free(iv); - return err; -} - -void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, - AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, - VkPipelineStageFlags src_stage, - VkPipelineStageFlags dst_stage, - VkAccessFlagBits new_access, - VkImageLayout new_layout, - uint32_t new_qf) -{ - int found = -1; - AVVkFrame *vkf = (AVVkFrame *)pic->data[0]; - const int nb_images = ff_vk_count_images(vkf); - for (int i = 0; i < e->nb_frame_deps; i++) - if (e->frame_deps[i]->data[0] == pic->data[0]) { - if (e->frame_update[i]) - found = i; - break; - } - - for (int i = 0; i < nb_images; i++) { - bar[*nb_bar] = (VkImageMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, - .pNext = NULL, - .srcStageMask = src_stage, - .dstStageMask = dst_stage, - .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i], - .dstAccessMask = new_access, - .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0], - .newLayout = new_layout, - .srcQueueFamilyIndex = found >= 0 ? 
e->queue_family_dst[found] : vkf->queue_family[0], - .dstQueueFamilyIndex = new_qf, - .image = vkf->img[i], - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .layerCount = 1, - .levelCount = 1, - }, - }; - *nb_bar += 1; - } - - ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL); -} - -int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, - VkPipelineStageFlags stage, - const char *extensions[], int nb_extensions, - int lg_x, int lg_y, int lg_z, - uint32_t required_subgroup_size) -{ - av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); - - shd->name = name; - shd->stage = stage; - shd->lg_size[0] = lg_x; - shd->lg_size[1] = lg_y; - shd->lg_size[2] = lg_z; - - switch (shd->stage) { - case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: - case VK_SHADER_STAGE_CALLABLE_BIT_KHR: - case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: - case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: - case VK_SHADER_STAGE_MISS_BIT_KHR: - case VK_SHADER_STAGE_RAYGEN_BIT_KHR: - shd->bind_point = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; - break; - case VK_SHADER_STAGE_COMPUTE_BIT: - shd->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; - break; - default: - shd->bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; - break; - }; - - if (required_subgroup_size) { - shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO; - shd->subgroup_info.requiredSubgroupSize = required_subgroup_size; - } - - av_bprintf(&shd->src, "/* %s shader: %s */\n", - (stage == VK_SHADER_STAGE_TASK_BIT_EXT || - stage == VK_SHADER_STAGE_MESH_BIT_EXT) ? - "Mesh" : - (shd->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) ? - "Raytrace" : - (shd->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) ? 
- "Compute" : "Graphics", - name); - GLSLF(0, #version %i ,460); - GLSLC(0, ); - - /* Common utilities */ - GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); - GLSLC(0, ); - GLSLC(0, #extension GL_EXT_scalar_block_layout : require ); - GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : require ); - GLSLC(0, #extension GL_EXT_control_flow_attributes : require ); - GLSLC(0, #extension GL_EXT_shader_image_load_formatted : require ); - if (s->extensions & FF_VK_EXT_EXPECT_ASSUME) { - GLSLC(0, #extension GL_EXT_expect_assume : require ); - } else { - GLSLC(0, #define assumeEXT(x) (x) ); - GLSLC(0, #define expectEXT(x, c) (x) ); - } - if ((s->extensions & FF_VK_EXT_DEBUG_UTILS) && - (s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR)) { - GLSLC(0, #extension GL_EXT_debug_printf : require ); - GLSLC(0, #define DEBUG ); - } - - if (stage == VK_SHADER_STAGE_TASK_BIT_EXT || - stage == VK_SHADER_STAGE_MESH_BIT_EXT) - GLSLC(0, #extension GL_EXT_mesh_shader : require ); - - for (int i = 0; i < nb_extensions; i++) - GLSLF(0, #extension %s : %s ,extensions[i], "require"); - GLSLC(0, ); - - GLSLF(0, layout (local_size_x = %i, local_size_y = %i, local_size_z = %i) in; - , shd->lg_size[0], shd->lg_size[1], shd->lg_size[2]); - GLSLC(0, ); - - return 0; -} - -void ff_vk_shader_print(void *ctx, FFVulkanShader *shd, int prio) -{ - int line = 0; - const char *p = shd->src.str; - const char *start = p; - const size_t len = strlen(p); - - AVBPrint buf; - av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); - - for (int i = 0; i < len; i++) { - if (p[i] == '\n') { - av_bprintf(&buf, "%i\t", ++line); - av_bprint_append_data(&buf, start, &p[i] - start + 1); - start = &p[i + 1]; - } - } - - av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str); - av_bprint_finalize(&buf, NULL); -} - -static int init_pipeline_layout(FFVulkanContext *s, FFVulkanShader *shd) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - VkPipelineLayoutCreateInfo pipeline_layout_info; - 
- /* Finally create the pipeline layout */ - pipeline_layout_info = (VkPipelineLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pSetLayouts = shd->desc_layout, - .setLayoutCount = shd->nb_descriptor_sets, - .pushConstantRangeCount = shd->push_consts_num, - .pPushConstantRanges = shd->push_consts, - }; - - ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info, - s->hwctx->alloc, &shd->pipeline_layout); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} - -static int create_shader_module(FFVulkanContext *s, FFVulkanShader *shd, - VkShaderModule *mod, - uint8_t *spirv, size_t spirv_len) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - VkShaderModuleCreateInfo shader_module_info = { - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = NULL, - .flags = 0x0, - .pCode = (void *)spirv, - .codeSize = spirv_len, - }; - - ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_module_info, - s->hwctx->alloc, mod); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Error creating shader module: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} - -static int init_compute_pipeline(FFVulkanContext *s, FFVulkanShader *shd, - VkShaderModule mod, const char *entrypoint) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - VkComputePipelineCreateInfo pipeline_create_info = { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ? - VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0, - .layout = shd->pipeline_layout, - .stage = (VkPipelineShaderStageCreateInfo) { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = shd->subgroup_info.requiredSubgroupSize ? - &shd->subgroup_info : NULL, - .pName = entrypoint, - .flags = shd->subgroup_info.requiredSubgroupSize ? 
- VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT : 0x0, - .stage = shd->stage, - .module = mod, - }, - }; - - ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, - &pipeline_create_info, - s->hwctx->alloc, &shd->pipeline); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} - -static int create_shader_object(FFVulkanContext *s, FFVulkanShader *shd, - uint8_t *spirv, size_t spirv_len, - const char *entrypoint) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - size_t shader_size = 0; - - VkShaderCreateInfoEXT shader_obj_create = { - .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, - .flags = shd->subgroup_info.requiredSubgroupSize ? - VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT : 0x0, - .stage = shd->stage, - .nextStage = 0, - .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT, - .pCode = spirv, - .codeSize = spirv_len, - .pName = entrypoint, - .pSetLayouts = shd->desc_layout, - .setLayoutCount = shd->nb_descriptor_sets, - .pushConstantRangeCount = shd->push_consts_num, - .pPushConstantRanges = shd->push_consts, - .pSpecializationInfo = NULL, - }; - - ret = vk->CreateShadersEXT(s->hwctx->act_dev, 1, &shader_obj_create, - s->hwctx->alloc, &shd->object); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to create shader object: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - if (vk->GetShaderBinaryDataEXT(s->hwctx->act_dev, shd->object, - &shader_size, NULL) == VK_SUCCESS) - av_log(s, AV_LOG_VERBOSE, "Shader %s size: %zu binary (%zu SPIR-V)\n", - shd->name, shader_size, spirv_len); - - return 0; -} - -static int init_descriptors(FFVulkanContext *s, FFVulkanShader *shd) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - shd->desc_layout = av_malloc_array(shd->nb_descriptor_sets, - sizeof(*shd->desc_layout)); - if (!shd->desc_layout) - return AVERROR(ENOMEM); - - if (!(s->extensions & 
FF_VK_EXT_DESCRIPTOR_BUFFER)) { - int has_singular = 0; - int max_descriptors = 0; - for (int i = 0; i < shd->nb_descriptor_sets; i++) { - max_descriptors = FFMAX(max_descriptors, shd->desc_set[i].nb_bindings); - if (shd->desc_set[i].singular) - has_singular = 1; - } - shd->use_push = (s->extensions & FF_VK_EXT_PUSH_DESCRIPTOR) && - (max_descriptors <= s->push_desc_props.maxPushDescriptors) && - (shd->nb_descriptor_sets == 1) && - (has_singular == 0); - } - - for (int i = 0; i < shd->nb_descriptor_sets; i++) { - FFVulkanDescriptorSet *set = &shd->desc_set[i]; - VkDescriptorSetLayoutCreateInfo desc_layout_create = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = set->nb_bindings, - .pBindings = set->binding, - .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ? - VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : - (shd->use_push) ? - VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : - 0x0, - }; - - ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, - &desc_layout_create, - s->hwctx->alloc, - &shd->desc_layout[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to create descriptor set layout: %s", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, shd->desc_layout[i], - &set->layout_size); - - set->aligned_size = FFALIGN(set->layout_size, - s->desc_buf_props.descriptorBufferOffsetAlignment); - - for (int j = 0; j < set->nb_bindings; j++) - vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, - shd->desc_layout[i], - j, - &set->binding_offset[j]); - } - } - - return 0; -} - -int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, - uint8_t *spirv, size_t spirv_len, - const char *entrypoint) -{ - int err; - FFVulkanFunctions *vk = &s->vkfn; - - err = init_descriptors(s, shd); - if (err < 0) - return err; - - err = init_pipeline_layout(s, shd); - if (err < 0) - return 
err; - - if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - shd->bound_buffer_indices = av_calloc(shd->nb_descriptor_sets, - sizeof(*shd->bound_buffer_indices)); - if (!shd->bound_buffer_indices) - return AVERROR(ENOMEM); - - for (int i = 0; i < shd->nb_descriptor_sets; i++) - shd->bound_buffer_indices[i] = i; - } - - if (s->extensions & FF_VK_EXT_SHADER_OBJECT) { - err = create_shader_object(s, shd, spirv, spirv_len, entrypoint); - if (err < 0) - return err; - } else { - VkShaderModule mod; - err = create_shader_module(s, shd, &mod, spirv, spirv_len); - if (err < 0) - return err; - - switch (shd->bind_point) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - err = init_compute_pipeline(s, shd, mod, entrypoint); - break; - default: - av_log(s, AV_LOG_ERROR, "Unsupported shader type: %i\n", - shd->bind_point); - err = AVERROR(EINVAL); - break; - }; - - vk->DestroyShaderModule(s->hwctx->act_dev, mod, s->hwctx->alloc); - if (err < 0) - return err; - } - - return 0; -} - -static const struct descriptor_props { - size_t struct_size; /* Size of the opaque which updates the descriptor */ - const char *type; - int is_uniform; - int mem_quali; /* Can use a memory qualifier */ - int dim_needed; /* Must indicate dimension */ - int buf_content; /* Must indicate buffer contents */ -} descriptor_props[] = { - [VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, }, - [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, }, - [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, }, - [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, }, - [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, }, - [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, - [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, 
}, - [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, - [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, - [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, }, - [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, -}; - -int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, - FFVulkanDescriptorSetBinding *desc, int nb, - int singular, int print_to_shader_only) -{ - int has_sampler = 0; - FFVulkanDescriptorSet *set; - - if (print_to_shader_only) - goto print; - - /* Actual layout allocated for the pipeline */ - set = av_realloc_array(shd->desc_set, - sizeof(*shd->desc_set), - shd->nb_descriptor_sets + 1); - if (!set) - return AVERROR(ENOMEM); - shd->desc_set = set; - - set = &set[shd->nb_descriptor_sets]; - memset(set, 0, sizeof(*set)); - - set->binding = av_calloc(nb, sizeof(*set->binding)); - if (!set->binding) - return AVERROR(ENOMEM); - - set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset)); - if (!set->binding_offset) { - av_freep(&set->binding); - return AVERROR(ENOMEM); - } - - for (int i = 0; i < nb; i++) { - set->binding[i].binding = i; - set->binding[i].descriptorType = desc[i].type; - set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1); - set->binding[i].stageFlags = desc[i].stages; - set->binding[i].pImmutableSamplers = desc[i].samplers; - - if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER || - desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - has_sampler |= 1; - } - - set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; - if (has_sampler) - set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; - - if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) { - for (int i = 0; i < nb; i++) { - int j; - VkDescriptorPoolSize *desc_pool_size; - for 
(j = 0; j < shd->nb_desc_pool_size; j++) - if (shd->desc_pool_size[j].type == desc[i].type) - break; - if (j >= shd->nb_desc_pool_size) { - desc_pool_size = av_realloc_array(shd->desc_pool_size, - sizeof(*desc_pool_size), - shd->nb_desc_pool_size + 1); - if (!desc_pool_size) - return AVERROR(ENOMEM); - - shd->desc_pool_size = desc_pool_size; - shd->nb_desc_pool_size++; - memset(&desc_pool_size[j], 0, sizeof(VkDescriptorPoolSize)); - } - shd->desc_pool_size[j].type = desc[i].type; - shd->desc_pool_size[j].descriptorCount += FFMAX(desc[i].elems, 1); - } - } - - set->singular = singular; - set->nb_bindings = nb; - shd->nb_descriptor_sets++; - -print: - /* Write shader info */ - for (int i = 0; i < nb; i++) { - const struct descriptor_props *prop = &descriptor_props[desc[i].type]; - GLSLA("layout (set = %i, binding = %i", FFMAX(shd->nb_descriptor_sets - 1, 0), i); - - if (desc[i].mem_layout && - (desc[i].type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)) - GLSLA(", %s", desc[i].mem_layout); - - GLSLA(")"); - - if (prop->is_uniform) - GLSLA(" uniform"); - - if (prop->mem_quali && desc[i].mem_quali) - GLSLA(" %s", desc[i].mem_quali); - - if (prop->type) { - GLSLA(" "); - if (desc[i].type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { - if (desc[i].mem_layout) { - int len = strlen(desc[i].mem_layout); - if (desc[i].mem_layout[len - 1] == 'i' && - desc[i].mem_layout[len - 2] == 'u') { - GLSLA("u"); - } else if (desc[i].mem_layout[len - 1] == 'i') { - GLSLA("i"); - } - } - } - GLSLA("%s", prop->type); - } - - if (prop->dim_needed) - GLSLA("%iD", desc[i].dimensions); - - GLSLA(" %s", desc[i].name); - - if (prop->buf_content) { - GLSLA(" {\n "); - if (desc[i].buf_elems) { - GLSLA("%s", desc[i].buf_content); - GLSLA("[%i];", desc[i].buf_elems); - } else { - GLSLA("%s", desc[i].buf_content); - } - GLSLA("\n}"); - } else if (desc[i].elems > 0) { - GLSLA("[%i]", desc[i].elems); - } - - GLSLA(";"); - GLSLA("\n"); - } - GLSLA("\n"); - - return 0; -} - -int 
ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, - FFVulkanShader *shd) -{ - int err; - FFVulkanShaderData *sd; - - if (!shd->nb_descriptor_sets) - return 0; - - sd = av_realloc_array(pool->reg_shd, - sizeof(*pool->reg_shd), - pool->nb_reg_shd + 1); - if (!sd) - return AVERROR(ENOMEM); - - pool->reg_shd = sd; - sd = &sd[pool->nb_reg_shd++]; - memset(sd, 0, sizeof(*sd)); - - sd->shd = shd; - sd->nb_descriptor_sets = shd->nb_descriptor_sets; - - if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - sd->desc_bind = av_malloc_array(sd->nb_descriptor_sets, sizeof(*sd->desc_bind)); - if (!sd->desc_bind) - return AVERROR(ENOMEM); - - sd->desc_set_buf = av_calloc(sd->nb_descriptor_sets, sizeof(*sd->desc_set_buf)); - if (!sd->desc_set_buf) - return AVERROR(ENOMEM); - - for (int i = 0; i < sd->nb_descriptor_sets; i++) { - FFVulkanDescriptorSet *set = &shd->desc_set[i]; - FFVulkanDescriptorSetData *sdb = &sd->desc_set_buf[i]; - int nb = set->singular ? 1 : pool->pool_size; - - err = ff_vk_create_buf(s, &sdb->buf, - set->aligned_size*nb, - NULL, NULL, set->usage, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - if (err < 0) - return err; - - err = ff_vk_map_buffer(s, &sdb->buf, &sdb->desc_mem, 0); - if (err < 0) - return err; - - sd->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, - .usage = set->usage, - .address = sdb->buf.address, - }; - } - } else if (!shd->use_push) { - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - VkDescriptorSetLayout *tmp_layouts; - VkDescriptorSetAllocateInfo set_alloc_info; - VkDescriptorPoolCreateInfo pool_create_info; - - for (int i = 0; i < shd->nb_desc_pool_size; i++) - shd->desc_pool_size[i].descriptorCount *= pool->pool_size; - - pool_create_info = (VkDescriptorPoolCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .flags = 0, - .pPoolSizes = 
shd->desc_pool_size, - .poolSizeCount = shd->nb_desc_pool_size, - .maxSets = sd->nb_descriptor_sets*pool->pool_size, - }; - - ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, - s->hwctx->alloc, &sd->desc_pool); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to create descriptor pool: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - tmp_layouts = av_malloc_array(pool_create_info.maxSets, sizeof(*tmp_layouts)); - if (!tmp_layouts) - return AVERROR(ENOMEM); - - /* Colate each execution context's descriptor set layouts */ - for (int i = 0; i < pool->pool_size; i++) - for (int j = 0; j < sd->nb_descriptor_sets; j++) - tmp_layouts[i*sd->nb_descriptor_sets + j] = shd->desc_layout[j]; - - set_alloc_info = (VkDescriptorSetAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = sd->desc_pool, - .pSetLayouts = tmp_layouts, - .descriptorSetCount = pool_create_info.maxSets, - }; - - sd->desc_sets = av_malloc_array(pool_create_info.maxSets, - sizeof(*tmp_layouts)); - if (!sd->desc_sets) { - av_free(tmp_layouts); - return AVERROR(ENOMEM); - } - ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &set_alloc_info, - sd->desc_sets); - av_free(tmp_layouts); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", - ff_vk_ret2str(ret)); - av_freep(&sd->desc_sets); - return AVERROR_EXTERNAL; - } - } - - return 0; -} - -static inline FFVulkanShaderData *get_shd_data(FFVkExecContext *e, - FFVulkanShader *shd) -{ - for (int i = 0; i < e->parent->nb_reg_shd; i++) - if (e->parent->reg_shd[i].shd == shd) - return &e->parent->reg_shd[i]; - return NULL; -} - -static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, int set, - int bind_idx, int array_idx, - VkDescriptorGetInfoEXT *desc_get_info, - size_t desc_size) -{ - FFVulkanFunctions *vk = &s->vkfn; - FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; - 
FFVulkanShaderData *sd = get_shd_data(e, shd); - const size_t exec_offset = desc_set->singular ? 0 : desc_set->aligned_size*e->idx; - - void *desc = sd->desc_set_buf[set].desc_mem + /* Base */ - exec_offset + /* Execution context */ - desc_set->binding_offset[bind_idx] + /* Descriptor binding */ - array_idx*desc_size; /* Array position */ - - vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); -} - -static inline void update_set_pool_write(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, int set, - VkWriteDescriptorSet *write_info) -{ - FFVulkanFunctions *vk = &s->vkfn; - FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; - FFVulkanShaderData *sd = get_shd_data(e, shd); - - if (desc_set->singular) { - for (int i = 0; i < e->parent->pool_size; i++) { - write_info->dstSet = sd->desc_sets[i*sd->nb_descriptor_sets + set]; - vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL); - } - } else { - if (shd->use_push) { - vk->CmdPushDescriptorSetKHR(e->buf, - shd->bind_point, - shd->pipeline_layout, - set, 1, - write_info); - } else { - write_info->dstSet = sd->desc_sets[e->idx*sd->nb_descriptor_sets + set]; - vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL); - } - } -} - -int ff_vk_shader_update_img(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, int set, int bind, int offs, - VkImageView view, VkImageLayout layout, - VkSampler sampler) -{ - FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; - - if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - VkDescriptorGetInfoEXT desc_get_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, - .type = desc_set->binding[bind].descriptorType, - }; - VkDescriptorImageInfo desc_img_info = { - .imageView = view, - .sampler = sampler, - .imageLayout = layout, - }; - size_t desc_size; - - switch (desc_get_info.type) { - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - desc_get_info.data.pSampledImage = &desc_img_info; - desc_size = 
s->desc_buf_props.sampledImageDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - desc_get_info.data.pStorageImage = &desc_img_info; - desc_size = s->desc_buf_props.storageImageDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - desc_get_info.data.pInputAttachmentImage = &desc_img_info; - desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - desc_get_info.data.pCombinedImageSampler = &desc_img_info; - desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; - break; - default: - av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", - set, bind, desc_get_info.type); - return AVERROR(EINVAL); - break; - }; - - update_set_descriptor(s, e, shd, set, bind, offs, - &desc_get_info, desc_size); - } else { - VkDescriptorImageInfo desc_pool_write_info_img = { - .sampler = sampler, - .imageView = view, - .imageLayout = layout, - }; - VkWriteDescriptorSet desc_pool_write_info = { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = bind, - .descriptorCount = 1, - .dstArrayElement = offs, - .descriptorType = desc_set->binding[bind].descriptorType, - .pImageInfo = &desc_pool_write_info_img, - }; - update_set_pool_write(s, e, shd, set, &desc_pool_write_info); - } - - return 0; -} - -void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, AVFrame *f, - VkImageView *views, int set, int binding, - VkImageLayout layout, VkSampler sampler) -{ - AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; - const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); - - for (int i = 0; i < nb_planes; i++) - ff_vk_shader_update_img(s, e, shd, set, binding, i, - views[i], layout, sampler); -} - -int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, - int set, int bind, int elem, - FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, - 
VkFormat fmt) -{ - FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; - - if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - VkDescriptorGetInfoEXT desc_get_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, - .type = desc_set->binding[bind].descriptorType, - }; - VkDescriptorAddressInfoEXT desc_buf_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT, - .address = buf->address + offset, - .range = len, - .format = fmt, - }; - size_t desc_size; - - switch (desc_get_info.type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - desc_get_info.data.pUniformBuffer = &desc_buf_info; - desc_size = s->desc_buf_props.uniformBufferDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - desc_get_info.data.pStorageBuffer = &desc_buf_info; - desc_size = s->desc_buf_props.storageBufferDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; - desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; - desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; - break; - default: - av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", - set, bind, desc_get_info.type); - return AVERROR(EINVAL); - break; - }; - - update_set_descriptor(s, e, shd, set, bind, elem, &desc_get_info, desc_size); - } else { - VkDescriptorBufferInfo desc_pool_write_info_buf = { - .buffer = buf->buf, - .offset = buf->virtual_offset + offset, - .range = len, - }; - VkWriteDescriptorSet desc_pool_write_info = { - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstBinding = bind, - .descriptorCount = 1, - .dstArrayElement = elem, - .descriptorType = desc_set->binding[bind].descriptorType, - .pBufferInfo = &desc_pool_write_info_buf, - }; - update_set_pool_write(s, e, shd, set, &desc_pool_write_info); - } - - return 0; -} - -void 
ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, - VkShaderStageFlagBits stage, - int offset, size_t size, void *src) -{ - FFVulkanFunctions *vk = &s->vkfn; - vk->CmdPushConstants(e->buf, shd->pipeline_layout, - stage, offset, size, src); -} - -void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd) -{ - FFVulkanFunctions *vk = &s->vkfn; - VkDeviceSize offsets[1024]; - FFVulkanShaderData *sd = get_shd_data(e, shd); - - if (s->extensions & FF_VK_EXT_SHADER_OBJECT) { - VkShaderStageFlagBits stages = shd->stage; - vk->CmdBindShadersEXT(e->buf, 1, &stages, &shd->object); - } else { - vk->CmdBindPipeline(e->buf, shd->bind_point, shd->pipeline); - } - - if (sd && sd->nb_descriptor_sets) { - if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - for (int i = 0; i < sd->nb_descriptor_sets; i++) - offsets[i] = shd->desc_set[i].singular ? 0 : shd->desc_set[i].aligned_size*e->idx; - - /* Bind descriptor buffers */ - vk->CmdBindDescriptorBuffersEXT(e->buf, sd->nb_descriptor_sets, sd->desc_bind); - /* Binding offsets */ - vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, shd->bind_point, shd->pipeline_layout, - 0, sd->nb_descriptor_sets, - shd->bound_buffer_indices, offsets); - } else if (!shd->use_push) { - vk->CmdBindDescriptorSets(e->buf, shd->bind_point, shd->pipeline_layout, - 0, sd->nb_descriptor_sets, - &sd->desc_sets[e->idx*sd->nb_descriptor_sets], - 0, NULL); - } - } -} - -void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd) -{ - FFVulkanFunctions *vk = &s->vkfn; - - av_bprint_finalize(&shd->src, NULL); - -#if 0 - if (shd->shader.module) - vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, - s->hwctx->alloc); -#endif - - if (shd->object) - vk->DestroyShaderEXT(s->hwctx->act_dev, shd->object, s->hwctx->alloc); - if (shd->pipeline) - vk->DestroyPipeline(s->hwctx->act_dev, shd->pipeline, s->hwctx->alloc); - if (shd->pipeline_layout) - 
vk->DestroyPipelineLayout(s->hwctx->act_dev, shd->pipeline_layout, - s->hwctx->alloc); - - for (int i = 0; i < shd->nb_descriptor_sets; i++) { - FFVulkanDescriptorSet *set = &shd->desc_set[i]; - av_free(set->binding); - av_free(set->binding_offset); - } - - if (shd->desc_layout) { - for (int i = 0; i < shd->nb_descriptor_sets; i++) - if (shd->desc_layout[i]) - vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, shd->desc_layout[i], - s->hwctx->alloc); - } - - av_freep(&shd->desc_pool_size); - av_freep(&shd->desc_layout); - av_freep(&shd->desc_set); - av_freep(&shd->bound_buffer_indices); - av_freep(&shd->push_consts); - shd->push_consts_num = 0; -} - -void ff_vk_uninit(FFVulkanContext *s) -{ - av_freep(&s->query_props); - av_freep(&s->qf_props); - av_freep(&s->video_props); - av_freep(&s->coop_mat_props); - av_freep(&s->host_image_copy_layouts); - - av_buffer_unref(&s->device_ref); - av_buffer_unref(&s->frames_ref); -} - -int ff_vk_init(FFVulkanContext *s, void *log_parent, - AVBufferRef *device_ref, AVBufferRef *frames_ref) -{ - int err; - - static const AVClass vulkan_context_class = { - .class_name = "vk", - .version = LIBAVUTIL_VERSION_INT, - .parent_log_context_offset = offsetof(FFVulkanContext, log_parent), - }; - - memset(s, 0, sizeof(*s)); - s->log_parent = log_parent; - s->class = &vulkan_context_class; - - if (frames_ref) { - s->frames_ref = av_buffer_ref(frames_ref); - if (!s->frames_ref) - return AVERROR(ENOMEM); - - s->frames = (AVHWFramesContext *)s->frames_ref->data; - s->hwfc = s->frames->hwctx; - - device_ref = s->frames->device_ref; - } - - s->device_ref = av_buffer_ref(device_ref); - if (!s->device_ref) { - ff_vk_uninit(s); - return AVERROR(ENOMEM); - } - - s->device = (AVHWDeviceContext *)s->device_ref->data; - s->hwctx = s->device->hwctx; - - s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, - s->hwctx->nb_enabled_dev_extensions); - s->extensions |= ff_vk_extensions_to_mask(s->hwctx->enabled_inst_extensions, - 
s->hwctx->nb_enabled_inst_extensions); - - err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1); - if (err < 0) { - ff_vk_uninit(s); - return err; - } - - err = ff_vk_load_props(s); - if (err < 0) { - ff_vk_uninit(s); - return err; - } - - return 0; -} -- 2.49.1 From 748d3cd28818b7d8af7fc255b1b22dd9cf50b5a6 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:17:34 +0000 Subject: [PATCH 044/118] Changing vulkan file directory --- libavutil/vulkan/vulkan.c | 3029 +++++++++++++++++++++++++++++++++++++ 1 file changed, 3029 insertions(+) create mode 100644 libavutil/vulkan/vulkan.c diff --git a/libavutil/vulkan/vulkan.c b/libavutil/vulkan/vulkan.c new file mode 100644 index 0000000000..8f716c8b45 --- /dev/null +++ b/libavutil/vulkan/vulkan.c @@ -0,0 +1,3029 @@ +/* + * Copyright (c) Lynne + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/mem.h" + +#include "vulkan.h" +#include "libavutil/vulkan_loader.h" + +const VkComponentMapping ff_comp_identity_map = { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, +}; + +/* Converts return values to strings */ +const char *ff_vk_ret2str(VkResult res) +{ +#define CASE(VAL) case VAL: return #VAL + switch (res) { + CASE(VK_SUCCESS); + CASE(VK_NOT_READY); + CASE(VK_TIMEOUT); + CASE(VK_EVENT_SET); + CASE(VK_EVENT_RESET); + CASE(VK_INCOMPLETE); + CASE(VK_ERROR_OUT_OF_HOST_MEMORY); + CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); + CASE(VK_ERROR_INITIALIZATION_FAILED); + CASE(VK_ERROR_DEVICE_LOST); + CASE(VK_ERROR_MEMORY_MAP_FAILED); + CASE(VK_ERROR_LAYER_NOT_PRESENT); + CASE(VK_ERROR_EXTENSION_NOT_PRESENT); + CASE(VK_ERROR_FEATURE_NOT_PRESENT); + CASE(VK_ERROR_INCOMPATIBLE_DRIVER); + CASE(VK_ERROR_TOO_MANY_OBJECTS); + CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); + CASE(VK_ERROR_FRAGMENTED_POOL); + CASE(VK_ERROR_UNKNOWN); + CASE(VK_ERROR_OUT_OF_POOL_MEMORY); + CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); + CASE(VK_ERROR_FRAGMENTATION); + CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS); + CASE(VK_PIPELINE_COMPILE_REQUIRED); + CASE(VK_ERROR_SURFACE_LOST_KHR); + CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); + CASE(VK_SUBOPTIMAL_KHR); + CASE(VK_ERROR_OUT_OF_DATE_KHR); + CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); + CASE(VK_ERROR_VALIDATION_FAILED_EXT); + CASE(VK_ERROR_INVALID_SHADER_NV); + CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR); + 
CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR); + CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); + CASE(VK_ERROR_NOT_PERMITTED_KHR); + CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); + CASE(VK_THREAD_IDLE_KHR); + CASE(VK_THREAD_DONE_KHR); + CASE(VK_OPERATION_DEFERRED_KHR); + CASE(VK_OPERATION_NOT_DEFERRED_KHR); + default: return "Unknown error"; + } +#undef CASE +} + +/* Malitia pura, Khronos */ +#define FN_MAP_TO(dst_t, dst_name, src_t, src_name) \ + dst_t ff_vk_map_ ##src_name## _to_ ##dst_name(src_t src) \ + { \ + dst_t dst = 0x0; \ + MAP_TO(VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT, \ + VK_IMAGE_USAGE_SAMPLED_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT, \ + VK_IMAGE_USAGE_TRANSFER_SRC_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT, \ + VK_IMAGE_USAGE_TRANSFER_DST_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT, \ + VK_IMAGE_USAGE_STORAGE_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT, \ + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_OUTPUT_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_DECODE_DPB_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_DPB_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR, \ + VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR); \ + MAP_TO(VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT, \ + VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT); \ + return dst; \ + } + +#define MAP_TO(flag1, flag2) if (src & flag2) dst |= flag1; +FN_MAP_TO(VkFormatFeatureFlagBits2, feats, VkImageUsageFlags, usage) +#undef MAP_TO +#define MAP_TO(flag1, flag2) if (src & flag1) dst |= flag2; +FN_MAP_TO(VkImageUsageFlags, usage, VkFormatFeatureFlagBits2, feats) +#undef MAP_TO +#undef FN_MAP_TO + +static void load_enabled_qfs(FFVulkanContext *s) +{ + s->nb_qfs = 0; + for (int i = 0; i < s->hwctx->nb_qf; i++) { + /* Skip duplicates */ + int 
skip = 0; + for (int j = 0; j < s->nb_qfs; j++) { + if (s->qfs[j] == s->hwctx->qf[i].idx) { + skip = 1; + break; + } + } + if (skip) + continue; + + s->qfs[s->nb_qfs++] = s->hwctx->qf[i].idx; + } +} + +int ff_vk_load_props(FFVulkanContext *s) +{ + FFVulkanFunctions *vk = &s->vkfn; + + s->props = (VkPhysicalDeviceProperties2) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + }; + + FF_VK_STRUCT_EXT(s, &s->props, &s->props_11, FF_VK_EXT_NO_FLAG, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES); + FF_VK_STRUCT_EXT(s, &s->props, &s->driver_props, FF_VK_EXT_NO_FLAG, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES); + FF_VK_STRUCT_EXT(s, &s->props, &s->subgroup_props, FF_VK_EXT_NO_FLAG, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES); + + FF_VK_STRUCT_EXT(s, &s->props, &s->push_desc_props, FF_VK_EXT_PUSH_DESCRIPTOR, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR); + FF_VK_STRUCT_EXT(s, &s->props, &s->hprops, FF_VK_EXT_EXTERNAL_HOST_MEMORY, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT); + FF_VK_STRUCT_EXT(s, &s->props, &s->coop_matrix_props, FF_VK_EXT_COOP_MATRIX, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR); + FF_VK_STRUCT_EXT(s, &s->props, &s->desc_buf_props, FF_VK_EXT_DESCRIPTOR_BUFFER, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT); + FF_VK_STRUCT_EXT(s, &s->props, &s->optical_flow_props, FF_VK_EXT_OPTICAL_FLOW, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV); + FF_VK_STRUCT_EXT(s, &s->props, &s->host_image_props, FF_VK_EXT_HOST_IMAGE_COPY, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_PROPERTIES_EXT); + + s->feats = (VkPhysicalDeviceFeatures2) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + }; + + FF_VK_STRUCT_EXT(s, &s->feats, &s->feats_12, FF_VK_EXT_NO_FLAG, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES); + FF_VK_STRUCT_EXT(s, &s->feats, &s->atomic_float_feats, 
FF_VK_EXT_ATOMIC_FLOAT, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT); + + /* Try allocating 1024 layouts */ + s->host_image_copy_layouts = av_malloc(sizeof(*s->host_image_copy_layouts)*1024); + s->host_image_props.pCopySrcLayouts = s->host_image_copy_layouts; + s->host_image_props.copySrcLayoutCount = 512; + s->host_image_props.pCopyDstLayouts = s->host_image_copy_layouts + 512; + s->host_image_props.copyDstLayoutCount = 512; + + vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); + + /* Check if we had enough memory for all layouts */ + if (s->host_image_props.copySrcLayoutCount == 512 || + s->host_image_props.copyDstLayoutCount == 512) { + VkImageLayout *new_array; + size_t new_size; + s->host_image_props.pCopySrcLayouts = + s->host_image_props.pCopyDstLayouts = NULL; + s->host_image_props.copySrcLayoutCount = + s->host_image_props.copyDstLayoutCount = 0; + vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); + + new_size = s->host_image_props.copySrcLayoutCount + + s->host_image_props.copyDstLayoutCount; + new_size *= sizeof(*s->host_image_copy_layouts); + new_array = av_realloc(s->host_image_copy_layouts, new_size); + if (!new_array) + return AVERROR(ENOMEM); + + s->host_image_copy_layouts = new_array; + s->host_image_props.pCopySrcLayouts = new_array; + s->host_image_props.pCopyDstLayouts = new_array + s->host_image_props.copySrcLayoutCount; + vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); + } + + vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats); + + load_enabled_qfs(s); + + if (s->qf_props) + return 0; + + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL); + + s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props)); + if (!s->qf_props) + return AVERROR(ENOMEM); + + s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props)); + if (!s->qf_props) { + 
av_freep(&s->qf_props); + return AVERROR(ENOMEM); + } + + s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props)); + if (!s->video_props) { + av_freep(&s->qf_props); + av_freep(&s->query_props); + return AVERROR(ENOMEM); + } + + for (uint32_t i = 0; i < s->tot_nb_qfs; i++) { + s->qf_props[i] = (VkQueueFamilyProperties2) { + .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, + }; + + FF_VK_STRUCT_EXT(s, &s->qf_props[i], &s->query_props[i], FF_VK_EXT_NO_FLAG, + VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR); + FF_VK_STRUCT_EXT(s, &s->qf_props[i], &s->video_props[i], FF_VK_EXT_VIDEO_QUEUE, + VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR); + } + + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props); + + if (s->extensions & FF_VK_EXT_COOP_MATRIX) { + vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev, + &s->coop_mat_props_nb, NULL); + + if (s->coop_mat_props_nb) { + s->coop_mat_props = av_malloc_array(s->coop_mat_props_nb, + sizeof(VkCooperativeMatrixPropertiesKHR)); + for (int i = 0; i < s->coop_mat_props_nb; i++) { + s->coop_mat_props[i] = (VkCooperativeMatrixPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, + }; + } + + vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev, + &s->coop_mat_props_nb, + s->coop_mat_props); + } + } + + return 0; +} + +AVVulkanDeviceQueueFamily *ff_vk_qf_find(FFVulkanContext *s, + VkQueueFlagBits dev_family, + VkVideoCodecOperationFlagBitsKHR vid_ops) +{ + for (int i = 0; i < s->hwctx->nb_qf; i++) { + if ((s->hwctx->qf[i].flags & dev_family) && + (s->hwctx->qf[i].video_caps & vid_ops) == vid_ops) { + return &s->hwctx->qf[i]; + } + } + return NULL; +} + +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool) +{ + FFVulkanFunctions *vk = &s->vkfn; + + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; + + if (e->fence) { + if (e->had_submission) + 
vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); + } + + ff_vk_exec_discard_deps(s, e); + + av_free(e->frame_deps); + av_free(e->sw_frame_deps); + av_free(e->buf_deps); + av_free(e->queue_family_dst); + av_free(e->layout_dst); + av_free(e->access_dst); + av_free(e->frame_update); + av_free(e->frame_locked); + av_free(e->sem_sig); + av_free(e->sem_sig_val_dst); + av_free(e->sem_wait); + } + + /* Free shader-specific data */ + for (int i = 0; i < pool->nb_reg_shd; i++) { + FFVulkanShaderData *sd = &pool->reg_shd[i]; + + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + for (int j = 0; j < sd->nb_descriptor_sets; j++) { + FFVulkanDescriptorSetData *set_data = &sd->desc_set_buf[j]; + if (set_data->buf.mem) + ff_vk_unmap_buffer(s, &set_data->buf, 0); + ff_vk_free_buf(s, &set_data->buf); + } + } + + if (sd->desc_pool) + vk->DestroyDescriptorPool(s->hwctx->act_dev, sd->desc_pool, + s->hwctx->alloc); + + av_freep(&sd->desc_set_buf); + av_freep(&sd->desc_bind); + av_freep(&sd->desc_sets); + } + + av_freep(&pool->reg_shd); + + for (int i = 0; i < pool->pool_size; i++) { + if (pool->cmd_buf_pools[i]) + vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pools[i], + 1, &pool->cmd_bufs[i]); + + if (pool->cmd_buf_pools[i]) + vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pools[i], s->hwctx->alloc); + } + if (pool->query_pool) + vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc); + + av_free(pool->query_data); + av_free(pool->cmd_buf_pools); + av_free(pool->cmd_bufs); + av_free(pool->contexts); +} + +int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + const void *query_create_pnext) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkCommandPoolCreateInfo cqueue_create; + VkCommandBufferAllocateInfo 
cbuf_create; + + const VkQueryPoolVideoEncodeFeedbackCreateInfoKHR *ef = NULL; + + atomic_init(&pool->idx, 0); + + if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { + ef = ff_vk_find_struct(query_create_pnext, + VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR); + if (!ef) + return AVERROR(EINVAL); + } + + /* Allocate space for command buffer pools */ + pool->cmd_buf_pools = av_malloc(nb_contexts*sizeof(*pool->cmd_buf_pools)); + if (!pool->cmd_buf_pools) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Allocate space for command buffers */ + pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs)); + if (!pool->cmd_bufs) { + err = AVERROR(ENOMEM); + goto fail; + } + + for (int i = 0; i < nb_contexts; i++) { + /* Create command pool */ + cqueue_create = (VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = qf->idx, + }; + + ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, + s->hwctx->alloc, &pool->cmd_buf_pools[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Allocate command buffer */ + cbuf_create = (VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandPool = pool->cmd_buf_pools[i], + .commandBufferCount = 1, + }; + ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, + &pool->cmd_bufs[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + } + + /* Query pool */ + if (nb_queries) { + VkQueryPoolCreateInfo query_pool_info = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = query_create_pnext, + .queryType = query_type, + 
.queryCount = nb_queries*nb_contexts, + }; + ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, + s->hwctx->alloc, &pool->query_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + pool->nb_queries = nb_queries; + pool->query_status_stride = 1 + 1; /* One result, one status by default */ + pool->query_results = nb_queries; + pool->query_statuses = nb_queries; + + /* Video encode queries produce two results per query */ + if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { + int nb_results = av_popcount(ef->encodeFeedbackFlags); + pool->query_status_stride = nb_results + 1; + pool->query_results *= nb_results; + } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { + pool->query_status_stride = 1; + pool->query_results = 0; + } + + pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4); + + /* Allocate space for the query data */ + pool->query_data = av_calloc(nb_contexts, pool->qd_size); + if (!pool->query_data) { + err = AVERROR(ENOMEM); + goto fail; + } + } + + /* Allocate space for the contexts */ + pool->contexts = av_calloc(nb_contexts, sizeof(*pool->contexts)); + if (!pool->contexts) { + err = AVERROR(ENOMEM); + goto fail; + } + + pool->pool_size = nb_contexts; + + /* Init contexts */ + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; + VkFenceCreateInfo fence_create = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }; + + /* Fence */ + ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc, + &e->fence); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + e->idx = i; + e->parent = pool; + + /* Query data */ + e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i; + e->query_idx = 
nb_queries*i; + + /* Command buffer */ + e->buf = pool->cmd_bufs[i]; + + /* Queue index distribution */ + e->qi = i % qf->num; + e->qf = qf->idx; + vk->GetDeviceQueue(s->hwctx->act_dev, qf->idx, e->qi, &e->queue); + } + + return 0; + +fail: + ff_vk_exec_pool_free(s, pool); + return err; +} + +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, VkQueryResultFlagBits flags) +{ + FFVulkanFunctions *vk = &s->vkfn; + const FFVkExecPool *pool = e->parent; + VkQueryResultFlags qf = flags & ~(VK_QUERY_RESULT_64_BIT | + VK_QUERY_RESULT_WITH_STATUS_BIT_KHR); + + if (!e->query_data) { + av_log(s, AV_LOG_ERROR, "Requested a query with a NULL query_data pointer!\n"); + return VK_INCOMPLETE; + } + + qf |= pool->query_64bit ? + VK_QUERY_RESULT_64_BIT : 0x0; + qf |= pool->query_statuses ? + VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; + + if (data) + *data = e->query_data; + + return vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool, + e->query_idx, + pool->nb_queries, + pool->qd_size, e->query_data, + pool->qd_size, qf); +} + +FFVkExecContext *ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool) +{ + return &pool->contexts[atomic_fetch_add(&pool->idx, 1) % pool->pool_size]; +} + +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + ff_vk_exec_discard_deps(s, e); +} + +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + const FFVkExecPool *pool = e->parent; + + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + /* Wait for the fence to be signalled */ + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->ResetFences(s->hwctx->act_dev, 1, &e->fence); + + /* Discard queue dependencies */ + ff_vk_exec_discard_deps(s, e); 
+ + ret = vk->BeginCommandBuffer(e->buf, &cmd_start); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (pool->nb_queries) + vk->CmdResetQueryPool(e->buf, pool->query_pool, + e->query_idx, pool->nb_queries); + + return 0; +} + +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e) +{ + for (int j = 0; j < e->nb_buf_deps; j++) + av_buffer_unref(&e->buf_deps[j]); + e->nb_buf_deps = 0; + + for (int j = 0; j < e->nb_sw_frame_deps; j++) + av_frame_free(&e->sw_frame_deps[j]); + e->nb_sw_frame_deps = 0; + + for (int j = 0; j < e->nb_frame_deps; j++) { + AVFrame *f = e->frame_deps[j]; + if (e->frame_locked[j]) { + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + vkfc->unlock_frame(hwfc, vkf); + e->frame_locked[j] = 0; + } + e->frame_update[j] = 0; + } + e->nb_frame_deps = 0; + + e->sem_wait_cnt = 0; + e->sem_sig_cnt = 0; + e->sem_sig_val_dst_cnt = 0; +} + +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref) +{ + AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size, + (e->nb_buf_deps + nb_deps) * sizeof(*dst)); + if (!dst) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + e->buf_deps = dst; + + for (int i = 0; i < nb_deps; i++) { + if (!deps[i]) + continue; + + e->buf_deps[e->nb_buf_deps] = ref ? 
av_buffer_ref(deps[i]) : deps[i]; + if (!e->buf_deps[e->nb_buf_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + e->nb_buf_deps++; + } + + return 0; +} + +int ff_vk_exec_add_dep_sw_frame(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *f) +{ + AVFrame **dst = av_fast_realloc(e->sw_frame_deps, &e->sw_frame_deps_alloc_size, + (e->nb_sw_frame_deps + 1) * sizeof(*dst)); + if (!dst) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + e->sw_frame_deps = dst; + + e->sw_frame_deps[e->nb_sw_frame_deps] = av_frame_clone(f); + if (!e->sw_frame_deps[e->nb_sw_frame_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + e->nb_sw_frame_deps++; + + return 0; +} + +#define ARR_REALLOC(str, arr, alloc_s, cnt) \ + do { \ + arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \ + if (!arr) { \ + ff_vk_exec_discard_deps(s, e); \ + return AVERROR(ENOMEM); \ + } \ + str->arr = arr; \ + } while (0) + +typedef struct TempSyncCtx { + int nb_sem; + VkSemaphore sem[]; +} TempSyncCtx; + +static void destroy_tmp_semaphores(void *opaque, uint8_t *data) +{ + FFVulkanContext *s = opaque; + FFVulkanFunctions *vk = &s->vkfn; + TempSyncCtx *ts = (TempSyncCtx *)data; + + for (int i = 0; i < ts->nb_sem; i++) + vk->DestroySemaphore(s->hwctx->act_dev, ts->sem[i], s->hwctx->alloc); + + av_free(ts); +} + +int ff_vk_exec_add_dep_wait_sem(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore sem, uint64_t val, + VkPipelineStageFlagBits2 stage) +{ + VkSemaphoreSubmitInfo *sem_wait; + ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); + + e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = sem, + .value = val, + .stageMask = stage, + }; + + return 0; +} + +int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *sem, int nb, + VkPipelineStageFlagBits2 stage, + int wait) +{ + int err; + size_t buf_size; + AVBufferRef 
*buf; + TempSyncCtx *ts; + FFVulkanFunctions *vk = &s->vkfn; + + /* Do not transfer ownership if we're signalling a binary semaphore, + * since we're probably exporting it. */ + if (!wait) { + for (int i = 0; i < nb; i++) { + VkSemaphoreSubmitInfo *sem_sig; + ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); + + e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = sem[i], + .stageMask = stage, + }; + } + + return 0; + } + + buf_size = sizeof(*ts) + sizeof(VkSemaphore)*nb; + ts = av_mallocz(buf_size); + if (!ts) { + err = AVERROR(ENOMEM); + goto fail; + } + + memcpy(ts->sem, sem, nb*sizeof(*sem)); + ts->nb_sem = nb; + + buf = av_buffer_create((uint8_t *)ts, buf_size, destroy_tmp_semaphores, s, 0); + if (!buf) { + av_free(ts); + err = AVERROR(ENOMEM); + goto fail; + } + + err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); + if (err < 0) { + av_buffer_unref(&buf); + return err; + } + + for (int i = 0; i < nb; i++) { + err = ff_vk_exec_add_dep_wait_sem(s, e, sem[i], 0, stage); + if (err < 0) + return err; + } + + return 0; + +fail: + for (int i = 0; i < nb; i++) + vk->DestroySemaphore(s->hwctx->act_dev, sem[i], s->hwctx->alloc); + + return err; +} + +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage) +{ + uint8_t *frame_locked; + uint8_t *frame_update; + AVFrame **frame_deps; + AVBufferRef **buf_deps; + VkImageLayout *layout_dst; + uint32_t *queue_family_dst; + VkAccessFlagBits *access_dst; + + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + int nb_images = ff_vk_count_images(vkf); + + /* Don't add duplicates */ + for (int i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + return 1; + + ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, 
e->nb_frame_deps); + ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps); + + ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps); + + /* prepare_frame in hwcontext_vulkan.c uses the regular frame management + * code but has no frame yet, and it doesn't need to actually store a ref + * to the frame. */ + if (f->buf[0]) { + ARR_REALLOC(e, buf_deps, &e->buf_deps_alloc_size, e->nb_buf_deps); + e->buf_deps[e->nb_buf_deps] = av_buffer_ref(f->buf[0]); + if (!e->buf_deps[e->nb_buf_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + e->nb_buf_deps++; + } + + e->frame_deps[e->nb_frame_deps] = f; + + vkfc->lock_frame(hwfc, vkf); + e->frame_locked[e->nb_frame_deps] = 1; + e->frame_update[e->nb_frame_deps] = 0; + e->nb_frame_deps++; + + for (int i = 0; i < nb_images; i++) { + VkSemaphoreSubmitInfo *sem_wait; + VkSemaphoreSubmitInfo *sem_sig; + uint64_t **sem_sig_val_dst; + + ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); + ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i], + .stageMask = wait_stage, + }; + + e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i] + 1, + .stageMask = signal_stage, + }; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i]; + e->sem_sig_val_dst_cnt++; + } + + return 0; +} + +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, 
+ VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar) +{ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + break; + av_assert0(i < e->nb_frame_deps); + + /* Don't update duplicates */ + if (nb_img_bar && !e->frame_update[i]) + (*nb_img_bar)++; + + e->queue_family_dst[i] = bar->dstQueueFamilyIndex; + e->access_dst[i] = bar->dstAccessMask; + e->layout_dst[i] = bar->newLayout; + e->frame_update[i] = 1; +} + +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f) +{ + uint64_t **sem_sig_val_dst; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + /* Reject unknown frames */ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + break; + if (i == e->nb_frame_deps) + return AVERROR(EINVAL); + + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + *dst = vkf->sem[0]; + *dst_val = vkf->sem_value[0]; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val; + e->sem_sig_val_dst_cnt++; + + return 0; +} + +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = e->buf, + }; + VkSubmitInfo2 submit_info = (VkSubmitInfo2) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, + .pCommandBufferInfos = &cmd_buf_info, + .commandBufferInfoCount = 1, + .pWaitSemaphoreInfos = e->sem_wait, + .waitSemaphoreInfoCount = e->sem_wait_cnt, + .pSignalSemaphoreInfos = e->sem_sig, + .signalSemaphoreInfoCount = e->sem_sig_cnt, + }; + + ret = vk->EndCommandBuffer(e->buf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", + ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); + return AVERROR_EXTERNAL; + } + + s->hwctx->lock_queue(s->device, e->qf, e->qi); + ret = 
vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence);
+    s->hwctx->unlock_queue(s->device, e->qf, e->qi);
+
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        ff_vk_exec_discard_deps(s, e);
+        return AVERROR_EXTERNAL;
+    }
+
+    for (int i = 0; i < e->sem_sig_val_dst_cnt; i++)
+        *e->sem_sig_val_dst[i] += 1;
+
+    /* Unlock all frames */
+    for (int j = 0; j < e->nb_frame_deps; j++) {
+        if (e->frame_locked[j]) {
+            AVFrame *f = e->frame_deps[j];
+            AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+            AVVulkanFramesContext *vkfc = hwfc->hwctx;
+            AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+
+            if (e->frame_update[j]) {
+                int nb_images = ff_vk_count_images(vkf);
+                for (int i = 0; i < nb_images; i++) {
+                    vkf->layout[i] = e->layout_dst[j];
+                    vkf->access[i] = e->access_dst[j];
+                    vkf->queue_family[i] = e->queue_family_dst[j];
+                }
+            }
+            vkfc->unlock_frame(hwfc, vkf);
+            e->frame_locked[j] = 0;
+        }
+    }
+
+    e->had_submission = 1;
+
+    return 0;
+}
+
+/**
+ * Allocate req->size bytes of device memory from the first memory type that
+ * is allowed by req->memoryTypeBits and whose property flags contain all of
+ * req_flags (req_flags == UINT32_MAX accepts any allowed type).
+ *
+ * @param alloc_extension chained into VkMemoryAllocateInfo.pNext
+ * @param mem_flags       if non-NULL, the chosen type's property flags are
+ *                        OR-ed into *mem_flags
+ * @param mem             receives the allocated memory handle
+ * @return 0 on success, AVERROR(EINVAL) if no memory type matches,
+ *         AVERROR(ENOMEM) if the allocation itself fails
+ */
+int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
+                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+{
+    VkResult ret;
+    int index = -1;
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    VkMemoryAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = alloc_extension,
+    };
+
+    alloc_info.allocationSize = req->size;
+
+    /* The vulkan spec requires memory types to be sorted in the "optimal"
+     * order, so the first matching type we find will be the best/fastest one */
+    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
+        /* The memory type must be supported by the requirements (bitfield).
+         * The mask is built from an unsigned 1 so that bit 31 can be tested
+         * without shifting into the sign bit of an int. */
+        if (!(req->memoryTypeBits & (1U << i)))
+            continue;
+
+        /* The memory type flags must include our properties */
+        if ((req_flags != UINT32_MAX) &&
+            ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags))
+            continue;
+
+        /* Found a suitable memory type */
+        index = i;
+        break;
+    }
+
+    if (index < 0) {
+        av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+               req_flags);
+        return AVERROR(EINVAL);
+    }
+
+    alloc_info.memoryTypeIndex = index;
+
+    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
+                             s->hwctx->alloc, mem);
+    if (ret != VK_SUCCESS)
+        return AVERROR(ENOMEM);
+
+    if (mem_flags)
+        *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
+
+    return 0;
+}
+
+int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
+                     void *pNext, void *alloc_pNext,
+                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+{
+    int err;
+    VkResult ret;
+    int use_ded_mem;
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    /* Buffer usage flags corresponding to buffer descriptor types */
+    const VkBufferUsageFlags desc_usage =
+        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
+        VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
+        VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
+
+    if ((s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) && (usage & desc_usage))
+        usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+
+    VkBufferCreateInfo buf_spawn = {
+        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext       = pNext,
+        .usage       = usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .size        = flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ?
+ FFALIGN(size, s->props.properties.limits.minMemoryMapAlignment) : + size, + }; + + VkMemoryAllocateFlagsInfo alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + }; + VkBufferMemoryRequirementsInfo2 req_desc = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, + }; + VkMemoryDedicatedAllocateInfo ded_alloc = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, + .pNext = alloc_pNext, + }; + VkMemoryDedicatedRequirements ded_req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, + }; + VkMemoryRequirements2 req = { + .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, + .pNext = &ded_req, + }; + + av_log(s, AV_LOG_DEBUG, "Creating a buffer of %"SIZE_SPECIFIER" bytes, " + "usage: 0x%x, flags: 0x%x\n", + size, usage, flags); + + ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + req_desc.buffer = buf->buf; + + vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req); + + /* In case the implementation prefers/requires dedicated allocation */ + use_ded_mem = ded_req.prefersDedicatedAllocation | + ded_req.requiresDedicatedAllocation; + if (use_ded_mem) { + ded_alloc.buffer = buf->buf; + ded_alloc.pNext = alloc_pNext; + alloc_pNext = &ded_alloc; + } + + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + alloc_flags.pNext = alloc_pNext; + alloc_pNext = &alloc_flags; + } + + err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext, + &buf->flags, &buf->mem); + if (err) + return err; + + ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + 
VkBufferDeviceAddressInfo address_info = {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+            .buffer = buf->buf,
+        };
+        buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
+    }
+
+    buf->size = size;
+
+    return 0;
+}
+
+/**
+ * Map the memory of nb_buffers buffers, storing each mapping in mem[i] and
+ * buf[i]->mapped_mem. If invalidate is nonzero, the mapped ranges of every
+ * buffer lacking VK_MEMORY_PROPERTY_HOST_COHERENT_BIT are invalidated.
+ *
+ * @return 0 on success, AVERROR_EXTERNAL if mapping or invalidation fails
+ */
+int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[],
+                      int nb_buffers, int invalidate)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkMappedMemoryRange inval_list[64];
+    int inval_count = 0;
+
+    for (int i = 0; i < nb_buffers; i++) {
+        void *dst;
+        ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0,
+                            VK_WHOLE_SIZE, 0, &dst);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+        mem[i] = buf[i]->mapped_mem = dst;
+    }
+
+    if (!invalidate)
+        return 0;
+
+    for (int i = 0; i < nb_buffers; i++) {
+        const VkMappedMemoryRange ival_buf = {
+            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = buf[i]->mem,
+            .size   = VK_WHOLE_SIZE,
+        };
+        if (!(buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+            inval_list[inval_count++] = ival_buf;
+
+        /* Submit the batch once the fixed-size list is full, or after the
+         * last buffer, so the list can never be written past its end. */
+        if (inval_count == FF_ARRAY_ELEMS(inval_list) ||
+            (inval_count && i == nb_buffers - 1)) {
+            ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+                                                   inval_list);
+            if (ret != VK_SUCCESS) {
+                av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+                       ff_vk_ret2str(ret));
+                return AVERROR_EXTERNAL;
+            }
+            inval_count = 0;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Unmap the memory of nb_buffers buffers and clear their mapped_mem pointers.
+ * If flush is nonzero, the mapped ranges of every buffer lacking
+ * VK_MEMORY_PROPERTY_HOST_COHERENT_BIT are flushed first; in that case each
+ * buffer is asserted not to carry a host_ref.
+ *
+ * @return 0 on success, AVERROR_EXTERNAL if a flush failed (the buffers are
+ *         unmapped regardless)
+ */
+int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers,
+                        int flush)
+{
+    int err = 0;
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkMappedMemoryRange flush_list[64];
+    int flush_count = 0;
+
+    if (flush) {
+        for (int i = 0; i < nb_buffers; i++) {
+            const VkMappedMemoryRange flush_buf = {
+                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+                .memory = buf[i]->mem,
+                .size   = VK_WHOLE_SIZE,
+            };
+
+            av_assert0(!buf[i]->host_ref);
+            if (!(buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+                flush_list[flush_count++] = flush_buf;
+
+            /* Submit the batch once the fixed-size list is full, or after
+             * the last buffer, so the list can never be written past its
+             * end. */
+            if (flush_count == FF_ARRAY_ELEMS(flush_list) ||
+                (flush_count && i == nb_buffers - 1)) {
+                ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+                                                  flush_list);
+                if (ret != VK_SUCCESS) {
+                    av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                           ff_vk_ret2str(ret));
+                    err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+                }
+                flush_count = 0;
+            }
+        }
+    }
+
+    for (int i = 0; i < nb_buffers; i++) {
+        vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem);
+        buf[i]->mapped_mem = NULL;
+    }
+
+    return err;
+}
+
+/**
+ * Release everything held by buf: unmap it if it is mapped and has no
+ * host_ref, destroy the buffer handle, free the memory and drop host_ref.
+ * The handles and the mapping pointer are reset afterwards. Does nothing
+ * when buf or s->hwctx is NULL.
+ */
+void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
+{
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    if (!buf || !s->hwctx)
+        return;
+
+    if (buf->mapped_mem && !buf->host_ref)
+        ff_vk_unmap_buffer(s, buf, 0);
+    if (buf->buf != VK_NULL_HANDLE)
+        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
+    if (buf->mem != VK_NULL_HANDLE)
+        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
+    if (buf->host_ref)
+        av_buffer_unref(&buf->host_ref);
+
+    buf->buf = VK_NULL_HANDLE;
+    buf->mem = VK_NULL_HANDLE;
+    buf->mapped_mem = NULL;
+}
+
+/* AVBuffer free callback: releases the FFVkBuffer stored in data, then the
+ * storage itself. opaque is the FFVulkanContext. */
+static void free_data_buf(void *opaque, uint8_t *data)
+{
+    FFVulkanContext *ctx = opaque;
+    FFVkBuffer *buf = (FFVkBuffer *)data;
+    ff_vk_free_buf(ctx, buf);
+    av_free(data);
+}
+
+/* Buffer pool allocator: returns a zeroed block of the requested size wrapped
+ * in an AVBufferRef that is released through free_data_buf(). */
+static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
+{
+    AVBufferRef *ref;
+    uint8_t *buf = av_mallocz(size);
+    if (!buf)
+        return NULL;
+
+    ref = av_buffer_create(buf, size, free_data_buf, opaque, 0);
+    if (!ref)
+        av_free(buf);
+    return ref;
+}
+
+int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool,
+                            AVBufferRef **buf, VkBufferUsageFlags usage,
+                            void *create_pNext, size_t size,
+                            VkMemoryPropertyFlagBits mem_props)
+{
+    int err;
+    AVBufferRef *ref;
+    FFVkBuffer *data;
+
+    *buf = NULL;
+
+    if (!(*buf_pool)) {
+        *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx,
+                                         alloc_data_buf, NULL);
+        if (!(*buf_pool))
+            return AVERROR(ENOMEM);
+    }
+
+    *buf = ref = av_buffer_pool_get(*buf_pool);
+    if (!ref)
+        return AVERROR(ENOMEM);
+
+    data = (FFVkBuffer *)ref->data;
+    
data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + data->access = VK_ACCESS_2_NONE; + + if (data->size >= size) + return 0; + + ff_vk_free_buf(ctx, data); + memset(data, 0, sizeof(*data)); + + err = ff_vk_create_buf(ctx, data, size, + create_pNext, NULL, usage, + mem_props); + if (err < 0) { + av_buffer_unref(&ref); + *buf = NULL; + return err; + } + + if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0); + if (err < 0) { + av_buffer_unref(&ref); + *buf = NULL; + return err; + } + } + + return 0; +} + +static int create_mapped_buffer(FFVulkanContext *s, + FFVkBuffer *vkb, VkBufferUsageFlags usage, + size_t size, + VkExternalMemoryBufferCreateInfo *create_desc, + VkImportMemoryHostPointerInfoEXT *import_desc, + VkMemoryHostPointerPropertiesEXT props) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkBufferCreateInfo buf_spawn = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = create_desc, + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .size = size, + }; + VkMemoryRequirements req = { + .size = size, + .alignment = s->hprops.minImportedHostPointerAlignment, + .memoryTypeBits = props.memoryTypeBits, + }; + + err = ff_vk_alloc_mem(s, &req, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + import_desc, &vkb->flags, &vkb->mem); + if (err < 0) + return err; + + ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &vkb->buf); + if (ret != VK_SUCCESS) { + vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc); + return AVERROR_EXTERNAL; + } + + ret = vk->BindBufferMemory(s->hwctx->act_dev, vkb->buf, vkb->mem, 0); + if (ret != VK_SUCCESS) { + vk->FreeMemory(s->hwctx->act_dev, vkb->mem, s->hwctx->alloc); + vk->DestroyBuffer(s->hwctx->act_dev, vkb->buf, s->hwctx->alloc); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static void destroy_avvkbuf(void *opaque, uint8_t *data) +{ + FFVulkanContext *s = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; 
+ ff_vk_free_buf(s, buf); + av_free(buf); +} + +int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst, + uint8_t *src_data, const AVBufferRef *src_buf, + VkBufferUsageFlags usage) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkExternalMemoryBufferCreateInfo create_desc = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + }; + VkMemoryAllocateFlagsInfo alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + }; + VkImportMemoryHostPointerInfoEXT import_desc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + .pNext = usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ? &alloc_flags : NULL, + }; + VkMemoryHostPointerPropertiesEXT props; + + AVBufferRef *ref; + FFVkBuffer *vkb; + size_t offs; + size_t buffer_size; + + *dst = NULL; + + /* Get the previous point at which mapping was possible and use it */ + offs = (uintptr_t)src_data % s->hprops.minImportedHostPointerAlignment; + import_desc.pHostPointer = src_data - offs; + + props = (VkMemoryHostPointerPropertiesEXT) { + VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT, + }; + ret = vk->GetMemoryHostPointerPropertiesEXT(s->hwctx->act_dev, + import_desc.handleType, + import_desc.pHostPointer, + &props); + if (!(ret == VK_SUCCESS && props.memoryTypeBits)) + return AVERROR(EINVAL); + + /* Ref the source buffer */ + ref = av_buffer_ref(src_buf); + if (!ref) + return AVERROR(ENOMEM); + + /* Add the offset at the start, which gets ignored */ + const ptrdiff_t src_offset = src_data - src_buf->data; + buffer_size = offs + (src_buf->size - src_offset); + buffer_size = FFALIGN(buffer_size, s->props.properties.limits.minMemoryMapAlignment); + buffer_size = FFALIGN(buffer_size, s->hprops.minImportedHostPointerAlignment); + + /* Create a buffer 
struct */
+    vkb = av_mallocz(sizeof(*vkb));
+    if (!vkb) {
+        av_buffer_unref(&ref);
+        return AVERROR(ENOMEM);
+    }
+
+    err = create_mapped_buffer(s, vkb, usage,
+                               buffer_size, &create_desc, &import_desc,
+                               props);
+    if (err < 0) {
+        av_buffer_unref(&ref);
+        av_free(vkb);
+        return err;
+    }
+
+    if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
+        VkBufferDeviceAddressInfo address_info = {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
+            .buffer = vkb->buf,
+        };
+        vkb->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info);
+    }
+
+    vkb->host_ref       = ref;
+    vkb->virtual_offset = offs;
+    vkb->address       += offs;
+    vkb->mapped_mem     = src_data;
+    vkb->size           = buffer_size - offs;
+    vkb->flags         |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+    /* Create a ref */
+    *dst = av_buffer_create((uint8_t *)vkb, sizeof(*vkb),
+                            destroy_avvkbuf, s, 0);
+    if (!(*dst)) {
+        destroy_avvkbuf(s, (uint8_t *)vkb);
+        *dst = NULL;
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/**
+ * Append a push-constant range (stage, offset, size) to shd->push_consts.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the array could not be grown;
+ *         in that case shd->push_consts and shd->push_consts_num are left
+ *         unchanged
+ */
+int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size,
+                                VkShaderStageFlagBits stage)
+{
+    VkPushConstantRange *pc;
+
+    /* Grow through a temporary: assigning the result straight back to
+     * shd->push_consts would lose the existing array on failure. */
+    pc = av_realloc_array(shd->push_consts,
+                          sizeof(*shd->push_consts),
+                          shd->push_consts_num + 1);
+    if (!pc)
+        return AVERROR(ENOMEM);
+    shd->push_consts = pc;
+
+    pc = &shd->push_consts[shd->push_consts_num++];
+    memset(pc, 0, sizeof(*pc));
+
+    pc->stageFlags = stage;
+    pc->offset = offset;
+    pc->size = size;
+
+    return 0;
+}
+
+/**
+ * Create a sampler using filt for both magnification and minification and
+ * clamp-to-edge addressing on all three axes. unnorm_coords selects
+ * unnormalized coordinates and, with them, nearest mipmap mode (linear
+ * otherwise).
+ *
+ * @return 0 on success, AVERROR_EXTERNAL if sampler creation fails
+ */
+int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler,
+                       int unnorm_coords, VkFilter filt)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    /* Every member is given its value directly: the expressions of an
+     * initializer list are indeterminately sequenced, so reading members of
+     * sampler_info from inside its own initializer is not reliable. */
+    VkSamplerCreateInfo sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+        .magFilter = filt,
+        .minFilter = filt,
+        .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
+                                      VK_SAMPLER_MIPMAP_MODE_LINEAR,
+        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+        .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+        .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+        .anisotropyEnable = VK_FALSE,
+        .compareOp = VK_COMPARE_OP_NEVER,
+        .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
+        .unnormalizedCoordinates = unnorm_coords,
+    };
+
+    ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
+                            s->hwctx->alloc, sampler);
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Return the image aspect to use for plane p of frame f:
+ * VK_IMAGE_ASPECT_COLOR_BIT when ff_vk_mt_is_np_rgb() accepts the frame's
+ * software format or when the plane count equals the image count, otherwise
+ * the per-plane aspect bit for p.
+ *
+ * NOTE(review): p is not range-checked against the three-entry table below;
+ * confirm callers never reach it with p > 2.
+ */
+VkImageAspectFlags ff_vk_aspect_flag(AVFrame *f, int p)
+{
+    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+    int nb_images = ff_vk_count_images(vkf);
+    int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+    static const VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_PLANE_0_BIT,
+                                                       VK_IMAGE_ASPECT_PLANE_1_BIT,
+                                                       VK_IMAGE_ASPECT_PLANE_2_BIT, };
+
+    if (ff_vk_mt_is_np_rgb(hwfc->sw_format) || (nb_planes == nb_images))
+        return VK_IMAGE_ASPECT_COLOR_BIT;
+
+    return plane_aspect[p];
+}
+
+int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
+{
+    if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA ||
+        pix_fmt == AV_PIX_FMT_RGBA || pix_fmt == AV_PIX_FMT_RGB24 ||
+        pix_fmt == AV_PIX_FMT_BGR24 || pix_fmt == AV_PIX_FMT_RGB48 ||
+        pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
+        pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0 ||
+        pix_fmt == AV_PIX_FMT_0BGR || pix_fmt == AV_PIX_FMT_RGB0 ||
+        pix_fmt == AV_PIX_FMT_GBRP10 || pix_fmt == AV_PIX_FMT_GBRP12 ||
+        pix_fmt == AV_PIX_FMT_GBRP14 || pix_fmt == AV_PIX_FMT_GBRP16 ||
+        pix_fmt == AV_PIX_FMT_GBRAP || pix_fmt == AV_PIX_FMT_GBRAP10 ||
+        pix_fmt == AV_PIX_FMT_GBRAP12 || pix_fmt == AV_PIX_FMT_GBRAP14 ||
+        pix_fmt == AV_PIX_FMT_GBRAP16 || pix_fmt == AV_PIX_FMT_GBRAP32 ||
+        pix_fmt ==
AV_PIX_FMT_GBRPF32 || pix_fmt == AV_PIX_FMT_GBRAPF32 || + pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10 || + pix_fmt == AV_PIX_FMT_RGBAF32 || pix_fmt == AV_PIX_FMT_RGBF32 || + pix_fmt == AV_PIX_FMT_RGBA128 || pix_fmt == AV_PIX_FMT_RGB96 || + pix_fmt == AV_PIX_FMT_GBRP || pix_fmt == AV_PIX_FMT_BAYER_RGGB16) + return 1; + return 0; +} + +void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4], int inv) +{ + switch (pix_fmt) { + case AV_PIX_FMT_GBRP: + case AV_PIX_FMT_GBRAP: + case AV_PIX_FMT_GBRAP10: + case AV_PIX_FMT_GBRAP12: + case AV_PIX_FMT_GBRAP14: + case AV_PIX_FMT_GBRAP16: + case AV_PIX_FMT_GBRP10: + case AV_PIX_FMT_GBRP12: + case AV_PIX_FMT_GBRP14: + case AV_PIX_FMT_GBRP16: + case AV_PIX_FMT_GBRPF32: + case AV_PIX_FMT_GBRAP32: + case AV_PIX_FMT_GBRAPF32: + lut[0] = 1; + lut[1] = 2; + lut[2] = 0; + lut[3] = 3; + break; + default: + lut[0] = 0; + lut[1] = 1; + lut[2] = 2; + lut[3] = 3; + break; + } + + if (inv) { + int lut_tmp[4] = { lut[0], lut[1], lut[2], lut[3] }; + for (int i = 0; i < 4; i++) + lut[lut_tmp[i]] = i; + } + + return; +} + +const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, + enum FFVkShaderRepFormat rep_fmt) +{ + switch (pix_fmt) { + case AV_PIX_FMT_RGBA: + case AV_PIX_FMT_BGRA: + case AV_PIX_FMT_RGB24: + case AV_PIX_FMT_BGR24: + case AV_PIX_FMT_BGR0: + case AV_PIX_FMT_RGB0: + case AV_PIX_FMT_RGB565: + case AV_PIX_FMT_BGR565: + case AV_PIX_FMT_UYVA: + case AV_PIX_FMT_YUYV422: + case AV_PIX_FMT_UYVY422: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "rgba8ui", + [FF_VK_REP_FLOAT] = "rgba8", + [FF_VK_REP_INT] = "rgba8i", + [FF_VK_REP_UINT] = "rgba8ui", + }; + return rep_tab[rep_fmt]; + } + case AV_PIX_FMT_X2RGB10: + case AV_PIX_FMT_X2BGR10: + case AV_PIX_FMT_Y210: + case AV_PIX_FMT_XV30: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "rgb10_a2ui", + [FF_VK_REP_FLOAT] = "rgb10_a2", + [FF_VK_REP_INT] = NULL, + [FF_VK_REP_UINT] = "rgb10_a2ui", + }; + return rep_tab[rep_fmt]; + } + case AV_PIX_FMT_RGB48: + 
case AV_PIX_FMT_RGBA64: + case AV_PIX_FMT_Y212: + case AV_PIX_FMT_Y216: + case AV_PIX_FMT_XV36: + case AV_PIX_FMT_XV48: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "rgba16ui", + [FF_VK_REP_FLOAT] = "rgba16", + [FF_VK_REP_INT] = "rgba16i", + [FF_VK_REP_UINT] = "rgba16ui", + }; + return rep_tab[rep_fmt]; + } + case AV_PIX_FMT_RGBF32: + case AV_PIX_FMT_RGBAF32: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "rgba32f", + [FF_VK_REP_FLOAT] = "rgba32f", + [FF_VK_REP_INT] = "rgba32i", + [FF_VK_REP_UINT] = "rgba32ui", + }; + return rep_tab[rep_fmt]; + } + case AV_PIX_FMT_RGB96: + case AV_PIX_FMT_RGBA128: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "rgba32ui", + [FF_VK_REP_FLOAT] = NULL, + [FF_VK_REP_INT] = "rgba32i", + [FF_VK_REP_UINT] = "rgba32ui", + }; + return rep_tab[rep_fmt]; + } + case AV_PIX_FMT_GBRP: + case AV_PIX_FMT_GRAY8: + case AV_PIX_FMT_GBRAP: + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUVA420P: + case AV_PIX_FMT_YUVA422P: + case AV_PIX_FMT_YUVA444P: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "r8ui", + [FF_VK_REP_FLOAT] = "r8", + [FF_VK_REP_INT] = "r8i", + [FF_VK_REP_UINT] = "r8ui", + }; + return rep_tab[rep_fmt]; + }; + case AV_PIX_FMT_GRAY10: + case AV_PIX_FMT_GRAY12: + case AV_PIX_FMT_GRAY14: + case AV_PIX_FMT_GRAY16: + case AV_PIX_FMT_GBRAP10: + case AV_PIX_FMT_GBRAP12: + case AV_PIX_FMT_GBRAP14: + case AV_PIX_FMT_GBRAP16: + case AV_PIX_FMT_GBRP10: + case AV_PIX_FMT_GBRP12: + case AV_PIX_FMT_GBRP14: + case AV_PIX_FMT_GBRP16: + case AV_PIX_FMT_YUV420P10: + case AV_PIX_FMT_YUV420P12: + case AV_PIX_FMT_YUV420P16: + case AV_PIX_FMT_YUV422P10: + case AV_PIX_FMT_YUV422P12: + case AV_PIX_FMT_YUV422P16: + case AV_PIX_FMT_YUV444P10: + case AV_PIX_FMT_YUV444P12: + case AV_PIX_FMT_YUV444P16: + case AV_PIX_FMT_YUVA420P10: + case AV_PIX_FMT_YUVA420P16: + case AV_PIX_FMT_YUVA422P10: + case AV_PIX_FMT_YUVA422P12: + case AV_PIX_FMT_YUVA422P16: + case AV_PIX_FMT_YUVA444P10: + case 
AV_PIX_FMT_YUVA444P12: + case AV_PIX_FMT_YUVA444P16: + case AV_PIX_FMT_BAYER_RGGB16: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "r16ui", + [FF_VK_REP_FLOAT] = "r16f", + [FF_VK_REP_INT] = "r16i", + [FF_VK_REP_UINT] = "r16ui", + }; + return rep_tab[rep_fmt]; + }; + case AV_PIX_FMT_GRAY32: + case AV_PIX_FMT_GRAYF32: + case AV_PIX_FMT_GBRPF32: + case AV_PIX_FMT_GBRAPF32: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "r32f", + [FF_VK_REP_FLOAT] = "r32f", + [FF_VK_REP_INT] = "r32i", + [FF_VK_REP_UINT] = "r32ui", + }; + return rep_tab[rep_fmt]; + }; + case AV_PIX_FMT_GBRAP32: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "r32ui", + [FF_VK_REP_FLOAT] = NULL, + [FF_VK_REP_INT] = "r32i", + [FF_VK_REP_UINT] = "r32ui", + }; + return rep_tab[rep_fmt]; + }; + case AV_PIX_FMT_NV12: + case AV_PIX_FMT_NV16: + case AV_PIX_FMT_NV24: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "rg8ui", + [FF_VK_REP_FLOAT] = "rg8", + [FF_VK_REP_INT] = "rg8i", + [FF_VK_REP_UINT] = "rg8ui", + }; + return rep_tab[rep_fmt]; + }; + case AV_PIX_FMT_P010: + case AV_PIX_FMT_P210: + case AV_PIX_FMT_P410: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "rgb10_a2ui", + [FF_VK_REP_FLOAT] = "rgb10_a2", + [FF_VK_REP_INT] = NULL, + [FF_VK_REP_UINT] = "rgb10_a2ui", + }; + return rep_tab[rep_fmt]; + }; + case AV_PIX_FMT_P012: + case AV_PIX_FMT_P016: + case AV_PIX_FMT_P212: + case AV_PIX_FMT_P216: + case AV_PIX_FMT_P412: + case AV_PIX_FMT_P416: { + const char *rep_tab[] = { + [FF_VK_REP_NATIVE] = "rg16ui", + [FF_VK_REP_FLOAT] = "rg16", + [FF_VK_REP_INT] = "rg16i", + [FF_VK_REP_UINT] = "rg16ui", + }; + return rep_tab[rep_fmt]; + }; + default: + return "rgba32f"; + } +} + +typedef struct ImageViewCtx { + int nb_views; + VkImageView views[]; +} ImageViewCtx; + +static void destroy_imageviews(void *opaque, uint8_t *data) +{ + FFVulkanContext *s = opaque; + FFVulkanFunctions *vk = &s->vkfn; + ImageViewCtx *iv = (ImageViewCtx *)data; + + for (int i = 0; i < iv->nb_views; i++) + 
vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + + av_free(iv); +} + +static VkFormat map_fmt_to_rep(VkFormat fmt, enum FFVkShaderRepFormat rep_fmt) +{ +#define REPS_FMT(fmt) \ + [FF_VK_REP_NATIVE] = fmt ## _UINT, \ + [FF_VK_REP_FLOAT] = fmt ## _UNORM, \ + [FF_VK_REP_INT] = fmt ## _SINT, \ + [FF_VK_REP_UINT] = fmt ## _UINT, + +#define REPS_FMT_PACK(fmt, num) \ + [FF_VK_REP_NATIVE] = fmt ## _UINT_PACK ## num, \ + [FF_VK_REP_FLOAT] = fmt ## _UNORM_PACK ## num, \ + [FF_VK_REP_INT] = fmt ## _SINT_PACK ## num, \ + [FF_VK_REP_UINT] = fmt ## _UINT_PACK ## num, + + const VkFormat fmts_map[][4] = { + { REPS_FMT_PACK(VK_FORMAT_A2B10G10R10, 32) }, + { REPS_FMT_PACK(VK_FORMAT_A2R10G10B10, 32) }, + { + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_B5G6R5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_FORMAT_UNDEFINED, + }, + { + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, + VK_FORMAT_UNDEFINED, + }, + { REPS_FMT(VK_FORMAT_B8G8R8) }, + { REPS_FMT(VK_FORMAT_B8G8R8A8) }, + { REPS_FMT(VK_FORMAT_R8) }, + { REPS_FMT(VK_FORMAT_R8G8) }, + { REPS_FMT(VK_FORMAT_R8G8B8) }, + { REPS_FMT(VK_FORMAT_R8G8B8A8) }, + { REPS_FMT(VK_FORMAT_R16) }, + { REPS_FMT(VK_FORMAT_R16G16) }, + { REPS_FMT(VK_FORMAT_R16G16B16) }, + { REPS_FMT(VK_FORMAT_R16G16B16A16) }, + { + VK_FORMAT_R32_UINT, + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32_SINT, + VK_FORMAT_R32_UINT, + }, + { + VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_FORMAT_UNDEFINED, + }, + { + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_UNDEFINED, + VK_FORMAT_UNDEFINED, + }, + { + VK_FORMAT_R32G32B32_UINT, + VK_FORMAT_UNDEFINED, + VK_FORMAT_R32G32B32_SINT, + VK_FORMAT_R32G32B32_UINT, + }, + { + VK_FORMAT_R32G32B32A32_UINT, + VK_FORMAT_UNDEFINED, + VK_FORMAT_R32G32B32A32_SINT, + VK_FORMAT_R32G32B32A32_UINT, + }, + }; +#undef REPS_FMT_PACK +#undef REPS_FMT + + if (fmt == VK_FORMAT_UNDEFINED) + return VK_FORMAT_UNDEFINED; + + for 
(int i = 0; i < FF_ARRAY_ELEMS(fmts_map); i++) { + if (fmts_map[i][FF_VK_REP_NATIVE] == fmt || + fmts_map[i][FF_VK_REP_FLOAT] == fmt || + fmts_map[i][FF_VK_REP_INT] == fmt || + fmts_map[i][FF_VK_REP_UINT] == fmt) + return fmts_map[i][rep_fmt]; + } + + return VK_FORMAT_UNDEFINED; +} + +int ff_vk_create_imageview(FFVulkanContext *s, + VkImageView *img_view, VkImageAspectFlags *aspect, + AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + const int nb_images = ff_vk_count_images(vkf); + + VkImageViewUsageCreateInfo view_usage_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = vkfc->usage & + (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)), + }; + VkImageViewCreateInfo view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &view_usage_info, + .image = vkf->img[FFMIN(plane, nb_images - 1)], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = map_fmt_to_rep(rep_fmts[plane], rep_fmt), + .components = ff_comp_identity_map, + .subresourceRange = { + .aspectMask = ff_vk_aspect_flag(f, plane), + .levelCount = 1, + .layerCount = 1, + }, + }; + if (view_create_info.format == VK_FORMAT_UNDEFINED) { + av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation " + "of format %i and mode %i\n", + rep_fmts[plane], rep_fmt); + return AVERROR(EINVAL); + } + + ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, + s->hwctx->alloc, img_view); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + *aspect = view_create_info.subresourceRange.aspectMask; + + return 0; +} + +int 
ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f, enum FFVkShaderRepFormat rep_fmt) +{ + int err; + VkResult ret; + AVBufferRef *buf; + FFVulkanFunctions *vk = &s->vkfn; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + const int nb_images = ff_vk_count_images(vkf); + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); + + ImageViewCtx *iv; + const size_t buf_size = sizeof(*iv) + nb_planes*sizeof(VkImageView); + iv = av_mallocz(buf_size); + if (!iv) + return AVERROR(ENOMEM); + + for (int i = 0; i < nb_planes; i++) { + VkImageViewUsageCreateInfo view_usage_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = vkfc->usage & + (~(VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)), + }; + VkImageViewCreateInfo view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &view_usage_info, + .image = vkf->img[FFMIN(i, nb_images - 1)], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = map_fmt_to_rep(rep_fmts[i], rep_fmt), + .components = ff_comp_identity_map, + .subresourceRange = { + .aspectMask = ff_vk_aspect_flag(f, i), + .levelCount = 1, + .layerCount = 1, + }, + }; + if (view_create_info.format == VK_FORMAT_UNDEFINED) { + av_log(s, AV_LOG_ERROR, "Unable to find a compatible representation " + "of format %i and mode %i\n", + rep_fmts[i], rep_fmt); + err = AVERROR(EINVAL); + goto fail; + } + + ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, + s->hwctx->alloc, &iv->views[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + iv->nb_views++; + } + + buf = av_buffer_create((uint8_t *)iv, buf_size, 
destroy_imageviews, s, 0); + if (!buf) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Add to queue dependencies */ + err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); + if (err < 0) { + /* Unreferencing buf can run destroy_imageviews(), which frees iv, + * so return here rather than copying out of freed memory. */ + av_buffer_unref(&buf); + return err; + } + + /* Success: iv is kept alive through buf; copy the handles out for the caller */ + memcpy(views, iv->views, nb_planes*sizeof(*views)); + + return err; + +fail: + /* Only the views created so far are counted in nb_views */ + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); + return err; +} + +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf) +{ + int found = -1; + AVVkFrame *vkf = (AVVkFrame *)pic->data[0]; + const int nb_images = ff_vk_count_images(vkf); + for (int i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == pic->data[0]) { + if (e->frame_update[i]) + found = i; + break; + } + + for (int i = 0; i < nb_images; i++) { + bar[*nb_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = src_stage, + .dstStageMask = dst_stage, + .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i], + .dstAccessMask = new_access, + .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0], + .newLayout = new_layout, + .srcQueueFamilyIndex = found >= 0 ? 
e->queue_family_dst[found] : vkf->queue_family[0], + .dstQueueFamilyIndex = new_qf, + .image = vkf->img[i], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = 1, + .levelCount = 1, + }, + }; + *nb_bar += 1; + } + + ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL); +} + +int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, + VkPipelineStageFlags stage, + const char *extensions[], int nb_extensions, + int lg_x, int lg_y, int lg_z, + uint32_t required_subgroup_size) +{ + av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); + + shd->name = name; + shd->stage = stage; + shd->lg_size[0] = lg_x; + shd->lg_size[1] = lg_y; + shd->lg_size[2] = lg_z; + + switch (shd->stage) { + case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: + case VK_SHADER_STAGE_CALLABLE_BIT_KHR: + case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: + case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: + case VK_SHADER_STAGE_MISS_BIT_KHR: + case VK_SHADER_STAGE_RAYGEN_BIT_KHR: + shd->bind_point = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; + break; + case VK_SHADER_STAGE_COMPUTE_BIT: + shd->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + break; + default: + shd->bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; + break; + }; + + if (required_subgroup_size) { + shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO; + shd->subgroup_info.requiredSubgroupSize = required_subgroup_size; + } + + av_bprintf(&shd->src, "/* %s shader: %s */\n", + (stage == VK_SHADER_STAGE_TASK_BIT_EXT || + stage == VK_SHADER_STAGE_MESH_BIT_EXT) ? + "Mesh" : + (shd->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) ? + "Raytrace" : + (shd->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) ? 
+ "Compute" : "Graphics", + name); + GLSLF(0, #version %i ,460); + GLSLC(0, ); + + /* Common utilities */ + GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); + GLSLC(0, ); + GLSLC(0, #extension GL_EXT_scalar_block_layout : require ); + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : require ); + GLSLC(0, #extension GL_EXT_control_flow_attributes : require ); + GLSLC(0, #extension GL_EXT_shader_image_load_formatted : require ); + if (s->extensions & FF_VK_EXT_EXPECT_ASSUME) { + GLSLC(0, #extension GL_EXT_expect_assume : require ); + } else { + GLSLC(0, #define assumeEXT(x) (x) ); + GLSLC(0, #define expectEXT(x, c) (x) ); + } + if ((s->extensions & FF_VK_EXT_DEBUG_UTILS) && + (s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR)) { + GLSLC(0, #extension GL_EXT_debug_printf : require ); + GLSLC(0, #define DEBUG ); + } + + if (stage == VK_SHADER_STAGE_TASK_BIT_EXT || + stage == VK_SHADER_STAGE_MESH_BIT_EXT) + GLSLC(0, #extension GL_EXT_mesh_shader : require ); + + for (int i = 0; i < nb_extensions; i++) + GLSLF(0, #extension %s : %s ,extensions[i], "require"); + GLSLC(0, ); + + GLSLF(0, layout (local_size_x = %i, local_size_y = %i, local_size_z = %i) in; + , shd->lg_size[0], shd->lg_size[1], shd->lg_size[2]); + GLSLC(0, ); + + return 0; +} + +void ff_vk_shader_print(void *ctx, FFVulkanShader *shd, int prio) +{ + int line = 0; + const char *p = shd->src.str; + const char *start = p; + const size_t len = strlen(p); + + AVBPrint buf; + av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); + + for (int i = 0; i < len; i++) { + if (p[i] == '\n') { + av_bprintf(&buf, "%i\t", ++line); + av_bprint_append_data(&buf, start, &p[i] - start + 1); + start = &p[i + 1]; + } + } + + av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str); + av_bprint_finalize(&buf, NULL); +} + +static int init_pipeline_layout(FFVulkanContext *s, FFVulkanShader *shd) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkPipelineLayoutCreateInfo pipeline_layout_info; + 
+ /* Finally create the pipeline layout */ + pipeline_layout_info = (VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pSetLayouts = shd->desc_layout, + .setLayoutCount = shd->nb_descriptor_sets, + .pushConstantRangeCount = shd->push_consts_num, + .pPushConstantRanges = shd->push_consts, + }; + + ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info, + s->hwctx->alloc, &shd->pipeline_layout); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int create_shader_module(FFVulkanContext *s, FFVulkanShader *shd, + VkShaderModule *mod, + uint8_t *spirv, size_t spirv_len) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkShaderModuleCreateInfo shader_module_info = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = NULL, + .flags = 0x0, + .pCode = (void *)spirv, + .codeSize = spirv_len, + }; + + ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_module_info, + s->hwctx->alloc, mod); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Error creating shader module: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int init_compute_pipeline(FFVulkanContext *s, FFVulkanShader *shd, + VkShaderModule mod, const char *entrypoint) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkComputePipelineCreateInfo pipeline_create_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ? + VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0, + .layout = shd->pipeline_layout, + .stage = (VkPipelineShaderStageCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = shd->subgroup_info.requiredSubgroupSize ? + &shd->subgroup_info : NULL, + .pName = entrypoint, + .flags = shd->subgroup_info.requiredSubgroupSize ? 
+ VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT : 0x0, + .stage = shd->stage, + .module = mod, + }, + }; + + ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, + &pipeline_create_info, + s->hwctx->alloc, &shd->pipeline); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static int create_shader_object(FFVulkanContext *s, FFVulkanShader *shd, + uint8_t *spirv, size_t spirv_len, + const char *entrypoint) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + size_t shader_size = 0; + + VkShaderCreateInfoEXT shader_obj_create = { + .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, + .flags = shd->subgroup_info.requiredSubgroupSize ? + VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT : 0x0, + .stage = shd->stage, + .nextStage = 0, + .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT, + .pCode = spirv, + .codeSize = spirv_len, + .pName = entrypoint, + .pSetLayouts = shd->desc_layout, + .setLayoutCount = shd->nb_descriptor_sets, + .pushConstantRangeCount = shd->push_consts_num, + .pPushConstantRanges = shd->push_consts, + .pSpecializationInfo = NULL, + }; + + ret = vk->CreateShadersEXT(s->hwctx->act_dev, 1, &shader_obj_create, + s->hwctx->alloc, &shd->object); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to create shader object: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (vk->GetShaderBinaryDataEXT(s->hwctx->act_dev, shd->object, + &shader_size, NULL) == VK_SUCCESS) + av_log(s, AV_LOG_VERBOSE, "Shader %s size: %"SIZE_SPECIFIER" binary (%"SIZE_SPECIFIER" SPIR-V)\n", + shd->name, shader_size, spirv_len); + + return 0; +} + +static int init_descriptors(FFVulkanContext *s, FFVulkanShader *shd) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + /* Zero-filled so that ff_vk_shader_free() can tell created layouts from + * untouched slots if a later CreateDescriptorSetLayout() call fails */ + shd->desc_layout = av_calloc(shd->nb_descriptor_sets, + sizeof(*shd->desc_layout)); + if (!shd->desc_layout) + return AVERROR(ENOMEM); + + 
if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) { + int has_singular = 0; + int max_descriptors = 0; + for (int i = 0; i < shd->nb_descriptor_sets; i++) { + max_descriptors = FFMAX(max_descriptors, shd->desc_set[i].nb_bindings); + if (shd->desc_set[i].singular) + has_singular = 1; + } + shd->use_push = (s->extensions & FF_VK_EXT_PUSH_DESCRIPTOR) && + (max_descriptors <= s->push_desc_props.maxPushDescriptors) && + (shd->nb_descriptor_sets == 1) && + (has_singular == 0); + } + + for (int i = 0; i < shd->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &shd->desc_set[i]; + VkDescriptorSetLayoutCreateInfo desc_layout_create = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = set->nb_bindings, + .pBindings = set->binding, + .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ? + VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : + (shd->use_push) ? + VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : + 0x0, + }; + + ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, + &desc_layout_create, + s->hwctx->alloc, + &shd->desc_layout[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to create descriptor set layout: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, shd->desc_layout[i], + &set->layout_size); + + set->aligned_size = FFALIGN(set->layout_size, + s->desc_buf_props.descriptorBufferOffsetAlignment); + + for (int j = 0; j < set->nb_bindings; j++) + vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, + shd->desc_layout[i], + j, + &set->binding_offset[j]); + } + } + + return 0; +} + +int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, + uint8_t *spirv, size_t spirv_len, + const char *entrypoint) +{ + int err; + FFVulkanFunctions *vk = &s->vkfn; + + err = init_descriptors(s, shd); + if (err < 0) + return err; + + err = init_pipeline_layout(s, shd); + 
if (err < 0) + return err; + + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + shd->bound_buffer_indices = av_calloc(shd->nb_descriptor_sets, + sizeof(*shd->bound_buffer_indices)); + if (!shd->bound_buffer_indices) + return AVERROR(ENOMEM); + + for (int i = 0; i < shd->nb_descriptor_sets; i++) + shd->bound_buffer_indices[i] = i; + } + + if (s->extensions & FF_VK_EXT_SHADER_OBJECT) { + err = create_shader_object(s, shd, spirv, spirv_len, entrypoint); + if (err < 0) + return err; + } else { + VkShaderModule mod; + err = create_shader_module(s, shd, &mod, spirv, spirv_len); + if (err < 0) + return err; + + switch (shd->bind_point) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + err = init_compute_pipeline(s, shd, mod, entrypoint); + break; + default: + av_log(s, AV_LOG_ERROR, "Unsupported shader type: %i\n", + shd->bind_point); + err = AVERROR(EINVAL); + break; + }; + + vk->DestroyShaderModule(s->hwctx->act_dev, mod, s->hwctx->alloc); + if (err < 0) + return err; + } + + return 0; +} + +static const struct descriptor_props { + size_t struct_size; /* Size of the opaque which updates the descriptor */ + const char *type; + int is_uniform; + int mem_quali; /* Can use a memory qualifier */ + int dim_needed; /* Must indicate dimension */ + int buf_content; /* Must indicate buffer contents */ +} descriptor_props[] = { + [VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, }, + [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, }, + [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, }, + [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, }, + [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, }, + [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, + [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), 
"buffer", 0, 1, 0, 1, }, + [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, + [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, + [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, }, + [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, +}; + +int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int singular, int print_to_shader_only) +{ + int has_sampler = 0; + FFVulkanDescriptorSet *set; + + if (print_to_shader_only) + goto print; + + /* Actual layout allocated for the pipeline */ + set = av_realloc_array(shd->desc_set, + sizeof(*shd->desc_set), + shd->nb_descriptor_sets + 1); + if (!set) + return AVERROR(ENOMEM); + shd->desc_set = set; + + set = &set[shd->nb_descriptor_sets]; + memset(set, 0, sizeof(*set)); + + set->binding = av_calloc(nb, sizeof(*set->binding)); + if (!set->binding) + return AVERROR(ENOMEM); + + set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset)); + if (!set->binding_offset) { + av_freep(&set->binding); + return AVERROR(ENOMEM); + } + + for (int i = 0; i < nb; i++) { + set->binding[i].binding = i; + set->binding[i].descriptorType = desc[i].type; + set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1); + set->binding[i].stageFlags = desc[i].stages; + set->binding[i].pImmutableSamplers = desc[i].samplers; + + if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER || + desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + has_sampler |= 1; + } + + set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + if (has_sampler) + set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; + + if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) { + for (int i = 0; i < nb; i++) { + int j; + VkDescriptorPoolSize 
*desc_pool_size; + for (j = 0; j < shd->nb_desc_pool_size; j++) + if (shd->desc_pool_size[j].type == desc[i].type) + break; + if (j >= shd->nb_desc_pool_size) { + desc_pool_size = av_realloc_array(shd->desc_pool_size, + sizeof(*desc_pool_size), + shd->nb_desc_pool_size + 1); + if (!desc_pool_size) { + /* This set is not yet counted in nb_descriptor_sets, so + * ff_vk_shader_free() would never release its arrays */ + av_freep(&set->binding); + av_freep(&set->binding_offset); + return AVERROR(ENOMEM); + } + + shd->desc_pool_size = desc_pool_size; + shd->nb_desc_pool_size++; + memset(&desc_pool_size[j], 0, sizeof(VkDescriptorPoolSize)); + } + shd->desc_pool_size[j].type = desc[i].type; + shd->desc_pool_size[j].descriptorCount += FFMAX(desc[i].elems, 1); + } + } + + set->singular = singular; + set->nb_bindings = nb; + shd->nb_descriptor_sets++; + +print: + /* Write shader info */ + for (int i = 0; i < nb; i++) { + const struct descriptor_props *prop = &descriptor_props[desc[i].type]; + GLSLA("layout (set = %i, binding = %i", FFMAX(shd->nb_descriptor_sets - 1, 0), i); + + if (desc[i].mem_layout && + (desc[i].type != VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)) + GLSLA(", %s", desc[i].mem_layout); + + GLSLA(")"); + + if (prop->is_uniform) + GLSLA(" uniform"); + + if (prop->mem_quali && desc[i].mem_quali) + GLSLA(" %s", desc[i].mem_quali); + + if (prop->type) { + GLSLA(" "); + if (desc[i].type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + if (desc[i].mem_layout) { + /* A "ui" / "i" suffix on the layout selects the u/i image type prefix; + * the length checks keep the suffix reads inside the string */ + int len = strlen(desc[i].mem_layout); + if (len >= 2 && + desc[i].mem_layout[len - 1] == 'i' && + desc[i].mem_layout[len - 2] == 'u') { + GLSLA("u"); + } else if (len >= 1 && + desc[i].mem_layout[len - 1] == 'i') { + GLSLA("i"); + } + } + } + GLSLA("%s", prop->type); + } + + if (prop->dim_needed) + GLSLA("%iD", desc[i].dimensions); + + GLSLA(" %s", desc[i].name); + + if (prop->buf_content) { + GLSLA(" {\n "); + if (desc[i].buf_elems) { + GLSLA("%s", desc[i].buf_content); + GLSLA("[%i];", desc[i].buf_elems); + } else { + GLSLA("%s", desc[i].buf_content); + } + GLSLA("\n}"); + } else if (desc[i].elems > 0) { + GLSLA("[%i]", desc[i].elems); + } + + GLSLA(";"); + GLSLA("\n"); + } + GLSLA("\n"); + + return 0; +} + +int 
ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanShader *shd) +{ + int err; + FFVulkanShaderData *sd; + + if (!shd->nb_descriptor_sets) + return 0; + + sd = av_realloc_array(pool->reg_shd, + sizeof(*pool->reg_shd), + pool->nb_reg_shd + 1); + if (!sd) + return AVERROR(ENOMEM); + + pool->reg_shd = sd; + sd = &sd[pool->nb_reg_shd++]; + memset(sd, 0, sizeof(*sd)); + + sd->shd = shd; + sd->nb_descriptor_sets = shd->nb_descriptor_sets; + + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + sd->desc_bind = av_malloc_array(sd->nb_descriptor_sets, sizeof(*sd->desc_bind)); + if (!sd->desc_bind) + return AVERROR(ENOMEM); + + sd->desc_set_buf = av_calloc(sd->nb_descriptor_sets, sizeof(*sd->desc_set_buf)); + if (!sd->desc_set_buf) + return AVERROR(ENOMEM); + + for (int i = 0; i < sd->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &shd->desc_set[i]; + FFVulkanDescriptorSetData *sdb = &sd->desc_set_buf[i]; + int nb = set->singular ? 1 : pool->pool_size; + + err = ff_vk_create_buf(s, &sdb->buf, + set->aligned_size*nb, + NULL, NULL, set->usage, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (err < 0) + return err; + + err = ff_vk_map_buffer(s, &sdb->buf, &sdb->desc_mem, 0); + if (err < 0) + return err; + + sd->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, + .usage = set->usage, + .address = sdb->buf.address, + }; + } + } else if (!shd->use_push) { + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkDescriptorSetLayout *tmp_layouts; + VkDescriptorSetAllocateInfo set_alloc_info; + VkDescriptorPoolCreateInfo pool_create_info; + + for (int i = 0; i < shd->nb_desc_pool_size; i++) + shd->desc_pool_size[i].descriptorCount *= pool->pool_size; + + pool_create_info = (VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .flags = 0, + .pPoolSizes = 
shd->desc_pool_size, + .poolSizeCount = shd->nb_desc_pool_size, + .maxSets = sd->nb_descriptor_sets*pool->pool_size, + }; + + ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, + s->hwctx->alloc, &sd->desc_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to create descriptor pool: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + tmp_layouts = av_malloc_array(pool_create_info.maxSets, sizeof(*tmp_layouts)); + if (!tmp_layouts) + return AVERROR(ENOMEM); + + /* Colate each execution context's descriptor set layouts */ + for (int i = 0; i < pool->pool_size; i++) + for (int j = 0; j < sd->nb_descriptor_sets; j++) + tmp_layouts[i*sd->nb_descriptor_sets + j] = shd->desc_layout[j]; + + set_alloc_info = (VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = sd->desc_pool, + .pSetLayouts = tmp_layouts, + .descriptorSetCount = pool_create_info.maxSets, + }; + + sd->desc_sets = av_malloc_array(pool_create_info.maxSets, + sizeof(*tmp_layouts)); + if (!sd->desc_sets) { + av_free(tmp_layouts); + return AVERROR(ENOMEM); + } + ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &set_alloc_info, + sd->desc_sets); + av_free(tmp_layouts); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", + ff_vk_ret2str(ret)); + av_freep(&sd->desc_sets); + return AVERROR_EXTERNAL; + } + } + + return 0; +} + +static inline FFVulkanShaderData *get_shd_data(FFVkExecContext *e, + FFVulkanShader *shd) +{ + for (int i = 0; i < e->parent->nb_reg_shd; i++) + if (e->parent->reg_shd[i].shd == shd) + return &e->parent->reg_shd[i]; + return NULL; +} + +static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, int set, + int bind_idx, int array_idx, + VkDescriptorGetInfoEXT *desc_get_info, + size_t desc_size) +{ + FFVulkanFunctions *vk = &s->vkfn; + FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; + 
FFVulkanShaderData *sd = get_shd_data(e, shd); + const size_t exec_offset = desc_set->singular ? 0 : desc_set->aligned_size*e->idx; + + void *desc = sd->desc_set_buf[set].desc_mem + /* Base */ + exec_offset + /* Execution context */ + desc_set->binding_offset[bind_idx] + /* Descriptor binding */ + array_idx*desc_size; /* Array position */ + + vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); +} + +static inline void update_set_pool_write(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, int set, + VkWriteDescriptorSet *write_info) +{ + FFVulkanFunctions *vk = &s->vkfn; + FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; + FFVulkanShaderData *sd = get_shd_data(e, shd); + + if (desc_set->singular) { + for (int i = 0; i < e->parent->pool_size; i++) { + write_info->dstSet = sd->desc_sets[i*sd->nb_descriptor_sets + set]; + vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL); + } + } else { + if (shd->use_push) { + vk->CmdPushDescriptorSetKHR(e->buf, + shd->bind_point, + shd->pipeline_layout, + set, 1, + write_info); + } else { + write_info->dstSet = sd->desc_sets[e->idx*sd->nb_descriptor_sets + set]; + vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL); + } + } +} + +int ff_vk_shader_update_img(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, + VkSampler sampler) +{ + FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; + + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorImageInfo desc_img_info = { + .imageView = view, + .sampler = sampler, + .imageLayout = layout, + }; + size_t desc_size; + + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + desc_get_info.data.pSampledImage = &desc_img_info; + desc_size = 
s->desc_buf_props.sampledImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + desc_get_info.data.pStorageImage = &desc_img_info; + desc_size = s->desc_buf_props.storageImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + desc_get_info.data.pInputAttachmentImage = &desc_img_info; + desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + desc_get_info.data.pCombinedImageSampler = &desc_img_info; + desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; + + update_set_descriptor(s, e, shd, set, bind, offs, + &desc_get_info, desc_size); + } else { + VkDescriptorImageInfo desc_pool_write_info_img = { + .sampler = sampler, + .imageView = view, + .imageLayout = layout, + }; + VkWriteDescriptorSet desc_pool_write_info = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = bind, + .descriptorCount = 1, + .dstArrayElement = offs, + .descriptorType = desc_set->binding[bind].descriptorType, + .pImageInfo = &desc_pool_write_info_img, + }; + update_set_pool_write(s, e, shd, set, &desc_pool_write_info); + } + + return 0; +} + +void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler) +{ + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); + + for (int i = 0; i < nb_planes; i++) + ff_vk_shader_update_img(s, e, shd, set, binding, i, + views[i], layout, sampler); +} + +int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, + int set, int bind, int elem, + FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, + 
VkFormat fmt) +{ + FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; + + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorAddressInfoEXT desc_buf_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT, + .address = buf->address + offset, + .range = len, + .format = fmt, + }; + size_t desc_size; + + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + desc_get_info.data.pUniformBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + desc_get_info.data.pStorageBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; + + update_set_descriptor(s, e, shd, set, bind, elem, &desc_get_info, desc_size); + } else { + VkDescriptorBufferInfo desc_pool_write_info_buf = { + .buffer = buf->buf, + .offset = buf->virtual_offset + offset, + .range = len, + }; + VkWriteDescriptorSet desc_pool_write_info = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = bind, + .descriptorCount = 1, + .dstArrayElement = elem, + .descriptorType = desc_set->binding[bind].descriptorType, + .pBufferInfo = &desc_pool_write_info_buf, + }; + update_set_pool_write(s, e, shd, set, &desc_pool_write_info); + } + + return 0; +} + +void 
ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->CmdPushConstants(e->buf, shd->pipeline_layout, + stage, offset, size, src); +} + +void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd) +{ + FFVulkanFunctions *vk = &s->vkfn; + VkDeviceSize offsets[1024]; + FFVulkanShaderData *sd = get_shd_data(e, shd); + + if (s->extensions & FF_VK_EXT_SHADER_OBJECT) { + VkShaderStageFlagBits stages = shd->stage; + vk->CmdBindShadersEXT(e->buf, 1, &stages, &shd->object); + } else { + vk->CmdBindPipeline(e->buf, shd->bind_point, shd->pipeline); + } + + if (sd && sd->nb_descriptor_sets) { + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + for (int i = 0; i < sd->nb_descriptor_sets; i++) + offsets[i] = shd->desc_set[i].singular ? 0 : shd->desc_set[i].aligned_size*e->idx; + + /* Bind descriptor buffers */ + vk->CmdBindDescriptorBuffersEXT(e->buf, sd->nb_descriptor_sets, sd->desc_bind); + /* Binding offsets */ + vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, shd->bind_point, shd->pipeline_layout, + 0, sd->nb_descriptor_sets, + shd->bound_buffer_indices, offsets); + } else if (!shd->use_push) { + vk->CmdBindDescriptorSets(e->buf, shd->bind_point, shd->pipeline_layout, + 0, sd->nb_descriptor_sets, + &sd->desc_sets[e->idx*sd->nb_descriptor_sets], + 0, NULL); + } + } +} + +void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd) +{ + FFVulkanFunctions *vk = &s->vkfn; + + av_bprint_finalize(&shd->src, NULL); + +#if 0 + if (shd->shader.module) + vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, + s->hwctx->alloc); +#endif + + if (shd->object) + vk->DestroyShaderEXT(s->hwctx->act_dev, shd->object, s->hwctx->alloc); + if (shd->pipeline) + vk->DestroyPipeline(s->hwctx->act_dev, shd->pipeline, s->hwctx->alloc); + if (shd->pipeline_layout) + 
vk->DestroyPipelineLayout(s->hwctx->act_dev, shd->pipeline_layout, + s->hwctx->alloc); + + for (int i = 0; i < shd->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &shd->desc_set[i]; + av_free(set->binding); + av_free(set->binding_offset); + } + + if (shd->desc_layout) { + for (int i = 0; i < shd->nb_descriptor_sets; i++) + if (shd->desc_layout[i]) + vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, shd->desc_layout[i], + s->hwctx->alloc); + } + + av_freep(&shd->desc_pool_size); + av_freep(&shd->desc_layout); + av_freep(&shd->desc_set); + av_freep(&shd->bound_buffer_indices); + av_freep(&shd->push_consts); + shd->push_consts_num = 0; +} + +void ff_vk_uninit(FFVulkanContext *s) +{ + av_freep(&s->query_props); + av_freep(&s->qf_props); + av_freep(&s->video_props); + av_freep(&s->coop_mat_props); + av_freep(&s->host_image_copy_layouts); + + av_buffer_unref(&s->device_ref); + av_buffer_unref(&s->frames_ref); +} + +int ff_vk_init(FFVulkanContext *s, void *log_parent, + AVBufferRef *device_ref, AVBufferRef *frames_ref) +{ + int err; + + static const AVClass vulkan_context_class = { + .class_name = "vk", + .version = LIBAVUTIL_VERSION_INT, + .parent_log_context_offset = offsetof(FFVulkanContext, log_parent), + }; + + memset(s, 0, sizeof(*s)); + s->log_parent = log_parent; + s->class = &vulkan_context_class; + + if (frames_ref) { + s->frames_ref = av_buffer_ref(frames_ref); + if (!s->frames_ref) + return AVERROR(ENOMEM); + + s->frames = (AVHWFramesContext *)s->frames_ref->data; + s->hwfc = s->frames->hwctx; + + device_ref = s->frames->device_ref; + } + + s->device_ref = av_buffer_ref(device_ref); + if (!s->device_ref) { + ff_vk_uninit(s); + return AVERROR(ENOMEM); + } + + s->device = (AVHWDeviceContext *)s->device_ref->data; + s->hwctx = s->device->hwctx; + + s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, + s->hwctx->nb_enabled_dev_extensions); + s->extensions |= ff_vk_extensions_to_mask(s->hwctx->enabled_inst_extensions, + 
s->hwctx->nb_enabled_inst_extensions); + + err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1); + if (err < 0) { + ff_vk_uninit(s); + return err; + } + + err = ff_vk_load_props(s); + if (err < 0) { + ff_vk_uninit(s); + return err; + } + + return 0; +} -- 2.49.1 From b1e3cb34245f62dc74c3c26a2fc6d6b330d957ca Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:18:20 +0000 Subject: [PATCH 045/118] Changing vulkan file directory --- libavutil/vulkan.h | 665 --------------------------------------------- 1 file changed, 665 deletions(-) delete mode 100644 libavutil/vulkan.h diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h deleted file mode 100644 index e1c9a5792f..0000000000 --- a/libavutil/vulkan.h +++ /dev/null @@ -1,665 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVUTIL_VULKAN_H -#define AVUTIL_VULKAN_H - -#define VK_NO_PROTOTYPES - -#include <stdatomic.h> - -#include "pixdesc.h" -#include "bprint.h" -#include "hwcontext.h" -#include "vulkan_functions.h" -#include "hwcontext_vulkan.h" - -/* GLSL management macros */ -#define INDENT(N) INDENT_##N -#define INDENT_0 -#define INDENT_1 INDENT_0 " " -#define INDENT_2 INDENT_1 INDENT_1 -#define INDENT_3 INDENT_2 INDENT_1 -#define INDENT_4 INDENT_3 INDENT_1 -#define INDENT_5 INDENT_4 INDENT_1 -#define INDENT_6 INDENT_5 INDENT_1 -#define C(N, S) INDENT(N) #S "\n" - -#define GLSLC(N, S) \ - do { \ - av_bprintf(&shd->src, C(N, S)); \ - } while (0) - -#define GLSLA(...) \ - do { \ - av_bprintf(&shd->src, __VA_ARGS__); \ - } while (0) - -#define GLSLF(N, S, ...) \ - do { \ - av_bprintf(&shd->src, C(N, S), __VA_ARGS__); \ - } while (0) - -#define GLSLD(D) \ - do { \ - av_bprintf(&shd->src, "\n"); \ - av_bprint_append_data(&shd->src, D, strlen(D)); \ - av_bprintf(&shd->src, "\n"); \ - } while (0) - -/* Helper, pretty much every Vulkan return value needs to be checked */ -#define RET(x) \ - do { \ - if ((err = (x)) < 0) \ - goto fail; \ - } while (0) - -#define DUP_SAMPLER(x) { x, x, x, x } - -typedef struct FFVulkanDescriptorSetBinding { - const char *name; - VkDescriptorType type; - const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */ - const char *mem_quali; /* readonly, writeonly, etc. */ - const char *buf_content; /* For buffers */ - uint32_t dimensions; /* Needed for e.g. sampler%iD */ - uint32_t elems; /* 0 - scalar, 1 or more - vector */ - VkShaderStageFlags stages; - uint32_t buf_elems; /* Appends [buf_elems] to the contents. Avoids manually printing to a string. 
*/ - VkSampler samplers[4]; /* Sampler to use for all elems */ -} FFVulkanDescriptorSetBinding; - -typedef struct FFVkBuffer { - VkBuffer buf; - VkDeviceMemory mem; - VkMemoryPropertyFlagBits flags; - size_t size; - VkDeviceAddress address; - - /* Local use only */ - VkPipelineStageFlags2 stage; - VkAccessFlags2 access; - - /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE or - * via ff_vk_host_map_buffer */ - uint8_t *mapped_mem; - - /* Set by ff_vk_host_map_buffer. This is the offset at which the buffer data - * actually begins at. - * The address and mapped_mem fields will be offset by this amount. */ - size_t virtual_offset; - - /* If host mapping, reference to the backing host memory buffer */ - AVBufferRef *host_ref; -} FFVkBuffer; - -typedef struct FFVkExecContext { - uint32_t idx; - const struct FFVkExecPool *parent; - int had_submission; - - /* Queue for the execution context */ - VkQueue queue; - int qf; - int qi; - - /* Command buffer for the context */ - VkCommandBuffer buf; - - /* Fence for the command buffer */ - VkFence fence; - - /* Opaque data, untouched, free to use by users */ - void *opaque; - - void *query_data; - int query_idx; - - /* Buffer dependencies */ - AVBufferRef **buf_deps; - int nb_buf_deps; - unsigned int buf_deps_alloc_size; - - /* Frame dependencies */ - AVFrame **frame_deps; - unsigned int frame_deps_alloc_size; - int nb_frame_deps; - - /* Software frame dependencies */ - AVFrame **sw_frame_deps; - unsigned int sw_frame_deps_alloc_size; - int nb_sw_frame_deps; - - VkSemaphoreSubmitInfo *sem_wait; - unsigned int sem_wait_alloc; - int sem_wait_cnt; - - VkSemaphoreSubmitInfo *sem_sig; - unsigned int sem_sig_alloc; - int sem_sig_cnt; - - uint64_t **sem_sig_val_dst; - unsigned int sem_sig_val_dst_alloc; - int sem_sig_val_dst_cnt; - - uint8_t *frame_locked; - unsigned int frame_locked_alloc_size; - - VkAccessFlagBits *access_dst; - unsigned int access_dst_alloc; - - VkImageLayout *layout_dst; - unsigned int 
layout_dst_alloc; - - uint32_t *queue_family_dst; - unsigned int queue_family_dst_alloc; - - uint8_t *frame_update; - unsigned int frame_update_alloc_size; -} FFVkExecContext; - -typedef struct FFVulkanDescriptorSet { - /* Descriptor buffer */ - VkDeviceSize layout_size; - VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ - VkBufferUsageFlags usage; - - VkDescriptorSetLayoutBinding *binding; - VkDeviceSize *binding_offset; - int nb_bindings; - - /* Descriptor set is shared between all submissions */ - int singular; -} FFVulkanDescriptorSet; - -typedef struct FFVulkanShader { - /* Name for id/debugging purposes */ - const char *name; - - /* Shader text */ - AVBPrint src; - - /* Compute shader local group sizes */ - int lg_size[3]; - - /* Shader bind point/type */ - VkPipelineStageFlags stage; - VkPipelineBindPoint bind_point; - - /* Creation info */ - VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroup_info; - - /* Base shader object */ - VkShaderEXT object; - VkPipeline pipeline; - - /* Pipeline layout */ - VkPipelineLayout pipeline_layout; - - /* Push consts */ - VkPushConstantRange *push_consts; - int push_consts_num; - - /* Descriptor sets */ - FFVulkanDescriptorSet *desc_set; - int nb_descriptor_sets; - - /* Descriptor buffer */ - VkDescriptorSetLayout *desc_layout; - uint32_t *bound_buffer_indices; - - /* Descriptor pool */ - int use_push; - VkDescriptorPoolSize *desc_pool_size; - int nb_desc_pool_size; -} FFVulkanShader; - -typedef struct FFVulkanDescriptorSetData { - /* Descriptor buffer */ - FFVkBuffer buf; - uint8_t *desc_mem; -} FFVulkanDescriptorSetData; - -typedef struct FFVulkanShaderData { - /* Shader to which this data belongs to */ - FFVulkanShader *shd; - int nb_descriptor_sets; - - /* Descriptor buffer */ - FFVulkanDescriptorSetData *desc_set_buf; - VkDescriptorBufferBindingInfoEXT *desc_bind; - - /* Descriptor pools */ - VkDescriptorSet *desc_sets; - VkDescriptorPool desc_pool; -} FFVulkanShaderData; - -typedef struct 
FFVkExecPool { - FFVkExecContext *contexts; - atomic_uint_least64_t idx; - - VkCommandPool *cmd_buf_pools; - VkCommandBuffer *cmd_bufs; - int pool_size; - - VkQueryPool query_pool; - void *query_data; - int query_results; - int query_statuses; - int query_64bit; - int query_status_stride; - int nb_queries; - size_t qd_size; - - /* Registered shaders' data */ - FFVulkanShaderData *reg_shd; - int nb_reg_shd; -} FFVkExecPool; - -typedef struct FFVulkanContext { - const AVClass *class; - void *log_parent; - - FFVulkanFunctions vkfn; - FFVulkanExtensions extensions; - VkPhysicalDeviceProperties2 props; - VkPhysicalDeviceVulkan11Properties props_11; - VkPhysicalDeviceDriverProperties driver_props; - VkPhysicalDeviceMemoryProperties mprops; - VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; - VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props; - VkPhysicalDeviceSubgroupSizeControlProperties subgroup_props; - VkPhysicalDeviceCooperativeMatrixPropertiesKHR coop_matrix_props; - VkPhysicalDevicePushDescriptorPropertiesKHR push_desc_props; - VkPhysicalDeviceOpticalFlowPropertiesNV optical_flow_props; - VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; - VkQueueFamilyVideoPropertiesKHR *video_props; - VkQueueFamilyProperties2 *qf_props; - int tot_nb_qfs; - VkPhysicalDeviceHostImageCopyPropertiesEXT host_image_props; - VkImageLayout *host_image_copy_layouts; - - VkCooperativeMatrixPropertiesKHR *coop_mat_props; - uint32_t coop_mat_props_nb; - - VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feats; - VkPhysicalDeviceVulkan12Features feats_12; - VkPhysicalDeviceFeatures2 feats; - - AVBufferRef *device_ref; - AVHWDeviceContext *device; - AVVulkanDeviceContext *hwctx; - - AVBufferRef *input_frames_ref; - AVBufferRef *frames_ref; - AVHWFramesContext *frames; - AVVulkanFramesContext *hwfc; - - uint32_t qfs[64]; - int nb_qfs; - - /* Properties */ - int output_width; - int output_height; - enum AVPixelFormat output_format; - enum AVPixelFormat 
input_format; -} FFVulkanContext; - -static inline int ff_vk_count_images(AVVkFrame *f) -{ - int cnt = 0; - while (cnt < FF_ARRAY_ELEMS(f->img) && f->img[cnt]) - cnt++; - - return cnt; -} - -static inline const void *ff_vk_find_struct(const void *chain, VkStructureType stype) -{ - const VkBaseInStructure *in = chain; - while (in) { - if (in->sType == stype) - return in; - - in = in->pNext; - } - - return NULL; -} - -static inline void ff_vk_link_struct(void *chain, const void *in) -{ - VkBaseOutStructure *out = chain; - while (out->pNext) - out = out->pNext; - - out->pNext = (void *)in; -} - -#define FF_VK_STRUCT_EXT(CTX, BASE, STRUCT_P, EXT_FLAG, TYPE) \ - do { \ - if ((EXT_FLAG == FF_VK_EXT_NO_FLAG) || \ - ((CTX)->extensions & EXT_FLAG)) { \ - (STRUCT_P)->sType = TYPE; \ - ff_vk_link_struct(BASE, STRUCT_P); \ - } \ - } while (0) - -/* Identity mapping - r = r, b = b, g = g, a = a */ -extern const VkComponentMapping ff_comp_identity_map; - -/** - * Initializes the AVClass, in case this context is not used - * as the main user's context. - * May use either a frames context reference, or a device context reference. - */ -int ff_vk_init(FFVulkanContext *s, void *log_parent, - AVBufferRef *device_ref, AVBufferRef *frames_ref); - -/** - * Converts Vulkan return values to strings - */ -const char *ff_vk_ret2str(VkResult res); - -/** - * Map between usage and features. - */ -VkImageUsageFlags ff_vk_map_feats_to_usage(VkFormatFeatureFlagBits2 feats); -VkFormatFeatureFlagBits2 ff_vk_map_usage_to_feats(VkImageUsageFlags usage); - -/** - * Returns 1 if pixfmt is a usable RGB format. - */ -int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); - -/** - * Since storage images may not be swizzled, we have to do this in the - * shader itself. This fills in a lookup table to do it. - */ -void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4], int inv); - -/** - * Get the aspect flag for a plane from an image. 
- */ -VkImageAspectFlags ff_vk_aspect_flag(AVFrame *f, int p); - -/** - * Returns the format to use for images in shaders. - */ -enum FFVkShaderRepFormat { - /* Native format with no conversion. May require casting. */ - FF_VK_REP_NATIVE = 0, - /* Float conversion of the native format. */ - FF_VK_REP_FLOAT, - /* Signed integer version of the native format */ - FF_VK_REP_INT, - /* Unsigned integer version of the native format */ - FF_VK_REP_UINT, -}; -const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, - enum FFVkShaderRepFormat rep_fmt); - -/** - * Loads props/mprops/driver_props - */ -int ff_vk_load_props(FFVulkanContext *s); - -/** - * Chooses an appropriate QF. - */ -AVVulkanDeviceQueueFamily *ff_vk_qf_find(FFVulkanContext *s, - VkQueueFlagBits dev_family, - VkVideoCodecOperationFlagBitsKHR vid_ops); - -/** - * Allocates/frees an execution pool. - * If used in a multi-threaded context, there must be at least as many contexts - * as there are threads. - * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add() - * has been called. - */ -int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, - FFVkExecPool *pool, int nb_contexts, - int nb_queries, VkQueryType query_type, int query_64bit, - const void *query_create_pnext); -void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool); - -/** - * Retrieve an execution pool. Threadsafe. - */ -FFVkExecContext *ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool); - -/** - * Performs nb_queries queries and returns their results and statuses. - * 64_BIT and WITH_STATUS flags are ignored as 64_BIT must be specified via - * query_64bit in ff_vk_exec_pool_init() and WITH_STATUS is always enabled. - */ -VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, - void **data, VkQueryResultFlagBits flags); - -/** - * Start/submit/wait an execution. 
- * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait() - * is not necessary (unless using it is just better). - */ -int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e); -int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e); -void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Execution dependency management. - * Can attach buffers to executions that will only be unref'd once the - * buffer has finished executing. - * Adding a frame dep will *lock the frame*, until either the dependencies - * are discarded, the execution is submitted, or a failure happens. - * update_frame will update the frame's properties before it is unlocked, - * only if submission was successful. - */ -int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps, int ref); -int ff_vk_exec_add_dep_wait_sem(FFVulkanContext *s, FFVkExecContext *e, - VkSemaphore sem, uint64_t val, - VkPipelineStageFlagBits2 stage); -int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e, - VkSemaphore *sem, int nb, - VkPipelineStageFlagBits2 stage, - int wait); /* Ownership transferred if !wait */ -int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, - VkPipelineStageFlagBits2 wait_stage, - VkPipelineStageFlagBits2 signal_stage); -int ff_vk_exec_add_dep_sw_frame(FFVulkanContext *s, FFVkExecContext *e, - AVFrame *f); -void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, - VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar); -int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, - VkSemaphore *dst, uint64_t *dst_val, - AVFrame *f); -void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Create a single imageview for a given plane. 
- */ -int ff_vk_create_imageview(FFVulkanContext *s, - VkImageView *img_view, VkImageAspectFlags *aspect, - AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt); - -/** - * Create an imageview and add it as a dependency to an execution. - */ -int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, - VkImageView views[AV_NUM_DATA_POINTERS], - AVFrame *f, enum FFVkShaderRepFormat rep_fmt); - -void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, - AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, - VkPipelineStageFlags src_stage, - VkPipelineStageFlags dst_stage, - VkAccessFlagBits new_access, - VkImageLayout new_layout, - uint32_t new_qf); - -/** - * Memory/buffer/image allocation helpers. - */ -int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, - void *pNext, void *alloc_pNext, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); - -/** - * Buffer management code. - */ -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], - int nb_buffers, int invalidate); -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, - int flush); - -static inline int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, - int invalidate) -{ - return ff_vk_map_buffers(s, (FFVkBuffer *[]){ buf }, mem, - 1, invalidate); -} - -static inline int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush) -{ - return ff_vk_unmap_buffers(s, (FFVkBuffer *[]){ buf }, 1, flush); -} - -void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); - -/** Initialize a pool and create AVBufferRefs containing FFVkBuffer. - * Threadsafe to use. Buffers are automatically mapped on creation if - * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT is set in mem_props. 
Users should - * synchronize access themselvesd. Mainly meant for device-local buffers. */ -int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, - AVBufferRef **buf, VkBufferUsageFlags usage, - void *create_pNext, size_t size, - VkMemoryPropertyFlagBits mem_props); - -/** Maps a system RAM buffer into a Vulkan buffer. - * References the source buffer. - */ -int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst, - uint8_t *src_data, const AVBufferRef *src_buf, - VkBufferUsageFlags usage); - -/** - * Create a sampler. - */ -int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, - int unnorm_coords, VkFilter filt); - -/** - * Initialize a shader object, with a specific set of extensions, type+bind, - * local group size, and subgroup requirements. - */ -int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, - VkPipelineStageFlags stage, - const char *extensions[], int nb_extensions, - int lg_x, int lg_y, int lg_z, - uint32_t required_subgroup_size); - -/** - * Output the shader code as logging data, with a specific - * priority. - */ -void ff_vk_shader_print(void *ctx, FFVulkanShader *shd, int prio); - -/** - * Link a shader into an executable. - */ -int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, - uint8_t *spirv, size_t spirv_len, - const char *entrypoint); - -/** - * Add/update push constants for execution. - */ -int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, - VkShaderStageFlagBits stage); - -/** - * Add descriptor to a shader. Must be called before shader init. - */ -int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, - FFVulkanDescriptorSetBinding *desc, int nb, - int singular, int print_to_shader_only); - -/** - * Register a shader with an exec pool. - * Pool may be NULL if all descriptor sets are read-only. 
- */ -int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, - FFVulkanShader *shd); - -/** - * Bind a shader. - */ -void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd); - -/** - * Update push constant in a shader. - * Must be called before binding the shader. - */ -void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, - VkShaderStageFlagBits stage, - int offset, size_t size, void *src); - -/** - * Update a descriptor in a buffer with a buffer. - * Must be called before binding the shader. - */ -int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, - int set, int bind, int elem, - FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, - VkFormat fmt); - -/** - * Sets an image descriptor for specified shader and binding. - */ -int ff_vk_shader_update_img(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, int set, int bind, int offs, - VkImageView view, VkImageLayout layout, - VkSampler sampler); - -/** - * Update a descriptor in a buffer with an image array.. - * Must be called before binding the shader. - */ -void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanShader *shd, AVFrame *f, - VkImageView *views, int set, int binding, - VkImageLayout layout, VkSampler sampler); - -/** - * Free a shader. - */ -void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd); - -/** - * Frees main context. 
- */ -void ff_vk_uninit(FFVulkanContext *s); - -#endif /* AVUTIL_VULKAN_H */ -- 2.49.1 From de4430792c625521171e760e1664969858a10838 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:18:41 +0000 Subject: [PATCH 046/118] Changing vulkan file directory --- libavutil/vulkan/vulkan.h | 665 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 665 insertions(+) create mode 100644 libavutil/vulkan/vulkan.h diff --git a/libavutil/vulkan/vulkan.h b/libavutil/vulkan/vulkan.h new file mode 100644 index 0000000000..84f78a6485 --- /dev/null +++ b/libavutil/vulkan/vulkan.h @@ -0,0 +1,665 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_VULKAN_H +#define AVUTIL_VULKAN_H + +#define VK_NO_PROTOTYPES + +#include <stdatomic.h> + +#include "libavutil/pixdesc.h" +#include "libavutil/bprint.h" +#include "libavutil/hwcontext.h" +#include "vulkan_functions.h" +#include "hwcontext_vulkan.h" + +/* GLSL management macros */ +#define INDENT(N) INDENT_##N +#define INDENT_0 +#define INDENT_1 INDENT_0 " " +#define INDENT_2 INDENT_1 INDENT_1 +#define INDENT_3 INDENT_2 INDENT_1 +#define INDENT_4 INDENT_3 INDENT_1 +#define INDENT_5 INDENT_4 INDENT_1 +#define INDENT_6 INDENT_5 INDENT_1 +#define C(N, S) INDENT(N) #S "\n" + +#define GLSLC(N, S) \ + do { \ + av_bprintf(&shd->src, C(N, S)); \ + } while (0) + +#define GLSLA(...) \ + do { \ + av_bprintf(&shd->src, __VA_ARGS__); \ + } while (0) + +#define GLSLF(N, S, ...) \ + do { \ + av_bprintf(&shd->src, C(N, S), __VA_ARGS__); \ + } while (0) + +#define GLSLD(D) \ + do { \ + av_bprintf(&shd->src, "\n"); \ + av_bprint_append_data(&shd->src, D, strlen(D)); \ + av_bprintf(&shd->src, "\n"); \ + } while (0) + +/* Helper, pretty much every Vulkan return value needs to be checked */ +#define RET(x) \ + do { \ + if ((err = (x)) < 0) \ + goto fail; \ + } while (0) + +#define DUP_SAMPLER(x) { x, x, x, x } + +typedef struct FFVulkanDescriptorSetBinding { + const char *name; + VkDescriptorType type; + const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */ + const char *mem_quali; /* readonly, writeonly, etc. */ + const char *buf_content; /* For buffers */ + uint32_t dimensions; /* Needed for e.g. sampler%iD */ + uint32_t elems; /* 0 - scalar, 1 or more - vector */ + VkShaderStageFlags stages; + uint32_t buf_elems; /* Appends [buf_elems] to the contents. Avoids manually printing to a string. 
*/ + VkSampler samplers[4]; /* Sampler to use for all elems */ +} FFVulkanDescriptorSetBinding; + +typedef struct FFVkBuffer { + VkBuffer buf; + VkDeviceMemory mem; + VkMemoryPropertyFlagBits flags; + size_t size; + VkDeviceAddress address; + + /* Local use only */ + VkPipelineStageFlags2 stage; + VkAccessFlags2 access; + + /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE or + * via ff_vk_host_map_buffer */ + uint8_t *mapped_mem; + + /* Set by ff_vk_host_map_buffer. This is the offset at which the buffer data + * actually begins at. + * The address and mapped_mem fields will be offset by this amount. */ + size_t virtual_offset; + + /* If host mapping, reference to the backing host memory buffer */ + AVBufferRef *host_ref; +} FFVkBuffer; + +typedef struct FFVkExecContext { + uint32_t idx; + const struct FFVkExecPool *parent; + int had_submission; + + /* Queue for the execution context */ + VkQueue queue; + int qf; + int qi; + + /* Command buffer for the context */ + VkCommandBuffer buf; + + /* Fence for the command buffer */ + VkFence fence; + + /* Opaque data, untouched, free to use by users */ + void *opaque; + + void *query_data; + int query_idx; + + /* Buffer dependencies */ + AVBufferRef **buf_deps; + int nb_buf_deps; + unsigned int buf_deps_alloc_size; + + /* Frame dependencies */ + AVFrame **frame_deps; + unsigned int frame_deps_alloc_size; + int nb_frame_deps; + + /* Software frame dependencies */ + AVFrame **sw_frame_deps; + unsigned int sw_frame_deps_alloc_size; + int nb_sw_frame_deps; + + VkSemaphoreSubmitInfo *sem_wait; + unsigned int sem_wait_alloc; + int sem_wait_cnt; + + VkSemaphoreSubmitInfo *sem_sig; + unsigned int sem_sig_alloc; + int sem_sig_cnt; + + uint64_t **sem_sig_val_dst; + unsigned int sem_sig_val_dst_alloc; + int sem_sig_val_dst_cnt; + + uint8_t *frame_locked; + unsigned int frame_locked_alloc_size; + + VkAccessFlagBits *access_dst; + unsigned int access_dst_alloc; + + VkImageLayout *layout_dst; + unsigned int 
layout_dst_alloc; + + uint32_t *queue_family_dst; + unsigned int queue_family_dst_alloc; + + uint8_t *frame_update; + unsigned int frame_update_alloc_size; +} FFVkExecContext; + +typedef struct FFVulkanDescriptorSet { + /* Descriptor buffer */ + VkDeviceSize layout_size; + VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ + VkBufferUsageFlags usage; + + VkDescriptorSetLayoutBinding *binding; + VkDeviceSize *binding_offset; + int nb_bindings; + + /* Descriptor set is shared between all submissions */ + int singular; +} FFVulkanDescriptorSet; + +typedef struct FFVulkanShader { + /* Name for id/debugging purposes */ + const char *name; + + /* Shader text */ + AVBPrint src; + + /* Compute shader local group sizes */ + int lg_size[3]; + + /* Shader bind point/type */ + VkPipelineStageFlags stage; + VkPipelineBindPoint bind_point; + + /* Creation info */ + VkPipelineShaderStageRequiredSubgroupSizeCreateInfo subgroup_info; + + /* Base shader object */ + VkShaderEXT object; + VkPipeline pipeline; + + /* Pipeline layout */ + VkPipelineLayout pipeline_layout; + + /* Push consts */ + VkPushConstantRange *push_consts; + int push_consts_num; + + /* Descriptor sets */ + FFVulkanDescriptorSet *desc_set; + int nb_descriptor_sets; + + /* Descriptor buffer */ + VkDescriptorSetLayout *desc_layout; + uint32_t *bound_buffer_indices; + + /* Descriptor pool */ + int use_push; + VkDescriptorPoolSize *desc_pool_size; + int nb_desc_pool_size; +} FFVulkanShader; + +typedef struct FFVulkanDescriptorSetData { + /* Descriptor buffer */ + FFVkBuffer buf; + uint8_t *desc_mem; +} FFVulkanDescriptorSetData; + +typedef struct FFVulkanShaderData { + /* Shader to which this data belongs to */ + FFVulkanShader *shd; + int nb_descriptor_sets; + + /* Descriptor buffer */ + FFVulkanDescriptorSetData *desc_set_buf; + VkDescriptorBufferBindingInfoEXT *desc_bind; + + /* Descriptor pools */ + VkDescriptorSet *desc_sets; + VkDescriptorPool desc_pool; +} FFVulkanShaderData; + +typedef struct 
FFVkExecPool { + FFVkExecContext *contexts; + atomic_uint_least64_t idx; + + VkCommandPool *cmd_buf_pools; + VkCommandBuffer *cmd_bufs; + int pool_size; + + VkQueryPool query_pool; + void *query_data; + int query_results; + int query_statuses; + int query_64bit; + int query_status_stride; + int nb_queries; + size_t qd_size; + + /* Registered shaders' data */ + FFVulkanShaderData *reg_shd; + int nb_reg_shd; +} FFVkExecPool; + +typedef struct FFVulkanContext { + const AVClass *class; + void *log_parent; + + FFVulkanFunctions vkfn; + FFVulkanExtensions extensions; + VkPhysicalDeviceProperties2 props; + VkPhysicalDeviceVulkan11Properties props_11; + VkPhysicalDeviceDriverProperties driver_props; + VkPhysicalDeviceMemoryProperties mprops; + VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; + VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props; + VkPhysicalDeviceSubgroupSizeControlProperties subgroup_props; + VkPhysicalDeviceCooperativeMatrixPropertiesKHR coop_matrix_props; + VkPhysicalDevicePushDescriptorPropertiesKHR push_desc_props; + VkPhysicalDeviceOpticalFlowPropertiesNV optical_flow_props; + VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; + VkQueueFamilyVideoPropertiesKHR *video_props; + VkQueueFamilyProperties2 *qf_props; + int tot_nb_qfs; + VkPhysicalDeviceHostImageCopyPropertiesEXT host_image_props; + VkImageLayout *host_image_copy_layouts; + + VkCooperativeMatrixPropertiesKHR *coop_mat_props; + uint32_t coop_mat_props_nb; + + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feats; + VkPhysicalDeviceVulkan12Features feats_12; + VkPhysicalDeviceFeatures2 feats; + + AVBufferRef *device_ref; + AVHWDeviceContext *device; + AVVulkanDeviceContext *hwctx; + + AVBufferRef *input_frames_ref; + AVBufferRef *frames_ref; + AVHWFramesContext *frames; + AVVulkanFramesContext *hwfc; + + uint32_t qfs[64]; + int nb_qfs; + + /* Properties */ + int output_width; + int output_height; + enum AVPixelFormat output_format; + enum AVPixelFormat 
input_format; +} FFVulkanContext; + +static inline int ff_vk_count_images(AVVkFrame *f) +{ + int cnt = 0; + while (cnt < FF_ARRAY_ELEMS(f->img) && f->img[cnt]) + cnt++; + + return cnt; +} + +static inline const void *ff_vk_find_struct(const void *chain, VkStructureType stype) +{ + const VkBaseInStructure *in = chain; + while (in) { + if (in->sType == stype) + return in; + + in = in->pNext; + } + + return NULL; +} + +static inline void ff_vk_link_struct(void *chain, const void *in) +{ + VkBaseOutStructure *out = chain; + while (out->pNext) + out = out->pNext; + + out->pNext = (void *)in; +} + +#define FF_VK_STRUCT_EXT(CTX, BASE, STRUCT_P, EXT_FLAG, TYPE) \ + do { \ + if ((EXT_FLAG == FF_VK_EXT_NO_FLAG) || \ + ((CTX)->extensions & EXT_FLAG)) { \ + (STRUCT_P)->sType = TYPE; \ + ff_vk_link_struct(BASE, STRUCT_P); \ + } \ + } while (0) + +/* Identity mapping - r = r, b = b, g = g, a = a */ +extern const VkComponentMapping ff_comp_identity_map; + +/** + * Initializes the AVClass, in case this context is not used + * as the main user's context. + * May use either a frames context reference, or a device context reference. + */ +int ff_vk_init(FFVulkanContext *s, void *log_parent, + AVBufferRef *device_ref, AVBufferRef *frames_ref); + +/** + * Converts Vulkan return values to strings + */ +const char *ff_vk_ret2str(VkResult res); + +/** + * Map between usage and features. + */ +VkImageUsageFlags ff_vk_map_feats_to_usage(VkFormatFeatureFlagBits2 feats); +VkFormatFeatureFlagBits2 ff_vk_map_usage_to_feats(VkImageUsageFlags usage); + +/** + * Returns 1 if pixfmt is a usable RGB format. + */ +int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); + +/** + * Since storage images may not be swizzled, we have to do this in the + * shader itself. This fills in a lookup table to do it. + */ +void ff_vk_set_perm(enum AVPixelFormat pix_fmt, int lut[4], int inv); + +/** + * Get the aspect flag for a plane from an image. 
+ */ +VkImageAspectFlags ff_vk_aspect_flag(AVFrame *f, int p); + +/** + * Returns the format to use for images in shaders. + */ +enum FFVkShaderRepFormat { + /* Native format with no conversion. May require casting. */ + FF_VK_REP_NATIVE = 0, + /* Float conversion of the native format. */ + FF_VK_REP_FLOAT, + /* Signed integer version of the native format */ + FF_VK_REP_INT, + /* Unsigned integer version of the native format */ + FF_VK_REP_UINT, +}; +const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, + enum FFVkShaderRepFormat rep_fmt); + +/** + * Loads props/mprops/driver_props + */ +int ff_vk_load_props(FFVulkanContext *s); + +/** + * Chooses an appropriate QF. + */ +AVVulkanDeviceQueueFamily *ff_vk_qf_find(FFVulkanContext *s, + VkQueueFlagBits dev_family, + VkVideoCodecOperationFlagBitsKHR vid_ops); + +/** + * Allocates/frees an execution pool. + * If used in a multi-threaded context, there must be at least as many contexts + * as there are threads. + * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add() + * has been called. + */ +int ff_vk_exec_pool_init(FFVulkanContext *s, AVVulkanDeviceQueueFamily *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + const void *query_create_pnext); +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool); + +/** + * Retrieve an execution context from the pool. Threadsafe. + */ +FFVkExecContext *ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool); + +/** + * Performs nb_queries queries and returns their results and statuses. + * 64_BIT and WITH_STATUS flags are ignored as 64_BIT must be specified via + * query_64bit in ff_vk_exec_pool_init() and WITH_STATUS is always enabled. + */ +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, VkQueryResultFlagBits flags); + +/** + * Start/submit/wait an execution.
+ * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait() + * is not necessary (unless using it is just better). + */ +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e); +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e); +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e); + +/** + * Execution dependency management. + * Can attach buffers to executions that will only be unref'd once the + * buffer has finished executing. + * Adding a frame dep will *lock the frame*, until either the dependencies + * are discarded, the execution is submitted, or a failure happens. + * update_frame will update the frame's properties before it is unlocked, + * only if submission was successful. + */ +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref); +int ff_vk_exec_add_dep_wait_sem(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore sem, uint64_t val, + VkPipelineStageFlagBits2 stage); +int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *sem, int nb, + VkPipelineStageFlagBits2 stage, + int wait); /* Ownership transferred if !wait */ +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage); +int ff_vk_exec_add_dep_sw_frame(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *f); +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar); +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f); +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); + +/** + * Create a single imageview for a given plane. 
+ */ +int ff_vk_create_imageview(FFVulkanContext *s, + VkImageView *img_view, VkImageAspectFlags *aspect, + AVFrame *f, int plane, enum FFVkShaderRepFormat rep_fmt); + +/** + * Create an imageview and add it as a dependency to an execution. + */ +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f, enum FFVkShaderRepFormat rep_fmt); + +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf); + +/** + * Memory/buffer/image allocation helpers. + */ +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); + +/** + * Buffer management code. + */ +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], + int nb_buffers, int invalidate); +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, + int flush); + +static inline int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, + int invalidate) +{ + return ff_vk_map_buffers(s, (FFVkBuffer *[]){ buf }, mem, + 1, invalidate); +} + +static inline int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush) +{ + return ff_vk_unmap_buffers(s, (FFVkBuffer *[]){ buf }, 1, flush); +} + +void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); + +/** Initialize a pool and create AVBufferRefs containing FFVkBuffer. + * Threadsafe to use. Buffers are automatically mapped on creation if + * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT is set in mem_props. 
Users should + * synchronize access themselves. Mainly meant for device-local buffers. */ +int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size, + VkMemoryPropertyFlagBits mem_props); + +/** Maps a system RAM buffer into a Vulkan buffer. + * References the source buffer. + */ +int ff_vk_host_map_buffer(FFVulkanContext *s, AVBufferRef **dst, + uint8_t *src_data, const AVBufferRef *src_buf, + VkBufferUsageFlags usage); + +/** + * Create a sampler. + */ +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt); + +/** + * Initialize a shader object, with a specific set of extensions, type+bind, + * local group size, and subgroup requirements. + */ +int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, + VkPipelineStageFlags stage, + const char *extensions[], int nb_extensions, + int lg_x, int lg_y, int lg_z, + uint32_t required_subgroup_size); + +/** + * Output the shader code as logging data, with a specific + * priority. + */ +void ff_vk_shader_print(void *ctx, FFVulkanShader *shd, int prio); + +/** + * Link a shader into an executable. + */ +int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, + uint8_t *spirv, size_t spirv_len, + const char *entrypoint); + +/** + * Add/update push constants for execution. + */ +int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, + VkShaderStageFlagBits stage); + +/** + * Add descriptor to a shader. Must be called before shader init. + */ +int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int singular, int print_to_shader_only); + +/** + * Register a shader with an exec pool. + * Pool may be NULL if all descriptor sets are read-only.
+ */ +int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanShader *shd); + +/** + * Bind a shader. + */ +void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd); + +/** + * Update push constant in a shader. + * Must be called before binding the shader. + */ +void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src); + +/** + * Update a descriptor in a buffer with a buffer. + * Must be called before binding the shader. + */ +int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, + int set, int bind, int elem, + FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, + VkFormat fmt); + +/** + * Sets an image descriptor for specified shader and binding. + */ +int ff_vk_shader_update_img(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, + VkSampler sampler); + +/** + * Update a descriptor in a buffer with an image array. + * Must be called before binding the shader. + */ +void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler); + +/** + * Free a shader. + */ +void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd); + +/** + * Frees main context.
+ */ +void ff_vk_uninit(FFVulkanContext *s); + +#endif /* AVUTIL_VULKAN_H */ -- 2.49.1 From 012604071c97e873dec9a6548e271a6f07dbd73c Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:19:19 +0000 Subject: [PATCH 047/118] Changing vulkan file directory --- libavutil/vulkan_glslang.c | 308 ------------------------------------- 1 file changed, 308 deletions(-) delete mode 100644 libavutil/vulkan_glslang.c diff --git a/libavutil/vulkan_glslang.c b/libavutil/vulkan_glslang.c deleted file mode 100644 index 4cf54de854..0000000000 --- a/libavutil/vulkan_glslang.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <pthread.h> - -#include <glslang/build_info.h> -#include <glslang/Include/glslang_c_interface.h> - -#include "vulkan_spirv.h" -#include "libavutil/mem.h" -#include "libavutil/avassert.h" - -static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER; -static int glslc_refcount = 0; - -static const glslang_resource_t glslc_resource_limits = { - .max_lights = 32, - .max_clip_planes = 6, - .max_texture_units = 32, - .max_texture_coords = 32, - .max_vertex_attribs = 64, - .max_vertex_uniform_components = 4096, - .max_varying_floats = 64, - .max_vertex_texture_image_units = 32, - .max_combined_texture_image_units = 80, - .max_texture_image_units = 32, - .max_fragment_uniform_components = 4096, - .max_draw_buffers = 32, - .max_vertex_uniform_vectors = 128, - .max_varying_vectors = 8, - .max_fragment_uniform_vectors = 16, - .max_vertex_output_vectors = 16, - .max_fragment_input_vectors = 15, - .min_program_texel_offset = -8, - .max_program_texel_offset = 7, - .max_clip_distances = 8, - .max_compute_work_group_count_x = 65535, - .max_compute_work_group_count_y = 65535, - .max_compute_work_group_count_z = 65535, - .max_compute_work_group_size_x = 1024, - .max_compute_work_group_size_y = 1024, - .max_compute_work_group_size_z = 64, - .max_compute_uniform_components = 1024, - .max_compute_texture_image_units = 16, - .max_compute_image_uniforms = 8, - .max_compute_atomic_counters = 8, - .max_compute_atomic_counter_buffers = 1, - .max_varying_components = 60, - .max_vertex_output_components = 64, - .max_geometry_input_components = 64, - .max_geometry_output_components = 128, - .max_fragment_input_components = 128, - .max_image_units = 8, - .max_combined_image_units_and_fragment_outputs = 8, - .max_combined_shader_output_resources = 8, - 
.max_image_samples = 0, - .max_vertex_image_uniforms = 0, - .max_tess_control_image_uniforms = 0, - .max_tess_evaluation_image_uniforms = 0, - .max_geometry_image_uniforms = 0, - .max_fragment_image_uniforms = 8, - .max_combined_image_uniforms = 8, - .max_geometry_texture_image_units = 16, - .max_geometry_output_vertices = 256, - .max_geometry_total_output_components = 1024, - .max_geometry_uniform_components = 1024, - .max_geometry_varying_components = 64, - .max_tess_control_input_components = 128, - .max_tess_control_output_components = 128, - .max_tess_control_texture_image_units = 16, - .max_tess_control_uniform_components = 1024, - .max_tess_control_total_output_components = 4096, - .max_tess_evaluation_input_components = 128, - .max_tess_evaluation_output_components = 128, - .max_tess_evaluation_texture_image_units = 16, - .max_tess_evaluation_uniform_components = 1024, - .max_tess_patch_components = 120, - .max_patch_vertices = 32, - .max_tess_gen_level = 64, - .max_viewports = 16, - .max_vertex_atomic_counters = 0, - .max_tess_control_atomic_counters = 0, - .max_tess_evaluation_atomic_counters = 0, - .max_geometry_atomic_counters = 0, - .max_fragment_atomic_counters = 8, - .max_combined_atomic_counters = 8, - .max_atomic_counter_bindings = 1, - .max_vertex_atomic_counter_buffers = 0, - .max_tess_control_atomic_counter_buffers = 0, - .max_tess_evaluation_atomic_counter_buffers = 0, - .max_geometry_atomic_counter_buffers = 0, - .max_fragment_atomic_counter_buffers = 1, - .max_combined_atomic_counter_buffers = 1, - .max_atomic_counter_buffer_size = 16384, - .max_transform_feedback_buffers = 4, - .max_transform_feedback_interleaved_components = 64, - .max_cull_distances = 8, - .max_combined_clip_and_cull_distances = 8, - .max_samples = 4, - .max_mesh_output_vertices_nv = 256, - .max_mesh_output_primitives_nv = 512, - .max_mesh_work_group_size_x_nv = 32, - .max_mesh_work_group_size_y_nv = 1, - .max_mesh_work_group_size_z_nv = 1, - .max_task_work_group_size_x_nv 
= 32, - .max_task_work_group_size_y_nv = 1, - .max_task_work_group_size_z_nv = 1, - .max_mesh_view_count_nv = 4, - .maxDualSourceDrawBuffersEXT = 1, - - .limits = { - .non_inductive_for_loops = 1, - .while_loops = 1, - .do_while_loops = 1, - .general_uniform_indexing = 1, - .general_attribute_matrix_vector_indexing = 1, - .general_varying_indexing = 1, - .general_sampler_indexing = 1, - .general_variable_indexing = 1, - .general_constant_matrix_vector_indexing = 1, - } -}; - -static int glslc_shader_compile(FFVulkanContext *s, FFVkSPIRVCompiler *ctx, - FFVulkanShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, - void **opaque) -{ - const char *messages; - glslang_shader_t *glslc_shader; - glslang_program_t *glslc_program; - - static const glslang_stage_t glslc_stage[] = { - [VK_SHADER_STAGE_VERTEX_BIT] = GLSLANG_STAGE_VERTEX, - [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_STAGE_FRAGMENT, - [VK_SHADER_STAGE_COMPUTE_BIT] = GLSLANG_STAGE_COMPUTE, -#if ((GLSLANG_VERSION_MAJOR) > 12) - [VK_SHADER_STAGE_TASK_BIT_EXT] = GLSLANG_STAGE_TASK, - [VK_SHADER_STAGE_MESH_BIT_EXT] = GLSLANG_STAGE_MESH, - [VK_SHADER_STAGE_RAYGEN_BIT_KHR] = GLSLANG_STAGE_RAYGEN, - [VK_SHADER_STAGE_INTERSECTION_BIT_KHR] = GLSLANG_STAGE_INTERSECT, - [VK_SHADER_STAGE_ANY_HIT_BIT_KHR] = GLSLANG_STAGE_ANYHIT, - [VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR] = GLSLANG_STAGE_CLOSESTHIT, - [VK_SHADER_STAGE_MISS_BIT_KHR] = GLSLANG_STAGE_MISS, - [VK_SHADER_STAGE_CALLABLE_BIT_KHR] = GLSLANG_STAGE_CALLABLE, -#endif - }; - - const glslang_input_t glslc_input = { - .language = GLSLANG_SOURCE_GLSL, - .stage = glslc_stage[shd->stage], - .client = GLSLANG_CLIENT_VULKAN, -#if ((GLSLANG_VERSION_MAJOR) >= 12) - .client_version = GLSLANG_TARGET_VULKAN_1_3, - .target_language_version = GLSLANG_TARGET_SPV_1_6, -#else - .client_version = GLSLANG_TARGET_VULKAN_1_2, - .target_language_version = GLSLANG_TARGET_SPV_1_5, -#endif - .target_language = GLSLANG_TARGET_SPV, - .code = shd->src.str, - .default_version = 460, - 
.default_profile = GLSLANG_NO_PROFILE, - .force_default_version_and_profile = false, - .forward_compatible = false, - .messages = GLSLANG_MSG_DEFAULT_BIT, - .resource = &glslc_resource_limits, - }; - -#if ((GLSLANG_VERSION_MAJOR) >= 12) - glslang_spv_options_t glslc_opts = { - .generate_debug_info = !!(s->extensions & (FF_VK_EXT_DEBUG_UTILS | FF_VK_EXT_RELAXED_EXTENDED_INSTR)), - .emit_nonsemantic_shader_debug_info = !!(s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR), - .emit_nonsemantic_shader_debug_source = !!(s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR), - .disable_optimizer = !!(s->extensions & FF_VK_EXT_DEBUG_UTILS), - .strip_debug_info = !(s->extensions & (FF_VK_EXT_DEBUG_UTILS | FF_VK_EXT_RELAXED_EXTENDED_INSTR)), - .optimize_size = 0, - .disassemble = 0, - .validate = 1, - /* .compile_only = 0, */ - }; -#endif - - av_assert0(glslc_refcount); - - *opaque = NULL; - - if (!(glslc_shader = glslang_shader_create(&glslc_input))) - return AVERROR(ENOMEM); - - if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) { - ff_vk_shader_print(s, shd, AV_LOG_WARNING); - av_log(s, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n", - glslang_shader_get_info_log(glslc_shader), - glslang_shader_get_info_debug_log(glslc_shader)); - glslang_shader_delete(glslc_shader); - return AVERROR(EINVAL); - } - - if (!glslang_shader_parse(glslc_shader, &glslc_input)) { - ff_vk_shader_print(s, shd, AV_LOG_WARNING); - av_log(s, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n", - glslang_shader_get_info_log(glslc_shader), - glslang_shader_get_info_debug_log(glslc_shader)); - glslang_shader_delete(glslc_shader); - return AVERROR(EINVAL); - } - - if (!(glslc_program = glslang_program_create())) { - glslang_shader_delete(glslc_shader); - return AVERROR(EINVAL); - } - - glslang_program_add_shader(glslc_program, glslc_shader); - - if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT | - GLSLANG_MSG_VULKAN_RULES_BIT)) { - ff_vk_shader_print(s, shd, 
AV_LOG_WARNING); - av_log(s, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n", - glslang_program_get_info_log(glslc_program), - glslang_program_get_info_debug_log(glslc_program)); - glslang_program_delete(glslc_program); - glslang_shader_delete(glslc_shader); - return AVERROR(EINVAL); - } - -#if ((GLSLANG_VERSION_MAJOR) >= 12) - glslang_program_SPIRV_generate_with_options(glslc_program, glslc_input.stage, &glslc_opts); -#else - glslang_program_SPIRV_generate(glslc_program, glslc_input.stage); -#endif - - messages = glslang_program_SPIRV_get_messages(glslc_program); - if (messages) { - ff_vk_shader_print(s, shd, AV_LOG_WARNING); - av_log(s, AV_LOG_WARNING, "%s\n", messages); - } else { - ff_vk_shader_print(s, shd, AV_LOG_TRACE); - } - - glslang_shader_delete(glslc_shader); - - *size = glslang_program_SPIRV_get_size(glslc_program) * sizeof(unsigned int); - *data = (void *)glslang_program_SPIRV_get_ptr(glslc_program); - *opaque = glslc_program; - - return 0; -} - -static void glslc_shader_free(FFVkSPIRVCompiler *ctx, void **opaque) -{ - if (!opaque || !*opaque) - return; - - av_assert0(glslc_refcount); - glslang_program_delete(*opaque); - *opaque = NULL; -} - -static void glslc_uninit(FFVkSPIRVCompiler **ctx) -{ - if (!ctx || !*ctx) - return; - - pthread_mutex_lock(&glslc_mutex); - if (glslc_refcount && (--glslc_refcount == 0)) - glslang_finalize_process(); - pthread_mutex_unlock(&glslc_mutex); - - av_freep(ctx); -} - -FFVkSPIRVCompiler *ff_vk_glslang_init(void) -{ - FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); - if (!ret) - return NULL; - - ret->compile_shader = glslc_shader_compile; - ret->free_shader = glslc_shader_free; - ret->uninit = glslc_uninit; - - pthread_mutex_lock(&glslc_mutex); - if (!glslc_refcount++) { - if (!glslang_initialize_process()) { - av_freep(&ret); - glslc_refcount--; - } - } - pthread_mutex_unlock(&glslc_mutex); - - return ret; -} -- 2.49.1 From 191253f3cfe9e5bc7bb35b6a7eff0ce1a753d81d Mon Sep 17 00:00:00 2001 From: Jamaika1 
<lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:19:43 +0000 Subject: [PATCH 048/118] Changing vulkan file directory --- libavutil/vulkan/vulkan_glslang.c | 308 ++++++++++++++++++++++++++++++ 1 file changed, 308 insertions(+) create mode 100644 libavutil/vulkan/vulkan_glslang.c diff --git a/libavutil/vulkan/vulkan_glslang.c b/libavutil/vulkan/vulkan_glslang.c new file mode 100644 index 0000000000..4cf54de854 --- /dev/null +++ b/libavutil/vulkan/vulkan_glslang.c @@ -0,0 +1,308 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <pthread.h> + +#include <glslang/build_info.h> +#include <glslang/Include/glslang_c_interface.h> + +#include "vulkan_spirv.h" +#include "libavutil/mem.h" +#include "libavutil/avassert.h" + +static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER; +static int glslc_refcount = 0; + +static const glslang_resource_t glslc_resource_limits = { + .max_lights = 32, + .max_clip_planes = 6, + .max_texture_units = 32, + .max_texture_coords = 32, + .max_vertex_attribs = 64, + .max_vertex_uniform_components = 4096, + .max_varying_floats = 64, + .max_vertex_texture_image_units = 32, + .max_combined_texture_image_units = 80, + .max_texture_image_units = 32, + .max_fragment_uniform_components = 4096, + .max_draw_buffers = 32, + .max_vertex_uniform_vectors = 128, + .max_varying_vectors = 8, + .max_fragment_uniform_vectors = 16, + .max_vertex_output_vectors = 16, + .max_fragment_input_vectors = 15, + .min_program_texel_offset = -8, + .max_program_texel_offset = 7, + .max_clip_distances = 8, + .max_compute_work_group_count_x = 65535, + .max_compute_work_group_count_y = 65535, + .max_compute_work_group_count_z = 65535, + .max_compute_work_group_size_x = 1024, + .max_compute_work_group_size_y = 1024, + .max_compute_work_group_size_z = 64, + .max_compute_uniform_components = 1024, + .max_compute_texture_image_units = 16, + .max_compute_image_uniforms = 8, + .max_compute_atomic_counters = 8, + .max_compute_atomic_counter_buffers = 1, + .max_varying_components = 60, + .max_vertex_output_components = 64, + .max_geometry_input_components = 64, + .max_geometry_output_components = 128, + .max_fragment_input_components = 128, + .max_image_units = 8, + .max_combined_image_units_and_fragment_outputs = 8, + .max_combined_shader_output_resources = 8, + 
.max_image_samples = 0, + .max_vertex_image_uniforms = 0, + .max_tess_control_image_uniforms = 0, + .max_tess_evaluation_image_uniforms = 0, + .max_geometry_image_uniforms = 0, + .max_fragment_image_uniforms = 8, + .max_combined_image_uniforms = 8, + .max_geometry_texture_image_units = 16, + .max_geometry_output_vertices = 256, + .max_geometry_total_output_components = 1024, + .max_geometry_uniform_components = 1024, + .max_geometry_varying_components = 64, + .max_tess_control_input_components = 128, + .max_tess_control_output_components = 128, + .max_tess_control_texture_image_units = 16, + .max_tess_control_uniform_components = 1024, + .max_tess_control_total_output_components = 4096, + .max_tess_evaluation_input_components = 128, + .max_tess_evaluation_output_components = 128, + .max_tess_evaluation_texture_image_units = 16, + .max_tess_evaluation_uniform_components = 1024, + .max_tess_patch_components = 120, + .max_patch_vertices = 32, + .max_tess_gen_level = 64, + .max_viewports = 16, + .max_vertex_atomic_counters = 0, + .max_tess_control_atomic_counters = 0, + .max_tess_evaluation_atomic_counters = 0, + .max_geometry_atomic_counters = 0, + .max_fragment_atomic_counters = 8, + .max_combined_atomic_counters = 8, + .max_atomic_counter_bindings = 1, + .max_vertex_atomic_counter_buffers = 0, + .max_tess_control_atomic_counter_buffers = 0, + .max_tess_evaluation_atomic_counter_buffers = 0, + .max_geometry_atomic_counter_buffers = 0, + .max_fragment_atomic_counter_buffers = 1, + .max_combined_atomic_counter_buffers = 1, + .max_atomic_counter_buffer_size = 16384, + .max_transform_feedback_buffers = 4, + .max_transform_feedback_interleaved_components = 64, + .max_cull_distances = 8, + .max_combined_clip_and_cull_distances = 8, + .max_samples = 4, + .max_mesh_output_vertices_nv = 256, + .max_mesh_output_primitives_nv = 512, + .max_mesh_work_group_size_x_nv = 32, + .max_mesh_work_group_size_y_nv = 1, + .max_mesh_work_group_size_z_nv = 1, + .max_task_work_group_size_x_nv 
= 32, + .max_task_work_group_size_y_nv = 1, + .max_task_work_group_size_z_nv = 1, + .max_mesh_view_count_nv = 4, + .maxDualSourceDrawBuffersEXT = 1, + + .limits = { + .non_inductive_for_loops = 1, + .while_loops = 1, + .do_while_loops = 1, + .general_uniform_indexing = 1, + .general_attribute_matrix_vector_indexing = 1, + .general_varying_indexing = 1, + .general_sampler_indexing = 1, + .general_variable_indexing = 1, + .general_constant_matrix_vector_indexing = 1, + } +}; + +static int glslc_shader_compile(FFVulkanContext *s, FFVkSPIRVCompiler *ctx, + FFVulkanShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, + void **opaque) +{ + const char *messages; + glslang_shader_t *glslc_shader; + glslang_program_t *glslc_program; + + static const glslang_stage_t glslc_stage[] = { + [VK_SHADER_STAGE_VERTEX_BIT] = GLSLANG_STAGE_VERTEX, + [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_STAGE_FRAGMENT, + [VK_SHADER_STAGE_COMPUTE_BIT] = GLSLANG_STAGE_COMPUTE, +#if ((GLSLANG_VERSION_MAJOR) > 12) + [VK_SHADER_STAGE_TASK_BIT_EXT] = GLSLANG_STAGE_TASK, + [VK_SHADER_STAGE_MESH_BIT_EXT] = GLSLANG_STAGE_MESH, + [VK_SHADER_STAGE_RAYGEN_BIT_KHR] = GLSLANG_STAGE_RAYGEN, + [VK_SHADER_STAGE_INTERSECTION_BIT_KHR] = GLSLANG_STAGE_INTERSECT, + [VK_SHADER_STAGE_ANY_HIT_BIT_KHR] = GLSLANG_STAGE_ANYHIT, + [VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR] = GLSLANG_STAGE_CLOSESTHIT, + [VK_SHADER_STAGE_MISS_BIT_KHR] = GLSLANG_STAGE_MISS, + [VK_SHADER_STAGE_CALLABLE_BIT_KHR] = GLSLANG_STAGE_CALLABLE, +#endif + }; + + const glslang_input_t glslc_input = { + .language = GLSLANG_SOURCE_GLSL, + .stage = glslc_stage[shd->stage], + .client = GLSLANG_CLIENT_VULKAN, +#if ((GLSLANG_VERSION_MAJOR) >= 12) + .client_version = GLSLANG_TARGET_VULKAN_1_3, + .target_language_version = GLSLANG_TARGET_SPV_1_6, +#else + .client_version = GLSLANG_TARGET_VULKAN_1_2, + .target_language_version = GLSLANG_TARGET_SPV_1_5, +#endif + .target_language = GLSLANG_TARGET_SPV, + .code = shd->src.str, + .default_version = 460, + 
.default_profile = GLSLANG_NO_PROFILE, + .force_default_version_and_profile = false, + .forward_compatible = false, + .messages = GLSLANG_MSG_DEFAULT_BIT, + .resource = &glslc_resource_limits, + }; + +#if ((GLSLANG_VERSION_MAJOR) >= 12) + glslang_spv_options_t glslc_opts = { + .generate_debug_info = !!(s->extensions & (FF_VK_EXT_DEBUG_UTILS | FF_VK_EXT_RELAXED_EXTENDED_INSTR)), + .emit_nonsemantic_shader_debug_info = !!(s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR), + .emit_nonsemantic_shader_debug_source = !!(s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR), + .disable_optimizer = !!(s->extensions & FF_VK_EXT_DEBUG_UTILS), + .strip_debug_info = !(s->extensions & (FF_VK_EXT_DEBUG_UTILS | FF_VK_EXT_RELAXED_EXTENDED_INSTR)), + .optimize_size = 0, + .disassemble = 0, + .validate = 1, + /* .compile_only = 0, */ + }; +#endif + + av_assert0(glslc_refcount); + + *opaque = NULL; + + if (!(glslc_shader = glslang_shader_create(&glslc_input))) + return AVERROR(ENOMEM); + + if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) { + ff_vk_shader_print(s, shd, AV_LOG_WARNING); + av_log(s, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n", + glslang_shader_get_info_log(glslc_shader), + glslang_shader_get_info_debug_log(glslc_shader)); + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + if (!glslang_shader_parse(glslc_shader, &glslc_input)) { + ff_vk_shader_print(s, shd, AV_LOG_WARNING); + av_log(s, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n", + glslang_shader_get_info_log(glslc_shader), + glslang_shader_get_info_debug_log(glslc_shader)); + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + if (!(glslc_program = glslang_program_create())) { + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + glslang_program_add_shader(glslc_program, glslc_shader); + + if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT | + GLSLANG_MSG_VULKAN_RULES_BIT)) { + ff_vk_shader_print(s, shd, 
AV_LOG_WARNING); + av_log(s, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n", + glslang_program_get_info_log(glslc_program), + glslang_program_get_info_debug_log(glslc_program)); + glslang_program_delete(glslc_program); + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + +#if ((GLSLANG_VERSION_MAJOR) >= 12) + glslang_program_SPIRV_generate_with_options(glslc_program, glslc_input.stage, &glslc_opts); +#else + glslang_program_SPIRV_generate(glslc_program, glslc_input.stage); +#endif + + messages = glslang_program_SPIRV_get_messages(glslc_program); + if (messages) { + ff_vk_shader_print(s, shd, AV_LOG_WARNING); + av_log(s, AV_LOG_WARNING, "%s\n", messages); + } else { + ff_vk_shader_print(s, shd, AV_LOG_TRACE); + } + + glslang_shader_delete(glslc_shader); + + *size = glslang_program_SPIRV_get_size(glslc_program) * sizeof(unsigned int); + *data = (void *)glslang_program_SPIRV_get_ptr(glslc_program); + *opaque = glslc_program; + + return 0; +} + +static void glslc_shader_free(FFVkSPIRVCompiler *ctx, void **opaque) +{ + if (!opaque || !*opaque) + return; + + av_assert0(glslc_refcount); + glslang_program_delete(*opaque); + *opaque = NULL; +} + +static void glslc_uninit(FFVkSPIRVCompiler **ctx) +{ + if (!ctx || !*ctx) + return; + + pthread_mutex_lock(&glslc_mutex); + if (glslc_refcount && (--glslc_refcount == 0)) + glslang_finalize_process(); + pthread_mutex_unlock(&glslc_mutex); + + av_freep(ctx); +} + +FFVkSPIRVCompiler *ff_vk_glslang_init(void) +{ + FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); + if (!ret) + return NULL; + + ret->compile_shader = glslc_shader_compile; + ret->free_shader = glslc_shader_free; + ret->uninit = glslc_uninit; + + pthread_mutex_lock(&glslc_mutex); + if (!glslc_refcount++) { + if (!glslang_initialize_process()) { + av_freep(&ret); + glslc_refcount--; + } + } + pthread_mutex_unlock(&glslc_mutex); + + return ret; +} -- 2.49.1 From 26619748f956a43131838999423772a32ce290a6 Mon Sep 17 00:00:00 2001 From: Jamaika1 
<lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:20:09 +0000 Subject: [PATCH 049/118] Changing vulkan file directory --- libavutil/vulkan_functions.h | 283 ----------------------------------- 1 file changed, 283 deletions(-) delete mode 100644 libavutil/vulkan_functions.h diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h deleted file mode 100644 index 4d20b7d806..0000000000 --- a/libavutil/vulkan_functions.h +++ /dev/null @@ -1,283 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVUTIL_VULKAN_FUNCTIONS_H -#define AVUTIL_VULKAN_FUNCTIONS_H - -#define VK_NO_PROTOTYPES -#define VK_ENABLE_BETA_EXTENSIONS - -#include "hwcontext.h" -#include "hwcontext_vulkan.h" - -/* An enum of bitflags for every optional extension we need */ -typedef uint64_t FFVulkanExtensions; - -/* NOTE: when adding new entries, make sure to update ff_vk_extensions_to_mask */ -#define FF_VK_EXT_EXTERNAL_DMABUF_MEMORY (1ULL << 0) /* VK_EXT_external_memory_dma_buf */ -#define FF_VK_EXT_DRM_MODIFIER_FLAGS (1ULL << 1) /* VK_EXT_image_drm_format_modifier */ -#define FF_VK_EXT_EXTERNAL_FD_MEMORY (1ULL << 2) /* VK_KHR_external_memory_fd */ -#define FF_VK_EXT_EXTERNAL_FD_SEM (1ULL << 3) /* VK_KHR_external_semaphore_fd */ -#define FF_VK_EXT_EXTERNAL_HOST_MEMORY (1ULL << 4) /* VK_EXT_external_memory_host */ -#define FF_VK_EXT_DEBUG_UTILS (1ULL << 5) /* VK_EXT_debug_utils */ - -#define FF_VK_EXT_EXTERNAL_WIN32_MEMORY (1ULL << 6) /* VK_KHR_external_memory_win32 */ -#define FF_VK_EXT_EXTERNAL_WIN32_SEM (1ULL << 7) /* VK_KHR_external_semaphore_win32 */ - -#define FF_VK_EXT_DESCRIPTOR_BUFFER (1ULL << 8) /* VK_EXT_descriptor_buffer */ -#define FF_VK_EXT_DEVICE_DRM (1ULL << 9) /* VK_EXT_physical_device_drm */ -#define FF_VK_EXT_ATOMIC_FLOAT (1ULL << 10) /* VK_EXT_shader_atomic_float */ -#define FF_VK_EXT_COOP_MATRIX (1ULL << 11) /* VK_KHR_cooperative_matrix */ -#define FF_VK_EXT_OPTICAL_FLOW (1ULL << 12) /* VK_NV_optical_flow */ -#define FF_VK_EXT_SHADER_OBJECT (1ULL << 13) /* VK_EXT_shader_object */ -#define FF_VK_EXT_PUSH_DESCRIPTOR (1ULL << 14) /* VK_KHR_push_descriptor */ -#define FF_VK_EXT_RELAXED_EXTENDED_INSTR (1ULL << 15) /* VK_KHR_shader_relaxed_extended_instruction */ -#define FF_VK_EXT_EXPECT_ASSUME (1ULL << 16) /* VK_KHR_shader_expect_assume */ 
-#define FF_VK_EXT_SUBGROUP_ROTATE (1ULL << 17) /* VK_KHR_shader_subgroup_rotate */ -#define FF_VK_EXT_HOST_IMAGE_COPY (1ULL << 18) /* VK_EXT_host_image_copy */ - -/* Video extensions */ -#define FF_VK_EXT_VIDEO_QUEUE (1ULL << 36) /* VK_KHR_video_queue */ -#define FF_VK_EXT_VIDEO_MAINTENANCE_1 (1ULL << 37) /* VK_KHR_video_maintenance1 */ -#define FF_VK_EXT_VIDEO_MAINTENANCE_2 (1ULL << 38) /* VK_KHR_video_maintenance2 */ - -#define FF_VK_EXT_VIDEO_DECODE_QUEUE (1ULL << 40) /* VK_KHR_video_decode_queue */ -#define FF_VK_EXT_VIDEO_DECODE_H264 (1ULL << 41) /* VK_KHR_video_decode_h264 */ -#define FF_VK_EXT_VIDEO_DECODE_H265 (1ULL << 42) /* VK_KHR_video_decode_h265 */ -#define FF_VK_EXT_VIDEO_DECODE_VP9 (1ULL << 43) /* VK_KHR_video_decode_vp9 */ -#define FF_VK_EXT_VIDEO_DECODE_AV1 (1ULL << 44) /* VK_KHR_video_decode_av1 */ - -#define FF_VK_EXT_VIDEO_ENCODE_QUEUE (1ULL << 50) /* VK_KHR_video_encode_queue */ -#define FF_VK_EXT_VIDEO_ENCODE_H264 (1ULL << 51) /* VK_KHR_video_encode_h264 */ -#define FF_VK_EXT_VIDEO_ENCODE_H265 (1ULL << 52) /* VK_KHR_video_encode_h265 */ -#define FF_VK_EXT_VIDEO_ENCODE_AV1 (1ULL << 53) /* VK_KHR_video_encode_av1 */ - -#define FF_VK_EXT_PORTABILITY_SUBSET (1ULL << 62) -#define FF_VK_EXT_NO_FLAG (1ULL << 63) - -/* Macro containing every function that we utilize in our codebase */ -#define FN_LIST(MACRO) \ - /* Instance */ \ - MACRO(0, 0, FF_VK_EXT_NO_FLAG, EnumerateInstanceExtensionProperties) \ - MACRO(0, 0, FF_VK_EXT_NO_FLAG, EnumerateInstanceLayerProperties) \ - MACRO(0, 0, FF_VK_EXT_NO_FLAG, CreateInstance) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyInstance) \ - \ - /* Debug */ \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDebugUtilsMessengerEXT) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDebugUtilsMessengerEXT) \ - \ - /* Device */ \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetDeviceProcAddr) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDevice) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFeatures2) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, 
GetPhysicalDeviceProperties) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceExternalSemaphoreProperties) \ - MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoCapabilitiesKHR) \ - MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoFormatPropertiesKHR) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, DeviceWaitIdle) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDevice) \ - \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, EnumeratePhysicalDevices) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, EnumerateDeviceExtensionProperties) \ - \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceProperties2) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceMemoryProperties) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFormatProperties2) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceImageFormatProperties2) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties) \ - MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties2) \ - MACRO(1, 0, FF_VK_EXT_COOP_MATRIX, GetPhysicalDeviceCooperativeMatrixPropertiesKHR) \ - \ - /* Command pool */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateCommandPool) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyCommandPool) \ - \ - /* Command buffer */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, AllocateCommandBuffers) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, BeginCommandBuffer) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, EndCommandBuffer) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, FreeCommandBuffers) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdDispatch) \ - \ - /* Queue */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetDeviceQueue) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit2) \ - \ - /* Fences */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateFence) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, WaitForFences) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetFenceStatus) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, ResetFences) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyFence) \ - \ - /* Semaphores */ \ - MACRO(1, 1, FF_VK_EXT_EXTERNAL_FD_SEM, GetSemaphoreFdKHR) \ - 
MACRO(1, 1, FF_VK_EXT_EXTERNAL_FD_SEM, ImportSemaphoreFdKHR) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSemaphore) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, WaitSemaphores) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySemaphore) \ - \ - /* Memory */ \ - MACRO(1, 1, FF_VK_EXT_EXTERNAL_FD_MEMORY, GetMemoryFdKHR) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetMemoryFdPropertiesKHR) \ - MACRO(1, 1, FF_VK_EXT_EXTERNAL_HOST_MEMORY, GetMemoryHostPointerPropertiesEXT) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, AllocateMemory) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, MapMemory) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, FlushMappedMemoryRanges) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, InvalidateMappedMemoryRanges) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, UnmapMemory) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, FreeMemory) \ - \ - /* Commands */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdBindDescriptorSets) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPushConstants) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdBindPipeline) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBufferToImage) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyImageToBuffer) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdClearColorImage) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBuffer) \ - \ - /* Buffer */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateBuffer) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindBufferMemory) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferDeviceAddress) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdFillBuffer) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyBuffer) \ - \ - /* Image */ \ - MACRO(1, 1, FF_VK_EXT_DRM_MODIFIER_FLAGS, GetImageDrmFormatModifierPropertiesEXT) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetImageMemoryRequirements2) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateImage) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindImageMemory2) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetImageSubresourceLayout) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyImage) \ - \ - /* ImageView */ \ - MACRO(1, 1, 
FF_VK_EXT_NO_FLAG, CreateImageView) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyImageView) \ - \ - /* DescriptorSet */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorSetLayout) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, AllocateDescriptorSets) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorPool) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorPool) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorSetLayout) \ - \ - /* Descriptor buffers */ \ - MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutSizeEXT) \ - MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutBindingOffsetEXT) \ - MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorEXT) \ - MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdBindDescriptorBuffersEXT) \ - MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdSetDescriptorBufferOffsetsEXT) \ - \ - /* DescriptorUpdateTemplate */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \ - \ - /* Descriptors */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSets) \ - MACRO(1, 1, FF_VK_EXT_PUSH_DESCRIPTOR, CmdPushDescriptorSetKHR) \ - \ - /* Queries */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetQueryPoolResults) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, ResetQueryPool) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdBeginQuery) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdEndQuery) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdResetQueryPool) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyQueryPool) \ - \ - /* sync2 */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier2) \ - \ - /* Host image copy */ \ - MACRO(1, 1, FF_VK_EXT_HOST_IMAGE_COPY, TransitionImageLayoutEXT) \ - MACRO(1, 1, FF_VK_EXT_HOST_IMAGE_COPY, CopyMemoryToImageEXT) \ - MACRO(1, 1, FF_VK_EXT_HOST_IMAGE_COPY, CopyImageToMemoryEXT) \ - \ - /* Video queue */ \ - MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionKHR) \ 
- MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionParametersKHR) \ - MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, GetVideoSessionMemoryRequirementsKHR) \ - MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, BindVideoSessionMemoryKHR) \ - MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdBeginVideoCodingKHR) \ - MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdControlVideoCodingKHR) \ - MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdEndVideoCodingKHR) \ - MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionParametersKHR) \ - MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionKHR) \ - \ - /* Video decoding */ \ - MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \ - \ - /* Video encoding */ \ - MACRO(1, 1, FF_VK_EXT_VIDEO_ENCODE_QUEUE, CmdEncodeVideoKHR) \ - MACRO(1, 1, FF_VK_EXT_VIDEO_ENCODE_QUEUE, GetEncodedVideoSessionParametersKHR) \ - MACRO(1, 0, FF_VK_EXT_VIDEO_ENCODE_QUEUE, GetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR) \ - \ - /* Pipeline */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \ - \ - /* PipelineLayout */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateComputePipelines) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipeline) \ - \ - /* Sampler */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSamplerYcbcrConversion) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \ - \ - /* Optical flow */ \ - MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, BindOpticalFlowSessionImageNV) \ - MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, CmdOpticalFlowExecuteNV) \ - MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, CreateOpticalFlowSessionNV) \ - MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, DestroyOpticalFlowSessionNV) \ - MACRO(1, 0, FF_VK_EXT_OPTICAL_FLOW, GetPhysicalDeviceOpticalFlowImageFormatsNV)\ - \ - /* Shaders */ \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateShaderModule) \ - MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyShaderModule) \ - MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, 
CmdBindShadersEXT) \ - MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, CreateShadersEXT) \ - MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, DestroyShaderEXT) \ - MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, GetShaderBinaryDataEXT) - -/* Macro containing every win32 specific function that we utilize in our codebase */ -#define FN_LIST_WIN32(MACRO) \ - MACRO(1, 1, FF_VK_EXT_EXTERNAL_WIN32_SEM, GetSemaphoreWin32HandleKHR) \ - MACRO(1, 1, FF_VK_EXT_EXTERNAL_WIN32_MEMORY, GetMemoryWin32HandleKHR) - -/* Macro to turn a function name into a definition */ -#define PFN_DEF(req_inst, req_dev, ext_flag, name) \ - PFN_vk##name name; - -/* Structure with the definition of all listed functions */ -typedef struct FFVulkanFunctions { - FN_LIST(PFN_DEF) -#ifdef _WIN32 - FN_LIST_WIN32(PFN_DEF) -#endif -} FFVulkanFunctions; - -#endif /* AVUTIL_VULKAN_FUNCTIONS_H */ -- 2.49.1 From f59ede45385c6e962bb2eb42271160c38c033593 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:20:32 +0000 Subject: [PATCH 050/118] Changing vulkan file directory --- libavutil/vulkan/vulkan_functions.h | 283 ++++++++++++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 libavutil/vulkan/vulkan_functions.h diff --git a/libavutil/vulkan/vulkan_functions.h b/libavutil/vulkan/vulkan_functions.h new file mode 100644 index 0000000000..2872e813e3 --- /dev/null +++ b/libavutil/vulkan/vulkan_functions.h @@ -0,0 +1,283 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_VULKAN_FUNCTIONS_H +#define AVUTIL_VULKAN_FUNCTIONS_H + +#define VK_NO_PROTOTYPES +#define VK_ENABLE_BETA_EXTENSIONS + +#include "libavutil/hwcontext.h" +#include "hwcontext_vulkan.h" + +/* An enum of bitflags for every optional extension we need */ +typedef uint64_t FFVulkanExtensions; + +/* NOTE: when adding new entries, make sure to update ff_vk_extensions_to_mask */ +#define FF_VK_EXT_EXTERNAL_DMABUF_MEMORY (1ULL << 0) /* VK_EXT_external_memory_dma_buf */ +#define FF_VK_EXT_DRM_MODIFIER_FLAGS (1ULL << 1) /* VK_EXT_image_drm_format_modifier */ +#define FF_VK_EXT_EXTERNAL_FD_MEMORY (1ULL << 2) /* VK_KHR_external_memory_fd */ +#define FF_VK_EXT_EXTERNAL_FD_SEM (1ULL << 3) /* VK_KHR_external_semaphore_fd */ +#define FF_VK_EXT_EXTERNAL_HOST_MEMORY (1ULL << 4) /* VK_EXT_external_memory_host */ +#define FF_VK_EXT_DEBUG_UTILS (1ULL << 5) /* VK_EXT_debug_utils */ + +#define FF_VK_EXT_EXTERNAL_WIN32_MEMORY (1ULL << 6) /* VK_KHR_external_memory_win32 */ +#define FF_VK_EXT_EXTERNAL_WIN32_SEM (1ULL << 7) /* VK_KHR_external_semaphore_win32 */ + +#define FF_VK_EXT_DESCRIPTOR_BUFFER (1ULL << 8) /* VK_EXT_descriptor_buffer */ +#define FF_VK_EXT_DEVICE_DRM (1ULL << 9) /* VK_EXT_physical_device_drm */ +#define FF_VK_EXT_ATOMIC_FLOAT (1ULL << 10) /* VK_EXT_shader_atomic_float */ +#define FF_VK_EXT_COOP_MATRIX (1ULL << 11) /* VK_KHR_cooperative_matrix */ +#define FF_VK_EXT_OPTICAL_FLOW (1ULL << 12) /* VK_NV_optical_flow */ +#define FF_VK_EXT_SHADER_OBJECT (1ULL << 13) /* VK_EXT_shader_object */ +#define FF_VK_EXT_PUSH_DESCRIPTOR (1ULL << 14) /* VK_KHR_push_descriptor */ +#define FF_VK_EXT_RELAXED_EXTENDED_INSTR (1ULL << 15) /* VK_KHR_shader_relaxed_extended_instruction */ +#define FF_VK_EXT_EXPECT_ASSUME (1ULL << 16) /* 
VK_KHR_shader_expect_assume */ +#define FF_VK_EXT_SUBGROUP_ROTATE (1ULL << 17) /* VK_KHR_shader_subgroup_rotate */ +#define FF_VK_EXT_HOST_IMAGE_COPY (1ULL << 18) /* VK_EXT_host_image_copy */ + +/* Video extensions */ +#define FF_VK_EXT_VIDEO_QUEUE (1ULL << 36) /* VK_KHR_video_queue */ +#define FF_VK_EXT_VIDEO_MAINTENANCE_1 (1ULL << 37) /* VK_KHR_video_maintenance1 */ +#define FF_VK_EXT_VIDEO_MAINTENANCE_2 (1ULL << 38) /* VK_KHR_video_maintenance2 */ + +#define FF_VK_EXT_VIDEO_DECODE_QUEUE (1ULL << 40) /* VK_KHR_video_decode_queue */ +#define FF_VK_EXT_VIDEO_DECODE_H264 (1ULL << 41) /* VK_KHR_video_decode_h264 */ +#define FF_VK_EXT_VIDEO_DECODE_H265 (1ULL << 42) /* VK_KHR_video_decode_h265 */ +#define FF_VK_EXT_VIDEO_DECODE_VP9 (1ULL << 43) /* VK_KHR_video_decode_vp9 */ +#define FF_VK_EXT_VIDEO_DECODE_AV1 (1ULL << 44) /* VK_KHR_video_decode_av1 */ + +#define FF_VK_EXT_VIDEO_ENCODE_QUEUE (1ULL << 50) /* VK_KHR_video_encode_queue */ +#define FF_VK_EXT_VIDEO_ENCODE_H264 (1ULL << 51) /* VK_KHR_video_encode_h264 */ +#define FF_VK_EXT_VIDEO_ENCODE_H265 (1ULL << 52) /* VK_KHR_video_encode_h265 */ +#define FF_VK_EXT_VIDEO_ENCODE_AV1 (1ULL << 53) /* VK_KHR_video_encode_av1 */ + +#define FF_VK_EXT_PORTABILITY_SUBSET (1ULL << 62) +#define FF_VK_EXT_NO_FLAG (1ULL << 63) + +/* Macro containing every function that we utilize in our codebase */ +#define FN_LIST(MACRO) \ + /* Instance */ \ + MACRO(0, 0, FF_VK_EXT_NO_FLAG, EnumerateInstanceExtensionProperties) \ + MACRO(0, 0, FF_VK_EXT_NO_FLAG, EnumerateInstanceLayerProperties) \ + MACRO(0, 0, FF_VK_EXT_NO_FLAG, CreateInstance) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyInstance) \ + \ + /* Debug */ \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDebugUtilsMessengerEXT) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDebugUtilsMessengerEXT) \ + \ + /* Device */ \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetDeviceProcAddr) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, CreateDevice) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFeatures2) \ + MACRO(1, 0, 
FF_VK_EXT_NO_FLAG, GetPhysicalDeviceProperties) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceExternalSemaphoreProperties) \ + MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoCapabilitiesKHR) \ + MACRO(1, 0, FF_VK_EXT_VIDEO_QUEUE, GetPhysicalDeviceVideoFormatPropertiesKHR) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, DeviceWaitIdle) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, DestroyDevice) \ + \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, EnumeratePhysicalDevices) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, EnumerateDeviceExtensionProperties) \ + \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceProperties2) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceMemoryProperties) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceFormatProperties2) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceImageFormatProperties2) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties) \ + MACRO(1, 0, FF_VK_EXT_NO_FLAG, GetPhysicalDeviceQueueFamilyProperties2) \ + MACRO(1, 0, FF_VK_EXT_COOP_MATRIX, GetPhysicalDeviceCooperativeMatrixPropertiesKHR) \ + \ + /* Command pool */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateCommandPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyCommandPool) \ + \ + /* Command buffer */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, AllocateCommandBuffers) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, BeginCommandBuffer) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, EndCommandBuffer) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, FreeCommandBuffers) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdDispatch) \ + \ + /* Queue */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetDeviceQueue) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit2) \ + \ + /* Fences */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateFence) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, WaitForFences) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetFenceStatus) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, ResetFences) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyFence) \ + \ + /* Semaphores */ \ + MACRO(1, 1, FF_VK_EXT_EXTERNAL_FD_SEM, 
GetSemaphoreFdKHR) \ + MACRO(1, 1, FF_VK_EXT_EXTERNAL_FD_SEM, ImportSemaphoreFdKHR) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSemaphore) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, WaitSemaphores) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySemaphore) \ + \ + /* Memory */ \ + MACRO(1, 1, FF_VK_EXT_EXTERNAL_FD_MEMORY, GetMemoryFdKHR) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetMemoryFdPropertiesKHR) \ + MACRO(1, 1, FF_VK_EXT_EXTERNAL_HOST_MEMORY, GetMemoryHostPointerPropertiesEXT) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, AllocateMemory) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, MapMemory) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, FlushMappedMemoryRanges) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, InvalidateMappedMemoryRanges) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, UnmapMemory) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, FreeMemory) \ + \ + /* Commands */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdBindDescriptorSets) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPushConstants) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdBindPipeline) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBufferToImage) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyImageToBuffer) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdClearColorImage) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBuffer) \ + \ + /* Buffer */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateBuffer) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindBufferMemory) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferDeviceAddress) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdFillBuffer) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyBuffer) \ + \ + /* Image */ \ + MACRO(1, 1, FF_VK_EXT_DRM_MODIFIER_FLAGS, GetImageDrmFormatModifierPropertiesEXT) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetImageMemoryRequirements2) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateImage) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, BindImageMemory2) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetImageSubresourceLayout) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyImage) \ + \ + /* ImageView */ \ 
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateImageView) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyImageView) \ + \ + /* DescriptorSet */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorSetLayout) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, AllocateDescriptorSets) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorSetLayout) \ + \ + /* Descriptor buffers */ \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutSizeEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorSetLayoutBindingOffsetEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, GetDescriptorEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdBindDescriptorBuffersEXT) \ + MACRO(1, 1, FF_VK_EXT_DESCRIPTOR_BUFFER, CmdSetDescriptorBufferOffsetsEXT) \ + \ + /* DescriptorUpdateTemplate */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \ + \ + /* Descriptors */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSets) \ + MACRO(1, 1, FF_VK_EXT_PUSH_DESCRIPTOR, CmdPushDescriptorSetKHR) \ + \ + /* Queries */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetQueryPoolResults) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, ResetQueryPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdBeginQuery) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdEndQuery) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdResetQueryPool) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyQueryPool) \ + \ + /* sync2 */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier2) \ + \ + /* Host image copy */ \ + MACRO(1, 1, FF_VK_EXT_HOST_IMAGE_COPY, TransitionImageLayoutEXT) \ + MACRO(1, 1, FF_VK_EXT_HOST_IMAGE_COPY, CopyMemoryToImageEXT) \ + MACRO(1, 1, FF_VK_EXT_HOST_IMAGE_COPY, CopyImageToMemoryEXT) \ + \ + /* Video queue */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, 
CreateVideoSessionKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CreateVideoSessionParametersKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, GetVideoSessionMemoryRequirementsKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, BindVideoSessionMemoryKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdBeginVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdControlVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, CmdEndVideoCodingKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionParametersKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_QUEUE, DestroyVideoSessionKHR) \ + \ + /* Video decoding */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_DECODE_QUEUE, CmdDecodeVideoKHR) \ + \ + /* Video encoding */ \ + MACRO(1, 1, FF_VK_EXT_VIDEO_ENCODE_QUEUE, CmdEncodeVideoKHR) \ + MACRO(1, 1, FF_VK_EXT_VIDEO_ENCODE_QUEUE, GetEncodedVideoSessionParametersKHR) \ + MACRO(1, 0, FF_VK_EXT_VIDEO_ENCODE_QUEUE, GetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR) \ + \ + /* Pipeline */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreatePipelineLayout) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipelineLayout) \ + \ + /* PipelineLayout */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateComputePipelines) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyPipeline) \ + \ + /* Sampler */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSamplerYcbcrConversion) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySamplerYcbcrConversion) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateSampler) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroySampler) \ + \ + /* Optical flow */ \ + MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, BindOpticalFlowSessionImageNV) \ + MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, CmdOpticalFlowExecuteNV) \ + MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, CreateOpticalFlowSessionNV) \ + MACRO(1, 1, FF_VK_EXT_OPTICAL_FLOW, DestroyOpticalFlowSessionNV) \ + MACRO(1, 0, FF_VK_EXT_OPTICAL_FLOW, GetPhysicalDeviceOpticalFlowImageFormatsNV)\ + \ + /* Shaders */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateShaderModule) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyShaderModule) \ + MACRO(1, 1, 
FF_VK_EXT_SHADER_OBJECT, CmdBindShadersEXT) \ + MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, CreateShadersEXT) \ + MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, DestroyShaderEXT) \ + MACRO(1, 1, FF_VK_EXT_SHADER_OBJECT, GetShaderBinaryDataEXT) + +/* Macro containing every win32 specific function that we utilize in our codebase */ +#define FN_LIST_WIN32(MACRO) \ + MACRO(1, 1, FF_VK_EXT_EXTERNAL_WIN32_SEM, GetSemaphoreWin32HandleKHR) \ + MACRO(1, 1, FF_VK_EXT_EXTERNAL_WIN32_MEMORY, GetMemoryWin32HandleKHR) + +/* Macro to turn a function name into a definition */ +#define PFN_DEF(req_inst, req_dev, ext_flag, name) \ + PFN_vk##name name; + +/* Structure with the definition of all listed functions */ +typedef struct FFVulkanFunctions { + FN_LIST(PFN_DEF) +#ifdef _WIN32 + FN_LIST_WIN32(PFN_DEF) +#endif +} FFVulkanFunctions; + +#endif /* AVUTIL_VULKAN_FUNCTIONS_H */ -- 2.49.1 From e94ad64fe640af98f32aebc1ebf9a84d997a8b78 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:20:52 +0000 Subject: [PATCH 051/118] Changing vulkan file directory --- libavutil/vulkan_shaderc.c | 144 ------------------------------------- 1 file changed, 144 deletions(-) delete mode 100644 libavutil/vulkan_shaderc.c diff --git a/libavutil/vulkan_shaderc.c b/libavutil/vulkan_shaderc.c deleted file mode 100644 index e563b642df..0000000000 --- a/libavutil/vulkan_shaderc.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <shaderc/shaderc.h> - -#include "libavutil/mem.h" -#include "vulkan_spirv.h" - -static int shdc_shader_compile(FFVulkanContext *s, FFVkSPIRVCompiler *ctx, - FFVulkanShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, - void **opaque) -{ - int loglevel, err, warn, ret; - const char *status, *message; - shaderc_compilation_result_t res; - static const char *shdc_result[] = { - [shaderc_compilation_status_success] = "success", - [shaderc_compilation_status_invalid_stage] = "invalid stage", - [shaderc_compilation_status_compilation_error] = "error", - [shaderc_compilation_status_internal_error] = "internal error", - [shaderc_compilation_status_null_result_object] = "no result", - [shaderc_compilation_status_invalid_assembly] = "invalid assembly", - }; - static const shaderc_shader_kind shdc_kind[] = { - [VK_SHADER_STAGE_VERTEX_BIT] = shaderc_glsl_vertex_shader, - [VK_SHADER_STAGE_FRAGMENT_BIT] = shaderc_glsl_fragment_shader, - [VK_SHADER_STAGE_COMPUTE_BIT] = shaderc_glsl_compute_shader, - [VK_SHADER_STAGE_MESH_BIT_EXT] = shaderc_mesh_shader, - [VK_SHADER_STAGE_TASK_BIT_EXT] = shaderc_task_shader, - [VK_SHADER_STAGE_RAYGEN_BIT_KHR] = shaderc_raygen_shader, - [VK_SHADER_STAGE_ANY_HIT_BIT_KHR] = shaderc_anyhit_shader, - [VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR] = shaderc_closesthit_shader, - [VK_SHADER_STAGE_MISS_BIT_KHR] = shaderc_miss_shader, - [VK_SHADER_STAGE_INTERSECTION_BIT_KHR] = shaderc_intersection_shader, - [VK_SHADER_STAGE_CALLABLE_BIT_KHR] = shaderc_callable_shader, - }; - - shaderc_compile_options_t opts = shaderc_compile_options_initialize(); - *opaque = NULL; - if (!opts) - return AVERROR(ENOMEM); - - shaderc_compile_options_set_target_env(opts, shaderc_target_env_vulkan, - shaderc_env_version_vulkan_1_3); - 
shaderc_compile_options_set_target_spirv(opts, shaderc_spirv_version_1_6); - - /* If either extension is set, turn on debug info */ - if (s->extensions & (FF_VK_EXT_DEBUG_UTILS | FF_VK_EXT_RELAXED_EXTENDED_INSTR)) - shaderc_compile_options_set_generate_debug_info(opts); - - if (s->extensions & FF_VK_EXT_DEBUG_UTILS) - shaderc_compile_options_set_optimization_level(opts, - shaderc_optimization_level_zero); - else - shaderc_compile_options_set_optimization_level(opts, - shaderc_optimization_level_performance); - - res = shaderc_compile_into_spv((shaderc_compiler_t)ctx->priv, - shd->src.str, strlen(shd->src.str), - shdc_kind[shd->stage], - shd->name, entrypoint, opts); - shaderc_compile_options_release(opts); - - ret = shaderc_result_get_compilation_status(res); - err = shaderc_result_get_num_errors(res); - warn = shaderc_result_get_num_warnings(res); - message = shaderc_result_get_error_message(res); - - if (ret != shaderc_compilation_status_success && !err) - err = 1; - - loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_TRACE; - - ff_vk_shader_print(s, shd, loglevel); - if (message && (err || warn)) - av_log(s, loglevel, "%s\n", message); - status = ret < FF_ARRAY_ELEMS(shdc_result) ? 
shdc_result[ret] : "unknown"; - av_log(s, loglevel, "shaderc compile status '%s' (%d errors, %d warnings)\n", - status, err, warn); - - if (err > 0) - return AVERROR(EINVAL); - - *data = (uint8_t *)shaderc_result_get_bytes(res); - *size = shaderc_result_get_length(res); - *opaque = res; - - return 0; -} - -static void shdc_shader_free(FFVkSPIRVCompiler *ctx, void **opaque) -{ - if (!opaque || !*opaque) - return; - - shaderc_result_release((shaderc_compilation_result_t)*opaque); - *opaque = NULL; -} - -static void shdc_uninit(FFVkSPIRVCompiler **ctx) -{ - FFVkSPIRVCompiler *s; - - if (!ctx || !*ctx) - return; - - s = *ctx; - - shaderc_compiler_release((shaderc_compiler_t)s->priv); - av_freep(ctx); -} - -FFVkSPIRVCompiler *ff_vk_shaderc_init(void) -{ - FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); - if (!ret) - return NULL; - - ret->compile_shader = shdc_shader_compile; - ret->free_shader = shdc_shader_free; - ret->uninit = shdc_uninit; - - ret->priv = (void *)shaderc_compiler_initialize(); - if (!ret->priv) - av_freep(&ret); - - return ret; -} -- 2.49.1 From 6db4f480449a581d1cb03fe6e37f6e84f9f39b91 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:21:17 +0000 Subject: [PATCH 052/118] Changing vulkan file directory --- libavutil/vulkan/vulkan_shaderc.c | 144 ++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 libavutil/vulkan/vulkan_shaderc.c diff --git a/libavutil/vulkan/vulkan_shaderc.c b/libavutil/vulkan/vulkan_shaderc.c new file mode 100644 index 0000000000..e563b642df --- /dev/null +++ b/libavutil/vulkan/vulkan_shaderc.c @@ -0,0 +1,144 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <shaderc/shaderc.h> + +#include "libavutil/mem.h" +#include "vulkan_spirv.h" + +static int shdc_shader_compile(FFVulkanContext *s, FFVkSPIRVCompiler *ctx, + FFVulkanShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, + void **opaque) +{ + int loglevel, err, warn, ret; + const char *status, *message; + shaderc_compilation_result_t res; + static const char *shdc_result[] = { + [shaderc_compilation_status_success] = "success", + [shaderc_compilation_status_invalid_stage] = "invalid stage", + [shaderc_compilation_status_compilation_error] = "error", + [shaderc_compilation_status_internal_error] = "internal error", + [shaderc_compilation_status_null_result_object] = "no result", + [shaderc_compilation_status_invalid_assembly] = "invalid assembly", + }; + static const shaderc_shader_kind shdc_kind[] = { + [VK_SHADER_STAGE_VERTEX_BIT] = shaderc_glsl_vertex_shader, + [VK_SHADER_STAGE_FRAGMENT_BIT] = shaderc_glsl_fragment_shader, + [VK_SHADER_STAGE_COMPUTE_BIT] = shaderc_glsl_compute_shader, + [VK_SHADER_STAGE_MESH_BIT_EXT] = shaderc_mesh_shader, + [VK_SHADER_STAGE_TASK_BIT_EXT] = shaderc_task_shader, + [VK_SHADER_STAGE_RAYGEN_BIT_KHR] = shaderc_raygen_shader, + [VK_SHADER_STAGE_ANY_HIT_BIT_KHR] = shaderc_anyhit_shader, + [VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR] = shaderc_closesthit_shader, + [VK_SHADER_STAGE_MISS_BIT_KHR] = shaderc_miss_shader, + [VK_SHADER_STAGE_INTERSECTION_BIT_KHR] = shaderc_intersection_shader, + [VK_SHADER_STAGE_CALLABLE_BIT_KHR] = shaderc_callable_shader, 
+ }; + + shaderc_compile_options_t opts = shaderc_compile_options_initialize(); + *opaque = NULL; + if (!opts) + return AVERROR(ENOMEM); + + shaderc_compile_options_set_target_env(opts, shaderc_target_env_vulkan, + shaderc_env_version_vulkan_1_3); + shaderc_compile_options_set_target_spirv(opts, shaderc_spirv_version_1_6); + + /* If either extension is set, turn on debug info */ + if (s->extensions & (FF_VK_EXT_DEBUG_UTILS | FF_VK_EXT_RELAXED_EXTENDED_INSTR)) + shaderc_compile_options_set_generate_debug_info(opts); + + if (s->extensions & FF_VK_EXT_DEBUG_UTILS) + shaderc_compile_options_set_optimization_level(opts, + shaderc_optimization_level_zero); + else + shaderc_compile_options_set_optimization_level(opts, + shaderc_optimization_level_performance); + + res = shaderc_compile_into_spv((shaderc_compiler_t)ctx->priv, + shd->src.str, strlen(shd->src.str), + shdc_kind[shd->stage], + shd->name, entrypoint, opts); + shaderc_compile_options_release(opts); + + ret = shaderc_result_get_compilation_status(res); + err = shaderc_result_get_num_errors(res); + warn = shaderc_result_get_num_warnings(res); + message = shaderc_result_get_error_message(res); + + if (ret != shaderc_compilation_status_success && !err) + err = 1; + + loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_TRACE; + + ff_vk_shader_print(s, shd, loglevel); + if (message && (err || warn)) + av_log(s, loglevel, "%s\n", message); + status = ret < FF_ARRAY_ELEMS(shdc_result) ? 
shdc_result[ret] : "unknown"; + av_log(s, loglevel, "shaderc compile status '%s' (%d errors, %d warnings)\n", + status, err, warn); + + if (err > 0) { + shaderc_result_release(res); /* *opaque is still NULL here, so nobody else can release res */ + return AVERROR(EINVAL); + } + *data = (uint8_t *)shaderc_result_get_bytes(res); + *size = shaderc_result_get_length(res); + *opaque = res; /* handed to the caller; shdc_shader_free() releases it */ + return 0; +} + +static void shdc_shader_free(FFVkSPIRVCompiler *ctx, void **opaque) +{ + if (!opaque || !*opaque) + return; + + shaderc_result_release((shaderc_compilation_result_t)*opaque); + *opaque = NULL; +} + +static void shdc_uninit(FFVkSPIRVCompiler **ctx) +{ + FFVkSPIRVCompiler *s; + + if (!ctx || !*ctx) + return; + + s = *ctx; + + shaderc_compiler_release((shaderc_compiler_t)s->priv); + av_freep(ctx); +} + +FFVkSPIRVCompiler *ff_vk_shaderc_init(void) +{ + FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); + if (!ret) + return NULL; + + ret->compile_shader = shdc_shader_compile; + ret->free_shader = shdc_shader_free; + ret->uninit = shdc_uninit; + + ret->priv = (void *)shaderc_compiler_initialize(); + if (!ret->priv) + av_freep(&ret); + + return ret; +} -- 2.49.1 From e3bd7cc4a2f9b58a29f110664ff178a820879fac Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:21:39 +0000 Subject: [PATCH 053/118] Changing vulkan file directory --- libavutil/vulkan_loader.h | 189 -------------------------------------- 1 file changed, 189 deletions(-) delete mode 100644 libavutil/vulkan_loader.h diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h deleted file mode 100644 index 3207511663..0000000000 --- a/libavutil/vulkan_loader.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVUTIL_VULKAN_LOADER_H -#define AVUTIL_VULKAN_LOADER_H - -#include <stdio.h> - -#include "avassert.h" -#include "vulkan_functions.h" - -/* Macro to turn a function name into a loader struct */ -#define PFN_LOAD_INFO(req_inst, req_dev, ext_flag, name) \ - { \ - req_inst, \ - req_dev, \ - offsetof(FFVulkanFunctions, name), \ - ext_flag, \ - }, - -static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, - int nb_extensions) -{ - static const struct ExtensionMap { - const char *name; - FFVulkanExtensions flag; - } extension_map[] = { - /* Instance extensions */ - { VK_EXT_DEBUG_UTILS_EXTENSION_NAME, FF_VK_EXT_DEBUG_UTILS }, - - /* Device extensions */ -#ifdef VK_KHR_shader_relaxed_extended_instruction - { VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_EXTENSION_NAME, FF_VK_EXT_RELAXED_EXTENDED_INSTR }, -#endif - { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_DMABUF_MEMORY }, - { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS }, - { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, - { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_SEM }, - { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_HOST_MEMORY }, - { VK_EXT_DEBUG_UTILS_EXTENSION_NAME, FF_VK_EXT_DEBUG_UTILS }, - { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, - { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, - { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, 
FF_VK_EXT_COOP_MATRIX }, - { VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW }, - { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT }, - { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE }, - { VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME, FF_VK_EXT_HOST_IMAGE_COPY }, - { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 }, -#ifdef VK_KHR_video_maintenance2 - { VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_2 }, -#endif -#ifdef _WIN32 - { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, - { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, -#endif - { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, - { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, - { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, - { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, - { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, - { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, - { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, - { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, -#ifdef VK_KHR_video_decode_vp9 - { VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_VP9 }, -#endif - { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 }, - { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR }, -#ifdef VK_KHR_shader_expect_assume - { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME }, -#endif -#ifdef VK_KHR_video_encode_av1 - { VK_KHR_VIDEO_ENCODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_AV1 }, -#endif - }; - - FFVulkanExtensions mask = 0x0; - - for (int i = 0; i < nb_extensions; i++) { - for (int j = 0; j < FF_ARRAY_ELEMS(extension_map); j++) { - if (!strcmp(extensions[i], extension_map[j].name)) { - 
mask |= extension_map[j].flag; - continue; - } - } - } - - return mask; -} - -/** - * Function loader. - * Vulkan function from scratch loading happens in 3 stages - the first one - * is before any initialization has happened, and you have neither an instance - * structure nor a device structure. At this stage, you can only get the bare - * minimals to initialize an instance. - * The second stage is when you have an instance. At this stage, you can - * initialize a VkDevice, and have an idea of what extensions each device - * supports. - * Finally, in the third stage, you can proceed and load all core functions, - * plus you can be sure that any extensions you've enabled during device - * initialization will be available. - */ -static inline int ff_vk_load_functions(AVHWDeviceContext *ctx, - FFVulkanFunctions *vk, - uint64_t extensions_mask, - int has_inst, int has_dev) -{ - AVVulkanDeviceContext *hwctx = ctx->hwctx; - - static const struct FunctionLoadInfo { - char req_inst; - char req_dev; - uint16_t struct_offset; - FFVulkanExtensions ext_flag; - } vk_load_info[] = { - FN_LIST(PFN_LOAD_INFO) -#ifdef _WIN32 - FN_LIST_WIN32(PFN_LOAD_INFO) -#endif - }; - // Concatenate the names to avoid relocations. 
The resulting string - // will end with \0\0 -#define FUNC_NAME(req_inst, req_dev, ext_flag, name) "vk"#name"\0" - const char *name = - FN_LIST(FUNC_NAME) -#ifdef _WIN32 - FN_LIST_WIN32(FUNC_NAME) -#endif - ; -#undef FUNC_NAME - - for (int i = 0; i < FF_ARRAY_ELEMS(vk_load_info); name += strlen(name) + 1, i++) { - const struct FunctionLoadInfo *load = &vk_load_info[i]; - static const char extensions[][4] = { "", "EXT", "KHR" }; - PFN_vkVoidFunction fn; - - if (load->req_dev && !has_dev) - continue; - if (load->req_inst && !has_inst) - continue; - - for (int j = 0; j < FF_ARRAY_ELEMS(extensions); j++) { - char ext_name[128]; - av_unused int n; - - n = snprintf(ext_name, sizeof(ext_name), "%s%s", name, extensions[j]); - av_assert1(n < sizeof(ext_name)); - - if (load->req_dev) - fn = vk->GetDeviceProcAddr(hwctx->act_dev, ext_name); - else if (load->req_inst) - fn = hwctx->get_proc_addr(hwctx->inst, ext_name); - else - fn = hwctx->get_proc_addr(NULL, ext_name); - - if (fn) - break; - } - - if (!fn && ((extensions_mask &~ FF_VK_EXT_NO_FLAG) & load->ext_flag)) { - av_log(ctx, AV_LOG_ERROR, "Loader error, function \"%s\" indicated " - "as supported, but got NULL function pointer!\n", name); - return AVERROR_EXTERNAL; - } - - *(PFN_vkVoidFunction *)((uint8_t *)vk + load->struct_offset) = fn; - } - av_assert1(*name == '\0'); - - return 0; -} - -#endif /* AVUTIL_VULKAN_LOADER_H */ -- 2.49.1 From 65de1208860ccdeb016a365f60942aecb68b272b Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:22:07 +0000 Subject: [PATCH 054/118] Changing vulkan file directory --- libavutil/vulkan/vulkan_loader.h | 189 +++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 libavutil/vulkan/vulkan_loader.h diff --git a/libavutil/vulkan/vulkan_loader.h b/libavutil/vulkan/vulkan_loader.h new file mode 100644 index 0000000000..7d0f4bee3b --- /dev/null +++ b/libavutil/vulkan/vulkan_loader.h @@ -0,0 +1,189 @@ +/* + * This file is part of 
FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_VULKAN_LOADER_H +#define AVUTIL_VULKAN_LOADER_H + +#include <stdio.h> + +#include "libavutil/avassert.h" +#include "vulkan_functions.h" + +/* Macro to turn a function name into a loader struct */ +#define PFN_LOAD_INFO(req_inst, req_dev, ext_flag, name) \ + { \ + req_inst, \ + req_dev, \ + offsetof(FFVulkanFunctions, name), \ + ext_flag, \ + }, + +static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, + int nb_extensions) +{ + static const struct ExtensionMap { + const char *name; + FFVulkanExtensions flag; + } extension_map[] = { + /* Instance extensions */ + { VK_EXT_DEBUG_UTILS_EXTENSION_NAME, FF_VK_EXT_DEBUG_UTILS }, + + /* Device extensions */ +#ifdef VK_KHR_shader_relaxed_extended_instruction + { VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_EXTENSION_NAME, FF_VK_EXT_RELAXED_EXTENDED_INSTR }, +#endif + { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_DMABUF_MEMORY }, + { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, FF_VK_EXT_DRM_MODIFIER_FLAGS }, + { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_MEMORY }, + { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_FD_SEM }, + { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, 
FF_VK_EXT_EXTERNAL_HOST_MEMORY }, + { VK_EXT_DEBUG_UTILS_EXTENSION_NAME, FF_VK_EXT_DEBUG_UTILS }, + { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, + { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, + { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX }, + { VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW }, + { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT }, + { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME, FF_VK_EXT_SUBGROUP_ROTATE }, + { VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME, FF_VK_EXT_HOST_IMAGE_COPY }, + { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 }, +#ifdef VK_KHR_video_maintenance2 + { VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_2 }, +#endif +#ifdef _WIN32 + { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY }, + { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM }, +#endif + { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, + { VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_QUEUE }, + { VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_QUEUE }, + { VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_QUEUE }, + { VK_KHR_VIDEO_ENCODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H264 }, + { VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H264 }, + { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, + { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, +#ifdef VK_KHR_video_decode_vp9 + { VK_KHR_VIDEO_DECODE_VP9_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_VP9 }, +#endif + { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 }, + { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR }, +#ifdef VK_KHR_shader_expect_assume + { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME }, +#endif +#ifdef 
VK_KHR_video_encode_av1 + { VK_KHR_VIDEO_ENCODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_AV1 }, +#endif + }; + + FFVulkanExtensions mask = 0x0; + + for (int i = 0; i < nb_extensions; i++) { + for (int j = 0; j < FF_ARRAY_ELEMS(extension_map); j++) { + if (!strcmp(extensions[i], extension_map[j].name)) { + mask |= extension_map[j].flag; + break; /* first match decides the flag; skip the rest of the map */ + } + } + } + + return mask; +} + +/** + * Function loader. + * Vulkan function from scratch loading happens in 3 stages - the first one + * is before any initialization has happened, and you have neither an instance + * structure nor a device structure. At this stage, you can only get the bare + * minimals to initialize an instance. + * The second stage is when you have an instance. At this stage, you can + * initialize a VkDevice, and have an idea of what extensions each device + * supports. + * Finally, in the third stage, you can proceed and load all core functions, + * plus you can be sure that any extensions you've enabled during device + * initialization will be available. + */ +static inline int ff_vk_load_functions(AVHWDeviceContext *ctx, + FFVulkanFunctions *vk, + uint64_t extensions_mask, + int has_inst, int has_dev) +{ + AVVulkanDeviceContext *hwctx = ctx->hwctx; + + static const struct FunctionLoadInfo { + char req_inst; + char req_dev; + uint16_t struct_offset; + FFVulkanExtensions ext_flag; + } vk_load_info[] = { + FN_LIST(PFN_LOAD_INFO) +#ifdef _WIN32 + FN_LIST_WIN32(PFN_LOAD_INFO) +#endif + }; + // Concatenate the names to avoid relocations. 
The resulting string + // will end with \0\0 +#define FUNC_NAME(req_inst, req_dev, ext_flag, name) "vk"#name"\0" + const char *name = + FN_LIST(FUNC_NAME) +#ifdef _WIN32 + FN_LIST_WIN32(FUNC_NAME) +#endif + ; +#undef FUNC_NAME + + for (int i = 0; i < FF_ARRAY_ELEMS(vk_load_info); name += strlen(name) + 1, i++) { + const struct FunctionLoadInfo *load = &vk_load_info[i]; + static const char extensions[][4] = { "", "EXT", "KHR" }; + PFN_vkVoidFunction fn; + + if (load->req_dev && !has_dev) + continue; + if (load->req_inst && !has_inst) + continue; + + for (int j = 0; j < FF_ARRAY_ELEMS(extensions); j++) { + char ext_name[128]; + av_unused int n; + + n = snprintf(ext_name, sizeof(ext_name), "%s%s", name, extensions[j]); + av_assert1(n < sizeof(ext_name)); + + if (load->req_dev) + fn = vk->GetDeviceProcAddr(hwctx->act_dev, ext_name); + else if (load->req_inst) + fn = hwctx->get_proc_addr(hwctx->inst, ext_name); + else + fn = hwctx->get_proc_addr(NULL, ext_name); + + if (fn) + break; + } + + if (!fn && ((extensions_mask &~ FF_VK_EXT_NO_FLAG) & load->ext_flag)) { + av_log(ctx, AV_LOG_ERROR, "Loader error, function \"%s\" indicated " + "as supported, but got NULL function pointer!\n", name); + return AVERROR_EXTERNAL; + } + + *(PFN_vkVoidFunction *)((uint8_t *)vk + load->struct_offset) = fn; + } + av_assert1(*name == '\0'); + + return 0; +} + +#endif /* AVUTIL_VULKAN_LOADER_H */ -- 2.49.1 From eeeedaf0a99b28f28fe9dc3fcf4d53da2f8a6768 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:22:46 +0000 Subject: [PATCH 055/118] Changing vulkan file directory --- libavutil/vulkan_spirv.h | 44 ---------------------------------------- 1 file changed, 44 deletions(-) delete mode 100644 libavutil/vulkan_spirv.h diff --git a/libavutil/vulkan_spirv.h b/libavutil/vulkan_spirv.h deleted file mode 100644 index c13b50f8e7..0000000000 --- a/libavutil/vulkan_spirv.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVUTIL_VULKAN_SPIRV_H -#define AVUTIL_VULKAN_SPIRV_H - -#include "vulkan.h" - -#include "config.h" - -typedef struct FFVkSPIRVCompiler { - void *priv; - int (*compile_shader)(FFVulkanContext *s, struct FFVkSPIRVCompiler *ctx, - FFVulkanShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, void **opaque); - void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); - void (*uninit)(struct FFVkSPIRVCompiler **ctx); -} FFVkSPIRVCompiler; - -#if CONFIG_LIBGLSLANG -FFVkSPIRVCompiler *ff_vk_glslang_init(void); -#define ff_vk_spirv_init ff_vk_glslang_init -#endif -#if CONFIG_LIBSHADERC -FFVkSPIRVCompiler *ff_vk_shaderc_init(void); -#define ff_vk_spirv_init ff_vk_shaderc_init -#endif - -#endif /* AVUTIL_VULKAN_H */ -- 2.49.1 From 88912f077961397f1795b0204800d1af01c26831 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:23:12 +0000 Subject: [PATCH 056/118] Changing vulkan file directory --- libavutil/vulkan/vulkan_spirv.h | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 libavutil/vulkan/vulkan_spirv.h diff --git a/libavutil/vulkan/vulkan_spirv.h b/libavutil/vulkan/vulkan_spirv.h new file mode 100644 index 0000000000..7797ae4d79 --- 
/dev/null +++ b/libavutil/vulkan/vulkan_spirv.h @@ -0,0 +1,44 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_VULKAN_SPIRV_H +#define AVUTIL_VULKAN_SPIRV_H + +#include "vulkan.h" + +#include "libavutil/config.h" + +typedef struct FFVkSPIRVCompiler { + void *priv; + int (*compile_shader)(FFVulkanContext *s, struct FFVkSPIRVCompiler *ctx, + FFVulkanShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, void **opaque); + void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); + void (*uninit)(struct FFVkSPIRVCompiler **ctx); +} FFVkSPIRVCompiler; + +#if CONFIG_LIBGLSLANG +FFVkSPIRVCompiler *ff_vk_glslang_init(void); +#define ff_vk_spirv_init ff_vk_glslang_init +#endif +#if CONFIG_LIBSHADERC +FFVkSPIRVCompiler *ff_vk_shaderc_init(void); +#define ff_vk_spirv_init ff_vk_shaderc_init +#endif + +#endif /* AVUTIL_VULKAN_SPIRV_H */ -- 2.49.1 From f76c060e15f29cb8f328720ea613e24029f1925a Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:24:25 +0000 Subject: [PATCH 057/118] Changing vulkan file directory --- libavcodec/ffv1_vulkan.c | 123 --------------------------------------- 1 file changed, 123 deletions(-) delete mode 100644 libavcodec/ffv1_vulkan.c diff --git 
a/libavcodec/ffv1_vulkan.c b/libavcodec/ffv1_vulkan.c deleted file mode 100644 index 6f49e2ebb1..0000000000 --- a/libavcodec/ffv1_vulkan.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2025 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "ffv1_vulkan.h" -#include "libavutil/crc.h" - -int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f) -{ - int err; - uint8_t *buf_mapped; - - RET(ff_vk_map_buffer(s, vkb, &buf_mapped, 0)); - - for (int i = 1; i < 256; i++) { - buf_mapped[256 + i] = f->state_transition[i]; - buf_mapped[256 - i] = 256 - (int)f->state_transition[i]; - } - - RET(ff_vk_unmap_buffer(s, vkb, 1)); - -fail: - return err; -} - -static int init_state_transition_data(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f, - int (*write_data)(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f)) -{ - int err; - size_t buf_len = 512*sizeof(uint8_t); - - RET(ff_vk_create_buf(s, vkb, - buf_len, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - - write_data(s, vkb, f); - -fail: - return err; -} - -int 
ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f) -{ - return init_state_transition_data(s, vkb, f, - ff_ffv1_vk_update_state_transition_data); -} - -int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f) -{ - int err; - - int16_t *buf_mapped; - size_t buf_len = MAX_QUANT_TABLES* - MAX_CONTEXT_INPUTS* - MAX_QUANT_TABLE_SIZE*sizeof(int16_t); - - RET(ff_vk_create_buf(s, vkb, - buf_len, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0)); - - memcpy(buf_mapped, f->quant_tables, - sizeof(f->quant_tables)); - - RET(ff_vk_unmap_buffer(s, vkb, 1)); - -fail: - return err; -} - -int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f) -{ - int err; - - uint32_t *buf_mapped; - size_t buf_len = 256*sizeof(int32_t); - - RET(ff_vk_create_buf(s, vkb, - buf_len, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0)); - - memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len); - - RET(ff_vk_unmap_buffer(s, vkb, 1)); - -fail: - return err; -} -- 2.49.1 From 57b1aa48e9fe9372b7198102a96ab46e474500ee Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:25:02 +0000 Subject: [PATCH 058/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_vulkan.c | 123 ++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 libavcodec/vulkan/ffv1_vulkan.c diff --git a/libavcodec/vulkan/ffv1_vulkan.c b/libavcodec/vulkan/ffv1_vulkan.c new file mode 100644 index 0000000000..6f49e2ebb1 --- /dev/null +++ b/libavcodec/vulkan/ffv1_vulkan.c @@ -0,0 +1,123 @@ +/* + * 
Copyright (c) 2025 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "ffv1_vulkan.h" +#include "libavutil/crc.h" + +int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + int err; + uint8_t *buf_mapped; + + RET(ff_vk_map_buffer(s, vkb, &buf_mapped, 0)); + + for (int i = 1; i < 256; i++) { + buf_mapped[256 + i] = f->state_transition[i]; + buf_mapped[256 - i] = 256 - (int)f->state_transition[i]; + } + + RET(ff_vk_unmap_buffer(s, vkb, 1)); + +fail: + return err; +} + +static int init_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f, + int (*write_data)(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f)) +{ + int err; + size_t buf_len = 512*sizeof(uint8_t); + + RET(ff_vk_create_buf(s, vkb, + buf_len, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + + RET(write_data(s, vkb, f)); /* propagate the callback's error instead of discarding it */ + +fail: + return err; +} + +int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + return init_state_transition_data(s, vkb, f, + ff_ffv1_vk_update_state_transition_data); +} + +int 
ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + int err; + + int16_t *buf_mapped; + size_t buf_len = MAX_QUANT_TABLES* + MAX_CONTEXT_INPUTS* + MAX_QUANT_TABLE_SIZE*sizeof(int16_t); + + RET(ff_vk_create_buf(s, vkb, + buf_len, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0)); + + memcpy(buf_mapped, f->quant_tables, + sizeof(f->quant_tables)); + + RET(ff_vk_unmap_buffer(s, vkb, 1)); + +fail: + return err; +} + +int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f) +{ + int err; + + uint32_t *buf_mapped; + size_t buf_len = 256*sizeof(int32_t); + + RET(ff_vk_create_buf(s, vkb, + buf_len, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(s, vkb, (void *)&buf_mapped, 0)); + + memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len); + + RET(ff_vk_unmap_buffer(s, vkb, 1)); + +fail: + return err; +} -- 2.49.1 From b1f6c33c3e839daa20b777282a8502b660f61c12 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:25:28 +0000 Subject: [PATCH 059/118] Changing vulkan file directory --- libavcodec/ffv1enc_vulkan.c | 1848 ----------------------------------- 1 file changed, 1848 deletions(-) delete mode 100644 libavcodec/ffv1enc_vulkan.c diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c deleted file mode 100644 index 259bc75d4c..0000000000 --- a/libavcodec/ffv1enc_vulkan.c +++ /dev/null @@ -1,1848 +0,0 @@ -/* - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/mem.h" -#include "libavutil/vulkan.h" -#include "libavutil/vulkan_spirv.h" - -#include "avcodec.h" -#include "internal.h" -#include "hwconfig.h" -#include "encode.h" -#include "libavutil/opt.h" -#include "codec_internal.h" - -#include "ffv1.h" -#include "ffv1enc.h" -#include "ffv1_vulkan.h" - -/* Parallel Golomb alignment */ -#define LG_ALIGN_W 32 -#define LG_ALIGN_H 32 - -/* Unlike the decoder, we need 4 lines (but really only 3) */ -#define RGB_LINECACHE 4 - -typedef struct VulkanEncodeFFv1FrameData { - /* Output data */ - AVBufferRef *out_data_ref; - - /* Results data */ - AVBufferRef *results_data_ref; - - /* Copied from the source */ - int64_t pts; - int64_t duration; - void *frame_opaque; - AVBufferRef *frame_opaque_ref; - - int key_frame; -} VulkanEncodeFFv1FrameData; - -typedef struct VulkanEncodeFFv1Context { - FFV1Context ctx; - AVFrame *frame; - - FFVulkanContext s; - AVVulkanDeviceQueueFamily *qf; - FFVkExecPool exec_pool; - - AVVulkanDeviceQueueFamily *transfer_qf; - FFVkExecPool transfer_exec_pool; - - VkBufferCopy *buf_regions; - VulkanEncodeFFv1FrameData *exec_ctx_info; - int in_flight; - int async_depth; - size_t max_heap_size; - - FFVulkanShader setup; - FFVulkanShader rct_search; - FFVulkanShader 
reset; - FFVulkanShader enc; - - /* Constant read-only buffers */ - FFVkBuffer quant_buf; - FFVkBuffer rangecoder_static_buf; - FFVkBuffer crc_tab_buf; - - /* Slice data buffer pool */ - AVBufferPool *slice_data_pool; - AVBufferRef *keyframe_slice_data_ref; - - /* Output data buffer */ - AVBufferPool *out_data_pool; - - /* Slice results buffer */ - AVBufferPool *results_data_pool; - - /* Intermediate frame pool */ - AVBufferRef *intermediate_frames_ref; - - /* Representation mode */ - enum FFVkShaderRepFormat rep_fmt; - - int num_h_slices; - int num_v_slices; - int force_pcm; - int optimize_rct; - - int is_rgb; - int ppi; - int chunks; -} VulkanEncodeFFv1Context; - -extern const char *ff_source_common_comp; -extern const char *ff_source_rangecoder_comp; -extern const char *ff_source_ffv1_vlc_comp; -extern const char *ff_source_ffv1_common_comp; -extern const char *ff_source_ffv1_reset_comp; -extern const char *ff_source_ffv1_rct_search_comp; -extern const char *ff_source_ffv1_enc_setup_comp; -extern const char *ff_source_ffv1_enc_comp; - -typedef struct FFv1VkParameters { - VkDeviceAddress slice_state; - VkDeviceAddress scratch_data; - VkDeviceAddress out_data; - - int32_t fmt_lut[4]; - int32_t sar[2]; - uint32_t chroma_shift[2]; - - uint32_t plane_state_size; - uint32_t context_count; - uint32_t crcref; - uint32_t slice_size_max; - int rct_offset; - - uint8_t extend_lookup[8]; - uint8_t bits_per_raw_sample; - uint8_t context_model; - uint8_t version; - uint8_t micro_version; - uint8_t force_pcm; - uint8_t key_frame; - uint8_t components; - uint8_t planes; - uint8_t codec_planes; - uint8_t planar_rgb; - uint8_t transparency; - uint8_t colorspace; - uint8_t pic_mode; - uint8_t ec; - uint8_t ppi; - uint8_t chunks; - uint8_t rct_search; - uint8_t padding[3]; -} FFv1VkParameters; - -static void add_push_data(FFVulkanShader *shd) -{ - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, u8buf slice_state; ); - GLSLC(1, u8buf scratch_data; ); - 
GLSLC(1, u8buf out_data; ); - GLSLC(0, ); - GLSLC(1, ivec4 fmt_lut; ); - GLSLC(1, ivec2 sar; ); - GLSLC(1, uvec2 chroma_shift; ); - GLSLC(0, ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint context_count; ); - GLSLC(1, uint32_t crcref; ); - GLSLC(1, uint32_t slice_size_max; ); - GLSLC(1, int rct_offset; ); - GLSLC(0, ); - GLSLC(1, uint8_t extend_lookup[8]; ); - GLSLC(1, uint8_t bits_per_raw_sample; ); - GLSLC(1, uint8_t context_model; ); - GLSLC(1, uint8_t version; ); - GLSLC(1, uint8_t micro_version; ); - GLSLC(1, uint8_t force_pcm; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t components; ); - GLSLC(1, uint8_t planes; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t planar_rgb; ); - GLSLC(1, uint8_t transparency; ); - GLSLC(1, uint8_t colorspace; ); - GLSLC(1, uint8_t pic_mode; ); - GLSLC(1, uint8_t ec; ); - GLSLC(1, uint8_t ppi; ); - GLSLC(1, uint8_t chunks; ); - GLSLC(1, uint8_t rct_search; ); - GLSLC(1, uint8_t padding[3]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), - VK_SHADER_STAGE_COMPUTE_BIT); -} - -typedef struct FFv1VkRCTSearchParameters { - int fmt_lut[4]; - int rct_offset; - uint8_t planar_rgb; - uint8_t transparency; - uint8_t key_frame; - uint8_t force_pcm; - uint8_t version; - uint8_t micro_version; - uint8_t padding[2]; -} FFv1VkRCTSearchParameters; - -static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec, - AVFrame *enc_in, VkImageView *enc_in_views, - FFVkBuffer *slice_data_buf, uint32_t slice_data_size) -{ - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - FFVulkanFunctions *vk = &fv->s.vkfn; - AVHWFramesContext *src_hwfc = (AVHWFramesContext *)enc_in->hw_frames_ctx->data; - FFv1VkRCTSearchParameters pd; - - /* Update descriptors */ - ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->rct_search, - 0, 0, 0, - slice_data_buf, - 0, slice_data_size*f->slice_count, - VK_FORMAT_UNDEFINED); - ff_vk_shader_update_img_array(&fv->s, exec, 
&fv->rct_search, - enc_in, enc_in_views, - 0, 1, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - - ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct_search); - - pd = (FFv1VkRCTSearchParameters) { - .rct_offset = 1 << f->bits_per_raw_sample, - .planar_rgb = ff_vk_mt_is_np_rgb(src_hwfc->sw_format) && - (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1), - .transparency = f->transparency, - .key_frame = f->key_frame, - .force_pcm = fv->force_pcm, - .version = f->version, - .micro_version = f->micro_version, - }; - - if (avctx->sw_pix_fmt == AV_PIX_FMT_GBRP10 || - avctx->sw_pix_fmt == AV_PIX_FMT_GBRP12 || - avctx->sw_pix_fmt == AV_PIX_FMT_GBRP14) - memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int)); - else - ff_vk_set_perm(avctx->sw_pix_fmt, pd.fmt_lut, 1); - - ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct_search, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); - - vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); - - return 0; -} - -static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, - FFVkExecContext *exec, - const AVFrame *pict) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - FFVulkanFunctions *vk = &fv->s.vkfn; - - VulkanEncodeFFv1FrameData *fd = exec->opaque; - FFv1VkParameters pd; - - /* Slice data */ - AVBufferRef *slice_data_ref; - FFVkBuffer *slice_data_buf; - uint32_t plane_state_size; - uint32_t slice_state_size; - uint32_t slice_data_size; - - /* Output data */ - size_t maxsize; - FFVkBuffer *out_data_buf; - - /* Results data */ - FFVkBuffer *results_data_buf; - - int has_inter = avctx->gop_size > 1; - uint32_t context_count = f->context_count[f->context_model]; - const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); - - AVFrame *src = (AVFrame *)pict; - VkImageView src_views[AV_NUM_DATA_POINTERS]; - - AVFrame *tmp = NULL; - VkImageView tmp_views[AV_NUM_DATA_POINTERS]; - - VkImageMemoryBarrier2 img_bar[37]; - int nb_img_bar = 
0; - VkBufferMemoryBarrier2 buf_bar[8]; - int nb_buf_bar = 0; - - /* Start recording */ - ff_vk_exec_start(&fv->s, exec); - - /* Frame state */ - f->cur_enc_frame = pict; - if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) { - av_buffer_unref(&fv->keyframe_slice_data_ref); - f->key_frame = fd->key_frame = 1; - f->gob_count++; - } else { - f->key_frame = fd->key_frame = 0; - } - - f->slice_count = f->max_slice_count; - - /* Allocate slice buffer data */ - if (f->ac == AC_GOLOMB_RICE) - plane_state_size = 8; - else - plane_state_size = CONTEXT_SIZE; - - plane_state_size *= context_count; - slice_state_size = plane_state_size*f->plane_count; - - slice_data_size = 256; /* Overestimation for the SliceContext struct */ - slice_state_size += slice_data_size; - slice_state_size = FFALIGN(slice_state_size, 8); - - /* Allocate slice data buffer */ - slice_data_ref = fv->keyframe_slice_data_ref; - if (!slice_data_ref) { - RET(ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool, - &slice_data_ref, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, slice_state_size*f->slice_count, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)); - - /* Only save it if we're going to use it again */ - if (has_inter) - fv->keyframe_slice_data_ref = slice_data_ref; - } - slice_data_buf = (FFVkBuffer *)slice_data_ref->data; - ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter); - - /* Allocate results buffer */ - RET(ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool, - &fd->results_data_ref, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, 2*f->slice_count*sizeof(uint64_t), - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - results_data_buf = (FFVkBuffer *)fd->results_data_ref->data; - ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->results_data_ref, 1, 1); - - /* Output buffer size */ - maxsize = ff_ffv1_encode_buffer_size(avctx); - maxsize = 
FFMIN(maxsize, fv->s.props_11.maxMemoryAllocationSize); - - /* Allocate output buffer */ - RET(ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool, - &fd->out_data_ref, - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, maxsize, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - (maxsize < fv->max_heap_size ? - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0) | - (!(fv->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) ? - VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0x0))); - out_data_buf = (FFVkBuffer *)fd->out_data_ref->data; - ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1); - - /* Prepare input frame */ - RET(ff_vk_exec_add_dep_frame(&fv->s, exec, src, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - - RET(ff_vk_create_imageviews(&fv->s, exec, src_views, src, - fv->rep_fmt)); - ff_vk_frame_barrier(&fv->s, exec, src, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - if (fv->is_rgb) { - /* Create a temporaty frame */ - tmp = av_frame_alloc(); - if (!(tmp)) - return AVERROR(ENOMEM); - - RET(av_hwframe_get_buffer(fv->intermediate_frames_ref, - tmp, 0)); - - RET(ff_vk_exec_add_dep_frame(&fv->s, exec, tmp, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(&fv->s, exec, tmp_views, - tmp, - fv->rep_fmt)); - } - - /* Setup shader */ - ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->setup, - 1, 0, 0, - slice_data_buf, - 0, slice_data_size*f->slice_count, - VK_FORMAT_UNDEFINED); - ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup, - src, src_views, - 1, 1, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - - /* Add a buffer barrier between previous and current frame */ - if (!f->key_frame) { - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = 
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = slice_data_buf->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = slice_data_buf->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = slice_data_buf->buf, - .size = VK_WHOLE_SIZE, - .offset = 0, - }; - } - - if (fv->optimize_rct) { - RET(run_rct_search(avctx, exec, - src, src_views, - slice_data_buf, slice_data_size)); - - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = slice_data_buf->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = slice_data_buf->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = slice_data_buf->buf, - .size = slice_data_size*f->slice_count, - .offset = 0, - }; - } - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - nb_img_bar = 0; - if (nb_buf_bar) { - slice_data_buf->stage = buf_bar[0].dstStageMask; - slice_data_buf->access = buf_bar[0].dstAccessMask; - nb_buf_bar = 0; - } - - /* Run setup shader */ - ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup); - pd = (FFv1VkParameters) { - .slice_state = slice_data_buf->address + f->slice_count*256, - .out_data = out_data_buf->address, - .bits_per_raw_sample = f->bits_per_raw_sample, - .sar[0] = pict->sample_aspect_ratio.num, - .sar[1] = pict->sample_aspect_ratio.den, - .chroma_shift[0] = f->chroma_h_shift, - .chroma_shift[1] = f->chroma_v_shift, - .plane_state_size = plane_state_size, - .context_count = 
context_count, - .crcref = f->crcref, - .rct_offset = 1 << f->bits_per_raw_sample, - .slice_size_max = out_data_buf->size / f->slice_count, - .context_model = fv->ctx.context_model, - .version = f->version, - .micro_version = f->micro_version, - .force_pcm = fv->force_pcm, - .key_frame = f->key_frame, - .components = fmt_desc->nb_components, - .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt), - .codec_planes = f->plane_count, - .planar_rgb = ff_vk_mt_is_np_rgb(avctx->sw_pix_fmt) && - (ff_vk_count_images((AVVkFrame *)src->data[0]) > 1), - .transparency = f->transparency, - .colorspace = f->colorspace, - .pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 : - !(pict->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 2 : 1, - .ec = f->ec, - .ppi = fv->ppi, - .chunks = fv->chunks, - .rct_search = fv->optimize_rct, - }; - - /* For some reason the C FFv1 encoder/decoder treats these differently */ - if (avctx->sw_pix_fmt == AV_PIX_FMT_GBRP10 || - avctx->sw_pix_fmt == AV_PIX_FMT_GBRP12 || - avctx->sw_pix_fmt == AV_PIX_FMT_GBRP14) - memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int)); - else - ff_vk_set_perm(avctx->sw_pix_fmt, pd.fmt_lut, 1); - - for (int i = 0; i < f->quant_table_count; i++) - pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) || - (f->quant_tables[i][4][127] != 0); - ff_vk_shader_update_push_const(&fv->s, exec, &fv->setup, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); - vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); - - /* Clean up temporary image */ - if (fv->is_rgb) { - AVVkFrame *vkf = (AVVkFrame *)tmp->data[0]; - vk->CmdClearColorImage(exec->buf, vkf->img[0], VK_IMAGE_LAYOUT_GENERAL, - &((VkClearColorValue) { 0 }), - 1, &((VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1, - })); - } - - /* Setup shader modified the slice data buffer */ - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - 
.srcStageMask = slice_data_buf->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = slice_data_buf->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = slice_data_buf->buf, - .size = slice_data_size*f->slice_count, - .offset = 0, - }; - - if (f->key_frame || f->version > 3) { - FFv1VkResetParameters pd_reset; - - ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->reset, - 1, 0, 0, - slice_data_buf, - 0, slice_data_size*f->slice_count, - VK_FORMAT_UNDEFINED); - - /* Run setup shader */ - ff_vk_exec_bind_shader(&fv->s, exec, &fv->reset); - pd_reset = (FFv1VkResetParameters) { - .slice_state = slice_data_buf->address + f->slice_count*256, - .plane_state_size = plane_state_size, - .codec_planes = f->plane_count, - .key_frame = f->key_frame, - }; - for (int i = 0; i < f->quant_table_count; i++) - pd_reset.context_count[i] = f->context_count[i]; - - ff_vk_shader_update_push_const(&fv->s, exec, &fv->reset, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd_reset), &pd_reset); - - /* Sync between setup and reset shaders */ - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - slice_data_buf->stage = buf_bar[0].dstStageMask; - slice_data_buf->access = buf_bar[0].dstAccessMask; - nb_buf_bar = 0; - - vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, - f->plane_count); - } - - /* If the reset shader ran, insert a barrier now. 
*/ - if (f->key_frame || f->version > 3) { - /* Reset shader modified the slice data buffer */ - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = slice_data_buf->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = slice_data_buf->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = slice_data_buf->buf, - .size = slice_data_buf->size - slice_data_size*f->slice_count, - .offset = slice_data_size*f->slice_count, - }; - } - - if (fv->is_rgb) { - ff_vk_frame_barrier(&fv->s, exec, tmp, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - } - - /* Final barrier before encoding */ - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - nb_img_bar = 0; - if (nb_buf_bar) { - slice_data_buf->stage = buf_bar[0].dstStageMask; - slice_data_buf->access = buf_bar[0].dstAccessMask; - nb_buf_bar = 0; - } - - /* Main encode shader */ - ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->enc, - 1, 0, 0, - slice_data_buf, - 0, slice_data_size*f->slice_count, - VK_FORMAT_UNDEFINED); - ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc, - src, src_views, - 1, 1, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - ff_vk_shader_update_desc_buffer(&fv->s, exec, - &fv->enc, 1, 2, 0, - results_data_buf, - 0, results_data_buf->size, - VK_FORMAT_UNDEFINED); - if (fv->is_rgb) - ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc, - tmp, tmp_views, - 1, 3, - 
VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - - ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc); - ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); - vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); - - /* Submit */ - err = ff_vk_exec_submit(&fv->s, exec); - if (err < 0) - return err; - - f->picture_number++; - - /* This, if needed, was referenced by the execution context - * as it was declared as a dependency. */ - av_frame_free(&tmp); - return 0; - -fail: - av_frame_free(&tmp); - ff_vk_exec_discard_deps(&fv->s, exec); - - return err; -} - -static int transfer_slices(AVCodecContext *avctx, - VkBufferCopy *buf_regions, int nb_regions, - VulkanEncodeFFv1FrameData *fd, - uint8_t *dst, AVBufferRef *dst_ref) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFVulkanFunctions *vk = &fv->s.vkfn; - FFVkExecContext *exec; - - FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data; - - AVBufferRef *mapped_ref; - FFVkBuffer *mapped_buf; - - VkBufferMemoryBarrier2 buf_bar[8]; - int nb_buf_bar = 0; - - err = ff_vk_host_map_buffer(&fv->s, &mapped_ref, dst, dst_ref, - VK_BUFFER_USAGE_TRANSFER_DST_BIT); - if (err < 0) - return err; - - mapped_buf = (FFVkBuffer *)mapped_ref->data; - - /* Transfer the slices */ - exec = ff_vk_exec_get(&fv->s, &fv->transfer_exec_pool); - ff_vk_exec_start(&fv->s, exec); - - ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 0); - fd->out_data_ref = NULL; /* Ownership passed */ - - ff_vk_exec_add_dep_buf(&fv->s, exec, &mapped_ref, 1, 0); - mapped_ref = NULL; /* Ownership passed */ - - /* Ensure the output buffer is finished */ - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = out_data_buf->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, - .srcAccessMask = out_data_buf->access, - .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT, - .srcQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = out_data_buf->buf, - .size = VK_WHOLE_SIZE, - .offset = 0, - }; - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - out_data_buf->stage = buf_bar[0].dstStageMask; - out_data_buf->access = buf_bar[0].dstAccessMask; - nb_buf_bar = 0; - - for (int i = 0; i < nb_regions; i++) - buf_regions[i].dstOffset += mapped_buf->virtual_offset; - - vk->CmdCopyBuffer(exec->buf, - out_data_buf->buf, mapped_buf->buf, - nb_regions, buf_regions); - - /* Submit */ - err = ff_vk_exec_submit(&fv->s, exec); - if (err < 0) - return err; - - /* We need the encoded data immediately */ - ff_vk_exec_wait(&fv->s, exec); - - return 0; -} - -static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec, - AVPacket *pkt) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - FFVulkanFunctions *vk = &fv->s.vkfn; - VulkanEncodeFFv1FrameData *fd = exec->opaque; - - FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data; - FFVkBuffer *results_data_buf = (FFVkBuffer *)fd->results_data_ref->data; - uint64_t *sc; - - /* Make sure encoding's done */ - ff_vk_exec_wait(&fv->s, exec); - - /* Invalidate slice/output data if needed */ - if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - VkMappedMemoryRange invalidate_data = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = results_data_buf->mem, - .offset = 0, - .size = VK_WHOLE_SIZE, - }; - vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev, - 1, &invalidate_data); - } - - /* Calculate final size */ - pkt->size = 0; - for (int i = 0; i < f->slice_count; i++) { - sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2]; - av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64", " - "src offset = %"PRIu64"\n", - i, sc[0], sc[1]); - - 
fv->buf_regions[i] = (VkBufferCopy) { - .srcOffset = sc[1], - .dstOffset = pkt->size, - .size = sc[0], - }; - pkt->size += sc[0]; - } - av_log(avctx, AV_LOG_VERBOSE, "Encoded data: %iMiB\n", pkt->size / (1024*1024)); - av_buffer_unref(&fd->results_data_ref); /* No need for this buffer anymore */ - - /* Allocate packet */ - if ((err = ff_get_encode_buffer(avctx, pkt, pkt->size, 0)) < 0) - return err; - - pkt->pts = fd->pts; - pkt->dts = fd->pts; - pkt->duration = fd->duration; - pkt->flags |= AV_PKT_FLAG_KEY * fd->key_frame; - - if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) { - pkt->opaque = fd->frame_opaque; - pkt->opaque_ref = fd->frame_opaque_ref; - fd->frame_opaque_ref = NULL; - } - - /* Try using host mapped memory transfers first */ - if (fv->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) { - err = transfer_slices(avctx, fv->buf_regions, f->slice_count, fd, - pkt->data, pkt->buf); - if (err >= 0) - return err; - } - - /* Invalidate slice/output data if needed */ - if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - VkMappedMemoryRange invalidate_data = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = out_data_buf->mem, - .offset = 0, - .size = VK_WHOLE_SIZE, - }; - vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev, - 1, &invalidate_data); - } - - /* Copy each slice */ - for (int i = 0; i < f->slice_count; i++) { - VkBufferCopy *region = &fv->buf_regions[i]; - memcpy(pkt->data + region->dstOffset, - out_data_buf->mapped_mem + region->srcOffset, - region->size); - } - - av_buffer_unref(&fd->out_data_ref); - - return 0; -} - -static int vulkan_encode_ffv1_receive_packet(AVCodecContext *avctx, - AVPacket *pkt) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - VulkanEncodeFFv1FrameData *fd; - FFVkExecContext *exec; - AVFrame *frame; - - while (1) { - /* Roll an execution context */ - exec = ff_vk_exec_get(&fv->s, &fv->exec_pool); - - /* If it had a frame, immediately output it */ - if (exec->had_submission) { - 
exec->had_submission = 0; - fv->in_flight--; - return get_packet(avctx, exec, pkt); - } - - /* Get next frame to encode */ - frame = fv->frame; - err = ff_encode_get_frame(avctx, frame); - if (err < 0 && err != AVERROR_EOF) { - return err; - } else if (err == AVERROR_EOF) { - if (!fv->in_flight) - return err; - continue; - } - - /* Encode frame */ - fd = exec->opaque; - fd->pts = frame->pts; - fd->duration = frame->duration; - if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) { - fd->frame_opaque = frame->opaque; - fd->frame_opaque_ref = frame->opaque_ref; - frame->opaque_ref = NULL; - } - - err = vulkan_encode_ffv1_submit_frame(avctx, exec, frame); - av_frame_unref(frame); - if (err < 0) - return err; - - fv->in_flight++; - if (fv->in_flight < fv->async_depth) - return AVERROR(EAGAIN); - } - - return 0; -} - -static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - AVHWFramesContext *frames_ctx; - AVVulkanFramesContext *vk_frames; - - fv->intermediate_frames_ref = av_hwframe_ctx_alloc(fv->s.device_ref); - if (!fv->intermediate_frames_ref) - return AVERROR(ENOMEM); - - frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data; - frames_ctx->format = AV_PIX_FMT_VULKAN; - frames_ctx->sw_format = sw_format; - frames_ctx->width = fv->s.frames->width; - frames_ctx->height = f->num_v_slices*RGB_LINECACHE; - - vk_frames = frames_ctx->hwctx; - vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; - vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT; - vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; - - err = av_hwframe_ctx_init(fv->intermediate_frames_ref); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n", - av_get_pix_fmt_name(sw_format), av_err2str(err)); - av_buffer_unref(&fv->intermediate_frames_ref); - return err; - } - - return 0; -} - -static int 
check_support(AVHWFramesConstraints *constraints, - enum AVPixelFormat fmt) -{ - for (int i = 0; constraints->valid_sw_formats[i]; i++) { - if (constraints->valid_sw_formats[i] == fmt) - return 1; - } - return 0; -} - -static enum AVPixelFormat get_supported_rgb_buffer_fmt(AVCodecContext *avctx) -{ - VulkanEncodeFFv1Context *fv = avctx->priv_data; - - enum AVPixelFormat fmt; - AVHWFramesConstraints *constraints; - constraints = av_hwdevice_get_hwframe_constraints(fv->s.device_ref, - NULL); - - /* What we'd like to optimally have */ - fmt = fv->ctx.use32bit ? - (fv->ctx.transparency ? AV_PIX_FMT_RGBA128 : AV_PIX_FMT_RGB96) : - (fv->ctx.transparency ? AV_PIX_FMT_RGBA64 : AV_PIX_FMT_RGB48); - if (check_support(constraints, fmt)) - goto end; - - if (fv->ctx.use32bit) { - if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128))) - goto end; - } else { - if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA64))) - goto end; - - if (!fv->ctx.transparency && - check_support(constraints, (fmt = AV_PIX_FMT_RGB96))) - goto end; - - if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128))) - goto end; - } - - fmt = AV_PIX_FMT_NONE; - -end: - av_hwframe_constraints_free(&constraints); - return fmt; -} - -static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd) -{ - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - int smp_bits = fv->ctx.use32bit ? 
32 : 16; - - av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE); - av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK); - - if (f->ac == AC_GOLOMB_RICE) { - av_bprintf(&shd->src, "#define PB_UNALIGNED\n" ); - av_bprintf(&shd->src, "#define GOLOMB\n" ); - } - - if (fv->is_rgb) - av_bprintf(&shd->src, "#define RGB\n"); - - GLSLF(0, #define TYPE int%i_t ,smp_bits); - GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits); - GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits); - GLSLD(ff_source_rangecoder_comp); - - if (f->ac == AC_GOLOMB_RICE) - GLSLD(ff_source_ffv1_vlc_comp); - - GLSLD(ff_source_ffv1_common_comp); -} - -static int init_rct_search_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - FFVulkanShader *shd = &fv->rct_search; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - - RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct_search", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2", - "GL_EXT_null_initializer" }, 3, - 32, 32, 1, - 0)); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, ivec4 fmt_lut; ); - GLSLC(1, int rct_offset; ); - GLSLC(1, uint8_t planar_rgb; ); - GLSLC(1, uint8_t transparency; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t force_pcm; ); - GLSLC(1, uint8_t version; ); - GLSLC(1, uint8_t micro_version; ); - GLSLC(1, uint8_t padding[3]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, 
"#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); - - /* Never used */ - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", - }, - { - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", - }, - }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 1)); - - define_shared_code(avctx, shd); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, - }, - { - .name = "src", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format, - fv->rep_fmt), - .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); - - GLSLD(ff_source_ffv1_rct_search_comp); - - RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - -static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - FFVulkanShader *shd = &fv->setup; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - - 
RET(ff_vk_shader_init(&fv->s, shd, "ffv1_setup", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 1, 1, 1, - 0)); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - add_push_data(shd); - - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); - av_bprintf(&shd->src, "#define FULL_RENORM\n"); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", - }, - { /* This descriptor is never used */ - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", - }, - }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0)); - - define_shared_code(avctx, shd); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, - }, - { - .name = "src", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format, - fv->rep_fmt), - .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); - - GLSLD(ff_source_ffv1_enc_setup_comp); - - RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - 
RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - -static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - FFVulkanShader *shd = &fv->reset; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - int wg_dim = FFMIN(fv->s.props.properties.limits.maxComputeWorkGroupSize[0], 1024); - - RET(ff_vk_shader_init(&fv->s, shd, "ffv1_reset", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - wg_dim, 1, 1, - 0)); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES); - GLSLC(1, u8buf slice_state; ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t version; ); - GLSLC(1, uint8_t micro_version; ); - GLSLC(1, uint8_t padding[1]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", - }, - { - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - 
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", - }, - }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0)); - - define_shared_code(avctx, shd); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, - }, - }; - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0)); - - GLSLD(ff_source_ffv1_reset_comp); - - RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - -static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) -{ - int err; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - FFVulkanShader *shd = &fv->enc; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - int use_cached_reader = fv->ctx.ac != AC_GOLOMB_RICE; - - RET(ff_vk_shader_init(&fv->s, shd, "ffv1_enc", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - use_cached_reader ? 
CONTEXT_SIZE : 1, 1, 1, - 0)); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - add_push_data(shd); - - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); - - if (use_cached_reader) - av_bprintf(&shd->src, "#define CACHED_SYMBOL_READER 1\n"); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", - }, - { - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", - }, - { - .name = "crc_ieee_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint32_t crc_ieee[256];", - }, - }; - - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 1, 0)); - - define_shared_code(avctx, shd); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, - }, - { - .name = "src", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format, - fv->rep_fmt), - .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "results_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "writeonly", - .buf_content = "uint64_t 
slice_results[2048];", - }, - { /* place holder for desc_set[3] */ - }, - }; - if (fv->is_rgb) { - AVHWFramesContext *intermediate_frames_ctx; - intermediate_frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data; - desc_set[3] = (FFVulkanDescriptorSetBinding) { - .name = "tmp", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(intermediate_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }; - } - RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3 + fv->is_rgb, 0, 0)); - - GLSLD(ff_source_ffv1_enc_comp); - - RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - -static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) -{ - int err; - size_t maxsize, max_heap_size, max_host_size; - VulkanEncodeFFv1Context *fv = avctx->priv_data; - FFV1Context *f = &fv->ctx; - FFVkSPIRVCompiler *spv; - - if ((err = ff_ffv1_common_init(avctx, f)) < 0) - return err; - - if (f->ac == 1) - f->ac = AC_RANGE_CUSTOM_TAB; - - err = ff_ffv1_encode_setup_plane_info(avctx, avctx->sw_pix_fmt); - if (err < 0) - return err; - - /* Target version 3 by default */ - f->version = 3; - - err = ff_ffv1_encode_init(avctx); - if (err < 0) - return err; - - /* Rice coding did not support high bit depths */ - if (f->bits_per_raw_sample > (f->version > 3 ? 
16 : 8)) { - if (f->ac == AC_GOLOMB_RICE) { - av_log(avctx, AV_LOG_WARNING, "bits_per_raw_sample > 8, " - "forcing range coder\n"); - f->ac = AC_RANGE_CUSTOM_TAB; - } - } - - if (f->version < 4 && avctx->gop_size > 1) { - av_log(avctx, AV_LOG_ERROR, "Using inter frames requires version 4 (-level 4)\n"); - return AVERROR_INVALIDDATA; - } - - if (f->version == 4 && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) { - av_log(avctx, AV_LOG_ERROR, "Version 4 is experimental and requires -strict -2\n"); - return AVERROR_INVALIDDATA; - } - - /* We target version 4.3 */ - if (f->version == 4 && f->micro_version > 4) - f->micro_version = 3; - - //if (fv->ctx.ac == AC_GOLOMB_RICE) { - if (0) { - int w_a = FFALIGN(avctx->width, LG_ALIGN_W); - int h_a = FFALIGN(avctx->height, LG_ALIGN_H); - int w_sl, h_sl; - - /* Pixels per line an invocation handles */ - int ppi = 0; - /* Chunk size */ - int chunks = 0; - - do { - if (ppi < 2) - ppi++; - chunks++; - w_sl = w_a / (LG_ALIGN_W*ppi); - h_sl = h_a / (LG_ALIGN_H*chunks); - } while (w_sl > MAX_SLICES / h_sl); - - av_log(avctx, AV_LOG_VERBOSE, "Slice config: %ix%i, %i total\n", - LG_ALIGN_W*ppi, LG_ALIGN_H*chunks, w_sl*h_sl); - av_log(avctx, AV_LOG_VERBOSE, "Horizontal slices: %i (%i pixels per invoc)\n", - w_sl, ppi); - av_log(avctx, AV_LOG_VERBOSE, "Vertical slices: %i (%i chunks)\n", - h_sl, chunks); - - f->num_h_slices = w_sl; - f->num_v_slices = h_sl; - - fv->ppi = ppi; - fv->chunks = chunks; - } else { - f->num_h_slices = fv->num_h_slices; - f->num_v_slices = fv->num_v_slices; - - if (f->num_h_slices <= 0 && f->num_v_slices <= 0) { - if (avctx->slices) { - err = ff_ffv1_encode_determine_slices(avctx); - if (err < 0) - return err; - } else { - f->num_h_slices = 32; - f->num_v_slices = 32; - } - } else if (f->num_h_slices && f->num_v_slices <= 0) { - f->num_v_slices = MAX_SLICES / f->num_h_slices; - } else if (f->num_v_slices && f->num_h_slices <= 0) { - f->num_h_slices = MAX_SLICES / f->num_v_slices; - } - - 
f->num_h_slices = FFMIN(f->num_h_slices, avctx->width); - f->num_v_slices = FFMIN(f->num_v_slices, avctx->height); - - if (f->num_h_slices * f->num_v_slices > MAX_SLICES) { - av_log(avctx, AV_LOG_ERROR, "Too many slices (%i), maximum supported " - "by the standard is %i\n", - f->num_h_slices * f->num_v_slices, MAX_SLICES); - return AVERROR_PATCHWELCOME; - } - } - - f->max_slice_count = f->num_h_slices * f->num_v_slices; - - if ((err = ff_ffv1_write_extradata(avctx)) < 0) - return err; - - if (f->version < 4) { - if (((f->chroma_h_shift > 0) && (avctx->width % (64 << f->chroma_h_shift))) || - ((f->chroma_v_shift > 0) && (avctx->height % (64 << f->chroma_v_shift)))) { - av_log(avctx, AV_LOG_ERROR, "Encoding frames with subsampling and unaligned " - "dimensions is only supported in version 4 (-level 4)\n"); - return AVERROR_PATCHWELCOME; - } - } - - if (fv->force_pcm) { - if (f->version < 4) { - av_log(avctx, AV_LOG_ERROR, "PCM coding only supported by version 4 (-level 4)\n"); - return AVERROR_INVALIDDATA; - } else if (f->ac == AC_GOLOMB_RICE) { - av_log(avctx, AV_LOG_ERROR, "PCM coding requires range coding\n"); - return AVERROR_INVALIDDATA; - } - } - - /* Init Vulkan */ - err = ff_vk_init(&fv->s, avctx, NULL, avctx->hw_frames_ctx); - if (err < 0) - return err; - - fv->qf = ff_vk_qf_find(&fv->s, VK_QUEUE_COMPUTE_BIT, 0); - if (!fv->qf) { - av_log(avctx, AV_LOG_ERROR, "Device has no compute queues!\n"); - return err; - } - - /* Try to measure VRAM size */ - max_heap_size = 0; - max_host_size = 0; - for (int i = 0; i < fv->s.mprops.memoryHeapCount; i++) { - if (fv->s.mprops.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) - max_heap_size = FFMAX(fv->max_heap_size, - fv->s.mprops.memoryHeaps[i].size); - if (!(fv->s.mprops.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)) - max_host_size = FFMAX(max_host_size, - fv->s.mprops.memoryHeaps[i].size); - } - fv->max_heap_size = max_heap_size; - - maxsize = ff_ffv1_encode_buffer_size(avctx); - if (maxsize > 
fv->s.props_11.maxMemoryAllocationSize) { - av_log(avctx, AV_LOG_WARNING, "Encoding buffer size (%zu) larger " - "than maximum device allocation (%zu), clipping\n", - maxsize, fv->s.props_11.maxMemoryAllocationSize); - maxsize = fv->s.props_11.maxMemoryAllocationSize; - } - - if (max_heap_size < maxsize) { - av_log(avctx, AV_LOG_WARNING, "Encoding buffer (%zu) larger than VRAM (%zu), " - "using host memory (slower)\n", - maxsize, fv->max_heap_size); - - /* Keep 1/2th of RAM as headroom */ - max_heap_size = max_host_size - (max_host_size >> 1); - } else { - /* Keep 1/8th of VRAM as headroom */ - max_heap_size = max_heap_size - (max_heap_size >> 3); - } - - av_log(avctx, AV_LOG_INFO, "Async buffers: %zuMiB per context, %zuMiB total, depth: %i\n", - maxsize / (1024*1024), - (fv->async_depth * maxsize) / (1024*1024), - fv->async_depth); - - err = ff_vk_exec_pool_init(&fv->s, fv->qf, &fv->exec_pool, - fv->async_depth, - 0, 0, 0, NULL); - if (err < 0) - return err; - - fv->transfer_qf = ff_vk_qf_find(&fv->s, VK_QUEUE_TRANSFER_BIT, 0); - if (!fv->transfer_qf) { - av_log(avctx, AV_LOG_ERROR, "Device has no transfer queues!\n"); - return err; - } - - err = ff_vk_exec_pool_init(&fv->s, fv->transfer_qf, &fv->transfer_exec_pool, - 1, - 0, 0, 0, NULL); - if (err < 0) - return err; - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - /* Detect the special RGB coding mode */ - fv->is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) && - !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8); - - /* bits_per_raw_sample use regular unsigned representation, - * but in higher bit depths, the data is casted to int16_t */ - fv->rep_fmt = FF_VK_REP_UINT; - if (!fv->is_rgb && f->bits_per_raw_sample > 8) - fv->rep_fmt = FF_VK_REP_INT; - - /* Init rct search shader */ - fv->optimize_rct = fv->is_rgb && f->version >= 4 && - !fv->force_pcm && fv->optimize_rct; - if (fv->optimize_rct) { - err 
= init_rct_search_shader(avctx, spv); - if (err < 0) { - spv->uninit(&spv); - return err; - } - } - - /* Init setup shader */ - err = init_setup_shader(avctx, spv); - if (err < 0) { - spv->uninit(&spv); - return err; - } - - /* Init reset shader */ - err = init_reset_shader(avctx, spv); - if (err < 0) { - spv->uninit(&spv); - return err; - } - - if (fv->is_rgb) { - enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx); - if (intermediate_fmt == AV_PIX_FMT_NONE) { - av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible " - "pixel format for RCT buffer!\n"); - return AVERROR(ENOTSUP); - } - - RET(init_indirect(avctx, intermediate_fmt)); - } - - /* Encode shader */ - err = init_encode_shader(avctx, spv); - if (err < 0) { - spv->uninit(&spv); - return err; - } - - spv->uninit(&spv); - - /* Range coder data */ - err = ff_ffv1_vk_init_state_transition_data(&fv->s, - &fv->rangecoder_static_buf, - f); - if (err < 0) - return err; - - /* Quantization table data */ - err = ff_ffv1_vk_init_quant_table_data(&fv->s, - &fv->quant_buf, - f); - if (err < 0) - return err; - - /* CRC table buffer */ - err = ff_ffv1_vk_init_crc_table_data(&fv->s, - &fv->crc_tab_buf, - f); - if (err < 0) - return err; - - /* Update setup global descriptors */ - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->setup, 0, 0, 0, - &fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, - VK_FORMAT_UNDEFINED)); - - /* Update encode global descriptors */ - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->enc, 0, 0, 0, - &fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, - VK_FORMAT_UNDEFINED)); - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->enc, 0, 1, 0, - &fv->quant_buf, - 0, fv->quant_buf.size, - VK_FORMAT_UNDEFINED)); - RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], - &fv->enc, 0, 2, 0, - &fv->crc_tab_buf, - 0, fv->crc_tab_buf.size, 
- VK_FORMAT_UNDEFINED)); - - /* Temporary frame */ - fv->frame = av_frame_alloc(); - if (!fv->frame) - return AVERROR(ENOMEM); - - /* Async data pool */ - fv->async_depth = fv->exec_pool.pool_size; - fv->exec_ctx_info = av_calloc(fv->async_depth, sizeof(*fv->exec_ctx_info)); - if (!fv->exec_ctx_info) - return AVERROR(ENOMEM); - for (int i = 0; i < fv->async_depth; i++) - fv->exec_pool.contexts[i].opaque = &fv->exec_ctx_info[i]; - - fv->buf_regions = av_malloc_array(f->max_slice_count, sizeof(*fv->buf_regions)); - if (!fv->buf_regions) - return AVERROR(ENOMEM); - -fail: - return err; -} - -static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx) -{ - VulkanEncodeFFv1Context *fv = avctx->priv_data; - - ff_vk_exec_pool_free(&fv->s, &fv->exec_pool); - ff_vk_exec_pool_free(&fv->s, &fv->transfer_exec_pool); - - ff_vk_shader_free(&fv->s, &fv->enc); - ff_vk_shader_free(&fv->s, &fv->reset); - ff_vk_shader_free(&fv->s, &fv->setup); - ff_vk_shader_free(&fv->s, &fv->rct_search); - - if (fv->exec_ctx_info) { - for (int i = 0; i < fv->async_depth; i++) { - VulkanEncodeFFv1FrameData *fd = &fv->exec_ctx_info[i]; - av_buffer_unref(&fd->out_data_ref); - av_buffer_unref(&fd->results_data_ref); - av_buffer_unref(&fd->frame_opaque_ref); - } - } - av_free(fv->exec_ctx_info); - - av_buffer_unref(&fv->intermediate_frames_ref); - - av_buffer_pool_uninit(&fv->results_data_pool); - - av_buffer_pool_uninit(&fv->out_data_pool); - - av_buffer_unref(&fv->keyframe_slice_data_ref); - av_buffer_pool_uninit(&fv->slice_data_pool); - - ff_vk_free_buf(&fv->s, &fv->quant_buf); - ff_vk_free_buf(&fv->s, &fv->rangecoder_static_buf); - ff_vk_free_buf(&fv->s, &fv->crc_tab_buf); - - av_free(fv->buf_regions); - av_frame_free(&fv->frame); - ff_vk_uninit(&fv->s); - - return 0; -} - -#define OFFSET(x) offsetof(VulkanEncodeFFv1Context, x) -#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM -static const AVOption vulkan_encode_ffv1_options[] = { - { "slicecrc", "Protect slices with CRCs", 
OFFSET(ctx.ec), AV_OPT_TYPE_INT, - { .i64 = -1 }, -1, 2, VE }, - { "context", "Context model", OFFSET(ctx.context_model), AV_OPT_TYPE_INT, - { .i64 = 0 }, 0, 1, VE }, - { "coder", "Coder type", OFFSET(ctx.ac), AV_OPT_TYPE_INT, - { .i64 = AC_RANGE_CUSTOM_TAB }, -2, 2, VE, .unit = "coder" }, - { "rice", "Golomb rice", 0, AV_OPT_TYPE_CONST, - { .i64 = AC_GOLOMB_RICE }, INT_MIN, INT_MAX, VE, .unit = "coder" }, - { "range_def", "Range with default table", 0, AV_OPT_TYPE_CONST, - { .i64 = AC_RANGE_DEFAULT_TAB_FORCE }, INT_MIN, INT_MAX, VE, .unit = "coder" }, - { "range_tab", "Range with custom table", 0, AV_OPT_TYPE_CONST, - { .i64 = AC_RANGE_CUSTOM_TAB }, INT_MIN, INT_MAX, VE, .unit = "coder" }, - { "qtable", "Quantization table", OFFSET(ctx.qtable), AV_OPT_TYPE_INT, - { .i64 = -1 }, -1, 2, VE , .unit = "qtable"}, - { "default", NULL, 0, AV_OPT_TYPE_CONST, - { .i64 = QTABLE_DEFAULT }, INT_MIN, INT_MAX, VE, .unit = "qtable" }, - { "8bit", NULL, 0, AV_OPT_TYPE_CONST, - { .i64 = QTABLE_8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" }, - { "greater8bit", NULL, 0, AV_OPT_TYPE_CONST, - { .i64 = QTABLE_GT8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" }, - - { "slices_h", "Number of horizontal slices", OFFSET(num_h_slices), AV_OPT_TYPE_INT, - { .i64 = -1 }, -1, MAX_SLICES, VE }, - { "slices_v", "Number of vertical slices", OFFSET(num_v_slices), AV_OPT_TYPE_INT, - { .i64 = -1 }, -1, MAX_SLICES, VE }, - - { "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL, - { .i64 = 0 }, 0, 1, VE }, - - { "rct_search", "Run a search for RCT parameters (level 4 only)", OFFSET(optimize_rct), AV_OPT_TYPE_BOOL, - { .i64 = 1 }, 0, 1, VE }, - - { "async_depth", "Internal parallelization depth", OFFSET(async_depth), AV_OPT_TYPE_INT, - { .i64 = 1 }, 1, INT_MAX, VE }, - - { NULL } -}; - -static const FFCodecDefault vulkan_encode_ffv1_defaults[] = { - { "g", "1" }, - { NULL }, -}; - -static const AVClass vulkan_encode_ffv1_class = { - .class_name = "ffv1_vulkan", - 
.item_name = av_default_item_name, - .option = vulkan_encode_ffv1_options, - .version = LIBAVUTIL_VERSION_INT, -}; - -const AVCodecHWConfigInternal *const vulkan_encode_ffv1_hw_configs[] = { - HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), - NULL, -}; - -const FFCodec ff_ffv1_vulkan_encoder = { - .p.name = "ffv1_vulkan", - CODEC_LONG_NAME("FFmpeg video codec #1 (Vulkan)"), - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_FFV1, - .priv_data_size = sizeof(VulkanEncodeFFv1Context), - .init = &vulkan_encode_ffv1_init, - FF_CODEC_RECEIVE_PACKET_CB(&vulkan_encode_ffv1_receive_packet), - .close = &vulkan_encode_ffv1_close, - .p.priv_class = &vulkan_encode_ffv1_class, - .p.capabilities = AV_CODEC_CAP_DELAY | - AV_CODEC_CAP_HARDWARE | - AV_CODEC_CAP_DR1 | - AV_CODEC_CAP_ENCODER_FLUSH | - AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, - .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH, - .defaults = vulkan_encode_ffv1_defaults, - CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), - .hw_configs = vulkan_encode_ffv1_hw_configs, - .p.wrapper_name = "vulkan", -}; -- 2.49.1 From d3767c0fcc87dec9a467071f5ae33ceeec30780f Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:25:53 +0000 Subject: [PATCH 060/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1enc_vulkan.c | 1848 ++++++++++++++++++++++++++++ 1 file changed, 1848 insertions(+) create mode 100644 libavcodec/vulkan/ffv1enc_vulkan.c diff --git a/libavcodec/vulkan/ffv1enc_vulkan.c b/libavcodec/vulkan/ffv1enc_vulkan.c new file mode 100644 index 0000000000..883ad75d9b --- /dev/null +++ b/libavcodec/vulkan/ffv1enc_vulkan.c @@ -0,0 +1,1848 @@ +/* + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mem.h" +#include "../../libavutil/vulkan/vulkan.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" + +#include "libavcodec/avcodec.h" +#include "libavcodec/internal.h" +#include "libavcodec/hwconfig.h" +#include "libavcodec/encode.h" +#include "libavutil/opt.h" +#include "libavcodec/codec_internal.h" + +#include "libavcodec/ffv1.h" +#include "libavcodec/ffv1enc.h" +#include "ffv1_vulkan.h" + +/* Parallel Golomb alignment */ +#define LG_ALIGN_W 32 +#define LG_ALIGN_H 32 + +/* Unlike the decoder, we need 4 lines (but really only 3) */ +#define RGB_LINECACHE 4 + +typedef struct VulkanEncodeFFv1FrameData { + /* Output data */ + AVBufferRef *out_data_ref; + + /* Results data */ + AVBufferRef *results_data_ref; + + /* Copied from the source */ + int64_t pts; + int64_t duration; + void *frame_opaque; + AVBufferRef *frame_opaque_ref; + + int key_frame; +} VulkanEncodeFFv1FrameData; + +typedef struct VulkanEncodeFFv1Context { + FFV1Context ctx; + AVFrame *frame; + + FFVulkanContext s; + AVVulkanDeviceQueueFamily *qf; + FFVkExecPool exec_pool; + + AVVulkanDeviceQueueFamily *transfer_qf; + FFVkExecPool transfer_exec_pool; + + VkBufferCopy *buf_regions; + VulkanEncodeFFv1FrameData *exec_ctx_info; + int in_flight; + int async_depth; + size_t max_heap_size; + + FFVulkanShader setup; + FFVulkanShader rct_search; + FFVulkanShader reset; + FFVulkanShader enc; + + /* Constant read-only buffers */ + FFVkBuffer quant_buf; + FFVkBuffer rangecoder_static_buf; + FFVkBuffer crc_tab_buf; + + /* 
Slice data buffer pool */ + AVBufferPool *slice_data_pool; + AVBufferRef *keyframe_slice_data_ref; + + /* Output data buffer */ + AVBufferPool *out_data_pool; + + /* Slice results buffer */ + AVBufferPool *results_data_pool; + + /* Intermediate frame pool */ + AVBufferRef *intermediate_frames_ref; + + /* Representation mode */ + enum FFVkShaderRepFormat rep_fmt; + + int num_h_slices; + int num_v_slices; + int force_pcm; + int optimize_rct; + + int is_rgb; + int ppi; + int chunks; +} VulkanEncodeFFv1Context; + +extern const char *ff_source_common_comp; +extern const char *ff_source_rangecoder_comp; +extern const char *ff_source_ffv1_vlc_comp; +extern const char *ff_source_ffv1_common_comp; +extern const char *ff_source_ffv1_reset_comp; +extern const char *ff_source_ffv1_rct_search_comp; +extern const char *ff_source_ffv1_enc_setup_comp; +extern const char *ff_source_ffv1_enc_comp; + +typedef struct FFv1VkParameters { + VkDeviceAddress slice_state; + VkDeviceAddress scratch_data; + VkDeviceAddress out_data; + + int32_t fmt_lut[4]; + int32_t sar[2]; + uint32_t chroma_shift[2]; + + uint32_t plane_state_size; + uint32_t context_count; + uint32_t crcref; + uint32_t slice_size_max; + int rct_offset; + + uint8_t extend_lookup[8]; + uint8_t bits_per_raw_sample; + uint8_t context_model; + uint8_t version; + uint8_t micro_version; + uint8_t force_pcm; + uint8_t key_frame; + uint8_t components; + uint8_t planes; + uint8_t codec_planes; + uint8_t planar_rgb; + uint8_t transparency; + uint8_t colorspace; + uint8_t pic_mode; + uint8_t ec; + uint8_t ppi; + uint8_t chunks; + uint8_t rct_search; + uint8_t padding[3]; +} FFv1VkParameters; + +static void add_push_data(FFVulkanShader *shd) +{ + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, u8buf slice_state; ); + GLSLC(1, u8buf scratch_data; ); + GLSLC(1, u8buf out_data; ); + GLSLC(0, ); + GLSLC(1, ivec4 fmt_lut; ); + GLSLC(1, ivec2 sar; ); + GLSLC(1, uvec2 chroma_shift; ); + GLSLC(0, ); + GLSLC(1, uint 
plane_state_size; ); + GLSLC(1, uint context_count; ); + GLSLC(1, uint32_t crcref; ); + GLSLC(1, uint32_t slice_size_max; ); + GLSLC(1, int rct_offset; ); + GLSLC(0, ); + GLSLC(1, uint8_t extend_lookup[8]; ); + GLSLC(1, uint8_t bits_per_raw_sample; ); + GLSLC(1, uint8_t context_model; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t force_pcm; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t components; ); + GLSLC(1, uint8_t planes; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t planar_rgb; ); + GLSLC(1, uint8_t transparency; ); + GLSLC(1, uint8_t colorspace; ); + GLSLC(1, uint8_t pic_mode; ); + GLSLC(1, uint8_t ec; ); + GLSLC(1, uint8_t ppi; ); + GLSLC(1, uint8_t chunks; ); + GLSLC(1, uint8_t rct_search; ); + GLSLC(1, uint8_t padding[3]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), + VK_SHADER_STAGE_COMPUTE_BIT); +} + +typedef struct FFv1VkRCTSearchParameters { + int fmt_lut[4]; + int rct_offset; + uint8_t planar_rgb; + uint8_t transparency; + uint8_t key_frame; + uint8_t force_pcm; + uint8_t version; + uint8_t micro_version; + uint8_t padding[2]; +} FFv1VkRCTSearchParameters; + +static int run_rct_search(AVCodecContext *avctx, FFVkExecContext *exec, + AVFrame *enc_in, VkImageView *enc_in_views, + FFVkBuffer *slice_data_buf, uint32_t slice_data_size) +{ + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFV1Context *f = &fv->ctx; + FFVulkanFunctions *vk = &fv->s.vkfn; + AVHWFramesContext *src_hwfc = (AVHWFramesContext *)enc_in->hw_frames_ctx->data; + FFv1VkRCTSearchParameters pd; + + /* Update descriptors */ + ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->rct_search, + 0, 0, 0, + slice_data_buf, + 0, slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct_search, + enc_in, enc_in_views, + 0, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct_search); + + pd = 
(FFv1VkRCTSearchParameters) { + .rct_offset = 1 << f->bits_per_raw_sample, + .planar_rgb = ff_vk_mt_is_np_rgb(src_hwfc->sw_format) && + (ff_vk_count_images((AVVkFrame *)enc_in->data[0]) > 1), + .transparency = f->transparency, + .key_frame = f->key_frame, + .force_pcm = fv->force_pcm, + .version = f->version, + .micro_version = f->micro_version, + }; + + if (avctx->sw_pix_fmt == AV_PIX_FMT_GBRP10 || + avctx->sw_pix_fmt == AV_PIX_FMT_GBRP12 || + avctx->sw_pix_fmt == AV_PIX_FMT_GBRP14) + memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int)); + else + ff_vk_set_perm(avctx->sw_pix_fmt, pd.fmt_lut, 1); + + ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct_search, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); + + return 0; +} + +static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, + FFVkExecContext *exec, + const AVFrame *pict) +{ + int err; + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFV1Context *f = &fv->ctx; + FFVulkanFunctions *vk = &fv->s.vkfn; + + VulkanEncodeFFv1FrameData *fd = exec->opaque; + FFv1VkParameters pd; + + /* Slice data */ + AVBufferRef *slice_data_ref; + FFVkBuffer *slice_data_buf; + uint32_t plane_state_size; + uint32_t slice_state_size; + uint32_t slice_data_size; + + /* Output data */ + size_t maxsize; + FFVkBuffer *out_data_buf; + + /* Results data */ + FFVkBuffer *results_data_buf; + + int has_inter = avctx->gop_size > 1; + uint32_t context_count = f->context_count[f->context_model]; + const AVPixFmtDescriptor *fmt_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + + AVFrame *src = (AVFrame *)pict; + VkImageView src_views[AV_NUM_DATA_POINTERS]; + + AVFrame *tmp = NULL; + VkImageView tmp_views[AV_NUM_DATA_POINTERS]; + + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + /* Start recording */ + ff_vk_exec_start(&fv->s, exec); + + /* Frame state */ + f->cur_enc_frame 
= pict; + if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) { + av_buffer_unref(&fv->keyframe_slice_data_ref); + f->key_frame = fd->key_frame = 1; + f->gob_count++; + } else { + f->key_frame = fd->key_frame = 0; + } + + f->slice_count = f->max_slice_count; + + /* Allocate slice buffer data */ + if (f->ac == AC_GOLOMB_RICE) + plane_state_size = 8; + else + plane_state_size = CONTEXT_SIZE; + + plane_state_size *= context_count; + slice_state_size = plane_state_size*f->plane_count; + + slice_data_size = 256; /* Overestimation for the SliceContext struct */ + slice_state_size += slice_data_size; + slice_state_size = FFALIGN(slice_state_size, 8); + + /* Allocate slice data buffer */ + slice_data_ref = fv->keyframe_slice_data_ref; + if (!slice_data_ref) { + RET(ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool, + &slice_data_ref, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, slice_state_size*f->slice_count, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)); + + /* Only save it if we're going to use it again */ + if (has_inter) + fv->keyframe_slice_data_ref = slice_data_ref; + } + slice_data_buf = (FFVkBuffer *)slice_data_ref->data; + ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter); + + /* Allocate results buffer */ + RET(ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool, + &fd->results_data_ref, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, 2*f->slice_count*sizeof(uint64_t), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + results_data_buf = (FFVkBuffer *)fd->results_data_ref->data; + ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->results_data_ref, 1, 1); + + /* Output buffer size */ + maxsize = ff_ffv1_encode_buffer_size(avctx); + maxsize = FFMIN(maxsize, fv->s.props_11.maxMemoryAllocationSize); + + /* Allocate output buffer */ + RET(ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool, + &fd->out_data_ref, + 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, maxsize, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + (maxsize < fv->max_heap_size ? + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0) | + (!(fv->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) ? + VK_MEMORY_PROPERTY_HOST_CACHED_BIT : 0x0))); + out_data_buf = (FFVkBuffer *)fd->out_data_ref->data; + ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1); + + /* Prepare input frame */ + RET(ff_vk_exec_add_dep_frame(&fv->s, exec, src, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + RET(ff_vk_create_imageviews(&fv->s, exec, src_views, src, + fv->rep_fmt)); + ff_vk_frame_barrier(&fv->s, exec, src, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + if (fv->is_rgb) { + /* Create a temporaty frame */ + tmp = av_frame_alloc(); + if (!(tmp)) + return AVERROR(ENOMEM); + + RET(av_hwframe_get_buffer(fv->intermediate_frames_ref, + tmp, 0)); + + RET(ff_vk_exec_add_dep_frame(&fv->s, exec, tmp, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(&fv->s, exec, tmp_views, + tmp, + fv->rep_fmt)); + } + + /* Setup shader */ + ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->setup, + 1, 0, 0, + slice_data_buf, + 0, slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup, + src, src_views, + 1, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + /* Add a buffer barrier between previous and current frame */ + if (!f->key_frame) { + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_data_buf->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = 
slice_data_buf->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_data_buf->buf, + .size = VK_WHOLE_SIZE, + .offset = 0, + }; + } + + if (fv->optimize_rct) { + RET(run_rct_search(avctx, exec, + src, src_views, + slice_data_buf, slice_data_size)); + + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_data_buf->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_data_buf->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_data_buf->buf, + .size = slice_data_size*f->slice_count, + .offset = 0, + }; + } + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + nb_img_bar = 0; + if (nb_buf_bar) { + slice_data_buf->stage = buf_bar[0].dstStageMask; + slice_data_buf->access = buf_bar[0].dstAccessMask; + nb_buf_bar = 0; + } + + /* Run setup shader */ + ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup); + pd = (FFv1VkParameters) { + .slice_state = slice_data_buf->address + f->slice_count*256, + .out_data = out_data_buf->address, + .bits_per_raw_sample = f->bits_per_raw_sample, + .sar[0] = pict->sample_aspect_ratio.num, + .sar[1] = pict->sample_aspect_ratio.den, + .chroma_shift[0] = f->chroma_h_shift, + .chroma_shift[1] = f->chroma_v_shift, + .plane_state_size = plane_state_size, + .context_count = context_count, + .crcref = f->crcref, + .rct_offset = 1 << f->bits_per_raw_sample, + .slice_size_max = out_data_buf->size / f->slice_count, + .context_model = 
fv->ctx.context_model, + .version = f->version, + .micro_version = f->micro_version, + .force_pcm = fv->force_pcm, + .key_frame = f->key_frame, + .components = fmt_desc->nb_components, + .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt), + .codec_planes = f->plane_count, + .planar_rgb = ff_vk_mt_is_np_rgb(avctx->sw_pix_fmt) && + (ff_vk_count_images((AVVkFrame *)src->data[0]) > 1), + .transparency = f->transparency, + .colorspace = f->colorspace, + .pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 : + !(pict->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 2 : 1, + .ec = f->ec, + .ppi = fv->ppi, + .chunks = fv->chunks, + .rct_search = fv->optimize_rct, + }; + + /* For some reason the C FFv1 encoder/decoder treats these differently */ + if (avctx->sw_pix_fmt == AV_PIX_FMT_GBRP10 || + avctx->sw_pix_fmt == AV_PIX_FMT_GBRP12 || + avctx->sw_pix_fmt == AV_PIX_FMT_GBRP14) + memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int)); + else + ff_vk_set_perm(avctx->sw_pix_fmt, pd.fmt_lut, 1); + + for (int i = 0; i < f->quant_table_count; i++) + pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) || + (f->quant_tables[i][4][127] != 0); + ff_vk_shader_update_push_const(&fv->s, exec, &fv->setup, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); + + /* Clean up temporary image */ + if (fv->is_rgb) { + AVVkFrame *vkf = (AVVkFrame *)tmp->data[0]; + vk->CmdClearColorImage(exec->buf, vkf->img[0], VK_IMAGE_LAYOUT_GENERAL, + &((VkClearColorValue) { 0 }), + 1, &((VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + })); + } + + /* Setup shader modified the slice data buffer */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_data_buf->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_data_buf->access, + .dstAccessMask = 
VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_data_buf->buf, + .size = slice_data_size*f->slice_count, + .offset = 0, + }; + + if (f->key_frame || f->version > 3) { + FFv1VkResetParameters pd_reset; + + ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->reset, + 1, 0, 0, + slice_data_buf, + 0, slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + + /* Run setup shader */ + ff_vk_exec_bind_shader(&fv->s, exec, &fv->reset); + pd_reset = (FFv1VkResetParameters) { + .slice_state = slice_data_buf->address + f->slice_count*256, + .plane_state_size = plane_state_size, + .codec_planes = f->plane_count, + .key_frame = f->key_frame, + }; + for (int i = 0; i < f->quant_table_count; i++) + pd_reset.context_count[i] = f->context_count[i]; + + ff_vk_shader_update_push_const(&fv->s, exec, &fv->reset, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd_reset), &pd_reset); + + /* Sync between setup and reset shaders */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + slice_data_buf->stage = buf_bar[0].dstStageMask; + slice_data_buf->access = buf_bar[0].dstAccessMask; + nb_buf_bar = 0; + + vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, + f->plane_count); + } + + /* If the reset shader ran, insert a barrier now. 
*/ + if (f->key_frame || f->version > 3) { + /* Reset shader modified the slice data buffer */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_data_buf->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_data_buf->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_data_buf->buf, + .size = slice_data_buf->size - slice_data_size*f->slice_count, + .offset = slice_data_size*f->slice_count, + }; + } + + if (fv->is_rgb) { + ff_vk_frame_barrier(&fv->s, exec, tmp, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + } + + /* Final barrier before encoding */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + nb_img_bar = 0; + if (nb_buf_bar) { + slice_data_buf->stage = buf_bar[0].dstStageMask; + slice_data_buf->access = buf_bar[0].dstAccessMask; + nb_buf_bar = 0; + } + + /* Main encode shader */ + ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->enc, + 1, 0, 0, + slice_data_buf, + 0, slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc, + src, src_views, + 1, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + ff_vk_shader_update_desc_buffer(&fv->s, exec, + &fv->enc, 1, 2, 0, + results_data_buf, + 0, results_data_buf->size, + VK_FORMAT_UNDEFINED); + if (fv->is_rgb) + ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc, + tmp, tmp_views, + 1, 3, + 
VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc); + ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1); + + /* Submit */ + err = ff_vk_exec_submit(&fv->s, exec); + if (err < 0) + return err; + + f->picture_number++; + + /* This, if needed, was referenced by the execution context + * as it was declared as a dependency. */ + av_frame_free(&tmp); + return 0; + +fail: + av_frame_free(&tmp); + ff_vk_exec_discard_deps(&fv->s, exec); + + return err; +} + +static int transfer_slices(AVCodecContext *avctx, + VkBufferCopy *buf_regions, int nb_regions, + VulkanEncodeFFv1FrameData *fd, + uint8_t *dst, AVBufferRef *dst_ref) +{ + int err; + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFVulkanFunctions *vk = &fv->s.vkfn; + FFVkExecContext *exec; + + FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data; + + AVBufferRef *mapped_ref; + FFVkBuffer *mapped_buf; + + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + err = ff_vk_host_map_buffer(&fv->s, &mapped_ref, dst, dst_ref, + VK_BUFFER_USAGE_TRANSFER_DST_BIT); + if (err < 0) + return err; + + mapped_buf = (FFVkBuffer *)mapped_ref->data; + + /* Transfer the slices */ + exec = ff_vk_exec_get(&fv->s, &fv->transfer_exec_pool); + ff_vk_exec_start(&fv->s, exec); + + ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 0); + fd->out_data_ref = NULL; /* Ownership passed */ + + ff_vk_exec_add_dep_buf(&fv->s, exec, &mapped_ref, 1, 0); + mapped_ref = NULL; /* Ownership passed */ + + /* Ensure the output buffer is finished */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = out_data_buf->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT, + .srcAccessMask = out_data_buf->access, + .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = out_data_buf->buf, + .size = VK_WHOLE_SIZE, + .offset = 0, + }; + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + out_data_buf->stage = buf_bar[0].dstStageMask; + out_data_buf->access = buf_bar[0].dstAccessMask; + nb_buf_bar = 0; + + for (int i = 0; i < nb_regions; i++) + buf_regions[i].dstOffset += mapped_buf->virtual_offset; + + vk->CmdCopyBuffer(exec->buf, + out_data_buf->buf, mapped_buf->buf, + nb_regions, buf_regions); + + /* Submit */ + err = ff_vk_exec_submit(&fv->s, exec); + if (err < 0) + return err; + + /* We need the encoded data immediately */ + ff_vk_exec_wait(&fv->s, exec); + + return 0; +} + +static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec, + AVPacket *pkt) +{ + int err; + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFV1Context *f = &fv->ctx; + FFVulkanFunctions *vk = &fv->s.vkfn; + VulkanEncodeFFv1FrameData *fd = exec->opaque; + + FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data; + FFVkBuffer *results_data_buf = (FFVkBuffer *)fd->results_data_ref->data; + uint64_t *sc; + + /* Make sure encoding's done */ + ff_vk_exec_wait(&fv->s, exec); + + /* Invalidate slice/output data if needed */ + if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange invalidate_data = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = results_data_buf->mem, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev, + 1, &invalidate_data); + } + + /* Calculate final size */ + pkt->size = 0; + for (int i = 0; i < f->slice_count; i++) { + sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2]; + av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64", " + "src offset = %"PRIu64"\n", + i, sc[0], sc[1]); + + 
fv->buf_regions[i] = (VkBufferCopy) { + .srcOffset = sc[1], + .dstOffset = pkt->size, + .size = sc[0], + }; + pkt->size += sc[0]; + } + av_log(avctx, AV_LOG_VERBOSE, "Encoded data: %iMiB\n", pkt->size / (1024*1024)); + av_buffer_unref(&fd->results_data_ref); /* No need for this buffer anymore */ + + /* Allocate packet */ + if ((err = ff_get_encode_buffer(avctx, pkt, pkt->size, 0)) < 0) + return err; + + pkt->pts = fd->pts; + pkt->dts = fd->pts; + pkt->duration = fd->duration; + pkt->flags |= AV_PKT_FLAG_KEY * fd->key_frame; + + if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) { + pkt->opaque = fd->frame_opaque; + pkt->opaque_ref = fd->frame_opaque_ref; + fd->frame_opaque_ref = NULL; + } + + /* Try using host mapped memory transfers first */ + if (fv->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) { + err = transfer_slices(avctx, fv->buf_regions, f->slice_count, fd, + pkt->data, pkt->buf); + if (err >= 0) + return err; + } + + /* Invalidate slice/output data if needed */ + if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange invalidate_data = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = out_data_buf->mem, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev, + 1, &invalidate_data); + } + + /* Copy each slice */ + for (int i = 0; i < f->slice_count; i++) { + VkBufferCopy *region = &fv->buf_regions[i]; + memcpy(pkt->data + region->dstOffset, + out_data_buf->mapped_mem + region->srcOffset, + region->size); + } + + av_buffer_unref(&fd->out_data_ref); + + return 0; +} + +static int vulkan_encode_ffv1_receive_packet(AVCodecContext *avctx, + AVPacket *pkt) +{ + int err; + VulkanEncodeFFv1Context *fv = avctx->priv_data; + VulkanEncodeFFv1FrameData *fd; + FFVkExecContext *exec; + AVFrame *frame; + + while (1) { + /* Roll an execution context */ + exec = ff_vk_exec_get(&fv->s, &fv->exec_pool); + + /* If it had a frame, immediately output it */ + if (exec->had_submission) { + 
exec->had_submission = 0; + fv->in_flight--; + return get_packet(avctx, exec, pkt); + } + + /* Get next frame to encode */ + frame = fv->frame; + err = ff_encode_get_frame(avctx, frame); + if (err < 0 && err != AVERROR_EOF) { + return err; + } else if (err == AVERROR_EOF) { + if (!fv->in_flight) + return err; + continue; + } + + /* Encode frame */ + fd = exec->opaque; + fd->pts = frame->pts; + fd->duration = frame->duration; + if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) { + fd->frame_opaque = frame->opaque; + fd->frame_opaque_ref = frame->opaque_ref; + frame->opaque_ref = NULL; + } + + err = vulkan_encode_ffv1_submit_frame(avctx, exec, frame); + av_frame_unref(frame); + if (err < 0) + return err; + + fv->in_flight++; + if (fv->in_flight < fv->async_depth) + return AVERROR(EAGAIN); + } + + return 0; +} + +static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format) +{ + int err; + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFV1Context *f = &fv->ctx; + AVHWFramesContext *frames_ctx; + AVVulkanFramesContext *vk_frames; + + fv->intermediate_frames_ref = av_hwframe_ctx_alloc(fv->s.device_ref); + if (!fv->intermediate_frames_ref) + return AVERROR(ENOMEM); + + frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data; + frames_ctx->format = AV_PIX_FMT_VULKAN; + frames_ctx->sw_format = sw_format; + frames_ctx->width = fv->s.frames->width; + frames_ctx->height = f->num_v_slices*RGB_LINECACHE; + + vk_frames = frames_ctx->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; + vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + + err = av_hwframe_ctx_init(fv->intermediate_frames_ref); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n", + av_get_pix_fmt_name(sw_format), av_err2str(err)); + av_buffer_unref(&fv->intermediate_frames_ref); + return err; + } + + return 0; +} + +static int 
check_support(AVHWFramesConstraints *constraints, + enum AVPixelFormat fmt) +{ + for (int i = 0; constraints->valid_sw_formats[i]; i++) { + if (constraints->valid_sw_formats[i] == fmt) + return 1; + } + return 0; +} + +static enum AVPixelFormat get_supported_rgb_buffer_fmt(AVCodecContext *avctx) +{ + VulkanEncodeFFv1Context *fv = avctx->priv_data; + + enum AVPixelFormat fmt; + AVHWFramesConstraints *constraints; + constraints = av_hwdevice_get_hwframe_constraints(fv->s.device_ref, + NULL); + + /* What we'd like to optimally have */ + fmt = fv->ctx.use32bit ? + (fv->ctx.transparency ? AV_PIX_FMT_RGBA128 : AV_PIX_FMT_RGB96) : + (fv->ctx.transparency ? AV_PIX_FMT_RGBA64 : AV_PIX_FMT_RGB48); + if (check_support(constraints, fmt)) + goto end; + + if (fv->ctx.use32bit) { + if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128))) + goto end; + } else { + if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA64))) + goto end; + + if (!fv->ctx.transparency && + check_support(constraints, (fmt = AV_PIX_FMT_RGB96))) + goto end; + + if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128))) + goto end; + } + + fmt = AV_PIX_FMT_NONE; + +end: + av_hwframe_constraints_free(&constraints); + return fmt; +} + +static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd) +{ + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFV1Context *f = &fv->ctx; + int smp_bits = fv->ctx.use32bit ? 
/**
 * Build, compile and register the "ffv1_rct_search" compute shader.
 *
 * The GLSL source is assembled in order into shd->src: common header,
 * push-constant block, size defines, two descriptor sets, the shared codec
 * code and finally the shader body. The result is compiled with @p spv,
 * linked and registered with the encoder's execution pool.
 *
 * @param avctx  codec context; priv_data is a VulkanEncodeFFv1Context
 * @param spv    SPIR-V compiler used to compile the assembled source
 * @return the result of the last step on success (non-negative),
 *         a negative error code from the failing step otherwise
 */
static int init_rct_search_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
{
    int err;
    VulkanEncodeFFv1Context *fv = avctx->priv_data;
    FFV1Context *f = &fv->ctx;
    FFVulkanShader *shd = &fv->rct_search;
    FFVulkanDescriptorSetBinding *desc_set;

    uint8_t *spv_data;
    size_t spv_len;
    void *spv_opaque = NULL;

    /* 32x32x1 local workgroup size */
    RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct_search",
                          VK_SHADER_STAGE_COMPUTE_BIT,
                          (const char *[]) { "GL_EXT_buffer_reference",
                                             "GL_EXT_buffer_reference2",
                                             "GL_EXT_null_initializer" }, 3,
                          32, 32, 1,
                          0));

    /* Common codec header */
    GLSLD(ff_source_common_comp);

    /* Push-constant block as seen by the shader */
    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
    GLSLC(1,    ivec4 fmt_lut;                                                 );
    GLSLC(1,    int rct_offset;                                                );
    GLSLC(1,    uint8_t planar_rgb;                                            );
    GLSLC(1,    uint8_t transparency;                                          );
    GLSLC(1,    uint8_t key_frame;                                             );
    GLSLC(1,    uint8_t force_pcm;                                             );
    GLSLC(1,    uint8_t version;                                               );
    GLSLC(1,    uint8_t micro_version;                                         );
    GLSLC(1,    uint8_t padding[3];                                            );
    GLSLC(0, };                                                                );
    /* NOTE(review): the range is sized with FFv1VkResetParameters, while the
     * fields declared above look like the RCT search parameters rather than
     * the reset ones - confirm the size is intentional. */
    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
                                VK_SHADER_STAGE_COMPUTE_BIT);

    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);

    /* Never used */
    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "rangecoder_static_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "uint8_t zero_one_state[512];",
        },
        {
            .name        = "quant_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 1));

    /* Emits the defines and shared sources every FFv1 shader needs */
    define_shared_code(avctx, shd);

    /* Per-frame set: slice contexts plus the source image planes */
    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "slice_data_buf",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "SliceContext slice_ctx",
            .buf_elems   = f->max_slice_count,
        },
        {
            .name       = "src",
            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .dimensions = 2,
            .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
                                               fv->rep_fmt),
            .elems      = av_pix_fmt_count_planes(fv->s.frames->sw_format),
            .mem_quali  = "readonly",
            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0));

    GLSLD(ff_source_ffv1_rct_search_comp);

    RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
                            &spv_opaque));
    RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));

    RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));

    /* Success falls through: the compiler's opaque state is released on
     * both the success and the failure path */
fail:
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    return err;
}
RET(ff_vk_shader_init(&fv->s, shd, "ffv1_setup", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + 1, 1, 1, + 0)); + + /* Common codec header */ + GLSLD(ff_source_common_comp); + add_push_data(shd); + + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); + av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + av_bprintf(&shd->src, "#define FULL_RENORM\n"); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "rangecoder_static_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t zero_one_state[512];", + }, + { /* This descriptor is never used */ + .name = "quant_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" + "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + }, + }; + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0)); + + define_shared_code(avctx, shd); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "slice_data_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, + }, + { + .name = "src", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format, + fv->rep_fmt), + .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); + + GLSLD(ff_source_ffv1_enc_setup_comp); + + RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + 
/**
 * Build, compile and register the "ffv1_reset" compute shader.
 *
 * The GLSL source is assembled in order into shd->src: common header,
 * push-constant block, size defines, two descriptor sets, the shared codec
 * code and finally the shader body. The result is compiled with @p spv,
 * linked and registered with the encoder's execution pool.
 *
 * @param avctx  codec context; priv_data is a VulkanEncodeFFv1Context
 * @param spv    SPIR-V compiler used to compile the assembled source
 * @return the result of the last step on success (non-negative),
 *         a negative error code from the failing step otherwise
 */
static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
{
    int err;
    VulkanEncodeFFv1Context *fv = avctx->priv_data;
    FFV1Context *f = &fv->ctx;
    FFVulkanShader *shd = &fv->reset;
    FFVulkanDescriptorSetBinding *desc_set;

    uint8_t *spv_data;
    size_t spv_len;
    void *spv_opaque = NULL;
    /* Workgroup width: the device limit in X, capped at 1024 */
    int wg_dim = FFMIN(fv->s.props.properties.limits.maxComputeWorkGroupSize[0], 1024);

    RET(ff_vk_shader_init(&fv->s, shd, "ffv1_reset",
                          VK_SHADER_STAGE_COMPUTE_BIT,
                          (const char *[]) { "GL_EXT_buffer_reference",
                                             "GL_EXT_buffer_reference2" }, 2,
                          wg_dim, 1, 1,
                          0));

    /* Common codec header */
    GLSLD(ff_source_common_comp);

    /* Push-constant block as seen by the shader */
    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {             );
    GLSLF(1,    uint context_count[%i];                    ,MAX_QUANT_TABLES);
    GLSLC(1,    u8buf slice_state;                                             );
    GLSLC(1,    uint plane_state_size;                                         );
    GLSLC(1,    uint8_t codec_planes;                                          );
    GLSLC(1,    uint8_t key_frame;                                             );
    GLSLC(1,    uint8_t version;                                               );
    GLSLC(1,    uint8_t micro_version;                                         );
    GLSLC(1,    uint8_t padding[1];                                            );
    GLSLC(0, };                                                                );
    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
                                VK_SHADER_STAGE_COMPUTE_BIT);

    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);

    /* Static tables: range coder state and quantization tables */
    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "rangecoder_static_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "uint8_t zero_one_state[512];",
        },
        {
            .name        = "quant_buf",
            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .mem_layout  = "scalar",
            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0));

    /* Emits the defines and shared sources every FFv1 shader needs */
    define_shared_code(avctx, shd);

    /* Per-frame set: the slice contexts, declared read-only here */
    desc_set = (FFVulkanDescriptorSetBinding []) {
        {
            .name        = "slice_data_buf",
            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .mem_quali   = "readonly",
            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
            .buf_content = "SliceContext slice_ctx",
            .buf_elems   = f->max_slice_count,
        },
    };
    RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0));

    GLSLD(ff_source_ffv1_reset_comp);

    RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
                            &spv_opaque));
    RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));

    RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));

    /* Success falls through: the compiler's opaque state is released on
     * both the success and the failure path */
fail:
    if (spv_opaque)
        spv->free_shader(spv, &spv_opaque);

    return err;
}
CONTEXT_SIZE : 1, 1, 1, + 0)); + + /* Common codec header */ + GLSLD(ff_source_common_comp); + + add_push_data(shd); + + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); + av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + + if (use_cached_reader) + av_bprintf(&shd->src, "#define CACHED_SYMBOL_READER 1\n"); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "rangecoder_static_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t zero_one_state[512];", + }, + { + .name = "quant_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" + "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + }, + { + .name = "crc_ieee_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint32_t crc_ieee[256];", + }, + }; + + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 1, 0)); + + define_shared_code(avctx, shd); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "slice_data_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, + }, + { + .name = "src", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format, + fv->rep_fmt), + .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format), + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "results_data_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_quali = "writeonly", + .buf_content = "uint64_t 
slice_results[2048];", + }, + { /* place holder for desc_set[3] */ + }, + }; + if (fv->is_rgb) { + AVHWFramesContext *intermediate_frames_ctx; + intermediate_frames_ctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data; + desc_set[3] = (FFVulkanDescriptorSetBinding) { + .name = "tmp", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(intermediate_frames_ctx->sw_format, + FF_VK_REP_NATIVE), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }; + } + RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3 + fv->is_rgb, 0, 0)); + + GLSLD(ff_source_ffv1_enc_comp); + + RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx) +{ + int err; + size_t maxsize, max_heap_size, max_host_size; + VulkanEncodeFFv1Context *fv = avctx->priv_data; + FFV1Context *f = &fv->ctx; + FFVkSPIRVCompiler *spv; + + if ((err = ff_ffv1_common_init(avctx, f)) < 0) + return err; + + if (f->ac == 1) + f->ac = AC_RANGE_CUSTOM_TAB; + + err = ff_ffv1_encode_setup_plane_info(avctx, avctx->sw_pix_fmt); + if (err < 0) + return err; + + /* Target version 3 by default */ + f->version = 3; + + err = ff_ffv1_encode_init(avctx); + if (err < 0) + return err; + + /* Rice coding did not support high bit depths */ + if (f->bits_per_raw_sample > (f->version > 3 ? 
16 : 8)) { + if (f->ac == AC_GOLOMB_RICE) { + av_log(avctx, AV_LOG_WARNING, "bits_per_raw_sample > 8, " + "forcing range coder\n"); + f->ac = AC_RANGE_CUSTOM_TAB; + } + } + + if (f->version < 4 && avctx->gop_size > 1) { + av_log(avctx, AV_LOG_ERROR, "Using inter frames requires version 4 (-level 4)\n"); + return AVERROR_INVALIDDATA; + } + + if (f->version == 4 && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) { + av_log(avctx, AV_LOG_ERROR, "Version 4 is experimental and requires -strict -2\n"); + return AVERROR_INVALIDDATA; + } + + /* We target version 4.3 */ + if (f->version == 4 && f->micro_version > 4) + f->micro_version = 3; + + //if (fv->ctx.ac == AC_GOLOMB_RICE) { + if (0) { + int w_a = FFALIGN(avctx->width, LG_ALIGN_W); + int h_a = FFALIGN(avctx->height, LG_ALIGN_H); + int w_sl, h_sl; + + /* Pixels per line an invocation handles */ + int ppi = 0; + /* Chunk size */ + int chunks = 0; + + do { + if (ppi < 2) + ppi++; + chunks++; + w_sl = w_a / (LG_ALIGN_W*ppi); + h_sl = h_a / (LG_ALIGN_H*chunks); + } while (w_sl > MAX_SLICES / h_sl); + + av_log(avctx, AV_LOG_VERBOSE, "Slice config: %ix%i, %i total\n", + LG_ALIGN_W*ppi, LG_ALIGN_H*chunks, w_sl*h_sl); + av_log(avctx, AV_LOG_VERBOSE, "Horizontal slices: %i (%i pixels per invoc)\n", + w_sl, ppi); + av_log(avctx, AV_LOG_VERBOSE, "Vertical slices: %i (%i chunks)\n", + h_sl, chunks); + + f->num_h_slices = w_sl; + f->num_v_slices = h_sl; + + fv->ppi = ppi; + fv->chunks = chunks; + } else { + f->num_h_slices = fv->num_h_slices; + f->num_v_slices = fv->num_v_slices; + + if (f->num_h_slices <= 0 && f->num_v_slices <= 0) { + if (avctx->slices) { + err = ff_ffv1_encode_determine_slices(avctx); + if (err < 0) + return err; + } else { + f->num_h_slices = 32; + f->num_v_slices = 32; + } + } else if (f->num_h_slices && f->num_v_slices <= 0) { + f->num_v_slices = MAX_SLICES / f->num_h_slices; + } else if (f->num_v_slices && f->num_h_slices <= 0) { + f->num_h_slices = MAX_SLICES / f->num_v_slices; + } + + 
f->num_h_slices = FFMIN(f->num_h_slices, avctx->width); + f->num_v_slices = FFMIN(f->num_v_slices, avctx->height); + + if (f->num_h_slices * f->num_v_slices > MAX_SLICES) { + av_log(avctx, AV_LOG_ERROR, "Too many slices (%i), maximum supported " + "by the standard is %i\n", + f->num_h_slices * f->num_v_slices, MAX_SLICES); + return AVERROR_PATCHWELCOME; + } + } + + f->max_slice_count = f->num_h_slices * f->num_v_slices; + + if ((err = ff_ffv1_write_extradata(avctx)) < 0) + return err; + + if (f->version < 4) { + if (((f->chroma_h_shift > 0) && (avctx->width % (64 << f->chroma_h_shift))) || + ((f->chroma_v_shift > 0) && (avctx->height % (64 << f->chroma_v_shift)))) { + av_log(avctx, AV_LOG_ERROR, "Encoding frames with subsampling and unaligned " + "dimensions is only supported in version 4 (-level 4)\n"); + return AVERROR_PATCHWELCOME; + } + } + + if (fv->force_pcm) { + if (f->version < 4) { + av_log(avctx, AV_LOG_ERROR, "PCM coding only supported by version 4 (-level 4)\n"); + return AVERROR_INVALIDDATA; + } else if (f->ac == AC_GOLOMB_RICE) { + av_log(avctx, AV_LOG_ERROR, "PCM coding requires range coding\n"); + return AVERROR_INVALIDDATA; + } + } + + /* Init Vulkan */ + err = ff_vk_init(&fv->s, avctx, NULL, avctx->hw_frames_ctx); + if (err < 0) + return err; + + fv->qf = ff_vk_qf_find(&fv->s, VK_QUEUE_COMPUTE_BIT, 0); + if (!fv->qf) { + av_log(avctx, AV_LOG_ERROR, "Device has no compute queues!\n"); + return err; + } + + /* Try to measure VRAM size */ + max_heap_size = 0; + max_host_size = 0; + for (int i = 0; i < fv->s.mprops.memoryHeapCount; i++) { + if (fv->s.mprops.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) + max_heap_size = FFMAX(fv->max_heap_size, + fv->s.mprops.memoryHeaps[i].size); + if (!(fv->s.mprops.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)) + max_host_size = FFMAX(max_host_size, + fv->s.mprops.memoryHeaps[i].size); + } + fv->max_heap_size = max_heap_size; + + maxsize = ff_ffv1_encode_buffer_size(avctx); + if (maxsize > 
fv->s.props_11.maxMemoryAllocationSize) { + av_log(avctx, AV_LOG_WARNING, "Encoding buffer size (%"SIZE_SPECIFIER") larger " + "than maximum device allocation (%"SIZE_SPECIFIER"), clipping\n", + maxsize, fv->s.props_11.maxMemoryAllocationSize); + maxsize = fv->s.props_11.maxMemoryAllocationSize; + } + + if (max_heap_size < maxsize) { + av_log(avctx, AV_LOG_WARNING, "Encoding buffer (%"SIZE_SPECIFIER") larger than VRAM (%"SIZE_SPECIFIER"), " + "using host memory (slower)\n", + maxsize, fv->max_heap_size); + + /* Keep 1/2th of RAM as headroom */ + max_heap_size = max_host_size - (max_host_size >> 1); + } else { + /* Keep 1/8th of VRAM as headroom */ + max_heap_size = max_heap_size - (max_heap_size >> 3); + } + + av_log(avctx, AV_LOG_INFO, "Async buffers: %"SIZE_SPECIFIER"MiB per context, %"SIZE_SPECIFIER"MiB total, depth: %i\n", + maxsize / (1024*1024), + (fv->async_depth * maxsize) / (1024*1024), + fv->async_depth); + + err = ff_vk_exec_pool_init(&fv->s, fv->qf, &fv->exec_pool, + fv->async_depth, + 0, 0, 0, NULL); + if (err < 0) + return err; + + fv->transfer_qf = ff_vk_qf_find(&fv->s, VK_QUEUE_TRANSFER_BIT, 0); + if (!fv->transfer_qf) { + av_log(avctx, AV_LOG_ERROR, "Device has no transfer queues!\n"); + return err; + } + + err = ff_vk_exec_pool_init(&fv->s, fv->transfer_qf, &fv->transfer_exec_pool, + 1, + 0, 0, 0, NULL); + if (err < 0) + return err; + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + /* Detect the special RGB coding mode */ + fv->is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) && + !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8); + + /* bits_per_raw_sample use regular unsigned representation, + * but in higher bit depths, the data is casted to int16_t */ + fv->rep_fmt = FF_VK_REP_UINT; + if (!fv->is_rgb && f->bits_per_raw_sample > 8) + fv->rep_fmt = FF_VK_REP_INT; + + /* Init rct search shader */ + fv->optimize_rct = fv->is_rgb && 
f->version >= 4 && + !fv->force_pcm && fv->optimize_rct; + if (fv->optimize_rct) { + err = init_rct_search_shader(avctx, spv); + if (err < 0) { + spv->uninit(&spv); + return err; + } + } + + /* Init setup shader */ + err = init_setup_shader(avctx, spv); + if (err < 0) { + spv->uninit(&spv); + return err; + } + + /* Init reset shader */ + err = init_reset_shader(avctx, spv); + if (err < 0) { + spv->uninit(&spv); + return err; + } + + if (fv->is_rgb) { + enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx); + if (intermediate_fmt == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible " + "pixel format for RCT buffer!\n"); + return AVERROR(ENOTSUP); + } + + RET(init_indirect(avctx, intermediate_fmt)); + } + + /* Encode shader */ + err = init_encode_shader(avctx, spv); + if (err < 0) { + spv->uninit(&spv); + return err; + } + + spv->uninit(&spv); + + /* Range coder data */ + err = ff_ffv1_vk_init_state_transition_data(&fv->s, + &fv->rangecoder_static_buf, + f); + if (err < 0) + return err; + + /* Quantization table data */ + err = ff_ffv1_vk_init_quant_table_data(&fv->s, + &fv->quant_buf, + f); + if (err < 0) + return err; + + /* CRC table buffer */ + err = ff_ffv1_vk_init_crc_table_data(&fv->s, + &fv->crc_tab_buf, + f); + if (err < 0) + return err; + + /* Update setup global descriptors */ + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->setup, 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + + /* Update encode global descriptors */ + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->enc, 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0], + &fv->enc, 0, 1, 0, + &fv->quant_buf, + 0, fv->quant_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&fv->s, 
&fv->exec_pool.contexts[0], + &fv->enc, 0, 2, 0, + &fv->crc_tab_buf, + 0, fv->crc_tab_buf.size, + VK_FORMAT_UNDEFINED)); + + /* Temporary frame */ + fv->frame = av_frame_alloc(); + if (!fv->frame) + return AVERROR(ENOMEM); + + /* Async data pool */ + fv->async_depth = fv->exec_pool.pool_size; + fv->exec_ctx_info = av_calloc(fv->async_depth, sizeof(*fv->exec_ctx_info)); + if (!fv->exec_ctx_info) + return AVERROR(ENOMEM); + for (int i = 0; i < fv->async_depth; i++) + fv->exec_pool.contexts[i].opaque = &fv->exec_ctx_info[i]; + + fv->buf_regions = av_malloc_array(f->max_slice_count, sizeof(*fv->buf_regions)); + if (!fv->buf_regions) + return AVERROR(ENOMEM); + +fail: + return err; +} + +static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx) +{ + VulkanEncodeFFv1Context *fv = avctx->priv_data; + + ff_vk_exec_pool_free(&fv->s, &fv->exec_pool); + ff_vk_exec_pool_free(&fv->s, &fv->transfer_exec_pool); + + ff_vk_shader_free(&fv->s, &fv->enc); + ff_vk_shader_free(&fv->s, &fv->reset); + ff_vk_shader_free(&fv->s, &fv->setup); + ff_vk_shader_free(&fv->s, &fv->rct_search); + + if (fv->exec_ctx_info) { + for (int i = 0; i < fv->async_depth; i++) { + VulkanEncodeFFv1FrameData *fd = &fv->exec_ctx_info[i]; + av_buffer_unref(&fd->out_data_ref); + av_buffer_unref(&fd->results_data_ref); + av_buffer_unref(&fd->frame_opaque_ref); + } + } + av_free(fv->exec_ctx_info); + + av_buffer_unref(&fv->intermediate_frames_ref); + + av_buffer_pool_uninit(&fv->results_data_pool); + + av_buffer_pool_uninit(&fv->out_data_pool); + + av_buffer_unref(&fv->keyframe_slice_data_ref); + av_buffer_pool_uninit(&fv->slice_data_pool); + + ff_vk_free_buf(&fv->s, &fv->quant_buf); + ff_vk_free_buf(&fv->s, &fv->rangecoder_static_buf); + ff_vk_free_buf(&fv->s, &fv->crc_tab_buf); + + av_free(fv->buf_regions); + av_frame_free(&fv->frame); + ff_vk_uninit(&fv->s); + + return 0; +} + +#define OFFSET(x) offsetof(VulkanEncodeFFv1Context, x) +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM +static 
const AVOption vulkan_encode_ffv1_options[] = { /* private encoder options; OFFSET() is relative to VulkanEncodeFFv1Context */
+    { "slicecrc", "Protect slices with CRCs", OFFSET(ctx.ec), AV_OPT_TYPE_INT,
+            { .i64 = -1 }, -1, 2, VE },
+    { "context", "Context model", OFFSET(ctx.context_model), AV_OPT_TYPE_INT,
+            { .i64 = 0 }, 0, 1, VE },
+    { "coder", "Coder type", OFFSET(ctx.ac), AV_OPT_TYPE_INT, /* the three AV_OPT_TYPE_CONST entries that follow name its values */
+            { .i64 = AC_RANGE_CUSTOM_TAB }, -2, 2, VE, .unit = "coder" },
+        { "rice", "Golomb rice", 0, AV_OPT_TYPE_CONST,
+            { .i64 = AC_GOLOMB_RICE }, INT_MIN, INT_MAX, VE, .unit = "coder" },
+        { "range_def", "Range with default table", 0, AV_OPT_TYPE_CONST,
+            { .i64 = AC_RANGE_DEFAULT_TAB_FORCE }, INT_MIN, INT_MAX, VE, .unit = "coder" },
+        { "range_tab", "Range with custom table", 0, AV_OPT_TYPE_CONST,
+            { .i64 = AC_RANGE_CUSTOM_TAB }, INT_MIN, INT_MAX, VE, .unit = "coder" },
+    { "qtable", "Quantization table", OFFSET(ctx.qtable), AV_OPT_TYPE_INT, /* the three AV_OPT_TYPE_CONST entries that follow name its values */
+            { .i64 = -1 }, -1, 2, VE , .unit = "qtable"},
+        { "default", NULL, 0, AV_OPT_TYPE_CONST,
+            { .i64 = QTABLE_DEFAULT }, INT_MIN, INT_MAX, VE, .unit = "qtable" },
+        { "8bit", NULL, 0, AV_OPT_TYPE_CONST,
+            { .i64 = QTABLE_8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" },
+        { "greater8bit", NULL, 0, AV_OPT_TYPE_CONST,
+            { .i64 = QTABLE_GT8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" },
+
+    { "slices_h", "Number of horizontal slices", OFFSET(num_h_slices), AV_OPT_TYPE_INT, /* values <= 0 are filled in by vulkan_encode_ffv1_init() */
+            { .i64 = -1 }, -1, MAX_SLICES, VE },
+    { "slices_v", "Number of vertical slices", OFFSET(num_v_slices), AV_OPT_TYPE_INT, /* values <= 0 are filled in by vulkan_encode_ffv1_init() */
+            { .i64 = -1 }, -1, MAX_SLICES, VE },
+
+    { "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL, /* init rejects it below version 4 and with Golomb-Rice coding */
+            { .i64 = 0 }, 0, 1, VE },
+
+    { "rct_search", "Run a search for RCT parameters (level 4 only)", OFFSET(optimize_rct), AV_OPT_TYPE_BOOL, /* init clears it unless RGB mode, version >= 4 and force_pcm is off */
+            { .i64 = 1 }, 0, 1, VE },
+
+    { "async_depth", "Internal parallelization depth", OFFSET(async_depth), AV_OPT_TYPE_INT, /* passed to ff_vk_exec_pool_init() as the size of the main exec pool */
+            { .i64 = 1 }, 1, INT_MAX, VE },
+
+    { NULL }
+};
+
+static const FFCodecDefault vulkan_encode_ffv1_defaults[] = { /* codec-level option defaults, referenced by .defaults below */
+    { "g", "1" }, /* presumably the generic GOP size option; init refuses gop_size > 1 below version 4 */
+    { NULL },
+};
+
+static const AVClass vulkan_encode_ffv1_class = { /* AVClass exposing vulkan_encode_ffv1_options */
+    .class_name = "ffv1_vulkan",
+    .item_name = av_default_item_name,
+    .option = vulkan_encode_ffv1_options,
+    .version = LIBAVUTIL_VERSION_INT,
+};
+
+const AVCodecHWConfigInternal *const vulkan_encode_ffv1_hw_configs[] = { /* NULL-terminated; NOTE(review): non-static and without an ff_ prefix — confirm nothing else references it */
+    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
+    NULL,
+};
+
+const FFCodec ff_ffv1_vulkan_encoder = { /* codec registration entry for the "ffv1_vulkan" encoder */
+    .p.name = "ffv1_vulkan",
+    CODEC_LONG_NAME("FFmpeg video codec #1 (Vulkan)"),
+    .p.type = AVMEDIA_TYPE_VIDEO,
+    .p.id = AV_CODEC_ID_FFV1,
+    .priv_data_size = sizeof(VulkanEncodeFFv1Context),
+    .init = &vulkan_encode_ffv1_init,
+    FF_CODEC_RECEIVE_PACKET_CB(&vulkan_encode_ffv1_receive_packet),
+    .close = &vulkan_encode_ffv1_close,
+    .p.priv_class = &vulkan_encode_ffv1_class,
+    .p.capabilities = AV_CODEC_CAP_DELAY |
+                      AV_CODEC_CAP_HARDWARE |
+                      AV_CODEC_CAP_DR1 |
+                      AV_CODEC_CAP_ENCODER_FLUSH |
+                      AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
+    .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH, /* NOTE(review): INIT_CLEANUP presumably makes .close run after a failed .init — init's early returns depend on that; confirm */
+    .defaults = vulkan_encode_ffv1_defaults,
+    CODEC_PIXFMTS(AV_PIX_FMT_VULKAN),
+    .hw_configs = vulkan_encode_ffv1_hw_configs,
+    .p.wrapper_name = "vulkan",
+};
-- 
2.49.1

From 4a4f6b372e5ba088bed0c72a038d6076550dce9b Mon Sep 17 00:00:00 2001
From: Jamaika1 <lukaszcz18@wp.pl>
Date: Wed, 3 Sep 2025 13:26:42 +0000
Subject: [PATCH 061/118] Changing vulkan file directory

---
 libavcodec/ffv1_vulkan.h | 62 ----------------------------------------
 1 file changed, 62 deletions(-)
 delete mode 100644 libavcodec/ffv1_vulkan.h

diff --git a/libavcodec/ffv1_vulkan.h b/libavcodec/ffv1_vulkan.h
deleted file mode 100644
index 372478f4b7..0000000000
--- a/libavcodec/ffv1_vulkan.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2024 Lynne <dev@lynne.ee>
- *
- * This file is part of FFmpeg.
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_FFV1_VULKAN_H -#define AVCODEC_FFV1_VULKAN_H - -#include "libavutil/vulkan.h" -#include "ffv1.h" - -int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f); - -int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f); - -int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f); - -int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, - FFVkBuffer *vkb, FFV1Context *f); - -typedef struct FFv1VkRCTParameters { - int fmt_lut[4]; - int offset; - uint8_t bits; - uint8_t planar_rgb; - uint8_t color_planes; - uint8_t transparency; - uint8_t version; - uint8_t micro_version; - uint8_t padding[2]; -} FFv1VkRCTParameters; - -typedef struct FFv1VkResetParameters { - uint32_t context_count[MAX_QUANT_TABLES]; - VkDeviceAddress slice_state; - uint32_t plane_state_size; - uint8_t codec_planes; - uint8_t key_frame; - uint8_t version; - uint8_t micro_version; - uint8_t padding[1]; -} FFv1VkResetParameters; - -#endif /* AVCODEC_FFV1_VULKAN_H */ -- 2.49.1 From f6350a14b74676335b5761c96b26bf647c13f93f Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:27:27 +0000 Subject: [PATCH 062/118] 
Changing vulkan file directory --- libavcodec/vulkan/ffv1_vulkan.h | 62 +++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 libavcodec/vulkan/ffv1_vulkan.h diff --git a/libavcodec/vulkan/ffv1_vulkan.h b/libavcodec/vulkan/ffv1_vulkan.h new file mode 100644 index 0000000000..bb10770576 --- /dev/null +++ b/libavcodec/vulkan/ffv1_vulkan.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_FFV1_VULKAN_H +#define AVCODEC_FFV1_VULKAN_H + +#include "../../libavutil/vulkan/vulkan.h" +#include "libavcodec/ffv1.h" + +int ff_ffv1_vk_update_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +int ff_ffv1_vk_init_state_transition_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s, + FFVkBuffer *vkb, FFV1Context *f); + +typedef struct FFv1VkRCTParameters { + int fmt_lut[4]; + int offset; + uint8_t bits; + uint8_t planar_rgb; + uint8_t color_planes; + uint8_t transparency; + uint8_t version; + uint8_t micro_version; + uint8_t padding[2]; +} FFv1VkRCTParameters; + 
+typedef struct FFv1VkResetParameters { + uint32_t context_count[MAX_QUANT_TABLES]; + VkDeviceAddress slice_state; + uint32_t plane_state_size; + uint8_t codec_planes; + uint8_t key_frame; + uint8_t version; + uint8_t micro_version; + uint8_t padding[1]; +} FFv1VkResetParameters; + +#endif /* AVCODEC_FFV1_VULKAN_H */ -- 2.49.1 From 3ed29bbe35ad3e34df22f15888c5dd2111f9974f Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:27:54 +0000 Subject: [PATCH 063/118] Changing vulkan file directory --- libavcodec/vulkan_av1.c | 670 ---------------------------------------- 1 file changed, 670 deletions(-) delete mode 100644 libavcodec/vulkan_av1.c diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c deleted file mode 100644 index 788e3cca78..0000000000 --- a/libavcodec/vulkan_av1.c +++ /dev/null @@ -1,670 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "av1dec.h" - -#include "vulkan_decode.h" - -/* Maximum number of tiles specified by any defined level */ -#define MAX_TILES 256 - -const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc = { - .codec_id = AV_CODEC_ID_AV1, - .decode_extension = FF_VK_EXT_VIDEO_DECODE_AV1, - .queue_flags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, - .decode_op = VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR, - .ext_props = { - .extensionName = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, - .specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION, - }, -}; - -typedef struct AV1VulkanDecodePicture { - FFVulkanDecodePicture vp; - - /* TODO: investigate if this can be removed to make decoding completely - * independent. */ - FFVulkanDecodeContext *dec; - - uint32_t tile_sizes[MAX_TILES]; - - /* Current picture */ - StdVideoDecodeAV1ReferenceInfo std_ref; - VkVideoDecodeAV1DpbSlotInfoKHR vkav1_ref; - uint16_t width_in_sbs_minus1[64]; - uint16_t height_in_sbs_minus1[64]; - uint16_t mi_col_starts[64]; - uint16_t mi_row_starts[64]; - StdVideoAV1TileInfo tile_info; - StdVideoAV1Quantization quantization; - StdVideoAV1Segmentation segmentation; - StdVideoAV1LoopFilter loop_filter; - StdVideoAV1CDEF cdef; - StdVideoAV1LoopRestoration loop_restoration; - StdVideoAV1GlobalMotion global_motion; - StdVideoAV1FilmGrain film_grain; - StdVideoDecodeAV1PictureInfo std_pic_info; - VkVideoDecodeAV1PictureInfoKHR av1_pic_info; - - /* Picture refs */ - const AV1Frame *ref_src [AV1_NUM_REF_FRAMES]; - StdVideoDecodeAV1ReferenceInfo std_refs [AV1_NUM_REF_FRAMES]; - VkVideoDecodeAV1DpbSlotInfoKHR vkav1_refs[AV1_NUM_REF_FRAMES]; - - uint8_t frame_id_set; - uint8_t frame_id; - uint8_t ref_frame_sign_bias_mask; -} AV1VulkanDecodePicture; - -static int vk_av1_fill_pict(AVCodecContext 
*avctx, const AV1Frame **ref_src, - VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ - VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ - StdVideoDecodeAV1ReferenceInfo *vkav1_std_ref, - VkVideoDecodeAV1DpbSlotInfoKHR *vkav1_ref, /* Goes in ^ */ - const AV1Frame *pic, int is_current, int has_grain, - const uint8_t *saved_order_hints) -{ - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - AV1VulkanDecodePicture *hp = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vkpic = &hp->vp; - - int err = ff_vk_decode_prepare_frame(dec, pic->f, vkpic, is_current, - has_grain || dec->dedicated_dpb); - if (err < 0) - return err; - - *vkav1_std_ref = (StdVideoDecodeAV1ReferenceInfo) { - .flags = (StdVideoDecodeAV1ReferenceInfoFlags) { - .disable_frame_end_update_cdf = pic->raw_frame_header->disable_frame_end_update_cdf, - .segmentation_enabled = pic->raw_frame_header->segmentation_enabled, - }, - .frame_type = pic->raw_frame_header->frame_type, - .OrderHint = pic->raw_frame_header->order_hint, - .RefFrameSignBias = hp->ref_frame_sign_bias_mask, - }; - - if (saved_order_hints) { - if (dec->quirk_av1_offset) - for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) - vkav1_std_ref->SavedOrderHints[i - 1] = saved_order_hints[i]; - else - for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) - vkav1_std_ref->SavedOrderHints[i] = saved_order_hints[i]; - } - - *vkav1_ref = (VkVideoDecodeAV1DpbSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR, - .pStdReferenceInfo = vkav1_std_ref, - }; - - vkav1_std_ref->flags.disable_frame_end_update_cdf = pic->raw_frame_header->disable_frame_end_update_cdf; - vkav1_std_ref->flags.segmentation_enabled = pic->raw_frame_header->segmentation_enabled; - vkav1_std_ref->frame_type = pic->raw_frame_header->frame_type; - - *ref = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - 
.codedOffset = (VkOffset2D){ 0, 0 }, - .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, - .baseArrayLayer = ((has_grain || dec->dedicated_dpb) && ctx->common.layered_dpb) ? - hp->frame_id : 0, - .imageViewBinding = vkpic->view.ref[0], - }; - - *ref_slot = (VkVideoReferenceSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, - .pNext = vkav1_ref, - .slotIndex = hp->frame_id, - .pPictureResource = ref, - }; - - if (ref_src) - *ref_src = pic; - - return 0; -} - -static void vk_av1_params_fill(AVCodecContext *avctx, - StdVideoAV1TimingInfo *av1_timing_info, - StdVideoAV1ColorConfig *av1_color_config, - StdVideoAV1SequenceHeader *av1_sequence_header) -{ - const AV1DecContext *s = avctx->priv_data; - const AV1RawSequenceHeader *seq = s->raw_seq; - - *av1_timing_info = (StdVideoAV1TimingInfo) { - .flags = (StdVideoAV1TimingInfoFlags) { - .equal_picture_interval = seq->timing_info.equal_picture_interval, - }, - .num_units_in_display_tick = seq->timing_info.num_units_in_display_tick, - .time_scale = seq->timing_info.time_scale, - .num_ticks_per_picture_minus_1 = seq->timing_info.num_ticks_per_picture_minus_1, - }; - - *av1_color_config = (StdVideoAV1ColorConfig) { - .flags = (StdVideoAV1ColorConfigFlags) { - .mono_chrome = seq->color_config.mono_chrome, - .color_range = seq->color_config.color_range, - .separate_uv_delta_q = seq->color_config.separate_uv_delta_q, - }, - .BitDepth = seq->color_config.twelve_bit ? 12 : - seq->color_config.high_bitdepth ? 
10 : 8, - .subsampling_x = seq->color_config.subsampling_x, - .subsampling_y = seq->color_config.subsampling_y, - .color_primaries = seq->color_config.color_primaries, - .transfer_characteristics = seq->color_config.transfer_characteristics, - .matrix_coefficients = seq->color_config.matrix_coefficients, - }; - - *av1_sequence_header = (StdVideoAV1SequenceHeader) { - .flags = (StdVideoAV1SequenceHeaderFlags) { - .still_picture = seq->still_picture, - .reduced_still_picture_header = seq->reduced_still_picture_header, - .use_128x128_superblock = seq->use_128x128_superblock, - .enable_filter_intra = seq->enable_filter_intra, - .enable_intra_edge_filter = seq->enable_intra_edge_filter, - .enable_interintra_compound = seq->enable_interintra_compound, - .enable_masked_compound = seq->enable_masked_compound, - .enable_warped_motion = seq->enable_warped_motion, - .enable_dual_filter = seq->enable_dual_filter, - .enable_order_hint = seq->enable_order_hint, - .enable_jnt_comp = seq->enable_jnt_comp, - .enable_ref_frame_mvs = seq->enable_ref_frame_mvs, - .frame_id_numbers_present_flag = seq->frame_id_numbers_present_flag, - .enable_superres = seq->enable_superres, - .enable_cdef = seq->enable_cdef, - .enable_restoration = seq->enable_restoration, - .film_grain_params_present = seq->film_grain_params_present, - .timing_info_present_flag = seq->timing_info_present_flag, - .initial_display_delay_present_flag = seq->initial_display_delay_present_flag, - }, - .seq_profile = seq->seq_profile, - .frame_width_bits_minus_1 = seq->frame_width_bits_minus_1, - .frame_height_bits_minus_1 = seq->frame_height_bits_minus_1, - .max_frame_width_minus_1 = seq->max_frame_width_minus_1, - .max_frame_height_minus_1 = seq->max_frame_height_minus_1, - .delta_frame_id_length_minus_2 = seq->delta_frame_id_length_minus_2, - .additional_frame_id_length_minus_1 = seq->additional_frame_id_length_minus_1, - .order_hint_bits_minus_1 = seq->order_hint_bits_minus_1, - .seq_force_integer_mv = 
seq->seq_force_integer_mv, - .seq_force_screen_content_tools = seq->seq_force_screen_content_tools, - .pTimingInfo = av1_timing_info, - .pColorConfig = av1_color_config, - }; -} - -static int vk_av1_create_params(AVCodecContext *avctx, AVBufferRef **buf, - AV1VulkanDecodePicture *ap) -{ - int err; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - - StdVideoAV1SequenceHeader av1_sequence_header; - StdVideoAV1TimingInfo av1_timing_info; - StdVideoAV1ColorConfig av1_color_config; - VkVideoDecodeAV1SessionParametersCreateInfoKHR av1_params; - VkVideoSessionParametersCreateInfoKHR session_params_create; - - vk_av1_params_fill(avctx, &av1_timing_info, &av1_color_config, - &av1_sequence_header); - - av1_params = (VkVideoDecodeAV1SessionParametersCreateInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pStdSequenceHeader = &av1_sequence_header, - }; - session_params_create = (VkVideoSessionParametersCreateInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pNext = &av1_params, - .videoSession = ctx->common.session, - .videoSessionParametersTemplate = VK_NULL_HANDLE, - }; - - err = ff_vk_decode_create_params(buf, avctx, ctx, &session_params_create); - if (err < 0) - return err; - - av_log(avctx, AV_LOG_DEBUG, "Created frame parameters\n"); - - return 0; -} - -static int vk_av1_start_frame(AVCodecContext *avctx, - av_unused const AVBufferRef *buffer_ref, - av_unused const uint8_t *buffer, - av_unused uint32_t size) -{ - int err; - int ref_count = 0; - AV1DecContext *s = avctx->priv_data; - const AV1Frame *pic = &s->cur_frame; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - uint32_t frame_id_alloc_mask = 0; - - AV1VulkanDecodePicture *ap = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &ap->vp; - - const AV1RawFrameHeader *frame_header = s->raw_frame_header; - const AV1RawFilmGrainParams *film_grain = 
&s->cur_frame.film_grain; - - const int apply_grain = !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) && - film_grain->apply_grain; - StdVideoAV1FrameRestorationType remap_lr_type[4] = { STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_NONE, - STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_SWITCHABLE, - STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_WIENER, - STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_SGRPROJ }; - - /* Use the current frame_ids in ref[] to decide occupied frame_ids */ - for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) { - const AV1VulkanDecodePicture* rp = s->ref[i].hwaccel_picture_private; - if (rp) - frame_id_alloc_mask |= 1 << rp->frame_id; - } - - if (!ap->frame_id_set) { - unsigned slot_idx = 0; - for (unsigned i = 0; i < 32; i++) { - if (!(frame_id_alloc_mask & (1 << i))) { - slot_idx = i; - break; - } - } - ap->frame_id = slot_idx; - ap->frame_id_set = 1; - frame_id_alloc_mask |= (1 << slot_idx); - } - - ap->ref_frame_sign_bias_mask = 0x0; - for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) - ap->ref_frame_sign_bias_mask |= pic->ref_frame_sign_bias[i] << i; - - for (int i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; i++) { - const int idx = pic->raw_frame_header->ref_frame_idx[i]; - const AV1Frame *ref_frame = &s->ref[idx]; - AV1VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private; - int found = 0; - - if (!ref_frame->f) - continue; - - for (int j = 0; j < ref_count; j++) { - if (vp->ref_slots[j].slotIndex == hp->frame_id) { - found = 1; - break; - } - } - if (found) - continue; - - err = vk_av1_fill_pict(avctx, &ap->ref_src[ref_count], &vp->ref_slots[ref_count], - &vp->refs[ref_count], &ap->std_refs[ref_count], &ap->vkav1_refs[ref_count], - ref_frame, 0, 0, ref_frame->order_hints); - if (err < 0) - return err; - - ref_count++; - } - - err = vk_av1_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, - &ap->std_ref, - &ap->vkav1_ref, - pic, 1, apply_grain, NULL); - if (err < 0) - return err; - - ap->av1_pic_info = (VkVideoDecodeAV1PictureInfoKHR) { - 
.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PICTURE_INFO_KHR, - .pStdPictureInfo = &ap->std_pic_info, - .frameHeaderOffset = 0, - .tileCount = 0, - .pTileOffsets = NULL, - .pTileSizes = ap->tile_sizes, - }; - - for (int i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; i++) { - const int idx = pic->raw_frame_header->ref_frame_idx[i]; - const AV1Frame *ref_frame = &s->ref[idx]; - AV1VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private; - - if (!ref_frame->f) - ap->av1_pic_info.referenceNameSlotIndices[i] = AV1_REF_FRAME_NONE; - else - ap->av1_pic_info.referenceNameSlotIndices[i] = hp->frame_id; - } - - vp->decode_info = (VkVideoDecodeInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, - .pNext = &ap->av1_pic_info, - .flags = 0x0, - .pSetupReferenceSlot = &vp->ref_slot, - .referenceSlotCount = ref_count, - .pReferenceSlots = vp->ref_slots, - .dstPictureResource = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - .codedOffset = (VkOffset2D){ 0, 0 }, - .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, - .baseArrayLayer = 0, - .imageViewBinding = vp->view.out[0], - }, - }; - - ap->tile_info = (StdVideoAV1TileInfo) { - .flags = (StdVideoAV1TileInfoFlags) { - .uniform_tile_spacing_flag = frame_header->uniform_tile_spacing_flag, - }, - .TileCols = frame_header->tile_cols, - .TileRows = frame_header->tile_rows, - .context_update_tile_id = frame_header->context_update_tile_id, - .tile_size_bytes_minus_1 = frame_header->tile_size_bytes_minus1, - .pWidthInSbsMinus1 = ap->width_in_sbs_minus1, - .pHeightInSbsMinus1 = ap->height_in_sbs_minus1, - .pMiColStarts = ap->mi_col_starts, - .pMiRowStarts = ap->mi_row_starts, - }; - - ap->quantization = (StdVideoAV1Quantization) { - .flags.using_qmatrix = frame_header->using_qmatrix, - .flags.diff_uv_delta = frame_header->diff_uv_delta, - .base_q_idx = frame_header->base_q_idx, - .DeltaQYDc = frame_header->delta_q_y_dc, - .DeltaQUDc = frame_header->delta_q_u_dc, - 
.DeltaQUAc = frame_header->delta_q_u_ac, - .DeltaQVDc = frame_header->delta_q_v_dc, - .DeltaQVAc = frame_header->delta_q_v_ac, - .qm_y = frame_header->qm_y, - .qm_u = frame_header->qm_u, - .qm_v = frame_header->qm_v, - }; - - ap->loop_filter = (StdVideoAV1LoopFilter) { - .flags = (StdVideoAV1LoopFilterFlags) { - .loop_filter_delta_enabled = frame_header->loop_filter_delta_enabled, - .loop_filter_delta_update = frame_header->loop_filter_delta_update, - }, - .loop_filter_sharpness = frame_header->loop_filter_sharpness, - }; - - for (int i = 0; i < STD_VIDEO_AV1_MAX_LOOP_FILTER_STRENGTHS; i++) - ap->loop_filter.loop_filter_level[i] = frame_header->loop_filter_level[i]; - for (int i = 0; i < STD_VIDEO_AV1_LOOP_FILTER_ADJUSTMENTS; i++) - ap->loop_filter.loop_filter_mode_deltas[i] = frame_header->loop_filter_mode_deltas[i]; - - ap->cdef = (StdVideoAV1CDEF) { - .cdef_damping_minus_3 = frame_header->cdef_damping_minus_3, - .cdef_bits = frame_header->cdef_bits, - }; - - ap->loop_restoration = (StdVideoAV1LoopRestoration) { - .FrameRestorationType[0] = remap_lr_type[frame_header->lr_type[0]], - .FrameRestorationType[1] = remap_lr_type[frame_header->lr_type[1]], - .FrameRestorationType[2] = remap_lr_type[frame_header->lr_type[2]], - .LoopRestorationSize[0] = 1 + frame_header->lr_unit_shift, - .LoopRestorationSize[1] = 1 + frame_header->lr_unit_shift - frame_header->lr_uv_shift, - .LoopRestorationSize[2] = 1 + frame_header->lr_unit_shift - frame_header->lr_uv_shift, - }; - - ap->film_grain = (StdVideoAV1FilmGrain) { - .flags = (StdVideoAV1FilmGrainFlags) { - .chroma_scaling_from_luma = film_grain->chroma_scaling_from_luma, - .overlap_flag = film_grain->overlap_flag, - .clip_to_restricted_range = film_grain->clip_to_restricted_range, - }, - .grain_scaling_minus_8 = film_grain->grain_scaling_minus_8, - .ar_coeff_lag = film_grain->ar_coeff_lag, - .ar_coeff_shift_minus_6 = film_grain->ar_coeff_shift_minus_6, - .grain_scale_shift = film_grain->grain_scale_shift, - .grain_seed = 
film_grain->grain_seed, - .film_grain_params_ref_idx = film_grain->film_grain_params_ref_idx, - .num_y_points = film_grain->num_y_points, - .num_cb_points = film_grain->num_cb_points, - .num_cr_points = film_grain->num_cr_points, - .cb_mult = film_grain->cb_mult, - .cb_luma_mult = film_grain->cb_luma_mult, - .cb_offset = film_grain->cb_offset, - .cr_mult = film_grain->cr_mult, - .cr_luma_mult = film_grain->cr_luma_mult, - .cr_offset = film_grain->cr_offset, - }; - - /* Setup frame header */ - ap->std_pic_info = (StdVideoDecodeAV1PictureInfo) { - .flags = (StdVideoDecodeAV1PictureInfoFlags) { - .error_resilient_mode = frame_header->error_resilient_mode, - .disable_cdf_update = frame_header->disable_cdf_update, - .use_superres = frame_header->use_superres, - .render_and_frame_size_different = frame_header->render_and_frame_size_different, - .allow_screen_content_tools = frame_header->allow_screen_content_tools, - .is_filter_switchable = frame_header->is_filter_switchable, - .force_integer_mv = pic->force_integer_mv, - .frame_size_override_flag = frame_header->frame_size_override_flag, - .buffer_removal_time_present_flag = frame_header->buffer_removal_time_present_flag, - .allow_intrabc = frame_header->allow_intrabc, - .frame_refs_short_signaling = frame_header->frame_refs_short_signaling, - .allow_high_precision_mv = frame_header->allow_high_precision_mv, - .is_motion_mode_switchable = frame_header->is_motion_mode_switchable, - .use_ref_frame_mvs = frame_header->use_ref_frame_mvs, - .disable_frame_end_update_cdf = frame_header->disable_frame_end_update_cdf, - .allow_warped_motion = frame_header->allow_warped_motion, - .reduced_tx_set = frame_header->reduced_tx_set, - .reference_select = frame_header->reference_select, - .skip_mode_present = frame_header->skip_mode_present, - .delta_q_present = frame_header->delta_q_present, - .delta_lf_present = frame_header->delta_lf_present, - .delta_lf_multi = frame_header->delta_lf_multi, - .segmentation_enabled = 
frame_header->segmentation_enabled, - .segmentation_update_map = frame_header->segmentation_update_map, - .segmentation_temporal_update = frame_header->segmentation_temporal_update, - .segmentation_update_data = frame_header->segmentation_update_data, - .UsesLr = frame_header->lr_type[0] || frame_header->lr_type[1] || frame_header->lr_type[2], - .apply_grain = apply_grain, - }, - .frame_type = frame_header->frame_type, - .current_frame_id = frame_header->current_frame_id, - .OrderHint = frame_header->order_hint, - .primary_ref_frame = frame_header->primary_ref_frame, - .refresh_frame_flags = frame_header->refresh_frame_flags, - .interpolation_filter = frame_header->interpolation_filter, - .TxMode = frame_header->tx_mode, - .delta_q_res = frame_header->delta_q_res, - .delta_lf_res = frame_header->delta_lf_res, - .SkipModeFrame[0] = s->cur_frame.skip_mode_frame_idx[0], - .SkipModeFrame[1] = s->cur_frame.skip_mode_frame_idx[1], - .coded_denom = frame_header->coded_denom, - .pTileInfo = &ap->tile_info, - .pQuantization = &ap->quantization, - .pSegmentation = &ap->segmentation, - .pLoopFilter = &ap->loop_filter, - .pCDEF = &ap->cdef, - .pLoopRestoration = &ap->loop_restoration, - .pGlobalMotion = &ap->global_motion, - .pFilmGrain = apply_grain ? 
&ap->film_grain : NULL, - }; - - for (int i = 0; i < 64; i++) { - ap->width_in_sbs_minus1[i] = frame_header->width_in_sbs_minus_1[i]; - ap->height_in_sbs_minus1[i] = frame_header->height_in_sbs_minus_1[i]; - ap->mi_col_starts[i] = frame_header->tile_start_col_sb[i]; - ap->mi_row_starts[i] = frame_header->tile_start_row_sb[i]; - } - - for (int i = 0; i < STD_VIDEO_AV1_MAX_SEGMENTS; i++) { - ap->segmentation.FeatureEnabled[i] = 0x0; - for (int j = 0; j < STD_VIDEO_AV1_SEG_LVL_MAX; j++) { - ap->segmentation.FeatureEnabled[i] |= (frame_header->feature_enabled[i][j] << j); - ap->segmentation.FeatureData[i][j] = frame_header->feature_value[i][j]; - } - } - - if (dec->quirk_av1_offset) - for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) - ap->std_pic_info.OrderHints[i - 1] = pic->order_hints[i]; - else - for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) - ap->std_pic_info.OrderHints[i] = pic->order_hints[i]; - - for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) { - ap->loop_filter.loop_filter_ref_deltas[i] = frame_header->loop_filter_ref_deltas[i]; - ap->global_motion.GmType[i] = s->cur_frame.gm_type[i]; - for (int j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; j++) { - ap->global_motion.gm_params[i][j] = s->cur_frame.gm_params[i][j]; - } - } - - for (int i = 0; i < STD_VIDEO_AV1_MAX_CDEF_FILTER_STRENGTHS; i++) { - ap->cdef.cdef_y_pri_strength[i] = frame_header->cdef_y_pri_strength[i]; - ap->cdef.cdef_y_sec_strength[i] = frame_header->cdef_y_sec_strength[i]; - ap->cdef.cdef_uv_pri_strength[i] = frame_header->cdef_uv_pri_strength[i]; - ap->cdef.cdef_uv_sec_strength[i] = frame_header->cdef_uv_sec_strength[i]; - } - - if (apply_grain) { - for (int i = 0; i < STD_VIDEO_AV1_MAX_NUM_Y_POINTS; i++) { - ap->film_grain.point_y_value[i] = film_grain->point_y_value[i]; - ap->film_grain.point_y_scaling[i] = film_grain->point_y_scaling[i]; - } - - for (int i = 0; i < STD_VIDEO_AV1_MAX_NUM_CB_POINTS; i++) { - ap->film_grain.point_cb_value[i] = 
film_grain->point_cb_value[i]; - ap->film_grain.point_cb_scaling[i] = film_grain->point_cb_scaling[i]; - ap->film_grain.point_cr_value[i] = film_grain->point_cr_value[i]; - ap->film_grain.point_cr_scaling[i] = film_grain->point_cr_scaling[i]; - } - - for (int i = 0; i < STD_VIDEO_AV1_MAX_NUM_POS_LUMA; i++) - ap->film_grain.ar_coeffs_y_plus_128[i] = film_grain->ar_coeffs_y_plus_128[i]; - - for (int i = 0; i < STD_VIDEO_AV1_MAX_NUM_POS_CHROMA; i++) { - ap->film_grain.ar_coeffs_cb_plus_128[i] = film_grain->ar_coeffs_cb_plus_128[i]; - ap->film_grain.ar_coeffs_cr_plus_128[i] = film_grain->ar_coeffs_cr_plus_128[i]; - } - } - - ap->dec = dec; - - return 0; -} - -static int vk_av1_decode_slice(AVCodecContext *avctx, - const uint8_t *data, - uint32_t size) -{ - int err; - const AV1DecContext *s = avctx->priv_data; - AV1VulkanDecodePicture *ap = s->cur_frame.hwaccel_picture_private; - FFVulkanDecodePicture *vp = &ap->vp; - - /* Too many tiles, exceeding all defined levels in the AV1 spec */ - if (ap->av1_pic_info.tileCount > MAX_TILES) - return AVERROR(ENOSYS); - - for (int i = s->tg_start; i <= s->tg_end; i++) { - ap->tile_sizes[ap->av1_pic_info.tileCount] = s->tile_group_info[i].tile_size; - - err = ff_vk_decode_add_slice(avctx, vp, - data + s->tile_group_info[i].tile_offset, - s->tile_group_info[i].tile_size, 0, - &ap->av1_pic_info.tileCount, - &ap->av1_pic_info.pTileOffsets); - if (err < 0) - return err; - } - - return 0; -} - -static int vk_av1_end_frame(AVCodecContext *avctx) -{ - const AV1DecContext *s = avctx->priv_data; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - - const AV1Frame *pic = &s->cur_frame; - AV1VulkanDecodePicture *ap = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &ap->vp; - FFVulkanDecodePicture *rvp[AV1_NUM_REF_FRAMES] = { 0 }; - AVFrame *rav[AV1_NUM_REF_FRAMES] = { 0 }; - -#ifdef VK_KHR_video_maintenance2 - StdVideoAV1SequenceHeader av1_sequence_header; - 
StdVideoAV1TimingInfo av1_timing_info; - StdVideoAV1ColorConfig av1_color_config; - VkVideoDecodeAV1InlineSessionParametersInfoKHR av1_params; - - if (ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2) { - vk_av1_params_fill(avctx, &av1_timing_info, &av1_color_config, - &av1_sequence_header); - av1_params = (VkVideoDecodeAV1InlineSessionParametersInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_INLINE_SESSION_PARAMETERS_INFO_KHR, - .pStdSequenceHeader = &av1_sequence_header, - }; - ap->av1_pic_info.pNext = &av1_params; - } -#endif - - if (!ap->av1_pic_info.tileCount) - return 0; - - if (!dec->session_params && - !(ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2)) { - int err = vk_av1_create_params(avctx, &dec->session_params, ap); - if (err < 0) - return err; - } - - for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { - const AV1Frame *rp = ap->ref_src[i]; - AV1VulkanDecodePicture *rhp = rp->hwaccel_picture_private; - - rvp[i] = &rhp->vp; - rav[i] = ap->ref_src[i]->f; - } - - av_log(avctx, AV_LOG_DEBUG, "Decoding frame, %"SIZE_SPECIFIER" bytes, %i tiles\n", - vp->slices_size, ap->av1_pic_info.tileCount); - - return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); -} - -static void vk_av1_free_frame_priv(AVRefStructOpaque _hwctx, void *data) -{ - AVHWDeviceContext *hwctx = _hwctx.nc; - AV1VulkanDecodePicture *ap = data; - - /* Free frame resources, this also destroys the session parameters. 
*/ - ff_vk_decode_free_frame(hwctx, &ap->vp); -} - -const FFHWAccel ff_av1_vulkan_hwaccel = { - .p.name = "av1_vulkan", - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_AV1, - .p.pix_fmt = AV_PIX_FMT_VULKAN, - .start_frame = &vk_av1_start_frame, - .decode_slice = &vk_av1_decode_slice, - .end_frame = &vk_av1_end_frame, - .free_frame_priv = &vk_av1_free_frame_priv, - .frame_priv_data_size = sizeof(AV1VulkanDecodePicture), - .init = &ff_vk_decode_init, - .update_thread_context = &ff_vk_update_thread_context, - .decode_params = &ff_vk_params_invalidate, - .flush = &ff_vk_decode_flush, - .uninit = &ff_vk_decode_uninit, - .frame_params = &ff_vk_frame_params, - .priv_data_size = sizeof(FFVulkanDecodeContext), - .caps_internal = HWACCEL_CAP_ASYNC_SAFE, -}; -- 2.49.1 From 02605012d1182416beca37099a3a670ff0e29c26 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:28:22 +0000 Subject: [PATCH 064/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_av1.c | 670 +++++++++++++++++++++++++++++++++ 1 file changed, 670 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_av1.c diff --git a/libavcodec/vulkan/vulkan_av1.c b/libavcodec/vulkan/vulkan_av1.c new file mode 100644 index 0000000000..ee954a46ed --- /dev/null +++ b/libavcodec/vulkan/vulkan_av1.c @@ -0,0 +1,670 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/av1dec.h" + +#include "vulkan_decode.h" + +/* Maximum number of tiles specified by any defined level */ +#define MAX_TILES 256 + +const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc = { + .codec_id = AV_CODEC_ID_AV1, + .decode_extension = FF_VK_EXT_VIDEO_DECODE_AV1, + .queue_flags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, + .decode_op = VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR, + .ext_props = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION, + }, +}; + +typedef struct AV1VulkanDecodePicture { + FFVulkanDecodePicture vp; + + /* TODO: investigate if this can be removed to make decoding completely + * independent. */ + FFVulkanDecodeContext *dec; + + uint32_t tile_sizes[MAX_TILES]; + + /* Current picture */ + StdVideoDecodeAV1ReferenceInfo std_ref; + VkVideoDecodeAV1DpbSlotInfoKHR vkav1_ref; + uint16_t width_in_sbs_minus1[64]; + uint16_t height_in_sbs_minus1[64]; + uint16_t mi_col_starts[64]; + uint16_t mi_row_starts[64]; + StdVideoAV1TileInfo tile_info; + StdVideoAV1Quantization quantization; + StdVideoAV1Segmentation segmentation; + StdVideoAV1LoopFilter loop_filter; + StdVideoAV1CDEF cdef; + StdVideoAV1LoopRestoration loop_restoration; + StdVideoAV1GlobalMotion global_motion; + StdVideoAV1FilmGrain film_grain; + StdVideoDecodeAV1PictureInfo std_pic_info; + VkVideoDecodeAV1PictureInfoKHR av1_pic_info; + + /* Picture refs */ + const AV1Frame *ref_src [AV1_NUM_REF_FRAMES]; + StdVideoDecodeAV1ReferenceInfo std_refs [AV1_NUM_REF_FRAMES]; + VkVideoDecodeAV1DpbSlotInfoKHR vkav1_refs[AV1_NUM_REF_FRAMES]; + + uint8_t frame_id_set; + uint8_t frame_id; + uint8_t ref_frame_sign_bias_mask; +} AV1VulkanDecodePicture; + +static int 
vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + StdVideoDecodeAV1ReferenceInfo *vkav1_std_ref, + VkVideoDecodeAV1DpbSlotInfoKHR *vkav1_ref, /* Goes in ^ */ + const AV1Frame *pic, int is_current, int has_grain, + const uint8_t *saved_order_hints) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + AV1VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(dec, pic->f, vkpic, is_current, + has_grain || dec->dedicated_dpb); + if (err < 0) + return err; + + *vkav1_std_ref = (StdVideoDecodeAV1ReferenceInfo) { + .flags = (StdVideoDecodeAV1ReferenceInfoFlags) { + .disable_frame_end_update_cdf = pic->raw_frame_header->disable_frame_end_update_cdf, + .segmentation_enabled = pic->raw_frame_header->segmentation_enabled, + }, + .frame_type = pic->raw_frame_header->frame_type, + .OrderHint = pic->raw_frame_header->order_hint, + .RefFrameSignBias = hp->ref_frame_sign_bias_mask, + }; + + if (saved_order_hints) { + if (dec->quirk_av1_offset) + for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) + vkav1_std_ref->SavedOrderHints[i - 1] = saved_order_hints[i]; + else + for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) + vkav1_std_ref->SavedOrderHints[i] = saved_order_hints[i]; + } + + *vkav1_ref = (VkVideoDecodeAV1DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = vkav1_std_ref, + }; + + vkav1_std_ref->flags.disable_frame_end_update_cdf = pic->raw_frame_header->disable_frame_end_update_cdf; + vkav1_std_ref->flags.segmentation_enabled = pic->raw_frame_header->segmentation_enabled; + vkav1_std_ref->frame_type = pic->raw_frame_header->frame_type; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = 
VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = ((has_grain || dec->dedicated_dpb) && ctx->common.layered_dpb) ? + hp->frame_id : 0, + .imageViewBinding = vkpic->view.ref[0], + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = vkav1_ref, + .slotIndex = hp->frame_id, + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static void vk_av1_params_fill(AVCodecContext *avctx, + StdVideoAV1TimingInfo *av1_timing_info, + StdVideoAV1ColorConfig *av1_color_config, + StdVideoAV1SequenceHeader *av1_sequence_header) +{ + const AV1DecContext *s = avctx->priv_data; + const AV1RawSequenceHeader *seq = s->raw_seq; + + *av1_timing_info = (StdVideoAV1TimingInfo) { + .flags = (StdVideoAV1TimingInfoFlags) { + .equal_picture_interval = seq->timing_info.equal_picture_interval, + }, + .num_units_in_display_tick = seq->timing_info.num_units_in_display_tick, + .time_scale = seq->timing_info.time_scale, + .num_ticks_per_picture_minus_1 = seq->timing_info.num_ticks_per_picture_minus_1, + }; + + *av1_color_config = (StdVideoAV1ColorConfig) { + .flags = (StdVideoAV1ColorConfigFlags) { + .mono_chrome = seq->color_config.mono_chrome, + .color_range = seq->color_config.color_range, + .separate_uv_delta_q = seq->color_config.separate_uv_delta_q, + }, + .BitDepth = seq->color_config.twelve_bit ? 12 : + seq->color_config.high_bitdepth ? 
10 : 8, + .subsampling_x = seq->color_config.subsampling_x, + .subsampling_y = seq->color_config.subsampling_y, + .color_primaries = seq->color_config.color_primaries, + .transfer_characteristics = seq->color_config.transfer_characteristics, + .matrix_coefficients = seq->color_config.matrix_coefficients, + }; + + *av1_sequence_header = (StdVideoAV1SequenceHeader) { + .flags = (StdVideoAV1SequenceHeaderFlags) { + .still_picture = seq->still_picture, + .reduced_still_picture_header = seq->reduced_still_picture_header, + .use_128x128_superblock = seq->use_128x128_superblock, + .enable_filter_intra = seq->enable_filter_intra, + .enable_intra_edge_filter = seq->enable_intra_edge_filter, + .enable_interintra_compound = seq->enable_interintra_compound, + .enable_masked_compound = seq->enable_masked_compound, + .enable_warped_motion = seq->enable_warped_motion, + .enable_dual_filter = seq->enable_dual_filter, + .enable_order_hint = seq->enable_order_hint, + .enable_jnt_comp = seq->enable_jnt_comp, + .enable_ref_frame_mvs = seq->enable_ref_frame_mvs, + .frame_id_numbers_present_flag = seq->frame_id_numbers_present_flag, + .enable_superres = seq->enable_superres, + .enable_cdef = seq->enable_cdef, + .enable_restoration = seq->enable_restoration, + .film_grain_params_present = seq->film_grain_params_present, + .timing_info_present_flag = seq->timing_info_present_flag, + .initial_display_delay_present_flag = seq->initial_display_delay_present_flag, + }, + .seq_profile = seq->seq_profile, + .frame_width_bits_minus_1 = seq->frame_width_bits_minus_1, + .frame_height_bits_minus_1 = seq->frame_height_bits_minus_1, + .max_frame_width_minus_1 = seq->max_frame_width_minus_1, + .max_frame_height_minus_1 = seq->max_frame_height_minus_1, + .delta_frame_id_length_minus_2 = seq->delta_frame_id_length_minus_2, + .additional_frame_id_length_minus_1 = seq->additional_frame_id_length_minus_1, + .order_hint_bits_minus_1 = seq->order_hint_bits_minus_1, + .seq_force_integer_mv = 
seq->seq_force_integer_mv, + .seq_force_screen_content_tools = seq->seq_force_screen_content_tools, + .pTimingInfo = av1_timing_info, + .pColorConfig = av1_color_config, + }; +} + +static int vk_av1_create_params(AVCodecContext *avctx, AVBufferRef **buf, + AV1VulkanDecodePicture *ap) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + StdVideoAV1SequenceHeader av1_sequence_header; + StdVideoAV1TimingInfo av1_timing_info; + StdVideoAV1ColorConfig av1_color_config; + VkVideoDecodeAV1SessionParametersCreateInfoKHR av1_params; + VkVideoSessionParametersCreateInfoKHR session_params_create; + + vk_av1_params_fill(avctx, &av1_timing_info, &av1_color_config, + &av1_sequence_header); + + av1_params = (VkVideoDecodeAV1SessionParametersCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pStdSequenceHeader = &av1_sequence_header, + }; + session_params_create = (VkVideoSessionParametersCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &av1_params, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = VK_NULL_HANDLE, + }; + + err = ff_vk_decode_create_params(buf, avctx, ctx, &session_params_create); + if (err < 0) + return err; + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters\n"); + + return 0; +} + +static int vk_av1_start_frame(AVCodecContext *avctx, + av_unused const AVBufferRef *buffer_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + int ref_count = 0; + AV1DecContext *s = avctx->priv_data; + const AV1Frame *pic = &s->cur_frame; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + uint32_t frame_id_alloc_mask = 0; + + AV1VulkanDecodePicture *ap = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + + const AV1RawFrameHeader *frame_header = s->raw_frame_header; + const AV1RawFilmGrainParams *film_grain = 
&s->cur_frame.film_grain; + + const int apply_grain = !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN) && + film_grain->apply_grain; + StdVideoAV1FrameRestorationType remap_lr_type[4] = { STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_NONE, + STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_SWITCHABLE, + STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_WIENER, + STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_SGRPROJ }; + + /* Use the current frame_ids in ref[] to decide occupied frame_ids */ + for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) { + const AV1VulkanDecodePicture* rp = s->ref[i].hwaccel_picture_private; + if (rp) + frame_id_alloc_mask |= 1 << rp->frame_id; + } + + if (!ap->frame_id_set) { + unsigned slot_idx = 0; + for (unsigned i = 0; i < 32; i++) { + if (!(frame_id_alloc_mask & (1 << i))) { + slot_idx = i; + break; + } + } + ap->frame_id = slot_idx; + ap->frame_id_set = 1; + frame_id_alloc_mask |= (1 << slot_idx); + } + + ap->ref_frame_sign_bias_mask = 0x0; + for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) + ap->ref_frame_sign_bias_mask |= pic->ref_frame_sign_bias[i] << i; + + for (int i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; i++) { + const int idx = pic->raw_frame_header->ref_frame_idx[i]; + const AV1Frame *ref_frame = &s->ref[idx]; + AV1VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private; + int found = 0; + + if (!ref_frame->f) + continue; + + for (int j = 0; j < ref_count; j++) { + if (vp->ref_slots[j].slotIndex == hp->frame_id) { + found = 1; + break; + } + } + if (found) + continue; + + err = vk_av1_fill_pict(avctx, &ap->ref_src[ref_count], &vp->ref_slots[ref_count], + &vp->refs[ref_count], &ap->std_refs[ref_count], &ap->vkav1_refs[ref_count], + ref_frame, 0, 0, ref_frame->order_hints); + if (err < 0) + return err; + + ref_count++; + } + + err = vk_av1_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &ap->std_ref, + &ap->vkav1_ref, + pic, 1, apply_grain, NULL); + if (err < 0) + return err; + + ap->av1_pic_info = (VkVideoDecodeAV1PictureInfoKHR) { + 
.sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PICTURE_INFO_KHR, + .pStdPictureInfo = &ap->std_pic_info, + .frameHeaderOffset = 0, + .tileCount = 0, + .pTileOffsets = NULL, + .pTileSizes = ap->tile_sizes, + }; + + for (int i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; i++) { + const int idx = pic->raw_frame_header->ref_frame_idx[i]; + const AV1Frame *ref_frame = &s->ref[idx]; + AV1VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private; + + if (!ref_frame->f) + ap->av1_pic_info.referenceNameSlotIndices[i] = AV1_REF_FRAME_NONE; + else + ap->av1_pic_info.referenceNameSlotIndices[i] = hp->frame_id; + } + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &ap->av1_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = ref_count, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->view.out[0], + }, + }; + + ap->tile_info = (StdVideoAV1TileInfo) { + .flags = (StdVideoAV1TileInfoFlags) { + .uniform_tile_spacing_flag = frame_header->uniform_tile_spacing_flag, + }, + .TileCols = frame_header->tile_cols, + .TileRows = frame_header->tile_rows, + .context_update_tile_id = frame_header->context_update_tile_id, + .tile_size_bytes_minus_1 = frame_header->tile_size_bytes_minus1, + .pWidthInSbsMinus1 = ap->width_in_sbs_minus1, + .pHeightInSbsMinus1 = ap->height_in_sbs_minus1, + .pMiColStarts = ap->mi_col_starts, + .pMiRowStarts = ap->mi_row_starts, + }; + + ap->quantization = (StdVideoAV1Quantization) { + .flags.using_qmatrix = frame_header->using_qmatrix, + .flags.diff_uv_delta = frame_header->diff_uv_delta, + .base_q_idx = frame_header->base_q_idx, + .DeltaQYDc = frame_header->delta_q_y_dc, + .DeltaQUDc = frame_header->delta_q_u_dc, + 
.DeltaQUAc = frame_header->delta_q_u_ac, + .DeltaQVDc = frame_header->delta_q_v_dc, + .DeltaQVAc = frame_header->delta_q_v_ac, + .qm_y = frame_header->qm_y, + .qm_u = frame_header->qm_u, + .qm_v = frame_header->qm_v, + }; + + ap->loop_filter = (StdVideoAV1LoopFilter) { + .flags = (StdVideoAV1LoopFilterFlags) { + .loop_filter_delta_enabled = frame_header->loop_filter_delta_enabled, + .loop_filter_delta_update = frame_header->loop_filter_delta_update, + }, + .loop_filter_sharpness = frame_header->loop_filter_sharpness, + }; + + for (int i = 0; i < STD_VIDEO_AV1_MAX_LOOP_FILTER_STRENGTHS; i++) + ap->loop_filter.loop_filter_level[i] = frame_header->loop_filter_level[i]; + for (int i = 0; i < STD_VIDEO_AV1_LOOP_FILTER_ADJUSTMENTS; i++) + ap->loop_filter.loop_filter_mode_deltas[i] = frame_header->loop_filter_mode_deltas[i]; + + ap->cdef = (StdVideoAV1CDEF) { + .cdef_damping_minus_3 = frame_header->cdef_damping_minus_3, + .cdef_bits = frame_header->cdef_bits, + }; + + ap->loop_restoration = (StdVideoAV1LoopRestoration) { + .FrameRestorationType[0] = remap_lr_type[frame_header->lr_type[0]], + .FrameRestorationType[1] = remap_lr_type[frame_header->lr_type[1]], + .FrameRestorationType[2] = remap_lr_type[frame_header->lr_type[2]], + .LoopRestorationSize[0] = 1 + frame_header->lr_unit_shift, + .LoopRestorationSize[1] = 1 + frame_header->lr_unit_shift - frame_header->lr_uv_shift, + .LoopRestorationSize[2] = 1 + frame_header->lr_unit_shift - frame_header->lr_uv_shift, + }; + + ap->film_grain = (StdVideoAV1FilmGrain) { + .flags = (StdVideoAV1FilmGrainFlags) { + .chroma_scaling_from_luma = film_grain->chroma_scaling_from_luma, + .overlap_flag = film_grain->overlap_flag, + .clip_to_restricted_range = film_grain->clip_to_restricted_range, + }, + .grain_scaling_minus_8 = film_grain->grain_scaling_minus_8, + .ar_coeff_lag = film_grain->ar_coeff_lag, + .ar_coeff_shift_minus_6 = film_grain->ar_coeff_shift_minus_6, + .grain_scale_shift = film_grain->grain_scale_shift, + .grain_seed = 
film_grain->grain_seed, + .film_grain_params_ref_idx = film_grain->film_grain_params_ref_idx, + .num_y_points = film_grain->num_y_points, + .num_cb_points = film_grain->num_cb_points, + .num_cr_points = film_grain->num_cr_points, + .cb_mult = film_grain->cb_mult, + .cb_luma_mult = film_grain->cb_luma_mult, + .cb_offset = film_grain->cb_offset, + .cr_mult = film_grain->cr_mult, + .cr_luma_mult = film_grain->cr_luma_mult, + .cr_offset = film_grain->cr_offset, + }; + + /* Setup frame header */ + ap->std_pic_info = (StdVideoDecodeAV1PictureInfo) { + .flags = (StdVideoDecodeAV1PictureInfoFlags) { + .error_resilient_mode = frame_header->error_resilient_mode, + .disable_cdf_update = frame_header->disable_cdf_update, + .use_superres = frame_header->use_superres, + .render_and_frame_size_different = frame_header->render_and_frame_size_different, + .allow_screen_content_tools = frame_header->allow_screen_content_tools, + .is_filter_switchable = frame_header->is_filter_switchable, + .force_integer_mv = pic->force_integer_mv, + .frame_size_override_flag = frame_header->frame_size_override_flag, + .buffer_removal_time_present_flag = frame_header->buffer_removal_time_present_flag, + .allow_intrabc = frame_header->allow_intrabc, + .frame_refs_short_signaling = frame_header->frame_refs_short_signaling, + .allow_high_precision_mv = frame_header->allow_high_precision_mv, + .is_motion_mode_switchable = frame_header->is_motion_mode_switchable, + .use_ref_frame_mvs = frame_header->use_ref_frame_mvs, + .disable_frame_end_update_cdf = frame_header->disable_frame_end_update_cdf, + .allow_warped_motion = frame_header->allow_warped_motion, + .reduced_tx_set = frame_header->reduced_tx_set, + .reference_select = frame_header->reference_select, + .skip_mode_present = frame_header->skip_mode_present, + .delta_q_present = frame_header->delta_q_present, + .delta_lf_present = frame_header->delta_lf_present, + .delta_lf_multi = frame_header->delta_lf_multi, + .segmentation_enabled = 
frame_header->segmentation_enabled, + .segmentation_update_map = frame_header->segmentation_update_map, + .segmentation_temporal_update = frame_header->segmentation_temporal_update, + .segmentation_update_data = frame_header->segmentation_update_data, + .UsesLr = frame_header->lr_type[0] || frame_header->lr_type[1] || frame_header->lr_type[2], + .apply_grain = apply_grain, + }, + .frame_type = frame_header->frame_type, + .current_frame_id = frame_header->current_frame_id, + .OrderHint = frame_header->order_hint, + .primary_ref_frame = frame_header->primary_ref_frame, + .refresh_frame_flags = frame_header->refresh_frame_flags, + .interpolation_filter = frame_header->interpolation_filter, + .TxMode = frame_header->tx_mode, + .delta_q_res = frame_header->delta_q_res, + .delta_lf_res = frame_header->delta_lf_res, + .SkipModeFrame[0] = s->cur_frame.skip_mode_frame_idx[0], + .SkipModeFrame[1] = s->cur_frame.skip_mode_frame_idx[1], + .coded_denom = frame_header->coded_denom, + .pTileInfo = &ap->tile_info, + .pQuantization = &ap->quantization, + .pSegmentation = &ap->segmentation, + .pLoopFilter = &ap->loop_filter, + .pCDEF = &ap->cdef, + .pLoopRestoration = &ap->loop_restoration, + .pGlobalMotion = &ap->global_motion, + .pFilmGrain = apply_grain ? 
&ap->film_grain : NULL, + }; + + for (int i = 0; i < 64; i++) { + ap->width_in_sbs_minus1[i] = frame_header->width_in_sbs_minus_1[i]; + ap->height_in_sbs_minus1[i] = frame_header->height_in_sbs_minus_1[i]; + ap->mi_col_starts[i] = frame_header->tile_start_col_sb[i]; + ap->mi_row_starts[i] = frame_header->tile_start_row_sb[i]; + } + + for (int i = 0; i < STD_VIDEO_AV1_MAX_SEGMENTS; i++) { + ap->segmentation.FeatureEnabled[i] = 0x0; + for (int j = 0; j < STD_VIDEO_AV1_SEG_LVL_MAX; j++) { + ap->segmentation.FeatureEnabled[i] |= (frame_header->feature_enabled[i][j] << j); + ap->segmentation.FeatureData[i][j] = frame_header->feature_value[i][j]; + } + } + + if (dec->quirk_av1_offset) + for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) + ap->std_pic_info.OrderHints[i - 1] = pic->order_hints[i]; + else + for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) + ap->std_pic_info.OrderHints[i] = pic->order_hints[i]; + + for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) { + ap->loop_filter.loop_filter_ref_deltas[i] = frame_header->loop_filter_ref_deltas[i]; + ap->global_motion.GmType[i] = s->cur_frame.gm_type[i]; + for (int j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; j++) { + ap->global_motion.gm_params[i][j] = s->cur_frame.gm_params[i][j]; + } + } + + for (int i = 0; i < STD_VIDEO_AV1_MAX_CDEF_FILTER_STRENGTHS; i++) { + ap->cdef.cdef_y_pri_strength[i] = frame_header->cdef_y_pri_strength[i]; + ap->cdef.cdef_y_sec_strength[i] = frame_header->cdef_y_sec_strength[i]; + ap->cdef.cdef_uv_pri_strength[i] = frame_header->cdef_uv_pri_strength[i]; + ap->cdef.cdef_uv_sec_strength[i] = frame_header->cdef_uv_sec_strength[i]; + } + + if (apply_grain) { + for (int i = 0; i < STD_VIDEO_AV1_MAX_NUM_Y_POINTS; i++) { + ap->film_grain.point_y_value[i] = film_grain->point_y_value[i]; + ap->film_grain.point_y_scaling[i] = film_grain->point_y_scaling[i]; + } + + for (int i = 0; i < STD_VIDEO_AV1_MAX_NUM_CB_POINTS; i++) { + ap->film_grain.point_cb_value[i] = 
film_grain->point_cb_value[i]; + ap->film_grain.point_cb_scaling[i] = film_grain->point_cb_scaling[i]; + ap->film_grain.point_cr_value[i] = film_grain->point_cr_value[i]; + ap->film_grain.point_cr_scaling[i] = film_grain->point_cr_scaling[i]; + } + + for (int i = 0; i < STD_VIDEO_AV1_MAX_NUM_POS_LUMA; i++) + ap->film_grain.ar_coeffs_y_plus_128[i] = film_grain->ar_coeffs_y_plus_128[i]; + + for (int i = 0; i < STD_VIDEO_AV1_MAX_NUM_POS_CHROMA; i++) { + ap->film_grain.ar_coeffs_cb_plus_128[i] = film_grain->ar_coeffs_cb_plus_128[i]; + ap->film_grain.ar_coeffs_cr_plus_128[i] = film_grain->ar_coeffs_cr_plus_128[i]; + } + } + + ap->dec = dec; + + return 0; +} + +static int vk_av1_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + int err; + const AV1DecContext *s = avctx->priv_data; + AV1VulkanDecodePicture *ap = s->cur_frame.hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + + for (int i = s->tg_start; i <= s->tg_end; i++) { + /* tile_sizes[] holds only MAX_TILES entries (more tiles would exceed all defined levels in the AV1 spec); one tile group can carry several tiles, so check before every write rather than once per call */ + if (ap->av1_pic_info.tileCount >= MAX_TILES) + return AVERROR(ENOSYS); + + ap->tile_sizes[ap->av1_pic_info.tileCount] = s->tile_group_info[i].tile_size; + + err = ff_vk_decode_add_slice(avctx, vp, + data + s->tile_group_info[i].tile_offset, + s->tile_group_info[i].tile_size, 0, + &ap->av1_pic_info.tileCount, + &ap->av1_pic_info.pTileOffsets); + if (err < 0) + return err; + } + + return 0; +} + +static int vk_av1_end_frame(AVCodecContext *avctx) +{ + const AV1DecContext *s = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + const AV1Frame *pic = &s->cur_frame; + AV1VulkanDecodePicture *ap = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + FFVulkanDecodePicture *rvp[AV1_NUM_REF_FRAMES] = { 0 }; + AVFrame *rav[AV1_NUM_REF_FRAMES] = { 0 }; + +#ifdef VK_KHR_video_maintenance2 + StdVideoAV1SequenceHeader av1_sequence_header; + 
StdVideoAV1TimingInfo av1_timing_info; + StdVideoAV1ColorConfig av1_color_config; + VkVideoDecodeAV1InlineSessionParametersInfoKHR av1_params; + + if (ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2) { + vk_av1_params_fill(avctx, &av1_timing_info, &av1_color_config, + &av1_sequence_header); + av1_params = (VkVideoDecodeAV1InlineSessionParametersInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_INLINE_SESSION_PARAMETERS_INFO_KHR, + .pStdSequenceHeader = &av1_sequence_header, + }; + ap->av1_pic_info.pNext = &av1_params; + } +#endif + + if (!ap->av1_pic_info.tileCount) + return 0; + + if (!dec->session_params && + !(ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2)) { + int err = vk_av1_create_params(avctx, &dec->session_params, ap); + if (err < 0) + return err; + } + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + const AV1Frame *rp = ap->ref_src[i]; + AV1VulkanDecodePicture *rhp = rp->hwaccel_picture_private; + + rvp[i] = &rhp->vp; + rav[i] = ap->ref_src[i]->f; + } + + av_log(avctx, AV_LOG_DEBUG, "Decoding frame, %"SIZE_SPECIFIER" bytes, %i tiles\n", + vp->slices_size, ap->av1_pic_info.tileCount); + + return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); +} + +static void vk_av1_free_frame_priv(AVRefStructOpaque _hwctx, void *data) +{ + AVHWDeviceContext *hwctx = _hwctx.nc; + AV1VulkanDecodePicture *ap = data; + + /* Free frame resources, this also destroys the session parameters. 
*/ + ff_vk_decode_free_frame(hwctx, &ap->vp); +} + +const FFHWAccel ff_av1_vulkan_hwaccel = { + .p.name = "av1_vulkan", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_AV1, + .p.pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_av1_start_frame, + .decode_slice = &vk_av1_decode_slice, + .end_frame = &vk_av1_end_frame, + .free_frame_priv = &vk_av1_free_frame_priv, + .frame_priv_data_size = sizeof(AV1VulkanDecodePicture), + .init = &ff_vk_decode_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_invalidate, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; -- 2.49.1 From 170d83ecd684b45078a6377dbfbc9652713102db Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:29:02 +0000 Subject: [PATCH 065/118] Changing vulkan file directory --- libavcodec/vulkan_decode.c | 1399 ------------------------------------ 1 file changed, 1399 deletions(-) delete mode 100644 libavcodec/vulkan_decode.c diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c deleted file mode 100644 index b038d456dd..0000000000 --- a/libavcodec/vulkan_decode.c +++ /dev/null @@ -1,1399 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/refstruct.h" -#include "vulkan_video.h" -#include "vulkan_decode.h" -#include "config_components.h" -#include "libavutil/avassert.h" -#include "libavutil/mem.h" -#include "libavutil/vulkan_loader.h" - -#define DECODER_IS_SDR(codec_id) \ - (((codec_id) == AV_CODEC_ID_FFV1) || \ - ((codec_id) == AV_CODEC_ID_PRORES_RAW)) - -#if CONFIG_H264_VULKAN_HWACCEL -extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc; -#endif -#if CONFIG_HEVC_VULKAN_HWACCEL -extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc; -#endif -#if CONFIG_VP9_VULKAN_HWACCEL -extern const FFVulkanDecodeDescriptor ff_vk_dec_vp9_desc; -#endif -#if CONFIG_AV1_VULKAN_HWACCEL -extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc; -#endif -#if CONFIG_FFV1_VULKAN_HWACCEL -extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc; -#endif -#if CONFIG_PRORES_RAW_VULKAN_HWACCEL -extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc; -#endif - -static const FFVulkanDecodeDescriptor *dec_descs[] = { -#if CONFIG_H264_VULKAN_HWACCEL - &ff_vk_dec_h264_desc, -#endif -#if CONFIG_HEVC_VULKAN_HWACCEL - &ff_vk_dec_hevc_desc, -#endif -#if CONFIG_VP9_VULKAN_HWACCEL - &ff_vk_dec_vp9_desc, -#endif -#if CONFIG_AV1_VULKAN_HWACCEL - &ff_vk_dec_av1_desc, -#endif -#if CONFIG_FFV1_VULKAN_HWACCEL - &ff_vk_dec_ffv1_desc, -#endif -#if CONFIG_PRORES_RAW_VULKAN_HWACCEL - &ff_vk_dec_prores_raw_desc, -#endif -}; - -typedef struct FFVulkanDecodeProfileData { - VkVideoDecodeH264ProfileInfoKHR h264_profile; - VkVideoDecodeH265ProfileInfoKHR h265_profile; -#if CONFIG_VP9_VULKAN_HWACCEL - VkVideoDecodeVP9ProfileInfoKHR vp9_profile; -#endif - VkVideoDecodeAV1ProfileInfoKHR av1_profile; - - VkVideoDecodeUsageInfoKHR usage; - VkVideoProfileInfoKHR profile; - 
VkVideoProfileListInfoKHR profile_list; -} FFVulkanDecodeProfileData; - -static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id) -{ - for (size_t i = 0; i < FF_ARRAY_ELEMS(dec_descs); i++) - if (dec_descs[i]->codec_id == codec_id) - return dec_descs[i]; - av_assert1(!"no codec descriptor"); - return NULL; -} - -static const VkVideoProfileInfoKHR *get_video_profile(FFVulkanDecodeShared *ctx, enum AVCodecID codec_id) -{ - const VkVideoProfileListInfoKHR *profile_list; - - VkStructureType profile_struct_type = - codec_id == AV_CODEC_ID_H264 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR : - codec_id == AV_CODEC_ID_HEVC ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR : -#if CONFIG_VP9_VULKAN_HWACCEL - codec_id == AV_CODEC_ID_VP9 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR : -#endif - codec_id == AV_CODEC_ID_AV1 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR : - VK_STRUCTURE_TYPE_MAX_ENUM; - if (profile_struct_type == VK_STRUCTURE_TYPE_MAX_ENUM) - return NULL; - - profile_list = ff_vk_find_struct(ctx->s.hwfc->create_pnext, - VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); - if (!profile_list) - return NULL; - - for (int i = 0; i < profile_list->profileCount; i++) - if (ff_vk_find_struct(profile_list->pProfiles[i].pNext, profile_struct_type)) - return &profile_list->pProfiles[i]; - - return NULL; -} - -int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) -{ - int err; - FFVulkanDecodeContext *src_ctx = src->internal->hwaccel_priv_data; - FFVulkanDecodeContext *dst_ctx = dst->internal->hwaccel_priv_data; - - av_refstruct_replace(&dst_ctx->shared_ctx, src_ctx->shared_ctx); - - err = av_buffer_replace(&dst_ctx->session_params, src_ctx->session_params); - if (err < 0) - return err; - - dst_ctx->dedicated_dpb = src_ctx->dedicated_dpb; - dst_ctx->external_fg = src_ctx->external_fg; - - return 0; -} - -int ff_vk_params_invalidate(AVCodecContext *avctx, int t, const uint8_t *b, uint32_t s) -{ - 
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - av_buffer_unref(&dec->session_params); - return 0; -} - -static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx) -{ - int err; - AVFrame *avf = av_frame_alloc(); - if (!avf) - return NULL; - - err = av_hwframe_get_buffer(ctx->common.dpb_hwfc_ref, avf, 0x0); - if (err < 0) - av_frame_free(&avf); - - return avf; -} - -static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic) -{ - FFVulkanDecodeShared *ctx = dec->shared_ctx; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - vkpic->dpb_frame = NULL; - for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) { - vkpic->view.ref[i] = VK_NULL_HANDLE; - vkpic->view.out[i] = VK_NULL_HANDLE; - vkpic->view.dst[i] = VK_NULL_HANDLE; - } - - vkpic->destroy_image_view = vk->DestroyImageView; - vkpic->wait_semaphores = vk->WaitSemaphores; - vkpic->invalidate_memory_ranges = vk->InvalidateMappedMemoryRanges; -} - -int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, - FFVulkanDecodePicture *vkpic, int is_current, - int alloc_dpb) -{ - int err; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - - vkpic->slices_size = 0; - - /* If the decoder made a blank frame to make up for a missing ref, or the - * frame is the current frame so it's missing one, create a re-representation */ - if (vkpic->view.ref[0]) - return 0; - - init_frame(dec, vkpic); - - if (ctx->common.layered_dpb && alloc_dpb) { - vkpic->view.ref[0] = ctx->common.layered_view; - vkpic->view.aspect_ref[0] = ctx->common.layered_aspect; - } else if (alloc_dpb) { - AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data; - AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx; - - vkpic->dpb_frame = vk_get_dpb_pool(ctx); - if (!vkpic->dpb_frame) - return AVERROR(ENOMEM); - - err = ff_vk_create_view(&ctx->s, &ctx->common, - &vkpic->view.ref[0], &vkpic->view.aspect_ref[0], - (AVVkFrame *)vkpic->dpb_frame->data[0], - dpb_hwfc->format[0], !is_current); - if (err 
< 0) - return err; - - vkpic->view.dst[0] = vkpic->view.ref[0]; - } - - if (!alloc_dpb || is_current) { - AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data; - AVVulkanFramesContext *hwfc = frames->hwctx; - - err = ff_vk_create_view(&ctx->s, &ctx->common, - &vkpic->view.out[0], &vkpic->view.aspect[0], - (AVVkFrame *)pic->data[0], - hwfc->format[0], !is_current); - if (err < 0) - return err; - - if (!alloc_dpb) { - vkpic->view.ref[0] = vkpic->view.out[0]; - vkpic->view.aspect_ref[0] = vkpic->view.aspect[0]; - } - } - - return 0; -} - -int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic, - FFVulkanDecodePicture *vkpic, int is_current, - enum FFVkShaderRepFormat rep_fmt, int alloc_dpb) -{ - int err; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data; - - vkpic->slices_size = 0; - - if (vkpic->view.ref[0]) - return 0; - - init_frame(dec, vkpic); - - for (int i = 0; i < av_pix_fmt_count_planes(frames->sw_format); i++) { - if (alloc_dpb) { - vkpic->dpb_frame = vk_get_dpb_pool(ctx); - if (!vkpic->dpb_frame) - return AVERROR(ENOMEM); - - err = ff_vk_create_imageview(&ctx->s, - &vkpic->view.ref[i], &vkpic->view.aspect_ref[i], - vkpic->dpb_frame, i, rep_fmt); - if (err < 0) - return err; - - vkpic->view.dst[i] = vkpic->view.ref[i]; - } - - if (!alloc_dpb || is_current) { - err = ff_vk_create_imageview(&ctx->s, - &vkpic->view.out[i], &vkpic->view.aspect[i], - pic, i, rep_fmt); - if (err < 0) - return err; - - if (!alloc_dpb) { - vkpic->view.ref[i] = vkpic->view.out[i]; - vkpic->view.aspect_ref[i] = vkpic->view.aspect[i]; - } - } - } - - return 0; -} - -int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, - const uint8_t *data, size_t size, int add_startcode, - uint32_t *nb_slices, const uint32_t **offsets) -{ - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - - static 
const uint8_t startcode_prefix[3] = { 0x0, 0x0, 0x1 }; - const size_t startcode_len = add_startcode ? sizeof(startcode_prefix) : 0; - const int nb = nb_slices ? *nb_slices : 0; - uint8_t *slices; - uint32_t *slice_off; - FFVkBuffer *vkbuf; - - size_t new_size = vp->slices_size + startcode_len + size + - ctx->caps.minBitstreamBufferSizeAlignment; - new_size = FFALIGN(new_size, ctx->caps.minBitstreamBufferSizeAlignment); - - if (offsets) { - slice_off = av_fast_realloc(dec->slice_off, &dec->slice_off_max, - (nb + 1)*sizeof(slice_off)); - if (!slice_off) - return AVERROR(ENOMEM); - - *offsets = dec->slice_off = slice_off; - - slice_off[nb] = vp->slices_size; - } - - vkbuf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL; - if (!vkbuf || vkbuf->size < new_size) { - int err; - AVBufferRef *new_ref; - FFVkBuffer *new_buf; - - /* No point in requesting anything smaller. */ - size_t buf_size = FFMAX(new_size, 1024*1024); - - /* Align buffer to nearest power of two. Makes fragmentation management - * easier, and gives us ample headroom. */ - buf_size = 2 << av_log2(buf_size); - - err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &new_ref, - DECODER_IS_SDR(avctx->codec_id) ? - (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : - VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, - ctx->s.hwfc->create_pnext, buf_size, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - (DECODER_IS_SDR(avctx->codec_id) ? 
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0)); - if (err < 0) - return err; - - new_buf = (FFVkBuffer *)new_ref->data; - - /* Copy data from the old buffer */ - if (vkbuf) { - memcpy(new_buf->mapped_mem, vkbuf->mapped_mem, vp->slices_size); - av_buffer_unref(&vp->slices_buf); - } - - vp->slices_buf = new_ref; - vkbuf = new_buf; - } - slices = vkbuf->mapped_mem; - - /* Startcode */ - memcpy(slices + vp->slices_size, startcode_prefix, startcode_len); - - /* Slice data */ - memcpy(slices + vp->slices_size + startcode_len, data, size); - - if (nb_slices) - *nb_slices = nb + 1; - - vp->slices_size += startcode_len + size; - - return 0; -} - -void ff_vk_decode_flush(AVCodecContext *avctx) -{ - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - - FFVulkanFunctions *vk = &ctx->s.vkfn; - VkVideoBeginCodingInfoKHR decode_start = { - .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, - .videoSession = ctx->common.session, - .videoSessionParameters = ctx->empty_session_params, - }; - VkVideoCodingControlInfoKHR decode_ctrl = { - .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, - .flags = VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR, - }; - VkVideoEndCodingInfoKHR decode_end = { - .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, - }; - - VkCommandBuffer cmd_buf; - FFVkExecContext *exec; - - /* Non-video queues do not need to be reset */ - if (!(get_codecdesc(avctx->codec_id)->decode_op)) - return; - - exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); - ff_vk_exec_start(&ctx->s, exec); - cmd_buf = exec->buf; - - vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); - vk->CmdControlVideoCodingKHR(cmd_buf, &decode_ctrl); - vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); - ff_vk_exec_submit(&ctx->s, exec); -} - -int ff_vk_decode_frame(AVCodecContext *avctx, - AVFrame *pic, FFVulkanDecodePicture *vp, - AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]) -{ - int err; - VkResult ret; - VkCommandBuffer cmd_buf; - 
FFVkBuffer *sd_buf; - - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - /* Output */ - AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data; - - /* Quirks */ - const int layered_dpb = ctx->common.layered_dpb; - - VkVideoBeginCodingInfoKHR decode_start = { - .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, - .videoSession = ctx->common.session, - .videoSessionParameters = dec->session_params ? - *((VkVideoSessionParametersKHR *)dec->session_params->data) : - VK_NULL_HANDLE, - .referenceSlotCount = vp->decode_info.referenceSlotCount, - .pReferenceSlots = vp->decode_info.pReferenceSlots, - }; - VkVideoEndCodingInfoKHR decode_end = { - .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, - }; - - VkImageMemoryBarrier2 img_bar[37]; - int nb_img_bar = 0; - size_t data_size = FFALIGN(vp->slices_size, - ctx->caps.minBitstreamBufferSizeAlignment); - - FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); - - /* The current decoding reference has to be bound as an inactive reference */ - VkVideoReferenceSlotInfoKHR *cur_vk_ref; - cur_vk_ref = (void *)&decode_start.pReferenceSlots[decode_start.referenceSlotCount]; - cur_vk_ref[0] = vp->ref_slot; - cur_vk_ref[0].slotIndex = -1; - decode_start.referenceSlotCount++; - - sd_buf = (FFVkBuffer *)vp->slices_buf->data; - - /* Flush if needed */ - if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - VkMappedMemoryRange flush_buf = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = sd_buf->mem, - .offset = 0, - .size = FFALIGN(vp->slices_size, - ctx->s.props.properties.limits.nonCoherentAtomSize), - }; - - ret = vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &flush_buf); - if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } - - vp->decode_info.srcBuffer = sd_buf->buf; - 
vp->decode_info.srcBufferOffset = 0; - vp->decode_info.srcBufferRange = data_size; - - /* Start command buffer recording */ - err = ff_vk_exec_start(&ctx->s, exec); - if (err < 0) - return err; - cmd_buf = exec->buf; - - /* Slices */ - err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0); - if (err < 0) - return err; - vp->slices_buf = NULL; /* Owned by the exec buffer from now on */ - - /* Parameters */ - err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &dec->session_params, 1, 1); - if (err < 0) - return err; - - err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic, - VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, - VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); - if (err < 0) - return err; - - err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, - pic); - if (err < 0) - return err; - - /* Output image - change layout, as it comes from a pool */ - img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, - .pNext = NULL, - .srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, - .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, - .srcAccessMask = VK_ACCESS_2_NONE, - .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, - .oldLayout = vkf->layout[0], - .newLayout = (layered_dpb || vp->dpb_frame) ? 
- VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR : - VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, /* Spec, 07252 utter madness */ - .srcQueueFamilyIndex = vkf->queue_family[0], - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = vkf->img[0], - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = vp->view.aspect[0], - .layerCount = 1, - .levelCount = 1, - }, - }; - ff_vk_exec_update_frame(&ctx->s, exec, pic, - &img_bar[nb_img_bar], &nb_img_bar); - - /* Reference for the current image, if existing and not layered */ - if (vp->dpb_frame) { - err = ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame, - VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, - VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); - if (err < 0) - return err; - } - - if (!layered_dpb) { - /* All references (apart from the current) for non-layered refs */ - - for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { - AVFrame *ref_frame = rpic[i]; - FFVulkanDecodePicture *rvp = rvkp[i]; - AVFrame *ref = rvp->dpb_frame ? rvp->dpb_frame : ref_frame; - - err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref, - VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, - VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); - if (err < 0) - return err; - - if (err == 0) { - err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, - &rvp->sem, &rvp->sem_value, - ref); - if (err < 0) - return err; - } - - if (!rvp->dpb_frame) { - AVVkFrame *rvkf = (AVVkFrame *)ref->data[0]; - - img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, - .pNext = NULL, - .srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, - .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, - .srcAccessMask = VK_ACCESS_2_NONE, - .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR | - VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, - .oldLayout = rvkf->layout[0], - .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, - .srcQueueFamilyIndex = rvkf->queue_family[0], - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = 
rvkf->img[0], - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = rvp->view.aspect_ref[0], - .layerCount = 1, - .levelCount = 1, - }, - }; - ff_vk_exec_update_frame(&ctx->s, exec, ref, - &img_bar[nb_img_bar], &nb_img_bar); - } - } - } else if (vp->decode_info.referenceSlotCount || - vp->view.out[0] != vp->view.ref[0]) { - /* Single barrier for a single layered ref */ - err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame, - VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, - VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); - if (err < 0) - return err; - } - - /* Change image layout */ - vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - - /* Start, use parameters, decode and end decoding */ - vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); - vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info); - vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); - - /* End recording and submit for execution */ - return ff_vk_exec_submit(&ctx->s, exec); -} - -void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *vp) -{ - AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; - - VkSemaphoreWaitInfo sem_wait = (VkSemaphoreWaitInfo) { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, - .pSemaphores = &vp->sem, - .pValues = &vp->sem_value, - .semaphoreCount = 1, - }; - - /* We do not have to lock the frame here because we're not interested - * in the actual current semaphore value, but only that it's later than - * the time we submitted the image for decoding. 
*/ - if (vp->sem) - vp->wait_semaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); - - /* Free slices data */ - av_buffer_unref(&vp->slices_buf); - - /* Destroy image view (out) */ - for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) { - if (vp->view.out[i] && vp->view.out[i] != vp->view.dst[i]) - vp->destroy_image_view(hwctx->act_dev, vp->view.out[i], hwctx->alloc); - - /* Destroy image view (ref, unlayered) */ - if (vp->view.dst[i]) - vp->destroy_image_view(hwctx->act_dev, vp->view.dst[i], hwctx->alloc); - } - - av_frame_free(&vp->dpb_frame); -} - -static void free_common(AVRefStructOpaque unused, void *obj) -{ - FFVulkanDecodeShared *ctx = obj; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - /* Wait on and free execution pool */ - ff_vk_exec_pool_free(&ctx->s, &ctx->exec_pool); - - /* This also frees all references from this pool */ - av_frame_free(&ctx->common.layered_frame); - - /* Destroy parameters */ - if (ctx->empty_session_params) - vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, - ctx->empty_session_params, - s->hwctx->alloc); - - av_buffer_pool_uninit(&ctx->buf_pool); - - ff_vk_video_common_uninit(s, &ctx->common); - - if (ctx->sd_ctx_free) - ctx->sd_ctx_free(ctx); - - ff_vk_uninit(s); -} - -static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_ref) -{ - int err; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - const FFVulkanDecodeDescriptor *vk_desc = get_codecdesc(avctx->codec_id); - AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; - AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data; - AVVulkanDeviceContext *hwctx = device->hwctx; - FFVulkanDecodeShared *ctx; - - if (dec->shared_ctx) - return 0; - - dec->shared_ctx = av_refstruct_alloc_ext(sizeof(*ctx), 0, NULL, - free_common); - if (!dec->shared_ctx) - return AVERROR(ENOMEM); - - ctx = dec->shared_ctx; - - ctx->s.extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions, - 
hwctx->nb_enabled_dev_extensions); - - if (vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) { - if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) { - av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", - VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME); - av_refstruct_unref(&dec->shared_ctx); - return AVERROR(ENOSYS); - } - } - - err = ff_vk_load_functions(device, &ctx->s.vkfn, ctx->s.extensions, 1, 1); - if (err < 0) { - av_refstruct_unref(&dec->shared_ctx); - return err; - } - - return 0; -} - -static VkResult vulkan_setup_profile(AVCodecContext *avctx, - FFVulkanDecodeProfileData *prof, - AVVulkanDeviceContext *hwctx, - FFVulkanFunctions *vk, - const FFVulkanDecodeDescriptor *vk_desc, - VkVideoDecodeH264CapabilitiesKHR *h264_caps, - VkVideoDecodeH265CapabilitiesKHR *h265_caps, -#if CONFIG_VP9_VULKAN_HWACCEL - VkVideoDecodeVP9CapabilitiesKHR *vp9_caps, -#endif - VkVideoDecodeAV1CapabilitiesKHR *av1_caps, - VkVideoCapabilitiesKHR *caps, - VkVideoDecodeCapabilitiesKHR *dec_caps, - int cur_profile) -{ - VkVideoDecodeUsageInfoKHR *usage = &prof->usage; - VkVideoProfileInfoKHR *profile = &prof->profile; - VkVideoProfileListInfoKHR *profile_list = &prof->profile_list; - - VkVideoDecodeH264ProfileInfoKHR *h264_profile = &prof->h264_profile; - VkVideoDecodeH265ProfileInfoKHR *h265_profile = &prof->h265_profile; -#if CONFIG_VP9_VULKAN_HWACCEL - VkVideoDecodeVP9ProfileInfoKHR *vp9_profile = &prof->vp9_profile; -#endif - VkVideoDecodeAV1ProfileInfoKHR *av1_profile = &prof->av1_profile; - - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); - if (!desc) - return AVERROR(EINVAL); - - if (avctx->codec_id == AV_CODEC_ID_H264) { - dec_caps->pNext = h264_caps; - usage->pNext = h264_profile; - h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR; - - /* Vulkan transmits all the constrant_set flags, rather than wanting them - * merged in the profile IDC */ - h264_profile->stdProfileIdc = cur_profile & 
~(AV_PROFILE_H264_CONSTRAINED | - AV_PROFILE_H264_INTRA); - - h264_profile->pictureLayout = avctx->field_order == AV_FIELD_UNKNOWN || - avctx->field_order == AV_FIELD_PROGRESSIVE ? - VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR : - VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR; - } else if (avctx->codec_id == AV_CODEC_ID_H265) { - dec_caps->pNext = h265_caps; - usage->pNext = h265_profile; - h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR; - h265_profile->stdProfileIdc = cur_profile; -#if CONFIG_VP9_VULKAN_HWACCEL - } else if (avctx->codec_id == AV_CODEC_ID_VP9) { - dec_caps->pNext = vp9_caps; - usage->pNext = vp9_profile; - vp9_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR; - vp9_profile->stdProfile = cur_profile; -#endif - } else if (avctx->codec_id == AV_CODEC_ID_AV1) { - dec_caps->pNext = av1_caps; - usage->pNext = av1_profile; - av1_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR; - av1_profile->stdProfile = cur_profile; - av1_profile->filmGrainSupport = !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN); - } - - usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR; - usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR; - - profile->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR; - profile->pNext = usage; - profile->videoCodecOperation = vk_desc->decode_op; - profile->chromaSubsampling = ff_vk_subsampling_from_av_desc(desc); - profile->lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth); - profile->chromaBitDepth = profile->lumaBitDepth; - - profile_list->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR; - profile_list->profileCount = 1; - profile_list->pProfiles = profile; - - /* Get the capabilities of the decoder for the given profile */ - caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; - caps->pNext = dec_caps; - dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR; - /* dec_caps->pNext 
already filled in */ - - return vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile, - caps); -} - -static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_ref, - enum AVPixelFormat *pix_fmt, VkFormat *vk_fmt, - FFVulkanDecodeProfileData *prof, - int *dpb_dedicate) -{ - VkResult ret; - int max_level, base_profile, cur_profile; - const FFVulkanDecodeDescriptor *vk_desc = get_codecdesc(avctx->codec_id); - AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; - AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data; - AVVulkanDeviceContext *hwctx = device->hwctx; - enum AVPixelFormat source_format; - enum AVPixelFormat best_format; - VkFormat best_vkfmt; - - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - VkVideoCapabilitiesKHR *caps = &ctx->caps; - VkVideoDecodeCapabilitiesKHR *dec_caps = &ctx->dec_caps; - - VkVideoDecodeH264CapabilitiesKHR h264_caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, - }; - VkVideoDecodeH265CapabilitiesKHR h265_caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, - }; -#if CONFIG_VP9_VULKAN_HWACCEL - VkVideoDecodeVP9CapabilitiesKHR vp9_caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR, - }; -#endif - VkVideoDecodeAV1CapabilitiesKHR av1_caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_KHR, - }; - - VkPhysicalDeviceVideoFormatInfoKHR fmt_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR, - .pNext = &prof->profile_list, - }; - VkVideoFormatPropertiesKHR *ret_info; - uint32_t nb_out_fmts = 0; - - if (!(vk_desc->decode_extension & ctx->s.extensions)) { - av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n", - avcodec_get_name(avctx->codec_id)); - return AVERROR(ENOSYS); - } - - cur_profile = avctx->profile; - base_profile = avctx->codec_id == 
AV_CODEC_ID_H264 ? AV_PROFILE_H264_CONSTRAINED_BASELINE : - avctx->codec_id == AV_CODEC_ID_H265 ? AV_PROFILE_HEVC_MAIN : -#if CONFIG_VP9_VULKAN_HWACCEL - avctx->codec_id == AV_CODEC_ID_VP9 ? STD_VIDEO_VP9_PROFILE_0 : -#endif - avctx->codec_id == AV_CODEC_ID_AV1 ? STD_VIDEO_AV1_PROFILE_MAIN : - 0; - - ret = vulkan_setup_profile(avctx, prof, hwctx, vk, vk_desc, - &h264_caps, - &h265_caps, -#if CONFIG_VP9_VULKAN_HWACCEL - &vp9_caps, -#endif - &av1_caps, - caps, - dec_caps, - cur_profile); - if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR && - avctx->flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH && - avctx->profile != base_profile) { - av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting " - "again with profile %s\n", - avcodec_get_name(avctx->codec_id), - avcodec_profile_name(avctx->codec_id, cur_profile), - avcodec_profile_name(avctx->codec_id, base_profile)); - cur_profile = base_profile; - ret = vulkan_setup_profile(avctx, prof, hwctx, vk, vk_desc, - &h264_caps, - &h265_caps, -#if CONFIG_VP9_VULKAN_HWACCEL - &vp9_caps, -#endif - &av1_caps, - caps, - dec_caps, - cur_profile); - } - - if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) { - av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " - "%s profile \"%s\" not supported!\n", - avcodec_get_name(avctx->codec_id), - avcodec_profile_name(avctx->codec_id, cur_profile)); - return AVERROR(EINVAL); - } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) { - av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " - "format (%s) not supported!\n", - av_get_pix_fmt_name(avctx->sw_pix_fmt)); - return AVERROR(EINVAL); - } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT || - ret == VK_ERROR_FORMAT_NOT_SUPPORTED) { - return AVERROR(EINVAL); - } else if (ret != VK_SUCCESS) { - return AVERROR_EXTERNAL; - } - - max_level = avctx->codec_id == AV_CODEC_ID_H264 ? ff_vk_h264_level_to_av(h264_caps.maxLevelIdc) : - avctx->codec_id == AV_CODEC_ID_H265 ? 
ff_vk_h265_level_to_av(h265_caps.maxLevelIdc) : -#if CONFIG_VP9_VULKAN_HWACCEL - avctx->codec_id == AV_CODEC_ID_VP9 ? vp9_caps.maxLevel : -#endif - avctx->codec_id == AV_CODEC_ID_AV1 ? av1_caps.maxLevel : - 0; - - av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile \"%s\":\n", - avcodec_get_name(avctx->codec_id), - avcodec_profile_name(avctx->codec_id, cur_profile)); - av_log(avctx, AV_LOG_VERBOSE, " Maximum level: %i (stream %i)\n", - max_level, avctx->level); - av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n", - caps->minCodedExtent.width, caps->maxCodedExtent.width); - av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n", - caps->minCodedExtent.height, caps->maxCodedExtent.height); - av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n", - caps->pictureAccessGranularity.width); - av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n", - caps->pictureAccessGranularity.height); - av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n", - caps->minBitstreamBufferOffsetAlignment); - av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n", - caps->minBitstreamBufferSizeAlignment); - av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n", - caps->maxDpbSlots); - av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n", - caps->maxActiveReferencePictures); - av_log(avctx, AV_LOG_VERBOSE, " Codec header name: '%s' (driver), '%s' (compiled)\n", - caps->stdHeaderVersion.extensionName, - vk_desc->ext_props.extensionName); - av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n", - CODEC_VER(caps->stdHeaderVersion.specVersion), - CODEC_VER(vk_desc->ext_props.specVersion)); - av_log(avctx, AV_LOG_VERBOSE, " Decode modes:%s%s%s\n", - dec_caps->flags ? "" : - " invalid", - dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR ? - " reuse_dst_dpb" : "", - dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR ? 
- " dedicated_dpb" : ""); - av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n", - caps->flags ? "" : - " none", - caps->flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ? - " protected" : "", - caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ? - " separate_references" : ""); - - /* Check if decoding is possible with the given parameters */ - if (avctx->coded_width < caps->minCodedExtent.width || - avctx->coded_height < caps->minCodedExtent.height || - avctx->coded_width > caps->maxCodedExtent.width || - avctx->coded_height > caps->maxCodedExtent.height) - return AVERROR(EINVAL); - - if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) && - avctx->level > max_level) - return AVERROR(EINVAL); - - /* Some basic sanity checking */ - if (!(dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | - VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR))) { - av_log(avctx, AV_LOG_ERROR, "Buggy driver signals invalid decoding mode: neither " - "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR nor " - "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR are set!\n"); - return AVERROR_EXTERNAL; - } else if ((dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | - VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) == - VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) && - !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR)) { - av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: " - "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set " - "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n"); - return AVERROR_EXTERNAL; - } - - dec->dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR); - ctx->common.layered_dpb = !dec->dedicated_dpb ? 
0 : - !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); - - if (dec->dedicated_dpb) { - fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; - } else { - fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | - VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT; - - if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | - FF_VK_EXT_VIDEO_MAINTENANCE_1)) - fmt_info.imageUsage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; - } - - /* Get the format of the images necessary */ - ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, - &fmt_info, - &nb_out_fmts, NULL); - if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || - (!nb_out_fmts && ret == VK_SUCCESS)) { - return AVERROR(EINVAL); - } else if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts); - if (!ret_info) - return AVERROR(ENOMEM); - - for (int i = 0; i < nb_out_fmts; i++) - ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; - - ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, - &fmt_info, - &nb_out_fmts, ret_info); - if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || - (!nb_out_fmts && ret == VK_SUCCESS)) { - av_free(ret_info); - return AVERROR(EINVAL); - } else if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", - ff_vk_ret2str(ret)); - av_free(ret_info); - return AVERROR_EXTERNAL; - } - - /* Find a format to use */ - *pix_fmt = best_format = AV_PIX_FMT_NONE; - *vk_fmt = best_vkfmt = VK_FORMAT_UNDEFINED; - source_format = avctx->sw_pix_fmt; - - av_log(avctx, AV_LOG_DEBUG, "Choosing best pixel format for decoding from %i:\n", nb_out_fmts); - for (int i = 0; i < nb_out_fmts; i++) { - enum AVPixelFormat tmp = ff_vk_pix_fmt_from_vkfmt(ret_info[i].format); - if (tmp == 
AV_PIX_FMT_NONE) { - av_log(avctx, AV_LOG_WARNING, "Invalid/unknown Vulkan format %i!\n", ret_info[i].format); - continue; - } - - best_format = av_find_best_pix_fmt_of_2(tmp, best_format, source_format, 0, NULL); - if (tmp == best_format) - best_vkfmt = ret_info[i].format; - - av_log(avctx, AV_LOG_DEBUG, " %s%s (Vulkan ID: %i)\n", - av_get_pix_fmt_name(tmp), tmp == best_format ? "*" : "", - ret_info[i].format); - } - - av_free(ret_info); - - if (best_format == AV_PIX_FMT_NONE) { - av_log(avctx, AV_LOG_ERROR, "No valid/compatible pixel format found for decoding!\n"); - return AVERROR(EINVAL); - } else { - av_log(avctx, AV_LOG_VERBOSE, "Chosen frame pixfmt: %s (Vulkan ID: %i)\n", - av_get_pix_fmt_name(best_format), best_vkfmt); - } - - *pix_fmt = best_format; - *vk_fmt = best_vkfmt; - - *dpb_dedicate = dec->dedicated_dpb; - - return 0; -} - -static void free_profile_data(AVHWFramesContext *hwfc) -{ - av_free(hwfc->user_opaque); -} - -int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) -{ - VkFormat vkfmt = VK_FORMAT_UNDEFINED; - int err, dedicated_dpb; - AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; - AVVulkanFramesContext *hwfc = frames_ctx->hwctx; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeProfileData *prof = NULL; - - err = vulkan_decode_bootstrap(avctx, hw_frames_ctx); - if (err < 0) - return err; - - frames_ctx->sw_format = avctx->sw_pix_fmt; - - if (!DECODER_IS_SDR(avctx->codec_id)) { - prof = av_mallocz(sizeof(FFVulkanDecodeProfileData)); - if (!prof) - return AVERROR(ENOMEM); - - err = vulkan_decode_get_profile(avctx, hw_frames_ctx, - &frames_ctx->sw_format, &vkfmt, - prof, &dedicated_dpb); - if (err < 0) { - av_free(prof); - return err; - } - - frames_ctx->user_opaque = prof; - frames_ctx->free = free_profile_data; - - hwfc->create_pnext = &prof->profile_list; - } else { - switch (frames_ctx->sw_format) { - case AV_PIX_FMT_GBRAP16: - /* This should be more efficient for 
downloading and using */ - frames_ctx->sw_format = AV_PIX_FMT_RGBA64; - break; - case AV_PIX_FMT_GBRP10: - /* This saves memory bandwidth when downloading */ - frames_ctx->sw_format = AV_PIX_FMT_X2BGR10; - break; - case AV_PIX_FMT_BGR0: - /* mpv has issues with bgr0 mapping, so just remap it */ - frames_ctx->sw_format = AV_PIX_FMT_RGB0; - break; - default: - break; - } - } - - frames_ctx->width = avctx->coded_width; - frames_ctx->height = avctx->coded_height; - frames_ctx->format = AV_PIX_FMT_VULKAN; - - hwfc->format[0] = vkfmt; - hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; - hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT; - - if (prof) { - FFVulkanDecodeShared *ctx; - - hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; - if (!dec->dedicated_dpb) - hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; - - ctx = dec->shared_ctx; - if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | - FF_VK_EXT_VIDEO_MAINTENANCE_1)) - hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; - } - - return err; -} - -static void vk_decode_free_params(void *opaque, uint8_t *data) -{ - FFVulkanDecodeShared *ctx = opaque; - FFVulkanFunctions *vk = &ctx->s.vkfn; - VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)data; - vk->DestroyVideoSessionParametersKHR(ctx->s.hwctx->act_dev, *par, - ctx->s.hwctx->alloc); - av_free(par); -} - -int ff_vk_decode_create_params(AVBufferRef **par_ref, void *logctx, FFVulkanDecodeShared *ctx, - const VkVideoSessionParametersCreateInfoKHR *session_params_create) -{ - VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par)); - const FFVulkanFunctions *vk = &ctx->s.vkfn; - VkResult ret; - - if (!par) - return AVERROR(ENOMEM); - - /* Create session parameters */ - ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, session_params_create, - ctx->s.hwctx->alloc, par); - if (ret != VK_SUCCESS) { - av_log(logctx, AV_LOG_ERROR, "Unable to create Vulkan video session 
parameters: %s!\n", - ff_vk_ret2str(ret)); - av_free(par); - return AVERROR_EXTERNAL; - } - *par_ref = av_buffer_create((uint8_t *)par, sizeof(*par), - vk_decode_free_params, ctx, 0); - if (!*par_ref) { - vk_decode_free_params(ctx, (uint8_t *)par); - return AVERROR(ENOMEM); - } - - return 0; -} - -int ff_vk_decode_uninit(AVCodecContext *avctx) -{ - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - - av_freep(&dec->hevc_headers); - av_buffer_unref(&dec->session_params); - av_refstruct_unref(&dec->shared_ctx); - av_freep(&dec->slice_off); - return 0; -} - -static int create_empty_session_parameters(AVCodecContext *avctx, - FFVulkanDecodeShared *ctx) -{ - VkResult ret; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &s->vkfn; - - VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, - }; - VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, - }; - StdVideoAV1SequenceHeader av1_empty_seq = { 0 }; - VkVideoDecodeAV1SessionParametersCreateInfoKHR av1_params = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pStdSequenceHeader = &av1_empty_seq, - }; - VkVideoSessionParametersCreateInfoKHR session_params_create = { - .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params : - avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params : - avctx->codec_id == AV_CODEC_ID_AV1 ? 
(void *)&av1_params : - NULL, - .videoSession = ctx->common.session, - }; - - if (avctx->codec_id == AV_CODEC_ID_VP9) - return 0; - - ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, - s->hwctx->alloc, &ctx->empty_session_params); - if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} - -int ff_vk_decode_init(AVCodecContext *avctx) -{ - int err; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx; - FFVulkanContext *s; - int async_depth; - const VkVideoProfileInfoKHR *profile; - const FFVulkanDecodeDescriptor *vk_desc; - const VkPhysicalDeviceDriverProperties *driver_props; - - VkVideoSessionCreateInfoKHR session_create = { - .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR, - }; - - err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN); - if (err < 0) - return err; - - /* Initialize contexts */ - ctx = dec->shared_ctx; - s = &ctx->s; - - err = ff_vk_init(s, avctx, NULL, avctx->hw_frames_ctx); - if (err < 0) - return err; - - vk_desc = get_codecdesc(avctx->codec_id); - - profile = get_video_profile(ctx, avctx->codec_id); - if ((vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) && !profile) { - av_log(avctx, AV_LOG_ERROR, "Video profile missing from frames context!"); - return AVERROR(EINVAL); - } - - /* Create queue context */ - vk_desc = get_codecdesc(avctx->codec_id); - ctx->qf = ff_vk_qf_find(s, vk_desc->queue_flags, vk_desc->decode_op); - if (!ctx->qf) { - av_log(avctx, AV_LOG_ERROR, "Decoding of %s is not supported by this device\n", - avcodec_get_name(avctx->codec_id)); - return err; - } - - session_create.queueFamilyIndex = ctx->qf->idx; - session_create.maxCodedExtent = ctx->caps.maxCodedExtent; - session_create.maxDpbSlots = ctx->caps.maxDpbSlots; - session_create.maxActiveReferencePictures = 
ctx->caps.maxActiveReferencePictures; - session_create.pictureFormat = s->hwfc->format[0]; - session_create.referencePictureFormat = session_create.pictureFormat; - session_create.pStdHeaderVersion = &vk_desc->ext_props; - session_create.pVideoProfile = profile; -#ifdef VK_KHR_video_maintenance2 - if (ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2) - session_create.flags = VK_VIDEO_SESSION_CREATE_INLINE_SESSION_PARAMETERS_BIT_KHR; -#endif - - /* Create decode exec context for this specific main thread. - * 2 async contexts per thread was experimentally determined to be optimal - * for a majority of streams. */ - async_depth = 2*ctx->qf->num; - /* We don't need more than 2 per thread context */ - async_depth = FFMIN(async_depth, 2*avctx->thread_count); - /* Make sure there are enough async contexts for each thread */ - async_depth = FFMAX(async_depth, avctx->thread_count); - - err = ff_vk_exec_pool_init(s, ctx->qf, &ctx->exec_pool, - async_depth, 0, 0, 0, profile); - if (err < 0) - goto fail; - - if (!DECODER_IS_SDR(avctx->codec_id)) { - err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); - if (err < 0) - goto fail; - } - - /* If doing an out-of-place decoding, create a DPB pool */ - if (dec->dedicated_dpb || avctx->codec_id == AV_CODEC_ID_AV1) { - AVHWFramesContext *dpb_frames; - AVVulkanFramesContext *dpb_hwfc; - - ctx->common.dpb_hwfc_ref = av_hwframe_ctx_alloc(s->frames->device_ref); - if (!ctx->common.dpb_hwfc_ref) { - err = AVERROR(ENOMEM); - goto fail; - } - - dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data; - dpb_frames->format = s->frames->format; - dpb_frames->sw_format = s->frames->sw_format; - dpb_frames->width = avctx->coded_width; - dpb_frames->height = avctx->coded_height; - - dpb_hwfc = dpb_frames->hwctx; - dpb_hwfc->create_pnext = (void *)ff_vk_find_struct(ctx->s.hwfc->create_pnext, - VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); - dpb_hwfc->format[0] = s->hwfc->format[0]; - dpb_hwfc->tiling = 
VK_IMAGE_TILING_OPTIMAL; - dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | - VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */ - - if (ctx->common.layered_dpb) - dpb_hwfc->nb_layers = ctx->caps.maxDpbSlots; - - err = av_hwframe_ctx_init(ctx->common.dpb_hwfc_ref); - if (err < 0) - goto fail; - - if (ctx->common.layered_dpb) { - ctx->common.layered_frame = vk_get_dpb_pool(ctx); - if (!ctx->common.layered_frame) { - err = AVERROR(ENOMEM); - goto fail; - } - - err = ff_vk_create_view(&ctx->s, &ctx->common, - &ctx->common.layered_view, - &ctx->common.layered_aspect, - (AVVkFrame *)ctx->common.layered_frame->data[0], - s->hwfc->format[0], 1); - if (err < 0) - goto fail; - } - } - - if (!DECODER_IS_SDR(avctx->codec_id)) { - if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2)) { - err = create_empty_session_parameters(avctx, ctx); - if (err < 0) - return err; - } - } else { - /* For SDR decoders, this alignment value will be 0. Since this will make - * add_slice() malfunction, set it to a sane default value. 
*/ - ctx->caps.minBitstreamBufferSizeAlignment = AV_INPUT_BUFFER_PADDING_SIZE; - } - - driver_props = &dec->shared_ctx->s.driver_props; - if (driver_props->driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY && - driver_props->conformanceVersion.major == 1 && - driver_props->conformanceVersion.minor == 3 && - driver_props->conformanceVersion.subminor == 8 && - driver_props->conformanceVersion.patch < 3) - dec->quirk_av1_offset = 1; - - ff_vk_decode_flush(avctx); - - av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization successful\n"); - - return 0; - -fail: - ff_vk_decode_uninit(avctx); - - return err; -} -- 2.49.1 From d09000c8bea2533dd3527bc965f823b109e2ec59 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:29:31 +0000 Subject: [PATCH 066/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_decode.c | 1399 +++++++++++++++++++++++++++++ 1 file changed, 1399 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_decode.c diff --git a/libavcodec/vulkan/vulkan_decode.c b/libavcodec/vulkan/vulkan_decode.c new file mode 100644 index 0000000000..fc934ca72d --- /dev/null +++ b/libavcodec/vulkan/vulkan_decode.c @@ -0,0 +1,1399 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/refstruct.h" +#include "vulkan_video.h" +#include "vulkan_decode.h" +#include "libavcodec/config_components.h" +#include "libavutil/avassert.h" +#include "libavutil/mem.h" +#include "../../libavutil/vulkan/vulkan_loader.h" + +#define DECODER_IS_SDR(codec_id) \ + (((codec_id) == AV_CODEC_ID_FFV1) || \ + ((codec_id) == AV_CODEC_ID_PRORES_RAW)) + +#if CONFIG_H264_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc; +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc; +#endif +#if CONFIG_VP9_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_vp9_desc; +#endif +#if CONFIG_AV1_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc; +#endif +#if CONFIG_FFV1_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc; +#endif +#if CONFIG_PRORES_RAW_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc; +#endif + +static const FFVulkanDecodeDescriptor *dec_descs[] = { +#if CONFIG_H264_VULKAN_HWACCEL + &ff_vk_dec_h264_desc, +#endif +#if CONFIG_HEVC_VULKAN_HWACCEL + &ff_vk_dec_hevc_desc, +#endif +#if CONFIG_VP9_VULKAN_HWACCEL + &ff_vk_dec_vp9_desc, +#endif +#if CONFIG_AV1_VULKAN_HWACCEL + &ff_vk_dec_av1_desc, +#endif +#if CONFIG_FFV1_VULKAN_HWACCEL + &ff_vk_dec_ffv1_desc, +#endif +#if CONFIG_PRORES_RAW_VULKAN_HWACCEL + &ff_vk_dec_prores_raw_desc, +#endif +}; + +typedef struct FFVulkanDecodeProfileData { + VkVideoDecodeH264ProfileInfoKHR h264_profile; + VkVideoDecodeH265ProfileInfoKHR h265_profile; +#if CONFIG_VP9_VULKAN_HWACCEL + VkVideoDecodeVP9ProfileInfoKHR vp9_profile; +#endif + VkVideoDecodeAV1ProfileInfoKHR av1_profile; + + VkVideoDecodeUsageInfoKHR usage; + VkVideoProfileInfoKHR profile; 
+ VkVideoProfileListInfoKHR profile_list; +} FFVulkanDecodeProfileData; + +static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id) +{ + for (size_t i = 0; i < FF_ARRAY_ELEMS(dec_descs); i++) + if (dec_descs[i]->codec_id == codec_id) + return dec_descs[i]; + av_assert1(!"no codec descriptor"); + return NULL; +} + +static const VkVideoProfileInfoKHR *get_video_profile(FFVulkanDecodeShared *ctx, enum AVCodecID codec_id) +{ + const VkVideoProfileListInfoKHR *profile_list; + + VkStructureType profile_struct_type = + codec_id == AV_CODEC_ID_H264 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR : + codec_id == AV_CODEC_ID_HEVC ? VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR : +#if CONFIG_VP9_VULKAN_HWACCEL + codec_id == AV_CODEC_ID_VP9 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR : +#endif + codec_id == AV_CODEC_ID_AV1 ? VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR : + VK_STRUCTURE_TYPE_MAX_ENUM; + if (profile_struct_type == VK_STRUCTURE_TYPE_MAX_ENUM) + return NULL; + + profile_list = ff_vk_find_struct(ctx->s.hwfc->create_pnext, + VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); + if (!profile_list) + return NULL; + + for (int i = 0; i < profile_list->profileCount; i++) + if (ff_vk_find_struct(profile_list->pProfiles[i].pNext, profile_struct_type)) + return &profile_list->pProfiles[i]; + + return NULL; +} + +int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) +{ + int err; + FFVulkanDecodeContext *src_ctx = src->internal->hwaccel_priv_data; + FFVulkanDecodeContext *dst_ctx = dst->internal->hwaccel_priv_data; + + av_refstruct_replace(&dst_ctx->shared_ctx, src_ctx->shared_ctx); + + err = av_buffer_replace(&dst_ctx->session_params, src_ctx->session_params); + if (err < 0) + return err; + + dst_ctx->dedicated_dpb = src_ctx->dedicated_dpb; + dst_ctx->external_fg = src_ctx->external_fg; + + return 0; +} + +int ff_vk_params_invalidate(AVCodecContext *avctx, int t, const uint8_t *b, uint32_t s) +{ + 
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + av_buffer_unref(&dec->session_params); + return 0; +} + +static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx) +{ + int err; + AVFrame *avf = av_frame_alloc(); + if (!avf) + return NULL; + + err = av_hwframe_get_buffer(ctx->common.dpb_hwfc_ref, avf, 0x0); + if (err < 0) + av_frame_free(&avf); + + return avf; +} + +static void init_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vkpic) +{ + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + vkpic->dpb_frame = NULL; + for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) { + vkpic->view.ref[i] = VK_NULL_HANDLE; + vkpic->view.out[i] = VK_NULL_HANDLE; + vkpic->view.dst[i] = VK_NULL_HANDLE; + } + + vkpic->destroy_image_view = vk->DestroyImageView; + vkpic->wait_semaphores = vk->WaitSemaphores; + vkpic->invalidate_memory_ranges = vk->InvalidateMappedMemoryRanges; +} + +int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + int alloc_dpb) +{ + int err; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + vkpic->slices_size = 0; + + /* If the decoder made a blank frame to make up for a missing ref, or the + * frame is the current frame so it's missing one, create a re-representation */ + if (vkpic->view.ref[0]) + return 0; + + init_frame(dec, vkpic); + + if (ctx->common.layered_dpb && alloc_dpb) { + vkpic->view.ref[0] = ctx->common.layered_view; + vkpic->view.aspect_ref[0] = ctx->common.layered_aspect; + } else if (alloc_dpb) { + AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data; + AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx; + + vkpic->dpb_frame = vk_get_dpb_pool(ctx); + if (!vkpic->dpb_frame) + return AVERROR(ENOMEM); + + err = ff_vk_create_view(&ctx->s, &ctx->common, + &vkpic->view.ref[0], &vkpic->view.aspect_ref[0], + (AVVkFrame *)vkpic->dpb_frame->data[0], + dpb_hwfc->format[0], !is_current); + if (err 
< 0) + return err; + + vkpic->view.dst[0] = vkpic->view.ref[0]; + } + + if (!alloc_dpb || is_current) { + AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data; + AVVulkanFramesContext *hwfc = frames->hwctx; + + err = ff_vk_create_view(&ctx->s, &ctx->common, + &vkpic->view.out[0], &vkpic->view.aspect[0], + (AVVkFrame *)pic->data[0], + hwfc->format[0], !is_current); + if (err < 0) + return err; + + if (!alloc_dpb) { + vkpic->view.ref[0] = vkpic->view.out[0]; + vkpic->view.aspect_ref[0] = vkpic->view.aspect[0]; + } + } + + return 0; +} + +int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + enum FFVkShaderRepFormat rep_fmt, int alloc_dpb) +{ + int err; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data; + + vkpic->slices_size = 0; + + if (vkpic->view.ref[0]) + return 0; + + init_frame(dec, vkpic); + + for (int i = 0; i < av_pix_fmt_count_planes(frames->sw_format); i++) { + if (alloc_dpb) { + vkpic->dpb_frame = vk_get_dpb_pool(ctx); + if (!vkpic->dpb_frame) + return AVERROR(ENOMEM); + + err = ff_vk_create_imageview(&ctx->s, + &vkpic->view.ref[i], &vkpic->view.aspect_ref[i], + vkpic->dpb_frame, i, rep_fmt); + if (err < 0) + return err; + + vkpic->view.dst[i] = vkpic->view.ref[i]; + } + + if (!alloc_dpb || is_current) { + err = ff_vk_create_imageview(&ctx->s, + &vkpic->view.out[i], &vkpic->view.aspect[i], + pic, i, rep_fmt); + if (err < 0) + return err; + + if (!alloc_dpb) { + vkpic->view.ref[i] = vkpic->view.out[i]; + vkpic->view.aspect_ref[i] = vkpic->view.aspect[i]; + } + } + } + + return 0; +} + +int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, + const uint8_t *data, size_t size, int add_startcode, + uint32_t *nb_slices, const uint32_t **offsets) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + static 
const uint8_t startcode_prefix[3] = { 0x0, 0x0, 0x1 }; + const size_t startcode_len = add_startcode ? sizeof(startcode_prefix) : 0; + const int nb = nb_slices ? *nb_slices : 0; + uint8_t *slices; + uint32_t *slice_off; + FFVkBuffer *vkbuf; + + size_t new_size = vp->slices_size + startcode_len + size + + ctx->caps.minBitstreamBufferSizeAlignment; + new_size = FFALIGN(new_size, ctx->caps.minBitstreamBufferSizeAlignment); + + if (offsets) { + slice_off = av_fast_realloc(dec->slice_off, &dec->slice_off_max, + (nb + 1)*sizeof(slice_off)); + if (!slice_off) + return AVERROR(ENOMEM); + + *offsets = dec->slice_off = slice_off; + + slice_off[nb] = vp->slices_size; + } + + vkbuf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL; + if (!vkbuf || vkbuf->size < new_size) { + int err; + AVBufferRef *new_ref; + FFVkBuffer *new_buf; + + /* No point in requesting anything smaller. */ + size_t buf_size = FFMAX(new_size, 1024*1024); + + /* Align buffer to nearest power of two. Makes fragmentation management + * easier, and gives us ample headroom. */ + buf_size = 2 << av_log2(buf_size); + + err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &new_ref, + DECODER_IS_SDR(avctx->codec_id) ? + (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) : + VK_BUFFER_USAGE_VIDEO_DECODE_SRC_BIT_KHR, + ctx->s.hwfc->create_pnext, buf_size, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + (DECODER_IS_SDR(avctx->codec_id) ? 
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0x0)); + if (err < 0) + return err; + + new_buf = (FFVkBuffer *)new_ref->data; + + /* Copy data from the old buffer */ + if (vkbuf) { + memcpy(new_buf->mapped_mem, vkbuf->mapped_mem, vp->slices_size); + av_buffer_unref(&vp->slices_buf); + } + + vp->slices_buf = new_ref; + vkbuf = new_buf; + } + slices = vkbuf->mapped_mem; + + /* Startcode */ + memcpy(slices + vp->slices_size, startcode_prefix, startcode_len); + + /* Slice data */ + memcpy(slices + vp->slices_size + startcode_len, data, size); + + if (nb_slices) + *nb_slices = nb + 1; + + vp->slices_size += startcode_len + size; + + return 0; +} + +void ff_vk_decode_flush(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoBeginCodingInfoKHR decode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = ctx->empty_session_params, + }; + VkVideoCodingControlInfoKHR decode_ctrl = { + .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, + .flags = VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR, + }; + VkVideoEndCodingInfoKHR decode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkCommandBuffer cmd_buf; + FFVkExecContext *exec; + + /* Non-video queues do not need to be reset */ + if (!(get_codecdesc(avctx->codec_id)->decode_op)) + return; + + exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); + ff_vk_exec_start(&ctx->s, exec); + cmd_buf = exec->buf; + + vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); + vk->CmdControlVideoCodingKHR(cmd_buf, &decode_ctrl); + vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); + ff_vk_exec_submit(&ctx->s, exec); +} + +int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]) +{ + int err; + VkResult ret; + VkCommandBuffer cmd_buf; + 
FFVkBuffer *sd_buf; + + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Output */ + AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data; + + /* Quirks */ + const int layered_dpb = ctx->common.layered_dpb; + + VkVideoBeginCodingInfoKHR decode_start = { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .videoSession = ctx->common.session, + .videoSessionParameters = dec->session_params ? + *((VkVideoSessionParametersKHR *)dec->session_params->data) : + VK_NULL_HANDLE, + .referenceSlotCount = vp->decode_info.referenceSlotCount, + .pReferenceSlots = vp->decode_info.pReferenceSlots, + }; + VkVideoEndCodingInfoKHR decode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + size_t data_size = FFALIGN(vp->slices_size, + ctx->caps.minBitstreamBufferSizeAlignment); + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); + + /* The current decoding reference has to be bound as an inactive reference */ + VkVideoReferenceSlotInfoKHR *cur_vk_ref; + cur_vk_ref = (void *)&decode_start.pReferenceSlots[decode_start.referenceSlotCount]; + cur_vk_ref[0] = vp->ref_slot; + cur_vk_ref[0].slotIndex = -1; + decode_start.referenceSlotCount++; + + sd_buf = (FFVkBuffer *)vp->slices_buf->data; + + /* Flush if needed */ + if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange flush_buf = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = sd_buf->mem, + .offset = 0, + .size = FFALIGN(vp->slices_size, + ctx->s.props.properties.limits.nonCoherentAtomSize), + }; + + ret = vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &flush_buf); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + } + + vp->decode_info.srcBuffer = sd_buf->buf; + 
vp->decode_info.srcBufferOffset = 0; + vp->decode_info.srcBufferRange = data_size; + + /* Start command buffer recording */ + err = ff_vk_exec_start(&ctx->s, exec); + if (err < 0) + return err; + cmd_buf = exec->buf; + + /* Slices */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0); + if (err < 0) + return err; + vp->slices_buf = NULL; /* Owned by the exec buffer from now on */ + + /* Parameters */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &dec->session_params, 1, 1); + if (err < 0) + return err; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, pic, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, + pic); + if (err < 0) + return err; + + /* Output image - change layout, as it comes from a pool */ + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .srcAccessMask = VK_ACCESS_2_NONE, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = vkf->layout[0], + .newLayout = (layered_dpb || vp->dpb_frame) ? 
+ VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR : + VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, /* Spec, 07252 utter madness */ + .srcQueueFamilyIndex = vkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = vkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = vp->view.aspect[0], + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, pic, + &img_bar[nb_img_bar], &nb_img_bar); + + /* Reference for the current image, if existing and not layered */ + if (vp->dpb_frame) { + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + } + + if (!layered_dpb) { + /* All references (apart from the current) for non-layered refs */ + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + AVFrame *ref_frame = rpic[i]; + FFVulkanDecodePicture *rvp = rvkp[i]; + AVFrame *ref = rvp->dpb_frame ? rvp->dpb_frame : ref_frame; + + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + + if (err == 0) { + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, + &rvp->sem, &rvp->sem_value, + ref); + if (err < 0) + return err; + } + + if (!rvp->dpb_frame) { + AVVkFrame *rvkf = (AVVkFrame *)ref->data[0]; + + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + .srcAccessMask = VK_ACCESS_2_NONE, + .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_READ_BIT_KHR | + VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR, + .oldLayout = rvkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, + .srcQueueFamilyIndex = rvkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = 
rvkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = rvp->view.aspect_ref[0], + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, ref, + &img_bar[nb_img_bar], &nb_img_bar); + } + } + } else if (vp->decode_info.referenceSlotCount || + vp->view.out[0] != vp->view.ref[0]) { + /* Single barrier for a single layered ref */ + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR); + if (err < 0) + return err; + } + + /* Change image layout */ + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Start, use parameters, decode and end decoding */ + vk->CmdBeginVideoCodingKHR(cmd_buf, &decode_start); + vk->CmdDecodeVideoKHR(cmd_buf, &vp->decode_info); + vk->CmdEndVideoCodingKHR(cmd_buf, &decode_end); + + /* End recording and submit for execution */ + return ff_vk_exec_submit(&ctx->s, exec); +} + +void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *vp) +{ + AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; + + VkSemaphoreWaitInfo sem_wait = (VkSemaphoreWaitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, + .pSemaphores = &vp->sem, + .pValues = &vp->sem_value, + .semaphoreCount = 1, + }; + + /* We do not have to lock the frame here because we're not interested + * in the actual current semaphore value, but only that it's later than + * the time we submitted the image for decoding. 
*/ + if (vp->sem) + vp->wait_semaphores(hwctx->act_dev, &sem_wait, UINT64_MAX); + + /* Free slices data */ + av_buffer_unref(&vp->slices_buf); + + /* Destroy image view (out) */ + for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) { + if (vp->view.out[i] && vp->view.out[i] != vp->view.dst[i]) + vp->destroy_image_view(hwctx->act_dev, vp->view.out[i], hwctx->alloc); + + /* Destroy image view (ref, unlayered) */ + if (vp->view.dst[i]) + vp->destroy_image_view(hwctx->act_dev, vp->view.dst[i], hwctx->alloc); + } + + av_frame_free(&vp->dpb_frame); +} + +static void free_common(AVRefStructOpaque unused, void *obj) +{ + FFVulkanDecodeShared *ctx = obj; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + /* Wait on and free execution pool */ + ff_vk_exec_pool_free(&ctx->s, &ctx->exec_pool); + + /* This also frees all references from this pool */ + av_frame_free(&ctx->common.layered_frame); + + /* Destroy parameters */ + if (ctx->empty_session_params) + vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, + ctx->empty_session_params, + s->hwctx->alloc); + + av_buffer_pool_uninit(&ctx->buf_pool); + + ff_vk_video_common_uninit(s, &ctx->common); + + if (ctx->sd_ctx_free) + ctx->sd_ctx_free(ctx); + + ff_vk_uninit(s); +} + +static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef *frames_ref) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + const FFVulkanDecodeDescriptor *vk_desc = get_codecdesc(avctx->codec_id); + AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; + AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data; + AVVulkanDeviceContext *hwctx = device->hwctx; + FFVulkanDecodeShared *ctx; + + if (dec->shared_ctx) + return 0; + + dec->shared_ctx = av_refstruct_alloc_ext(sizeof(*ctx), 0, NULL, + free_common); + if (!dec->shared_ctx) + return AVERROR(ENOMEM); + + ctx = dec->shared_ctx; + + ctx->s.extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions, + 
hwctx->nb_enabled_dev_extensions); + + if (vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) { + if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME); + av_refstruct_unref(&dec->shared_ctx); + return AVERROR(ENOSYS); + } + } + + err = ff_vk_load_functions(device, &ctx->s.vkfn, ctx->s.extensions, 1, 1); + if (err < 0) { + av_refstruct_unref(&dec->shared_ctx); + return err; + } + + return 0; +} + +static VkResult vulkan_setup_profile(AVCodecContext *avctx, + FFVulkanDecodeProfileData *prof, + AVVulkanDeviceContext *hwctx, + FFVulkanFunctions *vk, + const FFVulkanDecodeDescriptor *vk_desc, + VkVideoDecodeH264CapabilitiesKHR *h264_caps, + VkVideoDecodeH265CapabilitiesKHR *h265_caps, +#if CONFIG_VP9_VULKAN_HWACCEL + VkVideoDecodeVP9CapabilitiesKHR *vp9_caps, +#endif + VkVideoDecodeAV1CapabilitiesKHR *av1_caps, + VkVideoCapabilitiesKHR *caps, + VkVideoDecodeCapabilitiesKHR *dec_caps, + int cur_profile) +{ + VkVideoDecodeUsageInfoKHR *usage = &prof->usage; + VkVideoProfileInfoKHR *profile = &prof->profile; + VkVideoProfileListInfoKHR *profile_list = &prof->profile_list; + + VkVideoDecodeH264ProfileInfoKHR *h264_profile = &prof->h264_profile; + VkVideoDecodeH265ProfileInfoKHR *h265_profile = &prof->h265_profile; +#if CONFIG_VP9_VULKAN_HWACCEL + VkVideoDecodeVP9ProfileInfoKHR *vp9_profile = &prof->vp9_profile; +#endif + VkVideoDecodeAV1ProfileInfoKHR *av1_profile = &prof->av1_profile; + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + if (!desc) + return AVERROR(EINVAL); + + if (avctx->codec_id == AV_CODEC_ID_H264) { + dec_caps->pNext = h264_caps; + usage->pNext = h264_profile; + h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR; + + /* Vulkan transmits all the constrant_set flags, rather than wanting them + * merged in the profile IDC */ + h264_profile->stdProfileIdc = cur_profile & 
~(AV_PROFILE_H264_CONSTRAINED | + AV_PROFILE_H264_INTRA); + + h264_profile->pictureLayout = avctx->field_order == AV_FIELD_UNKNOWN || + avctx->field_order == AV_FIELD_PROGRESSIVE ? + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR : + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR; + } else if (avctx->codec_id == AV_CODEC_ID_H265) { + dec_caps->pNext = h265_caps; + usage->pNext = h265_profile; + h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR; + h265_profile->stdProfileIdc = cur_profile; +#if CONFIG_VP9_VULKAN_HWACCEL + } else if (avctx->codec_id == AV_CODEC_ID_VP9) { + dec_caps->pNext = vp9_caps; + usage->pNext = vp9_profile; + vp9_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PROFILE_INFO_KHR; + vp9_profile->stdProfile = cur_profile; +#endif + } else if (avctx->codec_id == AV_CODEC_ID_AV1) { + dec_caps->pNext = av1_caps; + usage->pNext = av1_profile; + av1_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR; + av1_profile->stdProfile = cur_profile; + av1_profile->filmGrainSupport = !(avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN); + } + + usage->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR; + usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR; + + profile->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR; + profile->pNext = usage; + profile->videoCodecOperation = vk_desc->decode_op; + profile->chromaSubsampling = ff_vk_subsampling_from_av_desc(desc); + profile->lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth); + profile->chromaBitDepth = profile->lumaBitDepth; + + profile_list->sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR; + profile_list->profileCount = 1; + profile_list->pProfiles = profile; + + /* Get the capabilities of the decoder for the given profile */ + caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + caps->pNext = dec_caps; + dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR; + /* dec_caps->pNext 
already filled in */ + + return vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile, + caps); +} + +static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_ref, + enum AVPixelFormat *pix_fmt, VkFormat *vk_fmt, + FFVulkanDecodeProfileData *prof, + int *dpb_dedicate) +{ + VkResult ret; + int max_level, base_profile, cur_profile; + const FFVulkanDecodeDescriptor *vk_desc = get_codecdesc(avctx->codec_id); + AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; + AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data; + AVVulkanDeviceContext *hwctx = device->hwctx; + enum AVPixelFormat source_format; + enum AVPixelFormat best_format; + VkFormat best_vkfmt; + + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VkVideoCapabilitiesKHR *caps = &ctx->caps; + VkVideoDecodeCapabilitiesKHR *dec_caps = &ctx->dec_caps; + + VkVideoDecodeH264CapabilitiesKHR h264_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, + }; + VkVideoDecodeH265CapabilitiesKHR h265_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, + }; +#if CONFIG_VP9_VULKAN_HWACCEL + VkVideoDecodeVP9CapabilitiesKHR vp9_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_CAPABILITIES_KHR, + }; +#endif + VkVideoDecodeAV1CapabilitiesKHR av1_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_KHR, + }; + + VkPhysicalDeviceVideoFormatInfoKHR fmt_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR, + .pNext = &prof->profile_list, + }; + VkVideoFormatPropertiesKHR *ret_info; + uint32_t nb_out_fmts = 0; + + if (!(vk_desc->decode_extension & ctx->s.extensions)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } + + cur_profile = avctx->profile; + base_profile = avctx->codec_id == 
AV_CODEC_ID_H264 ? AV_PROFILE_H264_CONSTRAINED_BASELINE : + avctx->codec_id == AV_CODEC_ID_H265 ? AV_PROFILE_HEVC_MAIN : +#if CONFIG_VP9_VULKAN_HWACCEL + avctx->codec_id == AV_CODEC_ID_VP9 ? STD_VIDEO_VP9_PROFILE_0 : +#endif + avctx->codec_id == AV_CODEC_ID_AV1 ? STD_VIDEO_AV1_PROFILE_MAIN : + 0; + + ret = vulkan_setup_profile(avctx, prof, hwctx, vk, vk_desc, + &h264_caps, + &h265_caps, +#if CONFIG_VP9_VULKAN_HWACCEL + &vp9_caps, +#endif + &av1_caps, + caps, + dec_caps, + cur_profile); + if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR && + avctx->flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH && + avctx->profile != base_profile) { + av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting " + "again with profile %s\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, cur_profile), + avcodec_profile_name(avctx->codec_id, base_profile)); + cur_profile = base_profile; + ret = vulkan_setup_profile(avctx, prof, hwctx, vk, vk_desc, + &h264_caps, + &h265_caps, +#if CONFIG_VP9_VULKAN_HWACCEL + &vp9_caps, +#endif + &av1_caps, + caps, + dec_caps, + cur_profile); + } + + if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " + "%s profile \"%s\" not supported!\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, cur_profile)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_VERBOSE, "Unable to initialize video session: " + "format (%s) not supported!\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT || + ret == VK_ERROR_FORMAT_NOT_SUPPORTED) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + return AVERROR_EXTERNAL; + } + + max_level = avctx->codec_id == AV_CODEC_ID_H264 ? ff_vk_h264_level_to_av(h264_caps.maxLevelIdc) : + avctx->codec_id == AV_CODEC_ID_H265 ? 
ff_vk_h265_level_to_av(h265_caps.maxLevelIdc) : +#if CONFIG_VP9_VULKAN_HWACCEL + avctx->codec_id == AV_CODEC_ID_VP9 ? vp9_caps.maxLevel : +#endif + avctx->codec_id == AV_CODEC_ID_AV1 ? av1_caps.maxLevel : + 0; + + av_log(avctx, AV_LOG_VERBOSE, "Decoder capabilities for %s profile \"%s\":\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, cur_profile)); + av_log(avctx, AV_LOG_VERBOSE, " Maximum level: %i (stream %i)\n", + max_level, avctx->level); + av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n", + caps->minCodedExtent.width, caps->maxCodedExtent.width); + av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n", + caps->minCodedExtent.height, caps->maxCodedExtent.height); + av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n", + caps->pictureAccessGranularity.width); + av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n", + caps->pictureAccessGranularity.height); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n", + caps->minBitstreamBufferOffsetAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n", + caps->minBitstreamBufferSizeAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n", + caps->maxDpbSlots); + av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n", + caps->maxActiveReferencePictures); + av_log(avctx, AV_LOG_VERBOSE, " Codec header name: '%s' (driver), '%s' (compiled)\n", + caps->stdHeaderVersion.extensionName, + vk_desc->ext_props.extensionName); + av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n", + CODEC_VER(caps->stdHeaderVersion.specVersion), + CODEC_VER(vk_desc->ext_props.specVersion)); + av_log(avctx, AV_LOG_VERBOSE, " Decode modes:%s%s%s\n", + dec_caps->flags ? "" : + " invalid", + dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR ? + " reuse_dst_dpb" : "", + dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR ? 
+ " dedicated_dpb" : ""); + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n", + caps->flags ? "" : + " none", + caps->flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ? + " protected" : "", + caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ? + " separate_references" : ""); + + /* Check if decoding is possible with the given parameters */ + if (avctx->coded_width < caps->minCodedExtent.width || + avctx->coded_height < caps->minCodedExtent.height || + avctx->coded_width > caps->maxCodedExtent.width || + avctx->coded_height > caps->maxCodedExtent.height) + return AVERROR(EINVAL); + + if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) && + avctx->level > max_level) + return AVERROR(EINVAL); + + /* Some basic sanity checking */ + if (!(dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR))) { + av_log(avctx, AV_LOG_ERROR, "Buggy driver signals invalid decoding mode: neither " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR nor " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR are set!\n"); + return AVERROR_EXTERNAL; + } else if ((dec_caps->flags & (VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR | + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR) == + VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR) && + !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR)) { + av_log(avctx, AV_LOG_ERROR, "Cannot initialize Vulkan decoding session, buggy driver: " + "VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR set " + "but VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR is unset!\n"); + return AVERROR_EXTERNAL; + } + + dec->dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR); + ctx->common.layered_dpb = !dec->dedicated_dpb ? 
0 : + !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); + + if (dec->dedicated_dpb) { + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + } else { + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + + if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | + FF_VK_EXT_VIDEO_MAINTENANCE_1)) + fmt_info.imageUsage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + } + + /* Get the format of the images necessary */ + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, NULL); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts); + if (!ret_info) + return AVERROR(ENOMEM); + + for (int i = 0; i < nb_out_fmts; i++) + ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; + + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, ret_info); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + av_free(ret_info); + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + av_free(ret_info); + return AVERROR_EXTERNAL; + } + + /* Find a format to use */ + *pix_fmt = best_format = AV_PIX_FMT_NONE; + *vk_fmt = best_vkfmt = VK_FORMAT_UNDEFINED; + source_format = avctx->sw_pix_fmt; + + av_log(avctx, AV_LOG_DEBUG, "Choosing best pixel format for decoding from %i:\n", nb_out_fmts); + for (int i = 0; i < nb_out_fmts; i++) { + enum AVPixelFormat tmp = ff_vk_pix_fmt_from_vkfmt(ret_info[i].format); + if (tmp == 
AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_WARNING, "Invalid/unknown Vulkan format %i!\n", ret_info[i].format); + continue; + } + + best_format = av_find_best_pix_fmt_of_2(tmp, best_format, source_format, 0, NULL); + if (tmp == best_format) + best_vkfmt = ret_info[i].format; + + av_log(avctx, AV_LOG_DEBUG, " %s%s (Vulkan ID: %i)\n", + av_get_pix_fmt_name(tmp), tmp == best_format ? "*" : "", + ret_info[i].format); + } + + av_free(ret_info); + + if (best_format == AV_PIX_FMT_NONE) { + av_log(avctx, AV_LOG_ERROR, "No valid/compatible pixel format found for decoding!\n"); + return AVERROR(EINVAL); + } else { + av_log(avctx, AV_LOG_VERBOSE, "Chosen frame pixfmt: %s (Vulkan ID: %i)\n", + av_get_pix_fmt_name(best_format), best_vkfmt); + } + + *pix_fmt = best_format; + *vk_fmt = best_vkfmt; + + *dpb_dedicate = dec->dedicated_dpb; + + return 0; +} + +static void free_profile_data(AVHWFramesContext *hwfc) +{ + av_free(hwfc->user_opaque); +} + +int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) +{ + VkFormat vkfmt = VK_FORMAT_UNDEFINED; + int err, dedicated_dpb; + AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data; + AVVulkanFramesContext *hwfc = frames_ctx->hwctx; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeProfileData *prof = NULL; + + err = vulkan_decode_bootstrap(avctx, hw_frames_ctx); + if (err < 0) + return err; + + frames_ctx->sw_format = avctx->sw_pix_fmt; + + if (!DECODER_IS_SDR(avctx->codec_id)) { + prof = av_mallocz(sizeof(FFVulkanDecodeProfileData)); + if (!prof) + return AVERROR(ENOMEM); + + err = vulkan_decode_get_profile(avctx, hw_frames_ctx, + &frames_ctx->sw_format, &vkfmt, + prof, &dedicated_dpb); + if (err < 0) { + av_free(prof); + return err; + } + + frames_ctx->user_opaque = prof; + frames_ctx->free = free_profile_data; + + hwfc->create_pnext = &prof->profile_list; + } else { + switch (frames_ctx->sw_format) { + case AV_PIX_FMT_GBRAP16: + /* This should be more efficient for 
downloading and using */ + frames_ctx->sw_format = AV_PIX_FMT_RGBA64; + break; + case AV_PIX_FMT_GBRP10: + /* This saves memory bandwidth when downloading */ + frames_ctx->sw_format = AV_PIX_FMT_X2BGR10; + break; + case AV_PIX_FMT_BGR0: + /* mpv has issues with bgr0 mapping, so just remap it */ + frames_ctx->sw_format = AV_PIX_FMT_RGB0; + break; + default: + break; + } + } + + frames_ctx->width = avctx->coded_width; + frames_ctx->height = avctx->coded_height; + frames_ctx->format = AV_PIX_FMT_VULKAN; + + hwfc->format[0] = vkfmt; + hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT; + + if (prof) { + FFVulkanDecodeShared *ctx; + + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR; + if (!dec->dedicated_dpb) + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR; + + ctx = dec->shared_ctx; + if (ctx->s.extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE | + FF_VK_EXT_VIDEO_MAINTENANCE_1)) + hwfc->usage |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR; + } + + return err; +} + +static void vk_decode_free_params(void *opaque, uint8_t *data) +{ + FFVulkanDecodeShared *ctx = opaque; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)data; + vk->DestroyVideoSessionParametersKHR(ctx->s.hwctx->act_dev, *par, + ctx->s.hwctx->alloc); + av_free(par); +} + +int ff_vk_decode_create_params(AVBufferRef **par_ref, void *logctx, FFVulkanDecodeShared *ctx, + const VkVideoSessionParametersCreateInfoKHR *session_params_create) +{ + VkVideoSessionParametersKHR *par = av_malloc(sizeof(*par)); + const FFVulkanFunctions *vk = &ctx->s.vkfn; + VkResult ret; + + if (!par) + return AVERROR(ENOMEM); + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(ctx->s.hwctx->act_dev, session_params_create, + ctx->s.hwctx->alloc, par); + if (ret != VK_SUCCESS) { + av_log(logctx, AV_LOG_ERROR, "Unable to create Vulkan video session 
parameters: %s!\n", + ff_vk_ret2str(ret)); + av_free(par); + return AVERROR_EXTERNAL; + } + *par_ref = av_buffer_create((uint8_t *)par, sizeof(*par), + vk_decode_free_params, ctx, 0); + if (!*par_ref) { + vk_decode_free_params(ctx, (uint8_t *)par); + return AVERROR(ENOMEM); + } + + return 0; +} + +int ff_vk_decode_uninit(AVCodecContext *avctx) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + + av_freep(&dec->hevc_headers); + av_buffer_unref(&dec->session_params); + av_refstruct_unref(&dec->shared_ctx); + av_freep(&dec->slice_off); + return 0; +} + +static int create_empty_session_parameters(AVCodecContext *avctx, + FFVulkanDecodeShared *ctx) +{ + VkResult ret; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &s->vkfn; + + VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, + }; + VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, + }; + StdVideoAV1SequenceHeader av1_empty_seq = { 0 }; + VkVideoDecodeAV1SessionParametersCreateInfoKHR av1_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pStdSequenceHeader = &av1_empty_seq, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = avctx->codec_id == AV_CODEC_ID_H264 ? (void *)&h264_params : + avctx->codec_id == AV_CODEC_ID_HEVC ? (void *)&h265_params : + avctx->codec_id == AV_CODEC_ID_AV1 ? 
(void *)&av1_params : + NULL, + .videoSession = ctx->common.session, + }; + + if (avctx->codec_id == AV_CODEC_ID_VP9) + return 0; + + ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, + s->hwctx->alloc, &ctx->empty_session_params); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create empty Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} + +int ff_vk_decode_init(AVCodecContext *avctx) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx; + FFVulkanContext *s; + int async_depth; + const VkVideoProfileInfoKHR *profile; + const FFVulkanDecodeDescriptor *vk_desc; + const VkPhysicalDeviceDriverProperties *driver_props; + + VkVideoSessionCreateInfoKHR session_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR, + }; + + err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN); + if (err < 0) + return err; + + /* Initialize contexts */ + ctx = dec->shared_ctx; + s = &ctx->s; + + err = ff_vk_init(s, avctx, NULL, avctx->hw_frames_ctx); + if (err < 0) + return err; + + vk_desc = get_codecdesc(avctx->codec_id); + + profile = get_video_profile(ctx, avctx->codec_id); + if ((vk_desc->queue_flags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) && !profile) { + av_log(avctx, AV_LOG_ERROR, "Video profile missing from frames context!"); + return AVERROR(EINVAL); + } + + /* Create queue context */ + vk_desc = get_codecdesc(avctx->codec_id); + ctx->qf = ff_vk_qf_find(s, vk_desc->queue_flags, vk_desc->decode_op); + if (!ctx->qf) { + av_log(avctx, AV_LOG_ERROR, "Decoding of %s is not supported by this device\n", + avcodec_get_name(avctx->codec_id)); + return err; + } + + session_create.queueFamilyIndex = ctx->qf->idx; + session_create.maxCodedExtent = ctx->caps.maxCodedExtent; + session_create.maxDpbSlots = ctx->caps.maxDpbSlots; + session_create.maxActiveReferencePictures = 
ctx->caps.maxActiveReferencePictures; + session_create.pictureFormat = s->hwfc->format[0]; + session_create.referencePictureFormat = session_create.pictureFormat; + session_create.pStdHeaderVersion = &vk_desc->ext_props; + session_create.pVideoProfile = profile; +#ifdef VK_KHR_video_maintenance2 + if (ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2) + session_create.flags = VK_VIDEO_SESSION_CREATE_INLINE_SESSION_PARAMETERS_BIT_KHR; +#endif + + /* Create decode exec context for this specific main thread. + * 2 async contexts per thread was experimentally determined to be optimal + * for a majority of streams. */ + async_depth = 2*ctx->qf->num; + /* We don't need more than 2 per thread context */ + async_depth = FFMIN(async_depth, 2*avctx->thread_count); + /* Make sure there are enough async contexts for each thread */ + async_depth = FFMAX(async_depth, avctx->thread_count); + + err = ff_vk_exec_pool_init(s, ctx->qf, &ctx->exec_pool, + async_depth, 0, 0, 0, profile); + if (err < 0) + goto fail; + + if (!DECODER_IS_SDR(avctx->codec_id)) { + err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); + if (err < 0) + goto fail; + } + + /* If doing an out-of-place decoding, create a DPB pool */ + if (dec->dedicated_dpb || avctx->codec_id == AV_CODEC_ID_AV1) { + AVHWFramesContext *dpb_frames; + AVVulkanFramesContext *dpb_hwfc; + + ctx->common.dpb_hwfc_ref = av_hwframe_ctx_alloc(s->frames->device_ref); + if (!ctx->common.dpb_hwfc_ref) { + err = AVERROR(ENOMEM); + goto fail; + } + + dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data; + dpb_frames->format = s->frames->format; + dpb_frames->sw_format = s->frames->sw_format; + dpb_frames->width = avctx->coded_width; + dpb_frames->height = avctx->coded_height; + + dpb_hwfc = dpb_frames->hwctx; + dpb_hwfc->create_pnext = (void *)ff_vk_find_struct(ctx->s.hwfc->create_pnext, + VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR); + dpb_hwfc->format[0] = s->hwfc->format[0]; + dpb_hwfc->tiling = 
VK_IMAGE_TILING_OPTIMAL; + dpb_hwfc->usage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | + VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */ + + if (ctx->common.layered_dpb) + dpb_hwfc->nb_layers = ctx->caps.maxDpbSlots; + + err = av_hwframe_ctx_init(ctx->common.dpb_hwfc_ref); + if (err < 0) + goto fail; + + if (ctx->common.layered_dpb) { + ctx->common.layered_frame = vk_get_dpb_pool(ctx); + if (!ctx->common.layered_frame) { + err = AVERROR(ENOMEM); + goto fail; + } + + err = ff_vk_create_view(&ctx->s, &ctx->common, + &ctx->common.layered_view, + &ctx->common.layered_aspect, + (AVVkFrame *)ctx->common.layered_frame->data[0], + s->hwfc->format[0], 1); + if (err < 0) + goto fail; + } + } + + if (!DECODER_IS_SDR(avctx->codec_id)) { + if (!(ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2)) { + err = create_empty_session_parameters(avctx, ctx); + if (err < 0) + return err; + } + } else { + /* For SDR decoders, this alignment value will be 0. Since this will make + * add_slice() malfunction, set it to a sane default value. 
*/ + ctx->caps.minBitstreamBufferSizeAlignment = AV_INPUT_BUFFER_PADDING_SIZE; + } + + driver_props = &dec->shared_ctx->s.driver_props; + if (driver_props->driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY && + driver_props->conformanceVersion.major == 1 && + driver_props->conformanceVersion.minor == 3 && + driver_props->conformanceVersion.subminor == 8 && + driver_props->conformanceVersion.patch < 3) + dec->quirk_av1_offset = 1; + + ff_vk_decode_flush(avctx); + + av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization successful\n"); + + return 0; + +fail: + ff_vk_decode_uninit(avctx); + + return err; +} -- 2.49.1 From 76fc9208cb8884e23b6d31f3d2fa9891be6ab22f Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:30:00 +0000 Subject: [PATCH 067/118] Changing vulkan file directory --- libavcodec/vulkan_decode.h | 189 ------------------------------------- 1 file changed, 189 deletions(-) delete mode 100644 libavcodec/vulkan_decode.h diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h deleted file mode 100644 index e32468f317..0000000000 --- a/libavcodec/vulkan_decode.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_VULKAN_DECODE_H -#define AVCODEC_VULKAN_DECODE_H - -#include "codec_id.h" -#include "decode.h" -#include "hwaccel_internal.h" -#include "internal.h" - -#include "vulkan_video.h" - -typedef struct FFVulkanDecodeDescriptor { - enum AVCodecID codec_id; - FFVulkanExtensions decode_extension; - VkQueueFlagBits queue_flags; - VkVideoCodecOperationFlagBitsKHR decode_op; - - VkExtensionProperties ext_props; -} FFVulkanDecodeDescriptor; - -typedef struct FFVulkanDecodeShared { - FFVulkanContext s; - FFVkVideoCommon common; - AVVulkanDeviceQueueFamily *qf; - FFVkExecPool exec_pool; - - AVBufferPool *buf_pool; - - VkVideoCapabilitiesKHR caps; - VkVideoDecodeCapabilitiesKHR dec_caps; - - VkVideoSessionParametersKHR empty_session_params; - - /* Software-defined decoder context */ - void *sd_ctx; - void (*sd_ctx_free)(struct FFVulkanDecodeShared *ctx); -} FFVulkanDecodeShared; - -typedef struct FFVulkanDecodeContext { - FFVulkanDecodeShared *shared_ctx; - AVBufferRef *session_params; - - int dedicated_dpb; /* Oddity #1 - separate DPB images */ - int external_fg; /* Oddity #2 - hardware can't apply film grain */ - - /* Workaround for NVIDIA drivers tested with CTS version 1.3.8 for AV1. - * The tests were incorrect as the OrderHints were offset by 1. */ - int quirk_av1_offset; - - /* Thread-local state below */ - struct HEVCHeaderSet *hevc_headers; - size_t hevc_headers_size; - - uint32_t *slice_off; - unsigned int slice_off_max; -} FFVulkanDecodeContext; - -typedef struct FFVulkanDecodePicture { - AVFrame *dpb_frame; /* Only used for out-of-place decoding. 
*/ - - struct { - VkImageView ref[AV_NUM_DATA_POINTERS]; /* Image representation view (reference) */ - VkImageView out[AV_NUM_DATA_POINTERS]; /* Image representation view (output-only) */ - VkImageView dst[AV_NUM_DATA_POINTERS]; /* Set to img_view_out if no layered refs are used */ - VkImageAspectFlags aspect[AV_NUM_DATA_POINTERS]; /* Image plane mask bits */ - VkImageAspectFlags aspect_ref[AV_NUM_DATA_POINTERS]; /* Only used for out-of-place decoding */ - } view; - - VkSemaphore sem; - uint64_t sem_value; - - /* Current picture */ - VkVideoPictureResourceInfoKHR ref; - VkVideoReferenceSlotInfoKHR ref_slot; - - /* Picture refs. H264 has the maximum number of refs (36) of any supported codec. */ - VkVideoPictureResourceInfoKHR refs [36]; - VkVideoReferenceSlotInfoKHR ref_slots[36]; - - /* Main decoding struct */ - VkVideoDecodeInfoKHR decode_info; - - /* Slice data */ - AVBufferRef *slices_buf; - size_t slices_size; - - /* Vulkan functions needed for destruction, as no other context is guaranteed to exist */ - PFN_vkWaitSemaphores wait_semaphores; - PFN_vkDestroyImageView destroy_image_view; - PFN_vkInvalidateMappedMemoryRanges invalidate_memory_ranges; -} FFVulkanDecodePicture; - -/** - * Initialize decoder. - */ -int ff_vk_decode_init(AVCodecContext *avctx); - -/** - * Synchronize the contexts between 2 threads. - */ -int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src); - -/** - * Initialize hw_frames_ctx with the parameters needed to decode the stream - * using the parameters from avctx. - * - * NOTE: if avctx->internal->hwaccel_priv_data exists, will partially initialize - * the context. - */ -int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); - -/** - * Removes current session parameters to recreate them - */ -int ff_vk_params_invalidate(AVCodecContext *avctx, int t, const uint8_t *b, uint32_t s); - -/** - * Prepare a frame, creates the image view, and sets up the dpb fields. 
- */ -int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, - FFVulkanDecodePicture *vkpic, int is_current, - int alloc_dpb); - -/** - * Software-defined decoder version of ff_vk_decode_prepare_frame. - */ -int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic, - FFVulkanDecodePicture *vkpic, int is_current, - enum FFVkShaderRepFormat rep_fmt, int alloc_dpb); - -/** - * Add slice data to frame. - */ -int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, - const uint8_t *data, size_t size, int add_startcode, - uint32_t *nb_slices, const uint32_t **offsets); - -/** - * Decode a frame. - */ -int ff_vk_decode_frame(AVCodecContext *avctx, - AVFrame *pic, FFVulkanDecodePicture *vp, - AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]); - -/** - * Free a frame and its state. - */ -void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *vp); - -/** - * Get an FFVkBuffer suitable for decoding from. - */ -int ff_vk_get_decode_buffer(FFVulkanDecodeContext *ctx, AVBufferRef **buf, - void *create_pNext, size_t size); - -/** - * Create VkVideoSessionParametersKHR wrapped in an AVBufferRef. - */ -int ff_vk_decode_create_params(AVBufferRef **par_ref, void *logctx, FFVulkanDecodeShared *ctx, - const VkVideoSessionParametersCreateInfoKHR *session_params_create); - -/** - * Flush decoder. - */ -void ff_vk_decode_flush(AVCodecContext *avctx); - -/** - * Free decoder. 
- */ -int ff_vk_decode_uninit(AVCodecContext *avctx); - -#endif /* AVCODEC_VULKAN_DECODE_H */ -- 2.49.1 From ab5c3f473542c3e6db62a446e0352f462c9da19e Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:30:35 +0000 Subject: [PATCH 068/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_decode.h | 189 ++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_decode.h diff --git a/libavcodec/vulkan/vulkan_decode.h b/libavcodec/vulkan/vulkan_decode.h new file mode 100644 index 0000000000..59d22096f4 --- /dev/null +++ b/libavcodec/vulkan/vulkan_decode.h @@ -0,0 +1,189 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_DECODE_H +#define AVCODEC_VULKAN_DECODE_H + +#include "libavcodec/codec_id.h" +#include "libavcodec/decode.h" +#include "libavcodec/hwaccel_internal.h" +#include "libavcodec/internal.h" + +#include "vulkan_video.h" + +typedef struct FFVulkanDecodeDescriptor { + enum AVCodecID codec_id; + FFVulkanExtensions decode_extension; + VkQueueFlagBits queue_flags; + VkVideoCodecOperationFlagBitsKHR decode_op; + + VkExtensionProperties ext_props; +} FFVulkanDecodeDescriptor; + +typedef struct FFVulkanDecodeShared { + FFVulkanContext s; + FFVkVideoCommon common; + AVVulkanDeviceQueueFamily *qf; + FFVkExecPool exec_pool; + + AVBufferPool *buf_pool; + + VkVideoCapabilitiesKHR caps; + VkVideoDecodeCapabilitiesKHR dec_caps; + + VkVideoSessionParametersKHR empty_session_params; + + /* Software-defined decoder context */ + void *sd_ctx; + void (*sd_ctx_free)(struct FFVulkanDecodeShared *ctx); +} FFVulkanDecodeShared; + +typedef struct FFVulkanDecodeContext { + FFVulkanDecodeShared *shared_ctx; + AVBufferRef *session_params; + + int dedicated_dpb; /* Oddity #1 - separate DPB images */ + int external_fg; /* Oddity #2 - hardware can't apply film grain */ + + /* Workaround for NVIDIA drivers tested with CTS version 1.3.8 for AV1. + * The tests were incorrect as the OrderHints were offset by 1. */ + int quirk_av1_offset; + + /* Thread-local state below */ + struct HEVCHeaderSet *hevc_headers; + size_t hevc_headers_size; + + uint32_t *slice_off; + unsigned int slice_off_max; +} FFVulkanDecodeContext; + +typedef struct FFVulkanDecodePicture { + AVFrame *dpb_frame; /* Only used for out-of-place decoding. 
*/ + + struct { + VkImageView ref[AV_NUM_DATA_POINTERS]; /* Image representation view (reference) */ + VkImageView out[AV_NUM_DATA_POINTERS]; /* Image representation view (output-only) */ + VkImageView dst[AV_NUM_DATA_POINTERS]; /* Set to img_view_out if no layered refs are used */ + VkImageAspectFlags aspect[AV_NUM_DATA_POINTERS]; /* Image plane mask bits */ + VkImageAspectFlags aspect_ref[AV_NUM_DATA_POINTERS]; /* Only used for out-of-place decoding */ + } view; + + VkSemaphore sem; + uint64_t sem_value; + + /* Current picture */ + VkVideoPictureResourceInfoKHR ref; + VkVideoReferenceSlotInfoKHR ref_slot; + + /* Picture refs. H264 has the maximum number of refs (36) of any supported codec. */ + VkVideoPictureResourceInfoKHR refs [36]; + VkVideoReferenceSlotInfoKHR ref_slots[36]; + + /* Main decoding struct */ + VkVideoDecodeInfoKHR decode_info; + + /* Slice data */ + AVBufferRef *slices_buf; + size_t slices_size; + + /* Vulkan functions needed for destruction, as no other context is guaranteed to exist */ + PFN_vkWaitSemaphores wait_semaphores; + PFN_vkDestroyImageView destroy_image_view; + PFN_vkInvalidateMappedMemoryRanges invalidate_memory_ranges; +} FFVulkanDecodePicture; + +/** + * Initialize decoder. + */ +int ff_vk_decode_init(AVCodecContext *avctx); + +/** + * Synchronize the contexts between 2 threads. + */ +int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src); + +/** + * Initialize hw_frames_ctx with the parameters needed to decode the stream + * using the parameters from avctx. + * + * NOTE: if avctx->internal->hwaccel_priv_data exists, will partially initialize + * the context. + */ +int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); + +/** + * Removes current session parameters to recreate them + */ +int ff_vk_params_invalidate(AVCodecContext *avctx, int t, const uint8_t *b, uint32_t s); + +/** + * Prepare a frame, creates the image view, and sets up the dpb fields. 
+ */ +int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + int alloc_dpb); + +/** + * Software-defined decoder version of ff_vk_decode_prepare_frame. + */ +int ff_vk_decode_prepare_frame_sdr(FFVulkanDecodeContext *dec, AVFrame *pic, + FFVulkanDecodePicture *vkpic, int is_current, + enum FFVkShaderRepFormat rep_fmt, int alloc_dpb); + +/** + * Add slice data to frame. + */ +int ff_vk_decode_add_slice(AVCodecContext *avctx, FFVulkanDecodePicture *vp, + const uint8_t *data, size_t size, int add_startcode, + uint32_t *nb_slices, const uint32_t **offsets); + +/** + * Decode a frame. + */ +int ff_vk_decode_frame(AVCodecContext *avctx, + AVFrame *pic, FFVulkanDecodePicture *vp, + AVFrame *rpic[], FFVulkanDecodePicture *rvkp[]); + +/** + * Free a frame and its state. + */ +void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture *vp); + +/** + * Get an FFVkBuffer suitable for decoding from. + */ +int ff_vk_get_decode_buffer(FFVulkanDecodeContext *ctx, AVBufferRef **buf, + void *create_pNext, size_t size); + +/** + * Create VkVideoSessionParametersKHR wrapped in an AVBufferRef. + */ +int ff_vk_decode_create_params(AVBufferRef **par_ref, void *logctx, FFVulkanDecodeShared *ctx, + const VkVideoSessionParametersCreateInfoKHR *session_params_create); + +/** + * Flush decoder. + */ +void ff_vk_decode_flush(AVCodecContext *avctx); + +/** + * Free decoder. 
+ */ +int ff_vk_decode_uninit(AVCodecContext *avctx); + +#endif /* AVCODEC_VULKAN_DECODE_H */ -- 2.49.1 From 4ad6fd88f38a083b0ba171521f01add728f21596 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:31:02 +0000 Subject: [PATCH 069/118] Changing vulkan file directory --- libavcodec/vulkan_encode.c | 1054 ------------------------------------ 1 file changed, 1054 deletions(-) delete mode 100644 libavcodec/vulkan_encode.c diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c deleted file mode 100644 index e5c0496f1c..0000000000 --- a/libavcodec/vulkan_encode.c +++ /dev/null @@ -1,1054 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/mem.h" -#include "libavutil/avassert.h" -#include "vulkan_encode.h" -#include "config.h" - -#include "libavutil/vulkan_loader.h" - -const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = { - HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), - NULL, -}; - -av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx) -{ - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &s->vkfn; - - /* Wait on and free execution pool */ - ff_vk_exec_pool_free(s, &ctx->enc_pool); - - /* Destroy the session params */ - if (ctx->session_params) - vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, - ctx->session_params, - s->hwctx->alloc); - - ff_hw_base_encode_close(&ctx->base); - - av_buffer_pool_uninit(&ctx->buf_pool); - - ff_vk_video_common_uninit(s, &ctx->common); - - ff_vk_uninit(s); -} - -static int vulkan_encode_init(AVCodecContext *avctx, FFHWBaseEncodePicture *pic) -{ - int err; - FFVulkanEncodeContext *ctx = avctx->priv_data; - FFVulkanEncodePicture *vp = pic->priv; - - AVFrame *f = pic->input_image; - AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; - AVVulkanFramesContext *vkfc = hwfc->hwctx; - AVVkFrame *vkf = (AVVkFrame *)f->data[0]; - - if (ctx->codec->picture_priv_data_size > 0) { - pic->codec_priv = av_mallocz(ctx->codec->picture_priv_data_size); - if (!pic->codec_priv) - return AVERROR(ENOMEM); - } - - /* Input image view */ - err = ff_vk_create_view(&ctx->s, &ctx->common, - &vp->in.view, &vp->in.aspect, - vkf, vkfc->format[0], 0); - if (err < 0) - return err; - - /* Reference view */ - if (!ctx->common.layered_dpb) { - AVFrame *rf = pic->recon_image; - AVVkFrame *rvkf = (AVVkFrame *)rf->data[0]; - err = ff_vk_create_view(&ctx->s, &ctx->common, - &vp->dpb.view, &vp->dpb.aspect, - 
rvkf, ctx->pic_format, 1); - if (err < 0) - return err; - } else { - vp->dpb.view = ctx->common.layered_view; - vp->dpb.aspect = ctx->common.layered_aspect; - } - - return 0; -} - -static int vulkan_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic) -{ - FFVulkanEncodeContext *ctx = avctx->priv_data; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - FFVulkanEncodePicture *vp = pic->priv; - - if (vp->in.view) - vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->in.view, - ctx->s.hwctx->alloc); - - if (!ctx->common.layered_dpb && vp->dpb.view) - vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->dpb.view, - ctx->s.hwctx->alloc); - - ctx->slots[vp->dpb_slot.slotIndex] = NULL; - - return 0; -} - -static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - VkVideoEncodeRateControlInfoKHR *rc_info, - VkVideoEncodeRateControlLayerInfoKHR *rc_layer /* Goes in ^ */) -{ - FFVulkanEncodeContext *ctx = avctx->priv_data; - - *rc_info = (VkVideoEncodeRateControlInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR, - .rateControlMode = ctx->opts.rc_mode, - }; - - if (ctx->opts.rc_mode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { - *rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR, - .averageBitrate = avctx->bit_rate, - .maxBitrate = avctx->rc_max_rate ? 
avctx->rc_max_rate : avctx->bit_rate, - .frameRateNumerator = avctx->framerate.num, - .frameRateDenominator = avctx->framerate.den, - }; - rc_info->layerCount++; - rc_info->pLayers = rc_layer; - } - - return ctx->codec->init_pic_rc(avctx, pic, rc_info, rc_layer); -} - -static int vulkan_encode_issue(AVCodecContext *avctx, - FFHWBaseEncodePicture *base_pic) -{ - FFVulkanEncodeContext *ctx = avctx->priv_data; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - const size_t size_align = ctx->caps.minBitstreamBufferSizeAlignment; - - FFVulkanEncodePicture *vp = base_pic->priv; - AVFrame *src = (AVFrame *)base_pic->input_image; - AVVkFrame *vkf = (AVVkFrame *)src->data[0]; - - int err, max_pkt_size; - - FFVkBuffer *sd_buf; - - int slot_index = -1; - FFVkExecContext *exec; - VkCommandBuffer cmd_buf; - VkImageMemoryBarrier2 img_bar[37]; - int nb_img_bar = 0; - - /* Coding start/end */ - VkVideoBeginCodingInfoKHR encode_start; - VkVideoEndCodingInfoKHR encode_end = { - .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, - }; - - VkVideoEncodeRateControlLayerInfoKHR rc_layer; - VkVideoEncodeRateControlInfoKHR rc_info; - VkVideoEncodeQualityLevelInfoKHR q_info; - VkVideoCodingControlInfoKHR encode_ctrl; - - VkVideoReferenceSlotInfoKHR ref_slot[37]; - VkVideoEncodeInfoKHR encode_info; - - /* Create packet data buffer */ - max_pkt_size = FFALIGN(3 * ctx->base.surface_width * ctx->base.surface_height + (1 << 16), - ctx->caps.minBitstreamBufferSizeAlignment); - - err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &vp->pkt_buf, - VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR, - &ctx->profile_list, max_pkt_size, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT); - if (err < 0) - return err; - - sd_buf = (FFVkBuffer *)vp->pkt_buf->data; - - /* Setup rate control */ - err = init_pic_rc(avctx, base_pic, &rc_info, &rc_layer); - if (err < 0) - return err; - - q_info = (VkVideoEncodeQualityLevelInfoKHR) { - .sType = 
VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR, - .pNext = &rc_info, - .qualityLevel = ctx->opts.quality, - }; - encode_ctrl = (VkVideoCodingControlInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, - .pNext = &q_info, - .flags = VK_VIDEO_CODING_CONTROL_ENCODE_QUALITY_LEVEL_BIT_KHR | - VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR | - VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR, - }; - - for (int i = 0; i < ctx->caps.maxDpbSlots; i++) { - if (ctx->slots[i] == NULL) { - slot_index = i; - ctx->slots[i] = base_pic; - break; - } - } - av_assert0(slot_index >= 0); - - /* Current picture's ref slot */ - vp->dpb_res = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - .pNext = NULL, - .codedOffset = { 0 }, - .codedExtent = (VkExtent2D){ avctx->width, avctx->height }, - .baseArrayLayer = ctx->common.layered_dpb ? slot_index : 0, - .imageViewBinding = vp->dpb.view, - }; - - vp->dpb_slot = (VkVideoReferenceSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, - .pNext = NULL, // Set later - .slotIndex = slot_index, - .pPictureResource = &vp->dpb_res, - }; - - encode_info = (VkVideoEncodeInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR, - .pNext = NULL, // Set later - .flags = 0x0, - .srcPictureResource = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - .pNext = NULL, - .codedOffset = { 0, 0 }, - .codedExtent = (VkExtent2D){ base_pic->input_image->width, - base_pic->input_image->height }, - .baseArrayLayer = 0, - .imageViewBinding = vp->in.view, - }, - .pSetupReferenceSlot = &vp->dpb_slot, - .referenceSlotCount = 0, - .pReferenceSlots = ref_slot, - .dstBuffer = sd_buf->buf, - .dstBufferOffset = 0, - .dstBufferRange = sd_buf->size, - .precedingExternallyEncodedBytes = 0, - }; - - for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) { - for (int j = 0; j < base_pic->nb_refs[i]; j++) { - FFHWBaseEncodePicture *ref = 
base_pic->refs[i][j]; - FFVulkanEncodePicture *rvp = ref->priv; - ref_slot[encode_info.referenceSlotCount++] = rvp->dpb_slot; - } - } - - /* Calling vkCmdBeginVideoCodingKHR requires to declare all references - * being enabled upfront, including the current frame's output ref. */ - ref_slot[encode_info.referenceSlotCount] = vp->dpb_slot; - ref_slot[encode_info.referenceSlotCount].slotIndex = -1; - - /* Setup picture parameters */ - err = ctx->codec->init_pic_params(avctx, base_pic, - &encode_info); - if (err < 0) - return err; - - encode_start = (VkVideoBeginCodingInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, - .pNext = !base_pic->force_idr ? &rc_info : NULL, - .videoSession = ctx->common.session, - .videoSessionParameters = ctx->session_params, - .referenceSlotCount = encode_info.referenceSlotCount + 1, - .pReferenceSlots = ref_slot, - }; - - /* Write header */ - if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) { - uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset; - size_t data_size = encode_info.dstBufferRange; - err = ctx->codec->write_sequence_headers(avctx, base_pic, hdr_dst, &data_size); - if (err < 0) - goto fail; - encode_info.dstBufferOffset += data_size; - encode_info.dstBufferRange -= data_size; - } - - /* Write extra units */ - if (ctx->codec->write_extra_headers) { - uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset; - size_t data_size = encode_info.dstBufferRange; - err = ctx->codec->write_extra_headers(avctx, base_pic, hdr_dst, &data_size); - if (err < 0) - goto fail; - encode_info.dstBufferOffset += data_size; - encode_info.dstBufferRange -= data_size; - } - - /* Align buffer offset to the required value with filler units */ - if (ctx->codec->write_filler) { - uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset; - size_t data_size = encode_info.dstBufferRange; - - uint32_t offset = encode_info.dstBufferOffset; - size_t offset_align = ctx->caps.minBitstreamBufferOffsetAlignment; 
- - uint32_t filler_data = FFALIGN(offset, offset_align) - offset; - - if (filler_data) { - while (filler_data < ctx->codec->filler_header_size) - filler_data += offset_align; - - filler_data -= ctx->codec->filler_header_size; - - err = ctx->codec->write_filler(avctx, filler_data, - hdr_dst, &data_size); - if (err < 0) - goto fail; - - encode_info.dstBufferOffset += data_size; - encode_info.dstBufferRange -= data_size; - } - } - - vp->slices_offset = encode_info.dstBufferOffset; - - /* Align buffer size to the nearest lower alignment requirement. */ - encode_info.dstBufferRange -= size_align; - encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange, - size_align); - - /* Start command buffer recording */ - exec = vp->exec = ff_vk_exec_get(&ctx->s, &ctx->enc_pool); - ff_vk_exec_start(&ctx->s, exec); - cmd_buf = exec->buf; - - /* Output packet buffer */ - err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->pkt_buf, 1, 1); - if (err < 0) - goto fail; - - /* Source image */ - err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); - if (err < 0) - goto fail; - - /* Source image layout conversion */ - img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, - .pNext = NULL, - .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - .srcAccessMask = vkf->access[0], - .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR, - .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR, - .oldLayout = vkf->layout[0], - .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR, - .srcQueueFamilyIndex = vkf->queue_family[0], - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = vkf->img[0], - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = vp->in.aspect, - .layerCount = 1, - .levelCount = 1, - }, - }; - ff_vk_exec_update_frame(&ctx->s, exec, src, - &img_bar[nb_img_bar], &nb_img_bar); - - if (!ctx->common.layered_dpb) { - /* Source 
image's ref slot. - * No need to do a layout conversion, since the frames which are allocated - * with a DPB usage are automatically converted. */ - err = ff_vk_exec_add_dep_frame(&ctx->s, exec, base_pic->recon_image, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); - if (err < 0) - return err; - - /* All references */ - for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) { - for (int j = 0; j < base_pic->nb_refs[i]; j++) { - FFHWBaseEncodePicture *ref = base_pic->refs[i][j]; - err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->recon_image, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); - if (err < 0) - return err; - } - } - } else { - err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame, - VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR, - VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); - if (err < 0) - return err; - } - - /* Change image layout */ - vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - - /* Start, use parameters */ - vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start); - - /* Send control data */ - if (!ctx->session_reset) { - vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl); - ctx->session_reset++; - } - - /* Encode */ - vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0); - vk->CmdEncodeVideoKHR(cmd_buf, &encode_info); - vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0); - - /* End encoding */ - vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end); - - /* End recording and submit for execution */ - err = ff_vk_exec_submit(&ctx->s, vp->exec); - if (err < 0) - goto fail; - - /* We don't need to keep the input image any longer, its already ref'd */ - av_frame_free(&base_pic->input_image); - - return 0; - -fail: - return err; -} - -static void vulkan_encode_wait(AVCodecContext *avctx, - 
FFHWBaseEncodePicture *base_pic) -{ - FFVulkanEncodeContext *ctx = avctx->priv_data; - FFVulkanEncodePicture *vp = base_pic->priv; - - av_assert0(base_pic->encode_issued); - - if (base_pic->encode_complete) - return; - - ff_vk_exec_wait(&ctx->s, vp->exec); - base_pic->encode_complete = 1; -} - -static int vulkan_encode_output(AVCodecContext *avctx, - FFHWBaseEncodePicture *base_pic, AVPacket *pkt) -{ - VkResult ret; - FFVulkanEncodePicture *vp = base_pic->priv; - FFVulkanEncodeContext *ctx = avctx->priv_data; - FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data; - uint32_t *query_data; - - vulkan_encode_wait(avctx, base_pic); - - ret = ff_vk_exec_get_query(&ctx->s, vp->exec, (void **)&query_data, 0); - if (ret == VK_NOT_READY) { - av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR(EINVAL); - } - - if (ret != VK_NOT_READY && ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - if (query_data[2] != VK_QUERY_RESULT_STATUS_COMPLETE_KHR) { - av_log(avctx, AV_LOG_ERROR, "Unable to encode: %u\n", query_data[2]); - return AVERROR_EXTERNAL; - } - - /* Invalidate buffer if needed */ - if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - FFVulkanFunctions *vk = &ctx->s.vkfn; - VkMappedMemoryRange invalidate_buf; - - int offs = vp->slices_offset; - /* If the non-coherent alignment is greater than the bitstream buffer - * offset's alignment, and the offs value is not aligned already, - * align it to the previous alignment point. 
*/ - if (ctx->s.props.properties.limits.nonCoherentAtomSize > - ctx->caps.minBitstreamBufferOffsetAlignment && offs && - (FFALIGN(offs, ctx->s.props.properties.limits.nonCoherentAtomSize) != offs)) { - offs -= ctx->s.props.properties.limits.nonCoherentAtomSize; - offs = FFALIGN(FFMAX(offs, 0), ctx->s.props.properties.limits.nonCoherentAtomSize); - } - - invalidate_buf = (VkMappedMemoryRange) { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = sd_buf->mem, - .offset = offs, - .size = VK_WHOLE_SIZE, - }; - - vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf); - } - - pkt->data = sd_buf->mapped_mem; - pkt->size = vp->slices_offset + /* base offset */ - query_data[0] /* secondary offset */ + - query_data[1] /* size */; - - /* Move reference */ - pkt->buf = vp->pkt_buf; - vp->pkt_buf = NULL; - - av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n", - base_pic->display_order, base_pic->encode_order); - - return ff_hw_base_encode_set_output_property(&ctx->base, avctx, - base_pic, pkt, - ctx->codec->flags & VK_ENC_FLAG_NO_DELAY); -} - -static const FFHWEncodePictureOperation vulkan_base_encode_ops = { - .priv_size = sizeof(FFVulkanEncodePicture), - .init = &vulkan_encode_init, - .issue = &vulkan_encode_issue, - .output = &vulkan_encode_output, - .free = &vulkan_encode_free, -}; - -int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt) -{ - FFVulkanEncodeContext *ctx = avctx->priv_data; - return ff_hw_base_encode_receive_packet(&ctx->base, avctx, pkt); -} - -static int vulkan_encode_create_dpb(AVCodecContext *avctx, FFVulkanEncodeContext *ctx) -{ - int err; - FFHWBaseEncodeContext *base_ctx = &ctx->base; - AVVulkanFramesContext *hwfc; - - enum AVPixelFormat dpb_format; - err = ff_hw_base_get_recon_format(base_ctx, NULL, &dpb_format); - if (err < 0) - return err; - - base_ctx->recon_frames_ref = av_hwframe_ctx_alloc(base_ctx->device_ref); - if (!base_ctx->recon_frames_ref) - return AVERROR(ENOMEM); - - 
base_ctx->recon_frames = (AVHWFramesContext *)base_ctx->recon_frames_ref->data; - hwfc = (AVVulkanFramesContext *)base_ctx->recon_frames->hwctx; - - base_ctx->recon_frames->format = AV_PIX_FMT_VULKAN; - base_ctx->recon_frames->sw_format = dpb_format; - base_ctx->recon_frames->width = avctx->width; - base_ctx->recon_frames->height = avctx->height; - - hwfc->format[0] = ctx->pic_format; - hwfc->create_pnext = &ctx->profile_list; - hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; - hwfc->usage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR; - - if (ctx->common.layered_dpb) - hwfc->nb_layers = ctx->caps.maxDpbSlots; - - err = av_hwframe_ctx_init(base_ctx->recon_frames_ref); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to initialise DPB frame context: %s\n", - av_err2str(err)); - return err; - } - - if (ctx->common.layered_dpb) { - ctx->common.layered_frame = av_frame_alloc(); - if (!ctx->common.layered_frame) - return AVERROR(ENOMEM); - - err = av_hwframe_get_buffer(base_ctx->recon_frames_ref, - ctx->common.layered_frame, 0); - if (err < 0) - return AVERROR(ENOMEM); - - err = ff_vk_create_view(&ctx->s, &ctx->common, - &ctx->common.layered_view, - &ctx->common.layered_aspect, - (AVVkFrame *)ctx->common.layered_frame->data[0], - hwfc->format[0], 1); - if (err < 0) - return err; - - av_buffer_unref(&base_ctx->recon_frames_ref); - } - - return 0; -} - -static av_cold int init_rc(AVCodecContext *avctx, FFVulkanEncodeContext *ctx) -{ - if (ctx->opts.qp) { - ctx->explicit_qp = ctx->opts.qp; - } else if (avctx->global_quality > 0) { - if (avctx->flags & AV_CODEC_FLAG_QSCALE) - ctx->explicit_qp = avctx->global_quality / FF_QP2LAMBDA; - else - ctx->explicit_qp = avctx->global_quality; - } - - if (ctx->opts.rc_mode == FF_VK_RC_MODE_AUTO) { - if (ctx->explicit_qp >= 0) { - ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; - } else if (avctx->global_quality > 0) { - if (avctx->flags & AV_CODEC_FLAG_QSCALE) - ctx->explicit_qp = avctx->global_quality / FF_QP2LAMBDA; 
- else - ctx->explicit_qp = avctx->global_quality; - ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; - } else if (avctx->bit_rate) { - if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR) - ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR; - else if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR) - ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR; - else - ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR; - } else { - ctx->explicit_qp = 18; - ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; - av_log(avctx, AV_LOG_WARNING, "No rate control settings specified, using fixed QP = %i\n", - ctx->explicit_qp); - } - } else if (ctx->opts.rc_mode != VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR && - !avctx->bit_rate) { - av_log(avctx, AV_LOG_WARNING, "No bitrate specified!\n"); - return AVERROR(EINVAL); - } - - if (ctx->opts.rc_mode && !(ctx->enc_caps.rateControlModes & ctx->opts.rc_mode)) { - static const char *rc_modes[] = { - [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR] = "default", - [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR] = "cqp", - [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR] = "cbr", - [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR] = "vbr", - }; - av_log(avctx, AV_LOG_ERROR, "Unsupported rate control mode %s, supported are:\n", - rc_modes[FFMIN(FF_ARRAY_ELEMS(rc_modes), ctx->opts.rc_mode)]); - av_log(avctx, AV_LOG_ERROR, " %s\n", rc_modes[0]); - for (int i = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; - i <= VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR; i <<= 1) { - if (!(ctx->enc_caps.rateControlModes & i)) - continue; - av_log(avctx, AV_LOG_ERROR, " %s\n", rc_modes[i]); - } - return AVERROR(ENOTSUP); - } - - return 0; -} - -av_cold int ff_vulkan_write_global_header(AVCodecContext *avctx, - FFVulkanEncodeContext *ctx) -{ - int err; - - /* Write extradata if needed */ - if 
(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) { - uint8_t data[4096]; - size_t data_len = sizeof(data); - - err = ctx->codec->write_sequence_headers(avctx, NULL, data, &data_len); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to write sequence header " - "for extradata: %d.\n", err); - return err; - } else { - avctx->extradata_size = data_len; - avctx->extradata = av_mallocz(avctx->extradata_size + - AV_INPUT_BUFFER_PADDING_SIZE); - if (!avctx->extradata) { - err = AVERROR(ENOMEM); - return err; - } - memcpy(avctx->extradata, data, avctx->extradata_size); - } - } - - return 0; -} - -av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, - const FFVulkanEncodeDescriptor *vk_desc, - const FFVulkanCodec *codec, - void *codec_caps, void *quality_pnext) -{ - int i, err; - VkResult ret; - FFVulkanFunctions *vk = &ctx->s.vkfn; - FFVulkanContext *s = &ctx->s; - FFHWBaseEncodeContext *base_ctx = &ctx->base; - - const AVPixFmtDescriptor *desc; - - VkVideoFormatPropertiesKHR *ret_info; - uint32_t nb_out_fmts = 0; - - VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR quality_info; - - VkQueryPoolVideoEncodeFeedbackCreateInfoKHR query_create; - - VkVideoSessionCreateInfoKHR session_create = { - .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR, - }; - VkPhysicalDeviceVideoFormatInfoKHR fmt_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR, - .pNext = &ctx->profile_list, - }; - - if (!avctx->hw_frames_ctx) { - av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is " - "required to associate the encoding device.\n"); - return AVERROR(EINVAL); - } - - ctx->base.op = &vulkan_base_encode_ops; - ctx->codec = codec; - - s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx); - s->frames = (AVHWFramesContext *)s->frames_ref->data; - s->hwfc = s->frames->hwctx; - - s->device = (AVHWDeviceContext *)s->frames->device_ref->data; - s->hwctx = s->device->hwctx; - - desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); - if 
(!desc) - return AVERROR(EINVAL); - - s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, - s->hwctx->nb_enabled_dev_extensions); - - if (!(s->extensions & FF_VK_EXT_VIDEO_ENCODE_QUEUE)) { - av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", - VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME); - return AVERROR(ENOSYS); - } else if (!(s->extensions & FF_VK_EXT_VIDEO_MAINTENANCE_1)) { - av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", - VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME); - return AVERROR(ENOSYS); - } else if (!(s->extensions & vk_desc->encode_extension)) { - av_log(avctx, AV_LOG_ERROR, "Device does not support encoding %s!\n", - avcodec_get_name(avctx->codec_id)); - return AVERROR(ENOSYS); - } - - /* Load functions */ - err = ff_vk_load_functions(s->device, vk, s->extensions, 1, 1); - if (err < 0) - return err; - - /* Create queue context */ - ctx->qf_enc = ff_vk_qf_find(s, VK_QUEUE_VIDEO_ENCODE_BIT_KHR, vk_desc->encode_op); - if (!ctx->qf_enc) { - av_log(avctx, AV_LOG_ERROR, "Encoding of %s is not supported by this device\n", - avcodec_get_name(avctx->codec_id)); - return err; - } - - /* Load all properties */ - err = ff_vk_load_props(s); - if (err < 0) - return err; - - /* Set tuning */ - ctx->usage_info = (VkVideoEncodeUsageInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_USAGE_INFO_KHR, - .videoUsageHints = ctx->opts.usage, - .videoContentHints = ctx->opts.content, - .tuningMode = ctx->opts.tune, - }; - - /* Load up the profile now, needed for caps and to create a query pool */ - ctx->profile.sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR; - ctx->profile.pNext = &ctx->usage_info; - ctx->profile.videoCodecOperation = vk_desc->encode_op; - ctx->profile.chromaSubsampling = ff_vk_subsampling_from_av_desc(desc); - ctx->profile.lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth); - ctx->profile.chromaBitDepth = ctx->profile.lumaBitDepth; - - /* Setup a profile */ - err = 
codec->init_profile(avctx, &ctx->profile, &ctx->usage_info); - if (err < 0) - return err; - - ctx->profile_list.sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR; - ctx->profile_list.profileCount = 1; - ctx->profile_list.pProfiles = &ctx->profile; - - /* Get the capabilities of the encoder for the given profile */ - ctx->enc_caps.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR; - ctx->enc_caps.pNext = codec_caps; - ctx->caps.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; - ctx->caps.pNext = &ctx->enc_caps; - - ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, - &ctx->profile, - &ctx->caps); - if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: " - "%s profile \"%s\" not supported!\n", - avcodec_get_name(avctx->codec_id), - avcodec_profile_name(avctx->codec_id, avctx->profile)); - return AVERROR(EINVAL); - } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: " - "format (%s) not supported!\n", - av_get_pix_fmt_name(avctx->sw_pix_fmt)); - return AVERROR(EINVAL); - } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT || - ret == VK_ERROR_FORMAT_NOT_SUPPORTED) { - return AVERROR(EINVAL); - } else if (ret != VK_SUCCESS) { - return AVERROR_EXTERNAL; - } - - err = init_rc(avctx, ctx); - if (err < 0) - return err; - - /* Create command and query pool */ - query_create = (VkQueryPoolVideoEncodeFeedbackCreateInfoKHR) { - .sType = VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR, - .pNext = &ctx->profile, - .encodeFeedbackFlags = ctx->enc_caps.supportedEncodeFeedbackFlags & - (~VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_HAS_OVERRIDES_BIT_KHR), - }; - err = ff_vk_exec_pool_init(s, ctx->qf_enc, &ctx->enc_pool, base_ctx->async_depth, - 1, VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR, 0, - &query_create); - if (err < 0) - return err; - - if (ctx->opts.quality > ctx->enc_caps.maxQualityLevels) { - 
av_log(avctx, AV_LOG_ERROR, "Invalid quality level %i: allowed range is " - "0 to %i\n", - ctx->opts.quality, ctx->enc_caps.maxQualityLevels); - return AVERROR(EINVAL); - } - - /* Get quality properties for the profile and quality level */ - quality_info = (VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR) { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR, - .pVideoProfile = &ctx->profile, - .qualityLevel = ctx->opts.quality, - }; - ctx->quality_props = (VkVideoEncodeQualityLevelPropertiesKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_PROPERTIES_KHR, - .pNext = quality_pnext, - }; - ret = vk->GetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR(s->hwctx->phys_dev, - &quality_info, - &ctx->quality_props); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; - - /* Printout informative properties */ - av_log(avctx, AV_LOG_VERBOSE, "Encoder capabilities for %s profile \"%s\":\n", - avcodec_get_name(avctx->codec_id), - avcodec_profile_name(avctx->codec_id, avctx->profile)); - av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n", - ctx->caps.minCodedExtent.width, ctx->caps.maxCodedExtent.width); - av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n", - ctx->caps.minCodedExtent.height, ctx->caps.maxCodedExtent.height); - av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n", - ctx->caps.pictureAccessGranularity.width); - av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n", - ctx->caps.pictureAccessGranularity.height); - av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n", - ctx->caps.minBitstreamBufferOffsetAlignment); - av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n", - ctx->caps.minBitstreamBufferSizeAlignment); - av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n", - ctx->caps.maxDpbSlots); - av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n", - ctx->caps.maxActiveReferencePictures); - av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i 
(driver), %i.%i.%i (compiled)\n", - CODEC_VER(ctx->caps.stdHeaderVersion.specVersion), - CODEC_VER(vk_desc->ext_props.specVersion)); - av_log(avctx, AV_LOG_VERBOSE, " Encoder max quality: %i\n", - ctx->enc_caps.maxQualityLevels); - av_log(avctx, AV_LOG_VERBOSE, " Encoder image width alignment: %i\n", - ctx->enc_caps.encodeInputPictureGranularity.width); - av_log(avctx, AV_LOG_VERBOSE, " Encoder image height alignment: %i\n", - ctx->enc_caps.encodeInputPictureGranularity.height); - av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n", - ctx->caps.flags ? "" : - " none", - ctx->caps.flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ? - " protected" : "", - ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ? - " separate_references" : ""); - - /* Setup width/height alignment */ - base_ctx->surface_width = avctx->coded_width = - FFALIGN(avctx->width, ctx->enc_caps.encodeInputPictureGranularity.width); - base_ctx->surface_height = avctx->coded_height = - FFALIGN(avctx->height, ctx->enc_caps.encodeInputPictureGranularity.height); - - /* Setup slice width/height */ - base_ctx->slice_block_width = ctx->enc_caps.encodeInputPictureGranularity.width; - base_ctx->slice_block_height = ctx->enc_caps.encodeInputPictureGranularity.height; - - /* Check if encoding is possible with the given parameters */ - if (avctx->coded_width < ctx->caps.minCodedExtent.width || - avctx->coded_height < ctx->caps.minCodedExtent.height || - avctx->coded_width > ctx->caps.maxCodedExtent.width || - avctx->coded_height > ctx->caps.maxCodedExtent.height) { - av_log(avctx, AV_LOG_ERROR, "Input of %ix%i too large for encoder limits: %ix%i max\n", - avctx->coded_width, avctx->coded_height, - ctx->caps.minCodedExtent.width, ctx->caps.minCodedExtent.height); - return AVERROR(EINVAL); - } - - fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR; - - ctx->common.layered_dpb = !(ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); - - /* Get the 
supported image formats */ - ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev, - &fmt_info, - &nb_out_fmts, NULL); - if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || - (!nb_out_fmts && ret == VK_SUCCESS)) { - return AVERROR(EINVAL); - } else if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts); - if (!ret_info) - return AVERROR(ENOMEM); - - for (int i = 0; i < nb_out_fmts; i++) - ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; - - ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev, - &fmt_info, - &nb_out_fmts, ret_info); - if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || - (!nb_out_fmts && ret == VK_SUCCESS)) { - av_free(ret_info); - return AVERROR(EINVAL); - } else if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", - ff_vk_ret2str(ret)); - av_free(ret_info); - return AVERROR_EXTERNAL; - } - - av_log(avctx, AV_LOG_VERBOSE, "Supported input formats:\n"); - for (i = 0; i < nb_out_fmts; i++) - av_log(avctx, AV_LOG_VERBOSE, " %i: %i\n", i, ret_info[i].format); - - for (i = 0; i < nb_out_fmts; i++) { - if (ff_vk_pix_fmt_from_vkfmt(ret_info[i].format) == s->frames->sw_format) { - ctx->pic_format = ret_info[i].format; - break; - } - } - - av_free(ret_info); - - if (i == nb_out_fmts) { - av_log(avctx, AV_LOG_ERROR, "Pixel format %s of input frames not supported!\n", - av_get_pix_fmt_name(s->frames->sw_format)); - return AVERROR(EINVAL); - } - - /* Create session */ - session_create.pVideoProfile = &ctx->profile; - session_create.flags = 0x0; - session_create.queueFamilyIndex = ctx->qf_enc->idx; - session_create.maxCodedExtent = ctx->caps.maxCodedExtent; - session_create.maxDpbSlots = ctx->caps.maxDpbSlots; - session_create.maxActiveReferencePictures = ctx->caps.maxActiveReferencePictures; - 
session_create.pictureFormat = ctx->pic_format; - session_create.referencePictureFormat = session_create.pictureFormat; - session_create.pStdHeaderVersion = &vk_desc->ext_props; - - err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); - if (err < 0) - return err; - - err = ff_hw_base_encode_init(avctx, &ctx->base); - if (err < 0) - return err; - - err = vulkan_encode_create_dpb(avctx, ctx); - if (err < 0) - return err; - - base_ctx->async_encode = 1; - base_ctx->encode_fifo = av_fifo_alloc2(base_ctx->async_depth, - sizeof(FFVulkanEncodePicture *), 0); - if (!base_ctx->encode_fifo) - return AVERROR(ENOMEM); - - return 0; -} - -int ff_vulkan_encode_create_session_params(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, - void *codec_params_pnext) -{ - VkResult ret; - FFVulkanFunctions *vk = &ctx->s.vkfn; - FFVulkanContext *s = &ctx->s; - - VkVideoEncodeQualityLevelInfoKHR q_info; - VkVideoSessionParametersCreateInfoKHR session_params_create; - - q_info = (VkVideoEncodeQualityLevelInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR, - .pNext = codec_params_pnext, - .qualityLevel = ctx->opts.quality, - }; - session_params_create = (VkVideoSessionParametersCreateInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pNext = &q_info, - .videoSession = ctx->common.session, - .videoSessionParametersTemplate = VK_NULL_HANDLE, - }; - - /* Create session parameters */ - ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, - s->hwctx->alloc, &ctx->session_params); - if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - return 0; -} -- 2.49.1 From e99f7e3241aab66af067a3eb3dc8125d1f63c163 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:31:28 +0000 Subject: [PATCH 070/118] Changing vulkan file directory --- 
libavcodec/vulkan/vulkan_encode.c | 1054 +++++++++++++++++++++++++++++ 1 file changed, 1054 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_encode.c diff --git a/libavcodec/vulkan/vulkan_encode.c b/libavcodec/vulkan/vulkan_encode.c new file mode 100644 index 0000000000..849504131c --- /dev/null +++ b/libavcodec/vulkan/vulkan_encode.c @@ -0,0 +1,1054 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mem.h" +#include "libavutil/avassert.h" +#include "vulkan_encode.h" +#include "libavutil/config.h" + +#include "../../libavutil/vulkan/vulkan_loader.h" + +/* NULL-terminated hardware configuration list; its single entry is built by HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN). */ const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = { + HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), + NULL, +}; + +/** Tear down an encoder context. The order is: free the execution pool, destroy the session parameters if any were created, close the base encoder, release the packet buffer pool, uninitialise the common video state and finally the Vulkan context itself. */ av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx) +{ + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &s->vkfn; + + /* Wait on and free execution pool */ + ff_vk_exec_pool_free(s, &ctx->enc_pool); + + /* Destroy the session params */ + if (ctx->session_params) + vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, + ctx->session_params, + s->hwctx->alloc); + + ff_hw_base_encode_close(&ctx->base); + + av_buffer_pool_uninit(&ctx->buf_pool); + + ff_vk_video_common_uninit(s, &ctx->common); + + ff_vk_uninit(s); +} +
+/** Per-picture setup. Allocates the codec's private picture data when picture_priv_data_size > 0, creates an image view for the input frame and, when the DPB is not layered, a view for the picture's own reconstructed frame; with a layered DPB the shared layered view/aspect are reused instead. Returns 0 or a negative error code. */ static int vulkan_encode_init(AVCodecContext *avctx, FFHWBaseEncodePicture *pic) +{ + int err; + FFVulkanEncodeContext *ctx = avctx->priv_data; + FFVulkanEncodePicture *vp = pic->priv; + + AVFrame *f = pic->input_image; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + if (ctx->codec->picture_priv_data_size > 0) { + pic->codec_priv = av_mallocz(ctx->codec->picture_priv_data_size); + if (!pic->codec_priv) + return AVERROR(ENOMEM); + } + + /* Input image view */ + err = ff_vk_create_view(&ctx->s, &ctx->common, + &vp->in.view, &vp->in.aspect, + vkf, vkfc->format[0], 0); + if (err < 0) + return err; + + /* Reference view */ + if (!ctx->common.layered_dpb) { + AVFrame *rf = pic->recon_image; + AVVkFrame *rvkf = (AVVkFrame *)rf->data[0]; + err = ff_vk_create_view(&ctx->s, &ctx->common, + &vp->dpb.view, &vp->dpb.aspect, + rvkf, ctx->pic_format, 1); + if (err < 0) + return err; + } else { + vp->dpb.view = ctx->common.layered_view; + vp->dpb.aspect = ctx->common.layered_aspect; + } + + return 0; +} + +/** Per-picture teardown. Destroys the input view, destroys the DPB view only when it belongs to this picture (non-layered DPB; the layered view is shared), and clears the slot table entry indexed by this picture's dpb_slot.slotIndex. Always returns 0. */ static int vulkan_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic) +{ + FFVulkanEncodeContext *ctx = avctx->priv_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + FFVulkanEncodePicture *vp = pic->priv; + + if (vp->in.view) + vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->in.view, + ctx->s.hwctx->alloc); + + if (!ctx->common.layered_dpb && vp->dpb.view) + vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->dpb.view, + ctx->s.hwctx->alloc); + + ctx->slots[vp->dpb_slot.slotIndex] = NULL; + + return 0; +} + +/** Fill the per-picture rate control structures. *rc_info receives the configured mode; for modes above DISABLED, *rc_layer is filled from the codec context's bitrate and framerate and attached as a layer of *rc_info. Both are then passed to the codec's init_pic_rc callback, whose result is returned. */ static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + VkVideoEncodeRateControlInfoKHR *rc_info, + VkVideoEncodeRateControlLayerInfoKHR *rc_layer /* Goes in ^ */) +{ + FFVulkanEncodeContext *ctx = avctx->priv_data; + + *rc_info = (VkVideoEncodeRateControlInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR, +
.rateControlMode = ctx->opts.rc_mode, + }; + + if (ctx->opts.rc_mode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { + *rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR, + .averageBitrate = avctx->bit_rate, + .maxBitrate = avctx->rc_max_rate ? avctx->rc_max_rate : avctx->bit_rate, + .frameRateNumerator = avctx->framerate.num, + .frameRateDenominator = avctx->framerate.den, + }; + rc_info->layerCount++; + rc_info->pLayers = rc_layer; + } + + return ctx->codec->init_pic_rc(avctx, pic, rc_info, rc_layer); +} + +/** Record and submit the encode of one picture. Steps, in order: obtain a pooled bitstream buffer, set up rate control, claim the first free DPB slot, build the setup/reference slot list, let the codec fill its picture parameters, write sequence/extra/filler headers into the mapped buffer ahead of the slice data, then record the image barrier, begin-coding, one-time control command, the encode wrapped in a feedback query, and end-coding, and submit. On success the input image reference is released. Returns 0 or a negative error code. */ static int vulkan_encode_issue(AVCodecContext *avctx, + FFHWBaseEncodePicture *base_pic) +{ + FFVulkanEncodeContext *ctx = avctx->priv_data; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + const size_t size_align = ctx->caps.minBitstreamBufferSizeAlignment; + + FFVulkanEncodePicture *vp = base_pic->priv; + AVFrame *src = (AVFrame *)base_pic->input_image; + AVVkFrame *vkf = (AVVkFrame *)src->data[0]; + + int err, max_pkt_size; + + FFVkBuffer *sd_buf; + + int slot_index = -1; + FFVkExecContext *exec; + VkCommandBuffer cmd_buf; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Coding start/end */ + VkVideoBeginCodingInfoKHR encode_start; + VkVideoEndCodingInfoKHR encode_end = { + .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR, + }; + + VkVideoEncodeRateControlLayerInfoKHR rc_layer; + VkVideoEncodeRateControlInfoKHR rc_info; + VkVideoEncodeQualityLevelInfoKHR q_info; + VkVideoCodingControlInfoKHR encode_ctrl; + + VkVideoReferenceSlotInfoKHR ref_slot[37]; + VkVideoEncodeInfoKHR encode_info; + + /* Create packet data buffer */ + max_pkt_size = FFALIGN(3 * ctx->base.surface_width * ctx->base.surface_height + (1 << 16), + ctx->caps.minBitstreamBufferSizeAlignment); + + err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &vp->pkt_buf, + VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR, + &ctx->profile_list, max_pkt_size, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | +
VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + if (err < 0) + return err; + + sd_buf = (FFVkBuffer *)vp->pkt_buf->data; + + /* Setup rate control */ + err = init_pic_rc(avctx, base_pic, &rc_info, &rc_layer); + if (err < 0) + return err; + + q_info = (VkVideoEncodeQualityLevelInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR, + .pNext = &rc_info, + .qualityLevel = ctx->opts.quality, + }; + encode_ctrl = (VkVideoCodingControlInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR, + .pNext = &q_info, + .flags = VK_VIDEO_CODING_CONTROL_ENCODE_QUALITY_LEVEL_BIT_KHR | + VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR | + VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR, + }; + + /* Claim the first unused DPB slot for this picture; the assert below fires if none is free */ for (int i = 0; i < ctx->caps.maxDpbSlots; i++) { + if (ctx->slots[i] == NULL) { + slot_index = i; + ctx->slots[i] = base_pic; + break; + } + } + av_assert0(slot_index >= 0); + + /* Current picture's ref slot */ + vp->dpb_res = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .pNext = NULL, + .codedOffset = { 0 }, + .codedExtent = (VkExtent2D){ avctx->width, avctx->height }, + .baseArrayLayer = ctx->common.layered_dpb ?
slot_index : 0, + .imageViewBinding = vp->dpb.view, + }; + + vp->dpb_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = NULL, // Set later + .slotIndex = slot_index, + .pPictureResource = &vp->dpb_res, + }; + + encode_info = (VkVideoEncodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR, + .pNext = NULL, // Set later + .flags = 0x0, + .srcPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .pNext = NULL, + .codedOffset = { 0, 0 }, + .codedExtent = (VkExtent2D){ base_pic->input_image->width, + base_pic->input_image->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->in.view, + }, + .pSetupReferenceSlot = &vp->dpb_slot, + .referenceSlotCount = 0, + .pReferenceSlots = ref_slot, + .dstBuffer = sd_buf->buf, + .dstBufferOffset = 0, + .dstBufferRange = sd_buf->size, + .precedingExternallyEncodedBytes = 0, + }; + + /* Gather the DPB slots of every reference picture, across all reference lists */ for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) { + for (int j = 0; j < base_pic->nb_refs[i]; j++) { + FFHWBaseEncodePicture *ref = base_pic->refs[i][j]; + FFVulkanEncodePicture *rvp = ref->priv; + ref_slot[encode_info.referenceSlotCount++] = rvp->dpb_slot; + } + } + + /* Calling vkCmdBeginVideoCodingKHR requires to declare all references + * being enabled upfront, including the current frame's output ref. */ + ref_slot[encode_info.referenceSlotCount] = vp->dpb_slot; + ref_slot[encode_info.referenceSlotCount].slotIndex = -1; + + /* Setup picture parameters */ + err = ctx->codec->init_pic_params(avctx, base_pic, + &encode_info); + if (err < 0) + return err; + + encode_start = (VkVideoBeginCodingInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR, + .pNext = !base_pic->force_idr ?
&rc_info : NULL, + .videoSession = ctx->common.session, + .videoSessionParameters = ctx->session_params, + .referenceSlotCount = encode_info.referenceSlotCount + 1, + .pReferenceSlots = ref_slot, + }; + + /* Write header */ + if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) { + uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset; + size_t data_size = encode_info.dstBufferRange; + err = ctx->codec->write_sequence_headers(avctx, base_pic, hdr_dst, &data_size); + if (err < 0) + goto fail; + encode_info.dstBufferOffset += data_size; + encode_info.dstBufferRange -= data_size; + } + + /* Write extra units */ + if (ctx->codec->write_extra_headers) { + uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset; + size_t data_size = encode_info.dstBufferRange; + err = ctx->codec->write_extra_headers(avctx, base_pic, hdr_dst, &data_size); + if (err < 0) + goto fail; + encode_info.dstBufferOffset += data_size; + encode_info.dstBufferRange -= data_size; + } + + /* Align buffer offset to the required value with filler units */ + if (ctx->codec->write_filler) { + uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset; + size_t data_size = encode_info.dstBufferRange; + + uint32_t offset = encode_info.dstBufferOffset; + size_t offset_align = ctx->caps.minBitstreamBufferOffsetAlignment; + + uint32_t filler_data = FFALIGN(offset, offset_align) - offset; + + if (filler_data) { + /* The gap must at least hold the filler header; grow it by whole alignment steps until it does */ while (filler_data < ctx->codec->filler_header_size) + filler_data += offset_align; + + filler_data -= ctx->codec->filler_header_size; + + err = ctx->codec->write_filler(avctx, filler_data, + hdr_dst, &data_size); + if (err < 0) + goto fail; + + encode_info.dstBufferOffset += data_size; + encode_info.dstBufferRange -= data_size; + } + } + + vp->slices_offset = encode_info.dstBufferOffset; + + /* Align buffer size to the nearest lower alignment requirement.
*/ + encode_info.dstBufferRange -= size_align; + encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange, + size_align); + + /* Start command buffer recording */ + exec = vp->exec = ff_vk_exec_get(&ctx->s, &ctx->enc_pool); + ff_vk_exec_start(&ctx->s, exec); + cmd_buf = exec->buf; + + /* Output packet buffer */ + err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->pkt_buf, 1, 1); + if (err < 0) + goto fail; + + /* Source image */ + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + goto fail; + + /* Source image layout conversion */ + img_bar[nb_img_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + .srcAccessMask = vkf->access[0], + .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR, + .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR, + .oldLayout = vkf->layout[0], + .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR, + .srcQueueFamilyIndex = vkf->queue_family[0], + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = vkf->img[0], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = vp->in.aspect, + .layerCount = 1, + .levelCount = 1, + }, + }; + ff_vk_exec_update_frame(&ctx->s, exec, src, + &img_bar[nb_img_bar], &nb_img_bar); + + if (!ctx->common.layered_dpb) { + /* Source image's ref slot. + * No need to do a layout conversion, since the frames which are allocated + * with a DPB usage are automatically converted.
*/ + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, base_pic->recon_image, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + return err; + + /* All references */ + for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) { + for (int j = 0; j < base_pic->nb_refs[i]; j++) { + FFHWBaseEncodePicture *ref = base_pic->refs[i][j]; + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->recon_image, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + return err; + } + } + } else { + err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR, + VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR); + if (err < 0) + return err; + } + + /* Change image layout */ + vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Start, use parameters */ + vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start); + + /* Send control data */ + /* session_reset is set after the first submission, so the control command is recorded only once per context */ if (!ctx->session_reset) { + vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl); + ctx->session_reset++; + } + + /* Encode */ + vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0); + vk->CmdEncodeVideoKHR(cmd_buf, &encode_info); + vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0); + + /* End encoding */ + vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end); + + /* End recording and submit for execution */ + err = ff_vk_exec_submit(&ctx->s, vp->exec); + if (err < 0) + goto fail; + + /* We don't need to keep the input image any longer, its already ref'd */ + av_frame_free(&base_pic->input_image); + + return 0; + +fail: + return err; +} + +/** Wait on the picture's execution context via ff_vk_exec_wait() and mark the picture as complete. Returns immediately if it is already marked complete; asserts that the picture has been issued. */ static void vulkan_encode_wait(AVCodecContext *avctx, + FFHWBaseEncodePicture *base_pic) +{ + FFVulkanEncodeContext *ctx = avctx->priv_data; + FFVulkanEncodePicture *vp = base_pic->priv; + +
av_assert0(base_pic->encode_issued); + + if (base_pic->encode_complete) + return; + + ff_vk_exec_wait(&ctx->s, vp->exec); + base_pic->encode_complete = 1; +} + +/** Collect the result of an issued encode. Waits for the picture, reads its encode feedback query (query_data[0] = offset, [1] = size, [2] = status), makes the device-written bitstream visible to the host when the buffer memory is not host-coherent, then moves the buffer reference into pkt and sets the packet's output properties. Returns AVERROR(EINVAL) if the query is not ready, AVERROR_EXTERNAL if the query failed or did not report completion, otherwise the result of ff_hw_base_encode_set_output_property(). */ static int vulkan_encode_output(AVCodecContext *avctx, + FFHWBaseEncodePicture *base_pic, AVPacket *pkt) +{ + VkResult ret; + FFVulkanEncodePicture *vp = base_pic->priv; + FFVulkanEncodeContext *ctx = avctx->priv_data; + FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data; + uint32_t *query_data; + + vulkan_encode_wait(avctx, base_pic); + + ret = ff_vk_exec_get_query(&ctx->s, vp->exec, (void **)&query_data, 0); + if (ret == VK_NOT_READY) { + av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR(EINVAL); + } + + if (ret != VK_NOT_READY && ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (query_data[2] != VK_QUERY_RESULT_STATUS_COMPLETE_KHR) { + av_log(avctx, AV_LOG_ERROR, "Unable to encode: %u\n", query_data[2]); + return AVERROR_EXTERNAL; + } + + /* Invalidate buffer if needed */ + if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkMappedMemoryRange invalidate_buf; + + int offs = vp->slices_offset; + /* If the non-coherent alignment is greater than the bitstream buffer + * offset's alignment, and the offs value is not aligned already, + * align it to the previous alignment point.
*/ + if (ctx->s.props.properties.limits.nonCoherentAtomSize > + ctx->caps.minBitstreamBufferOffsetAlignment && offs && + (FFALIGN(offs, ctx->s.props.properties.limits.nonCoherentAtomSize) != offs)) { + offs -= ctx->s.props.properties.limits.nonCoherentAtomSize; + offs = FFALIGN(FFMAX(offs, 0), ctx->s.props.properties.limits.nonCoherentAtomSize); + } + + invalidate_buf = (VkMappedMemoryRange) { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = sd_buf->mem, + .offset = offs, + .size = VK_WHOLE_SIZE, + }; + + /* The device wrote this range and the host reads it next: that direction requires an invalidate, a flush only publishes host writes to the device */ vk->InvalidateMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf); + } + + pkt->data = sd_buf->mapped_mem; + pkt->size = vp->slices_offset + /* base offset */ + query_data[0] /* secondary offset */ + + query_data[1] /* size */; + + /* Move reference */ + pkt->buf = vp->pkt_buf; + vp->pkt_buf = NULL; + + av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n", + base_pic->display_order, base_pic->encode_order); + + return ff_hw_base_encode_set_output_property(&ctx->base, avctx, + base_pic, pkt, + ctx->codec->flags & VK_ENC_FLAG_NO_DELAY); +} + +/* Per-picture callbacks (init/issue/output/free) and the size of the per-picture private data, as installed into ctx->base.op */ static const FFHWEncodePictureOperation vulkan_base_encode_ops = { + .priv_size = sizeof(FFVulkanEncodePicture), + .init = &vulkan_encode_init, + .issue = &vulkan_encode_issue, + .output = &vulkan_encode_output, + .free = &vulkan_encode_free, +}; + +/** Thin wrapper: forwards to ff_hw_base_encode_receive_packet() using the base context embedded in the codec's private data. */ int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt) +{ + FFVulkanEncodeContext *ctx = avctx->priv_data; + return ff_hw_base_encode_receive_packet(&ctx->base, avctx, pkt); +} + +/** Allocate and initialise the frames context used for reconstructed (DPB) pictures. With a layered DPB it additionally allocates the single layered frame, creates its view and then drops the frames context reference. Returns 0 or a negative error code. */ static int vulkan_encode_create_dpb(AVCodecContext *avctx, FFVulkanEncodeContext *ctx) +{ + int err; + FFHWBaseEncodeContext *base_ctx = &ctx->base; + AVVulkanFramesContext *hwfc; + + enum AVPixelFormat dpb_format; + err = ff_hw_base_get_recon_format(base_ctx, NULL, &dpb_format); + if (err < 0) + return err; + + base_ctx->recon_frames_ref = av_hwframe_ctx_alloc(base_ctx->device_ref); + if (!base_ctx->recon_frames_ref) + return AVERROR(ENOMEM); + +
base_ctx->recon_frames = (AVHWFramesContext *)base_ctx->recon_frames_ref->data; + hwfc = (AVVulkanFramesContext *)base_ctx->recon_frames->hwctx; + + base_ctx->recon_frames->format = AV_PIX_FMT_VULKAN; + base_ctx->recon_frames->sw_format = dpb_format; + base_ctx->recon_frames->width = avctx->width; + base_ctx->recon_frames->height = avctx->height; + + hwfc->format[0] = ctx->pic_format; + hwfc->create_pnext = &ctx->profile_list; + hwfc->tiling = VK_IMAGE_TILING_OPTIMAL; + hwfc->usage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR; + + if (ctx->common.layered_dpb) + hwfc->nb_layers = ctx->caps.maxDpbSlots; + + err = av_hwframe_ctx_init(base_ctx->recon_frames_ref); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to initialise DPB frame context: %s\n", + av_err2str(err)); + return err; + } + + if (ctx->common.layered_dpb) { + ctx->common.layered_frame = av_frame_alloc(); + if (!ctx->common.layered_frame) + return AVERROR(ENOMEM); + + err = av_hwframe_get_buffer(base_ctx->recon_frames_ref, + ctx->common.layered_frame, 0); + /* Propagate the actual failure reason instead of always reporting ENOMEM */ if (err < 0) + return err; + + err = ff_vk_create_view(&ctx->s, &ctx->common, + &ctx->common.layered_view, + &ctx->common.layered_aspect, + (AVVkFrame *)ctx->common.layered_frame->data[0], + hwfc->format[0], 1); + if (err < 0) + return err; + + av_buffer_unref(&base_ctx->recon_frames_ref); + } + + return 0; +} + +/** Resolve the rate control configuration. An explicit QP is taken from the qp option or derived from global_quality. When the mode option is AUTO, the mode becomes fixed-QP if a QP is known, otherwise VBR, CBR or default (in that order of preference, according to the reported capabilities) when a bitrate is set, otherwise fixed-QP with QP 18. Returns 0 on success, AVERROR(EINVAL) when an explicitly chosen non-CQP mode has no bitrate, or AVERROR(ENOTSUP) when the chosen mode is not among the reported capabilities. */ static av_cold int init_rc(AVCodecContext *avctx, FFVulkanEncodeContext *ctx) +{ + if (ctx->opts.qp) { + ctx->explicit_qp = ctx->opts.qp; + } else if (avctx->global_quality > 0) { + if (avctx->flags & AV_CODEC_FLAG_QSCALE) + ctx->explicit_qp = avctx->global_quality / FF_QP2LAMBDA; + else + ctx->explicit_qp = avctx->global_quality; + } + + if (ctx->opts.rc_mode == FF_VK_RC_MODE_AUTO) { + if (ctx->explicit_qp >= 0) { + ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; + } else if (avctx->global_quality > 0) { + if (avctx->flags & AV_CODEC_FLAG_QSCALE) + ctx->explicit_qp = avctx->global_quality / FF_QP2LAMBDA;
+ else + ctx->explicit_qp = avctx->global_quality; + ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; + } else if (avctx->bit_rate) { + if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR) + ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR; + else if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR) + ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR; + else + ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR; + } else { + ctx->explicit_qp = 18; + ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; + av_log(avctx, AV_LOG_WARNING, "No rate control settings specified, using fixed QP = %i\n", + ctx->explicit_qp); + } + } else if (ctx->opts.rc_mode != VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR && + !avctx->bit_rate) { + av_log(avctx, AV_LOG_WARNING, "No bitrate specified!\n"); + return AVERROR(EINVAL); + } + + if (ctx->opts.rc_mode && !(ctx->enc_caps.rateControlModes & ctx->opts.rc_mode)) { + static const char *rc_modes[] = { + [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR] = "default", + [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR] = "cqp", + [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR] = "cbr", + [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR] = "vbr", + }; + /* The table is indexed by flag value, so it has an unnamed (NULL) gap and its last valid index is FF_ARRAY_ELEMS - 1: look the name up with a bounds check and fall back to "unknown" rather than reading past the end or passing NULL to %s */ const char *mode_name = ctx->opts.rc_mode < FF_ARRAY_ELEMS(rc_modes) ? + rc_modes[ctx->opts.rc_mode] : NULL; + av_log(avctx, AV_LOG_ERROR, "Unsupported rate control mode %s, supported are:\n", + mode_name ? mode_name : "unknown"); + av_log(avctx, AV_LOG_ERROR, " %s\n", rc_modes[0]); + for (int i = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; + i <= VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR; i <<= 1) { + if (!(ctx->enc_caps.rateControlModes & i)) + continue; + av_log(avctx, AV_LOG_ERROR, " %s\n", rc_modes[i]); + } + return AVERROR(ENOTSUP); + } + + return 0; +} + +/** When AV_CODEC_FLAG_GLOBAL_HEADER is set, ask the codec to write its sequence headers and store them as the codec context's extradata; otherwise do nothing. Returns 0 on success, the codec callback's error, or AVERROR(ENOMEM) if the extradata allocation fails. */ av_cold int ff_vulkan_write_global_header(AVCodecContext *avctx, + FFVulkanEncodeContext *ctx) +{ + int err; + + /* Write extradata if needed */ + if
(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) { + uint8_t data[4096]; + size_t data_len = sizeof(data); + + err = ctx->codec->write_sequence_headers(avctx, NULL, data, &data_len); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write sequence header " + "for extradata: %d.\n", err); + return err; + } else { + avctx->extradata_size = data_len; + avctx->extradata = av_mallocz(avctx->extradata_size + + AV_INPUT_BUFFER_PADDING_SIZE); + if (!avctx->extradata) { + err = AVERROR(ENOMEM); + return err; + } + memcpy(avctx->extradata, data, avctx->extradata_size); + } + } + + return 0; +} + +av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + const FFVulkanEncodeDescriptor *vk_desc, + const FFVulkanCodec *codec, + void *codec_caps, void *quality_pnext) +{ + int i, err; + VkResult ret; + FFVulkanFunctions *vk = &ctx->s.vkfn; + FFVulkanContext *s = &ctx->s; + FFHWBaseEncodeContext *base_ctx = &ctx->base; + + const AVPixFmtDescriptor *desc; + + VkVideoFormatPropertiesKHR *ret_info; + uint32_t nb_out_fmts = 0; + + VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR quality_info; + + VkQueryPoolVideoEncodeFeedbackCreateInfoKHR query_create; + + VkVideoSessionCreateInfoKHR session_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR, + }; + VkPhysicalDeviceVideoFormatInfoKHR fmt_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR, + .pNext = &ctx->profile_list, + }; + + if (!avctx->hw_frames_ctx) { + av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is " + "required to associate the encoding device.\n"); + return AVERROR(EINVAL); + } + + ctx->base.op = &vulkan_base_encode_ops; + ctx->codec = codec; + + s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx); + s->frames = (AVHWFramesContext *)s->frames_ref->data; + s->hwfc = s->frames->hwctx; + + s->device = (AVHWDeviceContext *)s->frames->device_ref->data; + s->hwctx = s->device->hwctx; + + desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + if 
(!desc) + return AVERROR(EINVAL); + + s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, + s->hwctx->nb_enabled_dev_extensions); + + if (!(s->extensions & FF_VK_EXT_VIDEO_ENCODE_QUEUE)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME); + return AVERROR(ENOSYS); + } else if (!(s->extensions & FF_VK_EXT_VIDEO_MAINTENANCE_1)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n", + VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME); + return AVERROR(ENOSYS); + } else if (!(s->extensions & vk_desc->encode_extension)) { + av_log(avctx, AV_LOG_ERROR, "Device does not support encoding %s!\n", + avcodec_get_name(avctx->codec_id)); + return AVERROR(ENOSYS); + } + + /* Load functions */ + err = ff_vk_load_functions(s->device, vk, s->extensions, 1, 1); + if (err < 0) + return err; + + /* Create queue context */ + ctx->qf_enc = ff_vk_qf_find(s, VK_QUEUE_VIDEO_ENCODE_BIT_KHR, vk_desc->encode_op); + if (!ctx->qf_enc) { + av_log(avctx, AV_LOG_ERROR, "Encoding of %s is not supported by this device\n", + avcodec_get_name(avctx->codec_id)); + return err; + } + + /* Load all properties */ + err = ff_vk_load_props(s); + if (err < 0) + return err; + + /* Set tuning */ + ctx->usage_info = (VkVideoEncodeUsageInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_USAGE_INFO_KHR, + .videoUsageHints = ctx->opts.usage, + .videoContentHints = ctx->opts.content, + .tuningMode = ctx->opts.tune, + }; + + /* Load up the profile now, needed for caps and to create a query pool */ + ctx->profile.sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR; + ctx->profile.pNext = &ctx->usage_info; + ctx->profile.videoCodecOperation = vk_desc->encode_op; + ctx->profile.chromaSubsampling = ff_vk_subsampling_from_av_desc(desc); + ctx->profile.lumaBitDepth = ff_vk_depth_from_av_depth(desc->comp[0].depth); + ctx->profile.chromaBitDepth = ctx->profile.lumaBitDepth; + + /* Setup a profile */ + err = 
codec->init_profile(avctx, &ctx->profile, &ctx->usage_info); + if (err < 0) + return err; + + ctx->profile_list.sType = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR; + ctx->profile_list.profileCount = 1; + ctx->profile_list.pProfiles = &ctx->profile; + + /* Get the capabilities of the encoder for the given profile */ + ctx->enc_caps.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR; + ctx->enc_caps.pNext = codec_caps; + ctx->caps.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR; + ctx->caps.pNext = &ctx->enc_caps; + + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, + &ctx->profile, + &ctx->caps); + if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: " + "%s profile \"%s\" not supported!\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, avctx->profile)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: " + "format (%s) not supported!\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); + return AVERROR(EINVAL); + } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT || + ret == VK_ERROR_FORMAT_NOT_SUPPORTED) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + return AVERROR_EXTERNAL; + } + + err = init_rc(avctx, ctx); + if (err < 0) + return err; + + /* Create command and query pool */ + query_create = (VkQueryPoolVideoEncodeFeedbackCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR, + .pNext = &ctx->profile, + .encodeFeedbackFlags = ctx->enc_caps.supportedEncodeFeedbackFlags & + (~VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_HAS_OVERRIDES_BIT_KHR), + }; + err = ff_vk_exec_pool_init(s, ctx->qf_enc, &ctx->enc_pool, base_ctx->async_depth, + 1, VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR, 0, + &query_create); + if (err < 0) + return err; + + if (ctx->opts.quality > ctx->enc_caps.maxQualityLevels) { + 
av_log(avctx, AV_LOG_ERROR, "Invalid quality level %i: allowed range is " + "0 to %i\n", + ctx->opts.quality, ctx->enc_caps.maxQualityLevels); + return AVERROR(EINVAL); + } + + /* Get quality properties for the profile and quality level */ + quality_info = (VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR, + .pVideoProfile = &ctx->profile, + .qualityLevel = ctx->opts.quality, + }; + ctx->quality_props = (VkVideoEncodeQualityLevelPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_PROPERTIES_KHR, + .pNext = quality_pnext, + }; + ret = vk->GetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR(s->hwctx->phys_dev, + &quality_info, + &ctx->quality_props); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + /* Printout informative properties */ + av_log(avctx, AV_LOG_VERBOSE, "Encoder capabilities for %s profile \"%s\":\n", + avcodec_get_name(avctx->codec_id), + avcodec_profile_name(avctx->codec_id, avctx->profile)); + av_log(avctx, AV_LOG_VERBOSE, " Width: from %i to %i\n", + ctx->caps.minCodedExtent.width, ctx->caps.maxCodedExtent.width); + av_log(avctx, AV_LOG_VERBOSE, " Height: from %i to %i\n", + ctx->caps.minCodedExtent.height, ctx->caps.maxCodedExtent.height); + av_log(avctx, AV_LOG_VERBOSE, " Width alignment: %i\n", + ctx->caps.pictureAccessGranularity.width); + av_log(avctx, AV_LOG_VERBOSE, " Height alignment: %i\n", + ctx->caps.pictureAccessGranularity.height); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream offset alignment: %"PRIu64"\n", + ctx->caps.minBitstreamBufferOffsetAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Bitstream size alignment: %"PRIu64"\n", + ctx->caps.minBitstreamBufferSizeAlignment); + av_log(avctx, AV_LOG_VERBOSE, " Maximum references: %u\n", + ctx->caps.maxDpbSlots); + av_log(avctx, AV_LOG_VERBOSE, " Maximum active references: %u\n", + ctx->caps.maxActiveReferencePictures); + av_log(avctx, AV_LOG_VERBOSE, " Codec header version: %i.%i.%i 
(driver), %i.%i.%i (compiled)\n", + CODEC_VER(ctx->caps.stdHeaderVersion.specVersion), + CODEC_VER(vk_desc->ext_props.specVersion)); + av_log(avctx, AV_LOG_VERBOSE, " Encoder max quality: %i\n", + ctx->enc_caps.maxQualityLevels); + av_log(avctx, AV_LOG_VERBOSE, " Encoder image width alignment: %i\n", + ctx->enc_caps.encodeInputPictureGranularity.width); + av_log(avctx, AV_LOG_VERBOSE, " Encoder image height alignment: %i\n", + ctx->enc_caps.encodeInputPictureGranularity.height); + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:%s%s%s\n", + ctx->caps.flags ? "" : + " none", + ctx->caps.flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ? + " protected" : "", + ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ? + " separate_references" : ""); + + /* Setup width/height alignment */ + base_ctx->surface_width = avctx->coded_width = + FFALIGN(avctx->width, ctx->enc_caps.encodeInputPictureGranularity.width); + base_ctx->surface_height = avctx->coded_height = + FFALIGN(avctx->height, ctx->enc_caps.encodeInputPictureGranularity.height); + + /* Setup slice width/height */ + base_ctx->slice_block_width = ctx->enc_caps.encodeInputPictureGranularity.width; + base_ctx->slice_block_height = ctx->enc_caps.encodeInputPictureGranularity.height; + + /* Check if encoding is possible with the given parameters */ + if (avctx->coded_width < ctx->caps.minCodedExtent.width || + avctx->coded_height < ctx->caps.minCodedExtent.height || + avctx->coded_width > ctx->caps.maxCodedExtent.width || + avctx->coded_height > ctx->caps.maxCodedExtent.height) { + av_log(avctx, AV_LOG_ERROR, "Input of %ix%i too large for encoder limits: %ix%i max\n", + avctx->coded_width, avctx->coded_height, + ctx->caps.minCodedExtent.width, ctx->caps.minCodedExtent.height); + return AVERROR(EINVAL); + } + + fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR; + + ctx->common.layered_dpb = !(ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR); + + /* Get the 
supported image formats */ + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, NULL); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts); + if (!ret_info) + return AVERROR(ENOMEM); + + for (int i = 0; i < nb_out_fmts; i++) + ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR; + + ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev, + &fmt_info, + &nb_out_fmts, ret_info); + if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED || + (!nb_out_fmts && ret == VK_SUCCESS)) { + av_free(ret_info); + return AVERROR(EINVAL); + } else if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n", + ff_vk_ret2str(ret)); + av_free(ret_info); + return AVERROR_EXTERNAL; + } + + av_log(avctx, AV_LOG_VERBOSE, "Supported input formats:\n"); + for (i = 0; i < nb_out_fmts; i++) + av_log(avctx, AV_LOG_VERBOSE, " %i: %i\n", i, ret_info[i].format); + + for (i = 0; i < nb_out_fmts; i++) { + if (ff_vk_pix_fmt_from_vkfmt(ret_info[i].format) == s->frames->sw_format) { + ctx->pic_format = ret_info[i].format; + break; + } + } + + av_free(ret_info); + + if (i == nb_out_fmts) { + av_log(avctx, AV_LOG_ERROR, "Pixel format %s of input frames not supported!\n", + av_get_pix_fmt_name(s->frames->sw_format)); + return AVERROR(EINVAL); + } + + /* Create session */ + session_create.pVideoProfile = &ctx->profile; + session_create.flags = 0x0; + session_create.queueFamilyIndex = ctx->qf_enc->idx; + session_create.maxCodedExtent = ctx->caps.maxCodedExtent; + session_create.maxDpbSlots = ctx->caps.maxDpbSlots; + session_create.maxActiveReferencePictures = ctx->caps.maxActiveReferencePictures; + 
session_create.pictureFormat = ctx->pic_format; + session_create.referencePictureFormat = session_create.pictureFormat; + session_create.pStdHeaderVersion = &vk_desc->ext_props; + + err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create); + if (err < 0) + return err; + + err = ff_hw_base_encode_init(avctx, &ctx->base); + if (err < 0) + return err; + + err = vulkan_encode_create_dpb(avctx, ctx); + if (err < 0) + return err; + + base_ctx->async_encode = 1; + base_ctx->encode_fifo = av_fifo_alloc2(base_ctx->async_depth, + sizeof(FFVulkanEncodePicture *), 0); + if (!base_ctx->encode_fifo) + return AVERROR(ENOMEM); + + return 0; +} + +int ff_vulkan_encode_create_session_params(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + void *codec_params_pnext) +{ + VkResult ret; + FFVulkanFunctions *vk = &ctx->s.vkfn; + FFVulkanContext *s = &ctx->s; + + VkVideoEncodeQualityLevelInfoKHR q_info; + VkVideoSessionParametersCreateInfoKHR session_params_create; + + q_info = (VkVideoEncodeQualityLevelInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR, + .pNext = codec_params_pnext, + .qualityLevel = ctx->opts.quality, + }; + session_params_create = (VkVideoSessionParametersCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &q_info, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = VK_NULL_HANDLE, + }; + + /* Create session parameters */ + ret = vk->CreateVideoSessionParametersKHR(s->hwctx->act_dev, &session_params_create, + s->hwctx->alloc, &ctx->session_params); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Unable to create Vulkan video session parameters: %s!\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; +} -- 2.49.1 From dbd82f7228f4d9a375892458ae093d5dc74e1c90 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:31:46 +0000 Subject: [PATCH 071/118] Changing vulkan file directory --- 
libavcodec/vulkan_encode.h | 259 ------------------------------------- 1 file changed, 259 deletions(-) delete mode 100644 libavcodec/vulkan_encode.h diff --git a/libavcodec/vulkan_encode.h b/libavcodec/vulkan_encode.h deleted file mode 100644 index 3df06e11d0..0000000000 --- a/libavcodec/vulkan_encode.h +++ /dev/null @@ -1,259 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_VULKAN_ENCODE_H -#define AVCODEC_VULKAN_ENCODE_H - -#include "codec_id.h" -#include "internal.h" - -#include "encode.h" -#include "hwconfig.h" - -#include "vulkan_video.h" -#include "hw_base_encode.h" - -typedef struct FFVulkanEncodeDescriptor { - enum AVCodecID codec_id; - FFVulkanExtensions encode_extension; - VkVideoCodecOperationFlagBitsKHR encode_op; - - VkExtensionProperties ext_props; -} FFVulkanEncodeDescriptor; - -typedef struct FFVulkanEncodePicture { - FFHWBaseEncodePicture base; - VkVideoPictureResourceInfoKHR dpb_res; - VkVideoReferenceSlotInfoKHR dpb_slot; - - struct { - VkImageView view; - VkImageAspectFlags aspect; - } in; - - struct { - VkImageView view; - VkImageAspectFlags aspect; - } dpb; - - void *codec_layer; - void *codec_rc_layer; - - FFVkExecContext *exec; - AVBufferRef *pkt_buf; - int slices_offset; -} FFVulkanEncodePicture; 
- -/** - * Callback for writing stream-level headers. - */ -typedef int (*vkenc_cb_write_stream_headers)(AVCodecContext *avctx, - uint8_t *data, size_t *data_len); - -/** - * Callback for initializing codec-specific picture headers. - */ -typedef int (*vkenc_cb_init_pic_headers)(AVCodecContext *avctx, - FFVulkanEncodePicture *pic); - -/** - * Callback for writing alignment data. - * Align is the value to align offset to. - */ -typedef int (*vkenc_cb_write_filler)(AVCodecContext *avctx, uint32_t filler, - uint8_t *data, size_t *data_len); - -/** - * Callback for writing any extra units requested. data_len must be set - * to the available size, and its value will be overwritten by the #bytes written - * to the output buffer. - */ -typedef int (*vkenc_cb_write_extra_headers)(AVCodecContext *avctx, - FFVulkanEncodePicture *pic, - uint8_t *data, size_t *data_len); - -typedef struct FFVulkanCodec { - /** - * Codec feature flags. - */ - int flags; -/* Codec output packet without timestamp delay, which means the - * output packet has same PTS and DTS. For AV1. */ -#define VK_ENC_FLAG_NO_DELAY 1 << 6 - - /** - * Size of the codec-specific picture struct. - */ - size_t picture_priv_data_size; - - /** - * Size of the filler header. - */ - size_t filler_header_size; - - /** - * Initialize codec-specific structs in a Vulkan profile. - */ - int (*init_profile)(AVCodecContext *avctx, VkVideoProfileInfoKHR *profile, - void *pnext); - - /** - * Initialize codec-specific rate control structures for a picture. - */ - int (*init_pic_rc)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - VkVideoEncodeRateControlInfoKHR *rc_info, - VkVideoEncodeRateControlLayerInfoKHR *rc_layer); - - /** - * Initialize codec-specific picture parameters. - */ - int (*init_pic_params)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - VkVideoEncodeInfoKHR *encode_info); - - /** - * Callback for writing stream headers. 
- */ - int (*write_sequence_headers)(AVCodecContext *avctx, - FFHWBaseEncodePicture *base_pic, - uint8_t *data, size_t *data_len); - - /** - * Callback for writing alignment data. - */ - int (*write_filler)(AVCodecContext *avctx, uint32_t filler, - uint8_t *data, size_t *data_len); - - /** - * Callback for writing any extra units requested. data_len must be set - * to the available size, and its value will be overwritten by the #bytes written - * to the output buffer. - */ - int (*write_extra_headers)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - uint8_t *data, size_t *data_len); -} FFVulkanCodec; - -typedef struct FFVkEncodeCommonOptions { - int qp; - int quality; - int profile; - int level; - int tier; - int async_depth; - VkVideoEncodeUsageFlagBitsKHR usage; - VkVideoEncodeContentFlagBitsKHR content; - VkVideoEncodeTuningModeKHR tune; - - VkVideoEncodeRateControlModeFlagBitsKHR rc_mode; -#define FF_VK_RC_MODE_AUTO 0xFFFFFFFF -} FFVkEncodeCommonOptions; - -typedef struct FFVulkanEncodeContext { - FFVulkanContext s; - FFVkVideoCommon common; - FFHWBaseEncodeContext base; - const FFVulkanCodec *codec; - - int explicit_qp; - int session_reset; - - /* Session parameters object, initialized by each codec independently - * and set here. 
*/ - VkVideoSessionParametersKHR session_params; - - AVBufferPool *buf_pool; - - VkFormat pic_format; - - FFVkEncodeCommonOptions opts; - - VkVideoProfileInfoKHR profile; - VkVideoProfileListInfoKHR profile_list; - VkVideoCapabilitiesKHR caps; - VkVideoEncodeQualityLevelPropertiesKHR quality_props; - VkVideoEncodeCapabilitiesKHR enc_caps; - VkVideoEncodeUsageInfoKHR usage_info; - - AVVulkanDeviceQueueFamily *qf_enc; - FFVkExecPool enc_pool; - - FFHWBaseEncodePicture *slots[32]; -} FFVulkanEncodeContext; - -#define VULKAN_ENCODE_COMMON_OPTIONS \ - { "qp", "Use an explicit constant quantizer for the whole stream", OFFSET(common.opts.qp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 255, FLAGS }, \ - { "quality", "Set encode quality (trades off against speed, higher is faster)", OFFSET(common.opts.quality), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \ - { "rc_mode", "Select rate control type", OFFSET(common.opts.rc_mode), AV_OPT_TYPE_INT, { .i64 = FF_VK_RC_MODE_AUTO }, 0, FF_VK_RC_MODE_AUTO, FLAGS, "rc_mode" }, \ - { "auto", "Choose mode automatically based on parameters", 0, AV_OPT_TYPE_CONST, { .i64 = FF_VK_RC_MODE_AUTO }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ - { "driver", "Driver-specific rate control", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ - { "cqp", "Constant quantizer mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ - { "cbr", "Constant bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ - { "vbr", "Variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ - { "tune", "Select tuning type", OFFSET(common.opts.tune), AV_OPT_TYPE_INT, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "tune" }, \ - { "default", 
"Default tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ - { "hq", "High quality tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ - { "ll", "Low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ - { "ull", "Ultra low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ - { "lossless", "Lossless mode tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ - { "usage", "Select usage type", OFFSET(common.opts.usage), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "usage" }, \ - { "default", "Default optimizations", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ - { "transcode", "Optimize for transcoding", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_TRANSCODING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ - { "stream", "Optimize for streaming", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_STREAMING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ - { "record", "Optimize for offline recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_RECORDING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ - { "conference", "Optimize for teleconferencing", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_CONFERENCING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ - { "content", "Select content type", OFFSET(common.opts.content), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "content" }, \ - { "default", "Default content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ - { 
"camera", "Camera footage", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_CAMERA_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ - { "desktop", "Screen recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DESKTOP_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ - { "rendered", "Game or 3D content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_RENDERED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" } - -/** - * Initialize encoder. - */ -av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, - const FFVulkanEncodeDescriptor *vk_desc, - const FFVulkanCodec *codec, - void *codec_caps, void *quality_pnext); - -/** - * Write out the extradata in case its needed. - */ -av_cold int ff_vulkan_write_global_header(AVCodecContext *avctx, - FFVulkanEncodeContext *ctx); - -/** - * Encode. - */ -int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt); - -/** - * Uninitialize encoder. - */ -void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx); - -/** - * Create session parameters. - */ -int ff_vulkan_encode_create_session_params(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, - void *codec_params_pnext); - -/** - * Paperwork. - */ -extern const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[]; - -#endif /* AVCODEC_VULKAN_ENCODE_H */ -- 2.49.1 From f93392ea6e08431625475e3744e0b391ebc51516 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:32:14 +0000 Subject: [PATCH 072/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_encode.h | 259 ++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_encode.h diff --git a/libavcodec/vulkan/vulkan_encode.h b/libavcodec/vulkan/vulkan_encode.h new file mode 100644 index 0000000000..583199a7d7 --- /dev/null +++ b/libavcodec/vulkan/vulkan_encode.h @@ -0,0 +1,259 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_ENCODE_H +#define AVCODEC_VULKAN_ENCODE_H + +#include "libavcodec/codec_id.h" +#include "libavcodec/internal.h" + +#include "libavcodec/encode.h" +#include "libavcodec/hwconfig.h" + +#include "vulkan_video.h" +#include "libavcodec/hw_base_encode.h" + +typedef struct FFVulkanEncodeDescriptor { + enum AVCodecID codec_id; + FFVulkanExtensions encode_extension; + VkVideoCodecOperationFlagBitsKHR encode_op; + + VkExtensionProperties ext_props; +} FFVulkanEncodeDescriptor; + +typedef struct FFVulkanEncodePicture { + FFHWBaseEncodePicture base; + VkVideoPictureResourceInfoKHR dpb_res; + VkVideoReferenceSlotInfoKHR dpb_slot; + + struct { + VkImageView view; + VkImageAspectFlags aspect; + } in; + + struct { + VkImageView view; + VkImageAspectFlags aspect; + } dpb; + + void *codec_layer; + void *codec_rc_layer; + + FFVkExecContext *exec; + AVBufferRef *pkt_buf; + int slices_offset; +} FFVulkanEncodePicture; + +/** + * Callback for writing stream-level headers. + */ +typedef int (*vkenc_cb_write_stream_headers)(AVCodecContext *avctx, + uint8_t *data, size_t *data_len); + +/** + * Callback for initializing codec-specific picture headers. 
+ */ +typedef int (*vkenc_cb_init_pic_headers)(AVCodecContext *avctx, + FFVulkanEncodePicture *pic); + +/** + * Callback for writing alignment data. + * Align is the value to align offset to. + */ +typedef int (*vkenc_cb_write_filler)(AVCodecContext *avctx, uint32_t filler, + uint8_t *data, size_t *data_len); + +/** + * Callback for writing any extra units requested. data_len must be set + * to the available size, and its value will be overwritten by the #bytes written + * to the output buffer. + */ +typedef int (*vkenc_cb_write_extra_headers)(AVCodecContext *avctx, + FFVulkanEncodePicture *pic, + uint8_t *data, size_t *data_len); + +typedef struct FFVulkanCodec { + /** + * Codec feature flags. + */ + int flags; +/* Codec output packet without timestamp delay, which means the + * output packet has same PTS and DTS. For AV1. */ +#define VK_ENC_FLAG_NO_DELAY 1 << 6 + + /** + * Size of the codec-specific picture struct. + */ + size_t picture_priv_data_size; + + /** + * Size of the filler header. + */ + size_t filler_header_size; + + /** + * Initialize codec-specific structs in a Vulkan profile. + */ + int (*init_profile)(AVCodecContext *avctx, VkVideoProfileInfoKHR *profile, + void *pnext); + + /** + * Initialize codec-specific rate control structures for a picture. + */ + int (*init_pic_rc)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + VkVideoEncodeRateControlInfoKHR *rc_info, + VkVideoEncodeRateControlLayerInfoKHR *rc_layer); + + /** + * Initialize codec-specific picture parameters. + */ + int (*init_pic_params)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + VkVideoEncodeInfoKHR *encode_info); + + /** + * Callback for writing stream headers. + */ + int (*write_sequence_headers)(AVCodecContext *avctx, + FFHWBaseEncodePicture *base_pic, + uint8_t *data, size_t *data_len); + + /** + * Callback for writing alignment data. 
+ */ + int (*write_filler)(AVCodecContext *avctx, uint32_t filler, + uint8_t *data, size_t *data_len); + + /** + * Callback for writing any extra units requested. data_len must be set + * to the available size, and its value will be overwritten by the #bytes written + * to the output buffer. + */ + int (*write_extra_headers)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + uint8_t *data, size_t *data_len); +} FFVulkanCodec; + +typedef struct FFVkEncodeCommonOptions { + int qp; + int quality; + int profile; + int level; + int tier; + int async_depth; + VkVideoEncodeUsageFlagBitsKHR usage; + VkVideoEncodeContentFlagBitsKHR content; + VkVideoEncodeTuningModeKHR tune; + + VkVideoEncodeRateControlModeFlagBitsKHR rc_mode; +#define FF_VK_RC_MODE_AUTO 0xFFFFFFFF +} FFVkEncodeCommonOptions; + +typedef struct FFVulkanEncodeContext { + FFVulkanContext s; + FFVkVideoCommon common; + FFHWBaseEncodeContext base; + const FFVulkanCodec *codec; + + int explicit_qp; + int session_reset; + + /* Session parameters object, initialized by each codec independently + * and set here. 
*/ + VkVideoSessionParametersKHR session_params; + + AVBufferPool *buf_pool; + + VkFormat pic_format; + + FFVkEncodeCommonOptions opts; + + VkVideoProfileInfoKHR profile; + VkVideoProfileListInfoKHR profile_list; + VkVideoCapabilitiesKHR caps; + VkVideoEncodeQualityLevelPropertiesKHR quality_props; + VkVideoEncodeCapabilitiesKHR enc_caps; + VkVideoEncodeUsageInfoKHR usage_info; + + AVVulkanDeviceQueueFamily *qf_enc; + FFVkExecPool enc_pool; + + FFHWBaseEncodePicture *slots[32]; +} FFVulkanEncodeContext; + +#define VULKAN_ENCODE_COMMON_OPTIONS \ + { "qp", "Use an explicit constant quantizer for the whole stream", OFFSET(common.opts.qp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 255, FLAGS }, \ + { "quality", "Set encode quality (trades off against speed, higher is faster)", OFFSET(common.opts.quality), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \ + { "rc_mode", "Select rate control type", OFFSET(common.opts.rc_mode), AV_OPT_TYPE_INT, { .i64 = FF_VK_RC_MODE_AUTO }, 0, FF_VK_RC_MODE_AUTO, FLAGS, "rc_mode" }, \ + { "auto", "Choose mode automatically based on parameters", 0, AV_OPT_TYPE_CONST, { .i64 = FF_VK_RC_MODE_AUTO }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ + { "driver", "Driver-specific rate control", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ + { "cqp", "Constant quantizer mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ + { "cbr", "Constant bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ + { "vbr", "Variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \ + { "tune", "Select tuning type", OFFSET(common.opts.tune), AV_OPT_TYPE_INT, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "tune" }, \ + { "default", 
"Default tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "hq", "High quality tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "ll", "Low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "ull", "Ultra low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "lossless", "Lossless mode tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \ + { "usage", "Select usage type", OFFSET(common.opts.usage), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "usage" }, \ + { "default", "Default optimizations", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "transcode", "Optimize for transcoding", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_TRANSCODING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "stream", "Optimize for streaming", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_STREAMING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "record", "Optimize for offline recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_RECORDING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "conference", "Optimize for teleconferencing", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_CONFERENCING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \ + { "content", "Select content type", OFFSET(common.opts.content), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "content" }, \ + { "default", "Default content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { 
"camera", "Camera footage", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_CAMERA_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "desktop", "Screen recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DESKTOP_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }, \ + { "rendered", "Game or 3D content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_RENDERED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" } + +/** + * Initialize encoder. + */ +av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + const FFVulkanEncodeDescriptor *vk_desc, + const FFVulkanCodec *codec, + void *codec_caps, void *quality_pnext); + +/** + * Write out the extradata in case its needed. + */ +av_cold int ff_vulkan_write_global_header(AVCodecContext *avctx, + FFVulkanEncodeContext *ctx); + +/** + * Encode. + */ +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt); + +/** + * Uninitialize encoder. + */ +void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx); + +/** + * Create session parameters. + */ +int ff_vulkan_encode_create_session_params(AVCodecContext *avctx, FFVulkanEncodeContext *ctx, + void *codec_params_pnext); + +/** + * Paperwork. + */ +extern const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[]; + +#endif /* AVCODEC_VULKAN_ENCODE_H */ -- 2.49.1 From b127a0655932ae25775dbf741c74fbc5fe05151b Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:32:35 +0000 Subject: [PATCH 073/118] Changing vulkan file directory --- libavcodec/vulkan_encode_av1.c | 1401 -------------------------------- 1 file changed, 1401 deletions(-) delete mode 100644 libavcodec/vulkan_encode_av1.c diff --git a/libavcodec/vulkan_encode_av1.c b/libavcodec/vulkan_encode_av1.c deleted file mode 100644 index 08ffbfa393..0000000000 --- a/libavcodec/vulkan_encode_av1.c +++ /dev/null @@ -1,1401 +0,0 @@ -/* - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/opt.h" -#include "libavutil/mem.h" - -#include "cbs.h" -#include "cbs_av1.h" -#include "av1_levels.h" -#include "libavutil/mastering_display_metadata.h" - -#include "codec_internal.h" -#include "vulkan_encode.h" - -#include "libavutil/avassert.h" - -const FFVulkanEncodeDescriptor ff_vk_enc_av1_desc = { - .codec_id = AV_CODEC_ID_AV1, - .encode_extension = FF_VK_EXT_VIDEO_ENCODE_AV1, - .encode_op = VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR, - .ext_props = { - .extensionName = VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_EXTENSION_NAME, - .specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_SPEC_VERSION, - }, -}; - -enum UnitElems { - UNIT_MASTERING_DISPLAY = 1 << 0, - UNIT_CONTENT_LIGHT_LEVEL = 1 << 1, -}; - -typedef struct VulkanEncodeAV1Picture { - int slot; - int64_t last_idr_frame; - - enum UnitElems units_needed; - - StdVideoAV1TileInfo tile_info; - StdVideoAV1Quantization quantization; - StdVideoAV1Segmentation segmentation; - StdVideoAV1LoopFilter loop_filter; - StdVideoAV1CDEF cdef; - StdVideoAV1LoopRestoration loop_restoration; - StdVideoAV1GlobalMotion global_motion; - - StdVideoEncodeAV1PictureInfo av1pic_info; - VkVideoEncodeAV1PictureInfoKHR vkav1pic_info; - - StdVideoEncodeAV1ExtensionHeader ext_header; - 
StdVideoEncodeAV1ReferenceInfo av1dpb_info; - VkVideoEncodeAV1DpbSlotInfoKHR vkav1dpb_info; - - VkVideoEncodeAV1RateControlInfoKHR vkrc_info; - VkVideoEncodeAV1RateControlLayerInfoKHR vkrc_layer_info; - VkVideoEncodeAV1GopRemainingFrameInfoKHR vkrc_remaining; -} VulkanEncodeAV1Picture; - -typedef struct VulkanEncodeAV1Context { - FFVulkanEncodeContext common; - - CodedBitstreamContext *cbs; - CodedBitstreamFragment current_access_unit; - - enum UnitElems unit_elems; - AV1RawOBU seq_hdr_obu; - AV1RawOBU meta_cll_obu; - AV1RawOBU meta_mastering_obu; - - VkVideoEncodeAV1ProfileInfoKHR profile; - - VkVideoEncodeAV1CapabilitiesKHR caps; - VkVideoEncodeAV1QualityLevelPropertiesKHR quality_props; - - uint64_t hrd_buffer_size; - uint64_t initial_buffer_fullness; - - int uniform_tile; - int tile_cols; - int tile_rows; - - int seq_tier; - int seq_level_idx; - - int q_idx_idr; - int q_idx_p; - int q_idx_b; - - uint8_t *padding_payload; -} VulkanEncodeAV1Context; - -static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - VkVideoEncodeRateControlInfoKHR *rc_info, - VkVideoEncodeRateControlLayerInfoKHR *rc_layer) -{ - VulkanEncodeAV1Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - VulkanEncodeAV1Picture *ap = pic->codec_priv; - - /* This can be easy to calculate */ - ap->vkrc_remaining = (VkVideoEncodeAV1GopRemainingFrameInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_GOP_REMAINING_FRAME_INFO_KHR, - .useGopRemainingFrames = 0, - .gopRemainingIntra = 0, - .gopRemainingPredictive = 0, - .gopRemainingBipredictive = 0, - }; - - ap->vkrc_info = (VkVideoEncodeAV1RateControlInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_RATE_CONTROL_INFO_KHR, - .flags = VK_VIDEO_ENCODE_AV1_RATE_CONTROL_REFERENCE_PATTERN_FLAT_BIT_KHR | - VK_VIDEO_ENCODE_AV1_RATE_CONTROL_REGULAR_GOP_BIT_KHR, - .gopFrameCount = ctx->base.gop_size, - .keyFramePeriod = ctx->base.gop_size, - .consecutiveBipredictiveFrameCount = FFMAX(ctx->base.b_per_p - 1, 
0), - .temporalLayerCount = 0, - }; - rc_info->pNext = &ap->vkrc_info; - - if (rc_info->rateControlMode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { - rc_info->virtualBufferSizeInMs = (enc->hrd_buffer_size * 1000LL) / avctx->bit_rate; - rc_info->initialVirtualBufferSizeInMs = (enc->initial_buffer_fullness * 1000LL) / avctx->bit_rate; - - ap->vkrc_layer_info = (VkVideoEncodeAV1RateControlLayerInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_RATE_CONTROL_LAYER_INFO_KHR, - - .useMinQIndex = avctx->qmin > 0, - .minQIndex.intraQIndex = avctx->qmin > 0 ? avctx->qmin : 0, - .minQIndex.predictiveQIndex = avctx->qmin > 0 ? avctx->qmin : 0, - .minQIndex.bipredictiveQIndex = avctx->qmin > 0 ? avctx->qmin : 0, - - .useMaxQIndex = avctx->qmax > 0, - .maxQIndex.intraQIndex = avctx->qmax > 0 ? avctx->qmax : 0, - .maxQIndex.predictiveQIndex = avctx->qmax > 0 ? avctx->qmax : 0, - .maxQIndex.bipredictiveQIndex = avctx->qmax > 0 ? avctx->qmax : 0, - - .useMaxFrameSize = 0, - }; - rc_layer->pNext = &ap->vkrc_layer_info; - ap->vkrc_info.temporalLayerCount = 1; - } - - return 0; -} - -static void set_name_slot(int slot, int *slot_indices, uint32_t allowed_idx, int group) -{ - int from = group ? AV1_REF_FRAME_GOLDEN : 0; - int to = group ? 
AV1_REFS_PER_FRAME : AV1_REF_FRAME_GOLDEN; - - for (int i = from; i < to; i++) { - if ((slot_indices[i] == -1) && (allowed_idx & (1 << i))) { - slot_indices[i] = slot; - return; - } - } - - av_assert0(0); -} - - -static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - VkVideoEncodeInfoKHR *encode_info) -{ - VulkanEncodeAV1Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFHWBaseEncodeContext *base_ctx = &ctx->base; - - VulkanEncodeAV1Picture *ap = pic->codec_priv; - FFHWBaseEncodePicture *ref; - VulkanEncodeAV1Picture *ap_ref; - VkVideoReferenceSlotInfoKHR *ref_slot; - - uint32_t ref_name_mask = 0x0; - int name_slots[STD_VIDEO_AV1_REFS_PER_FRAME]; - - StdVideoAV1Segmentation *segmentation = &ap->segmentation; - StdVideoAV1LoopFilter *loop_filter = &ap->loop_filter; - StdVideoAV1Quantization *quantization = &ap->quantization; - StdVideoAV1CDEF *cdef = &ap->cdef; - StdVideoAV1LoopRestoration *loop_restoration = &ap->loop_restoration; - StdVideoAV1GlobalMotion *global_motion = &ap->global_motion; - StdVideoAV1TileInfo *tile_info = &ap->tile_info; - static const int8_t default_loop_filter_ref_deltas[STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME] = - { 1, 0, 0, 0, -1, 0, -1, -1 }; - - VkVideoEncodeAV1PredictionModeKHR pred_mode; - VkVideoEncodeAV1RateControlGroupKHR rc_group; - int lr_unit_shift = 0; - int lr_uv_shift = 0; - - ap->ext_header = (StdVideoEncodeAV1ExtensionHeader) { - .temporal_id = 0, - .spatial_id = 0, - }; - - *tile_info = (StdVideoAV1TileInfo) { - .flags = (StdVideoAV1TileInfoFlags) { - .uniform_tile_spacing_flag = enc->uniform_tile, - }, - .TileCols = enc->tile_cols, - .TileRows = enc->tile_rows, - .context_update_tile_id = 0, - .tile_size_bytes_minus_1 = 0, - }; - - for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) { - global_motion->GmType[i] = 0; - for (int j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; j++) { - global_motion->gm_params[i][j] = 0; - } - } - - for (int i = 0; i < 
STD_VIDEO_AV1_REFS_PER_FRAME; i++) - name_slots[i] = -1; - - *loop_restoration = (StdVideoAV1LoopRestoration) { - .FrameRestorationType[0] = STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_NONE, - .FrameRestorationType[1] = STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_NONE, - .FrameRestorationType[2] = STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_NONE, - .LoopRestorationSize[0] = 1 + lr_unit_shift, - .LoopRestorationSize[1] = 1 + lr_unit_shift - lr_uv_shift, - .LoopRestorationSize[2] = 1 + lr_unit_shift - lr_uv_shift, - }; - - *cdef = (StdVideoAV1CDEF) { - .cdef_damping_minus_3 = 0, - .cdef_bits = 0, - }; - - for (int i = 0; i < STD_VIDEO_AV1_MAX_SEGMENTS; i++) { - segmentation->FeatureEnabled[i] = 0x0; - for (int j = 0; j < STD_VIDEO_AV1_SEG_LVL_MAX; j++) { - segmentation->FeatureEnabled[i] |= 0x0; - segmentation->FeatureData[i][j] = 0; - } - } - - *loop_filter = (StdVideoAV1LoopFilter) { - .flags = (StdVideoAV1LoopFilterFlags) { - .loop_filter_delta_enabled = 0, - .loop_filter_delta_update = 0, - }, - .loop_filter_level = { 0 }, - .loop_filter_sharpness = 0, - .update_ref_delta = 0, - .loop_filter_ref_deltas = { 0 }, - .update_mode_delta = 0, - .loop_filter_mode_deltas = { 0 }, - }; - loop_filter->update_mode_delta = 1; - memcpy(loop_filter->loop_filter_ref_deltas, default_loop_filter_ref_deltas, - STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t)); - - *quantization = (StdVideoAV1Quantization) { - .flags = (StdVideoAV1QuantizationFlags) { - .using_qmatrix = 0, - .diff_uv_delta = 0, - /* Reserved */ - }, - .base_q_idx = 0, /* Set later */ - .DeltaQYDc = 0, - .DeltaQUDc = 0, - .DeltaQUAc = 0, - .DeltaQVDc = 0, - .DeltaQVAc = 0, - .qm_y = 0, - .qm_u = 0, - .qm_v = 0, - }; - - ref_slot = (VkVideoReferenceSlotInfoKHR *)encode_info->pSetupReferenceSlot; - ap->av1pic_info = (StdVideoEncodeAV1PictureInfo) { - .flags = (StdVideoEncodeAV1PictureInfoFlags) { - .error_resilient_mode = (pic->type == FF_HW_PICTURE_TYPE_I || - pic->type == FF_HW_PICTURE_TYPE_IDR) && - (pic->display_order <= 
pic->encode_order), - .disable_cdf_update = 0, - .use_superres = 0, - .render_and_frame_size_different = 0, - .allow_screen_content_tools = 0, - .is_filter_switchable = 0, - .force_integer_mv = 0, - .frame_size_override_flag = 0, - .buffer_removal_time_present_flag = 0, - .allow_intrabc = 0, - .frame_refs_short_signaling = 0, - .allow_high_precision_mv = 0, - .is_motion_mode_switchable = 0, - .use_ref_frame_mvs = 0, - .disable_frame_end_update_cdf = 0, - .allow_warped_motion = 0, - .reduced_tx_set = 0, - .skip_mode_present = 0, - .delta_q_present = 0, - .delta_lf_present = 0, - .delta_lf_multi = 0, - .segmentation_enabled = 0, - .segmentation_update_map = 0, - .segmentation_temporal_update = 0, - .segmentation_update_data = 0, - .UsesLr = 0, - .usesChromaLr = 0, - .show_frame = pic->display_order <= pic->encode_order, - .showable_frame = 0, - /* Reserved */ - }, - .frame_type = 0, // set later - .frame_presentation_time = 0, - .current_frame_id = ref_slot->slotIndex, - .order_hint = 0, // set later - .primary_ref_frame = 0, // set later - .refresh_frame_flags = 0x0, // set later - .coded_denom = 0, - .render_width_minus_1 = base_ctx->surface_width - 1, - .render_height_minus_1 = base_ctx->surface_height - 1, - .interpolation_filter = 0, - .TxMode = STD_VIDEO_AV1_TX_MODE_SELECT, - .delta_q_res = 0, - .delta_lf_res = 0, - .ref_order_hint = { 0 }, // set later - .ref_frame_idx = { 0 }, // set later - /* Reserved */ - .delta_frame_id_minus_1 = { 0 }, - -// .pTileInfo = tile_info, TODO FIX - .pQuantization = quantization, - .pSegmentation = segmentation, - .pLoopFilter = loop_filter, - .pCDEF = cdef, - .pLoopRestoration = loop_restoration, - .pGlobalMotion = global_motion, - .pExtensionHeader = &ap->ext_header, - .pBufferRemovalTimes = NULL, - }; - - switch (pic->type) { - case FF_HW_PICTURE_TYPE_I: - case FF_HW_PICTURE_TYPE_IDR: - av_assert0(pic->nb_refs[0] == 0 || pic->nb_refs[1]); - ap->av1pic_info.frame_type = STD_VIDEO_AV1_FRAME_TYPE_KEY; - 
ap->av1pic_info.refresh_frame_flags = 0xFF; - quantization->base_q_idx = enc->q_idx_idr; - ap->slot = 0; - ap->last_idr_frame = pic->display_order; - pred_mode = VK_VIDEO_ENCODE_AV1_PREDICTION_MODE_INTRA_ONLY_KHR; - rc_group = VK_VIDEO_ENCODE_AV1_RATE_CONTROL_GROUP_INTRA_KHR; - break; - case FF_HW_PICTURE_TYPE_P: - ref = pic->refs[0][pic->nb_refs[0] - 1]; - ap_ref = ref->codec_priv; - - ap->av1pic_info.frame_type = STD_VIDEO_AV1_FRAME_TYPE_INTER; - quantization->base_q_idx = enc->q_idx_p; - - ap->last_idr_frame = ap_ref->last_idr_frame; - ap->slot = !ap_ref->slot; - - ap->av1pic_info.refresh_frame_flags = 1 << ap->slot; - - /** set the nearest frame in L0 as all reference frame. */ - for (int i = 0; i < AV1_REFS_PER_FRAME; i++) - ap->av1pic_info.ref_frame_idx[i] = ap_ref->slot; - - ap->av1pic_info.primary_ref_frame = ap_ref->slot; - ap->av1pic_info.ref_order_hint[ap_ref->slot] = ref->display_order - ap_ref->last_idr_frame; - rc_group = VK_VIDEO_ENCODE_AV1_RATE_CONTROL_GROUP_PREDICTIVE_KHR; - pred_mode = VK_VIDEO_ENCODE_AV1_PREDICTION_MODE_SINGLE_REFERENCE_KHR; - ref_name_mask = enc->caps.singleReferenceNameMask; - set_name_slot(ap_ref->av1pic_info.current_frame_id, name_slots, ref_name_mask, 0); - -// vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST; - - /** set the 2nd nearest frame in L0 as Golden frame. 
*/ - if ((pic->nb_refs[0] > 1) && - ((enc->caps.maxSingleReferenceCount > 1) || - (enc->caps.maxUnidirectionalCompoundReferenceCount > 0))) { - if (enc->caps.maxUnidirectionalCompoundReferenceCount) { - pred_mode = VK_VIDEO_ENCODE_AV1_PREDICTION_MODE_UNIDIRECTIONAL_COMPOUND_KHR; - ref_name_mask = enc->caps.unidirectionalCompoundReferenceNameMask; - } - ref = pic->refs[0][pic->nb_refs[0] - 2]; - ap_ref = ref->codec_priv; - ap->av1pic_info.ref_frame_idx[3] = ap_ref->slot; - ap->av1pic_info.ref_order_hint[ap_ref->slot] = ref->display_order - ap_ref->last_idr_frame; -// vpic->ref_frame_ctrl_l0.fields.search_idx1 = AV1_REF_FRAME_GOLDEN; - set_name_slot(ap_ref->av1pic_info.current_frame_id, name_slots, ref_name_mask, 0); - } - break; - case FF_HW_PICTURE_TYPE_B: - ap->av1pic_info.frame_type = STD_VIDEO_AV1_FRAME_TYPE_INTER; - quantization->base_q_idx = enc->q_idx_b; - ap->av1pic_info.refresh_frame_flags = 0x0; - - rc_group = VK_VIDEO_ENCODE_AV1_RATE_CONTROL_GROUP_BIPREDICTIVE_KHR; - pred_mode = VK_VIDEO_ENCODE_AV1_PREDICTION_MODE_BIDIRECTIONAL_COMPOUND_KHR; - ref_name_mask = enc->caps.bidirectionalCompoundReferenceNameMask; - -// fh->reference_select = 1; - /** B frame will not be referenced, disable its recon frame. */ -// vpic->picture_flags.bits.disable_frame_recon = 1; - - /** Use LAST_FRAME and BWDREF_FRAME for reference. 
*/ -// vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST; -// vpic->ref_frame_ctrl_l1.fields.search_idx0 = AV1_REF_FRAME_BWDREF; - - ref = pic->refs[0][pic->nb_refs[0] - 1]; - ap_ref = ref->codec_priv; - ap->last_idr_frame = ap_ref->last_idr_frame; - ap->av1pic_info.primary_ref_frame = ap_ref->slot; - ap->av1pic_info.ref_order_hint[ap_ref->slot] = ref->display_order - ap_ref->last_idr_frame; - for (int i = 0; i < AV1_REF_FRAME_GOLDEN; i++) - ap->av1pic_info.ref_frame_idx[i] = ap_ref->slot; - set_name_slot(ap_ref->av1pic_info.current_frame_id, name_slots, ref_name_mask, 0); - - ref = pic->refs[1][pic->nb_refs[1] - 1]; - ap_ref = ref->codec_priv; - ap->av1pic_info.ref_order_hint[ap_ref->slot] = ref->display_order - ap_ref->last_idr_frame; - for (int i = AV1_REF_FRAME_GOLDEN; i < AV1_REFS_PER_FRAME; i++) - ap->av1pic_info.ref_frame_idx[i] = ap_ref->slot; - set_name_slot(ap_ref->av1pic_info.current_frame_id, name_slots, ref_name_mask, 1); - break; - } - - ap->av1pic_info.flags.showable_frame = ap->av1pic_info.frame_type != STD_VIDEO_AV1_FRAME_TYPE_KEY; - ap->av1pic_info.order_hint = pic->display_order - ap->last_idr_frame; - - ap->vkav1pic_info = (VkVideoEncodeAV1PictureInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_PICTURE_INFO_KHR, - .pNext = NULL, - .predictionMode = pred_mode, - .rateControlGroup = rc_group, - .constantQIndex = quantization->base_q_idx, - .pStdPictureInfo = &ap->av1pic_info, - // .referenceNameSlotIndices is set below - .primaryReferenceCdfOnly = 0, - .generateObuExtensionHeader = 0, - }; - encode_info->pNext = &ap->vkav1pic_info; - - for (int i = 0; i < FF_ARRAY_ELEMS(ap->vkav1pic_info.referenceNameSlotIndices); i++) - ap->vkav1pic_info.referenceNameSlotIndices[i] = name_slots[i]; - - ref_slot = (VkVideoReferenceSlotInfoKHR *)encode_info->pSetupReferenceSlot; - ref_slot->pNext = &ap->vkav1dpb_info; - - ap->av1dpb_info = (StdVideoEncodeAV1ReferenceInfo) { - .flags = (StdVideoEncodeAV1ReferenceInfoFlags) { - 
.disable_frame_end_update_cdf = 0, - .segmentation_enabled = 0, - /* Reserved */ - }, - .RefFrameId = ref_slot->slotIndex, - .frame_type = ap->av1pic_info.frame_type, - .OrderHint = pic->display_order - ap->last_idr_frame, - /* Reserved */ - .pExtensionHeader = &ap->ext_header, - }; - - ap->vkav1dpb_info = (VkVideoEncodeAV1DpbSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_DPB_SLOT_INFO_KHR, - .pStdReferenceInfo = &ap->av1dpb_info, - }; - - ap->units_needed = 0; - if (pic->type == FF_HW_PICTURE_TYPE_IDR) { - AVFrameSideData *sd = NULL; - if (enc->unit_elems & UNIT_MASTERING_DISPLAY) - sd = av_frame_get_side_data(pic->input_image, - AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); - if (sd) { - AVMasteringDisplayMetadata *mdm = - (AVMasteringDisplayMetadata *)sd->data; - if (mdm->has_primaries && mdm->has_luminance) { - AV1RawOBU *obu = &enc->meta_mastering_obu; - AV1RawMetadata *md = &obu->obu.metadata; - AV1RawMetadataHDRMDCV *mdcv = &md->metadata.hdr_mdcv; - const int chroma_den = 1 << 16; - const int max_luma_den = 1 << 8; - const int min_luma_den = 1 << 14; - - memset(obu, 0, sizeof(*obu)); - obu->header.obu_type = AV1_OBU_METADATA; - md->metadata_type = AV1_METADATA_TYPE_HDR_MDCV; - - for (int i = 0; i < 3; i++) { - mdcv->primary_chromaticity_x[i] = - av_rescale(mdm->display_primaries[i][0].num, chroma_den, - mdm->display_primaries[i][0].den); - mdcv->primary_chromaticity_y[i] = - av_rescale(mdm->display_primaries[i][1].num, chroma_den, - mdm->display_primaries[i][1].den); - } - - mdcv->white_point_chromaticity_x = - av_rescale(mdm->white_point[0].num, chroma_den, - mdm->white_point[0].den); - mdcv->white_point_chromaticity_y = - av_rescale(mdm->white_point[1].num, chroma_den, - mdm->white_point[1].den); - - mdcv->luminance_max = - av_rescale(mdm->max_luminance.num, max_luma_den, - mdm->max_luminance.den); - mdcv->luminance_min = - av_rescale(mdm->min_luminance.num, min_luma_den, - mdm->min_luminance.den); - ap->units_needed |= UNIT_MASTERING_DISPLAY; - 
} - } - - if (enc->unit_elems & UNIT_CONTENT_LIGHT_LEVEL) - sd = av_frame_get_side_data(pic->input_image, - AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); - if (sd) { - AVContentLightMetadata *cllm = (AVContentLightMetadata *)sd->data; - AV1RawOBU *obu = &enc->meta_cll_obu; - AV1RawMetadata *md = &obu->obu.metadata; - AV1RawMetadataHDRCLL *cll = &md->metadata.hdr_cll; - - memset(obu, 0, sizeof(*obu)); - obu->header.obu_type = AV1_OBU_METADATA; - md->metadata_type = AV1_METADATA_TYPE_HDR_CLL; - cll->max_cll = cllm->MaxCLL; - cll->max_fall = cllm->MaxFALL; - - ap->units_needed |= UNIT_CONTENT_LIGHT_LEVEL; - } - } - - return 0; -} - -static int init_profile(AVCodecContext *avctx, - VkVideoProfileInfoKHR *profile, void *pnext) -{ - VkResult ret; - VulkanEncodeAV1Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - FFHWBaseEncodeContext *base_ctx = &ctx->base; - - VkVideoEncodeAV1CapabilitiesKHR av1_caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_CAPABILITIES_KHR, - }; - VkVideoEncodeCapabilitiesKHR enc_caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR, - .pNext = &av1_caps, - }; - VkVideoCapabilitiesKHR caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR, - .pNext = &enc_caps, - }; - - /* In order of preference */ - int last_supported = AV_PROFILE_UNKNOWN; - static const int known_profiles[] = { - AV_PROFILE_AV1_MAIN, - AV_PROFILE_AV1_HIGH, - AV_PROFILE_AV1_PROFESSIONAL, - }; - int nb_profiles = FF_ARRAY_ELEMS(known_profiles); - - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->frames->sw_format); - if (!desc) - return AVERROR(EINVAL); - - if (s->frames->sw_format == AV_PIX_FMT_NV12 || - s->frames->sw_format == AV_PIX_FMT_P010) - nb_profiles = 1; - - enc->profile = (VkVideoEncodeAV1ProfileInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_PROFILE_INFO_KHR, - .pNext = pnext, - .stdProfile = ff_vk_av1_profile_to_vk(avctx->profile), - }; 
- profile->pNext = &enc->profile; - - /* Set level */ - if (avctx->level == AV_LEVEL_UNKNOWN) { - const AV1LevelDescriptor *level; - float framerate = 0.0; - - if (avctx->framerate.num > 0 && avctx->framerate.den > 0) - framerate = av_q2d(avctx->framerate); - - level = ff_av1_guess_level(avctx->bit_rate, enc->seq_tier, - base_ctx->surface_width, base_ctx->surface_height, - enc->tile_rows * enc->tile_cols, - enc->tile_cols, framerate); - if (level) { - av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name); - enc->seq_level_idx = ff_vk_av1_level_to_vk(level->level_idx); - } else { - av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to " - "any normal level, using level 7.3 by default.\n"); - enc->seq_level_idx = STD_VIDEO_AV1_LEVEL_7_3; - enc->seq_tier = 1; - } - } else { - enc->seq_level_idx = ff_vk_av1_level_to_vk(avctx->level); - } - - /* User has explicitly specified a profile. */ - if (avctx->profile != AV_PROFILE_UNKNOWN) - return 0; - - av_log(avctx, AV_LOG_DEBUG, "Supported profiles:\n"); - for (int i = 0; i < nb_profiles; i++) { - enc->profile.stdProfile = ff_vk_av1_profile_to_vk(known_profiles[i]); - ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, - profile, - &caps); - if (ret == VK_SUCCESS) { - av_log(avctx, AV_LOG_DEBUG, " %s\n", - avcodec_profile_name(avctx->codec_id, known_profiles[i])); - last_supported = known_profiles[i]; - } - } - - if (last_supported == AV_PROFILE_UNKNOWN) { - av_log(avctx, AV_LOG_ERROR, "No supported profiles for given format\n"); - return AVERROR(ENOTSUP); - } - - enc->profile.stdProfile = ff_vk_av1_profile_to_vk(last_supported); - av_log(avctx, AV_LOG_VERBOSE, "Using profile %s\n", - avcodec_profile_name(avctx->codec_id, last_supported)); - avctx->profile = last_supported; - - return 0; -} - -static int init_enc_options(AVCodecContext *avctx) -{ - VulkanEncodeAV1Context *enc = avctx->priv_data; - - if (avctx->rc_buffer_size) - enc->hrd_buffer_size = avctx->rc_buffer_size; - else if 
(avctx->rc_max_rate > 0) - enc->hrd_buffer_size = avctx->rc_max_rate; - else - enc->hrd_buffer_size = avctx->bit_rate; - - if (avctx->rc_initial_buffer_occupancy) { - if (avctx->rc_initial_buffer_occupancy > enc->hrd_buffer_size) { - av_log(avctx, AV_LOG_ERROR, "Invalid RC buffer settings: " - "must have initial buffer size (%d) <= " - "buffer size (%"PRId64").\n", - avctx->rc_initial_buffer_occupancy, enc->hrd_buffer_size); - return AVERROR(EINVAL); - } - enc->initial_buffer_fullness = avctx->rc_initial_buffer_occupancy; - } else { - enc->initial_buffer_fullness = enc->hrd_buffer_size * 3 / 4; - } - - if (enc->common.opts.rc_mode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { - enc->q_idx_p = av_clip(enc->common.opts.qp, - enc->caps.minQIndex, enc->caps.maxQIndex); - if (fabs(avctx->i_quant_factor) > 0.0) - enc->q_idx_idr = - av_clip((fabs(avctx->i_quant_factor) * enc->q_idx_p + - avctx->i_quant_offset) + 0.5, - 0, 255); - else - enc->q_idx_idr = enc->q_idx_p; - - if (fabs(avctx->b_quant_factor) > 0.0) - enc->q_idx_b = - av_clip((fabs(avctx->b_quant_factor) * enc->q_idx_p + - avctx->b_quant_offset) + 0.5, - 0, 255); - else - enc->q_idx_b = enc->q_idx_p; - } else { - /** Arbitrary value */ - enc->q_idx_idr = enc->q_idx_p = enc->q_idx_b = 128; - } - - return 0; -} - -static av_cold int init_sequence_headers(AVCodecContext *avctx) -{ - VulkanEncodeAV1Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFHWBaseEncodeContext *base_ctx = &ctx->base; - - AV1RawOBU *seq_obu = &enc->seq_hdr_obu; - AV1RawSequenceHeader *seq = &seq_obu->obu.sequence_header; - - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->frames->sw_format); - if (!desc) - return AVERROR(EINVAL); - - seq_obu->header.obu_type = AV1_OBU_SEQUENCE_HEADER; - *seq = (AV1RawSequenceHeader) { - .seq_profile = avctx->profile, - .seq_force_integer_mv = seq->seq_force_screen_content_tools ? 
- AV1_SELECT_SCREEN_CONTENT_TOOLS : - AV1_SELECT_INTEGER_MV, - .frame_width_bits_minus_1 = av_log2(base_ctx->surface_width), - .frame_height_bits_minus_1 = av_log2(base_ctx->surface_height), - .max_frame_width_minus_1 = base_ctx->surface_width - 1, - .max_frame_height_minus_1 = base_ctx->surface_height - 1, - .enable_order_hint = 1, - .order_hint_bits_minus_1 = av_clip_intp2(av_log2(ctx->base.gop_size), 3), - .use_128x128_superblock = !!(enc->caps.superblockSizes & VK_VIDEO_ENCODE_AV1_SUPERBLOCK_SIZE_128_BIT_KHR), - .color_config = (AV1RawColorConfig) { - .high_bitdepth = desc->comp[0].depth > 8, - .color_primaries = avctx->color_primaries, - .transfer_characteristics = avctx->color_trc, - .matrix_coefficients = avctx->colorspace, - .color_description_present_flag = (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED || - avctx->color_trc != AVCOL_TRC_UNSPECIFIED || - avctx->colorspace != AVCOL_SPC_UNSPECIFIED), - .subsampling_x = desc->log2_chroma_w, - .subsampling_y = desc->log2_chroma_h, - .chroma_sample_position = avctx->chroma_sample_location == AVCHROMA_LOC_LEFT ? - AV1_CSP_VERTICAL : - avctx->chroma_sample_location == AVCHROMA_LOC_TOPLEFT ? 
- AV1_CSP_COLOCATED : - AV1_CSP_UNKNOWN, - }, - - /* Operating point */ - .seq_tier = { enc->seq_tier }, - .seq_level_idx = { enc->seq_level_idx }, - .decoder_buffer_delay = { base_ctx->decode_delay }, - .encoder_buffer_delay = { base_ctx->output_delay }, - .operating_points_cnt_minus_1 = 1 - 1, - }; - - return 0; -} - -typedef struct VulkanAV1Units { - StdVideoAV1SequenceHeader seq_hdr; - StdVideoAV1TimingInfo timing_info; - StdVideoAV1ColorConfig color_config; - - StdVideoEncodeAV1DecoderModelInfo decoder_model; - StdVideoEncodeAV1OperatingPointInfo operating_points[AV1_MAX_OPERATING_POINTS]; - int nb_operating_points; -} VulkanAV1Units; - -static av_cold int base_unit_to_vk(AVCodecContext *avctx, VulkanAV1Units *vk_units) -{ - VulkanEncodeAV1Context *enc = avctx->priv_data; - - AV1RawOBU *seq_obu = &enc->seq_hdr_obu; - AV1RawSequenceHeader *seq = &seq_obu->obu.sequence_header; - - StdVideoAV1SequenceHeader *seq_hdr = &vk_units->seq_hdr; - StdVideoAV1TimingInfo *timing_info = &vk_units->timing_info; - StdVideoAV1ColorConfig *color_config = &vk_units->color_config; - - StdVideoEncodeAV1OperatingPointInfo *operating_points = vk_units->operating_points; - - *timing_info = (StdVideoAV1TimingInfo) { - .flags = (StdVideoAV1TimingInfoFlags) { - .equal_picture_interval = seq->timing_info.equal_picture_interval, - }, - .num_units_in_display_tick = seq->timing_info.num_units_in_display_tick, - .time_scale = seq->timing_info.time_scale, - .num_ticks_per_picture_minus_1 = seq->timing_info.num_ticks_per_picture_minus_1, - }; - - *color_config = (StdVideoAV1ColorConfig) { - .flags = (StdVideoAV1ColorConfigFlags) { - .mono_chrome = seq->color_config.mono_chrome, - .color_range = seq->color_config.color_range, - .separate_uv_delta_q = seq->color_config.separate_uv_delta_q, - }, - .BitDepth = seq->color_config.twelve_bit ? 12 : - seq->color_config.high_bitdepth ? 
10 : 8, - .subsampling_x = seq->color_config.subsampling_x, - .subsampling_y = seq->color_config.subsampling_y, - .color_primaries = seq->color_config.color_primaries, - .transfer_characteristics = seq->color_config.transfer_characteristics, - .matrix_coefficients = seq->color_config.matrix_coefficients, - }; - - *seq_hdr = (StdVideoAV1SequenceHeader) { - .flags = (StdVideoAV1SequenceHeaderFlags) { - .still_picture = seq->still_picture, - .reduced_still_picture_header = seq->reduced_still_picture_header, - .use_128x128_superblock = seq->use_128x128_superblock, - .enable_filter_intra = seq->enable_filter_intra, - .enable_intra_edge_filter = seq->enable_intra_edge_filter, - .enable_interintra_compound = seq->enable_interintra_compound, - .enable_masked_compound = seq->enable_masked_compound, - .enable_warped_motion = seq->enable_warped_motion, - .enable_dual_filter = seq->enable_dual_filter, - .enable_order_hint = seq->enable_order_hint, - .enable_jnt_comp = seq->enable_jnt_comp, - .enable_ref_frame_mvs = seq->enable_ref_frame_mvs, - .frame_id_numbers_present_flag = seq->frame_id_numbers_present_flag, - .enable_superres = seq->enable_superres, - .enable_cdef = seq->enable_cdef, - .enable_restoration = seq->enable_restoration, - .film_grain_params_present = seq->film_grain_params_present, - .timing_info_present_flag = seq->timing_info_present_flag, - .initial_display_delay_present_flag = seq->initial_display_delay_present_flag, - }, - .seq_profile = seq->seq_profile, - .frame_width_bits_minus_1 = seq->frame_width_bits_minus_1, - .frame_height_bits_minus_1 = seq->frame_height_bits_minus_1, - .max_frame_width_minus_1 = seq->max_frame_width_minus_1, - .max_frame_height_minus_1 = seq->max_frame_height_minus_1, - .delta_frame_id_length_minus_2 = seq->delta_frame_id_length_minus_2, - .additional_frame_id_length_minus_1 = seq->additional_frame_id_length_minus_1, - .order_hint_bits_minus_1 = seq->order_hint_bits_minus_1, - .seq_force_integer_mv = seq->seq_force_integer_mv, - 
.seq_force_screen_content_tools = seq->seq_force_screen_content_tools, - .pTimingInfo = timing_info, - .pColorConfig = color_config, - }; - - for (int i = 0; i <= seq->operating_points_cnt_minus_1; i++) { - operating_points[i] = (StdVideoEncodeAV1OperatingPointInfo) { - .flags = (StdVideoEncodeAV1OperatingPointInfoFlags) { - .decoder_model_present_for_this_op = seq->decoder_model_present_for_this_op[i], - .low_delay_mode_flag = seq->low_delay_mode_flag[i], - .initial_display_delay_present_for_this_op = seq->initial_display_delay_present_for_this_op[i], - /* Reserved */ - }, - .operating_point_idc = seq->operating_point_idc[i], - .seq_level_idx = seq->seq_level_idx[i], - .seq_tier = seq->seq_tier[i], - .decoder_buffer_delay = seq->decoder_buffer_delay[i], - .encoder_buffer_delay = seq->encoder_buffer_delay[i], - .initial_display_delay_minus_1 = seq->initial_display_delay_minus_1[i], - }; - } - vk_units->nb_operating_points = seq->operating_points_cnt_minus_1 + 1; - - return 0; -} - -static int create_session_params(AVCodecContext *avctx) -{ - int err; - VulkanEncodeAV1Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - VulkanAV1Units vk_units = { 0 }; - - VkVideoEncodeAV1SessionParametersCreateInfoKHR av1_params; - - /* Convert it to Vulkan */ - err = base_unit_to_vk(avctx, &vk_units); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to convert sequence header to Vulkan: %s\n", - av_err2str(err)); - return err; - } - - /* Destroy the session params */ - if (ctx->session_params) - vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, - ctx->session_params, - s->hwctx->alloc); - - av1_params = (VkVideoEncodeAV1SessionParametersCreateInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pStdSequenceHeader = &vk_units.seq_hdr, - .pStdDecoderModelInfo = &vk_units.decoder_model, - .pStdOperatingPoints = 
vk_units.operating_points, - .stdOperatingPointCount = vk_units.nb_operating_points, - }; - - return ff_vulkan_encode_create_session_params(avctx, ctx, &av1_params); -} - -static int parse_feedback_units(AVCodecContext *avctx, - const uint8_t *data, size_t size) -{ - int err; - VulkanEncodeAV1Context *enc = avctx->priv_data; - AV1RawOBU *seq_obu = &enc->seq_hdr_obu; - AV1RawSequenceHeader *seq = &seq_obu->obu.sequence_header; - - CodedBitstreamContext *cbs; - CodedBitstreamFragment obu = { 0 }; - - err = ff_cbs_init(&cbs, AV_CODEC_ID_AV1, avctx); - if (err < 0) - return err; - - err = ff_cbs_read(cbs, &obu, NULL, data, size); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to parse feedback units, bad drivers: %s\n", - av_err2str(err)); - return err; - } - - /* If PPS has an override, just copy it entirely. */ - for (int i = 0; i < obu.nb_units; i++) { - if (obu.units[i].type == AV1_OBU_SEQUENCE_HEADER) { - AV1RawOBU *f_seq_obu = obu.units[i].content; - AV1RawSequenceHeader *f_seq = &f_seq_obu->obu.sequence_header; - seq->frame_width_bits_minus_1 = f_seq->frame_width_bits_minus_1; - seq->frame_height_bits_minus_1 = f_seq->frame_height_bits_minus_1; - seq->max_frame_width_minus_1 = f_seq->max_frame_width_minus_1; - seq->max_frame_height_minus_1 = f_seq->max_frame_height_minus_1; - seq->seq_choose_screen_content_tools = f_seq->seq_choose_screen_content_tools; - seq->seq_force_screen_content_tools = f_seq->seq_force_screen_content_tools; - seq->seq_choose_integer_mv = f_seq->seq_choose_integer_mv; - seq->seq_force_integer_mv = f_seq->seq_force_integer_mv; - } - } - - ff_cbs_fragment_free(&obu); - ff_cbs_close(&cbs); - - return 0; -} - -static int init_base_units(AVCodecContext *avctx) -{ - int err; - VkResult ret; - VulkanEncodeAV1Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - VkVideoEncodeSessionParametersGetInfoKHR params_info; - 
VkVideoEncodeSessionParametersFeedbackInfoKHR params_feedback; - - void *data = NULL; - size_t data_size = 0; - - /* Generate SPS/PPS unit info */ - err = init_sequence_headers(avctx); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize sequence header: %s\n", - av_err2str(err)); - return err; - } - - /* Create session parameters from them */ - err = create_session_params(avctx); - if (err < 0) - return err; - - params_info = (VkVideoEncodeSessionParametersGetInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_GET_INFO_KHR, - .videoSessionParameters = ctx->session_params, - }; - params_feedback = (VkVideoEncodeSessionParametersFeedbackInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_FEEDBACK_INFO_KHR, - }; - - ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, ¶ms_info, - ¶ms_feedback, - &data_size, data); - if (ret == VK_INCOMPLETE || - (ret == VK_SUCCESS) && (data_size > 0)) { - data = av_mallocz(data_size); - if (!data) - return AVERROR(ENOMEM); - } else { - av_log(avctx, AV_LOG_ERROR, "Unable to get feedback for AV1 sequence header = %"SIZE_SPECIFIER"\n", - data_size); - return err; - } - - ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, ¶ms_info, - ¶ms_feedback, - &data_size, data); - if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Error writing feedback units\n"); - return err; - } - - av_log(avctx, AV_LOG_VERBOSE, "Feedback units written, overrides: %i\n", - params_feedback.hasOverrides); - - params_feedback.hasOverrides = 1; - - /* No need to sync any overrides */ - if (!params_feedback.hasOverrides) - return 0; - - /* Parse back tne units and override */ - err = parse_feedback_units(avctx, data, data_size); - if (err < 0) - return err; - - /* Create final session parameters */ - err = create_session_params(avctx); - if (err < 0) - return err; - - return 0; -} - -static int vulkan_encode_av1_add_obu(AVCodecContext *avctx, - CodedBitstreamFragment *au, - 
uint8_t type, void *obu_unit) -{ - int err; - - err = ff_cbs_insert_unit_content(au, -1, - type, obu_unit, NULL); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to add OBU unit: " - "type = %d.\n", type); - return err; - } - - return err; -} - -static int vulkan_encode_av1_write_obu(AVCodecContext *avctx, - uint8_t *data, size_t *data_len, - CodedBitstreamFragment *obu) -{ - VulkanEncodeAV1Context *enc = avctx->priv_data; - int ret; - - ret = ff_cbs_write_fragment_data(enc->cbs, obu); - if (ret < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n"); - return ret; - } - - memcpy(data, obu->data, obu->data_size); - *data_len = obu->data_size; - - return 0; -} - -static int write_sequence_header(AVCodecContext *avctx, - FFHWBaseEncodePicture *base_pic, - uint8_t *data, size_t *data_len) -{ - int err; - VulkanEncodeAV1Context *enc = avctx->priv_data; - CodedBitstreamFragment *obu = &enc->current_access_unit; - - err = vulkan_encode_av1_add_obu(avctx, obu, - AV1_OBU_SEQUENCE_HEADER, &enc->seq_hdr_obu); - if (err < 0) - goto fail; - - err = vulkan_encode_av1_write_obu(avctx, data, data_len, obu); - -fail: - ff_cbs_fragment_reset(obu); - return err; -} - -static int write_extra_headers(AVCodecContext *avctx, - FFHWBaseEncodePicture *base_pic, - uint8_t *data, size_t *data_len) -{ - int err; - VulkanEncodeAV1Context *enc = avctx->priv_data; - VulkanEncodeAV1Picture *ap = base_pic->codec_priv; - CodedBitstreamFragment *obu = &enc->current_access_unit; - - if (ap->units_needed & AV_FRAME_DATA_MASTERING_DISPLAY_METADATA) { - err = vulkan_encode_av1_add_obu(avctx, obu, - AV1_OBU_METADATA, - &enc->meta_mastering_obu); - if (err < 0) - goto fail; - } - - if (ap->units_needed & UNIT_CONTENT_LIGHT_LEVEL) { - err = vulkan_encode_av1_add_obu(avctx, obu, - AV1_OBU_METADATA, - &enc->meta_cll_obu); - if (err < 0) - goto fail; - } - - if (ap->units_needed) { - err = vulkan_encode_av1_write_obu(avctx, data, data_len, obu); - if (err < 0) - goto fail; - } else { 
- err = 0; - *data_len = 0; - } - -fail: - ff_cbs_fragment_reset(obu); - return err; -} - -static int write_padding(AVCodecContext *avctx, uint32_t padding, - uint8_t *data, size_t *data_len) -{ - int err; - VulkanEncodeAV1Context *enc = avctx->priv_data; - CodedBitstreamFragment *obu = &enc->current_access_unit; - - AV1RawOBU padding_obu = { 0 }; - AV1RawPadding *raw_padding = &padding_obu.obu.padding; - - if (!padding) - padding = 16; - - /* 2 byte header + 1 byte trailing bits */ - padding_obu.header.obu_type = AV1_OBU_PADDING; - *raw_padding = (AV1RawPadding) { - .payload = enc->padding_payload, - .payload_size = padding, - }; - - err = vulkan_encode_av1_add_obu(avctx, obu, AV1_OBU_PADDING, &padding_obu); - if (err < 0) - goto fail; - - err = vulkan_encode_av1_write_obu(avctx, data, data_len, obu); -fail: - ff_cbs_fragment_reset(obu); - return err; -} - -static const FFVulkanCodec enc_cb = { - .flags = FF_HW_FLAG_B_PICTURES | - FF_HW_FLAG_B_PICTURE_REFERENCES | - VK_ENC_FLAG_NO_DELAY | - FF_HW_FLAG_SLICE_CONTROL, - .picture_priv_data_size = sizeof(VulkanEncodeAV1Picture), - .filler_header_size = 4, - .init_profile = init_profile, - .init_pic_rc = init_pic_rc, - .init_pic_params = init_pic_params, - .write_sequence_headers = write_sequence_header, - .write_extra_headers = write_extra_headers, - .write_filler = write_padding, -}; - -static av_cold int vulkan_encode_av1_init(AVCodecContext *avctx) -{ - int err; - VulkanEncodeAV1Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFHWBaseEncodeContext *base_ctx = &ctx->base; - int flags; - - if (avctx->profile == AV_PROFILE_UNKNOWN) - avctx->profile = enc->common.opts.profile; - - enc->caps = (VkVideoEncodeAV1CapabilitiesKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_CAPABILITIES_KHR, - }; - - enc->quality_props = (VkVideoEncodeAV1QualityLevelPropertiesKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_QUALITY_LEVEL_PROPERTIES_KHR, - }; - - err = ff_vulkan_encode_init(avctx, 
&enc->common, - &ff_vk_enc_av1_desc, &enc_cb, - &enc->caps, &enc->quality_props); - if (err < 0) - return err; - - av_log(avctx, AV_LOG_VERBOSE, "AV1 encoder capabilities:\n"); - av_log(avctx, AV_LOG_VERBOSE, " Standard capability flags:\n"); - av_log(avctx, AV_LOG_VERBOSE, " per_rate_control_group_min_max_q_index: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_PER_RATE_CONTROL_GROUP_MIN_MAX_Q_INDEX_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " generate_obu_extension_header: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_GENERATE_OBU_EXTENSION_HEADER_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " primary_reference_cdf_only: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_PRIMARY_REFERENCE_CDF_ONLY_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " frame_size_override: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_FRAME_SIZE_OVERRIDE_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " motion_vector_scaling: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_MOTION_VECTOR_SCALING_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " Capabilities:\n"); - av_log(avctx, AV_LOG_VERBOSE, " 64x64 superblocks: %i\n", - !!(enc->caps.superblockSizes & VK_VIDEO_ENCODE_AV1_SUPERBLOCK_SIZE_64_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " 128x128 superblocks: %i\n", - !!(enc->caps.superblockSizes & VK_VIDEO_ENCODE_AV1_SUPERBLOCK_SIZE_128_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " maxSingleReferenceCount: %i\n", - enc->caps.maxSingleReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " singleReferenceNameMask: 0x%x\n", - enc->caps.singleReferenceNameMask); - av_log(avctx, AV_LOG_VERBOSE, " maxUnidirectionalCompoundReferenceCount: %i\n", - enc->caps.maxUnidirectionalCompoundReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " maxUnidirectionalCompoundGroup1ReferenceCount: %i\n", - enc->caps.maxUnidirectionalCompoundGroup1ReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " unidirectionalCompoundReferenceNameMask: 0x%x\n", - 
enc->caps.unidirectionalCompoundReferenceNameMask); - av_log(avctx, AV_LOG_VERBOSE, " maxBidirectionalCompoundReferenceCount: %i\n", - enc->caps.maxBidirectionalCompoundReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " maxBidirectionalCompoundGroup1ReferenceCount: %i\n", - enc->caps.maxBidirectionalCompoundGroup1ReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " maxBidirectionalCompoundGroup2ReferenceCount: %i\n", - enc->caps.maxBidirectionalCompoundGroup2ReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " bidirectionalCompoundReferenceNameMask: 0x%x\n", - enc->caps.bidirectionalCompoundReferenceNameMask); - av_log(avctx, AV_LOG_VERBOSE, " maxTemporalLayerCount: %i\n", - enc->caps.maxTemporalLayerCount); - av_log(avctx, AV_LOG_VERBOSE, " maxSpatialLayerCount: %i\n", - enc->caps.maxSpatialLayerCount); - av_log(avctx, AV_LOG_VERBOSE, " maxOperatingPoints: %i\n", - enc->caps.maxOperatingPoints); - av_log(avctx, AV_LOG_VERBOSE, " min/max Qindex: [%i, %i]\n", - enc->caps.minQIndex, enc->caps.maxQIndex); - av_log(avctx, AV_LOG_VERBOSE, " prefersGopRemainingFrames: %i\n", - enc->caps.prefersGopRemainingFrames); - av_log(avctx, AV_LOG_VERBOSE, " requiresGopRemainingFrames: %i\n", - enc->caps.requiresGopRemainingFrames); - av_log(avctx, AV_LOG_VERBOSE, " maxLevel: %i\n", - enc->caps.maxLevel); - av_log(avctx, AV_LOG_VERBOSE, " codedPictureAlignment: %ix%i\n", - enc->caps.codedPictureAlignment.width, enc->caps.codedPictureAlignment.height); - av_log(avctx, AV_LOG_VERBOSE, " maxTiles: %ix%i\n", - enc->caps.maxTiles.width, enc->caps.maxTiles.height); - av_log(avctx, AV_LOG_VERBOSE, " Tile size: %ix%i to %ix%i\n", - enc->caps.minTileSize.width, enc->caps.minTileSize.height, - enc->caps.maxTileSize.width, enc->caps.maxTileSize.height); - - err = init_enc_options(avctx); - if (err < 0) - return err; - - flags = ctx->codec->flags; - err = ff_hw_base_init_gop_structure(base_ctx, avctx, - ctx->caps.maxDpbSlots, - enc->caps.maxBidirectionalCompoundReferenceCount, - flags, 0); - if 
(err < 0) - return err; - - base_ctx->output_delay = base_ctx->b_per_p; - base_ctx->decode_delay = base_ctx->max_b_depth; - - /* Create units and session parameters */ - err = init_base_units(avctx); - if (err < 0) - return err; - - /* Init CBS */ - err = ff_cbs_init(&enc->cbs, AV_CODEC_ID_AV1, avctx); - if (err < 0) - return err; - - if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) { - uint8_t data[4096]; - size_t data_len = sizeof(data); - - err = write_sequence_header(avctx, NULL, data, &data_len); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to write sequence header " - "for extradata: %d.\n", err); - return err; - } else { - avctx->extradata_size = data_len; - avctx->extradata = av_mallocz(avctx->extradata_size + - AV_INPUT_BUFFER_PADDING_SIZE); - if (!avctx->extradata) { - err = AVERROR(ENOMEM); - return err; - } - memcpy(avctx->extradata, data, avctx->extradata_size); - } - } - - enc->padding_payload = av_mallocz(2*ctx->caps.minBitstreamBufferOffsetAlignment); - if (!enc->padding_payload) - return AVERROR(ENOMEM); - - memset(enc->padding_payload, 0xaa, 2*ctx->caps.minBitstreamBufferOffsetAlignment); - - return 0; -} - -static av_cold int vulkan_encode_av1_close(AVCodecContext *avctx) -{ - VulkanEncodeAV1Context *enc = avctx->priv_data; - av_free(enc->padding_payload); - ff_vulkan_encode_uninit(&enc->common); - return 0; -} - -#define OFFSET(x) offsetof(VulkanEncodeAV1Context, x) -#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) -static const AVOption vulkan_encode_av1_options[] = { - HW_BASE_ENCODE_COMMON_OPTIONS, - VULKAN_ENCODE_COMMON_OPTIONS, - - { "profile", "Set profile", - OFFSET(common.opts.profile), AV_OPT_TYPE_INT, - { .i64 = AV_PROFILE_UNKNOWN }, AV_PROFILE_UNKNOWN, 0xffff, FLAGS, .unit = "profile" }, - -#define PROFILE(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ - { .i64 = value }, 0, 0, FLAGS, .unit = "profile" - { PROFILE("main", AV_PROFILE_AV1_MAIN) }, - { PROFILE("high", AV_PROFILE_AV1_HIGH) }, - { 
PROFILE("professional", AV_PROFILE_AV1_PROFESSIONAL) }, -#undef PROFILE - - { "tier", "Set tier (seq_tier)", - OFFSET(common.opts.tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, .unit = "tier" }, - { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, .unit = "tier" }, - { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "tier" }, - - { "level", "Set level (level_idc)", - OFFSET(common.opts.level), AV_OPT_TYPE_INT, - { .i64 = AV_LEVEL_UNKNOWN }, AV_LEVEL_UNKNOWN, 0xff, FLAGS, .unit = "level" }, - -#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ - { .i64 = value }, 0, 0, FLAGS, .unit = "level" - { LEVEL("20", 0) }, - { LEVEL("21", 1) }, - { LEVEL("22", 2) }, - { LEVEL("23", 3) }, - { LEVEL("30", 4) }, - { LEVEL("31", 5) }, - { LEVEL("32", 6) }, - { LEVEL("33", 7) }, - { LEVEL("40", 8) }, - { LEVEL("41", 9) }, - { LEVEL("42", 10) }, - { LEVEL("43", 11) }, - { LEVEL("50", 12) }, - { LEVEL("51", 13) }, - { LEVEL("52", 14) }, - { LEVEL("53", 15) }, - { LEVEL("60", 16) }, - { LEVEL("61", 17) }, - { LEVEL("62", 18) }, - { LEVEL("63", 19) }, - { LEVEL("70", 20) }, - { LEVEL("71", 21) }, - { LEVEL("72", 22) }, - { LEVEL("73", 23) }, -#undef LEVEL - - { "units", "Set units to include", OFFSET(unit_elems), AV_OPT_TYPE_FLAGS, { .i64 = UNIT_MASTERING_DISPLAY | UNIT_CONTENT_LIGHT_LEVEL }, 0, INT_MAX, FLAGS, "units" }, - { "hdr", "Include HDR metadata for mastering display colour volume and content light level information", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_MASTERING_DISPLAY | UNIT_CONTENT_LIGHT_LEVEL }, INT_MIN, INT_MAX, FLAGS, "units" }, - - { NULL }, -}; - -static const FFCodecDefault vulkan_encode_av1_defaults[] = { - { "b", "0" }, - { "bf", "2" }, - { "g", "300" }, - { "qmin", "1" }, - { "qmax", "255" }, - { "refs", "0" }, - { NULL }, -}; - -static const AVClass vulkan_encode_av1_class = { - .class_name = "av1_vulkan", - .item_name = av_default_item_name, - .option = vulkan_encode_av1_options, - .version = 
LIBAVUTIL_VERSION_INT, -}; - -const FFCodec ff_av1_vulkan_encoder = { - .p.name = "av1_vulkan", - CODEC_LONG_NAME("AV1 (Vulkan)"), - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_AV1, - .priv_data_size = sizeof(VulkanEncodeAV1Context), - .init = &vulkan_encode_av1_init, - FF_CODEC_RECEIVE_PACKET_CB(&ff_vulkan_encode_receive_packet), - .close = &vulkan_encode_av1_close, - .p.priv_class = &vulkan_encode_av1_class, - .p.capabilities = AV_CODEC_CAP_DELAY | - AV_CODEC_CAP_HARDWARE | - AV_CODEC_CAP_DR1 | - AV_CODEC_CAP_ENCODER_FLUSH | - AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, - .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, - .defaults = vulkan_encode_av1_defaults, - CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), - .hw_configs = ff_vulkan_encode_hw_configs, - .p.wrapper_name = "vulkan", -}; -- 2.49.1 From 9711a91b3b9f7b3693a0faa62e22d98ba3d2a717 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:33:07 +0000 Subject: [PATCH 074/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_encode_av1.c | 1401 +++++++++++++++++++++++++ 1 file changed, 1401 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_encode_av1.c diff --git a/libavcodec/vulkan/vulkan_encode_av1.c b/libavcodec/vulkan/vulkan_encode_av1.c new file mode 100644 index 0000000000..a2e4b575bb --- /dev/null +++ b/libavcodec/vulkan/vulkan_encode_av1.c @@ -0,0 +1,1401 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "libavutil/mem.h" + +#include "libavcodec/cbs.h" +#include "libavcodec/cbs_av1.h" +#include "libavcodec/av1_levels.h" +#include "libavutil/mastering_display_metadata.h" + +#include "libavcodec/codec_internal.h" +#include "vulkan_encode.h" + +#include "libavutil/avassert.h" + +const FFVulkanEncodeDescriptor ff_vk_enc_av1_desc = { + .codec_id = AV_CODEC_ID_AV1, + .encode_extension = FF_VK_EXT_VIDEO_ENCODE_AV1, + .encode_op = VK_VIDEO_CODEC_OPERATION_ENCODE_AV1_BIT_KHR, + .ext_props = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_ENCODE_SPEC_VERSION, + }, +}; + +enum UnitElems { + UNIT_MASTERING_DISPLAY = 1 << 0, + UNIT_CONTENT_LIGHT_LEVEL = 1 << 1, +}; + +typedef struct VulkanEncodeAV1Picture { + int slot; + int64_t last_idr_frame; + + enum UnitElems units_needed; + + StdVideoAV1TileInfo tile_info; + StdVideoAV1Quantization quantization; + StdVideoAV1Segmentation segmentation; + StdVideoAV1LoopFilter loop_filter; + StdVideoAV1CDEF cdef; + StdVideoAV1LoopRestoration loop_restoration; + StdVideoAV1GlobalMotion global_motion; + + StdVideoEncodeAV1PictureInfo av1pic_info; + VkVideoEncodeAV1PictureInfoKHR vkav1pic_info; + + StdVideoEncodeAV1ExtensionHeader ext_header; + StdVideoEncodeAV1ReferenceInfo av1dpb_info; + VkVideoEncodeAV1DpbSlotInfoKHR vkav1dpb_info; + + VkVideoEncodeAV1RateControlInfoKHR vkrc_info; + VkVideoEncodeAV1RateControlLayerInfoKHR vkrc_layer_info; + VkVideoEncodeAV1GopRemainingFrameInfoKHR vkrc_remaining; +} VulkanEncodeAV1Picture; + +typedef struct VulkanEncodeAV1Context { + FFVulkanEncodeContext common; + + CodedBitstreamContext *cbs; + CodedBitstreamFragment current_access_unit; + + enum UnitElems unit_elems; + 
AV1RawOBU seq_hdr_obu; + AV1RawOBU meta_cll_obu; + AV1RawOBU meta_mastering_obu; + + VkVideoEncodeAV1ProfileInfoKHR profile; + + VkVideoEncodeAV1CapabilitiesKHR caps; + VkVideoEncodeAV1QualityLevelPropertiesKHR quality_props; + + uint64_t hrd_buffer_size; + uint64_t initial_buffer_fullness; + + int uniform_tile; + int tile_cols; + int tile_rows; + + int seq_tier; + int seq_level_idx; + + int q_idx_idr; + int q_idx_p; + int q_idx_b; + + uint8_t *padding_payload; +} VulkanEncodeAV1Context; + +static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + VkVideoEncodeRateControlInfoKHR *rc_info, + VkVideoEncodeRateControlLayerInfoKHR *rc_layer) +{ + VulkanEncodeAV1Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + VulkanEncodeAV1Picture *ap = pic->codec_priv; + + /* This can be easy to calculate */ + ap->vkrc_remaining = (VkVideoEncodeAV1GopRemainingFrameInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_GOP_REMAINING_FRAME_INFO_KHR, + .useGopRemainingFrames = 0, + .gopRemainingIntra = 0, + .gopRemainingPredictive = 0, + .gopRemainingBipredictive = 0, + }; + + ap->vkrc_info = (VkVideoEncodeAV1RateControlInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_RATE_CONTROL_INFO_KHR, + .flags = VK_VIDEO_ENCODE_AV1_RATE_CONTROL_REFERENCE_PATTERN_FLAT_BIT_KHR | + VK_VIDEO_ENCODE_AV1_RATE_CONTROL_REGULAR_GOP_BIT_KHR, + .gopFrameCount = ctx->base.gop_size, + .keyFramePeriod = ctx->base.gop_size, + .consecutiveBipredictiveFrameCount = FFMAX(ctx->base.b_per_p - 1, 0), + .temporalLayerCount = 0, + }; + rc_info->pNext = &ap->vkrc_info; + + if (rc_info->rateControlMode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { + rc_info->virtualBufferSizeInMs = (enc->hrd_buffer_size * 1000LL) / avctx->bit_rate; + rc_info->initialVirtualBufferSizeInMs = (enc->initial_buffer_fullness * 1000LL) / avctx->bit_rate; + + ap->vkrc_layer_info = (VkVideoEncodeAV1RateControlLayerInfoKHR) { + .sType = 
VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_RATE_CONTROL_LAYER_INFO_KHR, + + .useMinQIndex = avctx->qmin > 0, + .minQIndex.intraQIndex = avctx->qmin > 0 ? avctx->qmin : 0, + .minQIndex.predictiveQIndex = avctx->qmin > 0 ? avctx->qmin : 0, + .minQIndex.bipredictiveQIndex = avctx->qmin > 0 ? avctx->qmin : 0, + + .useMaxQIndex = avctx->qmax > 0, + .maxQIndex.intraQIndex = avctx->qmax > 0 ? avctx->qmax : 0, + .maxQIndex.predictiveQIndex = avctx->qmax > 0 ? avctx->qmax : 0, + .maxQIndex.bipredictiveQIndex = avctx->qmax > 0 ? avctx->qmax : 0, + + .useMaxFrameSize = 0, + }; + rc_layer->pNext = &ap->vkrc_layer_info; + ap->vkrc_info.temporalLayerCount = 1; + } + + return 0; +} + +static void set_name_slot(int slot, int *slot_indices, uint32_t allowed_idx, int group) +{ + int from = group ? AV1_REF_FRAME_GOLDEN : 0; + int to = group ? AV1_REFS_PER_FRAME : AV1_REF_FRAME_GOLDEN; + + for (int i = from; i < to; i++) { + if ((slot_indices[i] == -1) && (allowed_idx & (1 << i))) { + slot_indices[i] = slot; + return; + } + } + + av_assert0(0); +} + + +static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + VkVideoEncodeInfoKHR *encode_info) +{ + VulkanEncodeAV1Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFHWBaseEncodeContext *base_ctx = &ctx->base; + + VulkanEncodeAV1Picture *ap = pic->codec_priv; + FFHWBaseEncodePicture *ref; + VulkanEncodeAV1Picture *ap_ref; + VkVideoReferenceSlotInfoKHR *ref_slot; + + uint32_t ref_name_mask = 0x0; + int name_slots[STD_VIDEO_AV1_REFS_PER_FRAME]; + + StdVideoAV1Segmentation *segmentation = &ap->segmentation; + StdVideoAV1LoopFilter *loop_filter = &ap->loop_filter; + StdVideoAV1Quantization *quantization = &ap->quantization; + StdVideoAV1CDEF *cdef = &ap->cdef; + StdVideoAV1LoopRestoration *loop_restoration = &ap->loop_restoration; + StdVideoAV1GlobalMotion *global_motion = &ap->global_motion; + StdVideoAV1TileInfo *tile_info = &ap->tile_info; + static const int8_t 
default_loop_filter_ref_deltas[STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME] = + { 1, 0, 0, 0, -1, 0, -1, -1 }; + + VkVideoEncodeAV1PredictionModeKHR pred_mode; + VkVideoEncodeAV1RateControlGroupKHR rc_group; + int lr_unit_shift = 0; + int lr_uv_shift = 0; + + ap->ext_header = (StdVideoEncodeAV1ExtensionHeader) { + .temporal_id = 0, + .spatial_id = 0, + }; + + *tile_info = (StdVideoAV1TileInfo) { + .flags = (StdVideoAV1TileInfoFlags) { + .uniform_tile_spacing_flag = enc->uniform_tile, + }, + .TileCols = enc->tile_cols, + .TileRows = enc->tile_rows, + .context_update_tile_id = 0, + .tile_size_bytes_minus_1 = 0, + }; + + for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) { + global_motion->GmType[i] = 0; + for (int j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; j++) { + global_motion->gm_params[i][j] = 0; + } + } + + for (int i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; i++) + name_slots[i] = -1; + + *loop_restoration = (StdVideoAV1LoopRestoration) { + .FrameRestorationType[0] = STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_NONE, + .FrameRestorationType[1] = STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_NONE, + .FrameRestorationType[2] = STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_NONE, + .LoopRestorationSize[0] = 1 + lr_unit_shift, + .LoopRestorationSize[1] = 1 + lr_unit_shift - lr_uv_shift, + .LoopRestorationSize[2] = 1 + lr_unit_shift - lr_uv_shift, + }; + + *cdef = (StdVideoAV1CDEF) { + .cdef_damping_minus_3 = 0, + .cdef_bits = 0, + }; + + for (int i = 0; i < STD_VIDEO_AV1_MAX_SEGMENTS; i++) { + segmentation->FeatureEnabled[i] = 0x0; + for (int j = 0; j < STD_VIDEO_AV1_SEG_LVL_MAX; j++) { + segmentation->FeatureEnabled[i] |= 0x0; + segmentation->FeatureData[i][j] = 0; + } + } + + *loop_filter = (StdVideoAV1LoopFilter) { + .flags = (StdVideoAV1LoopFilterFlags) { + .loop_filter_delta_enabled = 0, + .loop_filter_delta_update = 0, + }, + .loop_filter_level = { 0 }, + .loop_filter_sharpness = 0, + .update_ref_delta = 0, + .loop_filter_ref_deltas = { 0 }, + .update_mode_delta = 0, + 
.loop_filter_mode_deltas = { 0 }, + }; + loop_filter->update_mode_delta = 1; + memcpy(loop_filter->loop_filter_ref_deltas, default_loop_filter_ref_deltas, + STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME * sizeof(int8_t)); + + *quantization = (StdVideoAV1Quantization) { + .flags = (StdVideoAV1QuantizationFlags) { + .using_qmatrix = 0, + .diff_uv_delta = 0, + /* Reserved */ + }, + .base_q_idx = 0, /* Set later */ + .DeltaQYDc = 0, + .DeltaQUDc = 0, + .DeltaQUAc = 0, + .DeltaQVDc = 0, + .DeltaQVAc = 0, + .qm_y = 0, + .qm_u = 0, + .qm_v = 0, + }; + + ref_slot = (VkVideoReferenceSlotInfoKHR *)encode_info->pSetupReferenceSlot; + ap->av1pic_info = (StdVideoEncodeAV1PictureInfo) { + .flags = (StdVideoEncodeAV1PictureInfoFlags) { + .error_resilient_mode = (pic->type == FF_HW_PICTURE_TYPE_I || + pic->type == FF_HW_PICTURE_TYPE_IDR) && + (pic->display_order <= pic->encode_order), + .disable_cdf_update = 0, + .use_superres = 0, + .render_and_frame_size_different = 0, + .allow_screen_content_tools = 0, + .is_filter_switchable = 0, + .force_integer_mv = 0, + .frame_size_override_flag = 0, + .buffer_removal_time_present_flag = 0, + .allow_intrabc = 0, + .frame_refs_short_signaling = 0, + .allow_high_precision_mv = 0, + .is_motion_mode_switchable = 0, + .use_ref_frame_mvs = 0, + .disable_frame_end_update_cdf = 0, + .allow_warped_motion = 0, + .reduced_tx_set = 0, + .skip_mode_present = 0, + .delta_q_present = 0, + .delta_lf_present = 0, + .delta_lf_multi = 0, + .segmentation_enabled = 0, + .segmentation_update_map = 0, + .segmentation_temporal_update = 0, + .segmentation_update_data = 0, + .UsesLr = 0, + .usesChromaLr = 0, + .show_frame = pic->display_order <= pic->encode_order, + .showable_frame = 0, + /* Reserved */ + }, + .frame_type = 0, // set later + .frame_presentation_time = 0, + .current_frame_id = ref_slot->slotIndex, + .order_hint = 0, // set later + .primary_ref_frame = 0, // set later + .refresh_frame_flags = 0x0, // set later + .coded_denom = 0, + .render_width_minus_1 = 
base_ctx->surface_width - 1, + .render_height_minus_1 = base_ctx->surface_height - 1, + .interpolation_filter = 0, + .TxMode = STD_VIDEO_AV1_TX_MODE_SELECT, + .delta_q_res = 0, + .delta_lf_res = 0, + .ref_order_hint = { 0 }, // set later + .ref_frame_idx = { 0 }, // set later + /* Reserved */ + .delta_frame_id_minus_1 = { 0 }, + +// .pTileInfo = tile_info, TODO FIX + .pQuantization = quantization, + .pSegmentation = segmentation, + .pLoopFilter = loop_filter, + .pCDEF = cdef, + .pLoopRestoration = loop_restoration, + .pGlobalMotion = global_motion, + .pExtensionHeader = &ap->ext_header, + .pBufferRemovalTimes = NULL, + }; + + switch (pic->type) { + case FF_HW_PICTURE_TYPE_I: + case FF_HW_PICTURE_TYPE_IDR: + av_assert0(pic->nb_refs[0] == 0 || pic->nb_refs[1]); + ap->av1pic_info.frame_type = STD_VIDEO_AV1_FRAME_TYPE_KEY; + ap->av1pic_info.refresh_frame_flags = 0xFF; + quantization->base_q_idx = enc->q_idx_idr; + ap->slot = 0; + ap->last_idr_frame = pic->display_order; + pred_mode = VK_VIDEO_ENCODE_AV1_PREDICTION_MODE_INTRA_ONLY_KHR; + rc_group = VK_VIDEO_ENCODE_AV1_RATE_CONTROL_GROUP_INTRA_KHR; + break; + case FF_HW_PICTURE_TYPE_P: + ref = pic->refs[0][pic->nb_refs[0] - 1]; + ap_ref = ref->codec_priv; + + ap->av1pic_info.frame_type = STD_VIDEO_AV1_FRAME_TYPE_INTER; + quantization->base_q_idx = enc->q_idx_p; + + ap->last_idr_frame = ap_ref->last_idr_frame; + ap->slot = !ap_ref->slot; + + ap->av1pic_info.refresh_frame_flags = 1 << ap->slot; + + /** set the nearest frame in L0 as all reference frame. 
*/ + for (int i = 0; i < AV1_REFS_PER_FRAME; i++) + ap->av1pic_info.ref_frame_idx[i] = ap_ref->slot; + + ap->av1pic_info.primary_ref_frame = ap_ref->slot; + ap->av1pic_info.ref_order_hint[ap_ref->slot] = ref->display_order - ap_ref->last_idr_frame; + rc_group = VK_VIDEO_ENCODE_AV1_RATE_CONTROL_GROUP_PREDICTIVE_KHR; + pred_mode = VK_VIDEO_ENCODE_AV1_PREDICTION_MODE_SINGLE_REFERENCE_KHR; + ref_name_mask = enc->caps.singleReferenceNameMask; + set_name_slot(ap_ref->av1pic_info.current_frame_id, name_slots, ref_name_mask, 0); + +// vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST; + + /** set the 2nd nearest frame in L0 as Golden frame. */ + if ((pic->nb_refs[0] > 1) && + ((enc->caps.maxSingleReferenceCount > 1) || + (enc->caps.maxUnidirectionalCompoundReferenceCount > 0))) { + if (enc->caps.maxUnidirectionalCompoundReferenceCount) { + pred_mode = VK_VIDEO_ENCODE_AV1_PREDICTION_MODE_UNIDIRECTIONAL_COMPOUND_KHR; + ref_name_mask = enc->caps.unidirectionalCompoundReferenceNameMask; + } + ref = pic->refs[0][pic->nb_refs[0] - 2]; + ap_ref = ref->codec_priv; + ap->av1pic_info.ref_frame_idx[3] = ap_ref->slot; + ap->av1pic_info.ref_order_hint[ap_ref->slot] = ref->display_order - ap_ref->last_idr_frame; +// vpic->ref_frame_ctrl_l0.fields.search_idx1 = AV1_REF_FRAME_GOLDEN; + set_name_slot(ap_ref->av1pic_info.current_frame_id, name_slots, ref_name_mask, 0); + } + break; + case FF_HW_PICTURE_TYPE_B: + ap->av1pic_info.frame_type = STD_VIDEO_AV1_FRAME_TYPE_INTER; + quantization->base_q_idx = enc->q_idx_b; + ap->av1pic_info.refresh_frame_flags = 0x0; + + rc_group = VK_VIDEO_ENCODE_AV1_RATE_CONTROL_GROUP_BIPREDICTIVE_KHR; + pred_mode = VK_VIDEO_ENCODE_AV1_PREDICTION_MODE_BIDIRECTIONAL_COMPOUND_KHR; + ref_name_mask = enc->caps.bidirectionalCompoundReferenceNameMask; + +// fh->reference_select = 1; + /** B frame will not be referenced, disable its recon frame. */ +// vpic->picture_flags.bits.disable_frame_recon = 1; + + /** Use LAST_FRAME and BWDREF_FRAME for reference. 
*/ +// vpic->ref_frame_ctrl_l0.fields.search_idx0 = AV1_REF_FRAME_LAST; +// vpic->ref_frame_ctrl_l1.fields.search_idx0 = AV1_REF_FRAME_BWDREF; + + ref = pic->refs[0][pic->nb_refs[0] - 1]; + ap_ref = ref->codec_priv; + ap->last_idr_frame = ap_ref->last_idr_frame; + ap->av1pic_info.primary_ref_frame = ap_ref->slot; + ap->av1pic_info.ref_order_hint[ap_ref->slot] = ref->display_order - ap_ref->last_idr_frame; + for (int i = 0; i < AV1_REF_FRAME_GOLDEN; i++) + ap->av1pic_info.ref_frame_idx[i] = ap_ref->slot; + set_name_slot(ap_ref->av1pic_info.current_frame_id, name_slots, ref_name_mask, 0); + + ref = pic->refs[1][pic->nb_refs[1] - 1]; + ap_ref = ref->codec_priv; + ap->av1pic_info.ref_order_hint[ap_ref->slot] = ref->display_order - ap_ref->last_idr_frame; + for (int i = AV1_REF_FRAME_GOLDEN; i < AV1_REFS_PER_FRAME; i++) + ap->av1pic_info.ref_frame_idx[i] = ap_ref->slot; + set_name_slot(ap_ref->av1pic_info.current_frame_id, name_slots, ref_name_mask, 1); + break; + } + + ap->av1pic_info.flags.showable_frame = ap->av1pic_info.frame_type != STD_VIDEO_AV1_FRAME_TYPE_KEY; + ap->av1pic_info.order_hint = pic->display_order - ap->last_idr_frame; + + ap->vkav1pic_info = (VkVideoEncodeAV1PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_PICTURE_INFO_KHR, + .pNext = NULL, + .predictionMode = pred_mode, + .rateControlGroup = rc_group, + .constantQIndex = quantization->base_q_idx, + .pStdPictureInfo = &ap->av1pic_info, + // .referenceNameSlotIndices is set below + .primaryReferenceCdfOnly = 0, + .generateObuExtensionHeader = 0, + }; + encode_info->pNext = &ap->vkav1pic_info; + + for (int i = 0; i < FF_ARRAY_ELEMS(ap->vkav1pic_info.referenceNameSlotIndices); i++) + ap->vkav1pic_info.referenceNameSlotIndices[i] = name_slots[i]; + + ref_slot = (VkVideoReferenceSlotInfoKHR *)encode_info->pSetupReferenceSlot; + ref_slot->pNext = &ap->vkav1dpb_info; + + ap->av1dpb_info = (StdVideoEncodeAV1ReferenceInfo) { + .flags = (StdVideoEncodeAV1ReferenceInfoFlags) { + 
.disable_frame_end_update_cdf = 0, + .segmentation_enabled = 0, + /* Reserved */ + }, + .RefFrameId = ref_slot->slotIndex, + .frame_type = ap->av1pic_info.frame_type, + .OrderHint = pic->display_order - ap->last_idr_frame, + /* Reserved */ + .pExtensionHeader = &ap->ext_header, + }; + + ap->vkav1dpb_info = (VkVideoEncodeAV1DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = &ap->av1dpb_info, + }; + + ap->units_needed = 0; + if (pic->type == FF_HW_PICTURE_TYPE_IDR) { + AVFrameSideData *sd = NULL; + if (enc->unit_elems & UNIT_MASTERING_DISPLAY) + sd = av_frame_get_side_data(pic->input_image, + AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); + if (sd) { + AVMasteringDisplayMetadata *mdm = + (AVMasteringDisplayMetadata *)sd->data; + if (mdm->has_primaries && mdm->has_luminance) { + AV1RawOBU *obu = &enc->meta_mastering_obu; + AV1RawMetadata *md = &obu->obu.metadata; + AV1RawMetadataHDRMDCV *mdcv = &md->metadata.hdr_mdcv; + const int chroma_den = 1 << 16; + const int max_luma_den = 1 << 8; + const int min_luma_den = 1 << 14; + + memset(obu, 0, sizeof(*obu)); + obu->header.obu_type = AV1_OBU_METADATA; + md->metadata_type = AV1_METADATA_TYPE_HDR_MDCV; + + for (int i = 0; i < 3; i++) { + mdcv->primary_chromaticity_x[i] = + av_rescale(mdm->display_primaries[i][0].num, chroma_den, + mdm->display_primaries[i][0].den); + mdcv->primary_chromaticity_y[i] = + av_rescale(mdm->display_primaries[i][1].num, chroma_den, + mdm->display_primaries[i][1].den); + } + + mdcv->white_point_chromaticity_x = + av_rescale(mdm->white_point[0].num, chroma_den, + mdm->white_point[0].den); + mdcv->white_point_chromaticity_y = + av_rescale(mdm->white_point[1].num, chroma_den, + mdm->white_point[1].den); + + mdcv->luminance_max = + av_rescale(mdm->max_luminance.num, max_luma_den, + mdm->max_luminance.den); + mdcv->luminance_min = + av_rescale(mdm->min_luminance.num, min_luma_den, + mdm->min_luminance.den); + ap->units_needed |= UNIT_MASTERING_DISPLAY; + 
} + } + + if (enc->unit_elems & UNIT_CONTENT_LIGHT_LEVEL) + sd = av_frame_get_side_data(pic->input_image, + AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); + if (sd) { + AVContentLightMetadata *cllm = (AVContentLightMetadata *)sd->data; + AV1RawOBU *obu = &enc->meta_cll_obu; + AV1RawMetadata *md = &obu->obu.metadata; + AV1RawMetadataHDRCLL *cll = &md->metadata.hdr_cll; + + memset(obu, 0, sizeof(*obu)); + obu->header.obu_type = AV1_OBU_METADATA; + md->metadata_type = AV1_METADATA_TYPE_HDR_CLL; + cll->max_cll = cllm->MaxCLL; + cll->max_fall = cllm->MaxFALL; + + ap->units_needed |= UNIT_CONTENT_LIGHT_LEVEL; + } + } + + return 0; +} + +static int init_profile(AVCodecContext *avctx, + VkVideoProfileInfoKHR *profile, void *pnext) +{ + VkResult ret; + VulkanEncodeAV1Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + FFHWBaseEncodeContext *base_ctx = &ctx->base; + + VkVideoEncodeAV1CapabilitiesKHR av1_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_CAPABILITIES_KHR, + }; + VkVideoEncodeCapabilitiesKHR enc_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR, + .pNext = &av1_caps, + }; + VkVideoCapabilitiesKHR caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR, + .pNext = &enc_caps, + }; + + /* In order of preference */ + int last_supported = AV_PROFILE_UNKNOWN; + static const int known_profiles[] = { + AV_PROFILE_AV1_MAIN, + AV_PROFILE_AV1_HIGH, + AV_PROFILE_AV1_PROFESSIONAL, + }; + int nb_profiles = FF_ARRAY_ELEMS(known_profiles); + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->frames->sw_format); + if (!desc) + return AVERROR(EINVAL); + + if (s->frames->sw_format == AV_PIX_FMT_NV12 || + s->frames->sw_format == AV_PIX_FMT_P010) + nb_profiles = 1; + + enc->profile = (VkVideoEncodeAV1ProfileInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_PROFILE_INFO_KHR, + .pNext = pnext, + .stdProfile = ff_vk_av1_profile_to_vk(avctx->profile), + }; 
+ profile->pNext = &enc->profile; + + /* Set level */ + if (avctx->level == AV_LEVEL_UNKNOWN) { + const AV1LevelDescriptor *level; + float framerate = 0.0; + + if (avctx->framerate.num > 0 && avctx->framerate.den > 0) + framerate = av_q2d(avctx->framerate); + + level = ff_av1_guess_level(avctx->bit_rate, enc->seq_tier, + base_ctx->surface_width, base_ctx->surface_height, + enc->tile_rows * enc->tile_cols, + enc->tile_cols, framerate); + if (level) { + av_log(avctx, AV_LOG_VERBOSE, "Using level %s.\n", level->name); + enc->seq_level_idx = ff_vk_av1_level_to_vk(level->level_idx); + } else { + av_log(avctx, AV_LOG_VERBOSE, "Stream will not conform to " + "any normal level, using level 7.3 by default.\n"); + enc->seq_level_idx = STD_VIDEO_AV1_LEVEL_7_3; + enc->seq_tier = 1; + } + } else { + enc->seq_level_idx = ff_vk_av1_level_to_vk(avctx->level); + } + + /* User has explicitly specified a profile. */ + if (avctx->profile != AV_PROFILE_UNKNOWN) + return 0; + + av_log(avctx, AV_LOG_DEBUG, "Supported profiles:\n"); + for (int i = 0; i < nb_profiles; i++) { + enc->profile.stdProfile = ff_vk_av1_profile_to_vk(known_profiles[i]); + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, + profile, + &caps); + if (ret == VK_SUCCESS) { + av_log(avctx, AV_LOG_DEBUG, " %s\n", + avcodec_profile_name(avctx->codec_id, known_profiles[i])); + last_supported = known_profiles[i]; + } + } + + if (last_supported == AV_PROFILE_UNKNOWN) { + av_log(avctx, AV_LOG_ERROR, "No supported profiles for given format\n"); + return AVERROR(ENOTSUP); + } + + enc->profile.stdProfile = ff_vk_av1_profile_to_vk(last_supported); + av_log(avctx, AV_LOG_VERBOSE, "Using profile %s\n", + avcodec_profile_name(avctx->codec_id, last_supported)); + avctx->profile = last_supported; + + return 0; +} + +static int init_enc_options(AVCodecContext *avctx) +{ + VulkanEncodeAV1Context *enc = avctx->priv_data; + + if (avctx->rc_buffer_size) + enc->hrd_buffer_size = avctx->rc_buffer_size; + else if 
(avctx->rc_max_rate > 0) + enc->hrd_buffer_size = avctx->rc_max_rate; + else + enc->hrd_buffer_size = avctx->bit_rate; + + if (avctx->rc_initial_buffer_occupancy) { + if (avctx->rc_initial_buffer_occupancy > enc->hrd_buffer_size) { + av_log(avctx, AV_LOG_ERROR, "Invalid RC buffer settings: " + "must have initial buffer size (%d) <= " + "buffer size (%"PRId64").\n", + avctx->rc_initial_buffer_occupancy, enc->hrd_buffer_size); + return AVERROR(EINVAL); + } + enc->initial_buffer_fullness = avctx->rc_initial_buffer_occupancy; + } else { + enc->initial_buffer_fullness = enc->hrd_buffer_size * 3 / 4; + } + + if (enc->common.opts.rc_mode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { + enc->q_idx_p = av_clip(enc->common.opts.qp, + enc->caps.minQIndex, enc->caps.maxQIndex); + if (fabs(avctx->i_quant_factor) > 0.0) + enc->q_idx_idr = + av_clip((fabs(avctx->i_quant_factor) * enc->q_idx_p + + avctx->i_quant_offset) + 0.5, + 0, 255); + else + enc->q_idx_idr = enc->q_idx_p; + + if (fabs(avctx->b_quant_factor) > 0.0) + enc->q_idx_b = + av_clip((fabs(avctx->b_quant_factor) * enc->q_idx_p + + avctx->b_quant_offset) + 0.5, + 0, 255); + else + enc->q_idx_b = enc->q_idx_p; + } else { + /** Arbitrary value */ + enc->q_idx_idr = enc->q_idx_p = enc->q_idx_b = 128; + } + + return 0; +} + +static av_cold int init_sequence_headers(AVCodecContext *avctx) +{ + VulkanEncodeAV1Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFVulkanContext *s = &ctx->s; + FFHWBaseEncodeContext *base_ctx = &ctx->base; + + AV1RawOBU *seq_obu = &enc->seq_hdr_obu; + AV1RawSequenceHeader *seq = &seq_obu->obu.sequence_header; + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->frames->sw_format); + if (!desc) + return AVERROR(EINVAL); + + seq_obu->header.obu_type = AV1_OBU_SEQUENCE_HEADER; + *seq = (AV1RawSequenceHeader) { + .seq_profile = avctx->profile, + .seq_force_integer_mv = seq->seq_force_screen_content_tools ? 
+ AV1_SELECT_SCREEN_CONTENT_TOOLS : + AV1_SELECT_INTEGER_MV, + .frame_width_bits_minus_1 = av_log2(base_ctx->surface_width), + .frame_height_bits_minus_1 = av_log2(base_ctx->surface_height), + .max_frame_width_minus_1 = base_ctx->surface_width - 1, + .max_frame_height_minus_1 = base_ctx->surface_height - 1, + .enable_order_hint = 1, + .order_hint_bits_minus_1 = av_clip_intp2(av_log2(ctx->base.gop_size), 3), + .use_128x128_superblock = !!(enc->caps.superblockSizes & VK_VIDEO_ENCODE_AV1_SUPERBLOCK_SIZE_128_BIT_KHR), + .color_config = (AV1RawColorConfig) { + .high_bitdepth = desc->comp[0].depth > 8, + .color_primaries = avctx->color_primaries, + .transfer_characteristics = avctx->color_trc, + .matrix_coefficients = avctx->colorspace, + .color_description_present_flag = (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED || + avctx->color_trc != AVCOL_TRC_UNSPECIFIED || + avctx->colorspace != AVCOL_SPC_UNSPECIFIED), + .subsampling_x = desc->log2_chroma_w, + .subsampling_y = desc->log2_chroma_h, + .chroma_sample_position = avctx->chroma_sample_location == AVCHROMA_LOC_LEFT ? + AV1_CSP_VERTICAL : + avctx->chroma_sample_location == AVCHROMA_LOC_TOPLEFT ? 
+ AV1_CSP_COLOCATED : + AV1_CSP_UNKNOWN, + }, + + /* Operating point */ + .seq_tier = { enc->seq_tier }, + .seq_level_idx = { enc->seq_level_idx }, + .decoder_buffer_delay = { base_ctx->decode_delay }, + .encoder_buffer_delay = { base_ctx->output_delay }, + .operating_points_cnt_minus_1 = 1 - 1, + }; + + return 0; +} + +typedef struct VulkanAV1Units { + StdVideoAV1SequenceHeader seq_hdr; + StdVideoAV1TimingInfo timing_info; + StdVideoAV1ColorConfig color_config; + + StdVideoEncodeAV1DecoderModelInfo decoder_model; + StdVideoEncodeAV1OperatingPointInfo operating_points[AV1_MAX_OPERATING_POINTS]; + int nb_operating_points; +} VulkanAV1Units; + +static av_cold int base_unit_to_vk(AVCodecContext *avctx, VulkanAV1Units *vk_units) +{ + VulkanEncodeAV1Context *enc = avctx->priv_data; + + AV1RawOBU *seq_obu = &enc->seq_hdr_obu; + AV1RawSequenceHeader *seq = &seq_obu->obu.sequence_header; + + StdVideoAV1SequenceHeader *seq_hdr = &vk_units->seq_hdr; + StdVideoAV1TimingInfo *timing_info = &vk_units->timing_info; + StdVideoAV1ColorConfig *color_config = &vk_units->color_config; + + StdVideoEncodeAV1OperatingPointInfo *operating_points = vk_units->operating_points; + + *timing_info = (StdVideoAV1TimingInfo) { + .flags = (StdVideoAV1TimingInfoFlags) { + .equal_picture_interval = seq->timing_info.equal_picture_interval, + }, + .num_units_in_display_tick = seq->timing_info.num_units_in_display_tick, + .time_scale = seq->timing_info.time_scale, + .num_ticks_per_picture_minus_1 = seq->timing_info.num_ticks_per_picture_minus_1, + }; + + *color_config = (StdVideoAV1ColorConfig) { + .flags = (StdVideoAV1ColorConfigFlags) { + .mono_chrome = seq->color_config.mono_chrome, + .color_range = seq->color_config.color_range, + .separate_uv_delta_q = seq->color_config.separate_uv_delta_q, + }, + .BitDepth = seq->color_config.twelve_bit ? 12 : + seq->color_config.high_bitdepth ? 
10 : 8, + .subsampling_x = seq->color_config.subsampling_x, + .subsampling_y = seq->color_config.subsampling_y, + .color_primaries = seq->color_config.color_primaries, + .transfer_characteristics = seq->color_config.transfer_characteristics, + .matrix_coefficients = seq->color_config.matrix_coefficients, + }; + + *seq_hdr = (StdVideoAV1SequenceHeader) { + .flags = (StdVideoAV1SequenceHeaderFlags) { + .still_picture = seq->still_picture, + .reduced_still_picture_header = seq->reduced_still_picture_header, + .use_128x128_superblock = seq->use_128x128_superblock, + .enable_filter_intra = seq->enable_filter_intra, + .enable_intra_edge_filter = seq->enable_intra_edge_filter, + .enable_interintra_compound = seq->enable_interintra_compound, + .enable_masked_compound = seq->enable_masked_compound, + .enable_warped_motion = seq->enable_warped_motion, + .enable_dual_filter = seq->enable_dual_filter, + .enable_order_hint = seq->enable_order_hint, + .enable_jnt_comp = seq->enable_jnt_comp, + .enable_ref_frame_mvs = seq->enable_ref_frame_mvs, + .frame_id_numbers_present_flag = seq->frame_id_numbers_present_flag, + .enable_superres = seq->enable_superres, + .enable_cdef = seq->enable_cdef, + .enable_restoration = seq->enable_restoration, + .film_grain_params_present = seq->film_grain_params_present, + .timing_info_present_flag = seq->timing_info_present_flag, + .initial_display_delay_present_flag = seq->initial_display_delay_present_flag, + }, + .seq_profile = seq->seq_profile, + .frame_width_bits_minus_1 = seq->frame_width_bits_minus_1, + .frame_height_bits_minus_1 = seq->frame_height_bits_minus_1, + .max_frame_width_minus_1 = seq->max_frame_width_minus_1, + .max_frame_height_minus_1 = seq->max_frame_height_minus_1, + .delta_frame_id_length_minus_2 = seq->delta_frame_id_length_minus_2, + .additional_frame_id_length_minus_1 = seq->additional_frame_id_length_minus_1, + .order_hint_bits_minus_1 = seq->order_hint_bits_minus_1, + .seq_force_integer_mv = seq->seq_force_integer_mv, + 
.seq_force_screen_content_tools = seq->seq_force_screen_content_tools, + .pTimingInfo = timing_info, + .pColorConfig = color_config, + }; + + for (int i = 0; i <= seq->operating_points_cnt_minus_1; i++) { + operating_points[i] = (StdVideoEncodeAV1OperatingPointInfo) { + .flags = (StdVideoEncodeAV1OperatingPointInfoFlags) { + .decoder_model_present_for_this_op = seq->decoder_model_present_for_this_op[i], + .low_delay_mode_flag = seq->low_delay_mode_flag[i], + .initial_display_delay_present_for_this_op = seq->initial_display_delay_present_for_this_op[i], + /* Reserved */ + }, + .operating_point_idc = seq->operating_point_idc[i], + .seq_level_idx = seq->seq_level_idx[i], + .seq_tier = seq->seq_tier[i], + .decoder_buffer_delay = seq->decoder_buffer_delay[i], + .encoder_buffer_delay = seq->encoder_buffer_delay[i], + .initial_display_delay_minus_1 = seq->initial_display_delay_minus_1[i], + }; + } + vk_units->nb_operating_points = seq->operating_points_cnt_minus_1 + 1; + + return 0; +} + +static int create_session_params(AVCodecContext *avctx) +{ + int err; + VulkanEncodeAV1Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VulkanAV1Units vk_units = { 0 }; + + VkVideoEncodeAV1SessionParametersCreateInfoKHR av1_params; + + /* Convert it to Vulkan */ + err = base_unit_to_vk(avctx, &vk_units); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to convert sequence header to Vulkan: %s\n", + av_err2str(err)); + return err; + } + + /* Destroy the session params */ + if (ctx->session_params) + vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, + ctx->session_params, + s->hwctx->alloc); + + av1_params = (VkVideoEncodeAV1SessionParametersCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pStdSequenceHeader = &vk_units.seq_hdr, + .pStdDecoderModelInfo = &vk_units.decoder_model, + .pStdOperatingPoints = 
vk_units.operating_points, + .stdOperatingPointCount = vk_units.nb_operating_points, + }; + + return ff_vulkan_encode_create_session_params(avctx, ctx, &av1_params); +} + +static int parse_feedback_units(AVCodecContext *avctx, + const uint8_t *data, size_t size) +{ + int err; + VulkanEncodeAV1Context *enc = avctx->priv_data; + AV1RawOBU *seq_obu = &enc->seq_hdr_obu; + AV1RawSequenceHeader *seq = &seq_obu->obu.sequence_header; + + CodedBitstreamContext *cbs; + CodedBitstreamFragment obu = { 0 }; + + err = ff_cbs_init(&cbs, AV_CODEC_ID_AV1, avctx); + if (err < 0) + return err; + + err = ff_cbs_read(cbs, &obu, NULL, data, size); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to parse feedback units, bad drivers: %s\n", + av_err2str(err)); + return err; + } + + /* If PPS has an override, just copy it entirely. */ + for (int i = 0; i < obu.nb_units; i++) { + if (obu.units[i].type == AV1_OBU_SEQUENCE_HEADER) { + AV1RawOBU *f_seq_obu = obu.units[i].content; + AV1RawSequenceHeader *f_seq = &f_seq_obu->obu.sequence_header; + seq->frame_width_bits_minus_1 = f_seq->frame_width_bits_minus_1; + seq->frame_height_bits_minus_1 = f_seq->frame_height_bits_minus_1; + seq->max_frame_width_minus_1 = f_seq->max_frame_width_minus_1; + seq->max_frame_height_minus_1 = f_seq->max_frame_height_minus_1; + seq->seq_choose_screen_content_tools = f_seq->seq_choose_screen_content_tools; + seq->seq_force_screen_content_tools = f_seq->seq_force_screen_content_tools; + seq->seq_choose_integer_mv = f_seq->seq_choose_integer_mv; + seq->seq_force_integer_mv = f_seq->seq_force_integer_mv; + } + } + + ff_cbs_fragment_free(&obu); + ff_cbs_close(&cbs); + + return 0; +} + +static int init_base_units(AVCodecContext *avctx) +{ + int err; + VkResult ret; + VulkanEncodeAV1Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VkVideoEncodeSessionParametersGetInfoKHR params_info; + 
VkVideoEncodeSessionParametersFeedbackInfoKHR params_feedback; + + void *data = NULL; + size_t data_size = 0; + + /* Generate SPS/PPS unit info */ + err = init_sequence_headers(avctx); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize sequence header: %s\n", + av_err2str(err)); + return err; + } + + /* Create session parameters from them */ + err = create_session_params(avctx); + if (err < 0) + return err; + + params_info = (VkVideoEncodeSessionParametersGetInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_GET_INFO_KHR, + .videoSessionParameters = ctx->session_params, + }; + params_feedback = (VkVideoEncodeSessionParametersFeedbackInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_FEEDBACK_INFO_KHR, + }; + + ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, ¶ms_info, + ¶ms_feedback, + &data_size, data); + if (ret == VK_INCOMPLETE || + (ret == VK_SUCCESS) && (data_size > 0)) { + data = av_mallocz(data_size); + if (!data) + return AVERROR(ENOMEM); + } else { + av_log(avctx, AV_LOG_ERROR, "Unable to get feedback for AV1 sequence header = %"SIZE_SPECIFIER"\n", + data_size); + return err; + } + + ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, ¶ms_info, + ¶ms_feedback, + &data_size, data); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Error writing feedback units\n"); + return err; + } + + av_log(avctx, AV_LOG_VERBOSE, "Feedback units written, overrides: %i\n", + params_feedback.hasOverrides); + + params_feedback.hasOverrides = 1; + + /* No need to sync any overrides */ + if (!params_feedback.hasOverrides) + return 0; + + /* Parse back tne units and override */ + err = parse_feedback_units(avctx, data, data_size); + if (err < 0) + return err; + + /* Create final session parameters */ + err = create_session_params(avctx); + if (err < 0) + return err; + + return 0; +} + +static int vulkan_encode_av1_add_obu(AVCodecContext *avctx, + CodedBitstreamFragment *au, + 
uint8_t type, void *obu_unit) +{ + int err; + + err = ff_cbs_insert_unit_content(au, -1, + type, obu_unit, NULL); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to add OBU unit: " + "type = %d.\n", type); + return err; + } + + return err; +} + +static int vulkan_encode_av1_write_obu(AVCodecContext *avctx, + uint8_t *data, size_t *data_len, + CodedBitstreamFragment *obu) +{ + VulkanEncodeAV1Context *enc = avctx->priv_data; + int ret; + + ret = ff_cbs_write_fragment_data(enc->cbs, obu); + if (ret < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n"); + return ret; + } + + memcpy(data, obu->data, obu->data_size); + *data_len = obu->data_size; + + return 0; +} + +static int write_sequence_header(AVCodecContext *avctx, + FFHWBaseEncodePicture *base_pic, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeAV1Context *enc = avctx->priv_data; + CodedBitstreamFragment *obu = &enc->current_access_unit; + + err = vulkan_encode_av1_add_obu(avctx, obu, + AV1_OBU_SEQUENCE_HEADER, &enc->seq_hdr_obu); + if (err < 0) + goto fail; + + err = vulkan_encode_av1_write_obu(avctx, data, data_len, obu); + +fail: + ff_cbs_fragment_reset(obu); + return err; +} + +/** + * Write the metadata OBUs selected by the UNIT_* flags in the picture's + * units_needed (mastering display, content light level) into data. + * *data_len is set to 0 when no unit is needed. + * + * @return 0 on success, a negative AVERROR code on failure + */ +static int write_extra_headers(AVCodecContext *avctx, + FFHWBaseEncodePicture *base_pic, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeAV1Context *enc = avctx->priv_data; + VulkanEncodeAV1Picture *ap = base_pic->codec_priv; + CodedBitstreamFragment *obu = &enc->current_access_unit; + + if (ap->units_needed & UNIT_MASTERING_DISPLAY) { + err = vulkan_encode_av1_add_obu(avctx, obu, + AV1_OBU_METADATA, + &enc->meta_mastering_obu); + if (err < 0) + goto fail; + } + + if (ap->units_needed & UNIT_CONTENT_LIGHT_LEVEL) { + err = vulkan_encode_av1_add_obu(avctx, obu, + AV1_OBU_METADATA, + &enc->meta_cll_obu); + if (err < 0) + goto fail; + } + + if (ap->units_needed) { + err = vulkan_encode_av1_write_obu(avctx, data, data_len, obu); + if (err < 0) + goto fail; + } else { 
+ err = 0; + *data_len = 0; + } + +fail: + ff_cbs_fragment_reset(obu); + return err; +} + +static int write_padding(AVCodecContext *avctx, uint32_t padding, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeAV1Context *enc = avctx->priv_data; + CodedBitstreamFragment *obu = &enc->current_access_unit; + + AV1RawOBU padding_obu = { 0 }; + AV1RawPadding *raw_padding = &padding_obu.obu.padding; + + if (!padding) + padding = 16; + + /* 2 byte header + 1 byte trailing bits */ + padding_obu.header.obu_type = AV1_OBU_PADDING; + *raw_padding = (AV1RawPadding) { + .payload = enc->padding_payload, + .payload_size = padding, + }; + + err = vulkan_encode_av1_add_obu(avctx, obu, AV1_OBU_PADDING, &padding_obu); + if (err < 0) + goto fail; + + err = vulkan_encode_av1_write_obu(avctx, data, data_len, obu); +fail: + ff_cbs_fragment_reset(obu); + return err; +} + +static const FFVulkanCodec enc_cb = { + .flags = FF_HW_FLAG_B_PICTURES | + FF_HW_FLAG_B_PICTURE_REFERENCES | + VK_ENC_FLAG_NO_DELAY | + FF_HW_FLAG_SLICE_CONTROL, + .picture_priv_data_size = sizeof(VulkanEncodeAV1Picture), + .filler_header_size = 4, + .init_profile = init_profile, + .init_pic_rc = init_pic_rc, + .init_pic_params = init_pic_params, + .write_sequence_headers = write_sequence_header, + .write_extra_headers = write_extra_headers, + .write_filler = write_padding, +}; + +static av_cold int vulkan_encode_av1_init(AVCodecContext *avctx) +{ + int err; + VulkanEncodeAV1Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFHWBaseEncodeContext *base_ctx = &ctx->base; + int flags; + + if (avctx->profile == AV_PROFILE_UNKNOWN) + avctx->profile = enc->common.opts.profile; + + enc->caps = (VkVideoEncodeAV1CapabilitiesKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_CAPABILITIES_KHR, + }; + + enc->quality_props = (VkVideoEncodeAV1QualityLevelPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_AV1_QUALITY_LEVEL_PROPERTIES_KHR, + }; + + err = ff_vulkan_encode_init(avctx, 
&enc->common, + &ff_vk_enc_av1_desc, &enc_cb, + &enc->caps, &enc->quality_props); + if (err < 0) + return err; + + av_log(avctx, AV_LOG_VERBOSE, "AV1 encoder capabilities:\n"); + av_log(avctx, AV_LOG_VERBOSE, " Standard capability flags:\n"); + av_log(avctx, AV_LOG_VERBOSE, " per_rate_control_group_min_max_q_index: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_PER_RATE_CONTROL_GROUP_MIN_MAX_Q_INDEX_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " generate_obu_extension_header: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_GENERATE_OBU_EXTENSION_HEADER_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " primary_reference_cdf_only: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_PRIMARY_REFERENCE_CDF_ONLY_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " frame_size_override: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_FRAME_SIZE_OVERRIDE_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " motion_vector_scaling: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_AV1_CAPABILITY_MOTION_VECTOR_SCALING_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " Capabilities:\n"); + av_log(avctx, AV_LOG_VERBOSE, " 64x64 superblocks: %i\n", + !!(enc->caps.superblockSizes & VK_VIDEO_ENCODE_AV1_SUPERBLOCK_SIZE_64_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " 128x128 superblocks: %i\n", + !!(enc->caps.superblockSizes & VK_VIDEO_ENCODE_AV1_SUPERBLOCK_SIZE_128_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " maxSingleReferenceCount: %i\n", + enc->caps.maxSingleReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " singleReferenceNameMask: 0x%x\n", + enc->caps.singleReferenceNameMask); + av_log(avctx, AV_LOG_VERBOSE, " maxUnidirectionalCompoundReferenceCount: %i\n", + enc->caps.maxUnidirectionalCompoundReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " maxUnidirectionalCompoundGroup1ReferenceCount: %i\n", + enc->caps.maxUnidirectionalCompoundGroup1ReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " unidirectionalCompoundReferenceNameMask: 0x%x\n", + 
enc->caps.unidirectionalCompoundReferenceNameMask); + av_log(avctx, AV_LOG_VERBOSE, " maxBidirectionalCompoundReferenceCount: %i\n", + enc->caps.maxBidirectionalCompoundReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " maxBidirectionalCompoundGroup1ReferenceCount: %i\n", + enc->caps.maxBidirectionalCompoundGroup1ReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " maxBidirectionalCompoundGroup2ReferenceCount: %i\n", + enc->caps.maxBidirectionalCompoundGroup2ReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " bidirectionalCompoundReferenceNameMask: 0x%x\n", + enc->caps.bidirectionalCompoundReferenceNameMask); + av_log(avctx, AV_LOG_VERBOSE, " maxTemporalLayerCount: %i\n", + enc->caps.maxTemporalLayerCount); + av_log(avctx, AV_LOG_VERBOSE, " maxSpatialLayerCount: %i\n", + enc->caps.maxSpatialLayerCount); + av_log(avctx, AV_LOG_VERBOSE, " maxOperatingPoints: %i\n", + enc->caps.maxOperatingPoints); + av_log(avctx, AV_LOG_VERBOSE, " min/max Qindex: [%i, %i]\n", + enc->caps.minQIndex, enc->caps.maxQIndex); + av_log(avctx, AV_LOG_VERBOSE, " prefersGopRemainingFrames: %i\n", + enc->caps.prefersGopRemainingFrames); + av_log(avctx, AV_LOG_VERBOSE, " requiresGopRemainingFrames: %i\n", + enc->caps.requiresGopRemainingFrames); + av_log(avctx, AV_LOG_VERBOSE, " maxLevel: %i\n", + enc->caps.maxLevel); + av_log(avctx, AV_LOG_VERBOSE, " codedPictureAlignment: %ix%i\n", + enc->caps.codedPictureAlignment.width, enc->caps.codedPictureAlignment.height); + av_log(avctx, AV_LOG_VERBOSE, " maxTiles: %ix%i\n", + enc->caps.maxTiles.width, enc->caps.maxTiles.height); + av_log(avctx, AV_LOG_VERBOSE, " Tile size: %ix%i to %ix%i\n", + enc->caps.minTileSize.width, enc->caps.minTileSize.height, + enc->caps.maxTileSize.width, enc->caps.maxTileSize.height); + + err = init_enc_options(avctx); + if (err < 0) + return err; + + flags = ctx->codec->flags; + err = ff_hw_base_init_gop_structure(base_ctx, avctx, + ctx->caps.maxDpbSlots, + enc->caps.maxBidirectionalCompoundReferenceCount, + flags, 0); + if 
(err < 0) + return err; + + base_ctx->output_delay = base_ctx->b_per_p; + base_ctx->decode_delay = base_ctx->max_b_depth; + + /* Create units and session parameters */ + err = init_base_units(avctx); + if (err < 0) + return err; + + /* Init CBS */ + err = ff_cbs_init(&enc->cbs, AV_CODEC_ID_AV1, avctx); + if (err < 0) + return err; + + if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) { + uint8_t data[4096]; + size_t data_len = sizeof(data); + + err = write_sequence_header(avctx, NULL, data, &data_len); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write sequence header " + "for extradata: %d.\n", err); + return err; + } else { + avctx->extradata_size = data_len; + avctx->extradata = av_mallocz(avctx->extradata_size + + AV_INPUT_BUFFER_PADDING_SIZE); + if (!avctx->extradata) { + err = AVERROR(ENOMEM); + return err; + } + memcpy(avctx->extradata, data, avctx->extradata_size); + } + } + + enc->padding_payload = av_mallocz(2*ctx->caps.minBitstreamBufferOffsetAlignment); + if (!enc->padding_payload) + return AVERROR(ENOMEM); + + memset(enc->padding_payload, 0xaa, 2*ctx->caps.minBitstreamBufferOffsetAlignment); + + return 0; +} + +static av_cold int vulkan_encode_av1_close(AVCodecContext *avctx) +{ + VulkanEncodeAV1Context *enc = avctx->priv_data; + av_free(enc->padding_payload); + ff_vulkan_encode_uninit(&enc->common); + return 0; +} + +#define OFFSET(x) offsetof(VulkanEncodeAV1Context, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) +static const AVOption vulkan_encode_av1_options[] = { + HW_BASE_ENCODE_COMMON_OPTIONS, + VULKAN_ENCODE_COMMON_OPTIONS, + + { "profile", "Set profile", + OFFSET(common.opts.profile), AV_OPT_TYPE_INT, + { .i64 = AV_PROFILE_UNKNOWN }, AV_PROFILE_UNKNOWN, 0xffff, FLAGS, .unit = "profile" }, + +#define PROFILE(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ + { .i64 = value }, 0, 0, FLAGS, .unit = "profile" + { PROFILE("main", AV_PROFILE_AV1_MAIN) }, + { PROFILE("high", AV_PROFILE_AV1_HIGH) }, + { 
PROFILE("professional", AV_PROFILE_AV1_PROFESSIONAL) }, +#undef PROFILE + + { "tier", "Set tier (seq_tier)", + OFFSET(common.opts.tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, .unit = "tier" }, + { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, .unit = "tier" }, + { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "tier" }, + + { "level", "Set level (level_idc)", + OFFSET(common.opts.level), AV_OPT_TYPE_INT, + { .i64 = AV_LEVEL_UNKNOWN }, AV_LEVEL_UNKNOWN, 0xff, FLAGS, .unit = "level" }, + +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ + { .i64 = value }, 0, 0, FLAGS, .unit = "level" + { LEVEL("20", 0) }, + { LEVEL("21", 1) }, + { LEVEL("22", 2) }, + { LEVEL("23", 3) }, + { LEVEL("30", 4) }, + { LEVEL("31", 5) }, + { LEVEL("32", 6) }, + { LEVEL("33", 7) }, + { LEVEL("40", 8) }, + { LEVEL("41", 9) }, + { LEVEL("42", 10) }, + { LEVEL("43", 11) }, + { LEVEL("50", 12) }, + { LEVEL("51", 13) }, + { LEVEL("52", 14) }, + { LEVEL("53", 15) }, + { LEVEL("60", 16) }, + { LEVEL("61", 17) }, + { LEVEL("62", 18) }, + { LEVEL("63", 19) }, + { LEVEL("70", 20) }, + { LEVEL("71", 21) }, + { LEVEL("72", 22) }, + { LEVEL("73", 23) }, +#undef LEVEL + + { "units", "Set units to include", OFFSET(unit_elems), AV_OPT_TYPE_FLAGS, { .i64 = UNIT_MASTERING_DISPLAY | UNIT_CONTENT_LIGHT_LEVEL }, 0, INT_MAX, FLAGS, "units" }, + { "hdr", "Include HDR metadata for mastering display colour volume and content light level information", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_MASTERING_DISPLAY | UNIT_CONTENT_LIGHT_LEVEL }, INT_MIN, INT_MAX, FLAGS, "units" }, + + { NULL }, +}; + +static const FFCodecDefault vulkan_encode_av1_defaults[] = { + { "b", "0" }, + { "bf", "2" }, + { "g", "300" }, + { "qmin", "1" }, + { "qmax", "255" }, + { "refs", "0" }, + { NULL }, +}; + +static const AVClass vulkan_encode_av1_class = { + .class_name = "av1_vulkan", + .item_name = av_default_item_name, + .option = vulkan_encode_av1_options, + .version = 
LIBAVUTIL_VERSION_INT, +}; + +const FFCodec ff_av1_vulkan_encoder = { + .p.name = "av1_vulkan", + CODEC_LONG_NAME("AV1 (Vulkan)"), + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_AV1, + .priv_data_size = sizeof(VulkanEncodeAV1Context), + .init = &vulkan_encode_av1_init, + FF_CODEC_RECEIVE_PACKET_CB(&ff_vulkan_encode_receive_packet), + .close = &vulkan_encode_av1_close, + .p.priv_class = &vulkan_encode_av1_class, + .p.capabilities = AV_CODEC_CAP_DELAY | + AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1 | + AV_CODEC_CAP_ENCODER_FLUSH | + AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .defaults = vulkan_encode_av1_defaults, + CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), + .hw_configs = ff_vulkan_encode_hw_configs, + .p.wrapper_name = "vulkan", +}; -- 2.49.1 From ba55a3eea07d7300edb5d8de02ba0856a4a0b629 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:33:26 +0000 Subject: [PATCH 075/118] Changing vulkan file directory --- libavcodec/vulkan_encode_h264.c | 1668 ------------------------------- 1 file changed, 1668 deletions(-) delete mode 100644 libavcodec/vulkan_encode_h264.c diff --git a/libavcodec/vulkan_encode_h264.c b/libavcodec/vulkan_encode_h264.c deleted file mode 100644 index 942e911fb7..0000000000 --- a/libavcodec/vulkan_encode_h264.c +++ /dev/null @@ -1,1668 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/internal.h" -#include "libavutil/opt.h" -#include "libavutil/mem.h" - -#include "cbs.h" -#include "cbs_h264.h" -#include "atsc_a53.h" - -#include "h264_levels.h" -#include "h2645data.h" -#include "codec_internal.h" -#include "version.h" -#include "hw_base_encode_h264.h" - -#include "vulkan_encode.h" - -enum UnitElems { - UNIT_AUD = 1 << 0, - UNIT_SEI_TIMING = 1 << 1, - UNIT_SEI_IDENTIFIER = 1 << 2, - UNIT_SEI_RECOVERY = 1 << 3, - UNIT_SEI_A53_CC = 1 << 4, -}; - -const FFVulkanEncodeDescriptor ff_vk_enc_h264_desc = { - .codec_id = AV_CODEC_ID_H264, - .encode_extension = FF_VK_EXT_VIDEO_ENCODE_H264, - .encode_op = VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR, - .ext_props = { - .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME, - .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION, - }, -}; - -/* Random (version 4) ISO 11578 UUID. 
*/ -static const uint8_t vulkan_encode_h264_sei_identifier_uuid[16] = { - 0x03, 0xfd, 0xf2, 0x0a, 0x5d, 0x4c, 0x05, 0x48, - 0x20, 0x98, 0xca, 0x6b, 0x0c, 0x95, 0x30, 0x1c, -}; - -typedef struct VulkanEncodeH264Picture { - int frame_num; - int64_t last_idr_frame; - uint16_t idr_pic_id; - int primary_pic_type; - int slice_type; - int pic_order_cnt; - - enum UnitElems units_needed; - - VkVideoEncodeH264RateControlInfoKHR vkrc_info; - VkVideoEncodeH264RateControlLayerInfoKHR vkrc_layer_info; - VkVideoEncodeH264GopRemainingFrameInfoKHR vkrc_remaining; - - StdVideoEncodeH264WeightTable slice_wt; - StdVideoEncodeH264SliceHeader slice_hdr; - VkVideoEncodeH264NaluSliceInfoKHR vkslice; - - StdVideoEncodeH264PictureInfo h264pic_info; - VkVideoEncodeH264PictureInfoKHR vkh264pic_info; - - StdVideoEncodeH264ReferenceInfo h264dpb_info; - VkVideoEncodeH264DpbSlotInfoKHR vkh264dpb_info; - - StdVideoEncodeH264RefListModEntry mods[MAX_REFERENCE_LIST_NUM][H264_MAX_RPLM_COUNT]; - StdVideoEncodeH264RefPicMarkingEntry mmco[H264_MAX_RPLM_COUNT]; - StdVideoEncodeH264ReferenceListsInfo ref_list_info; -} VulkanEncodeH264Picture; - -typedef struct VulkanEncodeH264Context { - FFVulkanEncodeContext common; - - FFHWBaseEncodeH264 units; - FFHWBaseEncodeH264Opts unit_opts; - - enum UnitElems unit_elems; - - uint8_t fixed_qp_p; - uint8_t fixed_qp_b; - - VkVideoEncodeH264ProfileInfoKHR profile; - - VkVideoEncodeH264CapabilitiesKHR caps; - VkVideoEncodeH264QualityLevelPropertiesKHR quality_props; - - CodedBitstreamContext *cbs; - CodedBitstreamFragment current_access_unit; - - H264RawAUD raw_aud; - - SEIRawUserDataUnregistered sei_identifier; - H264RawSEIPicTiming sei_pic_timing; - H264RawSEIRecoveryPoint sei_recovery_point; - SEIRawUserDataRegistered sei_a53cc; - void *sei_a53cc_data; - char *sei_identifier_string; -} VulkanEncodeH264Context; - -static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - VkVideoEncodeRateControlInfoKHR *rc_info, - 
VkVideoEncodeRateControlLayerInfoKHR *rc_layer) -{ - VulkanEncodeH264Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - VulkanEncodeH264Picture *hp = pic->codec_priv; - - hp->vkrc_info = (VkVideoEncodeH264RateControlInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_INFO_KHR, - .flags = VK_VIDEO_ENCODE_H264_RATE_CONTROL_REFERENCE_PATTERN_FLAT_BIT_KHR | - VK_VIDEO_ENCODE_H264_RATE_CONTROL_REGULAR_GOP_BIT_KHR, - .idrPeriod = ctx->base.gop_size, - .gopFrameCount = ctx->base.gop_size, - .consecutiveBFrameCount = FFMAX(ctx->base.b_per_p - 1, 0), - .temporalLayerCount = 0, - }; - rc_info->pNext = &hp->vkrc_info; - - if (rc_info->rateControlMode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { - rc_info->virtualBufferSizeInMs = (enc->unit_opts.hrd_buffer_size * 1000LL) / avctx->bit_rate; - rc_info->initialVirtualBufferSizeInMs = (enc->unit_opts.initial_buffer_fullness * 1000LL) / avctx->bit_rate; - - hp->vkrc_layer_info = (VkVideoEncodeH264RateControlLayerInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_LAYER_INFO_KHR, - - .useMinQp = avctx->qmin > 0, - .minQp.qpI = avctx->qmin > 0 ? avctx->qmin : 0, - .minQp.qpP = avctx->qmin > 0 ? avctx->qmin : 0, - .minQp.qpB = avctx->qmin > 0 ? avctx->qmin : 0, - - .useMaxQp = avctx->qmax > 0, - .maxQp.qpI = avctx->qmax > 0 ? avctx->qmax : 0, - .maxQp.qpP = avctx->qmax > 0 ? avctx->qmax : 0, - .maxQp.qpB = avctx->qmax > 0 ? avctx->qmax : 0, - - .useMaxFrameSize = 0, - }; - rc_layer->pNext = &hp->vkrc_layer_info; - hp->vkrc_info.temporalLayerCount = 1; - } - - return 0; -} - -static int vk_enc_h264_update_pic_info(AVCodecContext *avctx, - FFHWBaseEncodePicture *pic) -{ - VulkanEncodeH264Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - VulkanEncodeH264Picture *hp = pic->codec_priv; - FFHWBaseEncodePicture *prev = pic->prev; - VulkanEncodeH264Picture *hprev = prev ? 
prev->codec_priv : NULL; - - if (pic->type == FF_HW_PICTURE_TYPE_IDR) { - av_assert0(pic->display_order == pic->encode_order); - - hp->frame_num = 0; - hp->last_idr_frame = pic->display_order; - hp->idr_pic_id = hprev ? hprev->idr_pic_id + 1 : 0; - - hp->primary_pic_type = 0; - hp->slice_type = STD_VIDEO_H264_SLICE_TYPE_I; - } else { - av_assert0(prev); - - hp->frame_num = hprev->frame_num + prev->is_reference; - - hp->last_idr_frame = hprev->last_idr_frame; - hp->idr_pic_id = hprev->idr_pic_id; - - if (pic->type == FF_HW_PICTURE_TYPE_I) { - hp->slice_type = STD_VIDEO_H264_SLICE_TYPE_I; - hp->primary_pic_type = 0; - } else if (pic->type == FF_HW_PICTURE_TYPE_P) { - hp->slice_type = STD_VIDEO_H264_SLICE_TYPE_P; - hp->primary_pic_type = 1; - } else { - hp->slice_type = STD_VIDEO_H264_SLICE_TYPE_B; - hp->primary_pic_type = 2; - } - } - - hp->pic_order_cnt = pic->display_order - hp->last_idr_frame; - if (enc->units.raw_sps.pic_order_cnt_type == 2) - hp->pic_order_cnt *= 2; - - hp->units_needed = 0; - - if (enc->unit_elems & UNIT_SEI_IDENTIFIER && pic->encode_order == 0) - hp->units_needed |= UNIT_SEI_IDENTIFIER; - - if (enc->unit_elems & UNIT_SEI_TIMING) { - enc->sei_pic_timing = (H264RawSEIPicTiming) { - .cpb_removal_delay = 2 * (pic->encode_order - hp->last_idr_frame), - .dpb_output_delay = 2 * (pic->display_order - pic->encode_order + ctx->base.max_b_depth), - }; - - hp->units_needed |= UNIT_SEI_TIMING; - } - - if (enc->unit_elems & UNIT_SEI_RECOVERY && pic->type == FF_HW_PICTURE_TYPE_I) { - enc->sei_recovery_point = (H264RawSEIRecoveryPoint) { - .recovery_frame_cnt = 0, - .exact_match_flag = 1, - .broken_link_flag = ctx->base.b_per_p > 0, - }; - - hp->units_needed |= UNIT_SEI_RECOVERY; - } - - if (enc->unit_elems & UNIT_SEI_A53_CC) { - int err; - size_t sei_a53cc_len; - av_freep(&enc->sei_a53cc_data); - err = ff_alloc_a53_sei(pic->input_image, 0, &enc->sei_a53cc_data, &sei_a53cc_len); - if (err < 0) - return err; - if (enc->sei_a53cc_data != NULL) { - 
enc->sei_a53cc.itu_t_t35_country_code = 181; - enc->sei_a53cc.data = (uint8_t *)enc->sei_a53cc_data + 1; - enc->sei_a53cc.data_length = sei_a53cc_len - 1; - - hp->units_needed |= UNIT_SEI_A53_CC; - } - } - - return 0; -} - -static void setup_slices(AVCodecContext *avctx, - FFHWBaseEncodePicture *pic) -{ - VulkanEncodeH264Context *enc = avctx->priv_data; - VulkanEncodeH264Picture *hp = pic->codec_priv; - - hp->slice_wt = (StdVideoEncodeH264WeightTable) { - .flags = (StdVideoEncodeH264WeightTableFlags) { - .luma_weight_l0_flag = 0, - .chroma_weight_l0_flag = 0, - .luma_weight_l1_flag = 0, - .chroma_weight_l1_flag = 0, - }, - .luma_log2_weight_denom = 0, - .chroma_log2_weight_denom = 0, - .luma_weight_l0 = { 0 }, - .luma_offset_l0 = { 0 }, - .chroma_weight_l0 = { { 0 } }, - .chroma_offset_l0 = { { 0 } }, - .luma_weight_l1 = { 0 }, - .luma_offset_l1 = { 0 }, - .chroma_weight_l1 = { { 0 } }, - .chroma_offset_l1 = { { 0 } }, - }; - - hp->slice_hdr = (StdVideoEncodeH264SliceHeader) { - .flags = (StdVideoEncodeH264SliceHeaderFlags) { - .direct_spatial_mv_pred_flag = 1, - /* The vk_samples code does this */ - .num_ref_idx_active_override_flag = - ((enc->units.raw_pps.num_ref_idx_l0_default_active_minus1) && - (pic->type == FF_HW_PICTURE_TYPE_B)) ? 1 : 0, - }, - .first_mb_in_slice = 1, - .slice_type = hp->slice_type, - .slice_alpha_c0_offset_div2 = 0, - .slice_beta_offset_div2 = 0, - .slice_qp_delta = 0, /* Filled in below */ - /* Reserved */ - .cabac_init_idc = 0, - .disable_deblocking_filter_idc = 0, - .pWeightTable = NULL, // &hp->slice_wt, - }; - - hp->vkslice = (VkVideoEncodeH264NaluSliceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_NALU_SLICE_INFO_KHR, - .pNext = NULL, - .constantQp = pic->type == FF_HW_PICTURE_TYPE_B ? enc->fixed_qp_b : - pic->type == FF_HW_PICTURE_TYPE_P ? 
enc->fixed_qp_p : - enc->unit_opts.fixed_qp_idr, - .pStdSliceHeader = &hp->slice_hdr, - }; - - if (enc->common.opts.rc_mode != VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) - hp->vkslice.constantQp = 0; - - hp->slice_hdr.slice_qp_delta = hp->vkslice.constantQp - - (enc->units.raw_pps.pic_init_qp_minus26 + 26); - - hp->vkh264pic_info.pNaluSliceEntries = &hp->vkslice; - hp->vkh264pic_info.naluSliceEntryCount = 1; -} - -static void vk_enc_h264_default_ref_pic_list(AVCodecContext *avctx, - FFHWBaseEncodePicture *pic, - FFHWBaseEncodePicture **rpl0, - FFHWBaseEncodePicture **rpl1, - int *rpl_size) -{ - FFHWBaseEncodePicture *prev; - VulkanEncodeH264Picture *hp, *hn, *hc; - int i, j, n = 0; - - prev = pic->prev; - av_assert0(prev); - hp = pic->codec_priv; - - for (i = 0; i < pic->prev->nb_dpb_pics; i++) { - hn = prev->dpb[i]->codec_priv; - av_assert0(hn->frame_num < hp->frame_num); - - if (pic->type == FF_HW_PICTURE_TYPE_P) { - for (j = n; j > 0; j--) { - hc = rpl0[j - 1]->codec_priv; - av_assert0(hc->frame_num != hn->frame_num); - if (hc->frame_num > hn->frame_num) - break; - rpl0[j] = rpl0[j - 1]; - } - rpl0[j] = prev->dpb[i]; - - } else if (pic->type == FF_HW_PICTURE_TYPE_B) { - for (j = n; j > 0; j--) { - hc = rpl0[j - 1]->codec_priv; - av_assert0(hc->pic_order_cnt != hp->pic_order_cnt); - if (hc->pic_order_cnt < hp->pic_order_cnt) { - if (hn->pic_order_cnt > hp->pic_order_cnt || - hn->pic_order_cnt < hc->pic_order_cnt) - break; - } else { - if (hn->pic_order_cnt > hc->pic_order_cnt) - break; - } - rpl0[j] = rpl0[j - 1]; - } - rpl0[j] = prev->dpb[i]; - - for (j = n; j > 0; j--) { - hc = rpl1[j - 1]->codec_priv; - av_assert0(hc->pic_order_cnt != hp->pic_order_cnt); - if (hc->pic_order_cnt > hp->pic_order_cnt) { - if (hn->pic_order_cnt < hp->pic_order_cnt || - hn->pic_order_cnt > hc->pic_order_cnt) - break; - } else { - if (hn->pic_order_cnt < hc->pic_order_cnt) - break; - } - rpl1[j] = rpl1[j - 1]; - } - rpl1[j] = prev->dpb[i]; - } - - ++n; - } - - if (pic->type 
== FF_HW_PICTURE_TYPE_B) { - for (i = 0; i < n; i++) { - if (rpl0[i] != rpl1[i]) - break; - } - if (i == n) - FFSWAP(FFHWBaseEncodePicture *, rpl1[0], rpl1[1]); - } - - if (pic->type == FF_HW_PICTURE_TYPE_P || - pic->type == FF_HW_PICTURE_TYPE_B) { - av_log(avctx, AV_LOG_DEBUG, "Default RefPicList0 for fn=%d/poc=%d:", - hp->frame_num, hp->pic_order_cnt); - for (i = 0; i < n; i++) { - hn = rpl0[i]->codec_priv; - av_log(avctx, AV_LOG_DEBUG, " fn=%d/poc=%d", - hn->frame_num, hn->pic_order_cnt); - } - av_log(avctx, AV_LOG_DEBUG, "\n"); - } - if (pic->type == FF_HW_PICTURE_TYPE_B) { - av_log(avctx, AV_LOG_DEBUG, "Default RefPicList1 for fn=%d/poc=%d:", - hp->frame_num, hp->pic_order_cnt); - for (i = 0; i < n; i++) { - hn = rpl1[i]->codec_priv; - av_log(avctx, AV_LOG_DEBUG, " fn=%d/poc=%d", - hn->frame_num, hn->pic_order_cnt); - } - av_log(avctx, AV_LOG_DEBUG, "\n"); - } - - *rpl_size = n; -} - -static void setup_refs(AVCodecContext *avctx, - FFHWBaseEncodePicture *pic, - VkVideoEncodeInfoKHR *encode_info) -{ - int n, i, j; - VulkanEncodeH264Context *enc = avctx->priv_data; - VulkanEncodeH264Picture *hp = pic->codec_priv; - FFHWBaseEncodePicture *prev = pic->prev; - FFHWBaseEncodePicture *def_l0[MAX_DPB_SIZE], *def_l1[MAX_DPB_SIZE]; - VulkanEncodeH264Picture *href; - - hp->ref_list_info = (StdVideoEncodeH264ReferenceListsInfo) { - .flags = (StdVideoEncodeH264ReferenceListsInfoFlags) { - .ref_pic_list_modification_flag_l0 = 0, - .ref_pic_list_modification_flag_l1 = 0, - /* Reserved */ - }, - /* May be overridden during setup_slices() */ - .num_ref_idx_l0_active_minus1 = pic->nb_refs[0] - 1, - .num_ref_idx_l1_active_minus1 = pic->nb_refs[1] - 1, - /* .RefPicList0 is set in vk_enc_h264_default_ref_pic_list() */ - /* .RefPicList1 is set in vk_enc_h264_default_ref_pic_list() */ - /* Reserved */ - .pRefList0ModOperations = NULL, /* All set below */ - .refList0ModOpCount = 0, - .pRefList1ModOperations = NULL, - .refList1ModOpCount = 0, - .pRefPicMarkingOperations = NULL, - 
.refPicMarkingOpCount = 0, - }; - - for (i = 0; i < STD_VIDEO_H264_MAX_NUM_LIST_REF; i++) - hp->ref_list_info.RefPicList0[i] = hp->ref_list_info.RefPicList1[i] = -1; - - /* Note: really not sure */ - for (int i = 0; i < pic->nb_refs[0]; i++) { - VkVideoReferenceSlotInfoKHR *slot_info; - slot_info = (VkVideoReferenceSlotInfoKHR *)&encode_info->pReferenceSlots[i]; - hp->ref_list_info.RefPicList0[i] = slot_info->slotIndex; - } - - /* Note: really not sure */ - for (int i = 0; i < pic->nb_refs[1]; i++) { - VkVideoReferenceSlotInfoKHR *slot_info; - slot_info = (VkVideoReferenceSlotInfoKHR *)&encode_info->pReferenceSlots[pic->nb_refs[0] + i]; - hp->ref_list_info.RefPicList1[i] = slot_info->slotIndex; - } - - hp->h264pic_info.pRefLists = &hp->ref_list_info; - - if (pic->is_reference && pic->type != FF_HW_PICTURE_TYPE_IDR) { - FFHWBaseEncodePicture *discard_list[MAX_DPB_SIZE]; - int discard = 0, keep = 0; - - // Discard everything which is in the DPB of the previous frame but - // not in the DPB of this one. 
- for (i = 0; i < prev->nb_dpb_pics; i++) { - for (j = 0; j < pic->nb_dpb_pics; j++) { - if (prev->dpb[i] == pic->dpb[j]) - break; - } - if (j == pic->nb_dpb_pics) { - discard_list[discard] = prev->dpb[i]; - ++discard; - } else { - ++keep; - } - } - av_assert0(keep <= enc->units.dpb_frames); - - if (discard == 0) { - hp->h264pic_info.flags.adaptive_ref_pic_marking_mode_flag = 0; - } else { - hp->h264pic_info.flags.adaptive_ref_pic_marking_mode_flag = 1; - for (i = 0; i < discard; i++) { - VulkanEncodeH264Picture *old = discard_list[i]->codec_priv; - av_assert0(old->frame_num < hp->frame_num); - hp->mmco[i] = (StdVideoEncodeH264RefPicMarkingEntry) { - .memory_management_control_operation = 1, - .difference_of_pic_nums_minus1 = hp->frame_num - old->frame_num - 1, - }; - } - hp->mmco[i] = (StdVideoEncodeH264RefPicMarkingEntry) { - .memory_management_control_operation = 0, - }; - hp->ref_list_info.pRefPicMarkingOperations = hp->mmco; - hp->ref_list_info.refPicMarkingOpCount = i + 1; - } - } - - if (pic->type == FF_HW_PICTURE_TYPE_I || pic->type == FF_HW_PICTURE_TYPE_IDR) - return; - - // If the intended references are not the first entries of RefPicListN - // by default, use ref-pic-list-modification to move them there. 
- vk_enc_h264_default_ref_pic_list(avctx, pic, - def_l0, def_l1, &n); - - if (pic->type == FF_HW_PICTURE_TYPE_P) { - int need_rplm = 0; - for (i = 0; i < pic->nb_refs[0]; i++) { - av_assert0(pic->refs[0][i]); - if (pic->refs[0][i] != (FFHWBaseEncodePicture *)def_l0[i]) - need_rplm = 1; - } - - hp->ref_list_info.flags.ref_pic_list_modification_flag_l0 = need_rplm; - if (need_rplm) { - int pic_num = hp->frame_num; - for (i = 0; i < pic->nb_refs[0]; i++) { - href = pic->refs[0][i]->codec_priv; - av_assert0(href->frame_num != pic_num); - if (href->frame_num < pic_num) { - hp->mods[0][i] = (StdVideoEncodeH264RefListModEntry) { - .modification_of_pic_nums_idc = 0, - .abs_diff_pic_num_minus1 = pic_num - href->frame_num - 1, - }; - } else { - hp->mods[0][i] = (StdVideoEncodeH264RefListModEntry) { - .modification_of_pic_nums_idc = 1, - .abs_diff_pic_num_minus1 = href->frame_num - pic_num - 1, - }; - } - pic_num = href->frame_num; - } - hp->ref_list_info.pRefList0ModOperations = hp->mods[0]; - hp->ref_list_info.refList0ModOpCount = i - 1; - } - } else { - int need_rplm_l0 = 0, need_rplm_l1 = 0; - int n0 = 0, n1 = 0; - for (i = 0; i < pic->nb_refs[0]; i++) { - av_assert0(pic->refs[0][i]); - href = pic->refs[0][i]->codec_priv; - av_assert0(href->pic_order_cnt < hp->pic_order_cnt); - if (pic->refs[0][i] != (FFHWBaseEncodePicture *)def_l0[n0]) - need_rplm_l0 = 1; - ++n0; - } - - for (int i = 0; i < pic->nb_refs[1]; i++) { - av_assert0(pic->refs[1][i]); - href = pic->refs[1][i]->codec_priv; - av_assert0(href->pic_order_cnt > hp->pic_order_cnt); - if (pic->refs[1][i] != (FFHWBaseEncodePicture *)def_l1[n1]) - need_rplm_l1 = 1; - ++n1; - } - - hp->ref_list_info.flags.ref_pic_list_modification_flag_l0 = need_rplm_l0; - if (need_rplm_l0) { - int pic_num = hp->frame_num; - for (i = j = 0; i < pic->nb_refs[0]; i++) { - href = pic->refs[0][i]->codec_priv; - av_assert0(href->frame_num != pic_num); - if (href->frame_num < pic_num) { - hp->mods[0][j] = (StdVideoEncodeH264RefListModEntry) { 
- .modification_of_pic_nums_idc = 0, - .abs_diff_pic_num_minus1 = pic_num - href->frame_num - 1, - }; - } else { - hp->mods[0][j] = (StdVideoEncodeH264RefListModEntry) { - .modification_of_pic_nums_idc = 1, - .abs_diff_pic_num_minus1 = href->frame_num - pic_num - 1, - }; - } - pic_num = href->frame_num; - ++j; - } - hp->ref_list_info.pRefList0ModOperations = hp->mods[0]; - hp->ref_list_info.refList0ModOpCount = j - 1; - } - - hp->ref_list_info.flags.ref_pic_list_modification_flag_l1 = need_rplm_l1; - if (need_rplm_l1) { - int pic_num = hp->frame_num; - for (i = j = 0; i < pic->nb_refs[1]; i++) { - href = pic->refs[1][i]->codec_priv; - av_assert0(href->frame_num != pic_num); - if (href->frame_num < pic_num) { - hp->mods[1][j] = (StdVideoEncodeH264RefListModEntry) { - .modification_of_pic_nums_idc = 0, - .abs_diff_pic_num_minus1 = pic_num - href->frame_num - 1, - }; - } else { - hp->mods[1][j] = (StdVideoEncodeH264RefListModEntry) { - .modification_of_pic_nums_idc = 1, - .abs_diff_pic_num_minus1 = href->frame_num - pic_num - 1, - }; - } - pic_num = href->frame_num; - ++j; - } - hp->ref_list_info.pRefList1ModOperations = hp->mods[1]; - hp->ref_list_info.refList1ModOpCount = j - 1; - } - } -} - -static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - VkVideoEncodeInfoKHR *encode_info) -{ - int err; - FFVulkanEncodePicture *vp = pic->priv; - VulkanEncodeH264Picture *hp = pic->codec_priv; - VkVideoReferenceSlotInfoKHR *ref_slot; - - err = vk_enc_h264_update_pic_info(avctx, pic); - if (err < 0) - return err; - - hp->vkh264pic_info = (VkVideoEncodeH264PictureInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PICTURE_INFO_KHR, - .pNext = NULL, - .pNaluSliceEntries = NULL, // Filled in during setup_slices() - .naluSliceEntryCount = 0, // Filled in during setup_slices() - .pStdPictureInfo = &hp->h264pic_info, - }; - - hp->h264pic_info = (StdVideoEncodeH264PictureInfo) { - .flags = (StdVideoEncodeH264PictureInfoFlags) { - .IdrPicFlag = pic->type 
== FF_HW_PICTURE_TYPE_IDR, - .is_reference = pic->is_reference, - .no_output_of_prior_pics_flag = 0, - .long_term_reference_flag = 0, - .adaptive_ref_pic_marking_mode_flag = 0, // Filled in during setup_refs() - /* Reserved */ - }, - .seq_parameter_set_id = 0, - .pic_parameter_set_id = 0, - .idr_pic_id = hp->idr_pic_id, - .primary_pic_type = pic->type == FF_HW_PICTURE_TYPE_P ? STD_VIDEO_H264_PICTURE_TYPE_P : - pic->type == FF_HW_PICTURE_TYPE_B ? STD_VIDEO_H264_PICTURE_TYPE_B : - pic->type == FF_HW_PICTURE_TYPE_I ? STD_VIDEO_H264_PICTURE_TYPE_I : - STD_VIDEO_H264_PICTURE_TYPE_IDR, - .frame_num = hp->frame_num, - .PicOrderCnt = hp->pic_order_cnt, - .temporal_id = 0, /* ? */ - /* Reserved */ - .pRefLists = NULL, // Filled in during setup_refs - }; - encode_info->pNext = &hp->vkh264pic_info; - - hp->h264dpb_info = (StdVideoEncodeH264ReferenceInfo) { - .flags = (StdVideoEncodeH264ReferenceInfoFlags) { - .used_for_long_term_reference = 0, - /* Reserved */ - }, - .primary_pic_type = hp->h264pic_info.primary_pic_type, - .FrameNum = hp->h264pic_info.frame_num, - .PicOrderCnt = hp->h264pic_info.PicOrderCnt, - .long_term_pic_num = 0, - .long_term_frame_idx = 0, - .temporal_id = hp->h264pic_info.temporal_id, - }; - hp->vkh264dpb_info = (VkVideoEncodeH264DpbSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_DPB_SLOT_INFO_KHR, - .pStdReferenceInfo = &hp->h264dpb_info, - }; - - vp->dpb_slot.pNext = &hp->vkh264dpb_info; - - ref_slot = (VkVideoReferenceSlotInfoKHR *)encode_info->pSetupReferenceSlot; - ref_slot->pNext = &hp->vkh264dpb_info; - - setup_refs(avctx, pic, encode_info); - - setup_slices(avctx, pic); - - return 0; -} - -static int init_profile(AVCodecContext *avctx, - VkVideoProfileInfoKHR *profile, void *pnext) -{ - VkResult ret; - VulkanEncodeH264Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - VkVideoEncodeH264CapabilitiesKHR h264_caps = { - .sType = 
VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_CAPABILITIES_KHR, - }; - VkVideoEncodeCapabilitiesKHR enc_caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR, - .pNext = &h264_caps, - }; - VkVideoCapabilitiesKHR caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR, - .pNext = &enc_caps, - }; - - /* In order of preference */ - int last_supported = AV_PROFILE_UNKNOWN; - static const int known_profiles[] = { - AV_PROFILE_H264_CONSTRAINED_BASELINE, - AV_PROFILE_H264_MAIN, - AV_PROFILE_H264_HIGH, - AV_PROFILE_H264_HIGH_10, - }; - int nb_profiles = FF_ARRAY_ELEMS(known_profiles); - - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->frames->sw_format); - if (!desc) - return AVERROR(EINVAL); - - if (desc->comp[0].depth == 8) - nb_profiles = 3; - - enc->profile = (VkVideoEncodeH264ProfileInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR, - .pNext = pnext, - .stdProfileIdc = ff_vk_h264_profile_to_vk(avctx->profile), - }; - profile->pNext = &enc->profile; - - /* Set level */ - if (avctx->level == AV_LEVEL_UNKNOWN) - avctx->level = enc->common.opts.level; - - /* User has explicitly specified a profile. 
*/ - if (avctx->profile != AV_PROFILE_UNKNOWN) - return 0; - - av_log(avctx, AV_LOG_DEBUG, "Supported profiles:\n"); - for (int i = 0; i < nb_profiles; i++) { - enc->profile.stdProfileIdc = ff_vk_h264_profile_to_vk(known_profiles[i]); - ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, - profile, - &caps); - if (ret == VK_SUCCESS) { - av_log(avctx, AV_LOG_DEBUG, " %s\n", - avcodec_profile_name(avctx->codec_id, known_profiles[i])); - last_supported = known_profiles[i]; - } - } - - if (last_supported == AV_PROFILE_UNKNOWN) { - av_log(avctx, AV_LOG_ERROR, "No supported profiles for given format\n"); - return AVERROR(ENOTSUP); - } - - enc->profile.stdProfileIdc = ff_vk_h264_profile_to_vk(last_supported); - av_log(avctx, AV_LOG_VERBOSE, "Using profile %s\n", - avcodec_profile_name(avctx->codec_id, last_supported)); - avctx->profile = last_supported; - - return 0; -} - -static int init_enc_options(AVCodecContext *avctx) -{ - VulkanEncodeH264Context *enc = avctx->priv_data; - FFHWBaseEncodeH264Opts *unit_opts = &enc->unit_opts; - - if (avctx->rc_buffer_size) - unit_opts->hrd_buffer_size = avctx->rc_buffer_size; - else if (avctx->rc_max_rate > 0) - unit_opts->hrd_buffer_size = avctx->rc_max_rate; - else - unit_opts->hrd_buffer_size = avctx->bit_rate; - - if (avctx->rc_initial_buffer_occupancy) { - if (avctx->rc_initial_buffer_occupancy > unit_opts->hrd_buffer_size) { - av_log(avctx, AV_LOG_ERROR, "Invalid RC buffer settings: " - "must have initial buffer size (%d) <= " - "buffer size (%"PRId64").\n", - avctx->rc_initial_buffer_occupancy, unit_opts->hrd_buffer_size); - return AVERROR(EINVAL); - } - unit_opts->initial_buffer_fullness = avctx->rc_initial_buffer_occupancy; - } else { - unit_opts->initial_buffer_fullness = unit_opts->hrd_buffer_size * 3 / 4; - } - - if (enc->common.opts.rc_mode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { - /* HRD info is required for timing */ - enc->unit_elems &= ~UNIT_SEI_TIMING; - - enc->fixed_qp_p = 
av_clip(enc->common.explicit_qp, - enc->caps.minQp, enc->caps.maxQp); - if (avctx->i_quant_factor > 0.0) - unit_opts->fixed_qp_idr = av_clip((avctx->i_quant_factor * enc->fixed_qp_p + - avctx->i_quant_offset) + 0.5, - enc->caps.minQp, enc->caps.maxQp); - else - unit_opts->fixed_qp_idr = enc->fixed_qp_p; - - if (avctx->b_quant_factor > 0.0) - enc->fixed_qp_b = av_clip((avctx->b_quant_factor * enc->fixed_qp_p + - avctx->b_quant_offset) + 0.5, - enc->caps.minQp, enc->caps.maxQp); - else - enc->fixed_qp_b = enc->fixed_qp_p; - - av_log(avctx, AV_LOG_DEBUG, "Using fixed QP = " - "%d / %d / %d for IDR- / P- / B-frames.\n", - unit_opts->fixed_qp_idr, enc->fixed_qp_p, enc->fixed_qp_b); - } else { - unit_opts->fixed_qp_idr = 26; - enc->fixed_qp_p = 26; - enc->fixed_qp_b = 26; - } - - return 0; -} - -static av_cold int init_sequence_headers(AVCodecContext *avctx) -{ - int err; - VulkanEncodeH264Context *enc = avctx->priv_data; - - FFHWBaseEncodeH264 *units = &enc->units; - FFHWBaseEncodeH264Opts *unit_opts = &enc->unit_opts; - - unit_opts->bit_rate = avctx->bit_rate; - unit_opts->mb_width = FFALIGN(avctx->width, 16) / 16; - unit_opts->mb_height = FFALIGN(avctx->height, 16) / 16; - unit_opts->flags = enc->unit_elems & UNIT_SEI_TIMING ? 
FF_HW_H264_SEI_TIMING : 0; - - /* cabac already set via an option */ - /* fixed_qp_idr initialized in init_enc_options() */ - /* hrd_buffer_size initialized in init_enc_options() */ - /* initial_buffer_fullness initialized in init_enc_options() */ - - err = ff_hw_base_encode_init_params_h264(&enc->common.base, avctx, - units, unit_opts); - if (err < 0) - return err; - - units->raw_sps.seq_scaling_matrix_present_flag = - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SCALING_MATRIX_PRESENT_FLAG_SET_BIT_KHR); - units->raw_pps.pic_scaling_matrix_present_flag = - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SCALING_MATRIX_PRESENT_FLAG_SET_BIT_KHR); - units->raw_pps.transform_8x8_mode_flag = - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_TRANSFORM_8X8_MODE_FLAG_SET_BIT_KHR); - - return 0; -} - -typedef struct VulkanH264Units { - StdVideoH264SequenceParameterSet vksps; - StdVideoH264ScalingLists vksps_scaling; - StdVideoH264HrdParameters vksps_vui_header; - StdVideoH264SequenceParameterSetVui vksps_vui; - - StdVideoH264PictureParameterSet vkpps; - StdVideoH264ScalingLists vkpps_scaling; -} VulkanH264Units; - -static av_cold int base_unit_to_vk(AVCodecContext *avctx, - VulkanH264Units *vk_units) -{ - VulkanEncodeH264Context *enc = avctx->priv_data; - - FFHWBaseEncodeH264 *units = &enc->units; - - H264RawSPS *sps = &units->raw_sps; - H264RawHRD *hrd = &sps->vui.nal_hrd_parameters; - StdVideoH264ScalingLists *vksps_scaling = &vk_units->vksps_scaling; - StdVideoH264HrdParameters *vksps_vui_header = &vk_units->vksps_vui_header; - StdVideoH264SequenceParameterSetVui *vksps_vui = &vk_units->vksps_vui; - StdVideoH264SequenceParameterSet *vksps = &vk_units->vksps; - - H264RawPPS *pps = &units->raw_pps; - StdVideoH264ScalingLists *vkpps_scaling = &vk_units->vkpps_scaling; - StdVideoH264PictureParameterSet *vkpps = &vk_units->vkpps; - - *vksps_scaling = (StdVideoH264ScalingLists) { - .scaling_list_present_mask = 0x0, // mask - 
.use_default_scaling_matrix_mask = 1, - }; - - *vksps_vui_header = (StdVideoH264HrdParameters) { - .cpb_cnt_minus1 = hrd->cpb_cnt_minus1, - .bit_rate_scale = hrd->bit_rate_scale, - .cpb_size_scale = hrd->cpb_size_scale, - /* Reserved */ - /* bit_rate/cpb_size/cbr_flag set below */ - .initial_cpb_removal_delay_length_minus1 = hrd->initial_cpb_removal_delay_length_minus1, - .cpb_removal_delay_length_minus1 = hrd->cpb_removal_delay_length_minus1, - .dpb_output_delay_length_minus1 = hrd->dpb_output_delay_length_minus1, - .time_offset_length = hrd->time_offset_length, - }; - - for (int i = 0; i < H264_MAX_CPB_CNT; i++) { - vksps_vui_header->bit_rate_value_minus1[i] = hrd->bit_rate_value_minus1[i]; - vksps_vui_header->cpb_size_value_minus1[i] = hrd->cpb_size_value_minus1[i]; - vksps_vui_header->cbr_flag[i] = hrd->cbr_flag[i]; - } - - *vksps_vui = (StdVideoH264SequenceParameterSetVui) { - .flags = (StdVideoH264SpsVuiFlags) { - .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag, - .overscan_info_present_flag = sps->vui.overscan_info_present_flag, - .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag, - .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag, - .video_full_range_flag = sps->vui.video_full_range_flag, - .color_description_present_flag = sps->vui.colour_description_present_flag, - .chroma_loc_info_present_flag = sps->vui.chroma_loc_info_present_flag, - .timing_info_present_flag = sps->vui.timing_info_present_flag, - .fixed_frame_rate_flag = sps->vui.fixed_frame_rate_flag, - .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag, - .nal_hrd_parameters_present_flag = sps->vui.nal_hrd_parameters_present_flag, - .vcl_hrd_parameters_present_flag = sps->vui.vcl_hrd_parameters_present_flag, - }, - .aspect_ratio_idc = sps->vui.aspect_ratio_idc, - .sar_width = sps->vui.sar_width, - .sar_height = sps->vui.sar_height, - .video_format = sps->vui.video_format, - .colour_primaries = sps->vui.colour_primaries, 
- .transfer_characteristics = sps->vui.transfer_characteristics, - .matrix_coefficients = sps->vui.matrix_coefficients, - .num_units_in_tick = sps->vui.num_units_in_tick, - .time_scale = sps->vui.time_scale, - .max_num_reorder_frames = sps->vui.max_num_reorder_frames, - .max_dec_frame_buffering = sps->vui.max_dec_frame_buffering, - .chroma_sample_loc_type_top_field = sps->vui.chroma_sample_loc_type_top_field, - .chroma_sample_loc_type_bottom_field = sps->vui.chroma_sample_loc_type_bottom_field, - /* Reserved */ - .pHrdParameters = vksps_vui_header, - }; - - *vksps = (StdVideoH264SequenceParameterSet) { - .flags = (StdVideoH264SpsFlags) { - .constraint_set0_flag = sps->constraint_set0_flag, - .constraint_set1_flag = sps->constraint_set1_flag, - .constraint_set2_flag = sps->constraint_set2_flag, - .constraint_set3_flag = sps->constraint_set3_flag, - .constraint_set4_flag = sps->constraint_set4_flag, - .constraint_set5_flag = sps->constraint_set5_flag, - .direct_8x8_inference_flag = sps->direct_8x8_inference_flag, - .mb_adaptive_frame_field_flag = sps->mb_adaptive_frame_field_flag, - .frame_mbs_only_flag = sps->frame_mbs_only_flag, - .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag, - .separate_colour_plane_flag = sps->separate_colour_plane_flag, - .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag, - .qpprime_y_zero_transform_bypass_flag = sps->qpprime_y_zero_transform_bypass_flag, - .frame_cropping_flag = sps->frame_cropping_flag, - .seq_scaling_matrix_present_flag = sps->seq_scaling_matrix_present_flag, - .vui_parameters_present_flag = sps->vui_parameters_present_flag, - }, - .profile_idc = ff_vk_h264_profile_to_vk(sps->profile_idc), - .level_idc = ff_vk_h264_level_to_vk(sps->level_idc), - .chroma_format_idc = sps->chroma_format_idc, - .seq_parameter_set_id = sps->seq_parameter_set_id, - .bit_depth_luma_minus8 = sps->bit_depth_luma_minus8, - .bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8, - 
.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4, - .pic_order_cnt_type = sps->pic_order_cnt_type, - .offset_for_non_ref_pic = sps->offset_for_non_ref_pic, - .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field, - .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4, - .num_ref_frames_in_pic_order_cnt_cycle = sps->num_ref_frames_in_pic_order_cnt_cycle, - .max_num_ref_frames = sps->max_num_ref_frames, - /* Reserved */ - .pic_width_in_mbs_minus1 = sps->pic_width_in_mbs_minus1, - .pic_height_in_map_units_minus1 = sps->pic_height_in_map_units_minus1, - .frame_crop_left_offset = sps->frame_crop_left_offset, - .frame_crop_right_offset = sps->frame_crop_right_offset, - .frame_crop_top_offset = sps->frame_crop_top_offset, - .frame_crop_bottom_offset = sps->frame_crop_bottom_offset, - /* Reserved */ - .pOffsetForRefFrame = sps->offset_for_ref_frame, - .pScalingLists = vksps_scaling, - .pSequenceParameterSetVui = vksps_vui, - }; - - *vkpps_scaling = (StdVideoH264ScalingLists) { - .scaling_list_present_mask = 0x0, // mask - .use_default_scaling_matrix_mask = 1, - }; - - *vkpps = (StdVideoH264PictureParameterSet) { - .flags = (StdVideoH264PpsFlags) { - .transform_8x8_mode_flag = pps->transform_8x8_mode_flag, - .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present_flag, - .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, - .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, - .weighted_pred_flag = pps->weighted_pred_flag, - .bottom_field_pic_order_in_frame_present_flag = pps->bottom_field_pic_order_in_frame_present_flag, - .entropy_coding_mode_flag = pps->entropy_coding_mode_flag, - .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag, - }, - .seq_parameter_set_id = pps->seq_parameter_set_id, - .pic_parameter_set_id = pps->pic_parameter_set_id, - .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1, - 
.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1, - .weighted_bipred_idc = pps->weighted_bipred_idc, - .pic_init_qp_minus26 = pps->pic_init_qp_minus26, - .pic_init_qs_minus26 = pps->pic_init_qs_minus26, - .chroma_qp_index_offset = pps->chroma_qp_index_offset, - .second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset, - .pScalingLists = vkpps_scaling, - }; - - return 0; -} - -static int create_session_params(AVCodecContext *avctx) -{ - int err; - VulkanEncodeH264Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - VulkanH264Units vk_units = { 0 }; - - VkVideoEncodeH264SessionParametersAddInfoKHR h264_params_info; - VkVideoEncodeH264SessionParametersCreateInfoKHR h264_params; - - /* Convert it to Vulkan */ - err = base_unit_to_vk(avctx, &vk_units); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to convert SPS/PPS units to Vulkan: %s\n", - av_err2str(err)); - return err; - } - - /* Destroy the session params */ - if (ctx->session_params) - vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, - ctx->session_params, - s->hwctx->alloc); - - h264_params_info = (VkVideoEncodeH264SessionParametersAddInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR, - .pStdSPSs = &vk_units.vksps, - .stdSPSCount = 1, - .pStdPPSs = &vk_units.vkpps, - .stdPPSCount = 1, - }; - h264_params = (VkVideoEncodeH264SessionParametersCreateInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, - .maxStdSPSCount = 1, - .maxStdPPSCount = 1, - .pParametersAddInfo = &h264_params_info, - }; - - return ff_vulkan_encode_create_session_params(avctx, ctx, &h264_params); -} - -static int parse_feedback_units(AVCodecContext *avctx, - const uint8_t *data, size_t size, - int sps_override, int pps_override) -{ - int err; - VulkanEncodeH264Context *enc = avctx->priv_data; - - 
CodedBitstreamContext *cbs; - CodedBitstreamFragment au = { 0 }; - - err = ff_cbs_init(&cbs, AV_CODEC_ID_H264, avctx); - if (err < 0) - return err; - - err = ff_cbs_read(cbs, &au, NULL, data, size); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to parse feedback units, bad drivers: %s\n", - av_err2str(err)); - goto fail; - } - - /* If PPS has an override, just copy it entirely. */ - if (pps_override) { - for (int i = 0; i < au.nb_units; i++) { - if (au.units[i].type == H264_NAL_PPS) { - H264RawPPS *pps = au.units[i].content; - memcpy(&enc->units.raw_pps, pps, sizeof(*pps)); - break; - } - } - } - - err = 0; -fail: - ff_cbs_fragment_free(&au); - ff_cbs_close(&cbs); - - return err; -} - -static int init_base_units(AVCodecContext *avctx) -{ - int err; - VkResult ret; - VulkanEncodeH264Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - VkVideoEncodeH264SessionParametersGetInfoKHR h264_params_info; - VkVideoEncodeSessionParametersGetInfoKHR params_info; - VkVideoEncodeH264SessionParametersFeedbackInfoKHR h264_params_feedback; - VkVideoEncodeSessionParametersFeedbackInfoKHR params_feedback; - - void *data = NULL; - size_t data_size = 0; - - /* Generate SPS/PPS unit info */ - err = init_sequence_headers(avctx); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPS/PPS units: %s\n", - av_err2str(err)); - return err; - } - - /* Create session parameters from them */ - err = create_session_params(avctx); - if (err < 0) - return err; - - h264_params_info = (VkVideoEncodeH264SessionParametersGetInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_GET_INFO_KHR, - .writeStdSPS = 1, - .writeStdPPS = 1, - .stdSPSId = enc->units.raw_sps.seq_parameter_set_id, - .stdPPSId = enc->units.raw_pps.pic_parameter_set_id, - }; - params_info = (VkVideoEncodeSessionParametersGetInfoKHR) { - .sType = 
VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_GET_INFO_KHR, - .pNext = &h264_params_info, - .videoSessionParameters = ctx->session_params, - }; - - h264_params_feedback = (VkVideoEncodeH264SessionParametersFeedbackInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_FEEDBACK_INFO_KHR, - }; - params_feedback = (VkVideoEncodeSessionParametersFeedbackInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_FEEDBACK_INFO_KHR, - .pNext = &h264_params_feedback, - }; - - ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, &params_info, - &params_feedback, - &data_size, data); - if (ret == VK_INCOMPLETE || - (ret == VK_SUCCESS) && (data_size > 0)) { - data = av_mallocz(data_size); - if (!data) - return AVERROR(ENOMEM); - } else { - av_log(avctx, AV_LOG_ERROR, "Unable to get feedback for H.264 units = %"SIZE_SPECIFIER"\n", data_size); - return err; - } - - ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, &params_info, - &params_feedback, - &data_size, data); - if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Error writing feedback units\n"); - return err; - } - - av_log(avctx, AV_LOG_VERBOSE, "Feedback units written, overrides: %i (SPS: %i PPS: %i)\n", - params_feedback.hasOverrides, - h264_params_feedback.hasStdSPSOverrides, - h264_params_feedback.hasStdPPSOverrides); - - params_feedback.hasOverrides = 1; - h264_params_feedback.hasStdPPSOverrides = 1; - - /* No need to sync any overrides */ - if (!params_feedback.hasOverrides) - return 0; - - /* Parse back tne units and override */ - err = parse_feedback_units(avctx, data, data_size, - h264_params_feedback.hasStdSPSOverrides, - h264_params_feedback.hasStdPPSOverrides); - if (err < 0) - return err; - - /* Create final session parameters */ - err = create_session_params(avctx); - if (err < 0) - return err; - - return 0; -} - -static int vulkan_encode_h264_add_nal(AVCodecContext *avctx, - CodedBitstreamFragment *au, - void *nal_unit) -{ - H264RawNALUnitHeader 
*header = nal_unit; - - int err = ff_cbs_insert_unit_content(au, -1, - header->nal_unit_type, nal_unit, NULL); - if (err < 0) - av_log(avctx, AV_LOG_ERROR, "Failed to add NAL unit: " - "type = %d.\n", header->nal_unit_type); - - return err; -} - -static int write_access_unit(AVCodecContext *avctx, - uint8_t *data, size_t *data_len, - CodedBitstreamFragment *au) -{ - VulkanEncodeH264Context *enc = avctx->priv_data; - - int err = ff_cbs_write_fragment_data(enc->cbs, au); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n"); - return err; - } - - if (*data_len < au->data_size) { - av_log(avctx, AV_LOG_ERROR, "Access unit too large: %zu < %zu.\n", - *data_len, au->data_size); - return AVERROR(ENOSPC); - } - - memcpy(data, au->data, au->data_size); - *data_len = au->data_size; - - return 0; -} - -static int write_sequence_headers(AVCodecContext *avctx, - FFHWBaseEncodePicture *base_pic, - uint8_t *data, size_t *data_len) -{ - int err; - VulkanEncodeH264Context *enc = avctx->priv_data; - VulkanEncodeH264Picture *hp = base_pic ? 
base_pic->codec_priv : NULL; - CodedBitstreamFragment *au = &enc->current_access_unit; - - if (hp && hp->units_needed & UNIT_AUD) { - err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_aud); - if (err < 0) - goto fail; - } - - err = vulkan_encode_h264_add_nal(avctx, au, &enc->units.raw_sps); - if (err < 0) - goto fail; - - err = vulkan_encode_h264_add_nal(avctx, au, &enc->units.raw_pps); - if (err < 0) - goto fail; - - err = write_access_unit(avctx, data, data_len, au); -fail: - ff_cbs_fragment_reset(au); - return err; -} - -static int write_extra_headers(AVCodecContext *avctx, - FFHWBaseEncodePicture *base_pic, - uint8_t *data, size_t *data_len) -{ - int err; - VulkanEncodeH264Context *enc = avctx->priv_data; - VulkanEncodeH264Picture *hp = base_pic->codec_priv; - CodedBitstreamFragment *au = &enc->current_access_unit; - - if (hp->units_needed & UNIT_AUD) { - err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_aud); - if (err < 0) - goto fail; - } - - if (hp->units_needed & UNIT_SEI_IDENTIFIER) { - err = ff_cbs_sei_add_message(enc->cbs, au, 1, - SEI_TYPE_USER_DATA_UNREGISTERED, - &enc->sei_identifier, NULL); - if (err < 0) - goto fail; - } - - if (hp->units_needed & UNIT_SEI_TIMING) { - if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) { - err = ff_cbs_sei_add_message(enc->cbs, au, 1, - SEI_TYPE_BUFFERING_PERIOD, - &enc->units.sei_buffering_period, NULL); - if (err < 0) - goto fail; - } - err = ff_cbs_sei_add_message(enc->cbs, au, 1, - SEI_TYPE_PIC_TIMING, - &enc->sei_pic_timing, NULL); - if (err < 0) - goto fail; - } - - if (hp->units_needed & UNIT_SEI_RECOVERY) { - err = ff_cbs_sei_add_message(enc->cbs, au, 1, - SEI_TYPE_RECOVERY_POINT, - &enc->sei_recovery_point, NULL); - if (err < 0) - goto fail; - } - - if (hp->units_needed & UNIT_SEI_A53_CC) { - err = ff_cbs_sei_add_message(enc->cbs, au, 1, - SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35, - &enc->sei_a53cc, NULL); - if (err < 0) - goto fail; - } - - if (hp->units_needed) { - err = write_access_unit(avctx, data, 
data_len, au); - if (err < 0) - goto fail; - } else { - err = 0; - *data_len = 0; - } - -fail: - ff_cbs_fragment_reset(au); - return err; -} - -static int write_filler(AVCodecContext *avctx, uint32_t filler, - uint8_t *data, size_t *data_len) -{ - int err; - VulkanEncodeH264Context *enc = avctx->priv_data; - CodedBitstreamFragment *au = &enc->current_access_unit; - - H264RawFiller raw_filler = { - .nal_unit_header = { - .nal_unit_type = H264_NAL_FILLER_DATA, - }, - .filler_size = filler, - }; - - err = vulkan_encode_h264_add_nal(avctx, au, &raw_filler); - if (err < 0) - goto fail; - - err = write_access_unit(avctx, data, data_len, au); -fail: - ff_cbs_fragment_reset(au); - return err; -} - -static const FFVulkanCodec enc_cb = { - .flags = FF_HW_FLAG_B_PICTURES | - FF_HW_FLAG_B_PICTURE_REFERENCES | - FF_HW_FLAG_NON_IDR_KEY_PICTURES, - .picture_priv_data_size = sizeof(VulkanEncodeH264Picture), - .filler_header_size = 6, - .init_profile = init_profile, - .init_pic_rc = init_pic_rc, - .init_pic_params = init_pic_params, - .write_sequence_headers = write_sequence_headers, - .write_extra_headers = write_extra_headers, - .write_filler = write_filler, -}; - -static av_cold int vulkan_encode_h264_init(AVCodecContext *avctx) -{ - int err, ref_l0, ref_l1; - VulkanEncodeH264Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFHWBaseEncodeContext *base_ctx = &ctx->base; - int flags; - - if (avctx->profile == AV_PROFILE_UNKNOWN) - avctx->profile = enc->common.opts.profile; - - enc->caps = (VkVideoEncodeH264CapabilitiesKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_CAPABILITIES_KHR, - }; - - enc->quality_props = (VkVideoEncodeH264QualityLevelPropertiesKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_QUALITY_LEVEL_PROPERTIES_KHR, - }; - - err = ff_vulkan_encode_init(avctx, &enc->common, - &ff_vk_enc_h264_desc, &enc_cb, - &enc->caps, &enc->quality_props); - if (err < 0) - return err; - - av_log(avctx, 
AV_LOG_VERBOSE, "H264 encoder capabilities:\n"); - av_log(avctx, AV_LOG_VERBOSE, " Standard capability flags:\n"); - av_log(avctx, AV_LOG_VERBOSE, " separate_color_plane: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SEPARATE_COLOR_PLANE_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " qprime_y_zero_transform_bypass: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_QPPRIME_Y_ZERO_TRANSFORM_BYPASS_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " scaling_lists: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SCALING_MATRIX_PRESENT_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " chroma_qp_index_offset: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_CHROMA_QP_INDEX_OFFSET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " second_chroma_qp_index_offset: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SECOND_CHROMA_QP_INDEX_OFFSET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " pic_init_qp: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_PIC_INIT_QP_MINUS26_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " weighted:%s%s%s\n", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_WEIGHTED_PRED_FLAG_SET_BIT_KHR ? - " pred" : "", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_WEIGHTED_BIPRED_IDC_EXPLICIT_BIT_KHR ? - " bipred_explicit" : "", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_WEIGHTED_BIPRED_IDC_IMPLICIT_BIT_KHR ? - " bipred_implicit" : ""); - av_log(avctx, AV_LOG_VERBOSE, " 8x8_transforms: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_TRANSFORM_8X8_MODE_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " disable_direct_spatial_mv_pred: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DIRECT_SPATIAL_MV_PRED_FLAG_UNSET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " coder:%s%s\n", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_UNSET_BIT_KHR ? 
- " cabac" : "", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_SET_BIT_KHR ? - " cavlc" : ""); - av_log(avctx, AV_LOG_VERBOSE, " direct_8x8_inference: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DIRECT_8X8_INFERENCE_FLAG_UNSET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " constrained_intra_pred: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " deblock:%s%s%s\n", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DEBLOCKING_FILTER_DISABLED_BIT_KHR ? - " filter_disabling" : "", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DEBLOCKING_FILTER_ENABLED_BIT_KHR ? - " filter_enabling" : "", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DEBLOCKING_FILTER_PARTIAL_BIT_KHR ? - " filter_partial" : ""); - - av_log(avctx, AV_LOG_VERBOSE, " Capability flags:\n"); - av_log(avctx, AV_LOG_VERBOSE, " hdr_compliance: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " pred_weight_table_generated: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PREDICTION_WEIGHT_TABLE_GENERATED_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " row_unaligned_slice: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_ROW_UNALIGNED_SLICE_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " different_slice_type: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " b_frame_in_l0_list: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L0_LIST_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " b_frame_in_l1_list: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L1_LIST_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " per_pict_type_min_max_qp: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR)); - av_log(avctx, 
AV_LOG_VERBOSE, " per_slice_constant_qp: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PER_SLICE_CONSTANT_QP_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " generate_prefix_nalu: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_GENERATE_PREFIX_NALU_BIT_KHR)); - - av_log(avctx, AV_LOG_VERBOSE, " Capabilities:\n"); - av_log(avctx, AV_LOG_VERBOSE, " maxLevelIdc: %i\n", - enc->caps.maxLevelIdc); - av_log(avctx, AV_LOG_VERBOSE, " maxSliceCount: %i\n", - enc->caps.maxSliceCount); - av_log(avctx, AV_LOG_VERBOSE, " max(P/B)PictureL0ReferenceCount: %i P's; %i B's\n", - enc->caps.maxPPictureL0ReferenceCount, - enc->caps.maxBPictureL0ReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " maxL1ReferenceCount: %i\n", - enc->caps.maxL1ReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " maxTemporalLayerCount: %i\n", - enc->caps.maxTemporalLayerCount); - av_log(avctx, AV_LOG_VERBOSE, " expectDyadicTemporalLayerPattern: %i\n", - enc->caps.expectDyadicTemporalLayerPattern); - av_log(avctx, AV_LOG_VERBOSE, " min/max Qp: [%i, %i]\n", - enc->caps.minQp, enc->caps.maxQp); - av_log(avctx, AV_LOG_VERBOSE, " prefersGopRemainingFrames: %i\n", - enc->caps.prefersGopRemainingFrames); - av_log(avctx, AV_LOG_VERBOSE, " requiresGopRemainingFrames: %i\n", - enc->caps.requiresGopRemainingFrames); - - err = init_enc_options(avctx); - if (err < 0) - return err; - - flags = ctx->codec->flags; - if (!enc->caps.maxPPictureL0ReferenceCount && - !enc->caps.maxBPictureL0ReferenceCount && - !enc->caps.maxL1ReferenceCount) { - /* Intra-only */ - flags |= FF_HW_FLAG_INTRA_ONLY; - ref_l0 = ref_l1 = 0; - } else if (!enc->caps.maxPPictureL0ReferenceCount) { - /* No P-frames? How. 
*/ - base_ctx->p_to_gpb = 1; - ref_l0 = enc->caps.maxBPictureL0ReferenceCount; - ref_l1 = enc->caps.maxL1ReferenceCount; - } else if (!enc->caps.maxBPictureL0ReferenceCount && - !enc->caps.maxL1ReferenceCount) { - /* No B-frames */ - flags &= ~(FF_HW_FLAG_B_PICTURES | FF_HW_FLAG_B_PICTURE_REFERENCES); - ref_l0 = enc->caps.maxPPictureL0ReferenceCount; - ref_l1 = 0; - } else { - /* P and B frames */ - ref_l0 = FFMIN(enc->caps.maxPPictureL0ReferenceCount, - enc->caps.maxBPictureL0ReferenceCount); - ref_l1 = enc->caps.maxL1ReferenceCount; - } - - err = ff_hw_base_init_gop_structure(base_ctx, avctx, ref_l0, ref_l1, - flags, 0); - if (err < 0) - return err; - - base_ctx->output_delay = base_ctx->b_per_p; - base_ctx->decode_delay = base_ctx->max_b_depth; - - /* Prepare SEI */ - if (enc->unit_elems & UNIT_SEI_IDENTIFIER) { - int len; - - memcpy(enc->sei_identifier.uuid_iso_iec_11578, - vulkan_encode_h264_sei_identifier_uuid, - sizeof(enc->sei_identifier.uuid_iso_iec_11578)); - - len = snprintf(NULL, 0, - "%s / Vulkan video %i.%i.%i / %s %i.%i.%i / %s", - LIBAVCODEC_IDENT, - CODEC_VER(ff_vk_enc_h264_desc.ext_props.specVersion), - s->driver_props.driverName, - CODEC_VER(s->props.properties.driverVersion), - s->props.properties.deviceName); - - if (len >= 0) { - enc->sei_identifier_string = av_malloc(len + 1); - if (!enc->sei_identifier_string) - return AVERROR(ENOMEM); - - len = snprintf(enc->sei_identifier_string, len + 1, - "%s / Vulkan video %i.%i.%i / %s %i.%i.%i / %s", - LIBAVCODEC_IDENT, - CODEC_VER(ff_vk_enc_h264_desc.ext_props.specVersion), - s->driver_props.driverName, - CODEC_VER(s->props.properties.driverVersion), - s->props.properties.deviceName); - - enc->sei_identifier.data = enc->sei_identifier_string; - enc->sei_identifier.data_length = len + 1; - } - } - - /* Init CBS */ - err = ff_cbs_init(&enc->cbs, AV_CODEC_ID_H264, avctx); - if (err < 0) - return err; - - /* Create units and session parameters */ - err = init_base_units(avctx); - if (err < 0) - return 
err; - - /* Write out extradata */ - err = ff_vulkan_write_global_header(avctx, &enc->common); - if (err < 0) - return err; - - return 0; -} - -static av_cold int vulkan_encode_h264_close(AVCodecContext *avctx) -{ - VulkanEncodeH264Context *enc = avctx->priv_data; - ff_vulkan_encode_uninit(&enc->common); - return 0; -} - -#define OFFSET(x) offsetof(VulkanEncodeH264Context, x) -#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) -static const AVOption vulkan_encode_h264_options[] = { - HW_BASE_ENCODE_COMMON_OPTIONS, - VULKAN_ENCODE_COMMON_OPTIONS, - - { "profile", "Set profile (profile_idc and constraint_set*_flag)", - OFFSET(common.opts.profile), AV_OPT_TYPE_INT, - { .i64 = AV_PROFILE_UNKNOWN }, AV_PROFILE_UNKNOWN, 0xffff, FLAGS, .unit = "profile" }, - -#define PROFILE(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ - { .i64 = value }, 0, 0, FLAGS, .unit = "profile" - { PROFILE("constrained_baseline", AV_PROFILE_H264_CONSTRAINED_BASELINE) }, - { PROFILE("main", AV_PROFILE_H264_MAIN) }, - { PROFILE("high", AV_PROFILE_H264_HIGH) }, - { PROFILE("high444p", AV_PROFILE_H264_HIGH_10) }, -#undef PROFILE - - { "level", "Set level (level_idc)", - OFFSET(common.opts.level), AV_OPT_TYPE_INT, - { .i64 = AV_LEVEL_UNKNOWN }, AV_LEVEL_UNKNOWN, 0xff, FLAGS, .unit = "level" }, - -#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ - { .i64 = value }, 0, 0, FLAGS, .unit = "level" - { LEVEL("1", 10) }, - { LEVEL("1.1", 11) }, - { LEVEL("1.2", 12) }, - { LEVEL("1.3", 13) }, - { LEVEL("2", 20) }, - { LEVEL("2.1", 21) }, - { LEVEL("2.2", 22) }, - { LEVEL("3", 30) }, - { LEVEL("3.1", 31) }, - { LEVEL("3.2", 32) }, - { LEVEL("4", 40) }, - { LEVEL("4.1", 41) }, - { LEVEL("4.2", 42) }, - { LEVEL("5", 50) }, - { LEVEL("5.1", 51) }, - { LEVEL("5.2", 52) }, - { LEVEL("6", 60) }, - { LEVEL("6.1", 61) }, - { LEVEL("6.2", 62) }, -#undef LEVEL - - { "coder", "Entropy coder type", OFFSET(unit_opts.cabac), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS, "coder" }, - { 
"cabac", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, FLAGS, "coder" }, - { "vlc", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS, "coder" }, - - { "units", "Set units to include", OFFSET(unit_elems), AV_OPT_TYPE_FLAGS, { .i64 = UNIT_AUD | UNIT_SEI_IDENTIFIER | UNIT_SEI_RECOVERY | UNIT_SEI_TIMING | UNIT_SEI_A53_CC }, 0, INT_MAX, FLAGS, "units" }, - { "aud", "Include AUD units", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_AUD }, INT_MIN, INT_MAX, FLAGS, "units" }, - { "identifier", "Include encoder version identifier", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_IDENTIFIER }, INT_MIN, INT_MAX, FLAGS, "units" }, - { "timing", "Include timing parameters (buffering_period and pic_timing)", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_TIMING }, INT_MIN, INT_MAX, FLAGS, "units" }, - { "recovery", "Include recovery points where appropriate", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_RECOVERY }, INT_MIN, INT_MAX, FLAGS, "units" }, - { "a53_cc", "Include A/53 caption data", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_A53_CC }, INT_MIN, INT_MAX, FLAGS, "units" }, - - { NULL }, -}; - -static const FFCodecDefault vulkan_encode_h264_defaults[] = { - { "b", "0" }, - { "bf", "2" }, - { "g", "300" }, - { "i_qfactor", "1" }, - { "i_qoffset", "0" }, - { "b_qfactor", "1" }, - { "b_qoffset", "0" }, - { "qmin", "-1" }, - { "qmax", "-1" }, - { "refs", "0" }, - { NULL }, -}; - -static const AVClass vulkan_encode_h264_class = { - .class_name = "h264_vulkan", - .item_name = av_default_item_name, - .option = vulkan_encode_h264_options, - .version = LIBAVUTIL_VERSION_INT, -}; - -const FFCodec ff_h264_vulkan_encoder = { - .p.name = "h264_vulkan", - CODEC_LONG_NAME("H.264/AVC (Vulkan)"), - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_H264, - .priv_data_size = sizeof(VulkanEncodeH264Context), - .init = &vulkan_encode_h264_init, - FF_CODEC_RECEIVE_PACKET_CB(&ff_vulkan_encode_receive_packet), - .close = &vulkan_encode_h264_close, - .p.priv_class = &vulkan_encode_h264_class, - 
.p.capabilities = AV_CODEC_CAP_DELAY | - AV_CODEC_CAP_HARDWARE | - AV_CODEC_CAP_DR1 | - AV_CODEC_CAP_ENCODER_FLUSH | - AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, - .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, - .defaults = vulkan_encode_h264_defaults, - CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), - .hw_configs = ff_vulkan_encode_hw_configs, - .p.wrapper_name = "vulkan", -}; -- 2.49.1 From c12426040081c4d92223f7b4fc61209bf1fb37e4 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:33:54 +0000 Subject: [PATCH 076/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_encode_h264.c | 1668 ++++++++++++++++++++++++ 1 file changed, 1668 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_encode_h264.c diff --git a/libavcodec/vulkan/vulkan_encode_h264.c b/libavcodec/vulkan/vulkan_encode_h264.c new file mode 100644 index 0000000000..33f319cbcc --- /dev/null +++ b/libavcodec/vulkan/vulkan_encode_h264.c @@ -0,0 +1,1668 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/internal.h" +#include "libavutil/opt.h" +#include "libavutil/mem.h" + +#include "libavcodec/cbs.h" +#include "libavcodec/cbs_h264.h" +#include "libavcodec/atsc_a53.h" + +#include "libavcodec/h264_levels.h" +#include "libavcodec/h2645data.h" +#include "libavcodec/codec_internal.h" +#include "libavcodec/version.h" +#include "libavcodec/hw_base_encode_h264.h" + +#include "vulkan_encode.h" + +enum UnitElems { + UNIT_AUD = 1 << 0, + UNIT_SEI_TIMING = 1 << 1, + UNIT_SEI_IDENTIFIER = 1 << 2, + UNIT_SEI_RECOVERY = 1 << 3, + UNIT_SEI_A53_CC = 1 << 4, +}; + +const FFVulkanEncodeDescriptor ff_vk_enc_h264_desc = { + .codec_id = AV_CODEC_ID_H264, + .encode_extension = FF_VK_EXT_VIDEO_ENCODE_H264, + .encode_op = VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR, + .ext_props = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION, + }, +}; + +/* Random (version 4) ISO 11578 UUID. 
*/ +static const uint8_t vulkan_encode_h264_sei_identifier_uuid[16] = { + 0x03, 0xfd, 0xf2, 0x0a, 0x5d, 0x4c, 0x05, 0x48, + 0x20, 0x98, 0xca, 0x6b, 0x0c, 0x95, 0x30, 0x1c, +}; + +typedef struct VulkanEncodeH264Picture { + int frame_num; + int64_t last_idr_frame; + uint16_t idr_pic_id; + int primary_pic_type; + int slice_type; + int pic_order_cnt; + + enum UnitElems units_needed; + + VkVideoEncodeH264RateControlInfoKHR vkrc_info; + VkVideoEncodeH264RateControlLayerInfoKHR vkrc_layer_info; + VkVideoEncodeH264GopRemainingFrameInfoKHR vkrc_remaining; + + StdVideoEncodeH264WeightTable slice_wt; + StdVideoEncodeH264SliceHeader slice_hdr; + VkVideoEncodeH264NaluSliceInfoKHR vkslice; + + StdVideoEncodeH264PictureInfo h264pic_info; + VkVideoEncodeH264PictureInfoKHR vkh264pic_info; + + StdVideoEncodeH264ReferenceInfo h264dpb_info; + VkVideoEncodeH264DpbSlotInfoKHR vkh264dpb_info; + + StdVideoEncodeH264RefListModEntry mods[MAX_REFERENCE_LIST_NUM][H264_MAX_RPLM_COUNT]; + StdVideoEncodeH264RefPicMarkingEntry mmco[H264_MAX_RPLM_COUNT]; + StdVideoEncodeH264ReferenceListsInfo ref_list_info; +} VulkanEncodeH264Picture; + +typedef struct VulkanEncodeH264Context { + FFVulkanEncodeContext common; + + FFHWBaseEncodeH264 units; + FFHWBaseEncodeH264Opts unit_opts; + + enum UnitElems unit_elems; + + uint8_t fixed_qp_p; + uint8_t fixed_qp_b; + + VkVideoEncodeH264ProfileInfoKHR profile; + + VkVideoEncodeH264CapabilitiesKHR caps; + VkVideoEncodeH264QualityLevelPropertiesKHR quality_props; + + CodedBitstreamContext *cbs; + CodedBitstreamFragment current_access_unit; + + H264RawAUD raw_aud; + + SEIRawUserDataUnregistered sei_identifier; + H264RawSEIPicTiming sei_pic_timing; + H264RawSEIRecoveryPoint sei_recovery_point; + SEIRawUserDataRegistered sei_a53cc; + void *sei_a53cc_data; + char *sei_identifier_string; +} VulkanEncodeH264Context; + +static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + VkVideoEncodeRateControlInfoKHR *rc_info, + 
VkVideoEncodeRateControlLayerInfoKHR *rc_layer) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + VulkanEncodeH264Picture *hp = pic->codec_priv; + + hp->vkrc_info = (VkVideoEncodeH264RateControlInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_INFO_KHR, + .flags = VK_VIDEO_ENCODE_H264_RATE_CONTROL_REFERENCE_PATTERN_FLAT_BIT_KHR | + VK_VIDEO_ENCODE_H264_RATE_CONTROL_REGULAR_GOP_BIT_KHR, + .idrPeriod = ctx->base.gop_size, + .gopFrameCount = ctx->base.gop_size, + .consecutiveBFrameCount = FFMAX(ctx->base.b_per_p - 1, 0), + .temporalLayerCount = 0, + }; + rc_info->pNext = &hp->vkrc_info; + + if (rc_info->rateControlMode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { + rc_info->virtualBufferSizeInMs = (enc->unit_opts.hrd_buffer_size * 1000LL) / avctx->bit_rate; + rc_info->initialVirtualBufferSizeInMs = (enc->unit_opts.initial_buffer_fullness * 1000LL) / avctx->bit_rate; + + hp->vkrc_layer_info = (VkVideoEncodeH264RateControlLayerInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_LAYER_INFO_KHR, + + .useMinQp = avctx->qmin > 0, + .minQp.qpI = avctx->qmin > 0 ? avctx->qmin : 0, + .minQp.qpP = avctx->qmin > 0 ? avctx->qmin : 0, + .minQp.qpB = avctx->qmin > 0 ? avctx->qmin : 0, + + .useMaxQp = avctx->qmax > 0, + .maxQp.qpI = avctx->qmax > 0 ? avctx->qmax : 0, + .maxQp.qpP = avctx->qmax > 0 ? avctx->qmax : 0, + .maxQp.qpB = avctx->qmax > 0 ? avctx->qmax : 0, + + .useMaxFrameSize = 0, + }; + rc_layer->pNext = &hp->vkrc_layer_info; + hp->vkrc_info.temporalLayerCount = 1; + } + + return 0; +} + +static int vk_enc_h264_update_pic_info(AVCodecContext *avctx, + FFHWBaseEncodePicture *pic) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + VulkanEncodeH264Picture *hp = pic->codec_priv; + FFHWBaseEncodePicture *prev = pic->prev; + VulkanEncodeH264Picture *hprev = prev ? 
prev->codec_priv : NULL; + + if (pic->type == FF_HW_PICTURE_TYPE_IDR) { + av_assert0(pic->display_order == pic->encode_order); + + hp->frame_num = 0; + hp->last_idr_frame = pic->display_order; + hp->idr_pic_id = hprev ? hprev->idr_pic_id + 1 : 0; + + hp->primary_pic_type = 0; + hp->slice_type = STD_VIDEO_H264_SLICE_TYPE_I; + } else { + av_assert0(prev); + + hp->frame_num = hprev->frame_num + prev->is_reference; + + hp->last_idr_frame = hprev->last_idr_frame; + hp->idr_pic_id = hprev->idr_pic_id; + + if (pic->type == FF_HW_PICTURE_TYPE_I) { + hp->slice_type = STD_VIDEO_H264_SLICE_TYPE_I; + hp->primary_pic_type = 0; + } else if (pic->type == FF_HW_PICTURE_TYPE_P) { + hp->slice_type = STD_VIDEO_H264_SLICE_TYPE_P; + hp->primary_pic_type = 1; + } else { + hp->slice_type = STD_VIDEO_H264_SLICE_TYPE_B; + hp->primary_pic_type = 2; + } + } + + hp->pic_order_cnt = pic->display_order - hp->last_idr_frame; + if (enc->units.raw_sps.pic_order_cnt_type == 2) + hp->pic_order_cnt *= 2; + + hp->units_needed = 0; + + if (enc->unit_elems & UNIT_SEI_IDENTIFIER && pic->encode_order == 0) + hp->units_needed |= UNIT_SEI_IDENTIFIER; + + if (enc->unit_elems & UNIT_SEI_TIMING) { + enc->sei_pic_timing = (H264RawSEIPicTiming) { + .cpb_removal_delay = 2 * (pic->encode_order - hp->last_idr_frame), + .dpb_output_delay = 2 * (pic->display_order - pic->encode_order + ctx->base.max_b_depth), + }; + + hp->units_needed |= UNIT_SEI_TIMING; + } + + if (enc->unit_elems & UNIT_SEI_RECOVERY && pic->type == FF_HW_PICTURE_TYPE_I) { + enc->sei_recovery_point = (H264RawSEIRecoveryPoint) { + .recovery_frame_cnt = 0, + .exact_match_flag = 1, + .broken_link_flag = ctx->base.b_per_p > 0, + }; + + hp->units_needed |= UNIT_SEI_RECOVERY; + } + + if (enc->unit_elems & UNIT_SEI_A53_CC) { + int err; + size_t sei_a53cc_len; + av_freep(&enc->sei_a53cc_data); + err = ff_alloc_a53_sei(pic->input_image, 0, &enc->sei_a53cc_data, &sei_a53cc_len); + if (err < 0) + return err; + if (enc->sei_a53cc_data != NULL) { + 
enc->sei_a53cc.itu_t_t35_country_code = 181; + enc->sei_a53cc.data = (uint8_t *)enc->sei_a53cc_data + 1; + enc->sei_a53cc.data_length = sei_a53cc_len - 1; + + hp->units_needed |= UNIT_SEI_A53_CC; + } + } + + return 0; +} + +static void setup_slices(AVCodecContext *avctx, + FFHWBaseEncodePicture *pic) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + VulkanEncodeH264Picture *hp = pic->codec_priv; + + hp->slice_wt = (StdVideoEncodeH264WeightTable) { + .flags = (StdVideoEncodeH264WeightTableFlags) { + .luma_weight_l0_flag = 0, + .chroma_weight_l0_flag = 0, + .luma_weight_l1_flag = 0, + .chroma_weight_l1_flag = 0, + }, + .luma_log2_weight_denom = 0, + .chroma_log2_weight_denom = 0, + .luma_weight_l0 = { 0 }, + .luma_offset_l0 = { 0 }, + .chroma_weight_l0 = { { 0 } }, + .chroma_offset_l0 = { { 0 } }, + .luma_weight_l1 = { 0 }, + .luma_offset_l1 = { 0 }, + .chroma_weight_l1 = { { 0 } }, + .chroma_offset_l1 = { { 0 } }, + }; + + hp->slice_hdr = (StdVideoEncodeH264SliceHeader) { + .flags = (StdVideoEncodeH264SliceHeaderFlags) { + .direct_spatial_mv_pred_flag = 1, + /* The vk_samples code does this */ + .num_ref_idx_active_override_flag = + ((enc->units.raw_pps.num_ref_idx_l0_default_active_minus1) && + (pic->type == FF_HW_PICTURE_TYPE_B)) ? 1 : 0, + }, + .first_mb_in_slice = 1, + .slice_type = hp->slice_type, + .slice_alpha_c0_offset_div2 = 0, + .slice_beta_offset_div2 = 0, + .slice_qp_delta = 0, /* Filled in below */ + /* Reserved */ + .cabac_init_idc = 0, + .disable_deblocking_filter_idc = 0, + .pWeightTable = NULL, // &hp->slice_wt, + }; + + hp->vkslice = (VkVideoEncodeH264NaluSliceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_NALU_SLICE_INFO_KHR, + .pNext = NULL, + .constantQp = pic->type == FF_HW_PICTURE_TYPE_B ? enc->fixed_qp_b : + pic->type == FF_HW_PICTURE_TYPE_P ? 
enc->fixed_qp_p : + enc->unit_opts.fixed_qp_idr, + .pStdSliceHeader = &hp->slice_hdr, + }; + + if (enc->common.opts.rc_mode != VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) + hp->vkslice.constantQp = 0; + + hp->slice_hdr.slice_qp_delta = hp->vkslice.constantQp - + (enc->units.raw_pps.pic_init_qp_minus26 + 26); + + hp->vkh264pic_info.pNaluSliceEntries = &hp->vkslice; + hp->vkh264pic_info.naluSliceEntryCount = 1; +} + +static void vk_enc_h264_default_ref_pic_list(AVCodecContext *avctx, + FFHWBaseEncodePicture *pic, + FFHWBaseEncodePicture **rpl0, + FFHWBaseEncodePicture **rpl1, + int *rpl_size) +{ + FFHWBaseEncodePicture *prev; + VulkanEncodeH264Picture *hp, *hn, *hc; + int i, j, n = 0; + + prev = pic->prev; + av_assert0(prev); + hp = pic->codec_priv; + + for (i = 0; i < pic->prev->nb_dpb_pics; i++) { + hn = prev->dpb[i]->codec_priv; + av_assert0(hn->frame_num < hp->frame_num); + + if (pic->type == FF_HW_PICTURE_TYPE_P) { + for (j = n; j > 0; j--) { + hc = rpl0[j - 1]->codec_priv; + av_assert0(hc->frame_num != hn->frame_num); + if (hc->frame_num > hn->frame_num) + break; + rpl0[j] = rpl0[j - 1]; + } + rpl0[j] = prev->dpb[i]; + + } else if (pic->type == FF_HW_PICTURE_TYPE_B) { + for (j = n; j > 0; j--) { + hc = rpl0[j - 1]->codec_priv; + av_assert0(hc->pic_order_cnt != hp->pic_order_cnt); + if (hc->pic_order_cnt < hp->pic_order_cnt) { + if (hn->pic_order_cnt > hp->pic_order_cnt || + hn->pic_order_cnt < hc->pic_order_cnt) + break; + } else { + if (hn->pic_order_cnt > hc->pic_order_cnt) + break; + } + rpl0[j] = rpl0[j - 1]; + } + rpl0[j] = prev->dpb[i]; + + for (j = n; j > 0; j--) { + hc = rpl1[j - 1]->codec_priv; + av_assert0(hc->pic_order_cnt != hp->pic_order_cnt); + if (hc->pic_order_cnt > hp->pic_order_cnt) { + if (hn->pic_order_cnt < hp->pic_order_cnt || + hn->pic_order_cnt > hc->pic_order_cnt) + break; + } else { + if (hn->pic_order_cnt < hc->pic_order_cnt) + break; + } + rpl1[j] = rpl1[j - 1]; + } + rpl1[j] = prev->dpb[i]; + } + + ++n; + } + + if (pic->type 
== FF_HW_PICTURE_TYPE_B) { + for (i = 0; i < n; i++) { + if (rpl0[i] != rpl1[i]) + break; + } + if (i == n) + FFSWAP(FFHWBaseEncodePicture *, rpl1[0], rpl1[1]); + } + + if (pic->type == FF_HW_PICTURE_TYPE_P || + pic->type == FF_HW_PICTURE_TYPE_B) { + av_log(avctx, AV_LOG_DEBUG, "Default RefPicList0 for fn=%d/poc=%d:", + hp->frame_num, hp->pic_order_cnt); + for (i = 0; i < n; i++) { + hn = rpl0[i]->codec_priv; + av_log(avctx, AV_LOG_DEBUG, " fn=%d/poc=%d", + hn->frame_num, hn->pic_order_cnt); + } + av_log(avctx, AV_LOG_DEBUG, "\n"); + } + if (pic->type == FF_HW_PICTURE_TYPE_B) { + av_log(avctx, AV_LOG_DEBUG, "Default RefPicList1 for fn=%d/poc=%d:", + hp->frame_num, hp->pic_order_cnt); + for (i = 0; i < n; i++) { + hn = rpl1[i]->codec_priv; + av_log(avctx, AV_LOG_DEBUG, " fn=%d/poc=%d", + hn->frame_num, hn->pic_order_cnt); + } + av_log(avctx, AV_LOG_DEBUG, "\n"); + } + + *rpl_size = n; +} + +static void setup_refs(AVCodecContext *avctx, + FFHWBaseEncodePicture *pic, + VkVideoEncodeInfoKHR *encode_info) +{ + int n, i, j; + VulkanEncodeH264Context *enc = avctx->priv_data; + VulkanEncodeH264Picture *hp = pic->codec_priv; + FFHWBaseEncodePicture *prev = pic->prev; + FFHWBaseEncodePicture *def_l0[MAX_DPB_SIZE], *def_l1[MAX_DPB_SIZE]; + VulkanEncodeH264Picture *href; + + hp->ref_list_info = (StdVideoEncodeH264ReferenceListsInfo) { + .flags = (StdVideoEncodeH264ReferenceListsInfoFlags) { + .ref_pic_list_modification_flag_l0 = 0, + .ref_pic_list_modification_flag_l1 = 0, + /* Reserved */ + }, + /* May be overridden during setup_slices() */ + .num_ref_idx_l0_active_minus1 = pic->nb_refs[0] - 1, + .num_ref_idx_l1_active_minus1 = pic->nb_refs[1] - 1, + /* .RefPicList0 is set in vk_enc_h264_default_ref_pic_list() */ + /* .RefPicList1 is set in vk_enc_h264_default_ref_pic_list() */ + /* Reserved */ + .pRefList0ModOperations = NULL, /* All set below */ + .refList0ModOpCount = 0, + .pRefList1ModOperations = NULL, + .refList1ModOpCount = 0, + .pRefPicMarkingOperations = NULL, + 
.refPicMarkingOpCount = 0, + }; + + for (i = 0; i < STD_VIDEO_H264_MAX_NUM_LIST_REF; i++) + hp->ref_list_info.RefPicList0[i] = hp->ref_list_info.RefPicList1[i] = -1; + + /* Note: really not sure */ + for (int i = 0; i < pic->nb_refs[0]; i++) { + VkVideoReferenceSlotInfoKHR *slot_info; + slot_info = (VkVideoReferenceSlotInfoKHR *)&encode_info->pReferenceSlots[i]; + hp->ref_list_info.RefPicList0[i] = slot_info->slotIndex; + } + + /* Note: really not sure */ + for (int i = 0; i < pic->nb_refs[1]; i++) { + VkVideoReferenceSlotInfoKHR *slot_info; + slot_info = (VkVideoReferenceSlotInfoKHR *)&encode_info->pReferenceSlots[pic->nb_refs[0] + i]; + hp->ref_list_info.RefPicList1[i] = slot_info->slotIndex; + } + + hp->h264pic_info.pRefLists = &hp->ref_list_info; + + if (pic->is_reference && pic->type != FF_HW_PICTURE_TYPE_IDR) { + FFHWBaseEncodePicture *discard_list[MAX_DPB_SIZE]; + int discard = 0, keep = 0; + + // Discard everything which is in the DPB of the previous frame but + // not in the DPB of this one. 
+ for (i = 0; i < prev->nb_dpb_pics; i++) { + for (j = 0; j < pic->nb_dpb_pics; j++) { + if (prev->dpb[i] == pic->dpb[j]) + break; + } + if (j == pic->nb_dpb_pics) { + discard_list[discard] = prev->dpb[i]; + ++discard; + } else { + ++keep; + } + } + av_assert0(keep <= enc->units.dpb_frames); + + if (discard == 0) { + hp->h264pic_info.flags.adaptive_ref_pic_marking_mode_flag = 0; + } else { + hp->h264pic_info.flags.adaptive_ref_pic_marking_mode_flag = 1; + for (i = 0; i < discard; i++) { + VulkanEncodeH264Picture *old = discard_list[i]->codec_priv; + av_assert0(old->frame_num < hp->frame_num); + hp->mmco[i] = (StdVideoEncodeH264RefPicMarkingEntry) { + .memory_management_control_operation = 1, + .difference_of_pic_nums_minus1 = hp->frame_num - old->frame_num - 1, + }; + } + hp->mmco[i] = (StdVideoEncodeH264RefPicMarkingEntry) { + .memory_management_control_operation = 0, + }; + hp->ref_list_info.pRefPicMarkingOperations = hp->mmco; + hp->ref_list_info.refPicMarkingOpCount = i + 1; + } + } + + if (pic->type == FF_HW_PICTURE_TYPE_I || pic->type == FF_HW_PICTURE_TYPE_IDR) + return; + + // If the intended references are not the first entries of RefPicListN + // by default, use ref-pic-list-modification to move them there. 
+ vk_enc_h264_default_ref_pic_list(avctx, pic, + def_l0, def_l1, &n); + + if (pic->type == FF_HW_PICTURE_TYPE_P) { + int need_rplm = 0; + for (i = 0; i < pic->nb_refs[0]; i++) { + av_assert0(pic->refs[0][i]); + if (pic->refs[0][i] != (FFHWBaseEncodePicture *)def_l0[i]) + need_rplm = 1; + } + + hp->ref_list_info.flags.ref_pic_list_modification_flag_l0 = need_rplm; + if (need_rplm) { + int pic_num = hp->frame_num; + for (i = 0; i < pic->nb_refs[0]; i++) { + href = pic->refs[0][i]->codec_priv; + av_assert0(href->frame_num != pic_num); + if (href->frame_num < pic_num) { + hp->mods[0][i] = (StdVideoEncodeH264RefListModEntry) { + .modification_of_pic_nums_idc = 0, + .abs_diff_pic_num_minus1 = pic_num - href->frame_num - 1, + }; + } else { + hp->mods[0][i] = (StdVideoEncodeH264RefListModEntry) { + .modification_of_pic_nums_idc = 1, + .abs_diff_pic_num_minus1 = href->frame_num - pic_num - 1, + }; + } + pic_num = href->frame_num; + } + hp->ref_list_info.pRefList0ModOperations = hp->mods[0]; + hp->ref_list_info.refList0ModOpCount = i - 1; + } + } else { + int need_rplm_l0 = 0, need_rplm_l1 = 0; + int n0 = 0, n1 = 0; + for (i = 0; i < pic->nb_refs[0]; i++) { + av_assert0(pic->refs[0][i]); + href = pic->refs[0][i]->codec_priv; + av_assert0(href->pic_order_cnt < hp->pic_order_cnt); + if (pic->refs[0][i] != (FFHWBaseEncodePicture *)def_l0[n0]) + need_rplm_l0 = 1; + ++n0; + } + + for (int i = 0; i < pic->nb_refs[1]; i++) { + av_assert0(pic->refs[1][i]); + href = pic->refs[1][i]->codec_priv; + av_assert0(href->pic_order_cnt > hp->pic_order_cnt); + if (pic->refs[1][i] != (FFHWBaseEncodePicture *)def_l1[n1]) + need_rplm_l1 = 1; + ++n1; + } + + hp->ref_list_info.flags.ref_pic_list_modification_flag_l0 = need_rplm_l0; + if (need_rplm_l0) { + int pic_num = hp->frame_num; + for (i = j = 0; i < pic->nb_refs[0]; i++) { + href = pic->refs[0][i]->codec_priv; + av_assert0(href->frame_num != pic_num); + if (href->frame_num < pic_num) { + hp->mods[0][j] = (StdVideoEncodeH264RefListModEntry) { 
+ .modification_of_pic_nums_idc = 0, + .abs_diff_pic_num_minus1 = pic_num - href->frame_num - 1, + }; + } else { + hp->mods[0][j] = (StdVideoEncodeH264RefListModEntry) { + .modification_of_pic_nums_idc = 1, + .abs_diff_pic_num_minus1 = href->frame_num - pic_num - 1, + }; + } + pic_num = href->frame_num; + ++j; + } + hp->ref_list_info.pRefList0ModOperations = hp->mods[0]; + hp->ref_list_info.refList0ModOpCount = j - 1; + } + + hp->ref_list_info.flags.ref_pic_list_modification_flag_l1 = need_rplm_l1; + if (need_rplm_l1) { + int pic_num = hp->frame_num; + for (i = j = 0; i < pic->nb_refs[1]; i++) { + href = pic->refs[1][i]->codec_priv; + av_assert0(href->frame_num != pic_num); + if (href->frame_num < pic_num) { + hp->mods[1][j] = (StdVideoEncodeH264RefListModEntry) { + .modification_of_pic_nums_idc = 0, + .abs_diff_pic_num_minus1 = pic_num - href->frame_num - 1, + }; + } else { + hp->mods[1][j] = (StdVideoEncodeH264RefListModEntry) { + .modification_of_pic_nums_idc = 1, + .abs_diff_pic_num_minus1 = href->frame_num - pic_num - 1, + }; + } + pic_num = href->frame_num; + ++j; + } + hp->ref_list_info.pRefList1ModOperations = hp->mods[1]; + hp->ref_list_info.refList1ModOpCount = j - 1; + } + } +} + +static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + VkVideoEncodeInfoKHR *encode_info) +{ + int err; + FFVulkanEncodePicture *vp = pic->priv; + VulkanEncodeH264Picture *hp = pic->codec_priv; + VkVideoReferenceSlotInfoKHR *ref_slot; + + err = vk_enc_h264_update_pic_info(avctx, pic); + if (err < 0) + return err; + + hp->vkh264pic_info = (VkVideoEncodeH264PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PICTURE_INFO_KHR, + .pNext = NULL, + .pNaluSliceEntries = NULL, // Filled in during setup_slices() + .naluSliceEntryCount = 0, // Filled in during setup_slices() + .pStdPictureInfo = &hp->h264pic_info, + }; + + hp->h264pic_info = (StdVideoEncodeH264PictureInfo) { + .flags = (StdVideoEncodeH264PictureInfoFlags) { + .IdrPicFlag = pic->type 
== FF_HW_PICTURE_TYPE_IDR, + .is_reference = pic->is_reference, + .no_output_of_prior_pics_flag = 0, + .long_term_reference_flag = 0, + .adaptive_ref_pic_marking_mode_flag = 0, // Filled in during setup_refs() + /* Reserved */ + }, + .seq_parameter_set_id = 0, + .pic_parameter_set_id = 0, + .idr_pic_id = hp->idr_pic_id, + .primary_pic_type = pic->type == FF_HW_PICTURE_TYPE_P ? STD_VIDEO_H264_PICTURE_TYPE_P : + pic->type == FF_HW_PICTURE_TYPE_B ? STD_VIDEO_H264_PICTURE_TYPE_B : + pic->type == FF_HW_PICTURE_TYPE_I ? STD_VIDEO_H264_PICTURE_TYPE_I : + STD_VIDEO_H264_PICTURE_TYPE_IDR, + .frame_num = hp->frame_num, + .PicOrderCnt = hp->pic_order_cnt, + .temporal_id = 0, /* ? */ + /* Reserved */ + .pRefLists = NULL, // Filled in during setup_refs + }; + encode_info->pNext = &hp->vkh264pic_info; + + hp->h264dpb_info = (StdVideoEncodeH264ReferenceInfo) { + .flags = (StdVideoEncodeH264ReferenceInfoFlags) { + .used_for_long_term_reference = 0, + /* Reserved */ + }, + .primary_pic_type = hp->h264pic_info.primary_pic_type, + .FrameNum = hp->h264pic_info.frame_num, + .PicOrderCnt = hp->h264pic_info.PicOrderCnt, + .long_term_pic_num = 0, + .long_term_frame_idx = 0, + .temporal_id = hp->h264pic_info.temporal_id, + }; + hp->vkh264dpb_info = (VkVideoEncodeH264DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = &hp->h264dpb_info, + }; + + vp->dpb_slot.pNext = &hp->vkh264dpb_info; + + ref_slot = (VkVideoReferenceSlotInfoKHR *)encode_info->pSetupReferenceSlot; + ref_slot->pNext = &hp->vkh264dpb_info; + + setup_refs(avctx, pic, encode_info); + + setup_slices(avctx, pic); + + return 0; +} + +static int init_profile(AVCodecContext *avctx, + VkVideoProfileInfoKHR *profile, void *pnext) +{ + VkResult ret; + VulkanEncodeH264Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoEncodeH264CapabilitiesKHR h264_caps = { + .sType = 
VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_CAPABILITIES_KHR, + }; + VkVideoEncodeCapabilitiesKHR enc_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR, + .pNext = &h264_caps, + }; + VkVideoCapabilitiesKHR caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR, + .pNext = &enc_caps, + }; + + /* In order of preference */ + int last_supported = AV_PROFILE_UNKNOWN; + static const int known_profiles[] = { + AV_PROFILE_H264_CONSTRAINED_BASELINE, + AV_PROFILE_H264_MAIN, + AV_PROFILE_H264_HIGH, + AV_PROFILE_H264_HIGH_10, + }; + int nb_profiles = FF_ARRAY_ELEMS(known_profiles); + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->frames->sw_format); + if (!desc) + return AVERROR(EINVAL); + + if (desc->comp[0].depth == 8) + nb_profiles = 3; + + enc->profile = (VkVideoEncodeH264ProfileInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_INFO_KHR, + .pNext = pnext, + .stdProfileIdc = ff_vk_h264_profile_to_vk(avctx->profile), + }; + profile->pNext = &enc->profile; + + /* Set level */ + if (avctx->level == AV_LEVEL_UNKNOWN) + avctx->level = enc->common.opts.level; + + /* User has explicitly specified a profile. 
*/ + if (avctx->profile != AV_PROFILE_UNKNOWN) + return 0; + + av_log(avctx, AV_LOG_DEBUG, "Supported profiles:\n"); + for (int i = 0; i < nb_profiles; i++) { + enc->profile.stdProfileIdc = ff_vk_h264_profile_to_vk(known_profiles[i]); + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, + profile, + &caps); + if (ret == VK_SUCCESS) { + av_log(avctx, AV_LOG_DEBUG, " %s\n", + avcodec_profile_name(avctx->codec_id, known_profiles[i])); + last_supported = known_profiles[i]; + } + } + + if (last_supported == AV_PROFILE_UNKNOWN) { + av_log(avctx, AV_LOG_ERROR, "No supported profiles for given format\n"); + return AVERROR(ENOTSUP); + } + + enc->profile.stdProfileIdc = ff_vk_h264_profile_to_vk(last_supported); + av_log(avctx, AV_LOG_VERBOSE, "Using profile %s\n", + avcodec_profile_name(avctx->codec_id, last_supported)); + avctx->profile = last_supported; + + return 0; +} + +static int init_enc_options(AVCodecContext *avctx) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + FFHWBaseEncodeH264Opts *unit_opts = &enc->unit_opts; + + if (avctx->rc_buffer_size) + unit_opts->hrd_buffer_size = avctx->rc_buffer_size; + else if (avctx->rc_max_rate > 0) + unit_opts->hrd_buffer_size = avctx->rc_max_rate; + else + unit_opts->hrd_buffer_size = avctx->bit_rate; + + if (avctx->rc_initial_buffer_occupancy) { + if (avctx->rc_initial_buffer_occupancy > unit_opts->hrd_buffer_size) { + av_log(avctx, AV_LOG_ERROR, "Invalid RC buffer settings: " + "must have initial buffer size (%d) <= " + "buffer size (%"PRId64").\n", + avctx->rc_initial_buffer_occupancy, unit_opts->hrd_buffer_size); + return AVERROR(EINVAL); + } + unit_opts->initial_buffer_fullness = avctx->rc_initial_buffer_occupancy; + } else { + unit_opts->initial_buffer_fullness = unit_opts->hrd_buffer_size * 3 / 4; + } + + if (enc->common.opts.rc_mode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { + /* HRD info is required for timing */ + enc->unit_elems &= ~UNIT_SEI_TIMING; + + enc->fixed_qp_p = 
av_clip(enc->common.explicit_qp, + enc->caps.minQp, enc->caps.maxQp); + if (avctx->i_quant_factor > 0.0) + unit_opts->fixed_qp_idr = av_clip((avctx->i_quant_factor * enc->fixed_qp_p + + avctx->i_quant_offset) + 0.5, + enc->caps.minQp, enc->caps.maxQp); + else + unit_opts->fixed_qp_idr = enc->fixed_qp_p; + + if (avctx->b_quant_factor > 0.0) + enc->fixed_qp_b = av_clip((avctx->b_quant_factor * enc->fixed_qp_p + + avctx->b_quant_offset) + 0.5, + enc->caps.minQp, enc->caps.maxQp); + else + enc->fixed_qp_b = enc->fixed_qp_p; + + av_log(avctx, AV_LOG_DEBUG, "Using fixed QP = " + "%d / %d / %d for IDR- / P- / B-frames.\n", + unit_opts->fixed_qp_idr, enc->fixed_qp_p, enc->fixed_qp_b); + } else { + unit_opts->fixed_qp_idr = 26; + enc->fixed_qp_p = 26; + enc->fixed_qp_b = 26; + } + + return 0; +} + +static av_cold int init_sequence_headers(AVCodecContext *avctx) +{ + int err; + VulkanEncodeH264Context *enc = avctx->priv_data; + + FFHWBaseEncodeH264 *units = &enc->units; + FFHWBaseEncodeH264Opts *unit_opts = &enc->unit_opts; + + unit_opts->bit_rate = avctx->bit_rate; + unit_opts->mb_width = FFALIGN(avctx->width, 16) / 16; + unit_opts->mb_height = FFALIGN(avctx->height, 16) / 16; + unit_opts->flags = enc->unit_elems & UNIT_SEI_TIMING ? 
FF_HW_H264_SEI_TIMING : 0; + + /* cabac already set via an option */ + /* fixed_qp_idr initialized in init_enc_options() */ + /* hrd_buffer_size initialized in init_enc_options() */ + /* initial_buffer_fullness initialized in init_enc_options() */ + + err = ff_hw_base_encode_init_params_h264(&enc->common.base, avctx, + units, unit_opts); + if (err < 0) + return err; + + units->raw_sps.seq_scaling_matrix_present_flag = + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SCALING_MATRIX_PRESENT_FLAG_SET_BIT_KHR); + units->raw_pps.pic_scaling_matrix_present_flag = + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SCALING_MATRIX_PRESENT_FLAG_SET_BIT_KHR); + units->raw_pps.transform_8x8_mode_flag = + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_TRANSFORM_8X8_MODE_FLAG_SET_BIT_KHR); + + return 0; +} + +typedef struct VulkanH264Units { + StdVideoH264SequenceParameterSet vksps; + StdVideoH264ScalingLists vksps_scaling; + StdVideoH264HrdParameters vksps_vui_header; + StdVideoH264SequenceParameterSetVui vksps_vui; + + StdVideoH264PictureParameterSet vkpps; + StdVideoH264ScalingLists vkpps_scaling; +} VulkanH264Units; + +static av_cold int base_unit_to_vk(AVCodecContext *avctx, + VulkanH264Units *vk_units) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + + FFHWBaseEncodeH264 *units = &enc->units; + + H264RawSPS *sps = &units->raw_sps; + H264RawHRD *hrd = &sps->vui.nal_hrd_parameters; + StdVideoH264ScalingLists *vksps_scaling = &vk_units->vksps_scaling; + StdVideoH264HrdParameters *vksps_vui_header = &vk_units->vksps_vui_header; + StdVideoH264SequenceParameterSetVui *vksps_vui = &vk_units->vksps_vui; + StdVideoH264SequenceParameterSet *vksps = &vk_units->vksps; + + H264RawPPS *pps = &units->raw_pps; + StdVideoH264ScalingLists *vkpps_scaling = &vk_units->vkpps_scaling; + StdVideoH264PictureParameterSet *vkpps = &vk_units->vkpps; + + *vksps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = 0x0, // mask + 
.use_default_scaling_matrix_mask = 1, + }; + + *vksps_vui_header = (StdVideoH264HrdParameters) { + .cpb_cnt_minus1 = hrd->cpb_cnt_minus1, + .bit_rate_scale = hrd->bit_rate_scale, + .cpb_size_scale = hrd->cpb_size_scale, + /* Reserved */ + /* bit_rate/cpb_size/cbr_flag set below */ + .initial_cpb_removal_delay_length_minus1 = hrd->initial_cpb_removal_delay_length_minus1, + .cpb_removal_delay_length_minus1 = hrd->cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = hrd->dpb_output_delay_length_minus1, + .time_offset_length = hrd->time_offset_length, + }; + + for (int i = 0; i < H264_MAX_CPB_CNT; i++) { + vksps_vui_header->bit_rate_value_minus1[i] = hrd->bit_rate_value_minus1[i]; + vksps_vui_header->cpb_size_value_minus1[i] = hrd->cpb_size_value_minus1[i]; + vksps_vui_header->cbr_flag[i] = hrd->cbr_flag[i]; + } + + *vksps_vui = (StdVideoH264SequenceParameterSetVui) { + .flags = (StdVideoH264SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag, + .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.video_full_range_flag, + .color_description_present_flag = sps->vui.colour_description_present_flag, + .chroma_loc_info_present_flag = sps->vui.chroma_loc_info_present_flag, + .timing_info_present_flag = sps->vui.timing_info_present_flag, + .fixed_frame_rate_flag = sps->vui.fixed_frame_rate_flag, + .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag, + .nal_hrd_parameters_present_flag = sps->vui.nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->vui.vcl_hrd_parameters_present_flag, + }, + .aspect_ratio_idc = sps->vui.aspect_ratio_idc, + .sar_width = sps->vui.sar_width, + .sar_height = sps->vui.sar_height, + .video_format = sps->vui.video_format, + .colour_primaries = sps->vui.colour_primaries, 
+ .transfer_characteristics = sps->vui.transfer_characteristics, + .matrix_coefficients = sps->vui.matrix_coefficients, + .num_units_in_tick = sps->vui.num_units_in_tick, + .time_scale = sps->vui.time_scale, + .max_num_reorder_frames = sps->vui.max_num_reorder_frames, + .max_dec_frame_buffering = sps->vui.max_dec_frame_buffering, + .chroma_sample_loc_type_top_field = sps->vui.chroma_sample_loc_type_top_field, + .chroma_sample_loc_type_bottom_field = sps->vui.chroma_sample_loc_type_bottom_field, + /* Reserved */ + .pHrdParameters = vksps_vui_header, + }; + + *vksps = (StdVideoH264SequenceParameterSet) { + .flags = (StdVideoH264SpsFlags) { + .constraint_set0_flag = sps->constraint_set0_flag, + .constraint_set1_flag = sps->constraint_set1_flag, + .constraint_set2_flag = sps->constraint_set2_flag, + .constraint_set3_flag = sps->constraint_set3_flag, + .constraint_set4_flag = sps->constraint_set4_flag, + .constraint_set5_flag = sps->constraint_set5_flag, + .direct_8x8_inference_flag = sps->direct_8x8_inference_flag, + .mb_adaptive_frame_field_flag = sps->mb_adaptive_frame_field_flag, + .frame_mbs_only_flag = sps->frame_mbs_only_flag, + .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag, + .separate_colour_plane_flag = sps->separate_colour_plane_flag, + .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag, + .qpprime_y_zero_transform_bypass_flag = sps->qpprime_y_zero_transform_bypass_flag, + .frame_cropping_flag = sps->frame_cropping_flag, + .seq_scaling_matrix_present_flag = sps->seq_scaling_matrix_present_flag, + .vui_parameters_present_flag = sps->vui_parameters_present_flag, + }, + .profile_idc = ff_vk_h264_profile_to_vk(sps->profile_idc), + .level_idc = ff_vk_h264_level_to_vk(sps->level_idc), + .chroma_format_idc = sps->chroma_format_idc, + .seq_parameter_set_id = sps->seq_parameter_set_id, + .bit_depth_luma_minus8 = sps->bit_depth_luma_minus8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8, + 
.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4, + .pic_order_cnt_type = sps->pic_order_cnt_type, + .offset_for_non_ref_pic = sps->offset_for_non_ref_pic, + .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4, + .num_ref_frames_in_pic_order_cnt_cycle = sps->num_ref_frames_in_pic_order_cnt_cycle, + .max_num_ref_frames = sps->max_num_ref_frames, + /* Reserved */ + .pic_width_in_mbs_minus1 = sps->pic_width_in_mbs_minus1, + .pic_height_in_map_units_minus1 = sps->pic_height_in_map_units_minus1, + .frame_crop_left_offset = sps->frame_crop_left_offset, + .frame_crop_right_offset = sps->frame_crop_right_offset, + .frame_crop_top_offset = sps->frame_crop_top_offset, + .frame_crop_bottom_offset = sps->frame_crop_bottom_offset, + /* Reserved */ + .pOffsetForRefFrame = sps->offset_for_ref_frame, + .pScalingLists = vksps_scaling, + .pSequenceParameterSetVui = vksps_vui, + }; + + *vkpps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = 0x0, // mask + .use_default_scaling_matrix_mask = 1, + }; + + *vkpps = (StdVideoH264PictureParameterSet) { + .flags = (StdVideoH264PpsFlags) { + .transform_8x8_mode_flag = pps->transform_8x8_mode_flag, + .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present_flag, + .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, + .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, + .weighted_pred_flag = pps->weighted_pred_flag, + .bottom_field_pic_order_in_frame_present_flag = pps->bottom_field_pic_order_in_frame_present_flag, + .entropy_coding_mode_flag = pps->entropy_coding_mode_flag, + .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag, + }, + .seq_parameter_set_id = pps->seq_parameter_set_id, + .pic_parameter_set_id = pps->pic_parameter_set_id, + .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1, + 
.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1,
+        .weighted_bipred_idc = pps->weighted_bipred_idc,
+        .pic_init_qp_minus26 = pps->pic_init_qp_minus26,
+        .pic_init_qs_minus26 = pps->pic_init_qs_minus26,
+        .chroma_qp_index_offset = pps->chroma_qp_index_offset,
+        .second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset,
+        .pScalingLists = vkpps_scaling,
+    };
+
+    return 0;
+}
+
+/**
+ * (Re)create the video session parameters object from the current raw
+ * SPS/PPS held in enc->units.
+ *
+ * The raw units are converted with base_unit_to_vk() into a stack-local
+ * VulkanH264Units, any existing ctx->session_params is destroyed, and a new
+ * object holding exactly one SPS and one PPS is requested through
+ * ff_vulkan_encode_create_session_params().
+ *
+ * @return the result of ff_vulkan_encode_create_session_params(), or the
+ *         negative error returned by base_unit_to_vk().
+ */
+static int create_session_params(AVCodecContext *avctx)
+{
+    int err;
+    VulkanEncodeH264Context *enc = avctx->priv_data;
+    FFVulkanEncodeContext *ctx = &enc->common;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VulkanH264Units vk_units = { 0 };
+
+    VkVideoEncodeH264SessionParametersAddInfoKHR h264_params_info;
+    VkVideoEncodeH264SessionParametersCreateInfoKHR h264_params;
+
+    /* Convert it to Vulkan */
+    err = base_unit_to_vk(avctx, &vk_units);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to convert SPS/PPS units to Vulkan: %s\n",
+               av_err2str(err));
+        return err;
+    }
+
+    /* Destroy the session params */
+    if (ctx->session_params) {
+        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
+                                             ctx->session_params,
+                                             s->hwctx->alloc);
+        /* Do not keep a destroyed handle around in case re-creation fails */
+        ctx->session_params = VK_NULL_HANDLE;
+    }
+
+    h264_params_info = (VkVideoEncodeH264SessionParametersAddInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR,
+        .pStdSPSs = &vk_units.vksps,
+        .stdSPSCount = 1,
+        .pStdPPSs = &vk_units.vkpps,
+        .stdPPSCount = 1,
+    };
+    h264_params = (VkVideoEncodeH264SessionParametersCreateInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
+        .maxStdSPSCount = 1,
+        .maxStdPPSCount = 1,
+        .pParametersAddInfo = &h264_params_info,
+    };
+
+    return ff_vulkan_encode_create_session_params(avctx, ctx, &h264_params);
+}
+
+/**
+ * Parse the parameter-set bitstream handed back by the driver and copy the
+ * overridden units into enc->units.
+ *
+ * @param data          encoded parameter sets as written by the driver
+ * @param size          size of data in bytes
+ * @param sps_override  accepted for symmetry; not acted upon by this function
+ * @param pps_override  if non-zero, the first PPS found in data replaces
+ *                      enc->units.raw_pps
+ *
+ * @return 0 on success, a negative error code if the CBS context could not
+ *         be created or the data could not be parsed.
+ */
+static int parse_feedback_units(AVCodecContext *avctx,
+                                const uint8_t *data, size_t size,
+                                int sps_override, int pps_override)
+{
+    int err;
+    VulkanEncodeH264Context *enc = avctx->priv_data;
+
+    CodedBitstreamContext *cbs;
+    CodedBitstreamFragment au = { 0 };
+
+    err = ff_cbs_init(&cbs, AV_CODEC_ID_H264, avctx);
+    if (err < 0)
+        return err;
+
+    err = ff_cbs_read(cbs, &au, NULL, data, size);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to parse feedback units, bad drivers: %s\n",
+               av_err2str(err));
+        goto fail;
+    }
+
+    /* If PPS has an override, just copy it entirely. */
+    if (pps_override) {
+        for (int i = 0; i < au.nb_units; i++) {
+            if (au.units[i].type == H264_NAL_PPS) {
+                H264RawPPS *pps = au.units[i].content;
+                memcpy(&enc->units.raw_pps, pps, sizeof(*pps));
+                break;
+            }
+        }
+    }
+
+    err = 0;
+fail:
+    ff_cbs_fragment_free(&au);
+    ff_cbs_close(&cbs);
+
+    return err;
+}
+
+/**
+ * Build the SPS/PPS, create session parameters from them, read the encoded
+ * parameter sets back from the driver and, if it overrode anything, sync
+ * enc->units and re-create the session parameters.
+ *
+ * The temporary feedback buffer is released on every exit path.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the feedback buffer cannot be
+ *         allocated, AVERROR_EXTERNAL if the driver fails to return the
+ *         encoded parameters, or the negative error of a failing helper.
+ */
+static int init_base_units(AVCodecContext *avctx)
+{
+    int err;
+    VkResult ret;
+    VulkanEncodeH264Context *enc = avctx->priv_data;
+    FFVulkanEncodeContext *ctx = &enc->common;
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    VkVideoEncodeH264SessionParametersGetInfoKHR h264_params_info;
+    VkVideoEncodeSessionParametersGetInfoKHR params_info;
+    VkVideoEncodeH264SessionParametersFeedbackInfoKHR h264_params_feedback;
+    VkVideoEncodeSessionParametersFeedbackInfoKHR params_feedback;
+
+    void *data = NULL;
+    size_t data_size = 0;
+
+    /* Generate SPS/PPS unit info */
+    err = init_sequence_headers(avctx);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPS/PPS units: %s\n",
+               av_err2str(err));
+        return err;
+    }
+
+    /* Create session parameters from them */
+    err = create_session_params(avctx);
+    if (err < 0)
+        return err;
+
+    h264_params_info = (VkVideoEncodeH264SessionParametersGetInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_GET_INFO_KHR,
+        .writeStdSPS = 1,
+        .writeStdPPS = 1,
+        .stdSPSId = enc->units.raw_sps.seq_parameter_set_id,
+        .stdPPSId = enc->units.raw_pps.pic_parameter_set_id,
+    };
+    params_info = (VkVideoEncodeSessionParametersGetInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_GET_INFO_KHR,
+        .pNext = &h264_params_info,
+        .videoSessionParameters = ctx->session_params,
+    };
+
+    h264_params_feedback = (VkVideoEncodeH264SessionParametersFeedbackInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_FEEDBACK_INFO_KHR,
+    };
+    params_feedback = (VkVideoEncodeSessionParametersFeedbackInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_FEEDBACK_INFO_KHR,
+        .pNext = &h264_params_feedback,
+    };
+
+    /* First pass: data is still NULL, so only the required size is queried */
+    ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, &params_info,
+                                                  &params_feedback,
+                                                  &data_size, data);
+    if (ret == VK_INCOMPLETE ||
+        (ret == VK_SUCCESS && data_size > 0)) {
+        data = av_mallocz(data_size);
+        if (!data)
+            return AVERROR(ENOMEM);
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Unable to get feedback for H.264 units = %"SIZE_SPECIFIER"\n", data_size);
+        /* err is 0 at this point; report a real failure instead */
+        return AVERROR_EXTERNAL;
+    }
+
+    /* Second pass: let the driver write the encoded SPS/PPS */
+    ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, &params_info,
+                                                  &params_feedback,
+                                                  &data_size, data);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Error writing feedback units\n");
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Feedback units written, overrides: %i (SPS: %i PPS: %i)\n",
+           params_feedback.hasOverrides,
+           h264_params_feedback.hasStdSPSOverrides,
+           h264_params_feedback.hasStdPPSOverrides);
+
+    /* NOTE(review): both flags are forced on, so the early-out below never
+     * triggers and the PPS is always re-read from the driver output.
+     * Confirm this is intentional rather than leftover debugging. */
+    params_feedback.hasOverrides = 1;
+    h264_params_feedback.hasStdPPSOverrides = 1;
+
+    /* No need to sync any overrides */
+    if (!params_feedback.hasOverrides) {
+        err = 0;
+        goto fail;
+    }
+
+    /* Parse back the units and override */
+    err = parse_feedback_units(avctx, data, data_size,
+                               h264_params_feedback.hasStdSPSOverrides,
+                               h264_params_feedback.hasStdPPSOverrides);
+    if (err < 0)
+        goto fail;
+
+    /* Create final session parameters */
+    err = create_session_params(avctx);
+    if (err < 0)
+        goto fail;
+
+    err = 0;
+fail:
+    av_free(data);
+    return err;
+}
+
+static int vulkan_encode_h264_add_nal(AVCodecContext *avctx,
+                                      CodedBitstreamFragment *au,
+                                      void *nal_unit)
+{
+    H264RawNALUnitHeader
*header = nal_unit;
+
+    int err = ff_cbs_insert_unit_content(au, -1,
+                                         header->nal_unit_type, nal_unit, NULL);
+    if (err < 0)
+        av_log(avctx, AV_LOG_ERROR, "Failed to add NAL unit: "
+               "type = %d.\n", header->nal_unit_type);
+
+    return err;
+}
+/**
+ * Serialise the fragment au with enc->cbs and copy the result into data.
+ *
+ * @param data      destination buffer
+ * @param data_len  in: capacity of data in bytes; out: number of bytes
+ *                  copied (only updated on success)
+ * @param au        fragment to write; it is not reset here, callers do that
+ *
+ * @return 0 on success, the negative error from
+ *         ff_cbs_write_fragment_data(), or AVERROR(ENOSPC) if the written
+ *         fragment is larger than *data_len.
+ */
+static int write_access_unit(AVCodecContext *avctx,
+                             uint8_t *data, size_t *data_len,
+                             CodedBitstreamFragment *au)
+{
+    VulkanEncodeH264Context *enc = avctx->priv_data;
+
+    int err = ff_cbs_write_fragment_data(enc->cbs, au);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n");
+        return err;
+    }
+
+    if (*data_len < au->data_size) {
+        av_log(avctx, AV_LOG_ERROR, "Access unit too large: %"SIZE_SPECIFIER" < %"SIZE_SPECIFIER".\n",
+               *data_len, au->data_size);
+        return AVERROR(ENOSPC);
+    }
+
+    memcpy(data, au->data, au->data_size);
+    *data_len = au->data_size;
+
+    return 0;
+}
+/**
+ * Write the SPS and PPS (preceded by an AUD when the picture asks for one)
+ * into data.
+ *
+ * @param base_pic  picture the headers belong to; may be NULL, in which case
+ *                  no AUD is emitted
+ * @param data      destination buffer
+ * @param data_len  in: capacity of data; out: bytes written
+ *
+ * enc->current_access_unit is used as scratch and is reset before returning,
+ * on success and on failure alike.
+ *
+ * @return 0 on success, a negative error code otherwise.
+ */
+static int write_sequence_headers(AVCodecContext *avctx,
+                                  FFHWBaseEncodePicture *base_pic,
+                                  uint8_t *data, size_t *data_len)
+{
+    int err;
+    VulkanEncodeH264Context *enc = avctx->priv_data;
+    VulkanEncodeH264Picture *hp = base_pic ? base_pic->codec_priv : NULL;
+    CodedBitstreamFragment *au = &enc->current_access_unit;
+
+    if (hp && hp->units_needed & UNIT_AUD) {
+        err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_aud);
+        if (err < 0)
+            goto fail;
+    }
+
+    err = vulkan_encode_h264_add_nal(avctx, au, &enc->units.raw_sps);
+    if (err < 0)
+        goto fail;
+
+    err = vulkan_encode_h264_add_nal(avctx, au, &enc->units.raw_pps);
+    if (err < 0)
+        goto fail;
+
+    err = write_access_unit(avctx, data, data_len, au);
+fail:
+    ff_cbs_fragment_reset(au);
+    return err;
+}
+/**
+ * Write the per-picture extra units selected by hp->units_needed: an AUD
+ * and the identifier, timing, recovery-point and A/53 caption SEI messages.
+ *
+ * With UNIT_SEI_TIMING set, a buffering-period SEI is added before the
+ * picture-timing SEI only for IDR pictures. If no unit is needed, nothing
+ * is written and *data_len is set to 0.
+ *
+ * @param base_pic  picture being encoded; must not be NULL (dereferenced
+ *                  unconditionally)
+ * @param data      destination buffer
+ * @param data_len  in: capacity of data; out: bytes written
+ *
+ * enc->current_access_unit is reset before returning on every path.
+ *
+ * @return 0 on success, a negative error code otherwise.
+ */
+static int write_extra_headers(AVCodecContext *avctx,
+                               FFHWBaseEncodePicture *base_pic,
+                               uint8_t *data, size_t *data_len)
+{
+    int err;
+    VulkanEncodeH264Context *enc = avctx->priv_data;
+    VulkanEncodeH264Picture *hp = base_pic->codec_priv;
+    CodedBitstreamFragment *au = &enc->current_access_unit;
+
+    if (hp->units_needed & UNIT_AUD) {
+        err = vulkan_encode_h264_add_nal(avctx, au, &enc->raw_aud);
+        if (err < 0)
+            goto fail;
+    }
+
+    if (hp->units_needed & UNIT_SEI_IDENTIFIER) {
+        err = ff_cbs_sei_add_message(enc->cbs, au, 1,
+                                     SEI_TYPE_USER_DATA_UNREGISTERED,
+                                     &enc->sei_identifier, NULL);
+        if (err < 0)
+            goto fail;
+    }
+
+    if (hp->units_needed & UNIT_SEI_TIMING) {
+        if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) {
+            err = ff_cbs_sei_add_message(enc->cbs, au, 1,
+                                         SEI_TYPE_BUFFERING_PERIOD,
+                                         &enc->units.sei_buffering_period, NULL);
+            if (err < 0)
+                goto fail;
+        }
+        err = ff_cbs_sei_add_message(enc->cbs, au, 1,
+                                     SEI_TYPE_PIC_TIMING,
+                                     &enc->sei_pic_timing, NULL);
+        if (err < 0)
+            goto fail;
+    }
+
+    if (hp->units_needed & UNIT_SEI_RECOVERY) {
+        err = ff_cbs_sei_add_message(enc->cbs, au, 1,
+                                     SEI_TYPE_RECOVERY_POINT,
+                                     &enc->sei_recovery_point, NULL);
+        if (err < 0)
+            goto fail;
+    }
+
+    if (hp->units_needed & UNIT_SEI_A53_CC) {
+        err = ff_cbs_sei_add_message(enc->cbs, au, 1,
+                                     SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35,
+                                     &enc->sei_a53cc, NULL);
+        if (err < 0)
+            goto fail;
+    }
+
+    if (hp->units_needed) {
+        err = write_access_unit(avctx, data, data_len, au);
+        if (err < 0)
+            goto fail;
+    } else {
+        err = 0;
+        *data_len = 0; /* nothing to emit for this picture */
+    }
+
+fail:
+    ff_cbs_fragment_reset(au);
+    return err;
+}
+/**
+ * Write a single filler-data NAL unit whose filler_size is filler.
+ *
+ * @param filler    value stored in the unit's filler_size field
+ * @param data      destination buffer
+ * @param data_len  in: capacity of data; out: bytes written
+ *
+ * The unit lives on the stack; enc->current_access_unit is reset before
+ * returning, so no reference to it outlives the call.
+ *
+ * @return 0 on success, a negative error code otherwise.
+ */
+static int write_filler(AVCodecContext *avctx, uint32_t filler,
+                        uint8_t *data, size_t *data_len)
+{
+    int err;
+    VulkanEncodeH264Context *enc = avctx->priv_data;
+    CodedBitstreamFragment *au = &enc->current_access_unit;
+
+    H264RawFiller raw_filler = {
+        .nal_unit_header = {
+            .nal_unit_type = H264_NAL_FILLER_DATA,
+        },
+        .filler_size = filler,
+    };
+
+    err = vulkan_encode_h264_add_nal(avctx, au, &raw_filler);
+    if (err < 0)
+        goto fail;
+
+    err = write_access_unit(avctx, data, data_len, au);
+fail:
+    ff_cbs_fragment_reset(au);
+    return err;
+}
+/**
+ * H.264 callback table; it is passed to ff_vulkan_encode_init() by
+ * vulkan_encode_h264_init() below. init_pic_rc is not defined in this part
+ * of the file.
+ */
+static const FFVulkanCodec enc_cb = {
+    .flags = FF_HW_FLAG_B_PICTURES |
+             FF_HW_FLAG_B_PICTURE_REFERENCES |
+             FF_HW_FLAG_NON_IDR_KEY_PICTURES,
+    .picture_priv_data_size = sizeof(VulkanEncodeH264Picture),
+    .filler_header_size = 6,
+    .init_profile = init_profile,
+    .init_pic_rc = init_pic_rc,
+    .init_pic_params = init_pic_params,
+    .write_sequence_headers = write_sequence_headers,
+    .write_extra_headers = write_extra_headers,
+    .write_filler = write_filler,
+};
+
+static av_cold int vulkan_encode_h264_init(AVCodecContext *avctx)
+{
+    int err, ref_l0, ref_l1;
+    VulkanEncodeH264Context *enc = avctx->priv_data;
+    FFVulkanEncodeContext *ctx = &enc->common;
+    FFVulkanContext *s = &ctx->s;
+    FFHWBaseEncodeContext *base_ctx = &ctx->base;
+    int flags;
+
+    if (avctx->profile == AV_PROFILE_UNKNOWN)
+        avctx->profile = enc->common.opts.profile;
+
+    enc->caps = (VkVideoEncodeH264CapabilitiesKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_CAPABILITIES_KHR,
+    };
+
+    enc->quality_props = (VkVideoEncodeH264QualityLevelPropertiesKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_QUALITY_LEVEL_PROPERTIES_KHR,
+    };
+
+    err = ff_vulkan_encode_init(avctx, &enc->common,
+                                &ff_vk_enc_h264_desc, &enc_cb,
+                                &enc->caps, &enc->quality_props);
+    if (err < 0)
+        return err;
+
+    av_log(avctx,
AV_LOG_VERBOSE, "H264 encoder capabilities:\n"); + av_log(avctx, AV_LOG_VERBOSE, " Standard capability flags:\n"); + av_log(avctx, AV_LOG_VERBOSE, " separate_color_plane: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SEPARATE_COLOR_PLANE_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " qprime_y_zero_transform_bypass: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_QPPRIME_Y_ZERO_TRANSFORM_BYPASS_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " scaling_lists: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SCALING_MATRIX_PRESENT_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " chroma_qp_index_offset: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_CHROMA_QP_INDEX_OFFSET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " second_chroma_qp_index_offset: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_SECOND_CHROMA_QP_INDEX_OFFSET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " pic_init_qp: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_PIC_INIT_QP_MINUS26_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " weighted:%s%s%s\n", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_WEIGHTED_PRED_FLAG_SET_BIT_KHR ? + " pred" : "", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_WEIGHTED_BIPRED_IDC_EXPLICIT_BIT_KHR ? + " bipred_explicit" : "", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_WEIGHTED_BIPRED_IDC_IMPLICIT_BIT_KHR ? + " bipred_implicit" : ""); + av_log(avctx, AV_LOG_VERBOSE, " 8x8_transforms: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_TRANSFORM_8X8_MODE_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " disable_direct_spatial_mv_pred: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DIRECT_SPATIAL_MV_PRED_FLAG_UNSET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " coder:%s%s\n", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_UNSET_BIT_KHR ? 
+ " cabac" : "", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_SET_BIT_KHR ? + " cavlc" : ""); + av_log(avctx, AV_LOG_VERBOSE, " direct_8x8_inference: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DIRECT_8X8_INFERENCE_FLAG_UNSET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " constrained_intra_pred: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " deblock:%s%s%s\n", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DEBLOCKING_FILTER_DISABLED_BIT_KHR ? + " filter_disabling" : "", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DEBLOCKING_FILTER_ENABLED_BIT_KHR ? + " filter_enabling" : "", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H264_STD_DEBLOCKING_FILTER_PARTIAL_BIT_KHR ? + " filter_partial" : ""); + + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:\n"); + av_log(avctx, AV_LOG_VERBOSE, " hdr_compliance: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " pred_weight_table_generated: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PREDICTION_WEIGHT_TABLE_GENERATED_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " row_unaligned_slice: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_ROW_UNALIGNED_SLICE_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " different_slice_type: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " b_frame_in_l0_list: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L0_LIST_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " b_frame_in_l1_list: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L1_LIST_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " per_pict_type_min_max_qp: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR)); + av_log(avctx, 
AV_LOG_VERBOSE, " per_slice_constant_qp: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PER_SLICE_CONSTANT_QP_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " generate_prefix_nalu: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_GENERATE_PREFIX_NALU_BIT_KHR)); + + av_log(avctx, AV_LOG_VERBOSE, " Capabilities:\n"); + av_log(avctx, AV_LOG_VERBOSE, " maxLevelIdc: %i\n", + enc->caps.maxLevelIdc); + av_log(avctx, AV_LOG_VERBOSE, " maxSliceCount: %i\n", + enc->caps.maxSliceCount); + av_log(avctx, AV_LOG_VERBOSE, " max(P/B)PictureL0ReferenceCount: %i P's; %i B's\n", + enc->caps.maxPPictureL0ReferenceCount, + enc->caps.maxBPictureL0ReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " maxL1ReferenceCount: %i\n", + enc->caps.maxL1ReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " maxTemporalLayerCount: %i\n", + enc->caps.maxTemporalLayerCount); + av_log(avctx, AV_LOG_VERBOSE, " expectDyadicTemporalLayerPattern: %i\n", + enc->caps.expectDyadicTemporalLayerPattern); + av_log(avctx, AV_LOG_VERBOSE, " min/max Qp: [%i, %i]\n", + enc->caps.minQp, enc->caps.maxQp); + av_log(avctx, AV_LOG_VERBOSE, " prefersGopRemainingFrames: %i\n", + enc->caps.prefersGopRemainingFrames); + av_log(avctx, AV_LOG_VERBOSE, " requiresGopRemainingFrames: %i\n", + enc->caps.requiresGopRemainingFrames); + + err = init_enc_options(avctx); + if (err < 0) + return err; + + flags = ctx->codec->flags; + if (!enc->caps.maxPPictureL0ReferenceCount && + !enc->caps.maxBPictureL0ReferenceCount && + !enc->caps.maxL1ReferenceCount) { + /* Intra-only */ + flags |= FF_HW_FLAG_INTRA_ONLY; + ref_l0 = ref_l1 = 0; + } else if (!enc->caps.maxPPictureL0ReferenceCount) { + /* No P-frames? How. 
*/ + base_ctx->p_to_gpb = 1; + ref_l0 = enc->caps.maxBPictureL0ReferenceCount; + ref_l1 = enc->caps.maxL1ReferenceCount; + } else if (!enc->caps.maxBPictureL0ReferenceCount && + !enc->caps.maxL1ReferenceCount) { + /* No B-frames */ + flags &= ~(FF_HW_FLAG_B_PICTURES | FF_HW_FLAG_B_PICTURE_REFERENCES); + ref_l0 = enc->caps.maxPPictureL0ReferenceCount; + ref_l1 = 0; + } else { + /* P and B frames */ + ref_l0 = FFMIN(enc->caps.maxPPictureL0ReferenceCount, + enc->caps.maxBPictureL0ReferenceCount); + ref_l1 = enc->caps.maxL1ReferenceCount; + } + + err = ff_hw_base_init_gop_structure(base_ctx, avctx, ref_l0, ref_l1, + flags, 0); + if (err < 0) + return err; + + base_ctx->output_delay = base_ctx->b_per_p; + base_ctx->decode_delay = base_ctx->max_b_depth; + + /* Prepare SEI */ + if (enc->unit_elems & UNIT_SEI_IDENTIFIER) { + int len; + + memcpy(enc->sei_identifier.uuid_iso_iec_11578, + vulkan_encode_h264_sei_identifier_uuid, + sizeof(enc->sei_identifier.uuid_iso_iec_11578)); + + len = snprintf(NULL, 0, + "%s / Vulkan video %i.%i.%i / %s %i.%i.%i / %s", + LIBAVCODEC_IDENT, + CODEC_VER(ff_vk_enc_h264_desc.ext_props.specVersion), + s->driver_props.driverName, + CODEC_VER(s->props.properties.driverVersion), + s->props.properties.deviceName); + + if (len >= 0) { + enc->sei_identifier_string = av_malloc(len + 1); + if (!enc->sei_identifier_string) + return AVERROR(ENOMEM); + + len = snprintf(enc->sei_identifier_string, len + 1, + "%s / Vulkan video %i.%i.%i / %s %i.%i.%i / %s", + LIBAVCODEC_IDENT, + CODEC_VER(ff_vk_enc_h264_desc.ext_props.specVersion), + s->driver_props.driverName, + CODEC_VER(s->props.properties.driverVersion), + s->props.properties.deviceName); + + enc->sei_identifier.data = enc->sei_identifier_string; + enc->sei_identifier.data_length = len + 1; + } + } + + /* Init CBS */ + err = ff_cbs_init(&enc->cbs, AV_CODEC_ID_H264, avctx); + if (err < 0) + return err; + + /* Create units and session parameters */ + err = init_base_units(avctx); + if (err < 0) + return 
err; + + /* Write out extradata */ + err = ff_vulkan_write_global_header(avctx, &enc->common); + if (err < 0) + return err; + + return 0; +} + +static av_cold int vulkan_encode_h264_close(AVCodecContext *avctx) +{ + VulkanEncodeH264Context *enc = avctx->priv_data; + ff_vulkan_encode_uninit(&enc->common); + return 0; +} + +#define OFFSET(x) offsetof(VulkanEncodeH264Context, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) +static const AVOption vulkan_encode_h264_options[] = { + HW_BASE_ENCODE_COMMON_OPTIONS, + VULKAN_ENCODE_COMMON_OPTIONS, + + { "profile", "Set profile (profile_idc and constraint_set*_flag)", + OFFSET(common.opts.profile), AV_OPT_TYPE_INT, + { .i64 = AV_PROFILE_UNKNOWN }, AV_PROFILE_UNKNOWN, 0xffff, FLAGS, .unit = "profile" }, + +#define PROFILE(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ + { .i64 = value }, 0, 0, FLAGS, .unit = "profile" + { PROFILE("constrained_baseline", AV_PROFILE_H264_CONSTRAINED_BASELINE) }, + { PROFILE("main", AV_PROFILE_H264_MAIN) }, + { PROFILE("high", AV_PROFILE_H264_HIGH) }, + { PROFILE("high444p", AV_PROFILE_H264_HIGH_10) }, +#undef PROFILE + + { "level", "Set level (level_idc)", + OFFSET(common.opts.level), AV_OPT_TYPE_INT, + { .i64 = AV_LEVEL_UNKNOWN }, AV_LEVEL_UNKNOWN, 0xff, FLAGS, .unit = "level" }, + +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ + { .i64 = value }, 0, 0, FLAGS, .unit = "level" + { LEVEL("1", 10) }, + { LEVEL("1.1", 11) }, + { LEVEL("1.2", 12) }, + { LEVEL("1.3", 13) }, + { LEVEL("2", 20) }, + { LEVEL("2.1", 21) }, + { LEVEL("2.2", 22) }, + { LEVEL("3", 30) }, + { LEVEL("3.1", 31) }, + { LEVEL("3.2", 32) }, + { LEVEL("4", 40) }, + { LEVEL("4.1", 41) }, + { LEVEL("4.2", 42) }, + { LEVEL("5", 50) }, + { LEVEL("5.1", 51) }, + { LEVEL("5.2", 52) }, + { LEVEL("6", 60) }, + { LEVEL("6.1", 61) }, + { LEVEL("6.2", 62) }, +#undef LEVEL + + { "coder", "Entropy coder type", OFFSET(unit_opts.cabac), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS, "coder" }, + { 
"cabac", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, FLAGS, "coder" }, + { "vlc", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS, "coder" }, + + { "units", "Set units to include", OFFSET(unit_elems), AV_OPT_TYPE_FLAGS, { .i64 = UNIT_AUD | UNIT_SEI_IDENTIFIER | UNIT_SEI_RECOVERY | UNIT_SEI_TIMING | UNIT_SEI_A53_CC }, 0, INT_MAX, FLAGS, "units" }, + { "aud", "Include AUD units", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_AUD }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "identifier", "Include encoder version identifier", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_IDENTIFIER }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "timing", "Include timing parameters (buffering_period and pic_timing)", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_TIMING }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "recovery", "Include recovery points where appropriate", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_RECOVERY }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "a53_cc", "Include A/53 caption data", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_A53_CC }, INT_MIN, INT_MAX, FLAGS, "units" }, + + { NULL }, +}; + +static const FFCodecDefault vulkan_encode_h264_defaults[] = { + { "b", "0" }, + { "bf", "2" }, + { "g", "300" }, + { "i_qfactor", "1" }, + { "i_qoffset", "0" }, + { "b_qfactor", "1" }, + { "b_qoffset", "0" }, + { "qmin", "-1" }, + { "qmax", "-1" }, + { "refs", "0" }, + { NULL }, +}; + +static const AVClass vulkan_encode_h264_class = { + .class_name = "h264_vulkan", + .item_name = av_default_item_name, + .option = vulkan_encode_h264_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +const FFCodec ff_h264_vulkan_encoder = { + .p.name = "h264_vulkan", + CODEC_LONG_NAME("H.264/AVC (Vulkan)"), + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_H264, + .priv_data_size = sizeof(VulkanEncodeH264Context), + .init = &vulkan_encode_h264_init, + FF_CODEC_RECEIVE_PACKET_CB(&ff_vulkan_encode_receive_packet), + .close = &vulkan_encode_h264_close, + .p.priv_class = &vulkan_encode_h264_class, + 
.p.capabilities = AV_CODEC_CAP_DELAY | + AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1 | + AV_CODEC_CAP_ENCODER_FLUSH | + AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .defaults = vulkan_encode_h264_defaults, + CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), + .hw_configs = ff_vulkan_encode_hw_configs, + .p.wrapper_name = "vulkan", +}; -- 2.49.1 From 1f4495b10a28899cb25ce33031a5d9af2b2f958c Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:34:16 +0000 Subject: [PATCH 077/118] Changing vulkan file directory --- libavcodec/vulkan_encode_h265.c | 1795 ------------------------------- 1 file changed, 1795 deletions(-) delete mode 100644 libavcodec/vulkan_encode_h265.c diff --git a/libavcodec/vulkan_encode_h265.c b/libavcodec/vulkan_encode_h265.c deleted file mode 100644 index c30b7e8f93..0000000000 --- a/libavcodec/vulkan_encode_h265.c +++ /dev/null @@ -1,1795 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/internal.h" -#include "libavutil/opt.h" -#include "libavutil/mem.h" - -#include "cbs.h" -#include "cbs_h265.h" -#include "atsc_a53.h" -#include "libavutil/mastering_display_metadata.h" - -#include "codec_internal.h" -#include "version.h" -#include "hw_base_encode_h265.h" - -#include "vulkan_encode.h" - -enum UnitElems { - UNIT_AUD = 1 << 0, - UNIT_SEI_MASTERING_DISPLAY = 1 << 1, - UNIT_SEI_CONTENT_LIGHT_LEVEL = 1 << 2, - UNIT_SEI_A53_CC = 1 << 3, -}; - -const FFVulkanEncodeDescriptor ff_vk_enc_h265_desc = { - .codec_id = AV_CODEC_ID_H265, - .encode_extension = FF_VK_EXT_VIDEO_ENCODE_H265, - .encode_op = VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR, - .ext_props = { - .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME, - .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION, - }, -}; - -typedef struct VulkanEncodeH265Picture { - int frame_num; - int64_t last_idr_frame; - uint16_t idr_pic_id; - int primary_pic_type; - int slice_type; - int pic_order_cnt; - int pic_type; - - enum UnitElems units_needed; - - VkVideoEncodeH265RateControlInfoKHR vkrc_info; - VkVideoEncodeH265RateControlLayerInfoKHR vkrc_layer_info; - - StdVideoEncodeH265PictureInfo h265pic_info; - VkVideoEncodeH265PictureInfoKHR vkh265pic_info; - - StdVideoEncodeH265WeightTable slice_wt; - StdVideoEncodeH265SliceSegmentHeader slice_hdr; - VkVideoEncodeH265NaluSliceSegmentInfoKHR vkslice; - - StdVideoEncodeH265ReferenceInfo h265dpb_info; - VkVideoEncodeH265DpbSlotInfoKHR vkh265dpb_info; - - StdVideoEncodeH265ReferenceListsInfo ref_list_info; - StdVideoEncodeH265LongTermRefPics l_rps; - StdVideoH265ShortTermRefPicSet s_rps; -} VulkanEncodeH265Picture; - -typedef struct VulkanEncodeH265Context { - FFVulkanEncodeContext common; - - 
FFHWBaseEncodeH265 units; - FFHWBaseEncodeH265Opts unit_opts; - - enum UnitElems unit_elems; - - uint8_t fixed_qp_idr; - uint8_t fixed_qp_p; - uint8_t fixed_qp_b; - - uint64_t hrd_buffer_size; - uint64_t initial_buffer_fullness; - - VkVideoEncodeH265ProfileInfoKHR profile; - - VkVideoEncodeH265CapabilitiesKHR caps; - VkVideoEncodeH265QualityLevelPropertiesKHR quality_props; - - CodedBitstreamContext *cbs; - CodedBitstreamFragment current_access_unit; - - H265RawAUD raw_aud; - - SEIRawMasteringDisplayColourVolume sei_mastering_display; - SEIRawContentLightLevelInfo sei_content_light_level; - SEIRawUserDataRegistered sei_a53cc; - void *sei_a53cc_data; -} VulkanEncodeH265Context; - -static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - VkVideoEncodeRateControlInfoKHR *rc_info, - VkVideoEncodeRateControlLayerInfoKHR *rc_layer) -{ - VulkanEncodeH265Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - VulkanEncodeH265Picture *hp = pic->codec_priv; - - hp->vkrc_info = (VkVideoEncodeH265RateControlInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_INFO_KHR, - .flags = VK_VIDEO_ENCODE_H265_RATE_CONTROL_REFERENCE_PATTERN_FLAT_BIT_KHR | - VK_VIDEO_ENCODE_H265_RATE_CONTROL_REGULAR_GOP_BIT_KHR, - .idrPeriod = ctx->base.gop_size, - .gopFrameCount = ctx->base.gop_size, - .consecutiveBFrameCount = FFMAX(ctx->base.b_per_p - 1, 0), - .subLayerCount = 0, - }; - rc_info->pNext = &hp->vkrc_info; - - if (rc_info->rateControlMode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { - rc_info->virtualBufferSizeInMs = (enc->hrd_buffer_size * 1000LL) / avctx->bit_rate; - rc_info->initialVirtualBufferSizeInMs = (enc->initial_buffer_fullness * 1000LL) / avctx->bit_rate; - - hp->vkrc_layer_info = (VkVideoEncodeH265RateControlLayerInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_LAYER_INFO_KHR, - - .useMinQp = avctx->qmin > 0, - .minQp.qpI = avctx->qmin > 0 ? 
avctx->qmin : 0, - .minQp.qpP = avctx->qmin > 0 ? avctx->qmin : 0, - .minQp.qpB = avctx->qmin > 0 ? avctx->qmin : 0, - - .useMaxQp = avctx->qmax > 0, - .maxQp.qpI = avctx->qmax > 0 ? avctx->qmax : 0, - .maxQp.qpP = avctx->qmax > 0 ? avctx->qmax : 0, - .maxQp.qpB = avctx->qmax > 0 ? avctx->qmax : 0, - - .useMaxFrameSize = 0, - }; - rc_layer->pNext = &hp->vkrc_layer_info; - hp->vkrc_info.subLayerCount = 1; - } - - return 0; -} - -static int vk_enc_h265_update_pic_info(AVCodecContext *avctx, - FFHWBaseEncodePicture *pic) -{ - VulkanEncodeH265Context *enc = avctx->priv_data; - VulkanEncodeH265Picture *hp = pic->codec_priv; - FFHWBaseEncodePicture *prev = pic->prev; - VulkanEncodeH265Picture *hprev = prev ? prev->codec_priv : NULL; - - if (pic->type == FF_HW_PICTURE_TYPE_IDR) { - av_assert0(pic->display_order == pic->encode_order); - - hp->last_idr_frame = pic->display_order; - - hp->slice_type = STD_VIDEO_H265_SLICE_TYPE_I; - hp->pic_type = STD_VIDEO_H265_PICTURE_TYPE_IDR; - } else { - av_assert0(prev); - hp->last_idr_frame = hprev->last_idr_frame; - - if (pic->type == FF_HW_PICTURE_TYPE_I) { - hp->slice_type = STD_VIDEO_H265_SLICE_TYPE_I; - hp->pic_type = STD_VIDEO_H265_PICTURE_TYPE_I; - } else if (pic->type == FF_HW_PICTURE_TYPE_P) { - av_assert0(pic->refs[0]); - hp->slice_type = STD_VIDEO_H265_SLICE_TYPE_P; - hp->pic_type = STD_VIDEO_H265_PICTURE_TYPE_P; - } else { - FFHWBaseEncodePicture *irap_ref; - av_assert0(pic->refs[0][0] && pic->refs[1][0]); - for (irap_ref = pic; irap_ref; irap_ref = irap_ref->refs[1][0]) { - if (irap_ref->type == FF_HW_PICTURE_TYPE_I) - break; - } - hp->slice_type = STD_VIDEO_H265_SLICE_TYPE_B; - hp->pic_type = STD_VIDEO_H265_PICTURE_TYPE_B; - } - } - hp->pic_order_cnt = pic->display_order - hp->last_idr_frame; - - hp->units_needed = 0; - - if (enc->unit_elems & UNIT_AUD) { - hp->units_needed |= UNIT_AUD; - enc->raw_aud = (H265RawAUD) { - .nal_unit_header = { - .nal_unit_type = HEVC_NAL_AUD, - .nuh_layer_id = 0, - .nuh_temporal_id_plus1 = 
1, - }, - .pic_type = hp->pic_type, - }; - } - - // Only look for the metadata on I/IDR frame on the output. We - // may force an IDR frame on the output where the metadata gets - // changed on the input frame. - if ((enc->unit_elems & UNIT_SEI_MASTERING_DISPLAY) && - (pic->type == FF_HW_PICTURE_TYPE_I || pic->type == FF_HW_PICTURE_TYPE_IDR)) { - AVFrameSideData *sd = - av_frame_get_side_data(pic->input_image, - AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); - - if (sd) { - AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *)sd->data; - - // SEI is needed when both the primaries and luminance are set - if (mdm->has_primaries && mdm->has_luminance) { - SEIRawMasteringDisplayColourVolume *mdcv = - &enc->sei_mastering_display; - const int mapping[3] = {1, 2, 0}; - const int chroma_den = 50000; - const int luma_den = 10000; - - for (int i = 0; i < 3; i++) { - const int j = mapping[i]; - mdcv->display_primaries_x[i] = - FFMIN(lrint(chroma_den * - av_q2d(mdm->display_primaries[j][0])), - chroma_den); - mdcv->display_primaries_y[i] = - FFMIN(lrint(chroma_den * - av_q2d(mdm->display_primaries[j][1])), - chroma_den); - } - - mdcv->white_point_x = - FFMIN(lrint(chroma_den * av_q2d(mdm->white_point[0])), - chroma_den); - mdcv->white_point_y = - FFMIN(lrint(chroma_den * av_q2d(mdm->white_point[1])), - chroma_den); - - mdcv->max_display_mastering_luminance = - lrint(luma_den * av_q2d(mdm->max_luminance)); - mdcv->min_display_mastering_luminance = - FFMIN(lrint(luma_den * av_q2d(mdm->min_luminance)), - mdcv->max_display_mastering_luminance); - - hp->units_needed |= UNIT_SEI_MASTERING_DISPLAY; - } - } - } - - if ((enc->unit_elems & UNIT_SEI_CONTENT_LIGHT_LEVEL) && - (pic->type == FF_HW_PICTURE_TYPE_I || pic->type == FF_HW_PICTURE_TYPE_IDR)) { - AVFrameSideData *sd = av_frame_get_side_data(pic->input_image, - AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); - - if (sd) { - AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data; - SEIRawContentLightLevelInfo *clli = 
&enc->sei_content_light_level; - - clli->max_content_light_level = FFMIN(clm->MaxCLL, 65535); - clli->max_pic_average_light_level = FFMIN(clm->MaxFALL, 65535); - - hp->units_needed |= UNIT_SEI_CONTENT_LIGHT_LEVEL; - } - } - - if (enc->unit_elems & UNIT_SEI_A53_CC) { - int err; - size_t sei_a53cc_len; - av_freep(&enc->sei_a53cc_data); - err = ff_alloc_a53_sei(pic->input_image, 0, &enc->sei_a53cc_data, &sei_a53cc_len); - if (err < 0) - return err; - if (enc->sei_a53cc_data != NULL) { - enc->sei_a53cc.itu_t_t35_country_code = 181; - enc->sei_a53cc.data = (uint8_t *)enc->sei_a53cc_data + 1; - enc->sei_a53cc.data_length = sei_a53cc_len - 1; - - hp->units_needed |= UNIT_SEI_A53_CC; - } - } - - return 0; -} - -static void setup_slices(AVCodecContext *avctx, - FFHWBaseEncodePicture *pic) -{ - VulkanEncodeH265Context *enc = avctx->priv_data; - VulkanEncodeH265Picture *hp = pic->codec_priv; - - hp->slice_wt = (StdVideoEncodeH265WeightTable) { - .flags = (StdVideoEncodeH265WeightTableFlags) { - .luma_weight_l0_flag = 0, - .chroma_weight_l0_flag = 0, - .luma_weight_l1_flag = 0, - .chroma_weight_l1_flag = 0, - }, - .luma_log2_weight_denom = 0, - .delta_chroma_log2_weight_denom = 0, - .delta_luma_weight_l0 = { 0 }, - .luma_offset_l0 = { 0 }, - .delta_chroma_weight_l0 = { { 0 } }, - .delta_chroma_offset_l0 = { { 0 } }, - .delta_luma_weight_l1 = { 0 }, - .luma_offset_l1 = { 0 }, - .delta_chroma_weight_l1 = { { 0 } }, - .delta_chroma_offset_l1 = { { 0 } }, - }; - - hp->slice_hdr = (StdVideoEncodeH265SliceSegmentHeader) { - .flags = (StdVideoEncodeH265SliceSegmentHeaderFlags) { - .first_slice_segment_in_pic_flag = 1, - .dependent_slice_segment_flag = 0, - .slice_sao_luma_flag = enc->units.raw_sps.sample_adaptive_offset_enabled_flag, - .slice_sao_chroma_flag = enc->units.raw_sps.sample_adaptive_offset_enabled_flag, - .num_ref_idx_active_override_flag = 0, - .mvd_l1_zero_flag = 0, - .cabac_init_flag = 0, - .cu_chroma_qp_offset_enabled_flag = 0, - .deblocking_filter_override_flag = 0, 
- .slice_deblocking_filter_disabled_flag = 0, - .collocated_from_l0_flag = 1, - .slice_loop_filter_across_slices_enabled_flag = 0, - /* Reserved */ - }, - .slice_type = hp->slice_type, - .slice_segment_address = 0, - .collocated_ref_idx = 0, - .MaxNumMergeCand = 5, - .slice_cb_qp_offset = 0, - .slice_cr_qp_offset = 0, - .slice_beta_offset_div2 = 0, - .slice_tc_offset_div2 = 0, - .slice_act_y_qp_offset = 0, - .slice_act_cb_qp_offset = 0, - .slice_act_cr_qp_offset = 0, - .slice_qp_delta = 0, /* Filled in below */ - /* Reserved */ - .pWeightTable = NULL, // &hp->slice_wt, - }; - - hp->vkslice = (VkVideoEncodeH265NaluSliceSegmentInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_NALU_SLICE_SEGMENT_INFO_KHR, - .pNext = NULL, - .constantQp = pic->type == FF_HW_PICTURE_TYPE_B ? enc->fixed_qp_b : - pic->type == FF_HW_PICTURE_TYPE_P ? enc->fixed_qp_p : - enc->fixed_qp_idr, - .pStdSliceSegmentHeader = &hp->slice_hdr, - }; - - if (enc->common.opts.rc_mode != VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) - hp->vkslice.constantQp = 0; - - hp->slice_hdr.slice_qp_delta = hp->vkslice.constantQp - - (enc->units.raw_pps.init_qp_minus26 + 26); - - hp->vkh265pic_info.pNaluSliceSegmentEntries = &hp->vkslice; - hp->vkh265pic_info.naluSliceSegmentEntryCount = 1; -} - -static void setup_refs(AVCodecContext *avctx, - FFHWBaseEncodePicture *pic, - VkVideoEncodeInfoKHR *encode_info) -{ - int i, j; - VulkanEncodeH265Context *enc = avctx->priv_data; - VulkanEncodeH265Picture *hp = pic->codec_priv; - - hp->ref_list_info = (StdVideoEncodeH265ReferenceListsInfo) { - .flags = (StdVideoEncodeH265ReferenceListsInfoFlags) { - .ref_pic_list_modification_flag_l0 = 0, - .ref_pic_list_modification_flag_l1 = 0, - /* Reserved */ - }, - /* May be overridden during setup_slices() */ - .num_ref_idx_l0_active_minus1 = pic->nb_refs[0] - 1, - .num_ref_idx_l1_active_minus1 = pic->nb_refs[1] - 1, - /* Reserved */ - .list_entry_l0 = { 0 }, - .list_entry_l1 = { 0 }, - }; - - for (i = 0; i < 
STD_VIDEO_H265_MAX_NUM_LIST_REF; i++) - hp->ref_list_info.RefPicList0[i] = hp->ref_list_info.RefPicList1[i] = -1; - - /* Note: really not sure */ - for (i = 0; i < pic->nb_refs[0]; i++) { - VkVideoReferenceSlotInfoKHR *slot_info; - slot_info = (VkVideoReferenceSlotInfoKHR *)&encode_info->pReferenceSlots[i]; - hp->ref_list_info.RefPicList0[i] = slot_info->slotIndex; - } - - /* Note: really not sure */ - for (i = 0; i < pic->nb_refs[1]; i++) { - VkVideoReferenceSlotInfoKHR *slot_info; - slot_info = (VkVideoReferenceSlotInfoKHR *)&encode_info->pReferenceSlots[pic->nb_refs[0] + i]; - hp->ref_list_info.RefPicList1[i] = slot_info->slotIndex; - } - - hp->h265pic_info.pRefLists = &hp->ref_list_info; - - if (pic->type != FF_HW_PICTURE_TYPE_IDR) { - StdVideoH265ShortTermRefPicSet *rps; - VulkanEncodeH265Picture *strp; - int rps_poc[MAX_DPB_SIZE]; - int rps_used[MAX_DPB_SIZE]; - int poc, rps_pics; - - hp->h265pic_info.flags.short_term_ref_pic_set_sps_flag = 0; - - rps = &hp->s_rps; - memset(rps, 0, sizeof(*rps)); - - rps_pics = 0; - for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) { - for (j = 0; j < pic->nb_refs[i]; j++) { - strp = pic->refs[i][j]->codec_priv; - rps_poc[rps_pics] = strp->pic_order_cnt; - rps_used[rps_pics] = 1; - ++rps_pics; - } - } - - for (i = 0; i < pic->nb_dpb_pics; i++) { - if (pic->dpb[i] == pic) - continue; - - for (j = 0; j < pic->nb_refs[0]; j++) { - if (pic->dpb[i] == pic->refs[0][j]) - break; - } - if (j < pic->nb_refs[0]) - continue; - - for (j = 0; j < pic->nb_refs[1]; j++) { - if (pic->dpb[i] == pic->refs[1][j]) - break; - } - if (j < pic->nb_refs[1]) - continue; - - strp = pic->dpb[i]->codec_priv; - rps_poc[rps_pics] = strp->pic_order_cnt; - rps_used[rps_pics] = 0; - ++rps_pics; - } - - for (i = 1; i < rps_pics; i++) { - for (j = i; j > 0; j--) { - if (rps_poc[j] > rps_poc[j - 1]) - break; - av_assert0(rps_poc[j] != rps_poc[j - 1]); - FFSWAP(int, rps_poc[j], rps_poc[j - 1]); - FFSWAP(int, rps_used[j], rps_used[j - 1]); - } - } - - av_log(avctx, 
AV_LOG_DEBUG, "RPS for POC %d:", hp->pic_order_cnt); - for (i = 0; i < rps_pics; i++) - av_log(avctx, AV_LOG_DEBUG, " (%d,%d)", rps_poc[i], rps_used[i]); - - av_log(avctx, AV_LOG_DEBUG, "\n"); - - for (i = 0; i < rps_pics; i++) { - av_assert0(rps_poc[i] != hp->pic_order_cnt); - if (rps_poc[i] > hp->pic_order_cnt) - break; - } - - rps->num_negative_pics = i; - rps->used_by_curr_pic_s0_flag = 0x0; - poc = hp->pic_order_cnt; - for (j = i - 1; j >= 0; j--) { - rps->delta_poc_s0_minus1[i - 1 - j] = poc - rps_poc[j] - 1; - rps->used_by_curr_pic_s0_flag |= rps_used[j] << (i - 1 - j); - poc = rps_poc[j]; - } - - rps->num_positive_pics = rps_pics - i; - rps->used_by_curr_pic_s1_flag = 0x0; - poc = hp->pic_order_cnt; - for (j = i; j < rps_pics; j++) { - rps->delta_poc_s1_minus1[j - i] = rps_poc[j] - poc - 1; - rps->used_by_curr_pic_s1_flag |= rps_used[j] << (j - i); - poc = rps_poc[j]; - } - - hp->l_rps.num_long_term_sps = 0; - hp->l_rps.num_long_term_pics = 0; - - // when this flag is not present, it is inerred to 1. 
- hp->slice_hdr.flags.collocated_from_l0_flag = 1; - hp->h265pic_info.flags.slice_temporal_mvp_enabled_flag = - enc->units.raw_sps.sps_temporal_mvp_enabled_flag; - if (hp->h265pic_info.flags.slice_temporal_mvp_enabled_flag) { - if (hp->slice_hdr.slice_type == STD_VIDEO_H265_SLICE_TYPE_B) - hp->slice_hdr.flags.collocated_from_l0_flag = 1; - hp->slice_hdr.collocated_ref_idx = 0; - } - - hp->slice_hdr.flags.num_ref_idx_active_override_flag = 0; - hp->ref_list_info.num_ref_idx_l0_active_minus1 = enc->units.raw_pps.num_ref_idx_l0_default_active_minus1; - hp->ref_list_info.num_ref_idx_l1_active_minus1 = enc->units.raw_pps.num_ref_idx_l1_default_active_minus1; - } - - hp->h265pic_info.pShortTermRefPicSet = &hp->s_rps; - hp->h265pic_info.pLongTermRefPics = &hp->l_rps; -} - -static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, - VkVideoEncodeInfoKHR *encode_info) -{ - int err; - VulkanEncodeH265Context *enc = avctx->priv_data; - FFVulkanEncodePicture *vp = pic->priv; - VulkanEncodeH265Picture *hp = pic->codec_priv; - VkVideoReferenceSlotInfoKHR *ref_slot; - - err = vk_enc_h265_update_pic_info(avctx, pic); - if (err < 0) - return err; - - hp->vkh265pic_info = (VkVideoEncodeH265PictureInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_PICTURE_INFO_KHR, - .pNext = NULL, - .pNaluSliceSegmentEntries = NULL, // Filled in during setup_slices() - .naluSliceSegmentEntryCount = 0, // Filled in during setup_slices() - .pStdPictureInfo = &hp->h265pic_info, - }; - - hp->h265pic_info = (StdVideoEncodeH265PictureInfo) { - .flags = (StdVideoEncodeH265PictureInfoFlags) { - .is_reference = pic->is_reference, - .IrapPicFlag = pic->type == FF_HW_PICTURE_TYPE_IDR, - .used_for_long_term_reference = 0, - .discardable_flag = 0, - .cross_layer_bla_flag = 0, - .pic_output_flag = 1, - .no_output_of_prior_pics_flag = 0, - .short_term_ref_pic_set_sps_flag = 0, - .slice_temporal_mvp_enabled_flag = enc->units.raw_sps.sps_temporal_mvp_enabled_flag, - /* Reserved */ - }, - 
.pic_type = hp->pic_type, - .sps_video_parameter_set_id = 0, - .pps_seq_parameter_set_id = 0, - .pps_pic_parameter_set_id = 0, - .short_term_ref_pic_set_idx = 0, - .PicOrderCntVal = hp->pic_order_cnt, - .TemporalId = 0, - /* Reserved */ - .pRefLists = NULL, // Filled in during setup_refs - .pShortTermRefPicSet = NULL, - .pLongTermRefPics = NULL, - }; - encode_info->pNext = &hp->vkh265pic_info; - - hp->h265dpb_info = (StdVideoEncodeH265ReferenceInfo) { - .flags = (StdVideoEncodeH265ReferenceInfoFlags) { - .used_for_long_term_reference = 0, - .unused_for_reference = 0, - /* Reserved */ - }, - .pic_type = hp->h265pic_info.pic_type, - .PicOrderCntVal = hp->h265pic_info.PicOrderCntVal, - .TemporalId = hp->h265pic_info.TemporalId, - }; - hp->vkh265dpb_info = (VkVideoEncodeH265DpbSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_DPB_SLOT_INFO_KHR, - .pStdReferenceInfo = &hp->h265dpb_info, - }; - - vp->dpb_slot.pNext = &hp->vkh265dpb_info; - - ref_slot = (VkVideoReferenceSlotInfoKHR *)encode_info->pSetupReferenceSlot; - ref_slot->pNext = &hp->vkh265dpb_info; - - setup_refs(avctx, pic, encode_info); - - setup_slices(avctx, pic); - - return 0; -} - -static int init_profile(AVCodecContext *avctx, - VkVideoProfileInfoKHR *profile, void *pnext) -{ - VkResult ret; - VulkanEncodeH265Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - VkVideoEncodeH265CapabilitiesKHR h265_caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_CAPABILITIES_KHR, - }; - VkVideoEncodeCapabilitiesKHR enc_caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR, - .pNext = &h265_caps, - }; - VkVideoCapabilitiesKHR caps = { - .sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR, - .pNext = &enc_caps, - }; - - /* In order of preference */ - int last_supported = AV_PROFILE_UNKNOWN; - static const int known_profiles[] = { - AV_PROFILE_HEVC_MAIN, - AV_PROFILE_HEVC_MAIN_10, - 
AV_PROFILE_HEVC_REXT, - }; - int nb_profiles = FF_ARRAY_ELEMS(known_profiles); - - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->frames->sw_format); - if (!desc) - return AVERROR(EINVAL); - - if (s->frames->sw_format == AV_PIX_FMT_NV12) - nb_profiles = 1; - else if (s->frames->sw_format == AV_PIX_FMT_P010) - nb_profiles = 2; - - enc->profile = (VkVideoEncodeH265ProfileInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_PROFILE_INFO_KHR, - .pNext = pnext, - .stdProfileIdc = ff_vk_h265_profile_to_vk(avctx->profile), - }; - profile->pNext = &enc->profile; - - /* Set level */ - if (avctx->level == AV_LEVEL_UNKNOWN) - avctx->level = enc->common.opts.level; - - /* User has explicitly specified a profile. */ - if (avctx->profile != AV_PROFILE_UNKNOWN) - return 0; - - av_log(avctx, AV_LOG_DEBUG, "Supported profiles:\n"); - for (int i = 0; i < nb_profiles; i++) { - enc->profile.stdProfileIdc = ff_vk_h265_profile_to_vk(known_profiles[i]); - ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, - profile, - &caps); - if (ret == VK_SUCCESS) { - av_log(avctx, AV_LOG_DEBUG, " %s\n", - avcodec_profile_name(avctx->codec_id, known_profiles[i])); - last_supported = known_profiles[i]; - } - } - - if (last_supported == AV_PROFILE_UNKNOWN) { - av_log(avctx, AV_LOG_ERROR, "No supported profiles for given format\n"); - return AVERROR(ENOTSUP); - } - - enc->profile.stdProfileIdc = ff_vk_h265_profile_to_vk(last_supported); - av_log(avctx, AV_LOG_VERBOSE, "Using profile %s\n", - avcodec_profile_name(avctx->codec_id, last_supported)); - avctx->profile = last_supported; - - return 0; -} - -static int init_enc_options(AVCodecContext *avctx) -{ - VulkanEncodeH265Context *enc = avctx->priv_data; - - if (avctx->rc_buffer_size) - enc->hrd_buffer_size = avctx->rc_buffer_size; - else if (avctx->rc_max_rate > 0) - enc->hrd_buffer_size = avctx->rc_max_rate; - else - enc->hrd_buffer_size = avctx->bit_rate; - - if (avctx->rc_initial_buffer_occupancy) { - if 
(avctx->rc_initial_buffer_occupancy > enc->hrd_buffer_size) { - av_log(avctx, AV_LOG_ERROR, "Invalid RC buffer settings: " - "must have initial buffer size (%d) <= " - "buffer size (%"PRId64").\n", - avctx->rc_initial_buffer_occupancy, enc->hrd_buffer_size); - return AVERROR(EINVAL); - } - enc->initial_buffer_fullness = avctx->rc_initial_buffer_occupancy; - } else { - enc->initial_buffer_fullness = enc->hrd_buffer_size * 3 / 4; - } - - if (enc->common.opts.rc_mode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { - enc->fixed_qp_p = av_clip(enc->common.opts.qp, - enc->caps.minQp, enc->caps.maxQp); - - if (avctx->i_quant_factor > 0.0) - enc->fixed_qp_idr = av_clip((avctx->i_quant_factor * enc->fixed_qp_p + - avctx->i_quant_offset) + 0.5, - enc->caps.minQp, enc->caps.maxQp); - else - enc->fixed_qp_idr = enc->fixed_qp_p; - - if (avctx->b_quant_factor > 0.0) - enc->fixed_qp_b = av_clip((avctx->b_quant_factor * enc->fixed_qp_p + - avctx->b_quant_offset) + 0.5, - enc->caps.minQp, enc->caps.maxQp); - else - enc->fixed_qp_b = enc->fixed_qp_p; - - av_log(avctx, AV_LOG_DEBUG, "Using fixed QP = " - "%d / %d / %d for IDR- / P- / B-frames.\n", - enc->fixed_qp_idr, enc->fixed_qp_p, enc->fixed_qp_b); - } else { - enc->fixed_qp_idr = 26; - enc->fixed_qp_p = 26; - enc->fixed_qp_b = 26; - } - - return 0; -} - -static av_cold int init_sequence_headers(AVCodecContext *avctx) -{ - int err; - VulkanEncodeH265Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFHWBaseEncodeContext *base_ctx = &ctx->base; - - FFHWBaseEncodeH265 *units = &enc->units; - FFHWBaseEncodeH265Opts *unit_opts = &enc->unit_opts; - - int max_ctb_size; - unsigned min_tb_size; - unsigned max_tb_size; - unsigned max_transform_hierarchy; - - unit_opts->tier = enc->common.opts.tier; - unit_opts->fixed_qp_idr = enc->fixed_qp_idr; - unit_opts->cu_qp_delta_enabled_flag = enc->common.opts.rc_mode != VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; - - unit_opts->nb_slices = 1; - - 
unit_opts->slice_block_rows = (avctx->height + base_ctx->slice_block_height - 1) / - base_ctx->slice_block_height; - unit_opts->slice_block_cols = (avctx->width + base_ctx->slice_block_width - 1) / - base_ctx->slice_block_width; - - /* cabac already set via an option */ - /* fixed_qp_idr initialized in init_enc_options() */ - /* hrd_buffer_size initialized in init_enc_options() */ - /* initial_buffer_fullness initialized in init_enc_options() */ - - err = ff_hw_base_encode_init_params_h265(&enc->common.base, avctx, - units, unit_opts); - if (err < 0) - return err; - - units->raw_sps.sample_adaptive_offset_enabled_flag = - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG_SET_BIT_KHR); - units->raw_pps.transform_skip_enabled_flag = - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_TRANSFORM_SKIP_ENABLED_FLAG_SET_BIT_KHR); - - max_ctb_size = 16; - - /* coding blocks from 8x8 to max CTB size. */ - if (enc->caps.ctbSizes & VK_VIDEO_ENCODE_H265_CTB_SIZE_64_BIT_KHR) - max_ctb_size = 64; - else if (enc->caps.ctbSizes & VK_VIDEO_ENCODE_H265_CTB_SIZE_32_BIT_KHR) - max_ctb_size = 32; - - min_tb_size = 0; - max_tb_size = 0; - if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_4_BIT_KHR) - min_tb_size = 4; - else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_8_BIT_KHR) - min_tb_size = 8; - else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_16_BIT_KHR) - min_tb_size = 16; - else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_32_BIT_KHR) - min_tb_size = 32; - - if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_32_BIT_KHR) - max_tb_size = 32; - else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_16_BIT_KHR) - max_tb_size = 16; - else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_8_BIT_KHR) - max_tb_size = 8; - else if 
(enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_4_BIT_KHR) - max_tb_size = 4; - - units->raw_sps.log2_min_luma_coding_block_size_minus3 = 0; - units->raw_sps.log2_diff_max_min_luma_coding_block_size = av_log2(max_ctb_size) - 3; - units->raw_sps.log2_min_luma_transform_block_size_minus2 = av_log2(min_tb_size) - 2; - units->raw_sps.log2_diff_max_min_luma_transform_block_size = av_log2(max_tb_size) - av_log2(min_tb_size); - - max_transform_hierarchy = av_log2(max_ctb_size) - av_log2(min_tb_size); - units->raw_sps.max_transform_hierarchy_depth_intra = max_transform_hierarchy; - units->raw_sps.max_transform_hierarchy_depth_intra = max_transform_hierarchy; - - units->raw_sps.vui.bitstream_restriction_flag = 0; - units->raw_sps.vui.max_bytes_per_pic_denom = 2; - units->raw_sps.vui.max_bits_per_min_cu_denom = 1; - - units->raw_sps.sps_temporal_mvp_enabled_flag = 0; - - if (base_ctx->gop_size & base_ctx->gop_size - 1 == 0) - units->raw_sps.log2_max_pic_order_cnt_lsb_minus4 = FFMAX(av_log2(base_ctx->gop_size) - 4, 0); - else - units->raw_sps.log2_max_pic_order_cnt_lsb_minus4 = FFMAX(av_log2(base_ctx->gop_size) - 3, 0); - - return 0; -} - -typedef struct VulkanH265Units { - StdVideoH265SequenceParameterSet sps; - StdVideoH265ShortTermRefPicSet str[STD_VIDEO_H265_SUBLAYERS_LIST_SIZE]; - StdVideoH265LongTermRefPicsSps ltr; - StdVideoH265ProfileTierLevel ptl_sps; - StdVideoH265DecPicBufMgr dpbm_sps; - - StdVideoH265HrdParameters vui_header_sps; - StdVideoH265SequenceParameterSetVui vui_sps; - - StdVideoH265SubLayerHrdParameters slhdrnal[HEVC_MAX_SUB_LAYERS]; - StdVideoH265SubLayerHrdParameters slhdrvcl[HEVC_MAX_SUB_LAYERS]; - - StdVideoH265PictureParameterSet pps; - StdVideoH265ScalingLists pps_scaling; - - StdVideoH265VideoParameterSet vps; - StdVideoH265ProfileTierLevel ptl_vps; - StdVideoH265DecPicBufMgr dpbm_vps; - StdVideoH265HrdParameters vui_header_vps; -} VulkanH265Units; - -static av_cold int base_unit_to_vk(AVCodecContext *avctx, - 
VulkanH265Units *vk_units) -{ - VulkanEncodeH265Context *enc = avctx->priv_data; - - H265RawSPS *sps = &enc->units.raw_sps; - StdVideoH265SequenceParameterSet *vksps = &vk_units->sps; - StdVideoH265ShortTermRefPicSet *str = vk_units->str; - StdVideoH265LongTermRefPicsSps *ltr = &vk_units->ltr; - StdVideoH265ProfileTierLevel *ptl_sps = &vk_units->ptl_sps; - StdVideoH265DecPicBufMgr *dpbm_sps = &vk_units->dpbm_sps; - - StdVideoH265HrdParameters *vui_header_sps = &vk_units->vui_header_sps; - StdVideoH265SequenceParameterSetVui *vui_sps = &vk_units->vui_sps; - - StdVideoH265SubLayerHrdParameters *slhdrnal = vk_units->slhdrnal; - StdVideoH265SubLayerHrdParameters *slhdrvcl = vk_units->slhdrvcl; - - H265RawPPS *pps = &enc->units.raw_pps; - StdVideoH265PictureParameterSet *vkpps = &vk_units->pps; - - H265RawVPS *vps = &enc->units.raw_vps; - StdVideoH265VideoParameterSet *vkvps = &vk_units->vps; - StdVideoH265ProfileTierLevel *ptl_vps = &vk_units->ptl_vps; - StdVideoH265DecPicBufMgr *dpbm_vps = &vk_units->dpbm_vps; - StdVideoH265HrdParameters *vui_header_vps = &vk_units->vui_header_vps; - - /* SPS */ - for (int i = 0; i < HEVC_MAX_SUB_LAYERS; i++) { - memcpy(&slhdrnal[i], &sps->vui.hrd_parameters.nal_sub_layer_hrd_parameters[i], sizeof(*slhdrnal)); - memcpy(&slhdrvcl[i], &sps->vui.hrd_parameters.vcl_sub_layer_hrd_parameters[i], sizeof(*slhdrvcl)); - slhdrnal[i].cbr_flag = 0x0; - slhdrvcl[i].cbr_flag = 0x0; - for (int j = 0; j < HEVC_MAX_CPB_CNT; j++) { - slhdrnal[i].cbr_flag |= sps->vui.hrd_parameters.nal_sub_layer_hrd_parameters[i].cbr_flag[j] << i; - slhdrvcl[i].cbr_flag |= sps->vui.hrd_parameters.vcl_sub_layer_hrd_parameters[i].cbr_flag[j] << i; - } - } - - *vui_header_sps = (StdVideoH265HrdParameters) { - .flags = (StdVideoH265HrdFlags) { - .nal_hrd_parameters_present_flag = sps->vui.hrd_parameters.nal_hrd_parameters_present_flag, - .vcl_hrd_parameters_present_flag = sps->vui.hrd_parameters.vcl_hrd_parameters_present_flag, - .sub_pic_hrd_params_present_flag = 
sps->vui.hrd_parameters.sub_pic_hrd_params_present_flag, - .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->vui.hrd_parameters.sub_pic_cpb_params_in_pic_timing_sei_flag, - .fixed_pic_rate_general_flag = 0x0, - .fixed_pic_rate_within_cvs_flag = 0x0, - .low_delay_hrd_flag = 0x0, - }, - .tick_divisor_minus2 = sps->vui.hrd_parameters.tick_divisor_minus2, - .du_cpb_removal_delay_increment_length_minus1 = sps->vui.hrd_parameters.du_cpb_removal_delay_increment_length_minus1, - .dpb_output_delay_du_length_minus1 = sps->vui.hrd_parameters.dpb_output_delay_du_length_minus1, - .bit_rate_scale = sps->vui.hrd_parameters.bit_rate_scale, - .cpb_size_scale = sps->vui.hrd_parameters.cpb_size_scale, - .cpb_size_du_scale = sps->vui.hrd_parameters.cpb_size_du_scale, - .initial_cpb_removal_delay_length_minus1 = sps->vui.hrd_parameters.initial_cpb_removal_delay_length_minus1, - .au_cpb_removal_delay_length_minus1 = sps->vui.hrd_parameters.au_cpb_removal_delay_length_minus1, - .dpb_output_delay_length_minus1 = sps->vui.hrd_parameters.dpb_output_delay_length_minus1, - /* Reserved - 3*16 bits */ - .pSubLayerHrdParametersNal = slhdrnal, - .pSubLayerHrdParametersVcl = slhdrvcl, - }; - - for (int i = 0; i < HEVC_MAX_SUB_LAYERS; i++) { - vui_header_sps->flags.fixed_pic_rate_general_flag |= sps->vui.hrd_parameters.fixed_pic_rate_general_flag[i] << i; - vui_header_sps->flags.fixed_pic_rate_within_cvs_flag |= sps->vui.hrd_parameters.fixed_pic_rate_within_cvs_flag[i] << i; - vui_header_sps->flags.low_delay_hrd_flag |= sps->vui.hrd_parameters.low_delay_hrd_flag[i] << i; - } - - for (int i = 0; i < STD_VIDEO_H265_SUBLAYERS_LIST_SIZE; i++) { - dpbm_sps->max_latency_increase_plus1[i] = sps->sps_max_latency_increase_plus1[i]; - dpbm_sps->max_dec_pic_buffering_minus1[i] = sps->sps_max_dec_pic_buffering_minus1[i]; - dpbm_sps->max_num_reorder_pics[i] = sps->sps_max_num_reorder_pics[i]; - } - - *ptl_sps = (StdVideoH265ProfileTierLevel) { - .flags = (StdVideoH265ProfileTierLevelFlags) { - 
.general_tier_flag = sps->profile_tier_level.general_tier_flag, - .general_progressive_source_flag = sps->profile_tier_level.general_progressive_source_flag, - .general_interlaced_source_flag = sps->profile_tier_level.general_interlaced_source_flag, - .general_non_packed_constraint_flag = sps->profile_tier_level.general_non_packed_constraint_flag, - .general_frame_only_constraint_flag = sps->profile_tier_level.general_frame_only_constraint_flag, - }, - .general_profile_idc = ff_vk_h265_profile_to_vk(sps->profile_tier_level.general_profile_idc), - .general_level_idc = ff_vk_h265_level_to_vk(sps->profile_tier_level.general_level_idc), - }; - - for (int i = 0; i < STD_VIDEO_H265_MAX_SHORT_TERM_REF_PIC_SETS; i++) { - const H265RawSTRefPicSet *st_rps = &sps->st_ref_pic_set[i]; - - str[i] = (StdVideoH265ShortTermRefPicSet) { - .flags = (StdVideoH265ShortTermRefPicSetFlags) { - .inter_ref_pic_set_prediction_flag = st_rps->inter_ref_pic_set_prediction_flag, - .delta_rps_sign = st_rps->delta_rps_sign, - }, - .delta_idx_minus1 = st_rps->delta_idx_minus1, - .use_delta_flag = 0x0, - .abs_delta_rps_minus1 = st_rps->abs_delta_rps_minus1, - .used_by_curr_pic_flag = 0x0, - .used_by_curr_pic_s0_flag = 0x0, - .used_by_curr_pic_s1_flag = 0x0, - /* Reserved */ - /* Reserved */ - /* Reserved */ - .num_negative_pics = st_rps->num_negative_pics, - .num_positive_pics = st_rps->num_positive_pics, - }; - - for (int j = 0; j < HEVC_MAX_REFS; j++) { - str[i].use_delta_flag |= st_rps->use_delta_flag[j] << i; - str[i].used_by_curr_pic_flag |= st_rps->used_by_curr_pic_flag[j] << i; - str[i].used_by_curr_pic_s0_flag |= st_rps->used_by_curr_pic_s0_flag[j] << i; - str[i].used_by_curr_pic_s1_flag |= st_rps->used_by_curr_pic_s1_flag[j] << i; - str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc_s0_minus1[j]; - str[i].delta_poc_s1_minus1[j] = st_rps->delta_poc_s1_minus1[j]; - } - } - - ltr->used_by_curr_pic_lt_sps_flag = 0; - for (int i = 0; i < STD_VIDEO_H265_MAX_LONG_TERM_REF_PICS_SPS; i++) { - 
ltr->used_by_curr_pic_lt_sps_flag |= sps->lt_ref_pic_poc_lsb_sps[i] << i; - ltr->lt_ref_pic_poc_lsb_sps[i] = sps->lt_ref_pic_poc_lsb_sps[i]; - } - - *vksps = (StdVideoH265SequenceParameterSet) { - .flags = (StdVideoH265SpsFlags) { - .sps_temporal_id_nesting_flag = sps->sps_temporal_id_nesting_flag, - .separate_colour_plane_flag = sps->separate_colour_plane_flag, - .conformance_window_flag = sps->conformance_window_flag, - .sps_sub_layer_ordering_info_present_flag = sps->sps_sub_layer_ordering_info_present_flag, - .scaling_list_enabled_flag = sps->scaling_list_enabled_flag, - .sps_scaling_list_data_present_flag = sps->sps_scaling_list_data_present_flag, - .amp_enabled_flag = sps->amp_enabled_flag, - .sample_adaptive_offset_enabled_flag = sps->sample_adaptive_offset_enabled_flag, - .pcm_enabled_flag = sps->pcm_enabled_flag, - .pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled_flag, - .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag, - .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag, - .strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled_flag, - .vui_parameters_present_flag = sps->vui_parameters_present_flag, - .sps_extension_present_flag = sps->sps_extension_present_flag, - .sps_range_extension_flag = sps->sps_range_extension_flag, - .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag, - .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag, - .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag, - .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag, - .extended_precision_processing_flag = sps->extended_precision_processing_flag, - .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag, - .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag, - .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag, - .cabac_bypass_alignment_enabled_flag = 
sps->cabac_bypass_alignment_enabled_flag, - .sps_scc_extension_flag = sps->sps_scc_extension_flag, - .sps_curr_pic_ref_enabled_flag = sps->sps_curr_pic_ref_enabled_flag, - .palette_mode_enabled_flag = sps->palette_mode_enabled_flag, - .sps_palette_predictor_initializers_present_flag = sps->sps_palette_predictor_initializer_present_flag, - .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disable_flag, - }, - .chroma_format_idc = sps->chroma_format_idc, - .pic_width_in_luma_samples = sps->pic_width_in_luma_samples, - .pic_height_in_luma_samples = sps->pic_height_in_luma_samples, - .sps_video_parameter_set_id = sps->sps_video_parameter_set_id, - .sps_max_sub_layers_minus1 = sps->sps_max_sub_layers_minus1, - .sps_seq_parameter_set_id = sps->sps_seq_parameter_set_id, - .bit_depth_luma_minus8 = sps->bit_depth_luma_minus8, - .bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8, - .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4, - .log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3, - .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size, - .log2_min_luma_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2, - .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size, - .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, - .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, - .num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets, - .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, - .pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1, - .pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1, - .log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3, - .log2_diff_max_min_pcm_luma_coding_block_size = 
sps->log2_diff_max_min_pcm_luma_coding_block_size, - /* Reserved */ - /* Reserved */ - .palette_max_size = sps->palette_max_size, - .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size, - .motion_vector_resolution_control_idc = sps->motion_vector_resolution_control_idc, - .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializer_minus1, - .conf_win_left_offset = sps->conf_win_left_offset, - .conf_win_right_offset = sps->conf_win_right_offset, - .conf_win_top_offset = sps->conf_win_top_offset, - .conf_win_bottom_offset = sps->conf_win_bottom_offset, - .pProfileTierLevel = ptl_sps, - .pDecPicBufMgr = dpbm_sps, - .pScalingLists = NULL, - .pShortTermRefPicSet = str, - .pLongTermRefPicsSps = ltr, - .pSequenceParameterSetVui = vui_sps, - .pPredictorPaletteEntries = NULL, - }; - - /* PPS */ - *vkpps = (StdVideoH265PictureParameterSet) { - .flags = (StdVideoH265PpsFlags) { - .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag, - .output_flag_present_flag = pps->output_flag_present_flag, - .sign_data_hiding_enabled_flag = pps->sign_data_hiding_enabled_flag, - .cabac_init_present_flag = pps->cabac_init_present_flag, - .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, - .transform_skip_enabled_flag = pps->transform_skip_enabled_flag, - .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag, - .pps_slice_chroma_qp_offsets_present_flag = pps->pps_slice_chroma_qp_offsets_present_flag, - .weighted_pred_flag = pps->weighted_pred_flag, - .weighted_bipred_flag = pps->weighted_bipred_flag, - .transquant_bypass_enabled_flag = pps->transquant_bypass_enabled_flag, - .tiles_enabled_flag = pps->tiles_enabled_flag, - .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag, - .uniform_spacing_flag = pps->uniform_spacing_flag, - .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag, - .pps_loop_filter_across_slices_enabled_flag = 
pps->pps_loop_filter_across_slices_enabled_flag, - .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, - .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag, - .pps_deblocking_filter_disabled_flag = pps->pps_deblocking_filter_disabled_flag, - .pps_scaling_list_data_present_flag = pps->pps_scaling_list_data_present_flag, - .lists_modification_present_flag = pps->lists_modification_present_flag, - .slice_segment_header_extension_present_flag = pps->slice_segment_header_extension_present_flag, - .pps_extension_present_flag = pps->pps_extension_present_flag, - .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag, - .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag, - .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag, - .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag, - .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag, - .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializer_present_flag, - .monochrome_palette_flag = pps->monochrome_palette_flag, - .pps_range_extension_flag = pps->pps_range_extension_flag, - }, - .pps_pic_parameter_set_id = pps->pps_pic_parameter_set_id, - .pps_seq_parameter_set_id = pps->pps_seq_parameter_set_id, - .sps_video_parameter_set_id = sps->sps_video_parameter_set_id, - .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, - .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1, - .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1, - .init_qp_minus26 = pps->init_qp_minus26, - .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, - .pps_cb_qp_offset = pps->pps_cb_qp_offset, - .pps_cr_qp_offset = pps->pps_cr_qp_offset, - .pps_beta_offset_div2 = pps->pps_beta_offset_div2, - .pps_tc_offset_div2 = 
pps->pps_tc_offset_div2, - .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2, - .log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size_minus2, - .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth, - .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1, - .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma, - .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma, - .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset_plus5, - .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset_plus5, - .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset_plus3, - .pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializer, - .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry_minus8, - .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry_minus8, - .num_tile_columns_minus1 = pps->num_tile_columns_minus1, - .num_tile_rows_minus1 = pps->num_tile_rows_minus1, - .pScalingLists = NULL, - .pPredictorPaletteEntries = NULL, - }; - - for (int i = 0; i < pps->num_tile_columns_minus1; i++) - vkpps->column_width_minus1[i] = pps->column_width_minus1[i]; - - for (int i = 0; i < pps->num_tile_rows_minus1; i++) - vkpps->row_height_minus1[i] = pps->row_height_minus1[i]; - - for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) { - vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i]; - vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i]; - } - - /* VPS */ - for (int i = 0; i < STD_VIDEO_H265_SUBLAYERS_LIST_SIZE; i++) { - dpbm_vps->max_latency_increase_plus1[i] = vps->vps_max_latency_increase_plus1[i]; - dpbm_vps->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering_minus1[i]; - dpbm_vps->max_num_reorder_pics[i] = vps->vps_max_num_reorder_pics[i]; - } - - *ptl_vps = (StdVideoH265ProfileTierLevel) { - .flags = (StdVideoH265ProfileTierLevelFlags) { - .general_tier_flag = vps->profile_tier_level.general_tier_flag, - 
.general_progressive_source_flag = vps->profile_tier_level.general_progressive_source_flag, - .general_interlaced_source_flag = vps->profile_tier_level.general_interlaced_source_flag, - .general_non_packed_constraint_flag = vps->profile_tier_level.general_non_packed_constraint_flag, - .general_frame_only_constraint_flag = vps->profile_tier_level.general_frame_only_constraint_flag, - }, - .general_profile_idc = ff_vk_h265_profile_to_vk(vps->profile_tier_level.general_profile_idc), - .general_level_idc = ff_vk_h265_level_to_vk(vps->profile_tier_level.general_level_idc), - }; - - *vkvps = (StdVideoH265VideoParameterSet) { - .flags = (StdVideoH265VpsFlags) { - .vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag, - .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag, - .vps_timing_info_present_flag = vps->vps_timing_info_present_flag, - .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag, - }, - .vps_video_parameter_set_id = vps->vps_video_parameter_set_id, - .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers_minus1, - /* Reserved */ - /* Reserved */ - .vps_num_units_in_tick = vps->vps_num_units_in_tick, - .vps_time_scale = vps->vps_time_scale, - .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one_minus1, - /* Reserved */ - .pDecPicBufMgr = dpbm_vps, - .pHrdParameters = vui_header_vps, - .pProfileTierLevel = ptl_vps, - }; - - return 0; -} - -static int create_session_params(AVCodecContext *avctx) -{ - int err; - VulkanEncodeH265Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - VulkanH265Units vk_units = { 0 }; - - VkVideoEncodeH265SessionParametersAddInfoKHR h265_params_info; - VkVideoEncodeH265SessionParametersCreateInfoKHR h265_params; - - /* Convert it to Vulkan */ - err = base_unit_to_vk(avctx, &vk_units); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to 
convert SPS/PPS units to Vulkan: %s\n", - av_err2str(err)); - return err; - } - - /* Destroy the session params */ - if (ctx->session_params) - vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, - ctx->session_params, - s->hwctx->alloc); - - h265_params_info = (VkVideoEncodeH265SessionParametersAddInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR, - .pStdSPSs = &vk_units.sps, - .stdSPSCount = 1, - .pStdPPSs = &vk_units.pps, - .stdPPSCount = 1, - .pStdVPSs = &vk_units.vps, - .stdVPSCount = 1, - }; - h265_params = (VkVideoEncodeH265SessionParametersCreateInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, - .maxStdSPSCount = 1, - .maxStdPPSCount = 1, - .maxStdVPSCount = 1, - .pParametersAddInfo = &h265_params_info, - }; - - return ff_vulkan_encode_create_session_params(avctx, ctx, &h265_params); -} - -static int parse_feedback_units(AVCodecContext *avctx, - const uint8_t *data, size_t size, - int sps_override, int pps_override) -{ - int err; - VulkanEncodeH265Context *enc = avctx->priv_data; - - CodedBitstreamContext *cbs; - CodedBitstreamFragment au = { 0 }; - - err = ff_cbs_init(&cbs, AV_CODEC_ID_HEVC, avctx); - if (err < 0) - return err; - - err = ff_cbs_read(cbs, &au, NULL, data, size); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to parse feedback units, bad drivers: %s\n", - av_err2str(err)); - goto fail; - } - - if (sps_override) { - for (int i = 0; i < au.nb_units; i++) { - if (au.units[i].type == HEVC_NAL_SPS) { - H265RawSPS *sps = au.units[i].content; - enc->units.raw_sps.pic_width_in_luma_samples = sps->pic_width_in_luma_samples; - enc->units.raw_sps.pic_height_in_luma_samples = sps->pic_height_in_luma_samples; - enc->units.raw_sps.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size; - enc->units.raw_sps.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter; - 
enc->units.raw_sps.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra; - } - } - } - - /* If PPS has an override, just copy it entirely. */ - if (pps_override) { - for (int i = 0; i < au.nb_units; i++) { - if (au.units[i].type == HEVC_NAL_PPS) { - H265RawPPS *pps = au.units[i].content; - memcpy(&enc->units.raw_pps, pps, sizeof(*pps)); - enc->fixed_qp_idr = pps->init_qp_minus26 + 26; - break; - } - } - } - - err = 0; -fail: - ff_cbs_fragment_free(&au); - ff_cbs_close(&cbs); - - return err; -} - -static int init_base_units(AVCodecContext *avctx) -{ - int err; - VkResult ret; - VulkanEncodeH265Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFVulkanContext *s = &ctx->s; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - VkVideoEncodeH265SessionParametersGetInfoKHR h265_params_info; - VkVideoEncodeSessionParametersGetInfoKHR params_info; - VkVideoEncodeH265SessionParametersFeedbackInfoKHR h265_params_feedback; - VkVideoEncodeSessionParametersFeedbackInfoKHR params_feedback; - - void *data = NULL; - size_t data_size = 0; - - /* Generate SPS/PPS unit info */ - err = init_sequence_headers(avctx); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPS/PPS units: %s\n", - av_err2str(err)); - return err; - } - - /* Create session parameters from them */ - err = create_session_params(avctx); - if (err < 0) - return err; - - h265_params_info = (VkVideoEncodeH265SessionParametersGetInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_GET_INFO_KHR, - .writeStdSPS = 1, - .writeStdPPS = 1, - .writeStdVPS = 1, - .stdSPSId = enc->units.raw_sps.sps_seq_parameter_set_id, - .stdPPSId = enc->units.raw_pps.pps_pic_parameter_set_id, - .stdVPSId = enc->units.raw_vps.vps_video_parameter_set_id, - }; - params_info = (VkVideoEncodeSessionParametersGetInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_GET_INFO_KHR, - .pNext = &h265_params_info, - .videoSessionParameters = 
ctx->session_params, - }; - - h265_params_feedback = (VkVideoEncodeH265SessionParametersFeedbackInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_FEEDBACK_INFO_KHR, - }; - params_feedback = (VkVideoEncodeSessionParametersFeedbackInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_FEEDBACK_INFO_KHR, - .pNext = &h265_params_feedback, - }; - - ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, &params_info, - &params_feedback, - &data_size, data); - if (ret == VK_INCOMPLETE || - (ret == VK_SUCCESS) && (data_size > 0)) { - data = av_mallocz(data_size); - if (!data) - return AVERROR(ENOMEM); - } else { - av_log(avctx, AV_LOG_ERROR, "Unable to get feedback for H.265 units = %"SIZE_SPECIFIER"\n", data_size); - return err; - } - - ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, &params_info, - &params_feedback, - &data_size, data); - if (ret != VK_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "Error writing feedback units\n"); - return err; - } - - av_log(avctx, AV_LOG_VERBOSE, "Feedback units written, overrides: %i (SPS: %i PPS: %i VPS: %i)\n", - params_feedback.hasOverrides, - h265_params_feedback.hasStdSPSOverrides, - h265_params_feedback.hasStdPPSOverrides, - h265_params_feedback.hasStdVPSOverrides); - - params_feedback.hasOverrides = 1; - h265_params_feedback.hasStdSPSOverrides = 1; - h265_params_feedback.hasStdPPSOverrides = 1; - - /* No need to sync any overrides */ - if (!params_feedback.hasOverrides) - return 0; - - /* Parse back tne units and override */ - err = parse_feedback_units(avctx, data, data_size, - h265_params_feedback.hasStdSPSOverrides, - h265_params_feedback.hasStdPPSOverrides); - if (err < 0) - return err; - - /* Create final session parameters */ - err = create_session_params(avctx); - if (err < 0) - return err; - - return 0; -} - -static int vulkan_encode_h265_add_nal(AVCodecContext *avctx, - CodedBitstreamFragment *au, - void *nal_unit) -{ - H265RawNALUnitHeader *header = nal_unit; - - int
err = ff_cbs_insert_unit_content(au, -1, - header->nal_unit_type, nal_unit, NULL); - if (err < 0) - av_log(avctx, AV_LOG_ERROR, "Failed to add NAL unit: " - "type = %d.\n", header->nal_unit_type); - - return err; -} - -static int write_access_unit(AVCodecContext *avctx, - uint8_t *data, size_t *data_len, - CodedBitstreamFragment *au) -{ - VulkanEncodeH265Context *enc = avctx->priv_data; - - int err = ff_cbs_write_fragment_data(enc->cbs, au); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n"); - return err; - } - - if (*data_len < au->data_size) { - av_log(avctx, AV_LOG_ERROR, "Access unit too large: %zu < %zu.\n", - *data_len, au->data_size); - return AVERROR(ENOSPC); - } - - memcpy(data, au->data, au->data_size); - *data_len = au->data_size; - - return 0; -} - -static int write_sequence_headers(AVCodecContext *avctx, - FFHWBaseEncodePicture *base_pic, - uint8_t *data, size_t *data_len) -{ - int err; - VulkanEncodeH265Context *enc = avctx->priv_data; - VulkanEncodeH265Picture *hp = base_pic ? 
base_pic->codec_priv : NULL; - CodedBitstreamFragment *au = &enc->current_access_unit; - - if (hp && hp->units_needed & UNIT_AUD) { - err = vulkan_encode_h265_add_nal(avctx, au, &enc->raw_aud); - if (err < 0) - goto fail; - hp->units_needed &= ~UNIT_AUD; - } - - err = vulkan_encode_h265_add_nal(avctx, au, &enc->units.raw_vps); - if (err < 0) - goto fail; - - err = vulkan_encode_h265_add_nal(avctx, au, &enc->units.raw_sps); - if (err < 0) - goto fail; - - err = vulkan_encode_h265_add_nal(avctx, au, &enc->units.raw_pps); - if (err < 0) - goto fail; - - err = write_access_unit(avctx, data, data_len, au); -fail: - ff_cbs_fragment_reset(au); - return err; -} - -static int write_extra_headers(AVCodecContext *avctx, - FFHWBaseEncodePicture *base_pic, - uint8_t *data, size_t *data_len) -{ - int err; - VulkanEncodeH265Context *enc = avctx->priv_data; - VulkanEncodeH265Picture *hp = base_pic->codec_priv; - CodedBitstreamFragment *au = &enc->current_access_unit; - - if (hp->units_needed & UNIT_AUD) { - err = vulkan_encode_h265_add_nal(avctx, au, &enc->raw_aud); - if (err < 0) - goto fail; - } - - if (hp->units_needed & UNIT_SEI_MASTERING_DISPLAY) { - err = ff_cbs_sei_add_message(enc->cbs, au, 1, - SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME, - &enc->sei_mastering_display, NULL); - if (err < 0) - goto fail; - } - - if (hp->units_needed & UNIT_SEI_CONTENT_LIGHT_LEVEL) { - err = ff_cbs_sei_add_message(enc->cbs, au, 1, - SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO, - &enc->sei_content_light_level, NULL); - if (err < 0) - goto fail; - } - if (hp->units_needed & UNIT_SEI_A53_CC) { - err = ff_cbs_sei_add_message(enc->cbs, au, 1, - SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35, - &enc->sei_a53cc, NULL); - if (err < 0) - goto fail; - } - - if (hp->units_needed) { - err = write_access_unit(avctx, data, data_len, au); - if (err < 0) - goto fail; - } else { - err = 0; - *data_len = 0; - } - -fail: - ff_cbs_fragment_reset(au); - return err; -} - -static int write_filler(AVCodecContext *avctx, uint32_t 
filler, - uint8_t *data, size_t *data_len) -{ - int err; - VulkanEncodeH265Context *enc = avctx->priv_data; - CodedBitstreamFragment *au = &enc->current_access_unit; - - H265RawFiller raw_filler = { - .nal_unit_header = - { - .nal_unit_type = HEVC_NAL_FD_NUT, - .nuh_temporal_id_plus1 = 1, - }, - .filler_size = filler, - }; - - err = vulkan_encode_h265_add_nal(avctx, au, &raw_filler); - if (err < 0) - goto fail; - - err = write_access_unit(avctx, data, data_len, au); -fail: - ff_cbs_fragment_reset(au); - return err; -} - -static const FFVulkanCodec enc_cb = { - .flags = FF_HW_FLAG_B_PICTURES | - FF_HW_FLAG_B_PICTURE_REFERENCES | - FF_HW_FLAG_NON_IDR_KEY_PICTURES, - .picture_priv_data_size = sizeof(VulkanEncodeH265Picture), - .filler_header_size = 7, - .init_profile = init_profile, - .init_pic_rc = init_pic_rc, - .init_pic_params = init_pic_params, - .write_sequence_headers = write_sequence_headers, - .write_extra_headers = write_extra_headers, - .write_filler = write_filler, -}; - -static av_cold int vulkan_encode_h265_init(AVCodecContext *avctx) -{ - int err, ref_l0, ref_l1; - VulkanEncodeH265Context *enc = avctx->priv_data; - FFVulkanEncodeContext *ctx = &enc->common; - FFHWBaseEncodeContext *base_ctx = &ctx->base; - int flags; - - if (avctx->profile == AV_PROFILE_UNKNOWN) - avctx->profile = enc->common.opts.profile; - - enc->caps = (VkVideoEncodeH265CapabilitiesKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_CAPABILITIES_KHR, - }; - - enc->quality_props = (VkVideoEncodeH265QualityLevelPropertiesKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_QUALITY_LEVEL_PROPERTIES_KHR, - }; - - err = ff_vulkan_encode_init(avctx, &enc->common, - &ff_vk_enc_h265_desc, &enc_cb, - &enc->caps, &enc->quality_props); - if (err < 0) - return err; - - av_log(avctx, AV_LOG_VERBOSE, "H265 encoder capabilities:\n"); - av_log(avctx, AV_LOG_VERBOSE, " Standard capability flags:\n"); - av_log(avctx, AV_LOG_VERBOSE, " separate_color_plane: %i\n", - !!(enc->caps.stdSyntaxFlags & 
VK_VIDEO_ENCODE_H265_STD_SEPARATE_COLOR_PLANE_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " sample_adaptive_offset: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " scaling_lists: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SCALING_LIST_DATA_PRESENT_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " pcm: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_PCM_ENABLED_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " temporal_mvp: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SPS_TEMPORAL_MVP_ENABLED_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " init_qp: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_INIT_QP_MINUS26_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " weighted:%s%s\n", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_WEIGHTED_PRED_FLAG_SET_BIT_KHR ? - " pred" : "", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_WEIGHTED_BIPRED_FLAG_SET_BIT_KHR ? - " bipred" : ""); - av_log(avctx, AV_LOG_VERBOSE, " parallel_merge_level: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_LOG2_PARALLEL_MERGE_LEVEL_MINUS2_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " sign_data_hiding: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SIGN_DATA_HIDING_ENABLED_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " transform_skip:%s%s\n", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_TRANSFORM_SKIP_ENABLED_FLAG_SET_BIT_KHR ? - " set" : "", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_TRANSFORM_SKIP_ENABLED_FLAG_UNSET_BIT_KHR ? 
- " unset" : ""); - av_log(avctx, AV_LOG_VERBOSE, " slice_chroma_qp_offsets: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " transquant_bypass: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_TRANSQUANT_BYPASS_ENABLED_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " constrained_intra_pred: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " entrypy_coding_sync: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_ENTROPY_CODING_SYNC_ENABLED_FLAG_SET_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " dependent_slice_segment:%s%s\n", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG_SET_BIT_KHR ? - " enabled" : "", - enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_DEPENDENT_SLICE_SEGMENT_FLAG_SET_BIT_KHR ? - " set" : ""); - av_log(avctx, AV_LOG_VERBOSE, " slice_qp_delta: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SLICE_QP_DELTA_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " different_slice_qp_delta: %i\n", - !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_DIFFERENT_SLICE_QP_DELTA_BIT_KHR)); - - av_log(avctx, AV_LOG_VERBOSE, " Capability flags:\n"); - av_log(avctx, AV_LOG_VERBOSE, " hdr_compliance: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " pred_weight_table_generated: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PREDICTION_WEIGHT_TABLE_GENERATED_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " row_unaligned_slice: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_ROW_UNALIGNED_SLICE_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " different_slice_type: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " 
b_frame_in_l0_list: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L0_LIST_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " b_frame_in_l1_list: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L1_LIST_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " per_pict_type_min_max_qp: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " per_slice_constant_qp: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PER_SLICE_CONSTANT_QP_BIT_KHR)); - av_log(avctx, AV_LOG_VERBOSE, " generate_prefix_nalu: %i\n", - !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_GENERATE_PREFIX_NALU_BIT_KHR)); - - av_log(avctx, AV_LOG_VERBOSE, " Capabilities:\n"); - av_log(avctx, AV_LOG_VERBOSE, " maxLevelIdc: %i\n", - enc->caps.maxLevelIdc); - av_log(avctx, AV_LOG_VERBOSE, " maxSliceCount: %i\n", - enc->caps.maxSliceSegmentCount); - av_log(avctx, AV_LOG_VERBOSE, " maxTiles: %ix%i\n", - enc->caps.maxTiles.width, enc->caps.maxTiles.height); - av_log(avctx, AV_LOG_VERBOSE, " cbtSizes: 0x%x\n", - enc->caps.ctbSizes); - av_log(avctx, AV_LOG_VERBOSE, " transformBlockSizes: 0x%x\n", - enc->caps.transformBlockSizes); - av_log(avctx, AV_LOG_VERBOSE, " max(P/B)PictureL0ReferenceCount: %i P's; %i B's\n", - enc->caps.maxPPictureL0ReferenceCount, - enc->caps.maxBPictureL0ReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " maxL1ReferenceCount: %i\n", - enc->caps.maxL1ReferenceCount); - av_log(avctx, AV_LOG_VERBOSE, " maxSubLayerCount: %i\n", - enc->caps.maxSubLayerCount); - av_log(avctx, AV_LOG_VERBOSE, " expectDyadicTemporalLayerPattern: %i\n", - enc->caps.expectDyadicTemporalSubLayerPattern); - av_log(avctx, AV_LOG_VERBOSE, " min/max Qp: [%i, %i]\n", - enc->caps.minQp, enc->caps.maxQp); - av_log(avctx, AV_LOG_VERBOSE, " prefersGopRemainingFrames: %i\n", - enc->caps.prefersGopRemainingFrames); - av_log(avctx, AV_LOG_VERBOSE, " requiresGopRemainingFrames: %i\n", - 
enc->caps.requiresGopRemainingFrames); - - err = init_enc_options(avctx); - if (err < 0) - return err; - - flags = ctx->codec->flags; - if (!enc->caps.maxPPictureL0ReferenceCount && - !enc->caps.maxBPictureL0ReferenceCount && - !enc->caps.maxL1ReferenceCount) { - /* Intra-only */ - flags |= FF_HW_FLAG_INTRA_ONLY; - ref_l0 = ref_l1 = 0; - } else if (!enc->caps.maxPPictureL0ReferenceCount) { - /* No P-frames? How. */ - base_ctx->p_to_gpb = 1; - ref_l0 = enc->caps.maxBPictureL0ReferenceCount; - ref_l1 = enc->caps.maxL1ReferenceCount; - } else if (!enc->caps.maxBPictureL0ReferenceCount && - !enc->caps.maxL1ReferenceCount) { - /* No B-frames */ - flags &= ~(FF_HW_FLAG_B_PICTURES | FF_HW_FLAG_B_PICTURE_REFERENCES); - ref_l0 = enc->caps.maxPPictureL0ReferenceCount; - ref_l1 = 0; - } else { - /* P and B frames */ - ref_l0 = FFMIN(enc->caps.maxPPictureL0ReferenceCount, - enc->caps.maxBPictureL0ReferenceCount); - ref_l1 = enc->caps.maxL1ReferenceCount; - } - - err = ff_hw_base_init_gop_structure(base_ctx, avctx, ref_l0, ref_l1, - flags, 0); - if (err < 0) - return err; - - base_ctx->output_delay = base_ctx->b_per_p; - base_ctx->decode_delay = base_ctx->max_b_depth; - - /* Init CBS */ - err = ff_cbs_init(&enc->cbs, AV_CODEC_ID_HEVC, avctx); - if (err < 0) - return err; - - /* Create units and session parameters */ - err = init_base_units(avctx); - if (err < 0) - return err; - - /* Write out extradata */ - err = ff_vulkan_write_global_header(avctx, &enc->common); - if (err < 0) - return err; - - return 0; -} - -static av_cold int vulkan_encode_h265_close(AVCodecContext *avctx) -{ - VulkanEncodeH265Context *enc = avctx->priv_data; - ff_vulkan_encode_uninit(&enc->common); - return 0; -} - -#define OFFSET(x) offsetof(VulkanEncodeH265Context, x) -#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) -static const AVOption vulkan_encode_h265_options[] = { - HW_BASE_ENCODE_COMMON_OPTIONS, - VULKAN_ENCODE_COMMON_OPTIONS, - - { "profile", "Set profile (profile_idc and 
constraint_set*_flag)", - OFFSET(common.opts.profile), AV_OPT_TYPE_INT, - { .i64 = AV_PROFILE_UNKNOWN }, AV_PROFILE_UNKNOWN, 0xffff, FLAGS, .unit = "profile" }, - -#define PROFILE(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ - { .i64 = value }, 0, 0, FLAGS, .unit = "profile" - { PROFILE("main", AV_PROFILE_HEVC_MAIN) }, - { PROFILE("main10", AV_PROFILE_HEVC_MAIN_10) }, - { PROFILE("rext", AV_PROFILE_HEVC_REXT) }, -#undef PROFILE - - { "tier", "Set tier (general_tier_flag)", OFFSET(common.opts.tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, .unit = "tier" }, - { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, .unit = "tier" }, - { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "tier" }, - - { "level", "Set level (general_level_idc)", - OFFSET(common.opts.level), AV_OPT_TYPE_INT, - { .i64 = AV_LEVEL_UNKNOWN }, AV_LEVEL_UNKNOWN, 0xff, FLAGS, .unit = "level" }, - -#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ - { .i64 = value }, 0, 0, FLAGS, .unit = "level" - { LEVEL("1", 30) }, - { LEVEL("2", 60) }, - { LEVEL("2.1", 63) }, - { LEVEL("3", 90) }, - { LEVEL("3.1", 93) }, - { LEVEL("4", 120) }, - { LEVEL("4.1", 123) }, - { LEVEL("5", 150) }, - { LEVEL("5.1", 153) }, - { LEVEL("5.2", 156) }, - { LEVEL("6", 180) }, - { LEVEL("6.1", 183) }, - { LEVEL("6.2", 186) }, -#undef LEVEL - - { "units", "Set units to include", OFFSET(unit_elems), AV_OPT_TYPE_FLAGS, { .i64 = UNIT_SEI_MASTERING_DISPLAY | UNIT_SEI_CONTENT_LIGHT_LEVEL | UNIT_SEI_A53_CC }, 0, INT_MAX, FLAGS, "units" }, - { "hdr", "Include HDR metadata for mastering display colour volume and content light level information", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_MASTERING_DISPLAY | UNIT_SEI_CONTENT_LIGHT_LEVEL }, INT_MIN, INT_MAX, FLAGS, "units" }, - { "a53_cc", "Include A/53 caption data", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_A53_CC }, INT_MIN, INT_MAX, FLAGS, "units" }, - - { NULL }, -}; - -static const FFCodecDefault vulkan_encode_h265_defaults[] = { - { 
"b", "0" }, - { "bf", "2" }, - { "g", "300" }, - { "i_qfactor", "1" }, - { "i_qoffset", "0" }, - { "b_qfactor", "6/5" }, - { "b_qoffset", "0" }, - { "qmin", "-1" }, - { "qmax", "-1" }, - { "refs", "0" }, - { NULL }, -}; - -static const AVClass vulkan_encode_h265_class = { - .class_name = "hevc_vulkan", - .item_name = av_default_item_name, - .option = vulkan_encode_h265_options, - .version = LIBAVUTIL_VERSION_INT, -}; - -const FFCodec ff_hevc_vulkan_encoder = { - .p.name = "hevc_vulkan", - CODEC_LONG_NAME("H.265/HEVC (Vulkan)"), - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_HEVC, - .priv_data_size = sizeof(VulkanEncodeH265Context), - .init = &vulkan_encode_h265_init, - FF_CODEC_RECEIVE_PACKET_CB(&ff_vulkan_encode_receive_packet), - .close = &vulkan_encode_h265_close, - .p.priv_class = &vulkan_encode_h265_class, - .p.capabilities = AV_CODEC_CAP_DELAY | - AV_CODEC_CAP_HARDWARE | - AV_CODEC_CAP_DR1 | - AV_CODEC_CAP_ENCODER_FLUSH | - AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, - .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, - .defaults = vulkan_encode_h265_defaults, - CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), - .hw_configs = ff_vulkan_encode_hw_configs, - .p.wrapper_name = "vulkan", -}; -- 2.49.1 From 11b0e7cd6abc536087b2e4c63bddce5949beeb38 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:35:51 +0000 Subject: [PATCH 078/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_encode_h265.c | 1795 ++++++++++++++++++++++++ 1 file changed, 1795 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_encode_h265.c diff --git a/libavcodec/vulkan/vulkan_encode_h265.c b/libavcodec/vulkan/vulkan_encode_h265.c new file mode 100644 index 0000000000..d1b6af25fb --- /dev/null +++ b/libavcodec/vulkan/vulkan_encode_h265.c @@ -0,0 +1,1795 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/internal.h" +#include "libavutil/opt.h" +#include "libavutil/mem.h" + +#include "libavcodec/cbs.h" +#include "libavcodec/cbs_h265.h" +#include "libavcodec/atsc_a53.h" +#include "libavutil/mastering_display_metadata.h" + +#include "libavcodec/codec_internal.h" +#include "libavcodec/version.h" +#include "libavcodec/hw_base_encode_h265.h" + +#include "vulkan_encode.h" + +enum UnitElems { + UNIT_AUD = 1 << 0, + UNIT_SEI_MASTERING_DISPLAY = 1 << 1, + UNIT_SEI_CONTENT_LIGHT_LEVEL = 1 << 2, + UNIT_SEI_A53_CC = 1 << 3, +}; + +const FFVulkanEncodeDescriptor ff_vk_enc_h265_desc = { + .codec_id = AV_CODEC_ID_H265, + .encode_extension = FF_VK_EXT_VIDEO_ENCODE_H265, + .encode_op = VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR, + .ext_props = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION, + }, +}; + +typedef struct VulkanEncodeH265Picture { + int frame_num; + int64_t last_idr_frame; + uint16_t idr_pic_id; + int primary_pic_type; + int slice_type; + int pic_order_cnt; + int pic_type; + + enum UnitElems units_needed; + + VkVideoEncodeH265RateControlInfoKHR vkrc_info; + VkVideoEncodeH265RateControlLayerInfoKHR vkrc_layer_info; + + 
StdVideoEncodeH265PictureInfo h265pic_info; + VkVideoEncodeH265PictureInfoKHR vkh265pic_info; + + StdVideoEncodeH265WeightTable slice_wt; + StdVideoEncodeH265SliceSegmentHeader slice_hdr; + VkVideoEncodeH265NaluSliceSegmentInfoKHR vkslice; + + StdVideoEncodeH265ReferenceInfo h265dpb_info; + VkVideoEncodeH265DpbSlotInfoKHR vkh265dpb_info; + + StdVideoEncodeH265ReferenceListsInfo ref_list_info; + StdVideoEncodeH265LongTermRefPics l_rps; + StdVideoH265ShortTermRefPicSet s_rps; +} VulkanEncodeH265Picture; + +typedef struct VulkanEncodeH265Context { + FFVulkanEncodeContext common; + + FFHWBaseEncodeH265 units; + FFHWBaseEncodeH265Opts unit_opts; + + enum UnitElems unit_elems; + + uint8_t fixed_qp_idr; + uint8_t fixed_qp_p; + uint8_t fixed_qp_b; + + uint64_t hrd_buffer_size; + uint64_t initial_buffer_fullness; + + VkVideoEncodeH265ProfileInfoKHR profile; + + VkVideoEncodeH265CapabilitiesKHR caps; + VkVideoEncodeH265QualityLevelPropertiesKHR quality_props; + + CodedBitstreamContext *cbs; + CodedBitstreamFragment current_access_unit; + + H265RawAUD raw_aud; + + SEIRawMasteringDisplayColourVolume sei_mastering_display; + SEIRawContentLightLevelInfo sei_content_light_level; + SEIRawUserDataRegistered sei_a53cc; + void *sei_a53cc_data; +} VulkanEncodeH265Context; + +static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + VkVideoEncodeRateControlInfoKHR *rc_info, + VkVideoEncodeRateControlLayerInfoKHR *rc_layer) +{ + VulkanEncodeH265Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + VulkanEncodeH265Picture *hp = pic->codec_priv; + + hp->vkrc_info = (VkVideoEncodeH265RateControlInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_INFO_KHR, + .flags = VK_VIDEO_ENCODE_H265_RATE_CONTROL_REFERENCE_PATTERN_FLAT_BIT_KHR | + VK_VIDEO_ENCODE_H265_RATE_CONTROL_REGULAR_GOP_BIT_KHR, + .idrPeriod = ctx->base.gop_size, + .gopFrameCount = ctx->base.gop_size, + .consecutiveBFrameCount = FFMAX(ctx->base.b_per_p - 1, 0), + 
.subLayerCount = 0, + }; + rc_info->pNext = &hp->vkrc_info; + + if (rc_info->rateControlMode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { + rc_info->virtualBufferSizeInMs = (enc->hrd_buffer_size * 1000LL) / avctx->bit_rate; + rc_info->initialVirtualBufferSizeInMs = (enc->initial_buffer_fullness * 1000LL) / avctx->bit_rate; + + hp->vkrc_layer_info = (VkVideoEncodeH265RateControlLayerInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_LAYER_INFO_KHR, + + .useMinQp = avctx->qmin > 0, + .minQp.qpI = avctx->qmin > 0 ? avctx->qmin : 0, + .minQp.qpP = avctx->qmin > 0 ? avctx->qmin : 0, + .minQp.qpB = avctx->qmin > 0 ? avctx->qmin : 0, + + .useMaxQp = avctx->qmax > 0, + .maxQp.qpI = avctx->qmax > 0 ? avctx->qmax : 0, + .maxQp.qpP = avctx->qmax > 0 ? avctx->qmax : 0, + .maxQp.qpB = avctx->qmax > 0 ? avctx->qmax : 0, + + .useMaxFrameSize = 0, + }; + rc_layer->pNext = &hp->vkrc_layer_info; + hp->vkrc_info.subLayerCount = 1; + } + + return 0; +} + +static int vk_enc_h265_update_pic_info(AVCodecContext *avctx, + FFHWBaseEncodePicture *pic) +{ + VulkanEncodeH265Context *enc = avctx->priv_data; + VulkanEncodeH265Picture *hp = pic->codec_priv; + FFHWBaseEncodePicture *prev = pic->prev; + VulkanEncodeH265Picture *hprev = prev ? 
prev->codec_priv : NULL; + + if (pic->type == FF_HW_PICTURE_TYPE_IDR) { + av_assert0(pic->display_order == pic->encode_order); + + hp->last_idr_frame = pic->display_order; + + hp->slice_type = STD_VIDEO_H265_SLICE_TYPE_I; + hp->pic_type = STD_VIDEO_H265_PICTURE_TYPE_IDR; + } else { + av_assert0(prev); + hp->last_idr_frame = hprev->last_idr_frame; + + if (pic->type == FF_HW_PICTURE_TYPE_I) { + hp->slice_type = STD_VIDEO_H265_SLICE_TYPE_I; + hp->pic_type = STD_VIDEO_H265_PICTURE_TYPE_I; + } else if (pic->type == FF_HW_PICTURE_TYPE_P) { + av_assert0(pic->refs[0]); + hp->slice_type = STD_VIDEO_H265_SLICE_TYPE_P; + hp->pic_type = STD_VIDEO_H265_PICTURE_TYPE_P; + } else { + FFHWBaseEncodePicture *irap_ref; + av_assert0(pic->refs[0][0] && pic->refs[1][0]); + for (irap_ref = pic; irap_ref; irap_ref = irap_ref->refs[1][0]) { + if (irap_ref->type == FF_HW_PICTURE_TYPE_I) + break; + } + hp->slice_type = STD_VIDEO_H265_SLICE_TYPE_B; + hp->pic_type = STD_VIDEO_H265_PICTURE_TYPE_B; + } + } + hp->pic_order_cnt = pic->display_order - hp->last_idr_frame; + + hp->units_needed = 0; + + if (enc->unit_elems & UNIT_AUD) { + hp->units_needed |= UNIT_AUD; + enc->raw_aud = (H265RawAUD) { + .nal_unit_header = { + .nal_unit_type = HEVC_NAL_AUD, + .nuh_layer_id = 0, + .nuh_temporal_id_plus1 = 1, + }, + .pic_type = hp->pic_type, + }; + } + + // Only look for the metadata on I/IDR frame on the output. We + // may force an IDR frame on the output where the metadata gets + // changed on the input frame. 
+ if ((enc->unit_elems & UNIT_SEI_MASTERING_DISPLAY) && + (pic->type == FF_HW_PICTURE_TYPE_I || pic->type == FF_HW_PICTURE_TYPE_IDR)) { + AVFrameSideData *sd = + av_frame_get_side_data(pic->input_image, + AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); + + if (sd) { + AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *)sd->data; + + // SEI is needed when both the primaries and luminance are set + if (mdm->has_primaries && mdm->has_luminance) { + SEIRawMasteringDisplayColourVolume *mdcv = + &enc->sei_mastering_display; + const int mapping[3] = {1, 2, 0}; + const int chroma_den = 50000; + const int luma_den = 10000; + + for (int i = 0; i < 3; i++) { + const int j = mapping[i]; + mdcv->display_primaries_x[i] = + FFMIN(lrint(chroma_den * + av_q2d(mdm->display_primaries[j][0])), + chroma_den); + mdcv->display_primaries_y[i] = + FFMIN(lrint(chroma_den * + av_q2d(mdm->display_primaries[j][1])), + chroma_den); + } + + mdcv->white_point_x = + FFMIN(lrint(chroma_den * av_q2d(mdm->white_point[0])), + chroma_den); + mdcv->white_point_y = + FFMIN(lrint(chroma_den * av_q2d(mdm->white_point[1])), + chroma_den); + + mdcv->max_display_mastering_luminance = + lrint(luma_den * av_q2d(mdm->max_luminance)); + mdcv->min_display_mastering_luminance = + FFMIN(lrint(luma_den * av_q2d(mdm->min_luminance)), + mdcv->max_display_mastering_luminance); + + hp->units_needed |= UNIT_SEI_MASTERING_DISPLAY; + } + } + } + + if ((enc->unit_elems & UNIT_SEI_CONTENT_LIGHT_LEVEL) && + (pic->type == FF_HW_PICTURE_TYPE_I || pic->type == FF_HW_PICTURE_TYPE_IDR)) { + AVFrameSideData *sd = av_frame_get_side_data(pic->input_image, + AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); + + if (sd) { + AVContentLightMetadata *clm = (AVContentLightMetadata *)sd->data; + SEIRawContentLightLevelInfo *clli = &enc->sei_content_light_level; + + clli->max_content_light_level = FFMIN(clm->MaxCLL, 65535); + clli->max_pic_average_light_level = FFMIN(clm->MaxFALL, 65535); + + hp->units_needed |= UNIT_SEI_CONTENT_LIGHT_LEVEL; + } + } 
+ + if (enc->unit_elems & UNIT_SEI_A53_CC) { + int err; + size_t sei_a53cc_len; + av_freep(&enc->sei_a53cc_data); + err = ff_alloc_a53_sei(pic->input_image, 0, &enc->sei_a53cc_data, &sei_a53cc_len); + if (err < 0) + return err; + if (enc->sei_a53cc_data != NULL) { + enc->sei_a53cc.itu_t_t35_country_code = 181; + enc->sei_a53cc.data = (uint8_t *)enc->sei_a53cc_data + 1; + enc->sei_a53cc.data_length = sei_a53cc_len - 1; + + hp->units_needed |= UNIT_SEI_A53_CC; + } + } + + return 0; +} + +static void setup_slices(AVCodecContext *avctx, + FFHWBaseEncodePicture *pic) +{ + VulkanEncodeH265Context *enc = avctx->priv_data; + VulkanEncodeH265Picture *hp = pic->codec_priv; + + hp->slice_wt = (StdVideoEncodeH265WeightTable) { + .flags = (StdVideoEncodeH265WeightTableFlags) { + .luma_weight_l0_flag = 0, + .chroma_weight_l0_flag = 0, + .luma_weight_l1_flag = 0, + .chroma_weight_l1_flag = 0, + }, + .luma_log2_weight_denom = 0, + .delta_chroma_log2_weight_denom = 0, + .delta_luma_weight_l0 = { 0 }, + .luma_offset_l0 = { 0 }, + .delta_chroma_weight_l0 = { { 0 } }, + .delta_chroma_offset_l0 = { { 0 } }, + .delta_luma_weight_l1 = { 0 }, + .luma_offset_l1 = { 0 }, + .delta_chroma_weight_l1 = { { 0 } }, + .delta_chroma_offset_l1 = { { 0 } }, + }; + + hp->slice_hdr = (StdVideoEncodeH265SliceSegmentHeader) { + .flags = (StdVideoEncodeH265SliceSegmentHeaderFlags) { + .first_slice_segment_in_pic_flag = 1, + .dependent_slice_segment_flag = 0, + .slice_sao_luma_flag = enc->units.raw_sps.sample_adaptive_offset_enabled_flag, + .slice_sao_chroma_flag = enc->units.raw_sps.sample_adaptive_offset_enabled_flag, + .num_ref_idx_active_override_flag = 0, + .mvd_l1_zero_flag = 0, + .cabac_init_flag = 0, + .cu_chroma_qp_offset_enabled_flag = 0, + .deblocking_filter_override_flag = 0, + .slice_deblocking_filter_disabled_flag = 0, + .collocated_from_l0_flag = 1, + .slice_loop_filter_across_slices_enabled_flag = 0, + /* Reserved */ + }, + .slice_type = hp->slice_type, + .slice_segment_address = 0, + 
.collocated_ref_idx = 0, + .MaxNumMergeCand = 5, + .slice_cb_qp_offset = 0, + .slice_cr_qp_offset = 0, + .slice_beta_offset_div2 = 0, + .slice_tc_offset_div2 = 0, + .slice_act_y_qp_offset = 0, + .slice_act_cb_qp_offset = 0, + .slice_act_cr_qp_offset = 0, + .slice_qp_delta = 0, /* Filled in below */ + /* Reserved */ + .pWeightTable = NULL, // &hp->slice_wt, + }; + + hp->vkslice = (VkVideoEncodeH265NaluSliceSegmentInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_NALU_SLICE_SEGMENT_INFO_KHR, + .pNext = NULL, + .constantQp = pic->type == FF_HW_PICTURE_TYPE_B ? enc->fixed_qp_b : + pic->type == FF_HW_PICTURE_TYPE_P ? enc->fixed_qp_p : + enc->fixed_qp_idr, + .pStdSliceSegmentHeader = &hp->slice_hdr, + }; + + if (enc->common.opts.rc_mode != VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) + hp->vkslice.constantQp = 0; + + hp->slice_hdr.slice_qp_delta = hp->vkslice.constantQp - + (enc->units.raw_pps.init_qp_minus26 + 26); + + hp->vkh265pic_info.pNaluSliceSegmentEntries = &hp->vkslice; + hp->vkh265pic_info.naluSliceSegmentEntryCount = 1; +} + +static void setup_refs(AVCodecContext *avctx, + FFHWBaseEncodePicture *pic, + VkVideoEncodeInfoKHR *encode_info) +{ + int i, j; + VulkanEncodeH265Context *enc = avctx->priv_data; + VulkanEncodeH265Picture *hp = pic->codec_priv; + + hp->ref_list_info = (StdVideoEncodeH265ReferenceListsInfo) { + .flags = (StdVideoEncodeH265ReferenceListsInfoFlags) { + .ref_pic_list_modification_flag_l0 = 0, + .ref_pic_list_modification_flag_l1 = 0, + /* Reserved */ + }, + /* May be overridden during setup_slices() */ + .num_ref_idx_l0_active_minus1 = pic->nb_refs[0] - 1, + .num_ref_idx_l1_active_minus1 = pic->nb_refs[1] - 1, + /* Reserved */ + .list_entry_l0 = { 0 }, + .list_entry_l1 = { 0 }, + }; + + for (i = 0; i < STD_VIDEO_H265_MAX_NUM_LIST_REF; i++) + hp->ref_list_info.RefPicList0[i] = hp->ref_list_info.RefPicList1[i] = -1; + + /* Note: really not sure */ + for (i = 0; i < pic->nb_refs[0]; i++) { + VkVideoReferenceSlotInfoKHR *slot_info; 
+ slot_info = (VkVideoReferenceSlotInfoKHR *)&encode_info->pReferenceSlots[i]; + hp->ref_list_info.RefPicList0[i] = slot_info->slotIndex; + } + + /* Note: really not sure */ + for (i = 0; i < pic->nb_refs[1]; i++) { + VkVideoReferenceSlotInfoKHR *slot_info; + slot_info = (VkVideoReferenceSlotInfoKHR *)&encode_info->pReferenceSlots[pic->nb_refs[0] + i]; + hp->ref_list_info.RefPicList1[i] = slot_info->slotIndex; + } + + hp->h265pic_info.pRefLists = &hp->ref_list_info; + + if (pic->type != FF_HW_PICTURE_TYPE_IDR) { + StdVideoH265ShortTermRefPicSet *rps; + VulkanEncodeH265Picture *strp; + int rps_poc[MAX_DPB_SIZE]; + int rps_used[MAX_DPB_SIZE]; + int poc, rps_pics; + + hp->h265pic_info.flags.short_term_ref_pic_set_sps_flag = 0; + + rps = &hp->s_rps; + memset(rps, 0, sizeof(*rps)); + + rps_pics = 0; + for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) { + for (j = 0; j < pic->nb_refs[i]; j++) { + strp = pic->refs[i][j]->codec_priv; + rps_poc[rps_pics] = strp->pic_order_cnt; + rps_used[rps_pics] = 1; + ++rps_pics; + } + } + + for (i = 0; i < pic->nb_dpb_pics; i++) { + if (pic->dpb[i] == pic) + continue; + + for (j = 0; j < pic->nb_refs[0]; j++) { + if (pic->dpb[i] == pic->refs[0][j]) + break; + } + if (j < pic->nb_refs[0]) + continue; + + for (j = 0; j < pic->nb_refs[1]; j++) { + if (pic->dpb[i] == pic->refs[1][j]) + break; + } + if (j < pic->nb_refs[1]) + continue; + + strp = pic->dpb[i]->codec_priv; + rps_poc[rps_pics] = strp->pic_order_cnt; + rps_used[rps_pics] = 0; + ++rps_pics; + } + + for (i = 1; i < rps_pics; i++) { + for (j = i; j > 0; j--) { + if (rps_poc[j] > rps_poc[j - 1]) + break; + av_assert0(rps_poc[j] != rps_poc[j - 1]); + FFSWAP(int, rps_poc[j], rps_poc[j - 1]); + FFSWAP(int, rps_used[j], rps_used[j - 1]); + } + } + + av_log(avctx, AV_LOG_DEBUG, "RPS for POC %d:", hp->pic_order_cnt); + for (i = 0; i < rps_pics; i++) + av_log(avctx, AV_LOG_DEBUG, " (%d,%d)", rps_poc[i], rps_used[i]); + + av_log(avctx, AV_LOG_DEBUG, "\n"); + + for (i = 0; i < rps_pics; i++) { + 
av_assert0(rps_poc[i] != hp->pic_order_cnt); + if (rps_poc[i] > hp->pic_order_cnt) + break; + } + + rps->num_negative_pics = i; + rps->used_by_curr_pic_s0_flag = 0x0; + poc = hp->pic_order_cnt; + for (j = i - 1; j >= 0; j--) { + rps->delta_poc_s0_minus1[i - 1 - j] = poc - rps_poc[j] - 1; + rps->used_by_curr_pic_s0_flag |= rps_used[j] << (i - 1 - j); + poc = rps_poc[j]; + } + + rps->num_positive_pics = rps_pics - i; + rps->used_by_curr_pic_s1_flag = 0x0; + poc = hp->pic_order_cnt; + for (j = i; j < rps_pics; j++) { + rps->delta_poc_s1_minus1[j - i] = rps_poc[j] - poc - 1; + rps->used_by_curr_pic_s1_flag |= rps_used[j] << (j - i); + poc = rps_poc[j]; + } + + hp->l_rps.num_long_term_sps = 0; + hp->l_rps.num_long_term_pics = 0; + + // when this flag is not present, it is inferred to 1. + hp->slice_hdr.flags.collocated_from_l0_flag = 1; + hp->h265pic_info.flags.slice_temporal_mvp_enabled_flag = + enc->units.raw_sps.sps_temporal_mvp_enabled_flag; + if (hp->h265pic_info.flags.slice_temporal_mvp_enabled_flag) { + if (hp->slice_hdr.slice_type == STD_VIDEO_H265_SLICE_TYPE_B) + hp->slice_hdr.flags.collocated_from_l0_flag = 1; + hp->slice_hdr.collocated_ref_idx = 0; + } + + hp->slice_hdr.flags.num_ref_idx_active_override_flag = 0; + hp->ref_list_info.num_ref_idx_l0_active_minus1 = enc->units.raw_pps.num_ref_idx_l0_default_active_minus1; + hp->ref_list_info.num_ref_idx_l1_active_minus1 = enc->units.raw_pps.num_ref_idx_l1_default_active_minus1; + } + + hp->h265pic_info.pShortTermRefPicSet = &hp->s_rps; + hp->h265pic_info.pLongTermRefPics = &hp->l_rps; +} + +static int init_pic_params(AVCodecContext *avctx, FFHWBaseEncodePicture *pic, + VkVideoEncodeInfoKHR *encode_info) +{ + int err; + VulkanEncodeH265Context *enc = avctx->priv_data; + FFVulkanEncodePicture *vp = pic->priv; + VulkanEncodeH265Picture *hp = pic->codec_priv; + VkVideoReferenceSlotInfoKHR *ref_slot; + + err = vk_enc_h265_update_pic_info(avctx, pic); + if (err < 0) + return err; + + hp->vkh265pic_info = 
(VkVideoEncodeH265PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_PICTURE_INFO_KHR, + .pNext = NULL, + .pNaluSliceSegmentEntries = NULL, // Filled in during setup_slices() + .naluSliceSegmentEntryCount = 0, // Filled in during setup_slices() + .pStdPictureInfo = &hp->h265pic_info, + }; + + hp->h265pic_info = (StdVideoEncodeH265PictureInfo) { + .flags = (StdVideoEncodeH265PictureInfoFlags) { + .is_reference = pic->is_reference, + .IrapPicFlag = pic->type == FF_HW_PICTURE_TYPE_IDR, + .used_for_long_term_reference = 0, + .discardable_flag = 0, + .cross_layer_bla_flag = 0, + .pic_output_flag = 1, + .no_output_of_prior_pics_flag = 0, + .short_term_ref_pic_set_sps_flag = 0, + .slice_temporal_mvp_enabled_flag = enc->units.raw_sps.sps_temporal_mvp_enabled_flag, + /* Reserved */ + }, + .pic_type = hp->pic_type, + .sps_video_parameter_set_id = 0, + .pps_seq_parameter_set_id = 0, + .pps_pic_parameter_set_id = 0, + .short_term_ref_pic_set_idx = 0, + .PicOrderCntVal = hp->pic_order_cnt, + .TemporalId = 0, + /* Reserved */ + .pRefLists = NULL, // Filled in during setup_refs + .pShortTermRefPicSet = NULL, + .pLongTermRefPics = NULL, + }; + encode_info->pNext = &hp->vkh265pic_info; + + hp->h265dpb_info = (StdVideoEncodeH265ReferenceInfo) { + .flags = (StdVideoEncodeH265ReferenceInfoFlags) { + .used_for_long_term_reference = 0, + .unused_for_reference = 0, + /* Reserved */ + }, + .pic_type = hp->h265pic_info.pic_type, + .PicOrderCntVal = hp->h265pic_info.PicOrderCntVal, + .TemporalId = hp->h265pic_info.TemporalId, + }; + hp->vkh265dpb_info = (VkVideoEncodeH265DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = &hp->h265dpb_info, + }; + + vp->dpb_slot.pNext = &hp->vkh265dpb_info; + + ref_slot = (VkVideoReferenceSlotInfoKHR *)encode_info->pSetupReferenceSlot; + ref_slot->pNext = &hp->vkh265dpb_info; + + setup_refs(avctx, pic, encode_info); + + setup_slices(avctx, pic); + + return 0; +} + +static int 
init_profile(AVCodecContext *avctx, + VkVideoProfileInfoKHR *profile, void *pnext) +{ + VkResult ret; + VulkanEncodeH265Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + VkVideoEncodeH265CapabilitiesKHR h265_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_CAPABILITIES_KHR, + }; + VkVideoEncodeCapabilitiesKHR enc_caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR, + .pNext = &h265_caps, + }; + VkVideoCapabilitiesKHR caps = { + .sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR, + .pNext = &enc_caps, + }; + + /* In order of preference */ + int last_supported = AV_PROFILE_UNKNOWN; + static const int known_profiles[] = { + AV_PROFILE_HEVC_MAIN, + AV_PROFILE_HEVC_MAIN_10, + AV_PROFILE_HEVC_REXT, + }; + int nb_profiles = FF_ARRAY_ELEMS(known_profiles); + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->frames->sw_format); + if (!desc) + return AVERROR(EINVAL); + + if (s->frames->sw_format == AV_PIX_FMT_NV12) + nb_profiles = 1; + else if (s->frames->sw_format == AV_PIX_FMT_P010) + nb_profiles = 2; + + enc->profile = (VkVideoEncodeH265ProfileInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_PROFILE_INFO_KHR, + .pNext = pnext, + .stdProfileIdc = ff_vk_h265_profile_to_vk(avctx->profile), + }; + profile->pNext = &enc->profile; + + /* Set level */ + if (avctx->level == AV_LEVEL_UNKNOWN) + avctx->level = enc->common.opts.level; + + /* User has explicitly specified a profile. 
*/ + if (avctx->profile != AV_PROFILE_UNKNOWN) + return 0; + + av_log(avctx, AV_LOG_DEBUG, "Supported profiles:\n"); + for (int i = 0; i < nb_profiles; i++) { + enc->profile.stdProfileIdc = ff_vk_h265_profile_to_vk(known_profiles[i]); + ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev, + profile, + &caps); + if (ret == VK_SUCCESS) { + av_log(avctx, AV_LOG_DEBUG, " %s\n", + avcodec_profile_name(avctx->codec_id, known_profiles[i])); + last_supported = known_profiles[i]; + } + } + + if (last_supported == AV_PROFILE_UNKNOWN) { + av_log(avctx, AV_LOG_ERROR, "No supported profiles for given format\n"); + return AVERROR(ENOTSUP); + } + + enc->profile.stdProfileIdc = ff_vk_h265_profile_to_vk(last_supported); + av_log(avctx, AV_LOG_VERBOSE, "Using profile %s\n", + avcodec_profile_name(avctx->codec_id, last_supported)); + avctx->profile = last_supported; + + return 0; +} + +static int init_enc_options(AVCodecContext *avctx) +{ + VulkanEncodeH265Context *enc = avctx->priv_data; + + if (avctx->rc_buffer_size) + enc->hrd_buffer_size = avctx->rc_buffer_size; + else if (avctx->rc_max_rate > 0) + enc->hrd_buffer_size = avctx->rc_max_rate; + else + enc->hrd_buffer_size = avctx->bit_rate; + + if (avctx->rc_initial_buffer_occupancy) { + if (avctx->rc_initial_buffer_occupancy > enc->hrd_buffer_size) { + av_log(avctx, AV_LOG_ERROR, "Invalid RC buffer settings: " + "must have initial buffer size (%d) <= " + "buffer size (%"PRId64").\n", + avctx->rc_initial_buffer_occupancy, enc->hrd_buffer_size); + return AVERROR(EINVAL); + } + enc->initial_buffer_fullness = avctx->rc_initial_buffer_occupancy; + } else { + enc->initial_buffer_fullness = enc->hrd_buffer_size * 3 / 4; + } + + if (enc->common.opts.rc_mode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) { + enc->fixed_qp_p = av_clip(enc->common.opts.qp, + enc->caps.minQp, enc->caps.maxQp); + + if (avctx->i_quant_factor > 0.0) + enc->fixed_qp_idr = av_clip((avctx->i_quant_factor * enc->fixed_qp_p + + 
avctx->i_quant_offset) + 0.5, + enc->caps.minQp, enc->caps.maxQp); + else + enc->fixed_qp_idr = enc->fixed_qp_p; + + if (avctx->b_quant_factor > 0.0) + enc->fixed_qp_b = av_clip((avctx->b_quant_factor * enc->fixed_qp_p + + avctx->b_quant_offset) + 0.5, + enc->caps.minQp, enc->caps.maxQp); + else + enc->fixed_qp_b = enc->fixed_qp_p; + + av_log(avctx, AV_LOG_DEBUG, "Using fixed QP = " + "%d / %d / %d for IDR- / P- / B-frames.\n", + enc->fixed_qp_idr, enc->fixed_qp_p, enc->fixed_qp_b); + } else { + enc->fixed_qp_idr = 26; + enc->fixed_qp_p = 26; + enc->fixed_qp_b = 26; + } + + return 0; +} + +static av_cold int init_sequence_headers(AVCodecContext *avctx) +{ + int err; + VulkanEncodeH265Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFHWBaseEncodeContext *base_ctx = &ctx->base; + + FFHWBaseEncodeH265 *units = &enc->units; + FFHWBaseEncodeH265Opts *unit_opts = &enc->unit_opts; + + int max_ctb_size; + unsigned min_tb_size; + unsigned max_tb_size; + unsigned max_transform_hierarchy; + + unit_opts->tier = enc->common.opts.tier; + unit_opts->fixed_qp_idr = enc->fixed_qp_idr; + unit_opts->cu_qp_delta_enabled_flag = enc->common.opts.rc_mode != VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR; + + unit_opts->nb_slices = 1; + + unit_opts->slice_block_rows = (avctx->height + base_ctx->slice_block_height - 1) / + base_ctx->slice_block_height; + unit_opts->slice_block_cols = (avctx->width + base_ctx->slice_block_width - 1) / + base_ctx->slice_block_width; + + /* cabac already set via an option */ + /* fixed_qp_idr initialized in init_enc_options() */ + /* hrd_buffer_size initialized in init_enc_options() */ + /* initial_buffer_fullness initialized in init_enc_options() */ + + err = ff_hw_base_encode_init_params_h265(&enc->common.base, avctx, + units, unit_opts); + if (err < 0) + return err; + + units->raw_sps.sample_adaptive_offset_enabled_flag = + !!(enc->caps.stdSyntaxFlags & 
VK_VIDEO_ENCODE_H265_STD_SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG_SET_BIT_KHR); + units->raw_pps.transform_skip_enabled_flag = + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_TRANSFORM_SKIP_ENABLED_FLAG_SET_BIT_KHR); + + max_ctb_size = 16; + + /* coding blocks from 8x8 to max CTB size. */ + if (enc->caps.ctbSizes & VK_VIDEO_ENCODE_H265_CTB_SIZE_64_BIT_KHR) + max_ctb_size = 64; + else if (enc->caps.ctbSizes & VK_VIDEO_ENCODE_H265_CTB_SIZE_32_BIT_KHR) + max_ctb_size = 32; + + min_tb_size = 0; + max_tb_size = 0; + if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_4_BIT_KHR) + min_tb_size = 4; + else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_8_BIT_KHR) + min_tb_size = 8; + else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_16_BIT_KHR) + min_tb_size = 16; + else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_32_BIT_KHR) + min_tb_size = 32; + + if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_32_BIT_KHR) + max_tb_size = 32; + else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_16_BIT_KHR) + max_tb_size = 16; + else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_8_BIT_KHR) + max_tb_size = 8; + else if (enc->caps.transformBlockSizes & VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_4_BIT_KHR) + max_tb_size = 4; + + units->raw_sps.log2_min_luma_coding_block_size_minus3 = 0; + units->raw_sps.log2_diff_max_min_luma_coding_block_size = av_log2(max_ctb_size) - 3; + units->raw_sps.log2_min_luma_transform_block_size_minus2 = av_log2(min_tb_size) - 2; + units->raw_sps.log2_diff_max_min_luma_transform_block_size = av_log2(max_tb_size) - av_log2(min_tb_size); + + max_transform_hierarchy = av_log2(max_ctb_size) - av_log2(min_tb_size); + units->raw_sps.max_transform_hierarchy_depth_intra = max_transform_hierarchy; + units->raw_sps.max_transform_hierarchy_depth_intra = 
max_transform_hierarchy; + + units->raw_sps.vui.bitstream_restriction_flag = 0; + units->raw_sps.vui.max_bytes_per_pic_denom = 2; + units->raw_sps.vui.max_bits_per_min_cu_denom = 1; + + units->raw_sps.sps_temporal_mvp_enabled_flag = 0; + + if (base_ctx->gop_size & base_ctx->gop_size - 1 == 0) + units->raw_sps.log2_max_pic_order_cnt_lsb_minus4 = FFMAX(av_log2(base_ctx->gop_size) - 4, 0); + else + units->raw_sps.log2_max_pic_order_cnt_lsb_minus4 = FFMAX(av_log2(base_ctx->gop_size) - 3, 0); + + return 0; +} + +typedef struct VulkanH265Units { + StdVideoH265SequenceParameterSet sps; + StdVideoH265ShortTermRefPicSet str[STD_VIDEO_H265_SUBLAYERS_LIST_SIZE]; + StdVideoH265LongTermRefPicsSps ltr; + StdVideoH265ProfileTierLevel ptl_sps; + StdVideoH265DecPicBufMgr dpbm_sps; + + StdVideoH265HrdParameters vui_header_sps; + StdVideoH265SequenceParameterSetVui vui_sps; + + StdVideoH265SubLayerHrdParameters slhdrnal[HEVC_MAX_SUB_LAYERS]; + StdVideoH265SubLayerHrdParameters slhdrvcl[HEVC_MAX_SUB_LAYERS]; + + StdVideoH265PictureParameterSet pps; + StdVideoH265ScalingLists pps_scaling; + + StdVideoH265VideoParameterSet vps; + StdVideoH265ProfileTierLevel ptl_vps; + StdVideoH265DecPicBufMgr dpbm_vps; + StdVideoH265HrdParameters vui_header_vps; +} VulkanH265Units; + +static av_cold int base_unit_to_vk(AVCodecContext *avctx, + VulkanH265Units *vk_units) +{ + VulkanEncodeH265Context *enc = avctx->priv_data; + + H265RawSPS *sps = &enc->units.raw_sps; + StdVideoH265SequenceParameterSet *vksps = &vk_units->sps; + StdVideoH265ShortTermRefPicSet *str = vk_units->str; + StdVideoH265LongTermRefPicsSps *ltr = &vk_units->ltr; + StdVideoH265ProfileTierLevel *ptl_sps = &vk_units->ptl_sps; + StdVideoH265DecPicBufMgr *dpbm_sps = &vk_units->dpbm_sps; + + StdVideoH265HrdParameters *vui_header_sps = &vk_units->vui_header_sps; + StdVideoH265SequenceParameterSetVui *vui_sps = &vk_units->vui_sps; + + StdVideoH265SubLayerHrdParameters *slhdrnal = vk_units->slhdrnal; + StdVideoH265SubLayerHrdParameters 
*slhdrvcl = vk_units->slhdrvcl; + + H265RawPPS *pps = &enc->units.raw_pps; + StdVideoH265PictureParameterSet *vkpps = &vk_units->pps; + + H265RawVPS *vps = &enc->units.raw_vps; + StdVideoH265VideoParameterSet *vkvps = &vk_units->vps; + StdVideoH265ProfileTierLevel *ptl_vps = &vk_units->ptl_vps; + StdVideoH265DecPicBufMgr *dpbm_vps = &vk_units->dpbm_vps; + StdVideoH265HrdParameters *vui_header_vps = &vk_units->vui_header_vps; + + /* SPS */ + for (int i = 0; i < HEVC_MAX_SUB_LAYERS; i++) { + memcpy(&slhdrnal[i], &sps->vui.hrd_parameters.nal_sub_layer_hrd_parameters[i], sizeof(*slhdrnal)); + memcpy(&slhdrvcl[i], &sps->vui.hrd_parameters.vcl_sub_layer_hrd_parameters[i], sizeof(*slhdrvcl)); + slhdrnal[i].cbr_flag = 0x0; + slhdrvcl[i].cbr_flag = 0x0; + for (int j = 0; j < HEVC_MAX_CPB_CNT; j++) { + slhdrnal[i].cbr_flag |= sps->vui.hrd_parameters.nal_sub_layer_hrd_parameters[i].cbr_flag[j] << i; + slhdrvcl[i].cbr_flag |= sps->vui.hrd_parameters.vcl_sub_layer_hrd_parameters[i].cbr_flag[j] << i; + } + } + + *vui_header_sps = (StdVideoH265HrdParameters) { + .flags = (StdVideoH265HrdFlags) { + .nal_hrd_parameters_present_flag = sps->vui.hrd_parameters.nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->vui.hrd_parameters.vcl_hrd_parameters_present_flag, + .sub_pic_hrd_params_present_flag = sps->vui.hrd_parameters.sub_pic_hrd_params_present_flag, + .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->vui.hrd_parameters.sub_pic_cpb_params_in_pic_timing_sei_flag, + .fixed_pic_rate_general_flag = 0x0, + .fixed_pic_rate_within_cvs_flag = 0x0, + .low_delay_hrd_flag = 0x0, + }, + .tick_divisor_minus2 = sps->vui.hrd_parameters.tick_divisor_minus2, + .du_cpb_removal_delay_increment_length_minus1 = sps->vui.hrd_parameters.du_cpb_removal_delay_increment_length_minus1, + .dpb_output_delay_du_length_minus1 = sps->vui.hrd_parameters.dpb_output_delay_du_length_minus1, + .bit_rate_scale = sps->vui.hrd_parameters.bit_rate_scale, + .cpb_size_scale = 
sps->vui.hrd_parameters.cpb_size_scale, + .cpb_size_du_scale = sps->vui.hrd_parameters.cpb_size_du_scale, + .initial_cpb_removal_delay_length_minus1 = sps->vui.hrd_parameters.initial_cpb_removal_delay_length_minus1, + .au_cpb_removal_delay_length_minus1 = sps->vui.hrd_parameters.au_cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = sps->vui.hrd_parameters.dpb_output_delay_length_minus1, + /* Reserved - 3*16 bits */ + .pSubLayerHrdParametersNal = slhdrnal, + .pSubLayerHrdParametersVcl = slhdrvcl, + }; + + for (int i = 0; i < HEVC_MAX_SUB_LAYERS; i++) { + vui_header_sps->flags.fixed_pic_rate_general_flag |= sps->vui.hrd_parameters.fixed_pic_rate_general_flag[i] << i; + vui_header_sps->flags.fixed_pic_rate_within_cvs_flag |= sps->vui.hrd_parameters.fixed_pic_rate_within_cvs_flag[i] << i; + vui_header_sps->flags.low_delay_hrd_flag |= sps->vui.hrd_parameters.low_delay_hrd_flag[i] << i; + } + + for (int i = 0; i < STD_VIDEO_H265_SUBLAYERS_LIST_SIZE; i++) { + dpbm_sps->max_latency_increase_plus1[i] = sps->sps_max_latency_increase_plus1[i]; + dpbm_sps->max_dec_pic_buffering_minus1[i] = sps->sps_max_dec_pic_buffering_minus1[i]; + dpbm_sps->max_num_reorder_pics[i] = sps->sps_max_num_reorder_pics[i]; + } + + *ptl_sps = (StdVideoH265ProfileTierLevel) { + .flags = (StdVideoH265ProfileTierLevelFlags) { + .general_tier_flag = sps->profile_tier_level.general_tier_flag, + .general_progressive_source_flag = sps->profile_tier_level.general_progressive_source_flag, + .general_interlaced_source_flag = sps->profile_tier_level.general_interlaced_source_flag, + .general_non_packed_constraint_flag = sps->profile_tier_level.general_non_packed_constraint_flag, + .general_frame_only_constraint_flag = sps->profile_tier_level.general_frame_only_constraint_flag, + }, + .general_profile_idc = ff_vk_h265_profile_to_vk(sps->profile_tier_level.general_profile_idc), + .general_level_idc = ff_vk_h265_level_to_vk(sps->profile_tier_level.general_level_idc), + }; + + for (int i = 0; i < 
STD_VIDEO_H265_MAX_SHORT_TERM_REF_PIC_SETS; i++) { + const H265RawSTRefPicSet *st_rps = &sps->st_ref_pic_set[i]; + + str[i] = (StdVideoH265ShortTermRefPicSet) { + .flags = (StdVideoH265ShortTermRefPicSetFlags) { + .inter_ref_pic_set_prediction_flag = st_rps->inter_ref_pic_set_prediction_flag, + .delta_rps_sign = st_rps->delta_rps_sign, + }, + .delta_idx_minus1 = st_rps->delta_idx_minus1, + .use_delta_flag = 0x0, + .abs_delta_rps_minus1 = st_rps->abs_delta_rps_minus1, + .used_by_curr_pic_flag = 0x0, + .used_by_curr_pic_s0_flag = 0x0, + .used_by_curr_pic_s1_flag = 0x0, + /* Reserved */ + /* Reserved */ + /* Reserved */ + .num_negative_pics = st_rps->num_negative_pics, + .num_positive_pics = st_rps->num_positive_pics, + }; + + for (int j = 0; j < HEVC_MAX_REFS; j++) { + str[i].use_delta_flag |= st_rps->use_delta_flag[j] << i; + str[i].used_by_curr_pic_flag |= st_rps->used_by_curr_pic_flag[j] << i; + str[i].used_by_curr_pic_s0_flag |= st_rps->used_by_curr_pic_s0_flag[j] << i; + str[i].used_by_curr_pic_s1_flag |= st_rps->used_by_curr_pic_s1_flag[j] << i; + str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc_s0_minus1[j]; + str[i].delta_poc_s1_minus1[j] = st_rps->delta_poc_s1_minus1[j]; + } + } + + ltr->used_by_curr_pic_lt_sps_flag = 0; + for (int i = 0; i < STD_VIDEO_H265_MAX_LONG_TERM_REF_PICS_SPS; i++) { + ltr->used_by_curr_pic_lt_sps_flag |= sps->lt_ref_pic_poc_lsb_sps[i] << i; + ltr->lt_ref_pic_poc_lsb_sps[i] = sps->lt_ref_pic_poc_lsb_sps[i]; + } + + *vksps = (StdVideoH265SequenceParameterSet) { + .flags = (StdVideoH265SpsFlags) { + .sps_temporal_id_nesting_flag = sps->sps_temporal_id_nesting_flag, + .separate_colour_plane_flag = sps->separate_colour_plane_flag, + .conformance_window_flag = sps->conformance_window_flag, + .sps_sub_layer_ordering_info_present_flag = sps->sps_sub_layer_ordering_info_present_flag, + .scaling_list_enabled_flag = sps->scaling_list_enabled_flag, + .sps_scaling_list_data_present_flag = sps->sps_scaling_list_data_present_flag, + 
.amp_enabled_flag = sps->amp_enabled_flag, + .sample_adaptive_offset_enabled_flag = sps->sample_adaptive_offset_enabled_flag, + .pcm_enabled_flag = sps->pcm_enabled_flag, + .pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled_flag, + .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag, + .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag, + .strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled_flag, + .vui_parameters_present_flag = sps->vui_parameters_present_flag, + .sps_extension_present_flag = sps->sps_extension_present_flag, + .sps_range_extension_flag = sps->sps_range_extension_flag, + .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag, + .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag, + .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag, + .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag, + .extended_precision_processing_flag = sps->extended_precision_processing_flag, + .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag, + .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag, + .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag, + .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag, + .sps_scc_extension_flag = sps->sps_scc_extension_flag, + .sps_curr_pic_ref_enabled_flag = sps->sps_curr_pic_ref_enabled_flag, + .palette_mode_enabled_flag = sps->palette_mode_enabled_flag, + .sps_palette_predictor_initializers_present_flag = sps->sps_palette_predictor_initializer_present_flag, + .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disable_flag, + }, + .chroma_format_idc = sps->chroma_format_idc, + .pic_width_in_luma_samples = sps->pic_width_in_luma_samples, + .pic_height_in_luma_samples = sps->pic_height_in_luma_samples, + .sps_video_parameter_set_id = 
sps->sps_video_parameter_set_id, + .sps_max_sub_layers_minus1 = sps->sps_max_sub_layers_minus1, + .sps_seq_parameter_set_id = sps->sps_seq_parameter_set_id, + .bit_depth_luma_minus8 = sps->bit_depth_luma_minus8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4, + .log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3, + .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size, + .log2_min_luma_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2, + .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size, + .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, + .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, + .num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets, + .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, + .pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1, + .pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1, + .log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3, + .log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size, + /* Reserved */ + /* Reserved */ + .palette_max_size = sps->palette_max_size, + .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size, + .motion_vector_resolution_control_idc = sps->motion_vector_resolution_control_idc, + .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializer_minus1, + .conf_win_left_offset = sps->conf_win_left_offset, + .conf_win_right_offset = sps->conf_win_right_offset, + .conf_win_top_offset = sps->conf_win_top_offset, + .conf_win_bottom_offset = sps->conf_win_bottom_offset, + .pProfileTierLevel = ptl_sps, + .pDecPicBufMgr = dpbm_sps, 
+ .pScalingLists = NULL, + .pShortTermRefPicSet = str, + .pLongTermRefPicsSps = ltr, + .pSequenceParameterSetVui = vui_sps, + .pPredictorPaletteEntries = NULL, + }; + + /* PPS */ + *vkpps = (StdVideoH265PictureParameterSet) { + .flags = (StdVideoH265PpsFlags) { + .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag, + .output_flag_present_flag = pps->output_flag_present_flag, + .sign_data_hiding_enabled_flag = pps->sign_data_hiding_enabled_flag, + .cabac_init_present_flag = pps->cabac_init_present_flag, + .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, + .transform_skip_enabled_flag = pps->transform_skip_enabled_flag, + .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag, + .pps_slice_chroma_qp_offsets_present_flag = pps->pps_slice_chroma_qp_offsets_present_flag, + .weighted_pred_flag = pps->weighted_pred_flag, + .weighted_bipred_flag = pps->weighted_bipred_flag, + .transquant_bypass_enabled_flag = pps->transquant_bypass_enabled_flag, + .tiles_enabled_flag = pps->tiles_enabled_flag, + .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag, + .uniform_spacing_flag = pps->uniform_spacing_flag, + .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag, + .pps_loop_filter_across_slices_enabled_flag = pps->pps_loop_filter_across_slices_enabled_flag, + .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, + .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag, + .pps_deblocking_filter_disabled_flag = pps->pps_deblocking_filter_disabled_flag, + .pps_scaling_list_data_present_flag = pps->pps_scaling_list_data_present_flag, + .lists_modification_present_flag = pps->lists_modification_present_flag, + .slice_segment_header_extension_present_flag = pps->slice_segment_header_extension_present_flag, + .pps_extension_present_flag = pps->pps_extension_present_flag, + .cross_component_prediction_enabled_flag = 
pps->cross_component_prediction_enabled_flag, + .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag, + .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag, + .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag, + .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag, + .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializer_present_flag, + .monochrome_palette_flag = pps->monochrome_palette_flag, + .pps_range_extension_flag = pps->pps_range_extension_flag, + }, + .pps_pic_parameter_set_id = pps->pps_pic_parameter_set_id, + .pps_seq_parameter_set_id = pps->pps_seq_parameter_set_id, + .sps_video_parameter_set_id = sps->sps_video_parameter_set_id, + .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, + .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1, + .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1, + .init_qp_minus26 = pps->init_qp_minus26, + .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, + .pps_cb_qp_offset = pps->pps_cb_qp_offset, + .pps_cr_qp_offset = pps->pps_cr_qp_offset, + .pps_beta_offset_div2 = pps->pps_beta_offset_div2, + .pps_tc_offset_div2 = pps->pps_tc_offset_div2, + .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2, + .log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size_minus2, + .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth, + .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1, + .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma, + .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma, + .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset_plus5, + .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset_plus5, + .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset_plus3, + 
.pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializer, + .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry_minus8, + .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry_minus8, + .num_tile_columns_minus1 = pps->num_tile_columns_minus1, + .num_tile_rows_minus1 = pps->num_tile_rows_minus1, + .pScalingLists = NULL, + .pPredictorPaletteEntries = NULL, + }; + + for (int i = 0; i < pps->num_tile_columns_minus1; i++) + vkpps->column_width_minus1[i] = pps->column_width_minus1[i]; + + for (int i = 0; i < pps->num_tile_rows_minus1; i++) + vkpps->row_height_minus1[i] = pps->row_height_minus1[i]; + + for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) { + vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i]; + vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i]; + } + + /* VPS */ + for (int i = 0; i < STD_VIDEO_H265_SUBLAYERS_LIST_SIZE; i++) { + dpbm_vps->max_latency_increase_plus1[i] = vps->vps_max_latency_increase_plus1[i]; + dpbm_vps->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering_minus1[i]; + dpbm_vps->max_num_reorder_pics[i] = vps->vps_max_num_reorder_pics[i]; + } + + *ptl_vps = (StdVideoH265ProfileTierLevel) { + .flags = (StdVideoH265ProfileTierLevelFlags) { + .general_tier_flag = vps->profile_tier_level.general_tier_flag, + .general_progressive_source_flag = vps->profile_tier_level.general_progressive_source_flag, + .general_interlaced_source_flag = vps->profile_tier_level.general_interlaced_source_flag, + .general_non_packed_constraint_flag = vps->profile_tier_level.general_non_packed_constraint_flag, + .general_frame_only_constraint_flag = vps->profile_tier_level.general_frame_only_constraint_flag, + }, + .general_profile_idc = ff_vk_h265_profile_to_vk(vps->profile_tier_level.general_profile_idc), + .general_level_idc = ff_vk_h265_level_to_vk(vps->profile_tier_level.general_level_idc), + }; + + *vkvps = (StdVideoH265VideoParameterSet) { + .flags = (StdVideoH265VpsFlags) { + 
.vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag, + .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag, + .vps_timing_info_present_flag = vps->vps_timing_info_present_flag, + .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag, + }, + .vps_video_parameter_set_id = vps->vps_video_parameter_set_id, + .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers_minus1, + /* Reserved */ + /* Reserved */ + .vps_num_units_in_tick = vps->vps_num_units_in_tick, + .vps_time_scale = vps->vps_time_scale, + .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one_minus1, + /* Reserved */ + .pDecPicBufMgr = dpbm_vps, + .pHrdParameters = vui_header_vps, + .pProfileTierLevel = ptl_vps, + }; + + return 0; +} + +static int create_session_params(AVCodecContext *avctx) +{ + int err; + VulkanEncodeH265Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VulkanH265Units vk_units = { 0 }; + + VkVideoEncodeH265SessionParametersAddInfoKHR h265_params_info; + VkVideoEncodeH265SessionParametersCreateInfoKHR h265_params; + + /* Convert it to Vulkan */ + err = base_unit_to_vk(avctx, &vk_units); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to convert SPS/PPS units to Vulkan: %s\n", + av_err2str(err)); + return err; + } + + /* Destroy the session params */ + if (ctx->session_params) + vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev, + ctx->session_params, + s->hwctx->alloc); + + h265_params_info = (VkVideoEncodeH265SessionParametersAddInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR, + .pStdSPSs = &vk_units.sps, + .stdSPSCount = 1, + .pStdPPSs = &vk_units.pps, + .stdPPSCount = 1, + .pStdVPSs = &vk_units.vps, + .stdVPSCount = 1, + }; + h265_params = (VkVideoEncodeH265SessionParametersCreateInfoKHR) { + .sType = 
VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, + .maxStdSPSCount = 1, + .maxStdPPSCount = 1, + .maxStdVPSCount = 1, + .pParametersAddInfo = &h265_params_info, + }; + + return ff_vulkan_encode_create_session_params(avctx, ctx, &h265_params); +} + +static int parse_feedback_units(AVCodecContext *avctx, + const uint8_t *data, size_t size, + int sps_override, int pps_override) +{ + int err; + VulkanEncodeH265Context *enc = avctx->priv_data; + + CodedBitstreamContext *cbs; + CodedBitstreamFragment au = { 0 }; + + err = ff_cbs_init(&cbs, AV_CODEC_ID_HEVC, avctx); + if (err < 0) + return err; + + err = ff_cbs_read(cbs, &au, NULL, data, size); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to parse feedback units, bad drivers: %s\n", + av_err2str(err)); + goto fail; + } + + if (sps_override) { + for (int i = 0; i < au.nb_units; i++) { + if (au.units[i].type == HEVC_NAL_SPS) { + H265RawSPS *sps = au.units[i].content; + enc->units.raw_sps.pic_width_in_luma_samples = sps->pic_width_in_luma_samples; + enc->units.raw_sps.pic_height_in_luma_samples = sps->pic_height_in_luma_samples; + enc->units.raw_sps.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size; + enc->units.raw_sps.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter; + enc->units.raw_sps.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra; + } + } + } + + /* If PPS has an override, just copy it entirely. 
*/ + if (pps_override) { + for (int i = 0; i < au.nb_units; i++) { + if (au.units[i].type == HEVC_NAL_PPS) { + H265RawPPS *pps = au.units[i].content; + memcpy(&enc->units.raw_pps, pps, sizeof(*pps)); + enc->fixed_qp_idr = pps->init_qp_minus26 + 26; + break; + } + } + } + + err = 0; +fail: + ff_cbs_fragment_free(&au); + ff_cbs_close(&cbs); + + return err; +} + +static int init_base_units(AVCodecContext *avctx) +{ + int err; + VkResult ret; + VulkanEncodeH265Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFVulkanContext *s = &ctx->s; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + VkVideoEncodeH265SessionParametersGetInfoKHR h265_params_info; + VkVideoEncodeSessionParametersGetInfoKHR params_info; + VkVideoEncodeH265SessionParametersFeedbackInfoKHR h265_params_feedback; + VkVideoEncodeSessionParametersFeedbackInfoKHR params_feedback; + + void *data = NULL; + size_t data_size = 0; + + /* Generate SPS/PPS unit info */ + err = init_sequence_headers(avctx); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPS/PPS units: %s\n", + av_err2str(err)); + return err; + } + + /* Create session parameters from them */ + err = create_session_params(avctx); + if (err < 0) + return err; + + h265_params_info = (VkVideoEncodeH265SessionParametersGetInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_GET_INFO_KHR, + .writeStdSPS = 1, + .writeStdPPS = 1, + .writeStdVPS = 1, + .stdSPSId = enc->units.raw_sps.sps_seq_parameter_set_id, + .stdPPSId = enc->units.raw_pps.pps_pic_parameter_set_id, + .stdVPSId = enc->units.raw_vps.vps_video_parameter_set_id, + }; + params_info = (VkVideoEncodeSessionParametersGetInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_GET_INFO_KHR, + .pNext = &h265_params_info, + .videoSessionParameters = ctx->session_params, + }; + + h265_params_feedback = (VkVideoEncodeH265SessionParametersFeedbackInfoKHR) { + .sType = 
VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_FEEDBACK_INFO_KHR, + }; + params_feedback = (VkVideoEncodeSessionParametersFeedbackInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_SESSION_PARAMETERS_FEEDBACK_INFO_KHR, + .pNext = &h265_params_feedback, + }; + + ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, &params_info, + &params_feedback, + &data_size, data); + if (ret == VK_INCOMPLETE || + (ret == VK_SUCCESS) && (data_size > 0)) { + data = av_mallocz(data_size); + if (!data) + return AVERROR(ENOMEM); + } else { + av_log(avctx, AV_LOG_ERROR, "Unable to get feedback for H.265 units = %"SIZE_SPECIFIER"\n", data_size); + return err; + } + + ret = vk->GetEncodedVideoSessionParametersKHR(s->hwctx->act_dev, &params_info, + &params_feedback, + &data_size, data); + if (ret != VK_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Error writing feedback units\n"); + return err; + } + + av_log(avctx, AV_LOG_VERBOSE, "Feedback units written, overrides: %i (SPS: %i PPS: %i VPS: %i)\n", + params_feedback.hasOverrides, + h265_params_feedback.hasStdSPSOverrides, + h265_params_feedback.hasStdPPSOverrides, + h265_params_feedback.hasStdVPSOverrides); + + params_feedback.hasOverrides = 1; + h265_params_feedback.hasStdSPSOverrides = 1; + h265_params_feedback.hasStdPPSOverrides = 1; + + /* No need to sync any overrides */ + if (!params_feedback.hasOverrides) + return 0; + + /* Parse back tne units and override */ + err = parse_feedback_units(avctx, data, data_size, + h265_params_feedback.hasStdSPSOverrides, + h265_params_feedback.hasStdPPSOverrides); + if (err < 0) + return err; + + /* Create final session parameters */ + err = create_session_params(avctx); + if (err < 0) + return err; + + return 0; +} + +static int vulkan_encode_h265_add_nal(AVCodecContext *avctx, + CodedBitstreamFragment *au, + void *nal_unit) +{ + H265RawNALUnitHeader *header = nal_unit; + + int err = ff_cbs_insert_unit_content(au, -1, + header->nal_unit_type, nal_unit, NULL); + if (err < 0) + av_log(avctx, 
AV_LOG_ERROR, "Failed to add NAL unit: " + "type = %d.\n", header->nal_unit_type); + + return err; +} + +static int write_access_unit(AVCodecContext *avctx, + uint8_t *data, size_t *data_len, + CodedBitstreamFragment *au) +{ + VulkanEncodeH265Context *enc = avctx->priv_data; + + int err = ff_cbs_write_fragment_data(enc->cbs, au); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Failed to write packed header.\n"); + return err; + } + + if (*data_len < au->data_size) { + av_log(avctx, AV_LOG_ERROR, "Access unit too large: %"SIZE_SPECIFIER" < %"SIZE_SPECIFIER".\n", + *data_len, au->data_size); + return AVERROR(ENOSPC); + } + + memcpy(data, au->data, au->data_size); + *data_len = au->data_size; + + return 0; +} + +static int write_sequence_headers(AVCodecContext *avctx, + FFHWBaseEncodePicture *base_pic, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeH265Context *enc = avctx->priv_data; + VulkanEncodeH265Picture *hp = base_pic ? base_pic->codec_priv : NULL; + CodedBitstreamFragment *au = &enc->current_access_unit; + + if (hp && hp->units_needed & UNIT_AUD) { + err = vulkan_encode_h265_add_nal(avctx, au, &enc->raw_aud); + if (err < 0) + goto fail; + hp->units_needed &= ~UNIT_AUD; + } + + err = vulkan_encode_h265_add_nal(avctx, au, &enc->units.raw_vps); + if (err < 0) + goto fail; + + err = vulkan_encode_h265_add_nal(avctx, au, &enc->units.raw_sps); + if (err < 0) + goto fail; + + err = vulkan_encode_h265_add_nal(avctx, au, &enc->units.raw_pps); + if (err < 0) + goto fail; + + err = write_access_unit(avctx, data, data_len, au); +fail: + ff_cbs_fragment_reset(au); + return err; +} + +static int write_extra_headers(AVCodecContext *avctx, + FFHWBaseEncodePicture *base_pic, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeH265Context *enc = avctx->priv_data; + VulkanEncodeH265Picture *hp = base_pic->codec_priv; + CodedBitstreamFragment *au = &enc->current_access_unit; + + if (hp->units_needed & UNIT_AUD) { + err = 
vulkan_encode_h265_add_nal(avctx, au, &enc->raw_aud); + if (err < 0) + goto fail; + } + + if (hp->units_needed & UNIT_SEI_MASTERING_DISPLAY) { + err = ff_cbs_sei_add_message(enc->cbs, au, 1, + SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME, + &enc->sei_mastering_display, NULL); + if (err < 0) + goto fail; + } + + if (hp->units_needed & UNIT_SEI_CONTENT_LIGHT_LEVEL) { + err = ff_cbs_sei_add_message(enc->cbs, au, 1, + SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO, + &enc->sei_content_light_level, NULL); + if (err < 0) + goto fail; + } + if (hp->units_needed & UNIT_SEI_A53_CC) { + err = ff_cbs_sei_add_message(enc->cbs, au, 1, + SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35, + &enc->sei_a53cc, NULL); + if (err < 0) + goto fail; + } + + if (hp->units_needed) { + err = write_access_unit(avctx, data, data_len, au); + if (err < 0) + goto fail; + } else { + err = 0; + *data_len = 0; + } + +fail: + ff_cbs_fragment_reset(au); + return err; +} + +static int write_filler(AVCodecContext *avctx, uint32_t filler, + uint8_t *data, size_t *data_len) +{ + int err; + VulkanEncodeH265Context *enc = avctx->priv_data; + CodedBitstreamFragment *au = &enc->current_access_unit; + + H265RawFiller raw_filler = { + .nal_unit_header = + { + .nal_unit_type = HEVC_NAL_FD_NUT, + .nuh_temporal_id_plus1 = 1, + }, + .filler_size = filler, + }; + + err = vulkan_encode_h265_add_nal(avctx, au, &raw_filler); + if (err < 0) + goto fail; + + err = write_access_unit(avctx, data, data_len, au); +fail: + ff_cbs_fragment_reset(au); + return err; +} + +static const FFVulkanCodec enc_cb = { + .flags = FF_HW_FLAG_B_PICTURES | + FF_HW_FLAG_B_PICTURE_REFERENCES | + FF_HW_FLAG_NON_IDR_KEY_PICTURES, + .picture_priv_data_size = sizeof(VulkanEncodeH265Picture), + .filler_header_size = 7, + .init_profile = init_profile, + .init_pic_rc = init_pic_rc, + .init_pic_params = init_pic_params, + .write_sequence_headers = write_sequence_headers, + .write_extra_headers = write_extra_headers, + .write_filler = write_filler, +}; + +static av_cold int 
vulkan_encode_h265_init(AVCodecContext *avctx) +{ + int err, ref_l0, ref_l1; + VulkanEncodeH265Context *enc = avctx->priv_data; + FFVulkanEncodeContext *ctx = &enc->common; + FFHWBaseEncodeContext *base_ctx = &ctx->base; + int flags; + + if (avctx->profile == AV_PROFILE_UNKNOWN) + avctx->profile = enc->common.opts.profile; + + enc->caps = (VkVideoEncodeH265CapabilitiesKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_CAPABILITIES_KHR, + }; + + enc->quality_props = (VkVideoEncodeH265QualityLevelPropertiesKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_QUALITY_LEVEL_PROPERTIES_KHR, + }; + + err = ff_vulkan_encode_init(avctx, &enc->common, + &ff_vk_enc_h265_desc, &enc_cb, + &enc->caps, &enc->quality_props); + if (err < 0) + return err; + + av_log(avctx, AV_LOG_VERBOSE, "H265 encoder capabilities:\n"); + av_log(avctx, AV_LOG_VERBOSE, " Standard capability flags:\n"); + av_log(avctx, AV_LOG_VERBOSE, " separate_color_plane: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SEPARATE_COLOR_PLANE_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " sample_adaptive_offset: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " scaling_lists: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SCALING_LIST_DATA_PRESENT_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " pcm: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_PCM_ENABLED_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " temporal_mvp: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SPS_TEMPORAL_MVP_ENABLED_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " init_qp: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_INIT_QP_MINUS26_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " weighted:%s%s\n", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_WEIGHTED_PRED_FLAG_SET_BIT_KHR ? 
+ " pred" : "", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_WEIGHTED_BIPRED_FLAG_SET_BIT_KHR ? + " bipred" : ""); + av_log(avctx, AV_LOG_VERBOSE, " parallel_merge_level: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_LOG2_PARALLEL_MERGE_LEVEL_MINUS2_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " sign_data_hiding: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SIGN_DATA_HIDING_ENABLED_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " transform_skip:%s%s\n", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_TRANSFORM_SKIP_ENABLED_FLAG_SET_BIT_KHR ? + " set" : "", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_TRANSFORM_SKIP_ENABLED_FLAG_UNSET_BIT_KHR ? + " unset" : ""); + av_log(avctx, AV_LOG_VERBOSE, " slice_chroma_qp_offsets: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " transquant_bypass: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_TRANSQUANT_BYPASS_ENABLED_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " constrained_intra_pred: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " entrypy_coding_sync: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_ENTROPY_CODING_SYNC_ENABLED_FLAG_SET_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " dependent_slice_segment:%s%s\n", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_DEPENDENT_SLICE_SEGMENTS_ENABLED_FLAG_SET_BIT_KHR ? + " enabled" : "", + enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_DEPENDENT_SLICE_SEGMENT_FLAG_SET_BIT_KHR ? 
+ " set" : ""); + av_log(avctx, AV_LOG_VERBOSE, " slice_qp_delta: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_SLICE_QP_DELTA_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " different_slice_qp_delta: %i\n", + !!(enc->caps.stdSyntaxFlags & VK_VIDEO_ENCODE_H265_STD_DIFFERENT_SLICE_QP_DELTA_BIT_KHR)); + + av_log(avctx, AV_LOG_VERBOSE, " Capability flags:\n"); + av_log(avctx, AV_LOG_VERBOSE, " hdr_compliance: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " pred_weight_table_generated: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PREDICTION_WEIGHT_TABLE_GENERATED_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " row_unaligned_slice: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_ROW_UNALIGNED_SLICE_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " different_slice_type: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " b_frame_in_l0_list: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L0_LIST_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " b_frame_in_l1_list: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L1_LIST_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " per_pict_type_min_max_qp: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " per_slice_constant_qp: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_PER_SLICE_CONSTANT_QP_BIT_KHR)); + av_log(avctx, AV_LOG_VERBOSE, " generate_prefix_nalu: %i\n", + !!(enc->caps.flags & VK_VIDEO_ENCODE_H264_CAPABILITY_GENERATE_PREFIX_NALU_BIT_KHR)); + + av_log(avctx, AV_LOG_VERBOSE, " Capabilities:\n"); + av_log(avctx, AV_LOG_VERBOSE, " maxLevelIdc: %i\n", + enc->caps.maxLevelIdc); + av_log(avctx, AV_LOG_VERBOSE, " maxSliceCount: %i\n", + enc->caps.maxSliceSegmentCount); + av_log(avctx, AV_LOG_VERBOSE, " 
maxTiles: %ix%i\n", + enc->caps.maxTiles.width, enc->caps.maxTiles.height); + av_log(avctx, AV_LOG_VERBOSE, " cbtSizes: 0x%x\n", + enc->caps.ctbSizes); + av_log(avctx, AV_LOG_VERBOSE, " transformBlockSizes: 0x%x\n", + enc->caps.transformBlockSizes); + av_log(avctx, AV_LOG_VERBOSE, " max(P/B)PictureL0ReferenceCount: %i P's; %i B's\n", + enc->caps.maxPPictureL0ReferenceCount, + enc->caps.maxBPictureL0ReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " maxL1ReferenceCount: %i\n", + enc->caps.maxL1ReferenceCount); + av_log(avctx, AV_LOG_VERBOSE, " maxSubLayerCount: %i\n", + enc->caps.maxSubLayerCount); + av_log(avctx, AV_LOG_VERBOSE, " expectDyadicTemporalLayerPattern: %i\n", + enc->caps.expectDyadicTemporalSubLayerPattern); + av_log(avctx, AV_LOG_VERBOSE, " min/max Qp: [%i, %i]\n", + enc->caps.minQp, enc->caps.maxQp); + av_log(avctx, AV_LOG_VERBOSE, " prefersGopRemainingFrames: %i\n", + enc->caps.prefersGopRemainingFrames); + av_log(avctx, AV_LOG_VERBOSE, " requiresGopRemainingFrames: %i\n", + enc->caps.requiresGopRemainingFrames); + + err = init_enc_options(avctx); + if (err < 0) + return err; + + flags = ctx->codec->flags; + if (!enc->caps.maxPPictureL0ReferenceCount && + !enc->caps.maxBPictureL0ReferenceCount && + !enc->caps.maxL1ReferenceCount) { + /* Intra-only */ + flags |= FF_HW_FLAG_INTRA_ONLY; + ref_l0 = ref_l1 = 0; + } else if (!enc->caps.maxPPictureL0ReferenceCount) { + /* No P-frames? How. 
*/ + base_ctx->p_to_gpb = 1; + ref_l0 = enc->caps.maxBPictureL0ReferenceCount; + ref_l1 = enc->caps.maxL1ReferenceCount; + } else if (!enc->caps.maxBPictureL0ReferenceCount && + !enc->caps.maxL1ReferenceCount) { + /* No B-frames */ + flags &= ~(FF_HW_FLAG_B_PICTURES | FF_HW_FLAG_B_PICTURE_REFERENCES); + ref_l0 = enc->caps.maxPPictureL0ReferenceCount; + ref_l1 = 0; + } else { + /* P and B frames */ + ref_l0 = FFMIN(enc->caps.maxPPictureL0ReferenceCount, + enc->caps.maxBPictureL0ReferenceCount); + ref_l1 = enc->caps.maxL1ReferenceCount; + } + + err = ff_hw_base_init_gop_structure(base_ctx, avctx, ref_l0, ref_l1, + flags, 0); + if (err < 0) + return err; + + base_ctx->output_delay = base_ctx->b_per_p; + base_ctx->decode_delay = base_ctx->max_b_depth; + + /* Init CBS */ + err = ff_cbs_init(&enc->cbs, AV_CODEC_ID_HEVC, avctx); + if (err < 0) + return err; + + /* Create units and session parameters */ + err = init_base_units(avctx); + if (err < 0) + return err; + + /* Write out extradata */ + err = ff_vulkan_write_global_header(avctx, &enc->common); + if (err < 0) + return err; + + return 0; +} + +static av_cold int vulkan_encode_h265_close(AVCodecContext *avctx) +{ + VulkanEncodeH265Context *enc = avctx->priv_data; + ff_vulkan_encode_uninit(&enc->common); + return 0; +} + +#define OFFSET(x) offsetof(VulkanEncodeH265Context, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) +static const AVOption vulkan_encode_h265_options[] = { + HW_BASE_ENCODE_COMMON_OPTIONS, + VULKAN_ENCODE_COMMON_OPTIONS, + + { "profile", "Set profile (profile_idc and constraint_set*_flag)", + OFFSET(common.opts.profile), AV_OPT_TYPE_INT, + { .i64 = AV_PROFILE_UNKNOWN }, AV_PROFILE_UNKNOWN, 0xffff, FLAGS, .unit = "profile" }, + +#define PROFILE(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ + { .i64 = value }, 0, 0, FLAGS, .unit = "profile" + { PROFILE("main", AV_PROFILE_HEVC_MAIN) }, + { PROFILE("main10", AV_PROFILE_HEVC_MAIN_10) }, + { PROFILE("rext", AV_PROFILE_HEVC_REXT) 
}, +#undef PROFILE + + { "tier", "Set tier (general_tier_flag)", OFFSET(common.opts.tier), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, .unit = "tier" }, + { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, .unit = "tier" }, + { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "tier" }, + + { "level", "Set level (general_level_idc)", + OFFSET(common.opts.level), AV_OPT_TYPE_INT, + { .i64 = AV_LEVEL_UNKNOWN }, AV_LEVEL_UNKNOWN, 0xff, FLAGS, .unit = "level" }, + +#define LEVEL(name, value) name, NULL, 0, AV_OPT_TYPE_CONST, \ + { .i64 = value }, 0, 0, FLAGS, .unit = "level" + { LEVEL("1", 30) }, + { LEVEL("2", 60) }, + { LEVEL("2.1", 63) }, + { LEVEL("3", 90) }, + { LEVEL("3.1", 93) }, + { LEVEL("4", 120) }, + { LEVEL("4.1", 123) }, + { LEVEL("5", 150) }, + { LEVEL("5.1", 153) }, + { LEVEL("5.2", 156) }, + { LEVEL("6", 180) }, + { LEVEL("6.1", 183) }, + { LEVEL("6.2", 186) }, +#undef LEVEL + + { "units", "Set units to include", OFFSET(unit_elems), AV_OPT_TYPE_FLAGS, { .i64 = UNIT_SEI_MASTERING_DISPLAY | UNIT_SEI_CONTENT_LIGHT_LEVEL | UNIT_SEI_A53_CC }, 0, INT_MAX, FLAGS, "units" }, + { "hdr", "Include HDR metadata for mastering display colour volume and content light level information", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_MASTERING_DISPLAY | UNIT_SEI_CONTENT_LIGHT_LEVEL }, INT_MIN, INT_MAX, FLAGS, "units" }, + { "a53_cc", "Include A/53 caption data", 0, AV_OPT_TYPE_CONST, { .i64 = UNIT_SEI_A53_CC }, INT_MIN, INT_MAX, FLAGS, "units" }, + + { NULL }, +}; + +static const FFCodecDefault vulkan_encode_h265_defaults[] = { + { "b", "0" }, + { "bf", "2" }, + { "g", "300" }, + { "i_qfactor", "1" }, + { "i_qoffset", "0" }, + { "b_qfactor", "6/5" }, + { "b_qoffset", "0" }, + { "qmin", "-1" }, + { "qmax", "-1" }, + { "refs", "0" }, + { NULL }, +}; + +static const AVClass vulkan_encode_h265_class = { + .class_name = "hevc_vulkan", + .item_name = av_default_item_name, + .option = vulkan_encode_h265_options, + .version = LIBAVUTIL_VERSION_INT, 
+}; + +const FFCodec ff_hevc_vulkan_encoder = { + .p.name = "hevc_vulkan", + CODEC_LONG_NAME("H.265/HEVC (Vulkan)"), + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_HEVC, + .priv_data_size = sizeof(VulkanEncodeH265Context), + .init = &vulkan_encode_h265_init, + FF_CODEC_RECEIVE_PACKET_CB(&ff_vulkan_encode_receive_packet), + .close = &vulkan_encode_h265_close, + .p.priv_class = &vulkan_encode_h265_class, + .p.capabilities = AV_CODEC_CAP_DELAY | + AV_CODEC_CAP_HARDWARE | + AV_CODEC_CAP_DR1 | + AV_CODEC_CAP_ENCODER_FLUSH | + AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .defaults = vulkan_encode_h265_defaults, + CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), + .hw_configs = ff_vulkan_encode_hw_configs, + .p.wrapper_name = "vulkan", +}; -- 2.49.1 From eeafc4f977be2d52e6b7015073d143e64fbb5c84 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:36:15 +0000 Subject: [PATCH 079/118] Changing vulkan file directory --- libavcodec/vulkan_ffv1.c | 1175 -------------------------------------- 1 file changed, 1175 deletions(-) delete mode 100644 libavcodec/vulkan_ffv1.c diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c deleted file mode 100644 index b02bc71683..0000000000 --- a/libavcodec/vulkan_ffv1.c +++ /dev/null @@ -1,1175 +0,0 @@ -/* - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "vulkan_decode.h" -#include "hwaccel_internal.h" - -#include "ffv1.h" -#include "ffv1_vulkan.h" -#include "libavutil/vulkan_spirv.h" -#include "libavutil/mem.h" - -#define RGB_LINECACHE 2 - -extern const char *ff_source_common_comp; -extern const char *ff_source_rangecoder_comp; -extern const char *ff_source_ffv1_vlc_comp; -extern const char *ff_source_ffv1_common_comp; -extern const char *ff_source_ffv1_dec_setup_comp; -extern const char *ff_source_ffv1_reset_comp; -extern const char *ff_source_ffv1_dec_comp; - -const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = { - .codec_id = AV_CODEC_ID_FFV1, - .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR, - .queue_flags = VK_QUEUE_COMPUTE_BIT, -}; - -typedef struct FFv1VulkanDecodePicture { - FFVulkanDecodePicture vp; - - AVBufferRef *slice_state; - uint32_t plane_state_size; - uint32_t slice_state_size; - uint32_t slice_data_size; - - AVBufferRef *slice_offset_buf; - uint32_t *slice_offset; - int slice_num; - - AVBufferRef *slice_status_buf; - int crc_checked; -} FFv1VulkanDecodePicture; - -typedef struct FFv1VulkanDecodeContext { - AVBufferRef *intermediate_frames_ref[2]; /* 16/32 bit */ - - FFVulkanShader setup; - FFVulkanShader reset[2]; /* AC/Golomb */ - FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */ - - FFVkBuffer rangecoder_static_buf; - FFVkBuffer quant_buf; - FFVkBuffer crc_tab_buf; - - AVBufferPool *slice_state_pool; - AVBufferPool *slice_offset_pool; - AVBufferPool *slice_status_pool; -} FFv1VulkanDecodeContext; - -typedef struct FFv1VkParameters { - VkDeviceAddress slice_data; - VkDeviceAddress slice_state; - - int fmt_lut[4]; - uint32_t img_size[2]; - uint32_t chroma_shift[2]; - - uint32_t plane_state_size; - uint32_t crcref; - int rct_offset; - - uint8_t 
extend_lookup[8]; - uint8_t bits_per_raw_sample; - uint8_t quant_table_count; - uint8_t version; - uint8_t micro_version; - uint8_t key_frame; - uint8_t planes; - uint8_t codec_planes; - uint8_t color_planes; - uint8_t transparency; - uint8_t planar_rgb; - uint8_t colorspace; - uint8_t ec; - uint8_t golomb; - uint8_t check_crc; - uint8_t padding[3]; -} FFv1VkParameters; - -static void add_push_data(FFVulkanShader *shd) -{ - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, u8buf slice_data; ); - GLSLC(1, u8buf slice_state; ); - GLSLC(0, ); - GLSLC(1, ivec4 fmt_lut; ); - GLSLC(1, uvec2 img_size; ); - GLSLC(1, uvec2 chroma_shift; ); - GLSLC(0, ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint32_t crcref; ); - GLSLC(1, int rct_offset; ); - GLSLC(0, ); - GLSLC(1, uint8_t extend_lookup[8]; ); - GLSLC(1, uint8_t bits_per_raw_sample; ); - GLSLC(1, uint8_t quant_table_count; ); - GLSLC(1, uint8_t version; ); - GLSLC(1, uint8_t micro_version; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t planes; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t color_planes; ); - GLSLC(1, uint8_t transparency; ); - GLSLC(1, uint8_t planar_rgb; ); - GLSLC(1, uint8_t colorspace; ); - GLSLC(1, uint8_t ec; ); - GLSLC(1, uint8_t golomb; ); - GLSLC(1, uint8_t check_crc; ); - GLSLC(1, uint8_t padding[3]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), - VK_SHADER_STAGE_COMPUTE_BIT); -} - -static int vk_ffv1_start_frame(AVCodecContext *avctx, - const AVBufferRef *buffer_ref, - av_unused const uint8_t *buffer, - av_unused uint32_t size) -{ - int err; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - FFv1VulkanDecodeContext *fv = ctx->sd_ctx; - FFV1Context *f = avctx->priv_data; - - FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &fp->vp; - - AVHWFramesContext *hwfc = (AVHWFramesContext 
*)avctx->hw_frames_ctx->data; - enum AVPixelFormat sw_format = hwfc->sw_format; - - int max_contexts; - int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) && - !(sw_format == AV_PIX_FMT_YA8); - - fp->slice_num = 0; - - max_contexts = 0; - for (int i = 0; i < f->quant_table_count; i++) - max_contexts = FFMAX(f->context_count[i], max_contexts); - - /* Allocate slice buffer data */ - if (f->ac == AC_GOLOMB_RICE) - fp->plane_state_size = 8; - else - fp->plane_state_size = CONTEXT_SIZE; - - fp->plane_state_size *= max_contexts; - fp->slice_state_size = fp->plane_state_size*f->plane_count; - - fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */ - fp->slice_state_size += fp->slice_data_size; - fp->slice_state_size = FFALIGN(fp->slice_state_size, 8); - - fp->crc_checked = f->ec && (avctx->err_recognition & AV_EF_CRCCHECK); - - /* Host map the input slices data if supported */ - if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) - ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data, - buffer_ref, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); - - /* Allocate slice state data */ - if (f->picture.f->flags & AV_FRAME_FLAG_KEY) { - err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool, - &fp->slice_state, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, f->slice_count*fp->slice_state_size, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (err < 0) - return err; - } else { - FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private; - fp->slice_state = av_buffer_ref(fpl->slice_state); - if (!fp->slice_state) - return AVERROR(ENOMEM); - } - - /* Allocate slice offsets buffer */ - err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool, - &fp->slice_offset_buf, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, 2*f->slice_count*sizeof(uint32_t), - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - if (err < 0) - return err; - - /* Allocate slice status buffer */ - err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_status_pool, - &fp->slice_status_buf, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, 2*f->slice_count*sizeof(uint32_t), - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - if (err < 0) - return err; - - /* Prepare frame to be used */ - err = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1, - FF_VK_REP_NATIVE, 0); - if (err < 0) - return err; - - /* Create a temporaty frame for RGB */ - if (is_rgb) { - vp->dpb_frame = av_frame_alloc(); - if (!vp->dpb_frame) - return AVERROR(ENOMEM); - - err = av_hwframe_get_buffer(fv->intermediate_frames_ref[f->use32bit], - vp->dpb_frame, 0); - if (err < 0) - return err; - } - - return 0; -} - -static int vk_ffv1_decode_slice(AVCodecContext *avctx, - const uint8_t *data, - uint32_t size) -{ - FFV1Context *f = avctx->priv_data; - - FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &fp->vp; - - FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; - FFVkBuffer *slices_buf = vp->slices_buf ? 
(FFVkBuffer *)vp->slices_buf->data : NULL; - - if (slices_buf && slices_buf->host_ref) { - AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 0)*sizeof(uint32_t), - data - slices_buf->mapped_mem); - AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 1)*sizeof(uint32_t), - size); - - fp->slice_num++; - } else { - int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, - &fp->slice_num, - (const uint32_t **)&fp->slice_offset); - if (err < 0) - return err; - - AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 0)*sizeof(uint32_t), - fp->slice_offset[fp->slice_num - 1]); - AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 1)*sizeof(uint32_t), - size); - } - - return 0; -} - -static int vk_ffv1_end_frame(AVCodecContext *avctx) -{ - int err; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - FFV1Context *f = avctx->priv_data; - FFv1VulkanDecodeContext *fv = ctx->sd_ctx; - FFv1VkParameters pd; - FFv1VkResetParameters pd_reset; - - AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data; - enum AVPixelFormat sw_format = hwfc->sw_format; - - int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8; - int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) && - !(sw_format == AV_PIX_FMT_YA8); - int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components; - - FFVulkanShader *reset_shader; - FFVulkanShader *decode_shader; - - FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &fp->vp; - - FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data; - FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data; - FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; - FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data; - - VkImageView rct_image_views[AV_NUM_DATA_POINTERS]; - - AVFrame *decode_dst = is_rgb ? 
vp->dpb_frame : f->picture.f; - VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->view.out; - - VkImageMemoryBarrier2 img_bar[37]; - int nb_img_bar = 0; - VkBufferMemoryBarrier2 buf_bar[8]; - int nb_buf_bar = 0; - - FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); - ff_vk_exec_start(&ctx->s, exec); - - /* Prepare deps */ - RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - - err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, - f->picture.f); - if (err < 0) - return err; - - if (is_rgb) { - RET(ff_vk_create_imageviews(&ctx->s, exec, rct_image_views, - vp->dpb_frame, FF_VK_REP_NATIVE)); - RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_CLEAR_BIT)); - ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_ACCESS_2_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - } - - if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) { - FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private; - FFVulkanDecodePicture *vpl = &fpl->vp; - - /* Wait on the previous frame */ - RET(ff_vk_exec_add_dep_wait_sem(&ctx->s, exec, vpl->sem, vpl->sem_value, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)); - } - - RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1)); - RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_status_buf, 1, 1)); - RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0)); - vp->slices_buf = NULL; - RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0)); - fp->slice_offset_buf = NULL; - - /* Entry barrier for the slice state */ - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = slice_state->stage, - .dstStageMask = 
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = slice_state->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = slice_state->buf, - .offset = 0, - .size = fp->slice_data_size*f->slice_count, - }; - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - slice_state->stage = buf_bar[0].dstStageMask; - slice_state->access = buf_bar[0].dstAccessMask; - nb_buf_bar = 0; - nb_img_bar = 0; - - /* Setup shader */ - ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, - 1, 0, 0, - slice_state, - 0, fp->slice_data_size*f->slice_count, - VK_FORMAT_UNDEFINED); - ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, - 1, 1, 0, - slice_offset, - 0, 2*f->slice_count*sizeof(uint32_t), - VK_FORMAT_UNDEFINED); - ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, - 1, 2, 0, - slice_status, - 0, 2*f->slice_count*sizeof(uint32_t), - VK_FORMAT_UNDEFINED); - - ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup); - pd = (FFv1VkParameters) { - .slice_data = slices_buf->address, - .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, - - .img_size[0] = f->picture.f->width, - .img_size[1] = f->picture.f->height, - .chroma_shift[0] = f->chroma_h_shift, - .chroma_shift[1] = f->chroma_v_shift, - - .plane_state_size = fp->plane_state_size, - .crcref = f->crcref, - .rct_offset = 1 << bits, - - .bits_per_raw_sample = bits, - .quant_table_count = f->quant_table_count, - .version = f->version, - .micro_version = f->micro_version, - .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY, - .planes = av_pix_fmt_count_planes(sw_format), - .codec_planes = f->plane_count, - .color_planes = 
color_planes, - .transparency = f->transparency, - .planar_rgb = ff_vk_mt_is_np_rgb(sw_format) && - (ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1), - .colorspace = f->colorspace, - .ec = f->ec, - .golomb = f->ac == AC_GOLOMB_RICE, - .check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK), - }; - for (int i = 0; i < f->quant_table_count; i++) - pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) || - (f->quant_tables[i][4][127] != 0); - - - /* For some reason the C FFv1 encoder/decoder treats these differently */ - if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 || - sw_format == AV_PIX_FMT_GBRP14) - memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int)); - else if (sw_format == AV_PIX_FMT_X2BGR10) - memcpy(pd.fmt_lut, (int [4]) { 0, 2, 1, 3 }, 4*sizeof(int)); - else - ff_vk_set_perm(sw_format, pd.fmt_lut, 0); - - ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); - - vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1); - - if (is_rgb) { - AVVkFrame *vkf = (AVVkFrame *)vp->dpb_frame->data[0]; - for (int i = 0; i < color_planes; i++) - vk->CmdClearColorImage(exec->buf, vkf->img[i], VK_IMAGE_LAYOUT_GENERAL, - &((VkClearColorValue) { 0 }), - 1, &((VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1, - })); - } - - /* Reset shader */ - reset_shader = &fv->reset[f->ac == AC_GOLOMB_RICE]; - ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader, - 1, 0, 0, - slice_state, - 0, fp->slice_data_size*f->slice_count, - VK_FORMAT_UNDEFINED); - - ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader); - - pd_reset = (FFv1VkResetParameters) { - .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, - .plane_state_size = fp->plane_state_size, - .codec_planes = f->plane_count, - .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY, - .version = f->version, - .micro_version = f->micro_version, 
- }; - for (int i = 0; i < f->quant_table_count; i++) - pd_reset.context_count[i] = f->context_count[i]; - - ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd_reset), &pd_reset); - - /* Sync between setup and reset shaders */ - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = slice_state->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = slice_state->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = slice_state->buf, - .offset = 0, - .size = fp->slice_data_size*f->slice_count, - }; - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - slice_state->stage = buf_bar[0].dstStageMask; - slice_state->access = buf_bar[0].dstAccessMask; - nb_buf_bar = 0; - nb_img_bar = 0; - - vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, - f->plane_count); - - /* Decode */ - decode_shader = &fv->decode[f->use32bit][f->ac == AC_GOLOMB_RICE][is_rgb]; - ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, - 1, 0, 0, - slice_state, - 0, fp->slice_data_size*f->slice_count, - VK_FORMAT_UNDEFINED); - ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, - decode_dst, decode_dst_view, - 1, 1, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, - 1, 2, 0, - slice_status, - 0, 2*f->slice_count*sizeof(uint32_t), - VK_FORMAT_UNDEFINED); - if (is_rgb) - ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, - f->picture.f, vp->view.out, - 1, 3, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - - 
ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader); - ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); - - /* Sync between reset and decode shaders */ - buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { - .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, - .srcStageMask = slice_state->stage, - .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - .srcAccessMask = slice_state->access, - .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | - VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = slice_state->buf, - .offset = fp->slice_data_size*f->slice_count, - .size = f->slice_count*(fp->slice_state_size - fp->slice_data_size), - }; - - /* Input frame barrier */ - ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_WRITE_BIT | - (!is_rgb ? 
VK_ACCESS_SHADER_READ_BIT : 0), - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - if (is_rgb) - ff_vk_frame_barrier(&ctx->s, exec, vp->dpb_frame, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - .pBufferMemoryBarriers = buf_bar, - .bufferMemoryBarrierCount = nb_buf_bar, - }); - slice_state->stage = buf_bar[0].dstStageMask; - slice_state->access = buf_bar[0].dstAccessMask; - nb_img_bar = 0; - nb_buf_bar = 0; - - vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1); - - err = ff_vk_exec_submit(&ctx->s, exec); - if (err < 0) - return err; - - /* We don't need the temporary frame after decoding */ - av_frame_free(&vp->dpb_frame); - -fail: - return 0; -} - -static void define_shared_code(FFVulkanShader *shd, int use32bit) -{ - int smp_bits = use32bit ? 
32 : 16; - - GLSLC(0, #define DECODE ); - - av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE); - av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK); - - GLSLF(0, #define TYPE int%i_t ,smp_bits); - GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits); - GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits); - GLSLD(ff_source_rangecoder_comp); - GLSLD(ff_source_ffv1_common_comp); -} - -static int init_setup_shader(FFV1Context *f, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd) -{ - int err; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - - RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - 1, 1, 1, - 0)); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - add_push_data(shd); - - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", - }, - { - .name = "crc_ieee_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint32_t crc_ieee[256];", - }, - { - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", - }, - }; - - RET(ff_vk_shader_add_descriptor_set(s, 
shd, desc_set, 3, 1, 0)); - - define_shared_code(shd, 0 /* Irrelevant */); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, - }, - { - .name = "slice_offsets_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "readonly", - .buf_content = "uint32_t slice_offsets", - .buf_elems = 2*f->max_slice_count, - }, - { - .name = "slice_status_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "writeonly", - .buf_content = "uint32_t slice_status", - .buf_elems = 2*f->max_slice_count, - }, - }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0)); - - GLSLD(ff_source_ffv1_dec_setup_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(s, pool, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - -static int init_reset_shader(FFV1Context *f, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int ac) -{ - int err; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024); - - RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - wg_dim, 1, 1, - 0)); - - if (ac == AC_GOLOMB_RICE) - av_bprintf(&shd->src, "#define GOLOMB\n"); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES); - 
GLSLC(1, u8buf slice_state; ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t version; ); - GLSLC(1, uint8_t micro_version; ); - GLSLC(1, uint8_t padding[1]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", - }, - { - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", - }, - }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0)); - - define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */); - if (ac == AC_GOLOMB_RICE) - GLSLD(ff_source_ffv1_vlc_comp); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .mem_quali = "readonly", - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, - }, - }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0)); - - GLSLD(ff_source_ffv1_reset_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(s, pool, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - 
-static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, - AVHWFramesContext *dec_frames_ctx, - AVHWFramesContext *out_frames_ctx, - int use32bit, int ac, int rgb) -{ - int err; - FFVulkanDescriptorSetBinding *desc_set; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - int use_cached_reader = ac != AC_GOLOMB_RICE && - s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV; - - RET(ff_vk_shader_init(s, shd, "ffv1_dec", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2" }, 2, - use_cached_reader ? CONTEXT_SIZE : 1, 1, 1, - 0)); - - if (ac == AC_GOLOMB_RICE) - av_bprintf(&shd->src, "#define GOLOMB\n"); - - if (rgb) - av_bprintf(&shd->src, "#define RGB\n"); - - if (use_cached_reader) - av_bprintf(&shd->src, "#define CACHED_SYMBOL_READER 1\n"); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - add_push_data(shd); - - av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); - av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); - av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "rangecoder_static_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t zero_one_state[512];", - }, - { - .name = "quant_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" - "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", - }, - }; - - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0)); - - define_shared_code(shd, use32bit); - if (ac == AC_GOLOMB_RICE) - GLSLD(ff_source_ffv1_vlc_comp); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "slice_data_buf", - 
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .buf_content = "SliceContext slice_ctx", - .buf_elems = f->max_slice_count, - }, - { - .name = "dec", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - { - .name = "slice_status_buf", - .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_quali = "writeonly", - .buf_content = "uint32_t slice_status", - .buf_elems = 2*f->max_slice_count, - }, - { - .name = "dst", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .dimensions = 2, - .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format, - FF_VK_REP_NATIVE), - .mem_quali = "writeonly", - .elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format), - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3 + rgb, 0, 0)); - - GLSLD(ff_source_ffv1_dec_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(s, pool, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - -static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s, - AVBufferRef **dst, enum AVPixelFormat sw_format) -{ - int err; - AVHWFramesContext *frames_ctx; - AVVulkanFramesContext *vk_frames; - FFV1Context *f = avctx->priv_data; - - *dst = av_hwframe_ctx_alloc(s->device_ref); - if (!(*dst)) - return AVERROR(ENOMEM); - - frames_ctx = (AVHWFramesContext *)((*dst)->data); - frames_ctx->format = AV_PIX_FMT_VULKAN; - frames_ctx->sw_format = sw_format; - frames_ctx->width = s->frames->width; - frames_ctx->height = f->num_v_slices*RGB_LINECACHE; - - vk_frames = frames_ctx->hwctx; - vk_frames->tiling = 
VK_IMAGE_TILING_OPTIMAL; - vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; - vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT; - - err = av_hwframe_ctx_init(*dst); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n", - av_get_pix_fmt_name(sw_format), av_err2str(err)); - av_buffer_unref(dst); - return err; - } - - return 0; -} - -static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx) -{ - FFv1VulkanDecodeContext *fv = ctx->sd_ctx; - - ff_vk_shader_free(&ctx->s, &fv->setup); - - for (int i = 0; i < 2; i++) /* 16/32 bit */ - av_buffer_unref(&fv->intermediate_frames_ref[i]); - - for (int i = 0; i < 2; i++) /* AC/Golomb */ - ff_vk_shader_free(&ctx->s, &fv->reset[i]); - - for (int i = 0; i < 2; i++) /* 16/32 bit */ - for (int j = 0; j < 2; j++) /* AC/Golomb */ - for (int k = 0; k < 2; k++) /* Normal/RGB */ - ff_vk_shader_free(&ctx->s, &fv->decode[i][j][k]); - - ff_vk_free_buf(&ctx->s, &fv->quant_buf); - ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf); - ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf); - - av_buffer_pool_uninit(&fv->slice_state_pool); - av_buffer_pool_uninit(&fv->slice_offset_pool); - av_buffer_pool_uninit(&fv->slice_status_pool); - - av_freep(&fv); -} - -static int vk_decode_ffv1_init(AVCodecContext *avctx) -{ - int err; - FFV1Context *f = avctx->priv_data; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = NULL; - FFv1VulkanDecodeContext *fv; - FFVkSPIRVCompiler *spv; - - if (f->version < 3 || - (f->version == 4 && f->micro_version > 3)) - return AVERROR(ENOTSUP); - - spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - err = ff_vk_decode_init(avctx); - if (err < 0) - return err; - ctx = dec->shared_ctx; - - fv = ctx->sd_ctx = av_mallocz(sizeof(*fv)); - if (!fv) { - err = AVERROR(ENOMEM); - goto fail; - } - - 
ctx->sd_ctx_free = &vk_decode_ffv1_uninit; - - /* Intermediate frame pool for RCT */ - for (int i = 0; i < 2; i++) { /* 16/32 bit */ - RET(init_indirect(avctx, &ctx->s, &fv->intermediate_frames_ref[i], - i ? AV_PIX_FMT_GBRAP32 : AV_PIX_FMT_GBRAP16)); - } - - /* Setup shader */ - RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup)); - - /* Reset shaders */ - for (int i = 0; i < 2; i++) { /* AC/Golomb */ - RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool, - spv, &fv->reset[i], !i ? AC_RANGE_CUSTOM_TAB : 0)); - } - - /* Decode shaders */ - for (int i = 0; i < 2; i++) { /* 16/32 bit */ - for (int j = 0; j < 2; j++) { /* AC/Golomb */ - for (int k = 0; k < 2; k++) { /* Normal/RGB */ - AVHWFramesContext *dec_frames_ctx; - dec_frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data : - (AVHWFramesContext *)avctx->hw_frames_ctx->data; - RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, - spv, &fv->decode[i][j][k], - dec_frames_ctx, - (AVHWFramesContext *)avctx->hw_frames_ctx->data, - i, - !j ? 
AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE, - k)); - } - } - } - - /* Range coder data */ - RET(ff_ffv1_vk_init_state_transition_data(&ctx->s, - &fv->rangecoder_static_buf, - f)); - - /* Quantization table data */ - RET(ff_ffv1_vk_init_quant_table_data(&ctx->s, - &fv->quant_buf, - f)); - - /* CRC table buffer */ - RET(ff_ffv1_vk_init_crc_table_data(&ctx->s, - &fv->crc_tab_buf, - f)); - - /* Update setup global descriptors */ - RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], - &fv->setup, 0, 0, 0, - &fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, - VK_FORMAT_UNDEFINED)); - RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], - &fv->setup, 0, 1, 0, - &fv->crc_tab_buf, - 0, fv->crc_tab_buf.size, - VK_FORMAT_UNDEFINED)); - - /* Update decode global descriptors */ - for (int i = 0; i < 2; i++) { /* 16/32 bit */ - for (int j = 0; j < 2; j++) { /* AC/Golomb */ - for (int k = 0; k < 2; k++) { /* Normal/RGB */ - RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], - &fv->decode[i][j][k], 0, 0, 0, - &fv->rangecoder_static_buf, - 0, fv->rangecoder_static_buf.size, - VK_FORMAT_UNDEFINED)); - RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], - &fv->decode[i][j][k], 0, 1, 0, - &fv->quant_buf, - 0, fv->quant_buf.size, - VK_FORMAT_UNDEFINED)); - } - } - } - -fail: - spv->uninit(&spv); - - return err; -} - -static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data) -{ - AVHWDeviceContext *dev_ctx = _hwctx.nc; - AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; - - FFv1VulkanDecodePicture *fp = data; - FFVulkanDecodePicture *vp = &fp->vp; - FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data; - - ff_vk_decode_free_frame(dev_ctx, vp); - - /* Invalidate slice/output data if needed */ - if (!(slice_status->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - VkMappedMemoryRange invalidate_data = { - .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = 
slice_status->mem, - .offset = 0, - .size = 2*fp->slice_num*sizeof(uint32_t), - }; - vp->invalidate_memory_ranges(hwctx->act_dev, - 1, &invalidate_data); - } - - for (int i = 0; i < fp->slice_num; i++) { - uint32_t crc_res = 0; - if (fp->crc_checked) - crc_res = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 0); - uint32_t status = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 4); - if (status || crc_res) - av_log(dev_ctx, AV_LOG_ERROR, "Slice %i status: 0x%x, CRC 0x%x\n", - i, status, crc_res); - } - - av_buffer_unref(&vp->slices_buf); - av_buffer_unref(&fp->slice_state); - av_buffer_unref(&fp->slice_offset_buf); - av_buffer_unref(&fp->slice_status_buf); -} - -const FFHWAccel ff_ffv1_vulkan_hwaccel = { - .p.name = "ffv1_vulkan", - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_FFV1, - .p.pix_fmt = AV_PIX_FMT_VULKAN, - .start_frame = &vk_ffv1_start_frame, - .decode_slice = &vk_ffv1_decode_slice, - .end_frame = &vk_ffv1_end_frame, - .free_frame_priv = &vk_ffv1_free_frame_priv, - .frame_priv_data_size = sizeof(FFv1VulkanDecodePicture), - .init = &vk_decode_ffv1_init, - .update_thread_context = &ff_vk_update_thread_context, - .decode_params = &ff_vk_params_invalidate, - .flush = &ff_vk_decode_flush, - .uninit = &ff_vk_decode_uninit, - .frame_params = &ff_vk_frame_params, - .priv_data_size = sizeof(FFVulkanDecodeContext), - .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, -}; -- 2.49.1 From 841a4eb1e1f486510a7165168f56acbe1e73b0f1 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:36:48 +0000 Subject: [PATCH 080/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_ffv1.c | 1175 +++++++++++++++++++++++++++++++ 1 file changed, 1175 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_ffv1.c diff --git a/libavcodec/vulkan/vulkan_ffv1.c b/libavcodec/vulkan/vulkan_ffv1.c new file mode 100644 index 0000000000..5bc5c84a57 --- /dev/null +++ b/libavcodec/vulkan/vulkan_ffv1.c @@ -0,0 
+1,1175 @@ +/* + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vulkan_decode.h" +#include "libavcodec/hwaccel_internal.h" + +#include "libavcodec/ffv1.h" +#include "ffv1_vulkan.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "libavutil/mem.h" + +#define RGB_LINECACHE 2 + +extern const char *ff_source_common_comp; +extern const char *ff_source_rangecoder_comp; +extern const char *ff_source_ffv1_vlc_comp; +extern const char *ff_source_ffv1_common_comp; +extern const char *ff_source_ffv1_dec_setup_comp; +extern const char *ff_source_ffv1_reset_comp; +extern const char *ff_source_ffv1_dec_comp; + +const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = { + .codec_id = AV_CODEC_ID_FFV1, + .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR, + .queue_flags = VK_QUEUE_COMPUTE_BIT, +}; + +typedef struct FFv1VulkanDecodePicture { + FFVulkanDecodePicture vp; + + AVBufferRef *slice_state; + uint32_t plane_state_size; + uint32_t slice_state_size; + uint32_t slice_data_size; + + AVBufferRef *slice_offset_buf; + uint32_t *slice_offset; + int slice_num; + + AVBufferRef *slice_status_buf; + int crc_checked; +} FFv1VulkanDecodePicture; + +typedef struct FFv1VulkanDecodeContext { + AVBufferRef 
*intermediate_frames_ref[2]; /* 16/32 bit */ + + FFVulkanShader setup; + FFVulkanShader reset[2]; /* AC/Golomb */ + FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */ + + FFVkBuffer rangecoder_static_buf; + FFVkBuffer quant_buf; + FFVkBuffer crc_tab_buf; + + AVBufferPool *slice_state_pool; + AVBufferPool *slice_offset_pool; + AVBufferPool *slice_status_pool; +} FFv1VulkanDecodeContext; + +typedef struct FFv1VkParameters { + VkDeviceAddress slice_data; + VkDeviceAddress slice_state; + + int fmt_lut[4]; + uint32_t img_size[2]; + uint32_t chroma_shift[2]; + + uint32_t plane_state_size; + uint32_t crcref; + int rct_offset; + + uint8_t extend_lookup[8]; + uint8_t bits_per_raw_sample; + uint8_t quant_table_count; + uint8_t version; + uint8_t micro_version; + uint8_t key_frame; + uint8_t planes; + uint8_t codec_planes; + uint8_t color_planes; + uint8_t transparency; + uint8_t planar_rgb; + uint8_t colorspace; + uint8_t ec; + uint8_t golomb; + uint8_t check_crc; + uint8_t padding[3]; +} FFv1VkParameters; + +static void add_push_data(FFVulkanShader *shd) +{ + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, u8buf slice_data; ); + GLSLC(1, u8buf slice_state; ); + GLSLC(0, ); + GLSLC(1, ivec4 fmt_lut; ); + GLSLC(1, uvec2 img_size; ); + GLSLC(1, uvec2 chroma_shift; ); + GLSLC(0, ); + GLSLC(1, uint plane_state_size; ); + GLSLC(1, uint32_t crcref; ); + GLSLC(1, int rct_offset; ); + GLSLC(0, ); + GLSLC(1, uint8_t extend_lookup[8]; ); + GLSLC(1, uint8_t bits_per_raw_sample; ); + GLSLC(1, uint8_t quant_table_count; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t planes; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t color_planes; ); + GLSLC(1, uint8_t transparency; ); + GLSLC(1, uint8_t planar_rgb; ); + GLSLC(1, uint8_t colorspace; ); + GLSLC(1, uint8_t ec; ); + GLSLC(1, uint8_t golomb; ); + GLSLC(1, uint8_t check_crc; ); + GLSLC(1, uint8_t 
padding[3]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), + VK_SHADER_STAGE_COMPUTE_BIT); +} + +static int vk_ffv1_start_frame(AVCodecContext *avctx, + const AVBufferRef *buffer_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFv1VulkanDecodeContext *fv = ctx->sd_ctx; + FFV1Context *f = avctx->priv_data; + + FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &fp->vp; + + AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data; + enum AVPixelFormat sw_format = hwfc->sw_format; + + int max_contexts; + int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) && + !(sw_format == AV_PIX_FMT_YA8); + + fp->slice_num = 0; + + max_contexts = 0; + for (int i = 0; i < f->quant_table_count; i++) + max_contexts = FFMAX(f->context_count[i], max_contexts); + + /* Allocate slice buffer data */ + if (f->ac == AC_GOLOMB_RICE) + fp->plane_state_size = 8; + else + fp->plane_state_size = CONTEXT_SIZE; + + fp->plane_state_size *= max_contexts; + fp->slice_state_size = fp->plane_state_size*f->plane_count; + + fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */ + fp->slice_state_size += fp->slice_data_size; + fp->slice_state_size = FFALIGN(fp->slice_state_size, 8); + + fp->crc_checked = f->ec && (avctx->err_recognition & AV_EF_CRCCHECK); + + /* Host map the input slices data if supported */ + if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) + ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data, + buffer_ref, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); + + /* Allocate slice state data */ + if (f->picture.f->flags & AV_FRAME_FLAG_KEY) { + err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool, + &fp->slice_state, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + 
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, f->slice_count*fp->slice_state_size, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + } else { + FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private; + fp->slice_state = av_buffer_ref(fpl->slice_state); + if (!fp->slice_state) + return AVERROR(ENOMEM); + } + + /* Allocate slice offsets buffer */ + err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool, + &fp->slice_offset_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, 2*f->slice_count*sizeof(uint32_t), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) + return err; + + /* Allocate slice status buffer */ + err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_status_pool, + &fp->slice_status_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, 2*f->slice_count*sizeof(uint32_t), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) + return err; + + /* Prepare frame to be used */ + err = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1, + FF_VK_REP_NATIVE, 0); + if (err < 0) + return err; + + /* Create a temporaty frame for RGB */ + if (is_rgb) { + vp->dpb_frame = av_frame_alloc(); + if (!vp->dpb_frame) + return AVERROR(ENOMEM); + + err = av_hwframe_get_buffer(fv->intermediate_frames_ref[f->use32bit], + vp->dpb_frame, 0); + if (err < 0) + return err; + } + + return 0; +} + +static int vk_ffv1_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + FFV1Context *f = avctx->priv_data; + + FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &fp->vp; + + FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; + FFVkBuffer *slices_buf = vp->slices_buf ? 
(FFVkBuffer *)vp->slices_buf->data : NULL; + + if (slices_buf && slices_buf->host_ref) { + AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 0)*sizeof(uint32_t), + data - slices_buf->mapped_mem); + AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 1)*sizeof(uint32_t), + size); + + fp->slice_num++; + } else { + int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, + &fp->slice_num, + (const uint32_t **)&fp->slice_offset); + if (err < 0) + return err; + + AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 0)*sizeof(uint32_t), + fp->slice_offset[fp->slice_num - 1]); + AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 1)*sizeof(uint32_t), + size); + } + + return 0; +} + +static int vk_ffv1_end_frame(AVCodecContext *avctx) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + FFV1Context *f = avctx->priv_data; + FFv1VulkanDecodeContext *fv = ctx->sd_ctx; + FFv1VkParameters pd; + FFv1VkResetParameters pd_reset; + + AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data; + enum AVPixelFormat sw_format = hwfc->sw_format; + + int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8; + int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) && + !(sw_format == AV_PIX_FMT_YA8); + int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components; + + FFVulkanShader *reset_shader; + FFVulkanShader *decode_shader; + + FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &fp->vp; + + FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data; + FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data; + FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; + FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data; + + VkImageView rct_image_views[AV_NUM_DATA_POINTERS]; + + AVFrame *decode_dst = is_rgb ? 
vp->dpb_frame : f->picture.f; + VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->view.out; + + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); + ff_vk_exec_start(&ctx->s, exec); + + /* Prepare deps */ + RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, + f->picture.f); + if (err < 0) + return err; + + if (is_rgb) { + RET(ff_vk_create_imageviews(&ctx->s, exec, rct_image_views, + vp->dpb_frame, FF_VK_REP_NATIVE)); + RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_CLEAR_BIT)); + ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_ACCESS_2_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + } + + if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) { + FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private; + FFVulkanDecodePicture *vpl = &fpl->vp; + + /* Wait on the previous frame */ + RET(ff_vk_exec_add_dep_wait_sem(&ctx->s, exec, vpl->sem, vpl->sem_value, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)); + } + + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1)); + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_status_buf, 1, 1)); + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0)); + vp->slices_buf = NULL; + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0)); + fp->slice_offset_buf = NULL; + + /* Entry barrier for the slice state */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_state->stage, + .dstStageMask = 
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_state->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_state->buf, + .offset = 0, + .size = fp->slice_data_size*f->slice_count, + }; + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + slice_state->stage = buf_bar[0].dstStageMask; + slice_state->access = buf_bar[0].dstAccessMask; + nb_buf_bar = 0; + nb_img_bar = 0; + + /* Setup shader */ + ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, + 1, 0, 0, + slice_state, + 0, fp->slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, + 1, 1, 0, + slice_offset, + 0, 2*f->slice_count*sizeof(uint32_t), + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, + 1, 2, 0, + slice_status, + 0, 2*f->slice_count*sizeof(uint32_t), + VK_FORMAT_UNDEFINED); + + ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup); + pd = (FFv1VkParameters) { + .slice_data = slices_buf->address, + .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, + + .img_size[0] = f->picture.f->width, + .img_size[1] = f->picture.f->height, + .chroma_shift[0] = f->chroma_h_shift, + .chroma_shift[1] = f->chroma_v_shift, + + .plane_state_size = fp->plane_state_size, + .crcref = f->crcref, + .rct_offset = 1 << bits, + + .bits_per_raw_sample = bits, + .quant_table_count = f->quant_table_count, + .version = f->version, + .micro_version = f->micro_version, + .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY, + .planes = av_pix_fmt_count_planes(sw_format), + .codec_planes = f->plane_count, + .color_planes = 
color_planes, + .transparency = f->transparency, + .planar_rgb = ff_vk_mt_is_np_rgb(sw_format) && + (ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1), + .colorspace = f->colorspace, + .ec = f->ec, + .golomb = f->ac == AC_GOLOMB_RICE, + .check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK), + }; + for (int i = 0; i < f->quant_table_count; i++) + pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) || + (f->quant_tables[i][4][127] != 0); + + + /* For some reason the C FFv1 encoder/decoder treats these differently */ + if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 || + sw_format == AV_PIX_FMT_GBRP14) + memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int)); + else if (sw_format == AV_PIX_FMT_X2BGR10) + memcpy(pd.fmt_lut, (int [4]) { 0, 2, 1, 3 }, 4*sizeof(int)); + else + ff_vk_set_perm(sw_format, pd.fmt_lut, 0); + + ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1); + + if (is_rgb) { + AVVkFrame *vkf = (AVVkFrame *)vp->dpb_frame->data[0]; + for (int i = 0; i < color_planes; i++) + vk->CmdClearColorImage(exec->buf, vkf->img[i], VK_IMAGE_LAYOUT_GENERAL, + &((VkClearColorValue) { 0 }), + 1, &((VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, + })); + } + + /* Reset shader */ + reset_shader = &fv->reset[f->ac == AC_GOLOMB_RICE]; + ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader, + 1, 0, 0, + slice_state, + 0, fp->slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + + ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader); + + pd_reset = (FFv1VkResetParameters) { + .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, + .plane_state_size = fp->plane_state_size, + .codec_planes = f->plane_count, + .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY, + .version = f->version, + .micro_version = f->micro_version, 
+ }; + for (int i = 0; i < f->quant_table_count; i++) + pd_reset.context_count[i] = f->context_count[i]; + + ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd_reset), &pd_reset); + + /* Sync between setup and reset shaders */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_state->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_state->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_state->buf, + .offset = 0, + .size = fp->slice_data_size*f->slice_count, + }; + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + slice_state->stage = buf_bar[0].dstStageMask; + slice_state->access = buf_bar[0].dstAccessMask; + nb_buf_bar = 0; + nb_img_bar = 0; + + vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, + f->plane_count); + + /* Decode */ + decode_shader = &fv->decode[f->use32bit][f->ac == AC_GOLOMB_RICE][is_rgb]; + ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, + 1, 0, 0, + slice_state, + 0, fp->slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, + decode_dst, decode_dst_view, + 1, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, + 1, 2, 0, + slice_status, + 0, 2*f->slice_count*sizeof(uint32_t), + VK_FORMAT_UNDEFINED); + if (is_rgb) + ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, + f->picture.f, vp->view.out, + 1, 3, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + 
ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader); + ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + /* Sync between reset and decode shaders */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_state->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_state->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_state->buf, + .offset = fp->slice_data_size*f->slice_count, + .size = f->slice_count*(fp->slice_state_size - fp->slice_data_size), + }; + + /* Input frame barrier */ + ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT | + (!is_rgb ? 
VK_ACCESS_SHADER_READ_BIT : 0), + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + if (is_rgb) + ff_vk_frame_barrier(&ctx->s, exec, vp->dpb_frame, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + slice_state->stage = buf_bar[0].dstStageMask; + slice_state->access = buf_bar[0].dstAccessMask; + nb_img_bar = 0; + nb_buf_bar = 0; + + vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1); + + err = ff_vk_exec_submit(&ctx->s, exec); + if (err < 0) + return err; + + /* We don't need the temporary frame after decoding */ + av_frame_free(&vp->dpb_frame); + +fail: + return 0; +} + +static void define_shared_code(FFVulkanShader *shd, int use32bit) +{ + int smp_bits = use32bit ? 
32 : 16; + + GLSLC(0, #define DECODE ); + + av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE); + av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK); + + GLSLF(0, #define TYPE int%i_t ,smp_bits); + GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits); + GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits); + GLSLD(ff_source_rangecoder_comp); + GLSLD(ff_source_ffv1_common_comp); +} + +static int init_setup_shader(FFV1Context *f, FFVulkanContext *s, + FFVkExecPool *pool, FFVkSPIRVCompiler *spv, + FFVulkanShader *shd) +{ + int err; + FFVulkanDescriptorSetBinding *desc_set; + + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + + RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + 1, 1, 1, + 0)); + + /* Common codec header */ + GLSLD(ff_source_common_comp); + + add_push_data(shd); + + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); + av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "rangecoder_static_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t zero_one_state[512];", + }, + { + .name = "crc_ieee_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint32_t crc_ieee[256];", + }, + { + .name = "quant_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" + "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + }, + }; + + RET(ff_vk_shader_add_descriptor_set(s, 
shd, desc_set, 3, 1, 0)); + + define_shared_code(shd, 0 /* Irrelevant */); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "slice_data_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, + }, + { + .name = "slice_offsets_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_quali = "readonly", + .buf_content = "uint32_t slice_offsets", + .buf_elems = 2*f->max_slice_count, + }, + { + .name = "slice_status_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_quali = "writeonly", + .buf_content = "uint32_t slice_status", + .buf_elems = 2*f->max_slice_count, + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0)); + + GLSLD(ff_source_ffv1_dec_setup_comp); + + RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(s, pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static int init_reset_shader(FFV1Context *f, FFVulkanContext *s, + FFVkExecPool *pool, FFVkSPIRVCompiler *spv, + FFVulkanShader *shd, int ac) +{ + int err; + FFVulkanDescriptorSetBinding *desc_set; + + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024); + + RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + wg_dim, 1, 1, + 0)); + + if (ac == AC_GOLOMB_RICE) + av_bprintf(&shd->src, "#define GOLOMB\n"); + + /* Common codec header */ + GLSLD(ff_source_common_comp); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES); + 
GLSLC(1, u8buf slice_state; ); + GLSLC(1, uint plane_state_size; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t padding[1]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); + av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "rangecoder_static_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t zero_one_state[512];", + }, + { + .name = "quant_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" + "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0)); + + define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */); + if (ac == AC_GOLOMB_RICE) + GLSLD(ff_source_ffv1_vlc_comp); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "slice_data_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .mem_quali = "readonly", + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0)); + + GLSLD(ff_source_ffv1_reset_comp); + + RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(s, pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + 
+static int init_decode_shader(FFV1Context *f, FFVulkanContext *s, + FFVkExecPool *pool, FFVkSPIRVCompiler *spv, + FFVulkanShader *shd, + AVHWFramesContext *dec_frames_ctx, + AVHWFramesContext *out_frames_ctx, + int use32bit, int ac, int rgb) +{ + int err; + FFVulkanDescriptorSetBinding *desc_set; + + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + int use_cached_reader = ac != AC_GOLOMB_RICE && + s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV; + + RET(ff_vk_shader_init(s, shd, "ffv1_dec", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2" }, 2, + use_cached_reader ? CONTEXT_SIZE : 1, 1, 1, + 0)); + + if (ac == AC_GOLOMB_RICE) + av_bprintf(&shd->src, "#define GOLOMB\n"); + + if (rgb) + av_bprintf(&shd->src, "#define RGB\n"); + + if (use_cached_reader) + av_bprintf(&shd->src, "#define CACHED_SYMBOL_READER 1\n"); + + /* Common codec header */ + GLSLD(ff_source_common_comp); + + add_push_data(shd); + + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); + av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "rangecoder_static_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t zero_one_state[512];", + }, + { + .name = "quant_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]" + "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];", + }, + }; + + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0)); + + define_shared_code(shd, use32bit); + if (ac == AC_GOLOMB_RICE) + GLSLD(ff_source_ffv1_vlc_comp); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "slice_data_buf", + 
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .buf_content = "SliceContext slice_ctx", + .buf_elems = f->max_slice_count, + }, + { + .name = "dec", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format, + FF_VK_REP_NATIVE), + .elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "slice_status_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_quali = "writeonly", + .buf_content = "uint32_t slice_status", + .buf_elems = 2*f->max_slice_count, + }, + { + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .dimensions = 2, + .mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format, + FF_VK_REP_NATIVE), + .mem_quali = "writeonly", + .elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3 + rgb, 0, 0)); + + GLSLD(ff_source_ffv1_dec_comp); + + RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(s, pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s, + AVBufferRef **dst, enum AVPixelFormat sw_format) +{ + int err; + AVHWFramesContext *frames_ctx; + AVVulkanFramesContext *vk_frames; + FFV1Context *f = avctx->priv_data; + + *dst = av_hwframe_ctx_alloc(s->device_ref); + if (!(*dst)) + return AVERROR(ENOMEM); + + frames_ctx = (AVHWFramesContext *)((*dst)->data); + frames_ctx->format = AV_PIX_FMT_VULKAN; + frames_ctx->sw_format = sw_format; + frames_ctx->width = s->frames->width; + frames_ctx->height = f->num_v_slices*RGB_LINECACHE; + + vk_frames = frames_ctx->hwctx; + vk_frames->tiling = 
VK_IMAGE_TILING_OPTIMAL; + vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + err = av_hwframe_ctx_init(*dst); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n", + av_get_pix_fmt_name(sw_format), av_err2str(err)); + av_buffer_unref(dst); + return err; + } + + return 0; +} + +static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx) +{ + FFv1VulkanDecodeContext *fv = ctx->sd_ctx; + + ff_vk_shader_free(&ctx->s, &fv->setup); + + for (int i = 0; i < 2; i++) /* 16/32 bit */ + av_buffer_unref(&fv->intermediate_frames_ref[i]); + + for (int i = 0; i < 2; i++) /* AC/Golomb */ + ff_vk_shader_free(&ctx->s, &fv->reset[i]); + + for (int i = 0; i < 2; i++) /* 16/32 bit */ + for (int j = 0; j < 2; j++) /* AC/Golomb */ + for (int k = 0; k < 2; k++) /* Normal/RGB */ + ff_vk_shader_free(&ctx->s, &fv->decode[i][j][k]); + + ff_vk_free_buf(&ctx->s, &fv->quant_buf); + ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf); + ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf); + + av_buffer_pool_uninit(&fv->slice_state_pool); + av_buffer_pool_uninit(&fv->slice_offset_pool); + av_buffer_pool_uninit(&fv->slice_status_pool); + + av_freep(&fv); +} + +static int vk_decode_ffv1_init(AVCodecContext *avctx) +{ + int err; + FFV1Context *f = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = NULL; + FFv1VulkanDecodeContext *fv; + FFVkSPIRVCompiler *spv; + + if (f->version < 3 || + (f->version == 4 && f->micro_version > 3)) + return AVERROR(ENOTSUP); + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + err = ff_vk_decode_init(avctx); + if (err < 0) + return err; + ctx = dec->shared_ctx; + + fv = ctx->sd_ctx = av_mallocz(sizeof(*fv)); + if (!fv) { + err = AVERROR(ENOMEM); + goto fail; + } + + 
ctx->sd_ctx_free = &vk_decode_ffv1_uninit; + + /* Intermediate frame pool for RCT */ + for (int i = 0; i < 2; i++) { /* 16/32 bit */ + RET(init_indirect(avctx, &ctx->s, &fv->intermediate_frames_ref[i], + i ? AV_PIX_FMT_GBRAP32 : AV_PIX_FMT_GBRAP16)); + } + + /* Setup shader */ + RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup)); + + /* Reset shaders */ + for (int i = 0; i < 2; i++) { /* AC/Golomb */ + RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool, + spv, &fv->reset[i], !i ? AC_RANGE_CUSTOM_TAB : 0)); + } + + /* Decode shaders */ + for (int i = 0; i < 2; i++) { /* 16/32 bit */ + for (int j = 0; j < 2; j++) { /* AC/Golomb */ + for (int k = 0; k < 2; k++) { /* Normal/RGB */ + AVHWFramesContext *dec_frames_ctx; + dec_frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data : + (AVHWFramesContext *)avctx->hw_frames_ctx->data; + RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, + spv, &fv->decode[i][j][k], + dec_frames_ctx, + (AVHWFramesContext *)avctx->hw_frames_ctx->data, + i, + !j ? 
AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE, + k)); + } + } + } + + /* Range coder data */ + RET(ff_ffv1_vk_init_state_transition_data(&ctx->s, + &fv->rangecoder_static_buf, + f)); + + /* Quantization table data */ + RET(ff_ffv1_vk_init_quant_table_data(&ctx->s, + &fv->quant_buf, + f)); + + /* CRC table buffer */ + RET(ff_ffv1_vk_init_crc_table_data(&ctx->s, + &fv->crc_tab_buf, + f)); + + /* Update setup global descriptors */ + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &fv->setup, 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &fv->setup, 0, 1, 0, + &fv->crc_tab_buf, + 0, fv->crc_tab_buf.size, + VK_FORMAT_UNDEFINED)); + + /* Update decode global descriptors */ + for (int i = 0; i < 2; i++) { /* 16/32 bit */ + for (int j = 0; j < 2; j++) { /* AC/Golomb */ + for (int k = 0; k < 2; k++) { /* Normal/RGB */ + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &fv->decode[i][j][k], 0, 0, 0, + &fv->rangecoder_static_buf, + 0, fv->rangecoder_static_buf.size, + VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &fv->decode[i][j][k], 0, 1, 0, + &fv->quant_buf, + 0, fv->quant_buf.size, + VK_FORMAT_UNDEFINED)); + } + } + } + +fail: + spv->uninit(&spv); + + return err; +} + +static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data) +{ + AVHWDeviceContext *dev_ctx = _hwctx.nc; + AVVulkanDeviceContext *hwctx = dev_ctx->hwctx; + + FFv1VulkanDecodePicture *fp = data; + FFVulkanDecodePicture *vp = &fp->vp; + FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data; + + ff_vk_decode_free_frame(dev_ctx, vp); + + /* Invalidate slice/output data if needed */ + if (!(slice_status->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + VkMappedMemoryRange invalidate_data = { + .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + .memory = 
slice_status->mem, + .offset = 0, + .size = 2*fp->slice_num*sizeof(uint32_t), + }; + vp->invalidate_memory_ranges(hwctx->act_dev, + 1, &invalidate_data); + } + + for (int i = 0; i < fp->slice_num; i++) { + uint32_t crc_res = 0; + if (fp->crc_checked) + crc_res = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 0); + uint32_t status = AV_RN32(slice_status->mapped_mem + 2*i*sizeof(uint32_t) + 4); + if (status || crc_res) + av_log(dev_ctx, AV_LOG_ERROR, "Slice %i status: 0x%x, CRC 0x%x\n", + i, status, crc_res); + } + + av_buffer_unref(&vp->slices_buf); + av_buffer_unref(&fp->slice_state); + av_buffer_unref(&fp->slice_offset_buf); + av_buffer_unref(&fp->slice_status_buf); +} + +const FFHWAccel ff_ffv1_vulkan_hwaccel = { + .p.name = "ffv1_vulkan", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_FFV1, + .p.pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_ffv1_start_frame, + .decode_slice = &vk_ffv1_decode_slice, + .end_frame = &vk_ffv1_end_frame, + .free_frame_priv = &vk_ffv1_free_frame_priv, + .frame_priv_data_size = sizeof(FFv1VulkanDecodePicture), + .init = &vk_decode_ffv1_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_invalidate, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, +}; -- 2.49.1 From a7c4e07d3442f9b0313e4d37befaf34aff822f93 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:37:11 +0000 Subject: [PATCH 081/118] Changing vulkan file directory --- libavcodec/vulkan_h264.c | 592 --------------------------------------- 1 file changed, 592 deletions(-) delete mode 100644 libavcodec/vulkan_h264.c diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c deleted file mode 100644 index ebe305e7b5..0000000000 --- a/libavcodec/vulkan_h264.c +++ /dev/null @@ -1,592 +0,0 @@ -/* - * This 
file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "h264dec.h" -#include "h264_ps.h" - -#include "vulkan_decode.h" - -const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc = { - .codec_id = AV_CODEC_ID_H264, - .decode_extension = FF_VK_EXT_VIDEO_DECODE_H264, - .queue_flags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, - .decode_op = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR, - .ext_props = { - .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, - .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION, - }, -}; - -typedef struct H264VulkanDecodePicture { - FFVulkanDecodePicture vp; - - /* Current picture */ - StdVideoDecodeH264ReferenceInfo h264_ref; - VkVideoDecodeH264DpbSlotInfoKHR vkh264_ref; - - /* Picture refs */ - H264Picture *ref_src [H264_MAX_PICTURE_COUNT]; - StdVideoDecodeH264ReferenceInfo h264_refs [H264_MAX_PICTURE_COUNT]; - VkVideoDecodeH264DpbSlotInfoKHR vkh264_refs[H264_MAX_PICTURE_COUNT]; - - /* Current picture (contd.) 
*/ - StdVideoDecodeH264PictureInfo h264pic; - VkVideoDecodeH264PictureInfoKHR h264_pic_info; -} H264VulkanDecodePicture; - -const static int h264_scaling_list8_order[] = { 0, 3, 1, 4, 2, 5 }; - -static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src, - VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ - VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ - VkVideoDecodeH264DpbSlotInfoKHR *vkh264_ref, /* Goes in ^ */ - StdVideoDecodeH264ReferenceInfo *h264_ref, /* Goes in ^ */ - H264Picture *pic, int is_current, - int is_field, int picture_structure, - int dpb_slot_index) -{ - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vkpic = &hp->vp; - - int err = ff_vk_decode_prepare_frame(dec, pic->f, vkpic, is_current, - dec->dedicated_dpb); - if (err < 0) - return err; - - *h264_ref = (StdVideoDecodeH264ReferenceInfo) { - .FrameNum = pic->long_ref ? pic->pic_id : pic->frame_num, - .PicOrderCnt = { pic->field_poc[0], pic->field_poc[1] }, - .flags = (StdVideoDecodeH264ReferenceInfoFlags) { - .top_field_flag = is_field ? !!(picture_structure & PICT_TOP_FIELD) : 0, - .bottom_field_flag = is_field ? !!(picture_structure & PICT_BOTTOM_FIELD) : 0, - .used_for_long_term_reference = pic->reference && pic->long_ref, - /* - * flags.is_non_existing is used to indicate whether the picture is marked as - * “non-existing” as defined in section 8.2.5.2 of the ITU-T H.264 Specification; - * 8.2.5.2 Decoding process for gaps in frame_num - * corresponds to the code in h264_slice.c:h264_field_start, - * which sets the invalid_gap flag when decoding. 
- */ - .is_non_existing = pic->invalid_gap, - }, - }; - - *vkh264_ref = (VkVideoDecodeH264DpbSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR, - .pStdReferenceInfo = h264_ref, - }; - - *ref = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - .codedOffset = (VkOffset2D){ 0, 0 }, - .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, - .baseArrayLayer = ctx->common.layered_dpb ? dpb_slot_index : 0, - .imageViewBinding = vkpic->view.ref[0], - }; - - *ref_slot = (VkVideoReferenceSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, - .pNext = vkh264_ref, - .slotIndex = dpb_slot_index, - .pPictureResource = ref, - }; - - if (ref_src) - *ref_src = pic; - - return 0; -} - -static StdVideoH264LevelIdc convert_to_vk_level_idc(int level_idc) -{ - switch (level_idc) { - case 10: return STD_VIDEO_H264_LEVEL_IDC_1_0; - case 11: return STD_VIDEO_H264_LEVEL_IDC_1_1; - case 12: return STD_VIDEO_H264_LEVEL_IDC_1_2; - case 13: return STD_VIDEO_H264_LEVEL_IDC_1_3; - case 20: return STD_VIDEO_H264_LEVEL_IDC_2_0; - case 21: return STD_VIDEO_H264_LEVEL_IDC_2_1; - case 22: return STD_VIDEO_H264_LEVEL_IDC_2_2; - case 30: return STD_VIDEO_H264_LEVEL_IDC_3_0; - case 31: return STD_VIDEO_H264_LEVEL_IDC_3_1; - case 32: return STD_VIDEO_H264_LEVEL_IDC_3_2; - case 40: return STD_VIDEO_H264_LEVEL_IDC_4_0; - case 41: return STD_VIDEO_H264_LEVEL_IDC_4_1; - case 42: return STD_VIDEO_H264_LEVEL_IDC_4_2; - case 50: return STD_VIDEO_H264_LEVEL_IDC_5_0; - case 51: return STD_VIDEO_H264_LEVEL_IDC_5_1; - case 52: return STD_VIDEO_H264_LEVEL_IDC_5_2; - case 60: return STD_VIDEO_H264_LEVEL_IDC_6_0; - case 61: return STD_VIDEO_H264_LEVEL_IDC_6_1; - default: - case 62: return STD_VIDEO_H264_LEVEL_IDC_6_2; - } -} - -static void set_sps(const SPS *sps, - StdVideoH264ScalingLists *vksps_scaling, - StdVideoH264HrdParameters *vksps_vui_header, - StdVideoH264SequenceParameterSetVui *vksps_vui, - 
StdVideoH264SequenceParameterSet *vksps) -{ - *vksps_scaling = (StdVideoH264ScalingLists) { - .scaling_list_present_mask = sps->scaling_matrix_present_mask, - .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */ - }; - - for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) - for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS; j++) - vksps_scaling->ScalingList4x4[i][j] = sps->scaling_matrix4[i][ff_zigzag_scan[j]]; - - for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++) - for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS; j++) - vksps_scaling->ScalingList8x8[i][j] = - sps->scaling_matrix8[h264_scaling_list8_order[i]][ff_zigzag_direct[j]]; - - *vksps_vui_header = (StdVideoH264HrdParameters) { - .cpb_cnt_minus1 = sps->cpb_cnt - 1, - .bit_rate_scale = sps->bit_rate_scale, - .initial_cpb_removal_delay_length_minus1 = sps->initial_cpb_removal_delay_length - 1, - .cpb_removal_delay_length_minus1 = sps->cpb_removal_delay_length - 1, - .dpb_output_delay_length_minus1 = sps->dpb_output_delay_length - 1, - .time_offset_length = sps->time_offset_length, - }; - - for (int i = 0; i < sps->cpb_cnt; i++) { - vksps_vui_header->bit_rate_value_minus1[i] = sps->bit_rate_value[i] - 1; - vksps_vui_header->cpb_size_value_minus1[i] = sps->cpb_size_value[i] - 1; - vksps_vui_header->cbr_flag[i] = (sps->cpr_flag >> i) & 0x1; - } - - *vksps_vui = (StdVideoH264SequenceParameterSetVui) { - .aspect_ratio_idc = sps->vui.aspect_ratio_idc, - .sar_width = sps->vui.sar.num, - .sar_height = sps->vui.sar.den, - .video_format = sps->vui.video_format, - .colour_primaries = sps->vui.colour_primaries, - .transfer_characteristics = sps->vui.transfer_characteristics, - .matrix_coefficients = sps->vui.matrix_coeffs, - .num_units_in_tick = sps->num_units_in_tick, - .time_scale = sps->time_scale, - .pHrdParameters = vksps_vui_header, - .max_num_reorder_frames = sps->num_reorder_frames, - .max_dec_frame_buffering = 
sps->max_dec_frame_buffering, - .flags = (StdVideoH264SpsVuiFlags) { - .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag, - .overscan_info_present_flag = sps->vui.overscan_info_present_flag, - .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag, - .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag, - .video_full_range_flag = sps->vui.video_full_range_flag, - .color_description_present_flag = sps->vui.colour_description_present_flag, - .chroma_loc_info_present_flag = sps->vui.chroma_location, - .timing_info_present_flag = sps->timing_info_present_flag, - .fixed_frame_rate_flag = sps->fixed_frame_rate_flag, - .bitstream_restriction_flag = sps->bitstream_restriction_flag, - .nal_hrd_parameters_present_flag = sps->nal_hrd_parameters_present_flag, - .vcl_hrd_parameters_present_flag = sps->vcl_hrd_parameters_present_flag, - }, - }; - - *vksps = (StdVideoH264SequenceParameterSet) { - .profile_idc = sps->profile_idc, - .level_idc = convert_to_vk_level_idc(sps->level_idc), - .seq_parameter_set_id = sps->sps_id, - .chroma_format_idc = sps->chroma_format_idc, - .bit_depth_luma_minus8 = sps->bit_depth_luma - 8, - .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, - .log2_max_frame_num_minus4 = sps->log2_max_frame_num - 4, - .pic_order_cnt_type = sps->poc_type, - .log2_max_pic_order_cnt_lsb_minus4 = sps->poc_type ? 
0 : sps->log2_max_poc_lsb - 4, - .offset_for_non_ref_pic = sps->offset_for_non_ref_pic, - .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field, - .num_ref_frames_in_pic_order_cnt_cycle = sps->poc_cycle_length, - .max_num_ref_frames = sps->ref_frame_count, - .pic_width_in_mbs_minus1 = sps->mb_width - 1, - .pic_height_in_map_units_minus1 = (sps->mb_height/(2 - sps->frame_mbs_only_flag)) - 1, - .frame_crop_left_offset = sps->crop_left, - .frame_crop_right_offset = sps->crop_right, - .frame_crop_top_offset = sps->crop_top, - .frame_crop_bottom_offset = sps->crop_bottom, - .flags = (StdVideoH264SpsFlags) { - .constraint_set0_flag = (sps->constraint_set_flags >> 0) & 0x1, - .constraint_set1_flag = (sps->constraint_set_flags >> 1) & 0x1, - .constraint_set2_flag = (sps->constraint_set_flags >> 2) & 0x1, - .constraint_set3_flag = (sps->constraint_set_flags >> 3) & 0x1, - .constraint_set4_flag = (sps->constraint_set_flags >> 4) & 0x1, - .constraint_set5_flag = (sps->constraint_set_flags >> 5) & 0x1, - .direct_8x8_inference_flag = sps->direct_8x8_inference_flag, - .mb_adaptive_frame_field_flag = sps->mb_aff, - .frame_mbs_only_flag = sps->frame_mbs_only_flag, - .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag, - .separate_colour_plane_flag = sps->residual_color_transform_flag, - .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag, - .qpprime_y_zero_transform_bypass_flag = sps->transform_bypass, - .frame_cropping_flag = sps->crop, - .seq_scaling_matrix_present_flag = sps->scaling_matrix_present, - .vui_parameters_present_flag = sps->vui_parameters_present_flag, - }, - .pOffsetForRefFrame = sps->offset_for_ref_frame, - .pScalingLists = vksps_scaling, - .pSequenceParameterSetVui = vksps_vui, - }; -} - -static void set_pps(const PPS *pps, const SPS *sps, - StdVideoH264ScalingLists *vkpps_scaling, - StdVideoH264PictureParameterSet *vkpps) -{ - *vkpps_scaling = (StdVideoH264ScalingLists) { - .scaling_list_present_mask 
= pps->pic_scaling_matrix_present_mask, - .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */ - }; - - for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) - for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS; j++) - vkpps_scaling->ScalingList4x4[i][j] = pps->scaling_matrix4[i][ff_zigzag_scan[j]]; - - for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++) - for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS; j++) - vkpps_scaling->ScalingList8x8[i][j] = - pps->scaling_matrix8[h264_scaling_list8_order[i]][ff_zigzag_direct[j]]; - - *vkpps = (StdVideoH264PictureParameterSet) { - .seq_parameter_set_id = pps->sps_id, - .pic_parameter_set_id = pps->pps_id, - .num_ref_idx_l0_default_active_minus1 = pps->ref_count[0] - 1, - .num_ref_idx_l1_default_active_minus1 = pps->ref_count[1] - 1, - .weighted_bipred_idc = pps->weighted_bipred_idc, - .pic_init_qp_minus26 = pps->init_qp - 26, - .pic_init_qs_minus26 = pps->init_qs - 26, - .chroma_qp_index_offset = pps->chroma_qp_index_offset[0], - .second_chroma_qp_index_offset = pps->chroma_qp_index_offset[1], - .flags = (StdVideoH264PpsFlags) { - .transform_8x8_mode_flag = pps->transform_8x8_mode, - .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present, - .constrained_intra_pred_flag = pps->constrained_intra_pred, - .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present, - .weighted_pred_flag = pps->weighted_pred, - .bottom_field_pic_order_in_frame_present_flag = pps->pic_order_present, - .entropy_coding_mode_flag = pps->cabac, - .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag, - }, - .pScalingLists = vkpps_scaling, - }; -} - -static int vk_h264_create_params(AVCodecContext *avctx, AVBufferRef **buf) -{ - int err; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - const H264Context *h = avctx->priv_data; - - /* SPS */ - 
StdVideoH264ScalingLists vksps_scaling[MAX_SPS_COUNT]; - StdVideoH264HrdParameters vksps_vui_header[MAX_SPS_COUNT]; - StdVideoH264SequenceParameterSetVui vksps_vui[MAX_SPS_COUNT]; - StdVideoH264SequenceParameterSet vksps[MAX_SPS_COUNT]; - - /* PPS */ - StdVideoH264ScalingLists vkpps_scaling[MAX_PPS_COUNT]; - StdVideoH264PictureParameterSet vkpps[MAX_PPS_COUNT]; - - VkVideoDecodeH264SessionParametersAddInfoKHR h264_params_info = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR, - .pStdSPSs = vksps, - .stdSPSCount = 0, - .pStdPPSs = vkpps, - .stdPPSCount = 0, - }; - VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pParametersAddInfo = &h264_params_info, - }; - VkVideoSessionParametersCreateInfoKHR session_params_create = { - .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pNext = &h264_params, - .videoSession = ctx->common.session, - .videoSessionParametersTemplate = VK_NULL_HANDLE, - }; - - /* SPS list */ - for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.sps_list); i++) { - if (h->ps.sps_list[i]) { - const SPS *sps_l = h->ps.sps_list[i]; - int idx = h264_params_info.stdSPSCount; - set_sps(sps_l, &vksps_scaling[idx], &vksps_vui_header[idx], &vksps_vui[idx], &vksps[idx]); - h264_params_info.stdSPSCount++; - } - } - - /* PPS list */ - for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.pps_list); i++) { - if (h->ps.pps_list[i]) { - const PPS *pps_l = h->ps.pps_list[i]; - int idx = h264_params_info.stdPPSCount; - set_pps(pps_l, pps_l->sps, &vkpps_scaling[idx], &vkpps[idx]); - h264_params_info.stdPPSCount++; - } - } - - h264_params.maxStdSPSCount = h264_params_info.stdSPSCount; - h264_params.maxStdPPSCount = h264_params_info.stdPPSCount; - - err = ff_vk_decode_create_params(buf, avctx, ctx, &session_params_create); - if (err < 0) - return err; - - av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS\n", - 
h264_params_info.stdSPSCount, h264_params_info.stdPPSCount); - - return 0; -} - -static int vk_h264_start_frame(AVCodecContext *avctx, - av_unused const AVBufferRef *buffer_ref, - av_unused const uint8_t *buffer, - av_unused uint32_t size) -{ - int err; - int dpb_slot_index = 0; - H264Context *h = avctx->priv_data; - - H264Picture *pic = h->cur_pic_ptr; - H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &hp->vp; - - /* Fill in main slot */ - dpb_slot_index = 0; - for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { - if (pic == &h->DPB[slot]) { - dpb_slot_index = slot; - break; - } - } - - err = vk_h264_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, - &hp->vkh264_ref, &hp->h264_ref, pic, 1, - h->DPB[dpb_slot_index].field_picture, - h->DPB[dpb_slot_index].reference, - dpb_slot_index); - if (err < 0) - return err; - - /* Fill in short-term references */ - for (int i = 0; i < h->short_ref_count; i++) { - dpb_slot_index = 0; - for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { - if (h->short_ref[i] == &h->DPB[slot]) { - dpb_slot_index = slot; - break; - } - } - err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i], - &vp->refs[i], &hp->vkh264_refs[i], - &hp->h264_refs[i], h->short_ref[i], 0, - h->DPB[dpb_slot_index].field_picture, - h->DPB[dpb_slot_index].reference, - dpb_slot_index); - if (err < 0) - return err; - } - - /* Fill in long-term refs */ - for (int r = 0, i = h->short_ref_count; r < H264_MAX_DPB_FRAMES && - i < h->short_ref_count + h->long_ref_count; r++) { - if (!h->long_ref[r]) - continue; - - dpb_slot_index = 0; - for (unsigned slot = 0; slot < 16; slot++) { - if (h->long_ref[r] == &h->DPB[slot]) { - dpb_slot_index = slot; - break; - } - } - err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i], - &vp->refs[i], &hp->vkh264_refs[i], - &hp->h264_refs[i], h->long_ref[r], 0, - h->DPB[dpb_slot_index].field_picture, - h->DPB[dpb_slot_index].reference, - dpb_slot_index); 
- if (err < 0) - return err; - i++; - } - - hp->h264pic = (StdVideoDecodeH264PictureInfo) { - .seq_parameter_set_id = pic->pps->sps_id, - .pic_parameter_set_id = pic->pps->pps_id, - .frame_num = 0, /* Set later */ - .idr_pic_id = 0, /* Set later */ - .PicOrderCnt[0] = pic->field_poc[0], - .PicOrderCnt[1] = pic->field_poc[1], - .flags = (StdVideoDecodeH264PictureInfoFlags) { - .field_pic_flag = FIELD_PICTURE(h), - .is_intra = 1, /* Set later */ - .IdrPicFlag = h->picture_idr, - .bottom_field_flag = h->picture_structure != PICT_FRAME && - h->picture_structure & PICT_BOTTOM_FIELD, - .is_reference = h->nal_ref_idc != 0, - .complementary_field_pair = h->first_field && FIELD_PICTURE(h), - }, - }; - - hp->h264_pic_info = (VkVideoDecodeH264PictureInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR, - .pStdPictureInfo = &hp->h264pic, - }; - - vp->decode_info = (VkVideoDecodeInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, - .pNext = &hp->h264_pic_info, - .flags = 0x0, - .pSetupReferenceSlot = &vp->ref_slot, - .referenceSlotCount = h->short_ref_count + h->long_ref_count, - .pReferenceSlots = vp->ref_slots, - .dstPictureResource = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - .codedOffset = (VkOffset2D){ 0, 0 }, - .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, - .baseArrayLayer = 0, - .imageViewBinding = vp->view.out[0], - }, - }; - - return 0; -} - -static int vk_h264_decode_slice(AVCodecContext *avctx, - const uint8_t *data, - uint32_t size) -{ - const H264Context *h = avctx->priv_data; - const H264SliceContext *sl = &h->slice_ctx[0]; - H264VulkanDecodePicture *hp = h->cur_pic_ptr->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &hp->vp; - - int err = ff_vk_decode_add_slice(avctx, vp, data, size, 1, - &hp->h264_pic_info.sliceCount, - &hp->h264_pic_info.pSliceOffsets); - if (err < 0) - return err; - - hp->h264pic.frame_num = sl->frame_num; - hp->h264pic.idr_pic_id = 
sl->idr_pic_id; - - /* Frame is only intra of all slices are marked as intra */ - if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI) - hp->h264pic.flags.is_intra = 0; - - return 0; -} - -static int vk_h264_end_frame(AVCodecContext *avctx) -{ - const H264Context *h = avctx->priv_data; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - - H264Picture *pic = h->cur_pic_ptr; - H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &hp->vp; - FFVulkanDecodePicture *rvp[H264_MAX_PICTURE_COUNT] = { 0 }; - AVFrame *rav[H264_MAX_PICTURE_COUNT] = { 0 }; - -#ifdef VK_KHR_video_maintenance2 - StdVideoH264ScalingLists vksps_scaling; - StdVideoH264HrdParameters vksps_vui_header; - StdVideoH264SequenceParameterSetVui vksps_vui; - StdVideoH264SequenceParameterSet vksps; - StdVideoH264ScalingLists vkpps_scaling; - StdVideoH264PictureParameterSet vkpps; - VkVideoDecodeH264InlineSessionParametersInfoKHR h264_params; - - if (ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2) { - set_sps(h->ps.sps, &vksps_scaling, - &vksps_vui_header, &vksps_vui, &vksps); - set_pps(h->ps.pps, h->ps.sps, &vkpps_scaling, &vkpps); - h264_params = (VkVideoDecodeH264InlineSessionParametersInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_INLINE_SESSION_PARAMETERS_INFO_KHR, - .pStdSPS = &vksps, - .pStdPPS = &vkpps, - }; - hp->h264_pic_info.pNext = &h264_params; - } -#endif - - if (!hp->h264_pic_info.sliceCount) - return 0; - - if (!vp->slices_buf) - return AVERROR(EINVAL); - - if (!dec->session_params && - !(ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2)) { - int err = vk_h264_create_params(avctx, &dec->session_params); - if (err < 0) - return err; - - hp->h264pic.seq_parameter_set_id = pic->pps->sps_id; - hp->h264pic.pic_parameter_set_id = pic->pps->pps_id; - } - - for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { - H264Picture *rp = hp->ref_src[i]; - 
H264VulkanDecodePicture *rhp = rp->hwaccel_picture_private; - - rvp[i] = &rhp->vp; - rav[i] = hp->ref_src[i]->f; - } - - av_log(avctx, AV_LOG_DEBUG, "Decoding frame, %"SIZE_SPECIFIER" bytes, %i slices\n", - vp->slices_size, hp->h264_pic_info.sliceCount); - - return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); -} - -static void vk_h264_free_frame_priv(AVRefStructOpaque _hwctx, void *data) -{ - AVHWDeviceContext *hwctx = _hwctx.nc; - H264VulkanDecodePicture *hp = data; - - /* Free frame resources, this also destroys the session parameters. */ - ff_vk_decode_free_frame(hwctx, &hp->vp); -} - -const FFHWAccel ff_h264_vulkan_hwaccel = { - .p.name = "h264_vulkan", - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_H264, - .p.pix_fmt = AV_PIX_FMT_VULKAN, - .start_frame = &vk_h264_start_frame, - .decode_slice = &vk_h264_decode_slice, - .end_frame = &vk_h264_end_frame, - .free_frame_priv = &vk_h264_free_frame_priv, - .frame_priv_data_size = sizeof(H264VulkanDecodePicture), - .init = &ff_vk_decode_init, - .update_thread_context = &ff_vk_update_thread_context, - .decode_params = &ff_vk_params_invalidate, - .flush = &ff_vk_decode_flush, - .uninit = &ff_vk_decode_uninit, - .frame_params = &ff_vk_frame_params, - .priv_data_size = sizeof(FFVulkanDecodeContext), - .caps_internal = HWACCEL_CAP_ASYNC_SAFE, -}; -- 2.49.1 From 84fd250275d641ad757fcf6a3e0a362a72f814a2 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:38:00 +0000 Subject: [PATCH 082/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_h264.c | 592 ++++++++++++++++++++++++++++++++ 1 file changed, 592 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_h264.c diff --git a/libavcodec/vulkan/vulkan_h264.c b/libavcodec/vulkan/vulkan_h264.c new file mode 100644 index 0000000000..aebfd4b738 --- /dev/null +++ b/libavcodec/vulkan/vulkan_h264.c @@ -0,0 +1,592 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/h264dec.h" +#include "libavcodec/h264_ps.h" + +#include "vulkan_decode.h" + +const FFVulkanDecodeDescriptor ff_vk_dec_h264_desc = { + .codec_id = AV_CODEC_ID_H264, + .decode_extension = FF_VK_EXT_VIDEO_DECODE_H264, + .queue_flags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, + .decode_op = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR, + .ext_props = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION, + }, +}; + +typedef struct H264VulkanDecodePicture { + FFVulkanDecodePicture vp; + + /* Current picture */ + StdVideoDecodeH264ReferenceInfo h264_ref; + VkVideoDecodeH264DpbSlotInfoKHR vkh264_ref; + + /* Picture refs */ + H264Picture *ref_src [H264_MAX_PICTURE_COUNT]; + StdVideoDecodeH264ReferenceInfo h264_refs [H264_MAX_PICTURE_COUNT]; + VkVideoDecodeH264DpbSlotInfoKHR vkh264_refs[H264_MAX_PICTURE_COUNT]; + + /* Current picture (contd.) 
*/ + StdVideoDecodeH264PictureInfo h264pic; + VkVideoDecodeH264PictureInfoKHR h264_pic_info; +} H264VulkanDecodePicture; + +const static int h264_scaling_list8_order[] = { 0, 3, 1, 4, 2, 5 }; + +static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + VkVideoDecodeH264DpbSlotInfoKHR *vkh264_ref, /* Goes in ^ */ + StdVideoDecodeH264ReferenceInfo *h264_ref, /* Goes in ^ */ + H264Picture *pic, int is_current, + int is_field, int picture_structure, + int dpb_slot_index) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(dec, pic->f, vkpic, is_current, + dec->dedicated_dpb); + if (err < 0) + return err; + + *h264_ref = (StdVideoDecodeH264ReferenceInfo) { + .FrameNum = pic->long_ref ? pic->pic_id : pic->frame_num, + .PicOrderCnt = { pic->field_poc[0], pic->field_poc[1] }, + .flags = (StdVideoDecodeH264ReferenceInfoFlags) { + .top_field_flag = is_field ? !!(picture_structure & PICT_TOP_FIELD) : 0, + .bottom_field_flag = is_field ? !!(picture_structure & PICT_BOTTOM_FIELD) : 0, + .used_for_long_term_reference = pic->reference && pic->long_ref, + /* + * flags.is_non_existing is used to indicate whether the picture is marked as + * “non-existing” as defined in section 8.2.5.2 of the ITU-T H.264 Specification; + * 8.2.5.2 Decoding process for gaps in frame_num + * corresponds to the code in h264_slice.c:h264_field_start, + * which sets the invalid_gap flag when decoding. 
+ */ + .is_non_existing = pic->invalid_gap, + }, + }; + + *vkh264_ref = (VkVideoDecodeH264DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = h264_ref, + }; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = ctx->common.layered_dpb ? dpb_slot_index : 0, + .imageViewBinding = vkpic->view.ref[0], + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = vkh264_ref, + .slotIndex = dpb_slot_index, + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static StdVideoH264LevelIdc convert_to_vk_level_idc(int level_idc) +{ + switch (level_idc) { + case 10: return STD_VIDEO_H264_LEVEL_IDC_1_0; + case 11: return STD_VIDEO_H264_LEVEL_IDC_1_1; + case 12: return STD_VIDEO_H264_LEVEL_IDC_1_2; + case 13: return STD_VIDEO_H264_LEVEL_IDC_1_3; + case 20: return STD_VIDEO_H264_LEVEL_IDC_2_0; + case 21: return STD_VIDEO_H264_LEVEL_IDC_2_1; + case 22: return STD_VIDEO_H264_LEVEL_IDC_2_2; + case 30: return STD_VIDEO_H264_LEVEL_IDC_3_0; + case 31: return STD_VIDEO_H264_LEVEL_IDC_3_1; + case 32: return STD_VIDEO_H264_LEVEL_IDC_3_2; + case 40: return STD_VIDEO_H264_LEVEL_IDC_4_0; + case 41: return STD_VIDEO_H264_LEVEL_IDC_4_1; + case 42: return STD_VIDEO_H264_LEVEL_IDC_4_2; + case 50: return STD_VIDEO_H264_LEVEL_IDC_5_0; + case 51: return STD_VIDEO_H264_LEVEL_IDC_5_1; + case 52: return STD_VIDEO_H264_LEVEL_IDC_5_2; + case 60: return STD_VIDEO_H264_LEVEL_IDC_6_0; + case 61: return STD_VIDEO_H264_LEVEL_IDC_6_1; + default: + case 62: return STD_VIDEO_H264_LEVEL_IDC_6_2; + } +} + +static void set_sps(const SPS *sps, + StdVideoH264ScalingLists *vksps_scaling, + StdVideoH264HrdParameters *vksps_vui_header, + StdVideoH264SequenceParameterSetVui *vksps_vui, + 
StdVideoH264SequenceParameterSet *vksps) +{ + *vksps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask = sps->scaling_matrix_present_mask, + .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */ + }; + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) + for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS; j++) + vksps_scaling->ScalingList4x4[i][j] = sps->scaling_matrix4[i][ff_zigzag_scan[j]]; + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++) + for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS; j++) + vksps_scaling->ScalingList8x8[i][j] = + sps->scaling_matrix8[h264_scaling_list8_order[i]][ff_zigzag_direct[j]]; + + *vksps_vui_header = (StdVideoH264HrdParameters) { + .cpb_cnt_minus1 = sps->cpb_cnt - 1, + .bit_rate_scale = sps->bit_rate_scale, + .initial_cpb_removal_delay_length_minus1 = sps->initial_cpb_removal_delay_length - 1, + .cpb_removal_delay_length_minus1 = sps->cpb_removal_delay_length - 1, + .dpb_output_delay_length_minus1 = sps->dpb_output_delay_length - 1, + .time_offset_length = sps->time_offset_length, + }; + + for (int i = 0; i < sps->cpb_cnt; i++) { + vksps_vui_header->bit_rate_value_minus1[i] = sps->bit_rate_value[i] - 1; + vksps_vui_header->cpb_size_value_minus1[i] = sps->cpb_size_value[i] - 1; + vksps_vui_header->cbr_flag[i] = (sps->cpr_flag >> i) & 0x1; + } + + *vksps_vui = (StdVideoH264SequenceParameterSetVui) { + .aspect_ratio_idc = sps->vui.aspect_ratio_idc, + .sar_width = sps->vui.sar.num, + .sar_height = sps->vui.sar.den, + .video_format = sps->vui.video_format, + .colour_primaries = sps->vui.colour_primaries, + .transfer_characteristics = sps->vui.transfer_characteristics, + .matrix_coefficients = sps->vui.matrix_coeffs, + .num_units_in_tick = sps->num_units_in_tick, + .time_scale = sps->time_scale, + .pHrdParameters = vksps_vui_header, + .max_num_reorder_frames = sps->num_reorder_frames, + .max_dec_frame_buffering = 
sps->max_dec_frame_buffering, + .flags = (StdVideoH264SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.overscan_appropriate_flag, + .video_signal_type_present_flag = sps->vui.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.video_full_range_flag, + .color_description_present_flag = sps->vui.colour_description_present_flag, + .chroma_loc_info_present_flag = sps->vui.chroma_location, + .timing_info_present_flag = sps->timing_info_present_flag, + .fixed_frame_rate_flag = sps->fixed_frame_rate_flag, + .bitstream_restriction_flag = sps->bitstream_restriction_flag, + .nal_hrd_parameters_present_flag = sps->nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->vcl_hrd_parameters_present_flag, + }, + }; + + *vksps = (StdVideoH264SequenceParameterSet) { + .profile_idc = sps->profile_idc, + .level_idc = convert_to_vk_level_idc(sps->level_idc), + .seq_parameter_set_id = sps->sps_id, + .chroma_format_idc = sps->chroma_format_idc, + .bit_depth_luma_minus8 = sps->bit_depth_luma - 8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, + .log2_max_frame_num_minus4 = sps->log2_max_frame_num - 4, + .pic_order_cnt_type = sps->poc_type, + .log2_max_pic_order_cnt_lsb_minus4 = sps->poc_type ? 
0 : sps->log2_max_poc_lsb - 4, + .offset_for_non_ref_pic = sps->offset_for_non_ref_pic, + .offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field, + .num_ref_frames_in_pic_order_cnt_cycle = sps->poc_cycle_length, + .max_num_ref_frames = sps->ref_frame_count, + .pic_width_in_mbs_minus1 = sps->mb_width - 1, + .pic_height_in_map_units_minus1 = (sps->mb_height/(2 - sps->frame_mbs_only_flag)) - 1, + .frame_crop_left_offset = sps->crop_left, + .frame_crop_right_offset = sps->crop_right, + .frame_crop_top_offset = sps->crop_top, + .frame_crop_bottom_offset = sps->crop_bottom, + .flags = (StdVideoH264SpsFlags) { + .constraint_set0_flag = (sps->constraint_set_flags >> 0) & 0x1, + .constraint_set1_flag = (sps->constraint_set_flags >> 1) & 0x1, + .constraint_set2_flag = (sps->constraint_set_flags >> 2) & 0x1, + .constraint_set3_flag = (sps->constraint_set_flags >> 3) & 0x1, + .constraint_set4_flag = (sps->constraint_set_flags >> 4) & 0x1, + .constraint_set5_flag = (sps->constraint_set_flags >> 5) & 0x1, + .direct_8x8_inference_flag = sps->direct_8x8_inference_flag, + .mb_adaptive_frame_field_flag = sps->mb_aff, + .frame_mbs_only_flag = sps->frame_mbs_only_flag, + .delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag, + .separate_colour_plane_flag = sps->residual_color_transform_flag, + .gaps_in_frame_num_value_allowed_flag = sps->gaps_in_frame_num_allowed_flag, + .qpprime_y_zero_transform_bypass_flag = sps->transform_bypass, + .frame_cropping_flag = sps->crop, + .seq_scaling_matrix_present_flag = sps->scaling_matrix_present, + .vui_parameters_present_flag = sps->vui_parameters_present_flag, + }, + .pOffsetForRefFrame = sps->offset_for_ref_frame, + .pScalingLists = vksps_scaling, + .pSequenceParameterSetVui = vksps_vui, + }; +} + +static void set_pps(const PPS *pps, const SPS *sps, + StdVideoH264ScalingLists *vkpps_scaling, + StdVideoH264PictureParameterSet *vkpps) +{ + *vkpps_scaling = (StdVideoH264ScalingLists) { + .scaling_list_present_mask 
= pps->pic_scaling_matrix_present_mask, + .use_default_scaling_matrix_mask = 0, /* We already fill in the default matrix */ + }; + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) + for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS; j++) + vkpps_scaling->ScalingList4x4[i][j] = pps->scaling_matrix4[i][ff_zigzag_scan[j]]; + + for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS; i++) + for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS; j++) + vkpps_scaling->ScalingList8x8[i][j] = + pps->scaling_matrix8[h264_scaling_list8_order[i]][ff_zigzag_direct[j]]; + + *vkpps = (StdVideoH264PictureParameterSet) { + .seq_parameter_set_id = pps->sps_id, + .pic_parameter_set_id = pps->pps_id, + .num_ref_idx_l0_default_active_minus1 = pps->ref_count[0] - 1, + .num_ref_idx_l1_default_active_minus1 = pps->ref_count[1] - 1, + .weighted_bipred_idc = pps->weighted_bipred_idc, + .pic_init_qp_minus26 = pps->init_qp - 26, + .pic_init_qs_minus26 = pps->init_qs - 26, + .chroma_qp_index_offset = pps->chroma_qp_index_offset[0], + .second_chroma_qp_index_offset = pps->chroma_qp_index_offset[1], + .flags = (StdVideoH264PpsFlags) { + .transform_8x8_mode_flag = pps->transform_8x8_mode, + .redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present, + .constrained_intra_pred_flag = pps->constrained_intra_pred, + .deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present, + .weighted_pred_flag = pps->weighted_pred, + .bottom_field_pic_order_in_frame_present_flag = pps->pic_order_present, + .entropy_coding_mode_flag = pps->cabac, + .pic_scaling_matrix_present_flag = pps->pic_scaling_matrix_present_flag, + }, + .pScalingLists = vkpps_scaling, + }; +} + +static int vk_h264_create_params(AVCodecContext *avctx, AVBufferRef **buf) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + const H264Context *h = avctx->priv_data; + + /* SPS */ + 
StdVideoH264ScalingLists vksps_scaling[MAX_SPS_COUNT]; + StdVideoH264HrdParameters vksps_vui_header[MAX_SPS_COUNT]; + StdVideoH264SequenceParameterSetVui vksps_vui[MAX_SPS_COUNT]; + StdVideoH264SequenceParameterSet vksps[MAX_SPS_COUNT]; + + /* PPS */ + StdVideoH264ScalingLists vkpps_scaling[MAX_PPS_COUNT]; + StdVideoH264PictureParameterSet vkpps[MAX_PPS_COUNT]; + + VkVideoDecodeH264SessionParametersAddInfoKHR h264_params_info = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR, + .pStdSPSs = vksps, + .stdSPSCount = 0, + .pStdPPSs = vkpps, + .stdPPSCount = 0, + }; + VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pParametersAddInfo = &h264_params_info, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &h264_params, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = VK_NULL_HANDLE, + }; + + /* SPS list */ + for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.sps_list); i++) { + if (h->ps.sps_list[i]) { + const SPS *sps_l = h->ps.sps_list[i]; + int idx = h264_params_info.stdSPSCount; + set_sps(sps_l, &vksps_scaling[idx], &vksps_vui_header[idx], &vksps_vui[idx], &vksps[idx]); + h264_params_info.stdSPSCount++; + } + } + + /* PPS list */ + for (int i = 0; i < FF_ARRAY_ELEMS(h->ps.pps_list); i++) { + if (h->ps.pps_list[i]) { + const PPS *pps_l = h->ps.pps_list[i]; + int idx = h264_params_info.stdPPSCount; + set_pps(pps_l, pps_l->sps, &vkpps_scaling[idx], &vkpps[idx]); + h264_params_info.stdPPSCount++; + } + } + + h264_params.maxStdSPSCount = h264_params_info.stdSPSCount; + h264_params.maxStdPPSCount = h264_params_info.stdPPSCount; + + err = ff_vk_decode_create_params(buf, avctx, ctx, &session_params_create); + if (err < 0) + return err; + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS\n", + 
h264_params_info.stdSPSCount, h264_params_info.stdPPSCount); + + return 0; +} + +static int vk_h264_start_frame(AVCodecContext *avctx, + av_unused const AVBufferRef *buffer_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + int dpb_slot_index = 0; + H264Context *h = avctx->priv_data; + + H264Picture *pic = h->cur_pic_ptr; + H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + /* Fill in main slot */ + dpb_slot_index = 0; + for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { + if (pic == &h->DPB[slot]) { + dpb_slot_index = slot; + break; + } + } + + err = vk_h264_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &hp->vkh264_ref, &hp->h264_ref, pic, 1, + h->DPB[dpb_slot_index].field_picture, + h->DPB[dpb_slot_index].reference, + dpb_slot_index); + if (err < 0) + return err; + + /* Fill in short-term references */ + for (int i = 0; i < h->short_ref_count; i++) { + dpb_slot_index = 0; + for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { + if (h->short_ref[i] == &h->DPB[slot]) { + dpb_slot_index = slot; + break; + } + } + err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i], + &vp->refs[i], &hp->vkh264_refs[i], + &hp->h264_refs[i], h->short_ref[i], 0, + h->DPB[dpb_slot_index].field_picture, + h->DPB[dpb_slot_index].reference, + dpb_slot_index); + if (err < 0) + return err; + } + + /* Fill in long-term refs */ + for (int r = 0, i = h->short_ref_count; r < H264_MAX_DPB_FRAMES && + i < h->short_ref_count + h->long_ref_count; r++) { + if (!h->long_ref[r]) + continue; + + dpb_slot_index = 0; + /* Scan every DPB entry, using the same bound as the main-slot and short-term lookups */ + for (unsigned slot = 0; slot < H264_MAX_PICTURE_COUNT; slot++) { + if (h->long_ref[r] == &h->DPB[slot]) { + dpb_slot_index = slot; + break; + } + } + err = vk_h264_fill_pict(avctx, &hp->ref_src[i], &vp->ref_slots[i], + &vp->refs[i], &hp->vkh264_refs[i], + &hp->h264_refs[i], h->long_ref[r], 0, + h->DPB[dpb_slot_index].field_picture, + h->DPB[dpb_slot_index].reference, + dpb_slot_index);
+ if (err < 0) + return err; + i++; + } + + hp->h264pic = (StdVideoDecodeH264PictureInfo) { + .seq_parameter_set_id = pic->pps->sps_id, + .pic_parameter_set_id = pic->pps->pps_id, + .frame_num = 0, /* Set later */ + .idr_pic_id = 0, /* Set later */ + .PicOrderCnt[0] = pic->field_poc[0], + .PicOrderCnt[1] = pic->field_poc[1], + .flags = (StdVideoDecodeH264PictureInfoFlags) { + .field_pic_flag = FIELD_PICTURE(h), + .is_intra = 1, /* Set later */ + .IdrPicFlag = h->picture_idr, + .bottom_field_flag = h->picture_structure != PICT_FRAME && + h->picture_structure & PICT_BOTTOM_FIELD, + .is_reference = h->nal_ref_idc != 0, + .complementary_field_pair = h->first_field && FIELD_PICTURE(h), + }, + }; + + hp->h264_pic_info = (VkVideoDecodeH264PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR, + .pStdPictureInfo = &hp->h264pic, + }; + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &hp->h264_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = h->short_ref_count + h->long_ref_count, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->view.out[0], + }, + }; + + return 0; +} + +static int vk_h264_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + const H264Context *h = avctx->priv_data; + const H264SliceContext *sl = &h->slice_ctx[0]; + H264VulkanDecodePicture *hp = h->cur_pic_ptr->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + int err = ff_vk_decode_add_slice(avctx, vp, data, size, 1, + &hp->h264_pic_info.sliceCount, + &hp->h264_pic_info.pSliceOffsets); + if (err < 0) + return err; + + hp->h264pic.frame_num = sl->frame_num; + hp->h264pic.idr_pic_id = 
sl->idr_pic_id; + + /* Frame is only intra if all slices are marked as intra */ + if (sl->slice_type != AV_PICTURE_TYPE_I && sl->slice_type != AV_PICTURE_TYPE_SI) + hp->h264pic.flags.is_intra = 0; + + return 0; +} + +static int vk_h264_end_frame(AVCodecContext *avctx) +{ + const H264Context *h = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + H264Picture *pic = h->cur_pic_ptr; + H264VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + FFVulkanDecodePicture *rvp[H264_MAX_PICTURE_COUNT] = { 0 }; + AVFrame *rav[H264_MAX_PICTURE_COUNT] = { 0 }; + +#ifdef VK_KHR_video_maintenance2 + StdVideoH264ScalingLists vksps_scaling; + StdVideoH264HrdParameters vksps_vui_header; + StdVideoH264SequenceParameterSetVui vksps_vui; + StdVideoH264SequenceParameterSet vksps; + StdVideoH264ScalingLists vkpps_scaling; + StdVideoH264PictureParameterSet vkpps; + VkVideoDecodeH264InlineSessionParametersInfoKHR h264_params; + + if (ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2) { + set_sps(h->ps.sps, &vksps_scaling, + &vksps_vui_header, &vksps_vui, &vksps); + set_pps(h->ps.pps, h->ps.sps, &vkpps_scaling, &vkpps); + h264_params = (VkVideoDecodeH264InlineSessionParametersInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_INLINE_SESSION_PARAMETERS_INFO_KHR, + .pStdSPS = &vksps, + .pStdPPS = &vkpps, + }; + hp->h264_pic_info.pNext = &h264_params; + } +#endif + + if (!hp->h264_pic_info.sliceCount) + return 0; + + if (!vp->slices_buf) + return AVERROR(EINVAL); + + if (!dec->session_params && + !(ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2)) { + int err = vk_h264_create_params(avctx, &dec->session_params); + if (err < 0) + return err; + + hp->h264pic.seq_parameter_set_id = pic->pps->sps_id; + hp->h264pic.pic_parameter_set_id = pic->pps->pps_id; + } + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + H264Picture *rp = hp->ref_src[i]; +
H264VulkanDecodePicture *rhp = rp->hwaccel_picture_private; + + rvp[i] = &rhp->vp; + rav[i] = hp->ref_src[i]->f; + } + + av_log(avctx, AV_LOG_DEBUG, "Decoding frame, %"SIZE_SPECIFIER" bytes, %i slices\n", + vp->slices_size, hp->h264_pic_info.sliceCount); + + return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); +} + +static void vk_h264_free_frame_priv(AVRefStructOpaque _hwctx, void *data) +{ + AVHWDeviceContext *hwctx = _hwctx.nc; + H264VulkanDecodePicture *hp = data; + + /* Free frame resources, this also destroys the session parameters. */ + ff_vk_decode_free_frame(hwctx, &hp->vp); +} + +const FFHWAccel ff_h264_vulkan_hwaccel = { + .p.name = "h264_vulkan", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_H264, + .p.pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_h264_start_frame, + .decode_slice = &vk_h264_decode_slice, + .end_frame = &vk_h264_end_frame, + .free_frame_priv = &vk_h264_free_frame_priv, + .frame_priv_data_size = sizeof(H264VulkanDecodePicture), + .init = &ff_vk_decode_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_invalidate, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; -- 2.49.1 From 78b6ad6ce91af3ba55cb81a538b4fbc4c8dce703 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:38:26 +0000 Subject: [PATCH 083/118] Changing vulkan file directory --- libavcodec/vulkan_hevc.c | 957 --------------------------------------- 1 file changed, 957 deletions(-) delete mode 100644 libavcodec/vulkan_hevc.c diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c deleted file mode 100644 index 5e15c6b931..0000000000 --- a/libavcodec/vulkan_hevc.c +++ /dev/null @@ -1,957 +0,0 @@ -/* - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/mem.h" -#include "hevc/hevcdec.h" -#include "hevc/data.h" -#include "hevc/ps.h" - -#include "vulkan_decode.h" - -const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc = { - .codec_id = AV_CODEC_ID_HEVC, - .decode_extension = FF_VK_EXT_VIDEO_DECODE_H265, - .queue_flags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, - .decode_op = VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR, - .ext_props = { - .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, - .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION, - }, -}; - -typedef struct HEVCHeaderSPS { - StdVideoH265ScalingLists scaling; - StdVideoH265HrdParameters vui_header; - StdVideoH265SequenceParameterSetVui vui; - StdVideoH265ProfileTierLevel ptl; - StdVideoH265DecPicBufMgr dpbm; - StdVideoH265PredictorPaletteEntries pal; - StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS]; - StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS]; - StdVideoH265ShortTermRefPicSet str[HEVC_MAX_SHORT_TERM_REF_PIC_SETS]; - StdVideoH265LongTermRefPicsSps ltr; -} HEVCHeaderSPS; - -typedef struct HEVCHeaderPPS { - StdVideoH265ScalingLists scaling; - StdVideoH265PredictorPaletteEntries pal; -} HEVCHeaderPPS; - -typedef struct HEVCHeaderVPSSet { - 
StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS]; - StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS]; -} HEVCHeaderVPSSet; - -typedef struct HEVCHeaderVPS { - StdVideoH265ProfileTierLevel ptl; - StdVideoH265DecPicBufMgr dpbm; - StdVideoH265HrdParameters hdr[HEVC_MAX_LAYER_SETS]; - HEVCHeaderVPSSet *sls; -} HEVCHeaderVPS; - -typedef struct HEVCHeaderSet { - StdVideoH265SequenceParameterSet sps[HEVC_MAX_SPS_COUNT]; - HEVCHeaderSPS hsps[HEVC_MAX_SPS_COUNT]; - - StdVideoH265PictureParameterSet pps[HEVC_MAX_PPS_COUNT]; - HEVCHeaderPPS hpps[HEVC_MAX_PPS_COUNT]; - - StdVideoH265VideoParameterSet vps[HEVC_MAX_PPS_COUNT]; - HEVCHeaderVPS *hvps; -} HEVCHeaderSet; - -static int alloc_hevc_header_structs(FFVulkanDecodeContext *s, - int nb_vps, - const int vps_list_idx[HEVC_MAX_VPS_COUNT], - const HEVCVPS * const vps_list[HEVC_MAX_VPS_COUNT]) -{ - uint8_t *data_ptr; - HEVCHeaderSet *hdr; - - size_t buf_size = sizeof(HEVCHeaderSet) + nb_vps*sizeof(HEVCHeaderVPS); - for (int i = 0; i < nb_vps; i++) { - const HEVCVPS *vps = vps_list[vps_list_idx[i]]; - buf_size += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters; - } - - if (buf_size > s->hevc_headers_size) { - av_freep(&s->hevc_headers); - s->hevc_headers_size = 0; - s->hevc_headers = av_mallocz(buf_size); - if (!s->hevc_headers) - return AVERROR(ENOMEM); - s->hevc_headers_size = buf_size; - } - - /* Setup struct pointers */ - hdr = s->hevc_headers; - data_ptr = (uint8_t *)hdr; - hdr->hvps = (HEVCHeaderVPS *)(data_ptr + sizeof(HEVCHeaderSet)); - data_ptr += sizeof(HEVCHeaderSet) + nb_vps*sizeof(HEVCHeaderVPS); - for (int i = 0; i < nb_vps; i++) { - const HEVCVPS *vps = vps_list[vps_list_idx[i]]; - hdr->hvps[i].sls = (HEVCHeaderVPSSet *)data_ptr; - data_ptr += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters; - } - - return 0; -} - -typedef struct HEVCVulkanDecodePicture { - FFVulkanDecodePicture vp; - - /* Current picture */ - StdVideoDecodeH265ReferenceInfo h265_ref; - 
VkVideoDecodeH265DpbSlotInfoKHR vkh265_ref; - - /* Picture refs */ - HEVCFrame *ref_src [HEVC_MAX_REFS]; - StdVideoDecodeH265ReferenceInfo h265_refs [HEVC_MAX_REFS]; - VkVideoDecodeH265DpbSlotInfoKHR vkh265_refs[HEVC_MAX_REFS]; - - /* Current picture (contd.) */ - StdVideoDecodeH265PictureInfo h265pic; - VkVideoDecodeH265PictureInfoKHR h265_pic_info; -} HEVCVulkanDecodePicture; - -static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src, - VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ - VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ - VkVideoDecodeH265DpbSlotInfoKHR *vkh265_ref, /* Goes in ^ */ - StdVideoDecodeH265ReferenceInfo *h265_ref, /* Goes in ^ */ - HEVCFrame *pic, int is_current, int pic_id) -{ - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vkpic = &hp->vp; - - int err = ff_vk_decode_prepare_frame(dec, pic->f, vkpic, is_current, - dec->dedicated_dpb); - if (err < 0) - return err; - - *h265_ref = (StdVideoDecodeH265ReferenceInfo) { - .flags = (StdVideoDecodeH265ReferenceInfoFlags) { - .used_for_long_term_reference = pic->flags & HEVC_FRAME_FLAG_LONG_REF, - .unused_for_reference = 0, - }, - .PicOrderCntVal = pic->poc, - }; - - *vkh265_ref = (VkVideoDecodeH265DpbSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR, - .pStdReferenceInfo = h265_ref, - }; - - *ref = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - .codedOffset = (VkOffset2D){ 0, 0 }, - .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, - .baseArrayLayer = ctx->common.layered_dpb ? 
pic_id : 0, - .imageViewBinding = vkpic->view.ref[0], - }; - - *ref_slot = (VkVideoReferenceSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, - .pNext = vkh265_ref, - .slotIndex = pic_id, - .pPictureResource = ref, - }; - - if (ref_src) - *ref_src = pic; - - return 0; -} - -static void copy_scaling_list(const ScalingList *sl, StdVideoH265ScalingLists *vksl) -{ - for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) { - for (int j = 0; j < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS; j++) { - uint8_t pos = 4 * ff_hevc_diag_scan4x4_y[j] + ff_hevc_diag_scan4x4_x[j]; - vksl->ScalingList4x4[i][j] = sl->sl[0][i][pos]; - } - } - - for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++) { - for (int j = 0; j < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS; j++) { - uint8_t pos = 8 * ff_hevc_diag_scan8x8_y[j] + ff_hevc_diag_scan8x8_x[j]; - vksl->ScalingList8x8[i][j] = sl->sl[1][i][pos]; - } - } - - for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++) { - for (int j = 0; j < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS; j++) { - uint8_t pos = 8 * ff_hevc_diag_scan8x8_y[j] + ff_hevc_diag_scan8x8_x[j]; - vksl->ScalingList16x16[i][j] = sl->sl[2][i][pos]; - } - } - - for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++) { - for (int j = 0; j < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS; j++) { - uint8_t pos = 8 * ff_hevc_diag_scan8x8_y[j] + ff_hevc_diag_scan8x8_x[j]; - vksl->ScalingList32x32[i][j] = sl->sl[3][i * 3][pos]; - } - } - - memcpy(vksl->ScalingListDCCoef16x16, sl->sl_dc[0], - STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * sizeof(*vksl->ScalingListDCCoef16x16)); - - for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++) - vksl->ScalingListDCCoef32x32[i] = sl->sl_dc[1][i * 3]; -} - -static void set_sps(const HEVCSPS *sps, int sps_idx, - StdVideoH265ScalingLists *vksps_scaling, - StdVideoH265HrdParameters *vksps_vui_header, - StdVideoH265SequenceParameterSetVui 
*vksps_vui, - StdVideoH265SequenceParameterSet *vksps, - StdVideoH265SubLayerHrdParameters *slhdrnal, - StdVideoH265SubLayerHrdParameters *slhdrvcl, - StdVideoH265ProfileTierLevel *ptl, - StdVideoH265DecPicBufMgr *dpbm, - StdVideoH265PredictorPaletteEntries *pal, - StdVideoH265ShortTermRefPicSet *str, - StdVideoH265LongTermRefPicsSps *ltr) -{ - copy_scaling_list(&sps->scaling_list, vksps_scaling); - - *vksps_vui_header = (StdVideoH265HrdParameters) { - .flags = (StdVideoH265HrdFlags) { - .nal_hrd_parameters_present_flag = sps->hdr.nal_hrd_parameters_present_flag, - .vcl_hrd_parameters_present_flag = sps->hdr.vcl_hrd_parameters_present_flag, - .sub_pic_hrd_params_present_flag = sps->hdr.sub_pic_hrd_params_present_flag, - .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->hdr.sub_pic_cpb_params_in_pic_timing_sei_flag, - .fixed_pic_rate_general_flag = sps->hdr.flags.fixed_pic_rate_general_flag, - .fixed_pic_rate_within_cvs_flag = sps->hdr.flags.fixed_pic_rate_within_cvs_flag, - .low_delay_hrd_flag = sps->hdr.flags.low_delay_hrd_flag, - }, - .tick_divisor_minus2 = sps->hdr.tick_divisor_minus2, - .du_cpb_removal_delay_increment_length_minus1 = sps->hdr.du_cpb_removal_delay_increment_length_minus1, - .dpb_output_delay_du_length_minus1 = sps->hdr.dpb_output_delay_du_length_minus1, - .bit_rate_scale = sps->hdr.bit_rate_scale, - .cpb_size_scale = sps->hdr.cpb_size_scale, - .cpb_size_du_scale = sps->hdr.cpb_size_du_scale, - .initial_cpb_removal_delay_length_minus1 = sps->hdr.initial_cpb_removal_delay_length_minus1, - .au_cpb_removal_delay_length_minus1 = sps->hdr.au_cpb_removal_delay_length_minus1, - .dpb_output_delay_length_minus1 = sps->hdr.dpb_output_delay_length_minus1, - /* Reserved - 3*16 bits */ - .pSubLayerHrdParametersNal = slhdrnal, - .pSubLayerHrdParametersVcl = slhdrvcl, - }; - - memcpy(vksps_vui_header->cpb_cnt_minus1, sps->hdr.cpb_cnt_minus1, - STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->cpb_cnt_minus1)); - 
memcpy(vksps_vui_header->elemental_duration_in_tc_minus1, sps->hdr.elemental_duration_in_tc_minus1, - STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->elemental_duration_in_tc_minus1)); - - memcpy(slhdrnal, sps->hdr.nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrnal)); - memcpy(slhdrvcl, sps->hdr.vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrvcl)); - - *vksps_vui = (StdVideoH265SequenceParameterSetVui) { - .flags = (StdVideoH265SpsVuiFlags) { - .aspect_ratio_info_present_flag = sps->vui.common.aspect_ratio_info_present_flag, - .overscan_info_present_flag = sps->vui.common.overscan_info_present_flag, - .overscan_appropriate_flag = sps->vui.common.overscan_appropriate_flag, - .video_signal_type_present_flag = sps->vui.common.video_signal_type_present_flag, - .video_full_range_flag = sps->vui.common.video_full_range_flag, - .colour_description_present_flag = sps->vui.common.colour_description_present_flag, - .chroma_loc_info_present_flag = sps->vui.common.chroma_loc_info_present_flag, - .neutral_chroma_indication_flag = sps->vui.neutra_chroma_indication_flag, - .field_seq_flag = sps->vui.field_seq_flag, - .frame_field_info_present_flag = sps->vui.frame_field_info_present_flag, - .default_display_window_flag = sps->vui.default_display_window_flag, - .vui_timing_info_present_flag = sps->vui.vui_timing_info_present_flag, - .vui_poc_proportional_to_timing_flag = sps->vui.vui_poc_proportional_to_timing_flag, - .vui_hrd_parameters_present_flag = sps->vui.vui_hrd_parameters_present_flag, - .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag, - .tiles_fixed_structure_flag = sps->vui.tiles_fixed_structure_flag, - .motion_vectors_over_pic_boundaries_flag = sps->vui.motion_vectors_over_pic_boundaries_flag, - .restricted_ref_pic_lists_flag = sps->vui.restricted_ref_pic_lists_flag, - }, - .aspect_ratio_idc = sps->vui.common.aspect_ratio_idc, - .sar_width = sps->vui.common.sar.num, - .sar_height = sps->vui.common.sar.den, - .video_format = 
sps->vui.common.video_format, - .colour_primaries = sps->vui.common.colour_primaries, - .transfer_characteristics = sps->vui.common.transfer_characteristics, - .matrix_coeffs = sps->vui.common.matrix_coeffs, - .chroma_sample_loc_type_top_field = sps->vui.common.chroma_sample_loc_type_top_field, - .chroma_sample_loc_type_bottom_field = sps->vui.common.chroma_sample_loc_type_bottom_field, - /* Reserved */ - /* Reserved */ - .def_disp_win_left_offset = sps->vui.def_disp_win.left_offset, - .def_disp_win_right_offset = sps->vui.def_disp_win.right_offset, - .def_disp_win_top_offset = sps->vui.def_disp_win.top_offset, - .def_disp_win_bottom_offset = sps->vui.def_disp_win.bottom_offset, - .vui_num_units_in_tick = sps->vui.vui_num_units_in_tick, - .vui_time_scale = sps->vui.vui_time_scale, - .vui_num_ticks_poc_diff_one_minus1 = sps->vui.vui_num_ticks_poc_diff_one_minus1, - .min_spatial_segmentation_idc = sps->vui.min_spatial_segmentation_idc, - .max_bytes_per_pic_denom = sps->vui.max_bytes_per_pic_denom, - .max_bits_per_min_cu_denom = sps->vui.max_bits_per_min_cu_denom, - .log2_max_mv_length_horizontal = sps->vui.log2_max_mv_length_horizontal, - .log2_max_mv_length_vertical = sps->vui.log2_max_mv_length_vertical, - .pHrdParameters = vksps_vui_header, - }; - - *ptl = (StdVideoH265ProfileTierLevel) { - .flags = (StdVideoH265ProfileTierLevelFlags) { - .general_tier_flag = sps->ptl.general_ptl.tier_flag, - .general_progressive_source_flag = sps->ptl.general_ptl.progressive_source_flag, - .general_interlaced_source_flag = sps->ptl.general_ptl.interlaced_source_flag, - .general_non_packed_constraint_flag = sps->ptl.general_ptl.non_packed_constraint_flag, - .general_frame_only_constraint_flag = sps->ptl.general_ptl.frame_only_constraint_flag, - }, - .general_profile_idc = sps->ptl.general_ptl.profile_idc, - .general_level_idc = ff_vk_h265_level_to_vk(sps->ptl.general_ptl.level_idc), - }; - - for (int i = 0; i < sps->max_sub_layers; i++) { - dpbm->max_latency_increase_plus1[i] = 
sps->temporal_layer[i].max_latency_increase + 1; - dpbm->max_dec_pic_buffering_minus1[i] = sps->temporal_layer[i].max_dec_pic_buffering - 1; - dpbm->max_num_reorder_pics[i] = sps->temporal_layer[i].num_reorder_pics; - } - - for (int i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++) - for (int j = 0; j < sps->sps_num_palette_predictor_initializers; j++) - pal->PredictorPaletteEntries[i][j] = sps->sps_palette_predictor_initializer[i][j]; - - for (int i = 0; i < sps->nb_st_rps; i++) { - const ShortTermRPS *st_rps = &sps->st_rps[i]; - - str[i] = (StdVideoH265ShortTermRefPicSet) { - .flags = (StdVideoH265ShortTermRefPicSetFlags) { - .inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict, - .delta_rps_sign = sps->st_rps[i].delta_rps_sign, - }, - .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1, - .use_delta_flag = sps->st_rps[i].use_delta, - .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1, - .used_by_curr_pic_flag = 0x0, - .used_by_curr_pic_s0_flag = 0x0, - .used_by_curr_pic_s1_flag = 0x0, - /* Reserved */ - /* Reserved */ - /* Reserved */ - .num_negative_pics = sps->st_rps[i].num_negative_pics, - .num_positive_pics = sps->st_rps[i].num_delta_pocs - sps->st_rps[i].num_negative_pics, - }; - - /* NOTE: This is the predicted, and *reordered* version. - * Probably incorrect, but the spec doesn't say which version to use. */ - str[i].used_by_curr_pic_flag = st_rps->used; - str[i].used_by_curr_pic_s0_flag = av_zero_extend(st_rps->used, str[i].num_negative_pics); - str[i].used_by_curr_pic_s1_flag = st_rps->used >> str[i].num_negative_pics; - - for (int j = 0; j < str[i].num_negative_pics; j++) - str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; - - for (int j = 0; j < str[i].num_positive_pics; j++) - str[i].delta_poc_s1_minus1[j] = st_rps->delta_poc[st_rps->num_negative_pics + j] - - (j ? 
st_rps->delta_poc[st_rps->num_negative_pics + j - 1] : 0) - 1; - } - - *ltr = (StdVideoH265LongTermRefPicsSps) { - .used_by_curr_pic_lt_sps_flag = sps->used_by_curr_pic_lt, - }; - - for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) { - ltr->lt_ref_pic_poc_lsb_sps[i] = sps->lt_ref_pic_poc_lsb_sps[i]; - } - - *vksps = (StdVideoH265SequenceParameterSet) { - .flags = (StdVideoH265SpsFlags) { - .sps_temporal_id_nesting_flag = sps->temporal_id_nesting, - .separate_colour_plane_flag = sps->separate_colour_plane, - .conformance_window_flag = sps->conformance_window, - .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info, - .scaling_list_enabled_flag = sps->scaling_list_enabled, - .sps_scaling_list_data_present_flag = sps->scaling_list_enabled, - .amp_enabled_flag = sps->amp_enabled, - .sample_adaptive_offset_enabled_flag = sps->sao_enabled, - .pcm_enabled_flag = sps->pcm_enabled, - .pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled, - .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present, - .sps_temporal_mvp_enabled_flag = sps->temporal_mvp_enabled, - .strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled, - .vui_parameters_present_flag = sps->vui_present, - .sps_extension_present_flag = sps->extension_present, - .sps_range_extension_flag = sps->range_extension, - .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled, - .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled, - .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled, - .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled, - .extended_precision_processing_flag = sps->extended_precision_processing, - .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled, - .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled, - .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled, - .cabac_bypass_alignment_enabled_flag = 
sps->cabac_bypass_alignment_enabled, - .sps_scc_extension_flag = sps->scc_extension, - .sps_curr_pic_ref_enabled_flag = sps->curr_pic_ref_enabled, - .palette_mode_enabled_flag = sps->palette_mode_enabled, - .sps_palette_predictor_initializers_present_flag = sps->palette_predictor_initializers_present, - .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disabled, - }, - .chroma_format_idc = sps->chroma_format_idc, - .pic_width_in_luma_samples = sps->width, - .pic_height_in_luma_samples = sps->height, - .sps_video_parameter_set_id = sps->vps_id, - .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, - .sps_seq_parameter_set_id = sps_idx, - .bit_depth_luma_minus8 = sps->bit_depth - 8, - .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, - .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, - .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, - .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, - .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, - .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_transform_block_size, - .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, - .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, - .num_short_term_ref_pic_sets = sps->nb_st_rps, - .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, - .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, - .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, - .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, - .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, - /* Reserved */ - /* Reserved */ - .palette_max_size = sps->palette_max_size, - .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size, - .motion_vector_resolution_control_idc = 
sps->motion_vector_resolution_control_idc, - .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializers - 1, - .conf_win_left_offset = sps->pic_conf_win.left_offset, - .conf_win_right_offset = sps->pic_conf_win.right_offset, - .conf_win_top_offset = sps->pic_conf_win.top_offset, - .conf_win_bottom_offset = sps->pic_conf_win.bottom_offset, - .pProfileTierLevel = ptl, - .pDecPicBufMgr = dpbm, - .pScalingLists = vksps_scaling, - .pShortTermRefPicSet = str, - .pLongTermRefPicsSps = ltr, - .pSequenceParameterSetVui = vksps_vui, - .pPredictorPaletteEntries = pal, - }; -} - -static void set_pps(const HEVCPPS *pps, const HEVCSPS *sps, - StdVideoH265ScalingLists *vkpps_scaling, - StdVideoH265PictureParameterSet *vkpps, - StdVideoH265PredictorPaletteEntries *pal) -{ - copy_scaling_list(&pps->scaling_list, vkpps_scaling); - - *vkpps = (StdVideoH265PictureParameterSet) { - .flags = (StdVideoH265PpsFlags) { - .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag, - .output_flag_present_flag = pps->output_flag_present_flag, - .sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag, - .cabac_init_present_flag = pps->cabac_init_present_flag, - .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, - .transform_skip_enabled_flag = pps->transform_skip_enabled_flag, - .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag, - .pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag, - .weighted_pred_flag = pps->weighted_pred_flag, - .weighted_bipred_flag = pps->weighted_bipred_flag, - .transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag, - .tiles_enabled_flag = pps->tiles_enabled_flag, - .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag, - .uniform_spacing_flag = pps->uniform_spacing_flag, - .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag, - .pps_loop_filter_across_slices_enabled_flag = 
pps->seq_loop_filter_across_slices_enabled_flag, - .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, - .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag, - .pps_deblocking_filter_disabled_flag = pps->disable_dbf, - .pps_scaling_list_data_present_flag = pps->scaling_list_data_present_flag, - .lists_modification_present_flag = pps->lists_modification_present_flag, - .slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag, - .pps_extension_present_flag = pps->pps_extension_present_flag, - .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag, - .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag, - .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag, - .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag, - .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag, - .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializers_present_flag, - .monochrome_palette_flag = pps->monochrome_palette_flag, - .pps_range_extension_flag = pps->pps_range_extensions_flag, - }, - .pps_pic_parameter_set_id = pps->pps_id, - .pps_seq_parameter_set_id = pps->sps_id, - .sps_video_parameter_set_id = sps->vps_id, - .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, - .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1, - .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1, - .init_qp_minus26 = pps->pic_init_qp_minus26, - .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, - .pps_cb_qp_offset = pps->cb_qp_offset, - .pps_cr_qp_offset = pps->cr_qp_offset, - .pps_beta_offset_div2 = pps->beta_offset >> 1, - .pps_tc_offset_div2 = pps->tc_offset >> 1, - .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, - 
.log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size - 2, - .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth, - .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1, - .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma, - .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma, - .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset + 5, - .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset + 5, - .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset + 3, - .pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializers, - .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry - 8, - .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry - 8, - .num_tile_columns_minus1 = pps->num_tile_columns - 1, - .num_tile_rows_minus1 = pps->num_tile_rows - 1, - .pScalingLists = vkpps_scaling, - .pPredictorPaletteEntries = pal, - }; - - for (int i = 0; i < (pps->monochrome_palette_flag ? 
1 : 3); i++) { - for (int j = 0; j < pps->pps_num_palette_predictor_initializers; j++) - pal->PredictorPaletteEntries[i][j] = pps->pps_palette_predictor_initializer[i][j]; - } - - for (int i = 0; i < pps->num_tile_columns - 1; i++) - vkpps->column_width_minus1[i] = pps->column_width[i] - 1; - - for (int i = 0; i < pps->num_tile_rows - 1; i++) - vkpps->row_height_minus1[i] = pps->row_height[i] - 1; - - for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) { - vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i]; - vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i]; - } -} - -static void set_vps(const HEVCVPS *vps, - StdVideoH265VideoParameterSet *vkvps, - StdVideoH265ProfileTierLevel *ptl, - StdVideoH265DecPicBufMgr *dpbm, - StdVideoH265HrdParameters *sls_hdr, - HEVCHeaderVPSSet sls[]) -{ - for (int i = 0; i < vps->vps_num_hrd_parameters; i++) { - const HEVCHdrParams *src = &vps->hdr[i]; - - sls_hdr[i] = (StdVideoH265HrdParameters) { - .flags = (StdVideoH265HrdFlags) { - .nal_hrd_parameters_present_flag = src->nal_hrd_parameters_present_flag, - .vcl_hrd_parameters_present_flag = src->vcl_hrd_parameters_present_flag, - .sub_pic_hrd_params_present_flag = src->sub_pic_hrd_params_present_flag, - .sub_pic_cpb_params_in_pic_timing_sei_flag = src->sub_pic_cpb_params_in_pic_timing_sei_flag, - .fixed_pic_rate_general_flag = src->flags.fixed_pic_rate_general_flag, - .fixed_pic_rate_within_cvs_flag = src->flags.fixed_pic_rate_within_cvs_flag, - .low_delay_hrd_flag = src->flags.low_delay_hrd_flag, - }, - .tick_divisor_minus2 = src->tick_divisor_minus2, - .du_cpb_removal_delay_increment_length_minus1 = src->du_cpb_removal_delay_increment_length_minus1, - .dpb_output_delay_du_length_minus1 = src->dpb_output_delay_du_length_minus1, - .bit_rate_scale = src->bit_rate_scale, - .cpb_size_scale = src->cpb_size_scale, - .cpb_size_du_scale = src->cpb_size_du_scale, - .initial_cpb_removal_delay_length_minus1 = src->initial_cpb_removal_delay_length_minus1, - 
.au_cpb_removal_delay_length_minus1 = src->au_cpb_removal_delay_length_minus1, - .dpb_output_delay_length_minus1 = src->dpb_output_delay_length_minus1, - /* Reserved - 3*16 bits */ - .pSubLayerHrdParametersNal = sls[i].nal_hdr, - .pSubLayerHrdParametersVcl = sls[i].vcl_hdr, - }; - - memcpy(sls_hdr[i].cpb_cnt_minus1, src->cpb_cnt_minus1, - STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].cpb_cnt_minus1)); - memcpy(sls_hdr[i].elemental_duration_in_tc_minus1, src->elemental_duration_in_tc_minus1, - STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].elemental_duration_in_tc_minus1)); - - memcpy(sls[i].nal_hdr, src->nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].nal_hdr)); - memcpy(sls[i].vcl_hdr, src->vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].vcl_hdr)); - } - - *ptl = (StdVideoH265ProfileTierLevel) { - .flags = (StdVideoH265ProfileTierLevelFlags) { - .general_tier_flag = vps->ptl.general_ptl.tier_flag, - .general_progressive_source_flag = vps->ptl.general_ptl.progressive_source_flag, - .general_interlaced_source_flag = vps->ptl.general_ptl.interlaced_source_flag, - .general_non_packed_constraint_flag = vps->ptl.general_ptl.non_packed_constraint_flag, - .general_frame_only_constraint_flag = vps->ptl.general_ptl.frame_only_constraint_flag, - }, - .general_profile_idc = ff_vk_h265_profile_to_vk(vps->ptl.general_ptl.profile_idc), - .general_level_idc = ff_vk_h265_level_to_vk(vps->ptl.general_ptl.level_idc), - }; - - for (int i = 0; i < vps->vps_max_sub_layers; i++) { - dpbm->max_latency_increase_plus1[i] = vps->vps_max_latency_increase[i] + 1; - dpbm->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering[i] - 1; - dpbm->max_num_reorder_pics[i] = vps->vps_num_reorder_pics[i]; - } - - *vkvps = (StdVideoH265VideoParameterSet) { - .flags = (StdVideoH265VpsFlags) { - .vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag, - .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag, - 
.vps_timing_info_present_flag = vps->vps_timing_info_present_flag, - .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag, - }, - .vps_video_parameter_set_id = vps->vps_id, - .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers - 1, - /* Reserved */ - /* Reserved */ - .vps_num_units_in_tick = vps->vps_num_units_in_tick, - .vps_time_scale = vps->vps_time_scale, - .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one - 1, - /* Reserved */ - .pDecPicBufMgr = dpbm, - .pHrdParameters = sls_hdr, - .pProfileTierLevel = ptl, - }; -} - -static int vk_hevc_create_params(AVCodecContext *avctx, AVBufferRef **buf) -{ - int err; - const HEVCContext *h = avctx->priv_data; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - - VkVideoDecodeH265SessionParametersAddInfoKHR h265_params_info = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR, - .stdSPSCount = 0, - .stdPPSCount = 0, - .stdVPSCount = 0, - }; - VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pParametersAddInfo = &h265_params_info, - }; - VkVideoSessionParametersCreateInfoKHR session_params_create = { - .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, - .pNext = &h265_params, - .videoSession = ctx->common.session, - .videoSessionParametersTemplate = VK_NULL_HANDLE, - }; - - HEVCHeaderSet *hdr; - int nb_vps = 0; - int vps_list_idx[HEVC_MAX_VPS_COUNT]; - - for (int i = 0; i < HEVC_MAX_VPS_COUNT; i++) - if (h->ps.vps_list[i]) - vps_list_idx[nb_vps++] = i; - - err = alloc_hevc_header_structs(dec, nb_vps, vps_list_idx, h->ps.vps_list); - if (err < 0) - return err; - - hdr = dec->hevc_headers; - - h265_params_info.pStdSPSs = hdr->sps; - h265_params_info.pStdPPSs = hdr->pps; - h265_params_info.pStdVPSs = hdr->vps; - - /* SPS list */ - for (int i = 0; i < HEVC_MAX_SPS_COUNT; 
i++) { - if (h->ps.sps_list[i]) { - const HEVCSPS *sps_l = h->ps.sps_list[i]; - int idx = h265_params_info.stdSPSCount++; - set_sps(sps_l, i, &hdr->hsps[idx].scaling, &hdr->hsps[idx].vui_header, - &hdr->hsps[idx].vui, &hdr->sps[idx], hdr->hsps[idx].nal_hdr, - hdr->hsps[idx].vcl_hdr, &hdr->hsps[idx].ptl, &hdr->hsps[idx].dpbm, - &hdr->hsps[idx].pal, hdr->hsps[idx].str, &hdr->hsps[idx].ltr); - } - } - - /* PPS list */ - for (int i = 0; i < HEVC_MAX_PPS_COUNT; i++) { - if (h->ps.pps_list[i]) { - const HEVCPPS *pps_l = h->ps.pps_list[i]; - const HEVCSPS *sps_l = h->ps.sps_list[pps_l->sps_id]; - int idx = h265_params_info.stdPPSCount++; - set_pps(pps_l, sps_l, &hdr->hpps[idx].scaling, - &hdr->pps[idx], &hdr->hpps[idx].pal); - } - } - - /* VPS list */ - for (int i = 0; i < nb_vps; i++) { - const HEVCVPS *vps_l = h->ps.vps_list[vps_list_idx[i]]; - set_vps(vps_l, &hdr->vps[i], &hdr->hvps[i].ptl, &hdr->hvps[i].dpbm, - hdr->hvps[i].hdr, hdr->hvps[i].sls); - h265_params_info.stdVPSCount++; - } - - h265_params.maxStdSPSCount = h265_params_info.stdSPSCount; - h265_params.maxStdPPSCount = h265_params_info.stdPPSCount; - h265_params.maxStdVPSCount = h265_params_info.stdVPSCount; - - err = ff_vk_decode_create_params(buf, avctx, ctx, &session_params_create); - if (err < 0) - return err; - - av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS %i VPS\n", - h265_params_info.stdSPSCount, h265_params_info.stdPPSCount, - h265_params_info.stdVPSCount); - - return 0; -} - -static int vk_hevc_start_frame(AVCodecContext *avctx, - av_unused const AVBufferRef *buffer_ref, - av_unused const uint8_t *buffer, - av_unused uint32_t size) -{ - int err; - HEVCContext *h = avctx->priv_data; - HEVCLayerContext *l = &h->layers[h->cur_layer]; - - HEVCFrame *pic = h->cur_frame; - HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &hp->vp; - const HEVCPPS *pps = h->pps; - const HEVCSPS *sps = pps->sps; - int nb_refs = 0; - - hp->h265pic = 
(StdVideoDecodeH265PictureInfo) { - .flags = (StdVideoDecodeH265PictureInfoFlags) { - .IrapPicFlag = IS_IRAP(h), - .IdrPicFlag = IS_IDR(h), - .IsReference = h->nal_unit_type < 16 ? h->nal_unit_type & 1 : 1, - .short_term_ref_pic_set_sps_flag = h->sh.short_term_ref_pic_set_sps_flag, - }, - .sps_video_parameter_set_id = sps->vps_id, - .pps_seq_parameter_set_id = pps->sps_id, - .pps_pic_parameter_set_id = pps->pps_id, - .NumDeltaPocsOfRefRpsIdx = h->sh.short_term_rps ? h->sh.short_term_rps->rps_idx_num_delta_pocs : 0, - .PicOrderCntVal = h->poc, - .NumBitsForSTRefPicSetInSlice = !h->sh.short_term_ref_pic_set_sps_flag ? - h->sh.short_term_ref_pic_set_size : 0, - }; - - /* Fill in references */ - for (int i = 0; i < FF_ARRAY_ELEMS(l->DPB); i++) { - const HEVCFrame *ref = &l->DPB[i]; - int idx = nb_refs; - - if (!(ref->flags & (HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF))) - continue; - - if (ref == pic) { - err = vk_hevc_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, - &hp->vkh265_ref, &hp->h265_ref, pic, 1, i); - if (err < 0) - return err; - - continue; - } - - err = vk_hevc_fill_pict(avctx, &hp->ref_src[idx], &vp->ref_slots[idx], - &vp->refs[idx], &hp->vkh265_refs[idx], - &hp->h265_refs[idx], (HEVCFrame *)ref, 0, i); - if (err < 0) - return err; - - nb_refs++; - } - - memset(hp->h265pic.RefPicSetStCurrBefore, 0xff, 8); - for (int i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { - HEVCFrame *frame = h->rps[ST_CURR_BEF].ref[i]; - for (int j = 0; j < FF_ARRAY_ELEMS(l->DPB); j++) { - const HEVCFrame *ref = &l->DPB[j]; - if (ref == frame) { - hp->h265pic.RefPicSetStCurrBefore[i] = j; - break; - } - } - } - memset(hp->h265pic.RefPicSetStCurrAfter, 0xff, 8); - for (int i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { - HEVCFrame *frame = h->rps[ST_CURR_AFT].ref[i]; - for (int j = 0; j < FF_ARRAY_ELEMS(l->DPB); j++) { - const HEVCFrame *ref = &l->DPB[j]; - if (ref == frame) { - hp->h265pic.RefPicSetStCurrAfter[i] = j; - break; - } - } - } - 
memset(hp->h265pic.RefPicSetLtCurr, 0xff, 8); - for (int i = 0; i < h->rps[LT_CURR].nb_refs; i++) { - HEVCFrame *frame = h->rps[LT_CURR].ref[i]; - for (int j = 0; j < FF_ARRAY_ELEMS(l->DPB); j++) { - const HEVCFrame *ref = &l->DPB[j]; - if (ref == frame) { - hp->h265pic.RefPicSetLtCurr[i] = j; - break; - } - } - } - - hp->h265_pic_info = (VkVideoDecodeH265PictureInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR, - .pStdPictureInfo = &hp->h265pic, - .sliceSegmentCount = 0, - }; - - vp->decode_info = (VkVideoDecodeInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, - .pNext = &hp->h265_pic_info, - .flags = 0x0, - .pSetupReferenceSlot = &vp->ref_slot, - .referenceSlotCount = nb_refs, - .pReferenceSlots = vp->ref_slots, - .dstPictureResource = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - .codedOffset = (VkOffset2D){ 0, 0 }, - .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, - .baseArrayLayer = 0, - .imageViewBinding = vp->view.out[0], - }, - }; - - return 0; -} - -static int vk_hevc_decode_slice(AVCodecContext *avctx, - const uint8_t *data, - uint32_t size) -{ - const HEVCContext *h = avctx->priv_data; - HEVCVulkanDecodePicture *hp = h->cur_frame->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &hp->vp; - - int err = ff_vk_decode_add_slice(avctx, vp, data, size, 1, - &hp->h265_pic_info.sliceSegmentCount, - &hp->h265_pic_info.pSliceSegmentOffsets); - if (err < 0) - return err; - - return 0; -} - -static int vk_hevc_end_frame(AVCodecContext *avctx) -{ - const HEVCContext *h = avctx->priv_data; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - - HEVCFrame *pic = h->cur_frame; - HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &hp->vp; - FFVulkanDecodePicture *rvp[HEVC_MAX_REFS] = { 0 }; - AVFrame *rav[HEVC_MAX_REFS] = { 0 }; - int err; - - const HEVCPPS *pps = 
h->pps; - const HEVCSPS *sps = pps->sps; - -#ifdef VK_KHR_video_maintenance2 - HEVCHeaderPPS vkpps_p; - StdVideoH265PictureParameterSet vkpps; - HEVCHeaderSPS vksps_p; - StdVideoH265SequenceParameterSet vksps; - HEVCHeaderVPSSet vkvps_ps[HEVC_MAX_SUB_LAYERS]; - HEVCHeaderVPS vkvps_p; - StdVideoH265VideoParameterSet vkvps; - VkVideoDecodeH265InlineSessionParametersInfoKHR h265_params; - - if (ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2) { - set_pps(pps, sps, &vkpps_p.scaling, &vkpps, &vkpps_p.pal); - set_sps(sps, pps->sps_id, &vksps_p.scaling, &vksps_p.vui_header, - &vksps_p.vui, &vksps, vksps_p.nal_hdr, - vksps_p.vcl_hdr, &vksps_p.ptl, &vksps_p.dpbm, - &vksps_p.pal, vksps_p.str, &vksps_p.ltr); - - vkvps_p.sls = vkvps_ps; - set_vps(sps->vps, &vkvps, &vkvps_p.ptl, &vkvps_p.dpbm, - vkvps_p.hdr, vkvps_p.sls); - - h265_params = (VkVideoDecodeH265InlineSessionParametersInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_INLINE_SESSION_PARAMETERS_INFO_KHR, - .pStdSPS = &vksps, - .pStdPPS = &vkpps, - .pStdVPS = &vkvps, - }; - hp->h265_pic_info.pNext = &h265_params; - } -#endif - - if (!hp->h265_pic_info.sliceSegmentCount) - return 0; - - if (!dec->session_params && - !(ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2)) { - if (!pps) { - unsigned int pps_id = h->sh.pps_id; - if (pps_id < HEVC_MAX_PPS_COUNT && h->ps.pps_list[pps_id] != NULL) - pps = h->ps.pps_list[pps_id]; - } - - if (!pps) { - av_log(avctx, AV_LOG_ERROR, - "Encountered frame without a valid active PPS reference.\n"); - return AVERROR_INVALIDDATA; - } - - err = vk_hevc_create_params(avctx, &dec->session_params); - if (err < 0) - return err; - - hp->h265pic.sps_video_parameter_set_id = sps->vps_id; - hp->h265pic.pps_seq_parameter_set_id = pps->sps_id; - hp->h265pic.pps_pic_parameter_set_id = pps->pps_id; - } - - for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { - HEVCVulkanDecodePicture *rfhp = hp->ref_src[i]->hwaccel_picture_private; - rav[i] = hp->ref_src[i]->f; - rvp[i] = 
&rfhp->vp; - } - - av_log(avctx, AV_LOG_DEBUG, "Decoding frame, %"SIZE_SPECIFIER" bytes, %i slices\n", - vp->slices_size, hp->h265_pic_info.sliceSegmentCount); - - return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); -} - -static void vk_hevc_free_frame_priv(AVRefStructOpaque _hwctx, void *data) -{ - AVHWDeviceContext *hwctx = _hwctx.nc; - HEVCVulkanDecodePicture *hp = data; - - /* Free frame resources */ - ff_vk_decode_free_frame(hwctx, &hp->vp); -} - -const FFHWAccel ff_hevc_vulkan_hwaccel = { - .p.name = "hevc_vulkan", - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_HEVC, - .p.pix_fmt = AV_PIX_FMT_VULKAN, - .start_frame = &vk_hevc_start_frame, - .decode_slice = &vk_hevc_decode_slice, - .end_frame = &vk_hevc_end_frame, - .free_frame_priv = &vk_hevc_free_frame_priv, - .frame_priv_data_size = sizeof(HEVCVulkanDecodePicture), - .init = &ff_vk_decode_init, - .update_thread_context = &ff_vk_update_thread_context, - .decode_params = &ff_vk_params_invalidate, - .flush = &ff_vk_decode_flush, - .uninit = &ff_vk_decode_uninit, - .frame_params = &ff_vk_frame_params, - .priv_data_size = sizeof(FFVulkanDecodeContext), - .caps_internal = HWACCEL_CAP_ASYNC_SAFE, -}; -- 2.49.1 From 8198504517fa809ad6662b23bd82ac7b38b7d509 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:38:53 +0000 Subject: [PATCH 084/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_hevc.c | 957 ++++++++++++++++++++++++++++++++ 1 file changed, 957 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_hevc.c diff --git a/libavcodec/vulkan/vulkan_hevc.c b/libavcodec/vulkan/vulkan_hevc.c new file mode 100644 index 0000000000..15f7e7c950 --- /dev/null +++ b/libavcodec/vulkan/vulkan_hevc.c @@ -0,0 +1,957 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mem.h" +#include "libavcodec/hevcdec.h" +#include "libavcodec/hevc_data.h" +#include "libavcodec/hevc_ps.h" + +#include "vulkan_decode.h" + +const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc = { + .codec_id = AV_CODEC_ID_HEVC, + .decode_extension = FF_VK_EXT_VIDEO_DECODE_H265, + .queue_flags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, + .decode_op = VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR, + .ext_props = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION, + }, +}; + +typedef struct HEVCHeaderSPS { + StdVideoH265ScalingLists scaling; + StdVideoH265HrdParameters vui_header; + StdVideoH265SequenceParameterSetVui vui; + StdVideoH265ProfileTierLevel ptl; + StdVideoH265DecPicBufMgr dpbm; + StdVideoH265PredictorPaletteEntries pal; + StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS]; + StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS]; + StdVideoH265ShortTermRefPicSet str[HEVC_MAX_SHORT_TERM_REF_PIC_SETS]; + StdVideoH265LongTermRefPicsSps ltr; +} HEVCHeaderSPS; + +typedef struct HEVCHeaderPPS { + StdVideoH265ScalingLists scaling; + StdVideoH265PredictorPaletteEntries pal; +} HEVCHeaderPPS; + +typedef struct 
HEVCHeaderVPSSet { + StdVideoH265SubLayerHrdParameters nal_hdr[HEVC_MAX_SUB_LAYERS]; + StdVideoH265SubLayerHrdParameters vcl_hdr[HEVC_MAX_SUB_LAYERS]; +} HEVCHeaderVPSSet; + +typedef struct HEVCHeaderVPS { + StdVideoH265ProfileTierLevel ptl; + StdVideoH265DecPicBufMgr dpbm; + StdVideoH265HrdParameters hdr[HEVC_MAX_LAYER_SETS]; + HEVCHeaderVPSSet *sls; +} HEVCHeaderVPS; + +typedef struct HEVCHeaderSet { + StdVideoH265SequenceParameterSet sps[HEVC_MAX_SPS_COUNT]; + HEVCHeaderSPS hsps[HEVC_MAX_SPS_COUNT]; + + StdVideoH265PictureParameterSet pps[HEVC_MAX_PPS_COUNT]; + HEVCHeaderPPS hpps[HEVC_MAX_PPS_COUNT]; + + StdVideoH265VideoParameterSet vps[HEVC_MAX_PPS_COUNT]; + HEVCHeaderVPS *hvps; +} HEVCHeaderSet; + +static int alloc_hevc_header_structs(FFVulkanDecodeContext *s, + int nb_vps, + const int vps_list_idx[HEVC_MAX_VPS_COUNT], + const HEVCVPS * const vps_list[HEVC_MAX_VPS_COUNT]) +{ + uint8_t *data_ptr; + HEVCHeaderSet *hdr; + + size_t buf_size = sizeof(HEVCHeaderSet) + nb_vps*sizeof(HEVCHeaderVPS); + for (int i = 0; i < nb_vps; i++) { + const HEVCVPS *vps = vps_list[vps_list_idx[i]]; + buf_size += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters; + } + + if (buf_size > s->hevc_headers_size) { + av_freep(&s->hevc_headers); + s->hevc_headers_size = 0; + s->hevc_headers = av_mallocz(buf_size); + if (!s->hevc_headers) + return AVERROR(ENOMEM); + s->hevc_headers_size = buf_size; + } + + /* Setup struct pointers */ + hdr = s->hevc_headers; + data_ptr = (uint8_t *)hdr; + hdr->hvps = (HEVCHeaderVPS *)(data_ptr + sizeof(HEVCHeaderSet)); + data_ptr += sizeof(HEVCHeaderSet) + nb_vps*sizeof(HEVCHeaderVPS); + for (int i = 0; i < nb_vps; i++) { + const HEVCVPS *vps = vps_list[vps_list_idx[i]]; + hdr->hvps[i].sls = (HEVCHeaderVPSSet *)data_ptr; + data_ptr += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters; + } + + return 0; +} + +typedef struct HEVCVulkanDecodePicture { + FFVulkanDecodePicture vp; + + /* Current picture */ + StdVideoDecodeH265ReferenceInfo h265_ref; + 
VkVideoDecodeH265DpbSlotInfoKHR vkh265_ref; + + /* Picture refs */ + HEVCFrame *ref_src [HEVC_MAX_REFS]; + StdVideoDecodeH265ReferenceInfo h265_refs [HEVC_MAX_REFS]; + VkVideoDecodeH265DpbSlotInfoKHR vkh265_refs[HEVC_MAX_REFS]; + + /* Current picture (contd.) */ + StdVideoDecodeH265PictureInfo h265pic; + VkVideoDecodeH265PictureInfoKHR h265_pic_info; +} HEVCVulkanDecodePicture; + +static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + VkVideoDecodeH265DpbSlotInfoKHR *vkh265_ref, /* Goes in ^ */ + StdVideoDecodeH265ReferenceInfo *h265_ref, /* Goes in ^ */ + HEVCFrame *pic, int is_current, int pic_id) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(dec, pic->f, vkpic, is_current, + dec->dedicated_dpb); + if (err < 0) + return err; + + *h265_ref = (StdVideoDecodeH265ReferenceInfo) { + .flags = (StdVideoDecodeH265ReferenceInfoFlags) { + .used_for_long_term_reference = pic->flags & HEVC_FRAME_FLAG_LONG_REF, + .unused_for_reference = 0, + }, + .PicOrderCntVal = pic->poc, + }; + + *vkh265_ref = (VkVideoDecodeH265DpbSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR, + .pStdReferenceInfo = h265_ref, + }; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = ctx->common.layered_dpb ? 
pic_id : 0, + .imageViewBinding = vkpic->view.ref[0], + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .pNext = vkh265_ref, + .slotIndex = pic_id, + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static void copy_scaling_list(const ScalingList *sl, StdVideoH265ScalingLists *vksl) +{ + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS; i++) { + for (int j = 0; j < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS; j++) { + uint8_t pos = 4 * ff_hevc_diag_scan4x4_y[j] + ff_hevc_diag_scan4x4_x[j]; + vksl->ScalingList4x4[i][j] = sl->sl[0][i][pos]; + } + } + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS; i++) { + for (int j = 0; j < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS; j++) { + uint8_t pos = 8 * ff_hevc_diag_scan8x8_y[j] + ff_hevc_diag_scan8x8_x[j]; + vksl->ScalingList8x8[i][j] = sl->sl[1][i][pos]; + } + } + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; i++) { + for (int j = 0; j < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS; j++) { + uint8_t pos = 8 * ff_hevc_diag_scan8x8_y[j] + ff_hevc_diag_scan8x8_x[j]; + vksl->ScalingList16x16[i][j] = sl->sl[2][i][pos]; + } + } + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++) { + for (int j = 0; j < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS; j++) { + uint8_t pos = 8 * ff_hevc_diag_scan8x8_y[j] + ff_hevc_diag_scan8x8_x[j]; + vksl->ScalingList32x32[i][j] = sl->sl[3][i * 3][pos]; + } + } + + memcpy(vksl->ScalingListDCCoef16x16, sl->sl_dc[0], + STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * sizeof(*vksl->ScalingListDCCoef16x16)); + + for (int i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++) + vksl->ScalingListDCCoef32x32[i] = sl->sl_dc[1][i * 3]; +} + +static void set_sps(const HEVCSPS *sps, int sps_idx, + StdVideoH265ScalingLists *vksps_scaling, + StdVideoH265HrdParameters *vksps_vui_header, + StdVideoH265SequenceParameterSetVui 
*vksps_vui, + StdVideoH265SequenceParameterSet *vksps, + StdVideoH265SubLayerHrdParameters *slhdrnal, + StdVideoH265SubLayerHrdParameters *slhdrvcl, + StdVideoH265ProfileTierLevel *ptl, + StdVideoH265DecPicBufMgr *dpbm, + StdVideoH265PredictorPaletteEntries *pal, + StdVideoH265ShortTermRefPicSet *str, + StdVideoH265LongTermRefPicsSps *ltr) +{ + copy_scaling_list(&sps->scaling_list, vksps_scaling); + + *vksps_vui_header = (StdVideoH265HrdParameters) { + .flags = (StdVideoH265HrdFlags) { + .nal_hrd_parameters_present_flag = sps->hdr.nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = sps->hdr.vcl_hrd_parameters_present_flag, + .sub_pic_hrd_params_present_flag = sps->hdr.sub_pic_hrd_params_present_flag, + .sub_pic_cpb_params_in_pic_timing_sei_flag = sps->hdr.sub_pic_cpb_params_in_pic_timing_sei_flag, + .fixed_pic_rate_general_flag = sps->hdr.flags.fixed_pic_rate_general_flag, + .fixed_pic_rate_within_cvs_flag = sps->hdr.flags.fixed_pic_rate_within_cvs_flag, + .low_delay_hrd_flag = sps->hdr.flags.low_delay_hrd_flag, + }, + .tick_divisor_minus2 = sps->hdr.tick_divisor_minus2, + .du_cpb_removal_delay_increment_length_minus1 = sps->hdr.du_cpb_removal_delay_increment_length_minus1, + .dpb_output_delay_du_length_minus1 = sps->hdr.dpb_output_delay_du_length_minus1, + .bit_rate_scale = sps->hdr.bit_rate_scale, + .cpb_size_scale = sps->hdr.cpb_size_scale, + .cpb_size_du_scale = sps->hdr.cpb_size_du_scale, + .initial_cpb_removal_delay_length_minus1 = sps->hdr.initial_cpb_removal_delay_length_minus1, + .au_cpb_removal_delay_length_minus1 = sps->hdr.au_cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = sps->hdr.dpb_output_delay_length_minus1, + /* Reserved - 3*16 bits */ + .pSubLayerHrdParametersNal = slhdrnal, + .pSubLayerHrdParametersVcl = slhdrvcl, + }; + + memcpy(vksps_vui_header->cpb_cnt_minus1, sps->hdr.cpb_cnt_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->cpb_cnt_minus1)); + 
memcpy(vksps_vui_header->elemental_duration_in_tc_minus1, sps->hdr.elemental_duration_in_tc_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*vksps_vui_header->elemental_duration_in_tc_minus1)); + + memcpy(slhdrnal, sps->hdr.nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrnal)); + memcpy(slhdrvcl, sps->hdr.vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*slhdrvcl)); + + *vksps_vui = (StdVideoH265SequenceParameterSetVui) { + .flags = (StdVideoH265SpsVuiFlags) { + .aspect_ratio_info_present_flag = sps->vui.common.aspect_ratio_info_present_flag, + .overscan_info_present_flag = sps->vui.common.overscan_info_present_flag, + .overscan_appropriate_flag = sps->vui.common.overscan_appropriate_flag, + .video_signal_type_present_flag = sps->vui.common.video_signal_type_present_flag, + .video_full_range_flag = sps->vui.common.video_full_range_flag, + .colour_description_present_flag = sps->vui.common.colour_description_present_flag, + .chroma_loc_info_present_flag = sps->vui.common.chroma_loc_info_present_flag, + .neutral_chroma_indication_flag = sps->vui.neutra_chroma_indication_flag, + .field_seq_flag = sps->vui.field_seq_flag, + .frame_field_info_present_flag = sps->vui.frame_field_info_present_flag, + .default_display_window_flag = sps->vui.default_display_window_flag, + .vui_timing_info_present_flag = sps->vui.vui_timing_info_present_flag, + .vui_poc_proportional_to_timing_flag = sps->vui.vui_poc_proportional_to_timing_flag, + .vui_hrd_parameters_present_flag = sps->vui.vui_hrd_parameters_present_flag, + .bitstream_restriction_flag = sps->vui.bitstream_restriction_flag, + .tiles_fixed_structure_flag = sps->vui.tiles_fixed_structure_flag, + .motion_vectors_over_pic_boundaries_flag = sps->vui.motion_vectors_over_pic_boundaries_flag, + .restricted_ref_pic_lists_flag = sps->vui.restricted_ref_pic_lists_flag, + }, + .aspect_ratio_idc = sps->vui.common.aspect_ratio_idc, + .sar_width = sps->vui.common.sar.num, + .sar_height = sps->vui.common.sar.den, + .video_format = 
sps->vui.common.video_format, + .colour_primaries = sps->vui.common.colour_primaries, + .transfer_characteristics = sps->vui.common.transfer_characteristics, + .matrix_coeffs = sps->vui.common.matrix_coeffs, + .chroma_sample_loc_type_top_field = sps->vui.common.chroma_sample_loc_type_top_field, + .chroma_sample_loc_type_bottom_field = sps->vui.common.chroma_sample_loc_type_bottom_field, + /* Reserved */ + /* Reserved */ + .def_disp_win_left_offset = sps->vui.def_disp_win.left_offset, + .def_disp_win_right_offset = sps->vui.def_disp_win.right_offset, + .def_disp_win_top_offset = sps->vui.def_disp_win.top_offset, + .def_disp_win_bottom_offset = sps->vui.def_disp_win.bottom_offset, + .vui_num_units_in_tick = sps->vui.vui_num_units_in_tick, + .vui_time_scale = sps->vui.vui_time_scale, + .vui_num_ticks_poc_diff_one_minus1 = sps->vui.vui_num_ticks_poc_diff_one_minus1, + .min_spatial_segmentation_idc = sps->vui.min_spatial_segmentation_idc, + .max_bytes_per_pic_denom = sps->vui.max_bytes_per_pic_denom, + .max_bits_per_min_cu_denom = sps->vui.max_bits_per_min_cu_denom, + .log2_max_mv_length_horizontal = sps->vui.log2_max_mv_length_horizontal, + .log2_max_mv_length_vertical = sps->vui.log2_max_mv_length_vertical, + .pHrdParameters = vksps_vui_header, + }; + + *ptl = (StdVideoH265ProfileTierLevel) { + .flags = (StdVideoH265ProfileTierLevelFlags) { + .general_tier_flag = sps->ptl.general_ptl.tier_flag, + .general_progressive_source_flag = sps->ptl.general_ptl.progressive_source_flag, + .general_interlaced_source_flag = sps->ptl.general_ptl.interlaced_source_flag, + .general_non_packed_constraint_flag = sps->ptl.general_ptl.non_packed_constraint_flag, + .general_frame_only_constraint_flag = sps->ptl.general_ptl.frame_only_constraint_flag, + }, + .general_profile_idc = sps->ptl.general_ptl.profile_idc, + .general_level_idc = ff_vk_h265_level_to_vk(sps->ptl.general_ptl.level_idc), + }; + + for (int i = 0; i < sps->max_sub_layers; i++) { + dpbm->max_latency_increase_plus1[i] = 
sps->temporal_layer[i].max_latency_increase + 1; + dpbm->max_dec_pic_buffering_minus1[i] = sps->temporal_layer[i].max_dec_pic_buffering - 1; + dpbm->max_num_reorder_pics[i] = sps->temporal_layer[i].num_reorder_pics; + } + + for (int i = 0; i < (sps->chroma_format_idc ? 3 : 1); i++) + for (int j = 0; j < sps->sps_num_palette_predictor_initializers; j++) + pal->PredictorPaletteEntries[i][j] = sps->sps_palette_predictor_initializer[i][j]; + + for (int i = 0; i < sps->nb_st_rps; i++) { + const ShortTermRPS *st_rps = &sps->st_rps[i]; + + str[i] = (StdVideoH265ShortTermRefPicSet) { + .flags = (StdVideoH265ShortTermRefPicSetFlags) { + .inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict, + .delta_rps_sign = sps->st_rps[i].delta_rps_sign, + }, + .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1, + .use_delta_flag = sps->st_rps[i].use_delta, + .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1, + .used_by_curr_pic_flag = 0x0, + .used_by_curr_pic_s0_flag = 0x0, + .used_by_curr_pic_s1_flag = 0x0, + /* Reserved */ + /* Reserved */ + /* Reserved */ + .num_negative_pics = sps->st_rps[i].num_negative_pics, + .num_positive_pics = sps->st_rps[i].num_delta_pocs - sps->st_rps[i].num_negative_pics, + }; + + /* NOTE: This is the predicted, and *reordered* version. + * Probably incorrect, but the spec doesn't say which version to use. */ + str[i].used_by_curr_pic_flag = st_rps->used; + str[i].used_by_curr_pic_s0_flag = av_zero_extend(st_rps->used, str[i].num_negative_pics); + str[i].used_by_curr_pic_s1_flag = st_rps->used >> str[i].num_negative_pics; + + for (int j = 0; j < str[i].num_negative_pics; j++) + str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; + + for (int j = 0; j < str[i].num_positive_pics; j++) + str[i].delta_poc_s1_minus1[j] = st_rps->delta_poc[st_rps->num_negative_pics + j] - + (j ? 
st_rps->delta_poc[st_rps->num_negative_pics + j - 1] : 0) - 1; + } + + *ltr = (StdVideoH265LongTermRefPicsSps) { + .used_by_curr_pic_lt_sps_flag = sps->used_by_curr_pic_lt, + }; + + for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) { + ltr->lt_ref_pic_poc_lsb_sps[i] = sps->lt_ref_pic_poc_lsb_sps[i]; + } + + *vksps = (StdVideoH265SequenceParameterSet) { + .flags = (StdVideoH265SpsFlags) { + .sps_temporal_id_nesting_flag = sps->temporal_id_nesting, + .separate_colour_plane_flag = sps->separate_colour_plane, + .conformance_window_flag = sps->conformance_window, + .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info, + .scaling_list_enabled_flag = sps->scaling_list_enabled, + .sps_scaling_list_data_present_flag = sps->scaling_list_enabled, + .amp_enabled_flag = sps->amp_enabled, + .sample_adaptive_offset_enabled_flag = sps->sao_enabled, + .pcm_enabled_flag = sps->pcm_enabled, + .pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled, + .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present, + .sps_temporal_mvp_enabled_flag = sps->temporal_mvp_enabled, + .strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled, + .vui_parameters_present_flag = sps->vui_present, + .sps_extension_present_flag = sps->extension_present, + .sps_range_extension_flag = sps->range_extension, + .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled, + .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled, + .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled, + .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled, + .extended_precision_processing_flag = sps->extended_precision_processing, + .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled, + .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled, + .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled, + .cabac_bypass_alignment_enabled_flag = 
sps->cabac_bypass_alignment_enabled, + .sps_scc_extension_flag = sps->scc_extension, + .sps_curr_pic_ref_enabled_flag = sps->curr_pic_ref_enabled, + .palette_mode_enabled_flag = sps->palette_mode_enabled, + .sps_palette_predictor_initializers_present_flag = sps->palette_predictor_initializers_present, + .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disabled, + }, + .chroma_format_idc = sps->chroma_format_idc, + .pic_width_in_luma_samples = sps->width, + .pic_height_in_luma_samples = sps->height, + .sps_video_parameter_set_id = sps->vps_id, + .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, + .sps_seq_parameter_set_id = sps_idx, + .bit_depth_luma_minus8 = sps->bit_depth - 8, + .bit_depth_chroma_minus8 = sps->bit_depth_chroma - 8, + .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, + .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, + .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, + .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, + .log2_diff_max_min_luma_transform_block_size = sps->log2_diff_max_min_transform_block_size, + .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, + .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, + .num_short_term_ref_pic_sets = sps->nb_st_rps, + .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, + .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, + .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, + .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, + .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, + /* Reserved */ + /* Reserved */ + .palette_max_size = sps->palette_max_size, + .delta_palette_max_predictor_size = sps->delta_palette_max_predictor_size, + .motion_vector_resolution_control_idc = 
sps->motion_vector_resolution_control_idc, + .sps_num_palette_predictor_initializers_minus1 = sps->sps_num_palette_predictor_initializers - 1, + .conf_win_left_offset = sps->pic_conf_win.left_offset, + .conf_win_right_offset = sps->pic_conf_win.right_offset, + .conf_win_top_offset = sps->pic_conf_win.top_offset, + .conf_win_bottom_offset = sps->pic_conf_win.bottom_offset, + .pProfileTierLevel = ptl, + .pDecPicBufMgr = dpbm, + .pScalingLists = vksps_scaling, + .pShortTermRefPicSet = str, + .pLongTermRefPicsSps = ltr, + .pSequenceParameterSetVui = vksps_vui, + .pPredictorPaletteEntries = pal, + }; +} + +static void set_pps(const HEVCPPS *pps, const HEVCSPS *sps, + StdVideoH265ScalingLists *vkpps_scaling, + StdVideoH265PictureParameterSet *vkpps, + StdVideoH265PredictorPaletteEntries *pal) +{ + copy_scaling_list(&pps->scaling_list, vkpps_scaling); + + *vkpps = (StdVideoH265PictureParameterSet) { + .flags = (StdVideoH265PpsFlags) { + .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag, + .output_flag_present_flag = pps->output_flag_present_flag, + .sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag, + .cabac_init_present_flag = pps->cabac_init_present_flag, + .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, + .transform_skip_enabled_flag = pps->transform_skip_enabled_flag, + .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag, + .pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag, + .weighted_pred_flag = pps->weighted_pred_flag, + .weighted_bipred_flag = pps->weighted_bipred_flag, + .transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag, + .tiles_enabled_flag = pps->tiles_enabled_flag, + .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag, + .uniform_spacing_flag = pps->uniform_spacing_flag, + .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag, + .pps_loop_filter_across_slices_enabled_flag = 
pps->seq_loop_filter_across_slices_enabled_flag, + .deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag, + .deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag, + .pps_deblocking_filter_disabled_flag = pps->disable_dbf, + .pps_scaling_list_data_present_flag = pps->scaling_list_data_present_flag, + .lists_modification_present_flag = pps->lists_modification_present_flag, + .slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag, + .pps_extension_present_flag = pps->pps_extension_present_flag, + .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag, + .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag, + .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag, + .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag, + .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag, + .pps_palette_predictor_initializers_present_flag = pps->pps_palette_predictor_initializers_present_flag, + .monochrome_palette_flag = pps->monochrome_palette_flag, + .pps_range_extension_flag = pps->pps_range_extensions_flag, + }, + .pps_pic_parameter_set_id = pps->pps_id, + .pps_seq_parameter_set_id = pps->sps_id, + .sps_video_parameter_set_id = sps->vps_id, + .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, + .num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1, + .num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1, + .init_qp_minus26 = pps->pic_init_qp_minus26, + .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, + .pps_cb_qp_offset = pps->cb_qp_offset, + .pps_cr_qp_offset = pps->cr_qp_offset, + .pps_beta_offset_div2 = pps->beta_offset >> 1, + .pps_tc_offset_div2 = pps->tc_offset >> 1, + .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, + 
.log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size - 2, + .diff_cu_chroma_qp_offset_depth = pps->diff_cu_chroma_qp_offset_depth, + .chroma_qp_offset_list_len_minus1 = pps->chroma_qp_offset_list_len_minus1, + .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma, + .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma, + .pps_act_y_qp_offset_plus5 = pps->pps_act_y_qp_offset + 5, + .pps_act_cb_qp_offset_plus5 = pps->pps_act_cb_qp_offset + 5, + .pps_act_cr_qp_offset_plus3 = pps->pps_act_cr_qp_offset + 3, + .pps_num_palette_predictor_initializers = pps->pps_num_palette_predictor_initializers, + .luma_bit_depth_entry_minus8 = pps->luma_bit_depth_entry - 8, + .chroma_bit_depth_entry_minus8 = pps->chroma_bit_depth_entry - 8, + .num_tile_columns_minus1 = pps->num_tile_columns - 1, + .num_tile_rows_minus1 = pps->num_tile_rows - 1, + .pScalingLists = vkpps_scaling, + .pPredictorPaletteEntries = pal, + }; + + for (int i = 0; i < (pps->monochrome_palette_flag ? 
1 : 3); i++) { + for (int j = 0; j < pps->pps_num_palette_predictor_initializers; j++) + pal->PredictorPaletteEntries[i][j] = pps->pps_palette_predictor_initializer[i][j]; + } + + for (int i = 0; i < pps->num_tile_columns - 1; i++) + vkpps->column_width_minus1[i] = pps->column_width[i] - 1; + + for (int i = 0; i < pps->num_tile_rows - 1; i++) + vkpps->row_height_minus1[i] = pps->row_height[i] - 1; + + for (int i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) { + vkpps->cb_qp_offset_list[i] = pps->cb_qp_offset_list[i]; + vkpps->cr_qp_offset_list[i] = pps->cr_qp_offset_list[i]; + } +} + +static void set_vps(const HEVCVPS *vps, + StdVideoH265VideoParameterSet *vkvps, + StdVideoH265ProfileTierLevel *ptl, + StdVideoH265DecPicBufMgr *dpbm, + StdVideoH265HrdParameters *sls_hdr, + HEVCHeaderVPSSet sls[]) +{ + for (int i = 0; i < vps->vps_num_hrd_parameters; i++) { + const HEVCHdrParams *src = &vps->hdr[i]; + + sls_hdr[i] = (StdVideoH265HrdParameters) { + .flags = (StdVideoH265HrdFlags) { + .nal_hrd_parameters_present_flag = src->nal_hrd_parameters_present_flag, + .vcl_hrd_parameters_present_flag = src->vcl_hrd_parameters_present_flag, + .sub_pic_hrd_params_present_flag = src->sub_pic_hrd_params_present_flag, + .sub_pic_cpb_params_in_pic_timing_sei_flag = src->sub_pic_cpb_params_in_pic_timing_sei_flag, + .fixed_pic_rate_general_flag = src->flags.fixed_pic_rate_general_flag, + .fixed_pic_rate_within_cvs_flag = src->flags.fixed_pic_rate_within_cvs_flag, + .low_delay_hrd_flag = src->flags.low_delay_hrd_flag, + }, + .tick_divisor_minus2 = src->tick_divisor_minus2, + .du_cpb_removal_delay_increment_length_minus1 = src->du_cpb_removal_delay_increment_length_minus1, + .dpb_output_delay_du_length_minus1 = src->dpb_output_delay_du_length_minus1, + .bit_rate_scale = src->bit_rate_scale, + .cpb_size_scale = src->cpb_size_scale, + .cpb_size_du_scale = src->cpb_size_du_scale, + .initial_cpb_removal_delay_length_minus1 = src->initial_cpb_removal_delay_length_minus1, + 
.au_cpb_removal_delay_length_minus1 = src->au_cpb_removal_delay_length_minus1, + .dpb_output_delay_length_minus1 = src->dpb_output_delay_length_minus1, + /* Reserved - 3*16 bits */ + .pSubLayerHrdParametersNal = sls[i].nal_hdr, + .pSubLayerHrdParametersVcl = sls[i].vcl_hdr, + }; + + memcpy(sls_hdr[i].cpb_cnt_minus1, src->cpb_cnt_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].cpb_cnt_minus1)); + memcpy(sls_hdr[i].elemental_duration_in_tc_minus1, src->elemental_duration_in_tc_minus1, + STD_VIDEO_H265_SUBLAYERS_LIST_SIZE*sizeof(*sls_hdr[i].elemental_duration_in_tc_minus1)); + + memcpy(sls[i].nal_hdr, src->nal_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].nal_hdr)); + memcpy(sls[i].vcl_hdr, src->vcl_params, HEVC_MAX_SUB_LAYERS*sizeof(*sls[i].vcl_hdr)); + } + + *ptl = (StdVideoH265ProfileTierLevel) { + .flags = (StdVideoH265ProfileTierLevelFlags) { + .general_tier_flag = vps->ptl.general_ptl.tier_flag, + .general_progressive_source_flag = vps->ptl.general_ptl.progressive_source_flag, + .general_interlaced_source_flag = vps->ptl.general_ptl.interlaced_source_flag, + .general_non_packed_constraint_flag = vps->ptl.general_ptl.non_packed_constraint_flag, + .general_frame_only_constraint_flag = vps->ptl.general_ptl.frame_only_constraint_flag, + }, + .general_profile_idc = ff_vk_h265_profile_to_vk(vps->ptl.general_ptl.profile_idc), + .general_level_idc = ff_vk_h265_level_to_vk(vps->ptl.general_ptl.level_idc), + }; + + for (int i = 0; i < vps->vps_max_sub_layers; i++) { + dpbm->max_latency_increase_plus1[i] = vps->vps_max_latency_increase[i] + 1; + dpbm->max_dec_pic_buffering_minus1[i] = vps->vps_max_dec_pic_buffering[i] - 1; + dpbm->max_num_reorder_pics[i] = vps->vps_num_reorder_pics[i]; + } + + *vkvps = (StdVideoH265VideoParameterSet) { + .flags = (StdVideoH265VpsFlags) { + .vps_temporal_id_nesting_flag = vps->vps_temporal_id_nesting_flag, + .vps_sub_layer_ordering_info_present_flag = vps->vps_sub_layer_ordering_info_present_flag, + 
.vps_timing_info_present_flag = vps->vps_timing_info_present_flag, + .vps_poc_proportional_to_timing_flag = vps->vps_poc_proportional_to_timing_flag, + }, + .vps_video_parameter_set_id = vps->vps_id, + .vps_max_sub_layers_minus1 = vps->vps_max_sub_layers - 1, + /* Reserved */ + /* Reserved */ + .vps_num_units_in_tick = vps->vps_num_units_in_tick, + .vps_time_scale = vps->vps_time_scale, + .vps_num_ticks_poc_diff_one_minus1 = vps->vps_num_ticks_poc_diff_one - 1, + /* Reserved */ + .pDecPicBufMgr = dpbm, + .pHrdParameters = sls_hdr, + .pProfileTierLevel = ptl, + }; +} + +static int vk_hevc_create_params(AVCodecContext *avctx, AVBufferRef **buf) +{ + int err; + const HEVCContext *h = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + VkVideoDecodeH265SessionParametersAddInfoKHR h265_params_info = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_KHR, + .stdSPSCount = 0, + .stdPPSCount = 0, + .stdVPSCount = 0, + }; + VkVideoDecodeH265SessionParametersCreateInfoKHR h265_params = { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pParametersAddInfo = &h265_params_info, + }; + VkVideoSessionParametersCreateInfoKHR session_params_create = { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_PARAMETERS_CREATE_INFO_KHR, + .pNext = &h265_params, + .videoSession = ctx->common.session, + .videoSessionParametersTemplate = VK_NULL_HANDLE, + }; + + HEVCHeaderSet *hdr; + int nb_vps = 0; + int vps_list_idx[HEVC_MAX_VPS_COUNT]; + + for (int i = 0; i < HEVC_MAX_VPS_COUNT; i++) + if (h->ps.vps_list[i]) + vps_list_idx[nb_vps++] = i; + + err = alloc_hevc_header_structs(dec, nb_vps, vps_list_idx, h->ps.vps_list); + if (err < 0) + return err; + + hdr = dec->hevc_headers; + + h265_params_info.pStdSPSs = hdr->sps; + h265_params_info.pStdPPSs = hdr->pps; + h265_params_info.pStdVPSs = hdr->vps; + + /* SPS list */ + for (int i = 0; i < HEVC_MAX_SPS_COUNT; 
i++) { + if (h->ps.sps_list[i]) { + const HEVCSPS *sps_l = h->ps.sps_list[i]; + int idx = h265_params_info.stdSPSCount++; + set_sps(sps_l, i, &hdr->hsps[idx].scaling, &hdr->hsps[idx].vui_header, + &hdr->hsps[idx].vui, &hdr->sps[idx], hdr->hsps[idx].nal_hdr, + hdr->hsps[idx].vcl_hdr, &hdr->hsps[idx].ptl, &hdr->hsps[idx].dpbm, + &hdr->hsps[idx].pal, hdr->hsps[idx].str, &hdr->hsps[idx].ltr); + } + } + + /* PPS list */ + for (int i = 0; i < HEVC_MAX_PPS_COUNT; i++) { + if (h->ps.pps_list[i]) { + const HEVCPPS *pps_l = h->ps.pps_list[i]; + const HEVCSPS *sps_l = h->ps.sps_list[pps_l->sps_id]; + int idx = h265_params_info.stdPPSCount++; + set_pps(pps_l, sps_l, &hdr->hpps[idx].scaling, + &hdr->pps[idx], &hdr->hpps[idx].pal); + } + } + + /* VPS list */ + for (int i = 0; i < nb_vps; i++) { + const HEVCVPS *vps_l = h->ps.vps_list[vps_list_idx[i]]; + set_vps(vps_l, &hdr->vps[i], &hdr->hvps[i].ptl, &hdr->hvps[i].dpbm, + hdr->hvps[i].hdr, hdr->hvps[i].sls); + h265_params_info.stdVPSCount++; + } + + h265_params.maxStdSPSCount = h265_params_info.stdSPSCount; + h265_params.maxStdPPSCount = h265_params_info.stdPPSCount; + h265_params.maxStdVPSCount = h265_params_info.stdVPSCount; + + err = ff_vk_decode_create_params(buf, avctx, ctx, &session_params_create); + if (err < 0) + return err; + + av_log(avctx, AV_LOG_DEBUG, "Created frame parameters: %i SPS %i PPS %i VPS\n", + h265_params_info.stdSPSCount, h265_params_info.stdPPSCount, + h265_params_info.stdVPSCount); + + return 0; +} + +static int vk_hevc_start_frame(AVCodecContext *avctx, + av_unused const AVBufferRef *buffer_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + HEVCContext *h = avctx->priv_data; + HEVCLayerContext *l = &h->layers[h->cur_layer]; + + HEVCFrame *pic = h->cur_frame; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + const HEVCPPS *pps = h->pps; + const HEVCSPS *sps = pps->sps; + int nb_refs = 0; + + hp->h265pic = 
(StdVideoDecodeH265PictureInfo) { + .flags = (StdVideoDecodeH265PictureInfoFlags) { + .IrapPicFlag = IS_IRAP(h), + .IdrPicFlag = IS_IDR(h), + .IsReference = h->nal_unit_type < 16 ? h->nal_unit_type & 1 : 1, + .short_term_ref_pic_set_sps_flag = h->sh.short_term_ref_pic_set_sps_flag, + }, + .sps_video_parameter_set_id = sps->vps_id, + .pps_seq_parameter_set_id = pps->sps_id, + .pps_pic_parameter_set_id = pps->pps_id, + .NumDeltaPocsOfRefRpsIdx = h->sh.short_term_rps ? h->sh.short_term_rps->rps_idx_num_delta_pocs : 0, + .PicOrderCntVal = h->poc, + .NumBitsForSTRefPicSetInSlice = !h->sh.short_term_ref_pic_set_sps_flag ? + h->sh.short_term_ref_pic_set_size : 0, + }; + + /* Fill in references */ + for (int i = 0; i < FF_ARRAY_ELEMS(l->DPB); i++) { + const HEVCFrame *ref = &l->DPB[i]; + int idx = nb_refs; + + if (!(ref->flags & (HEVC_FRAME_FLAG_SHORT_REF | HEVC_FRAME_FLAG_LONG_REF))) + continue; + + if (ref == pic) { + err = vk_hevc_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + &hp->vkh265_ref, &hp->h265_ref, pic, 1, i); + if (err < 0) + return err; + + continue; + } + + err = vk_hevc_fill_pict(avctx, &hp->ref_src[idx], &vp->ref_slots[idx], + &vp->refs[idx], &hp->vkh265_refs[idx], + &hp->h265_refs[idx], (HEVCFrame *)ref, 0, i); + if (err < 0) + return err; + + nb_refs++; + } + + memset(hp->h265pic.RefPicSetStCurrBefore, 0xff, 8); + for (int i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { + HEVCFrame *frame = h->rps[ST_CURR_BEF].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(l->DPB); j++) { + const HEVCFrame *ref = &l->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetStCurrBefore[i] = j; + break; + } + } + } + memset(hp->h265pic.RefPicSetStCurrAfter, 0xff, 8); + for (int i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { + HEVCFrame *frame = h->rps[ST_CURR_AFT].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(l->DPB); j++) { + const HEVCFrame *ref = &l->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetStCurrAfter[i] = j; + break; + } + } + } + 
memset(hp->h265pic.RefPicSetLtCurr, 0xff, 8); + for (int i = 0; i < h->rps[LT_CURR].nb_refs; i++) { + HEVCFrame *frame = h->rps[LT_CURR].ref[i]; + for (int j = 0; j < FF_ARRAY_ELEMS(l->DPB); j++) { + const HEVCFrame *ref = &l->DPB[j]; + if (ref == frame) { + hp->h265pic.RefPicSetLtCurr[i] = j; + break; + } + } + } + + hp->h265_pic_info = (VkVideoDecodeH265PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR, + .pStdPictureInfo = &hp->h265pic, + .sliceSegmentCount = 0, + }; + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &hp->h265_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = nb_refs, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->view.out[0], + }, + }; + + return 0; +} + +static int vk_hevc_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + const HEVCContext *h = avctx->priv_data; + HEVCVulkanDecodePicture *hp = h->cur_frame->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + + int err = ff_vk_decode_add_slice(avctx, vp, data, size, 1, + &hp->h265_pic_info.sliceSegmentCount, + &hp->h265_pic_info.pSliceSegmentOffsets); + if (err < 0) + return err; + + return 0; +} + +static int vk_hevc_end_frame(AVCodecContext *avctx) +{ + const HEVCContext *h = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + + HEVCFrame *pic = h->cur_frame; + HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &hp->vp; + FFVulkanDecodePicture *rvp[HEVC_MAX_REFS] = { 0 }; + AVFrame *rav[HEVC_MAX_REFS] = { 0 }; + int err; + + const HEVCPPS *pps = 
h->pps; + const HEVCSPS *sps = pps->sps; + +#ifdef VK_KHR_video_maintenance2 + HEVCHeaderPPS vkpps_p; + StdVideoH265PictureParameterSet vkpps; + HEVCHeaderSPS vksps_p; + StdVideoH265SequenceParameterSet vksps; + HEVCHeaderVPSSet vkvps_ps[HEVC_MAX_SUB_LAYERS]; + HEVCHeaderVPS vkvps_p; + StdVideoH265VideoParameterSet vkvps; + VkVideoDecodeH265InlineSessionParametersInfoKHR h265_params; + + if (ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2) { + set_pps(pps, sps, &vkpps_p.scaling, &vkpps, &vkpps_p.pal); + set_sps(sps, pps->sps_id, &vksps_p.scaling, &vksps_p.vui_header, + &vksps_p.vui, &vksps, vksps_p.nal_hdr, + vksps_p.vcl_hdr, &vksps_p.ptl, &vksps_p.dpbm, + &vksps_p.pal, vksps_p.str, &vksps_p.ltr); + + vkvps_p.sls = vkvps_ps; + set_vps(sps->vps, &vkvps, &vkvps_p.ptl, &vkvps_p.dpbm, + vkvps_p.hdr, vkvps_p.sls); + + h265_params = (VkVideoDecodeH265InlineSessionParametersInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_INLINE_SESSION_PARAMETERS_INFO_KHR, + .pStdSPS = &vksps, + .pStdPPS = &vkpps, + .pStdVPS = &vkvps, + }; + hp->h265_pic_info.pNext = &h265_params; + } +#endif + + if (!hp->h265_pic_info.sliceSegmentCount) + return 0; + + if (!dec->session_params && + !(ctx->s.extensions & FF_VK_EXT_VIDEO_MAINTENANCE_2)) { + if (!pps) { + unsigned int pps_id = h->sh.pps_id; + if (pps_id < HEVC_MAX_PPS_COUNT && h->ps.pps_list[pps_id] != NULL) + pps = h->ps.pps_list[pps_id]; + } + + if (!pps) { + av_log(avctx, AV_LOG_ERROR, + "Encountered frame without a valid active PPS reference.\n"); + return AVERROR_INVALIDDATA; + } + + err = vk_hevc_create_params(avctx, &dec->session_params); + if (err < 0) + return err; + + hp->h265pic.sps_video_parameter_set_id = sps->vps_id; + hp->h265pic.pps_seq_parameter_set_id = pps->sps_id; + hp->h265pic.pps_pic_parameter_set_id = pps->pps_id; + } + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + HEVCVulkanDecodePicture *rfhp = hp->ref_src[i]->hwaccel_picture_private; + rav[i] = hp->ref_src[i]->f; + rvp[i] = 
&rfhp->vp; + } + + av_log(avctx, AV_LOG_DEBUG, "Decoding frame, %"SIZE_SPECIFIER" bytes, %i slices\n", + vp->slices_size, hp->h265_pic_info.sliceSegmentCount); + + return ff_vk_decode_frame(avctx, pic->f, vp, rav, rvp); +} + +static void vk_hevc_free_frame_priv(AVRefStructOpaque _hwctx, void *data) +{ + AVHWDeviceContext *hwctx = _hwctx.nc; + HEVCVulkanDecodePicture *hp = data; + + /* Free frame resources */ + ff_vk_decode_free_frame(hwctx, &hp->vp); +} + +const FFHWAccel ff_hevc_vulkan_hwaccel = { + .p.name = "hevc_vulkan", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_HEVC, + .p.pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_hevc_start_frame, + .decode_slice = &vk_hevc_decode_slice, + .end_frame = &vk_hevc_end_frame, + .free_frame_priv = &vk_hevc_free_frame_priv, + .frame_priv_data_size = sizeof(HEVCVulkanDecodePicture), + .init = &ff_vk_decode_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_invalidate, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; -- 2.49.1 From f84b108065ff196a359bda238526a99fc938a967 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:39:16 +0000 Subject: [PATCH 085/118] Changing vulkan file directory --- libavcodec/vulkan_prores_raw.c | 503 --------------------------------- 1 file changed, 503 deletions(-) delete mode 100644 libavcodec/vulkan_prores_raw.c diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c deleted file mode 100644 index 7a1f97a640..0000000000 --- a/libavcodec/vulkan_prores_raw.c +++ /dev/null @@ -1,503 +0,0 @@ -/* - * Copyright (c) 2025 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "vulkan_decode.h" -#include "hwaccel_internal.h" - -#include "prores_raw.h" -#include "libavutil/vulkan_spirv.h" -#include "libavutil/mem.h" - -extern const char *ff_source_common_comp; -extern const char *ff_source_prores_raw_comp; - -const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc = { - .codec_id = AV_CODEC_ID_PRORES_RAW, - .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR, - .queue_flags = VK_QUEUE_COMPUTE_BIT, -}; - -typedef struct ProResRAWVulkanDecodePicture { - FFVulkanDecodePicture vp; - - AVBufferRef *tile_data; - uint32_t nb_tiles; -} ProResRAWVulkanDecodePicture; - -typedef struct ProResRAWVulkanDecodeContext { - FFVulkanShader decode[2]; - - AVBufferPool *tile_data_pool; - - FFVkBuffer uniform_buf; -} ProResRAWVulkanDecodeContext; - -typedef struct DecodePushData { - VkDeviceAddress tile_data; - VkDeviceAddress pkt_data; - uint32_t frame_size[2]; - uint32_t tile_size[2]; - uint8_t qmat[64]; -} DecodePushData; - -typedef struct TileData { - int32_t pos[2]; - uint32_t offset; - uint32_t size; -} TileData; - -static int vk_prores_raw_start_frame(AVCodecContext *avctx, - const AVBufferRef *buffer_ref, - av_unused const uint8_t *buffer, - av_unused uint32_t size) -{ - int err; - FFVulkanDecodeContext *dec = 
avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx; - ProResRAWContext *prr = avctx->priv_data; - - ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &pp->vp; - - /* Host map the input tile data if supported */ - if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) - ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data, - buffer_ref, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); - - /* Allocate tile data */ - err = ff_vk_get_pooled_buffer(&ctx->s, &prv->tile_data_pool, - &pp->tile_data, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, prr->nb_tiles*sizeof(TileData), - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); - if (err < 0) - return err; - - /* Prepare frame to be used */ - err = ff_vk_decode_prepare_frame_sdr(dec, prr->frame, vp, 1, - FF_VK_REP_FLOAT, 0); - if (err < 0) - return err; - - return 0; -} - -static int vk_prores_raw_decode_slice(AVCodecContext *avctx, - const uint8_t *data, - uint32_t size) -{ - ProResRAWContext *prr = avctx->priv_data; - - ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &pp->vp; - - FFVkBuffer *tile_data_buf = (FFVkBuffer *)pp->tile_data->data; - TileData *td = (TileData *)tile_data_buf->mapped_mem; - FFVkBuffer *slices_buf = vp->slices_buf ? 
(FFVkBuffer *)vp->slices_buf->data : NULL; - - td[pp->nb_tiles].pos[0] = prr->tiles[pp->nb_tiles].x; - td[pp->nb_tiles].pos[1] = prr->tiles[pp->nb_tiles].y; - td[pp->nb_tiles].size = size; - - if (vp->slices_buf && slices_buf->host_ref) { - td[pp->nb_tiles].offset = data - slices_buf->mapped_mem; - pp->nb_tiles++; - } else { - int err; - td[pp->nb_tiles].offset = vp->slices_size; - err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, - &pp->nb_tiles, NULL); - if (err < 0) - return err; - } - - return 0; -} - -static int vk_prores_raw_end_frame(AVCodecContext *avctx) -{ - int err; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - FFVulkanFunctions *vk = &ctx->s.vkfn; - - ProResRAWContext *prr = avctx->priv_data; - ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx; - - ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &pp->vp; - - FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data; - FFVkBuffer *tile_data = (FFVkBuffer *)pp->tile_data->data; - - VkImageMemoryBarrier2 img_bar[8]; - int nb_img_bar = 0; - - FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); - ff_vk_exec_start(&ctx->s, exec); - - /* Prepare deps */ - RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, prr->frame, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - - err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, - prr->frame); - if (err < 0) - return err; - - RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &pp->tile_data, 1, 0)); - pp->tile_data = NULL; - RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0)); - vp->slices_buf = NULL; - - ff_vk_frame_barrier(&ctx->s, exec, prr->frame, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_ACCESS_2_TRANSFER_WRITE_BIT, - VK_IMAGE_LAYOUT_GENERAL, - VK_QUEUE_FAMILY_IGNORED); - - vk->CmdPipelineBarrier2(exec->buf, 
&(VkDependencyInfo) { - .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, - .pImageMemoryBarriers = img_bar, - .imageMemoryBarrierCount = nb_img_bar, - }); - nb_img_bar = 0; - - FFVulkanShader *decode_shader = &prv->decode[prr->version]; - ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, - prr->frame, vp->view.out, - 0, 0, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - - ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader); - - /* Update push data */ - DecodePushData pd_decode = (DecodePushData) { - .tile_data = tile_data->address, - .pkt_data = slices_buf->address, - .frame_size[0] = avctx->width, - .frame_size[1] = avctx->height, - .tile_size[0] = prr->tw, - .tile_size[1] = prr->th, - }; - memcpy(pd_decode.qmat, prr->qmat, 64); - ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader, - VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd_decode), &pd_decode); - - vk->CmdDispatch(exec->buf, prr->nb_tw, prr->nb_th, 1); - - err = ff_vk_exec_submit(&ctx->s, exec); - if (err < 0) - return err; - -fail: - return 0; -} - -static int init_decode_shader(ProResRAWContext *prr, FFVulkanContext *s, - FFVkExecPool *pool, FFVkSPIRVCompiler *spv, - FFVulkanShader *shd, int version) -{ - int err; - FFVulkanDescriptorSetBinding *desc_set; - int parallel_rows = 1; - - uint8_t *spv_data; - size_t spv_len; - void *spv_opaque = NULL; - - if (s->props.properties.limits.maxComputeWorkGroupInvocations < 512 || - s->props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) - parallel_rows = 0; - - RET(ff_vk_shader_init(s, shd, "prores_raw", - VK_SHADER_STAGE_COMPUTE_BIT, - (const char *[]) { "GL_EXT_buffer_reference", - "GL_EXT_buffer_reference2", - "GL_EXT_null_initializer" }, 3, - parallel_rows ? 8 : 1 /* 8x8 transforms, 8-point width */, - version == 0 ? 
8 : 16 /* Horizontal blocks */, - 4 /* Components */, - 0)); - - if (parallel_rows) - GLSLC(0, #define PARALLEL_ROWS ); - - /* Common codec header */ - GLSLD(ff_source_common_comp); - - GLSLC(0, layout(buffer_reference, buffer_reference_align = 16) buffer TileData { ); - GLSLC(1, ivec2 pos; ); - GLSLC(1, uint offset; ); - GLSLC(1, uint size; ); - GLSLC(0, }; ); - GLSLC(0, ); - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, TileData tile_data; ); - GLSLC(1, u8buf pkt_data; ); - GLSLC(1, uvec2 frame_size; ); - GLSLC(1, uvec2 tile_size; ); - GLSLC(1, uint8_t qmat[64]; ); - GLSLC(0, }; ); - GLSLC(0, ); - ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData), - VK_SHADER_STAGE_COMPUTE_BIT); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "dst", - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .mem_layout = "r16", - .mem_quali = "writeonly", - .dimensions = 2, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - }, - }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0)); - - desc_set = (FFVulkanDescriptorSetBinding []) { - { - .name = "dct_scale_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "float idct_8x8_scales[64];", - }, - { - .name = "scan_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t scan[64];", - }, - { - .name = "dc_cb_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "uint8_t dc_cb[13];", - }, - { - .name = "ac_cb_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t ac_cb[95];", - }, - { - .name = "rn_cb_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t 
rn_cb[28];", - }, - { - .name = "ln_cb_buf", - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .mem_layout = "scalar", - .buf_content = "int16_t ln_cb[15];", - }, - }; - RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 6, 1, 0)); - - GLSLD(ff_source_prores_raw_comp); - - RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", - &spv_opaque)); - RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); - - RET(ff_vk_shader_register_exec(s, pool, shd)); - -fail: - if (spv_opaque) - spv->free_shader(spv, &spv_opaque); - - return err; -} - -static void vk_decode_prores_raw_uninit(FFVulkanDecodeShared *ctx) -{ - ProResRAWVulkanDecodeContext *fv = ctx->sd_ctx; - - ff_vk_shader_free(&ctx->s, &fv->decode[0]); - ff_vk_shader_free(&ctx->s, &fv->decode[1]); - - ff_vk_free_buf(&ctx->s, &fv->uniform_buf); - - av_buffer_pool_uninit(&fv->tile_data_pool); - - av_freep(&fv); -} - -static int vk_decode_prores_raw_init(AVCodecContext *avctx) -{ - int err; - ProResRAWContext *prr = avctx->priv_data; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - - FFVkSPIRVCompiler *spv = ff_vk_spirv_init(); - if (!spv) { - av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); - return AVERROR_EXTERNAL; - } - - err = ff_vk_decode_init(avctx); - if (err < 0) - return err; - - FFVulkanDecodeShared *ctx = dec->shared_ctx; - ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx = av_mallocz(sizeof(*prv)); - if (!prv) { - err = AVERROR(ENOMEM); - goto fail; - } - - ctx->sd_ctx_free = &vk_decode_prores_raw_uninit; - - /* Setup decode shader */ - RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[0], 0)); - RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[1], 1)); - - /* Size in bytes of each codebook table */ - size_t cb_size[5] = { - 13*sizeof(uint8_t), - 95*sizeof(int16_t), - 28*sizeof(int16_t), - 15*sizeof(int16_t), - }; - - /* Offset of each codebook table */ - size_t 
cb_offset[5]; - size_t ua = ctx->s.props.properties.limits.minUniformBufferOffsetAlignment; - cb_offset[0] = 64*sizeof(float) + 64*sizeof(uint8_t); - cb_offset[1] = cb_offset[0] + FFALIGN(cb_size[0], ua); - cb_offset[2] = cb_offset[1] + FFALIGN(cb_size[1], ua); - cb_offset[3] = cb_offset[2] + FFALIGN(cb_size[2], ua); - cb_offset[4] = cb_offset[3] + FFALIGN(cb_size[3], ua); - - RET(ff_vk_create_buf(&ctx->s, &prv->uniform_buf, - 64*sizeof(float) + 64*sizeof(uint8_t) + cb_offset[4] + 256, - NULL, NULL, - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - - uint8_t *uniform_buf; - RET(ff_vk_map_buffer(&ctx->s, &prv->uniform_buf, &uniform_buf, 0)); - - /* DCT scales */ - float *dct_scale_buf = (float *)uniform_buf; - double idct_8_scales[8] = { - cos(4.0*M_PI/16.0) / 2.0, - cos(1.0*M_PI/16.0) / 2.0, - cos(2.0*M_PI/16.0) / 2.0, - cos(3.0*M_PI/16.0) / 2.0, - cos(4.0*M_PI/16.0) / 2.0, - cos(5.0*M_PI/16.0) / 2.0, - cos(6.0*M_PI/16.0) / 2.0, - cos(7.0*M_PI/16.0) / 2.0, - }; - for (int i = 0; i < 64; i++) - dct_scale_buf[i] = (float)(idct_8_scales[i >> 3] * - idct_8_scales[i & 7]); - - /* Scan table */ - uint8_t *scan_buf = uniform_buf + 64*sizeof(float); - for (int i = 0; i < 64; i++) - scan_buf[prr->scan[i]] = i; - - /* Codebooks */ - memcpy(uniform_buf + cb_offset[0], ff_prores_raw_dc_cb, - sizeof(ff_prores_raw_dc_cb)); - memcpy(uniform_buf + cb_offset[1], ff_prores_raw_ac_cb, - sizeof(ff_prores_raw_ac_cb)); - memcpy(uniform_buf + cb_offset[2], ff_prores_raw_rn_cb, - sizeof(ff_prores_raw_rn_cb)); - memcpy(uniform_buf + cb_offset[3], ff_prores_raw_ln_cb, - sizeof(ff_prores_raw_ln_cb)); - - RET(ff_vk_unmap_buffer(&ctx->s, &prv->uniform_buf, 1)); - - /* Done; update descriptors */ - for (int i = 0; i < 2; i++) { - RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], - &prv->decode[i], 1, 0, 0, - &prv->uniform_buf, - 0, 64*sizeof(float), - 
VK_FORMAT_UNDEFINED)); - RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], - &prv->decode[i], 1, 1, 0, - &prv->uniform_buf, - 64*sizeof(float), 64*sizeof(uint8_t), - VK_FORMAT_UNDEFINED)); - for (int j = 0; j < 4; j++) - RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], - &prv->decode[i], 1, 2 + j, 0, - &prv->uniform_buf, - cb_offset[j], cb_size[j], - VK_FORMAT_UNDEFINED)); - } - -fail: - spv->uninit(&spv); - - return err; -} - -static void vk_prores_raw_free_frame_priv(AVRefStructOpaque _hwctx, void *data) -{ - AVHWDeviceContext *dev_ctx = _hwctx.nc; - - ProResRAWVulkanDecodePicture *pp = data; - FFVulkanDecodePicture *vp = &pp->vp; - - ff_vk_decode_free_frame(dev_ctx, vp); -} - -const FFHWAccel ff_prores_raw_vulkan_hwaccel = { - .p.name = "prores_raw_vulkan", - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_PRORES_RAW, - .p.pix_fmt = AV_PIX_FMT_VULKAN, - .start_frame = &vk_prores_raw_start_frame, - .decode_slice = &vk_prores_raw_decode_slice, - .end_frame = &vk_prores_raw_end_frame, - .free_frame_priv = &vk_prores_raw_free_frame_priv, - .frame_priv_data_size = sizeof(ProResRAWVulkanDecodePicture), - .init = &vk_decode_prores_raw_init, - .update_thread_context = &ff_vk_update_thread_context, - .decode_params = &ff_vk_params_invalidate, - .flush = &ff_vk_decode_flush, - .uninit = &ff_vk_decode_uninit, - .frame_params = &ff_vk_frame_params, - .priv_data_size = sizeof(FFVulkanDecodeContext), - .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, -}; -- 2.49.1 From 9ce26edc967bf1b27ad0469840ac4280aa068d9a Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:39:44 +0000 Subject: [PATCH 086/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_prores_raw.c | 503 ++++++++++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_prores_raw.c diff --git a/libavcodec/vulkan/vulkan_prores_raw.c 
b/libavcodec/vulkan/vulkan_prores_raw.c new file mode 100644 index 0000000000..a38835efcc --- /dev/null +++ b/libavcodec/vulkan/vulkan_prores_raw.c @@ -0,0 +1,503 @@ +/* + * Copyright (c) 2025 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vulkan_decode.h" +#include "libavcodec/hwaccel_internal.h" + +#include "libavcodec/prores_raw.h" +#include "../../libavutil/vulkan/vulkan_spirv.h" +#include "libavutil/mem.h" + +extern const char *ff_source_common_comp; +extern const char *ff_source_prores_raw_comp; + +const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc = { + .codec_id = AV_CODEC_ID_PRORES_RAW, + .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR, + .queue_flags = VK_QUEUE_COMPUTE_BIT, +}; + +typedef struct ProResRAWVulkanDecodePicture { + FFVulkanDecodePicture vp; + + AVBufferRef *tile_data; + uint32_t nb_tiles; +} ProResRAWVulkanDecodePicture; + +typedef struct ProResRAWVulkanDecodeContext { + FFVulkanShader decode[2]; + + AVBufferPool *tile_data_pool; + + FFVkBuffer uniform_buf; +} ProResRAWVulkanDecodeContext; + +typedef struct DecodePushData { + VkDeviceAddress tile_data; + VkDeviceAddress pkt_data; + uint32_t frame_size[2]; + uint32_t tile_size[2]; + uint8_t qmat[64]; +} DecodePushData; + +typedef struct 
TileData { + int32_t pos[2]; + uint32_t offset; + uint32_t size; +} TileData; + +static int vk_prores_raw_start_frame(AVCodecContext *avctx, + const AVBufferRef *buffer_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx; + ProResRAWContext *prr = avctx->priv_data; + + ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &pp->vp; + + /* Host map the input tile data if supported */ + if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY) + ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data, + buffer_ref, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); + + /* Allocate tile data */ + err = ff_vk_get_pooled_buffer(&ctx->s, &prv->tile_data_pool, + &pp->tile_data, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, prr->nb_tiles*sizeof(TileData), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) + return err; + + /* Prepare frame to be used */ + err = ff_vk_decode_prepare_frame_sdr(dec, prr->frame, vp, 1, + FF_VK_REP_FLOAT, 0); + if (err < 0) + return err; + + return 0; +} + +static int vk_prores_raw_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + ProResRAWContext *prr = avctx->priv_data; + + ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &pp->vp; + + FFVkBuffer *tile_data_buf = (FFVkBuffer *)pp->tile_data->data; + TileData *td = (TileData *)tile_data_buf->mapped_mem; + FFVkBuffer *slices_buf = vp->slices_buf ? 
(FFVkBuffer *)vp->slices_buf->data : NULL; + + td[pp->nb_tiles].pos[0] = prr->tiles[pp->nb_tiles].x; + td[pp->nb_tiles].pos[1] = prr->tiles[pp->nb_tiles].y; + td[pp->nb_tiles].size = size; + + if (vp->slices_buf && slices_buf->host_ref) { + td[pp->nb_tiles].offset = data - slices_buf->mapped_mem; + pp->nb_tiles++; + } else { + int err; + td[pp->nb_tiles].offset = vp->slices_size; + err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, + &pp->nb_tiles, NULL); + if (err < 0) + return err; + } + + return 0; +} + +static int vk_prores_raw_end_frame(AVCodecContext *avctx) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + ProResRAWContext *prr = avctx->priv_data; + ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx; + + ProResRAWVulkanDecodePicture *pp = prr->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &pp->vp; + + FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data; + FFVkBuffer *tile_data = (FFVkBuffer *)pp->tile_data->data; + + VkImageMemoryBarrier2 img_bar[8]; + int nb_img_bar = 0; + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); + ff_vk_exec_start(&ctx->s, exec); + + /* Prepare deps */ + RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, prr->frame, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + err = ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value, + prr->frame); + if (err < 0) + return err; + + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &pp->tile_data, 1, 0)); + pp->tile_data = NULL; + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0)); + vp->slices_buf = NULL; + + ff_vk_frame_barrier(&ctx->s, exec, prr->frame, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_ACCESS_2_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, 
&(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + nb_img_bar = 0; + + FFVulkanShader *decode_shader = &prv->decode[prr->version]; + ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, + prr->frame, vp->view.out, + 0, 0, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader); + + /* Update push data */ + DecodePushData pd_decode = (DecodePushData) { + .tile_data = tile_data->address, + .pkt_data = slices_buf->address, + .frame_size[0] = avctx->width, + .frame_size[1] = avctx->height, + .tile_size[0] = prr->tw, + .tile_size[1] = prr->th, + }; + memcpy(pd_decode.qmat, prr->qmat, 64); + ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd_decode), &pd_decode); + + vk->CmdDispatch(exec->buf, prr->nb_tw, prr->nb_th, 1); + + err = ff_vk_exec_submit(&ctx->s, exec); + if (err < 0) + return err; + +fail: + return 0; +} + +static int init_decode_shader(ProResRAWContext *prr, FFVulkanContext *s, + FFVkExecPool *pool, FFVkSPIRVCompiler *spv, + FFVulkanShader *shd, int version) +{ + int err; + FFVulkanDescriptorSetBinding *desc_set; + int parallel_rows = 1; + + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + + if (s->props.properties.limits.maxComputeWorkGroupInvocations < 512 || + s->props.properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) + parallel_rows = 0; + + RET(ff_vk_shader_init(s, shd, "prores_raw", + VK_SHADER_STAGE_COMPUTE_BIT, + (const char *[]) { "GL_EXT_buffer_reference", + "GL_EXT_buffer_reference2", + "GL_EXT_null_initializer" }, 3, + parallel_rows ? 8 : 1 /* 8x8 transforms, 8-point width */, + version == 0 ? 
8 : 16 /* Horizontal blocks */, + 4 /* Components */, + 0)); + + if (parallel_rows) + GLSLC(0, #define PARALLEL_ROWS ); + + /* Common codec header */ + GLSLD(ff_source_common_comp); + + GLSLC(0, layout(buffer_reference, buffer_reference_align = 16) buffer TileData { ); + GLSLC(1, ivec2 pos; ); + GLSLC(1, uint offset; ); + GLSLC(1, uint size; ); + GLSLC(0, }; ); + GLSLC(0, ); + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, TileData tile_data; ); + GLSLC(1, u8buf pkt_data; ); + GLSLC(1, uvec2 frame_size; ); + GLSLC(1, uvec2 tile_size; ); + GLSLC(1, uint8_t qmat[64]; ); + GLSLC(0, }; ); + GLSLC(0, ); + ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData), + VK_SHADER_STAGE_COMPUTE_BIT); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "dst", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = "r16", + .mem_quali = "writeonly", + .dimensions = 2, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0)); + + desc_set = (FFVulkanDescriptorSetBinding []) { + { + .name = "dct_scale_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "float idct_8x8_scales[64];", + }, + { + .name = "scan_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t scan[64];", + }, + { + .name = "dc_cb_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "uint8_t dc_cb[13];", + }, + { + .name = "ac_cb_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t ac_cb[95];", + }, + { + .name = "rn_cb_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t 
rn_cb[28];", + }, + { + .name = "ln_cb_buf", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int16_t ln_cb[15];", + }, + }; + RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 6, 1, 0)); + + GLSLD(ff_source_prores_raw_comp); + + RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main", + &spv_opaque)); + RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main")); + + RET(ff_vk_shader_register_exec(s, pool, shd)); + +fail: + if (spv_opaque) + spv->free_shader(spv, &spv_opaque); + + return err; +} + +static void vk_decode_prores_raw_uninit(FFVulkanDecodeShared *ctx) +{ + ProResRAWVulkanDecodeContext *fv = ctx->sd_ctx; + + ff_vk_shader_free(&ctx->s, &fv->decode[0]); + ff_vk_shader_free(&ctx->s, &fv->decode[1]); + + ff_vk_free_buf(&ctx->s, &fv->uniform_buf); + + av_buffer_pool_uninit(&fv->tile_data_pool); + + av_freep(&fv); +} + +static int vk_decode_prores_raw_init(AVCodecContext *avctx) +{ + int err; + ProResRAWContext *prr = avctx->priv_data; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + + FFVkSPIRVCompiler *spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + err = ff_vk_decode_init(avctx); + if (err < 0) + return err; + + FFVulkanDecodeShared *ctx = dec->shared_ctx; + ProResRAWVulkanDecodeContext *prv = ctx->sd_ctx = av_mallocz(sizeof(*prv)); + if (!prv) { + err = AVERROR(ENOMEM); + goto fail; + } + + ctx->sd_ctx_free = &vk_decode_prores_raw_uninit; + + /* Setup decode shader */ + RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[0], 0)); + RET(init_decode_shader(prr, &ctx->s, &ctx->exec_pool, spv, &prv->decode[1], 1)); + + /* Size in bytes of each codebook table */ + size_t cb_size[5] = { + 13*sizeof(uint8_t), + 95*sizeof(int16_t), + 28*sizeof(int16_t), + 15*sizeof(int16_t), + }; + + /* Offset of each codebook table */ + size_t 
cb_offset[5]; + size_t ua = ctx->s.props.properties.limits.minUniformBufferOffsetAlignment; + cb_offset[0] = 64*sizeof(float) + 64*sizeof(uint8_t); + cb_offset[1] = cb_offset[0] + FFALIGN(cb_size[0], ua); + cb_offset[2] = cb_offset[1] + FFALIGN(cb_size[1], ua); + cb_offset[3] = cb_offset[2] + FFALIGN(cb_size[2], ua); + cb_offset[4] = cb_offset[3] + FFALIGN(cb_size[3], ua); + + RET(ff_vk_create_buf(&ctx->s, &prv->uniform_buf, + 64*sizeof(float) + 64*sizeof(uint8_t) + cb_offset[4] + 256, + NULL, NULL, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + + uint8_t *uniform_buf; + RET(ff_vk_map_buffer(&ctx->s, &prv->uniform_buf, &uniform_buf, 0)); + + /* DCT scales */ + float *dct_scale_buf = (float *)uniform_buf; + double idct_8_scales[8] = { + cos(4.0*M_PI/16.0) / 2.0, + cos(1.0*M_PI/16.0) / 2.0, + cos(2.0*M_PI/16.0) / 2.0, + cos(3.0*M_PI/16.0) / 2.0, + cos(4.0*M_PI/16.0) / 2.0, + cos(5.0*M_PI/16.0) / 2.0, + cos(6.0*M_PI/16.0) / 2.0, + cos(7.0*M_PI/16.0) / 2.0, + }; + for (int i = 0; i < 64; i++) + dct_scale_buf[i] = (float)(idct_8_scales[i >> 3] * + idct_8_scales[i & 7]); + + /* Scan table */ + uint8_t *scan_buf = uniform_buf + 64*sizeof(float); + for (int i = 0; i < 64; i++) + scan_buf[prr->scan[i]] = i; + + /* Codebooks */ + memcpy(uniform_buf + cb_offset[0], ff_prores_raw_dc_cb, + sizeof(ff_prores_raw_dc_cb)); + memcpy(uniform_buf + cb_offset[1], ff_prores_raw_ac_cb, + sizeof(ff_prores_raw_ac_cb)); + memcpy(uniform_buf + cb_offset[2], ff_prores_raw_rn_cb, + sizeof(ff_prores_raw_rn_cb)); + memcpy(uniform_buf + cb_offset[3], ff_prores_raw_ln_cb, + sizeof(ff_prores_raw_ln_cb)); + + RET(ff_vk_unmap_buffer(&ctx->s, &prv->uniform_buf, 1)); + + /* Done; update descriptors */ + for (int i = 0; i < 2; i++) { + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &prv->decode[i], 1, 0, 0, + &prv->uniform_buf, + 0, 64*sizeof(float), + 
VK_FORMAT_UNDEFINED)); + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &prv->decode[i], 1, 1, 0, + &prv->uniform_buf, + 64*sizeof(float), 64*sizeof(uint8_t), + VK_FORMAT_UNDEFINED)); + for (int j = 0; j < 4; j++) + RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0], + &prv->decode[i], 1, 2 + j, 0, + &prv->uniform_buf, + cb_offset[j], cb_size[j], + VK_FORMAT_UNDEFINED)); + } + +fail: + spv->uninit(&spv); + + return err; +} + +static void vk_prores_raw_free_frame_priv(AVRefStructOpaque _hwctx, void *data) +{ + AVHWDeviceContext *dev_ctx = _hwctx.nc; + + ProResRAWVulkanDecodePicture *pp = data; + FFVulkanDecodePicture *vp = &pp->vp; + + ff_vk_decode_free_frame(dev_ctx, vp); +} + +const FFHWAccel ff_prores_raw_vulkan_hwaccel = { + .p.name = "prores_raw_vulkan", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_PRORES_RAW, + .p.pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_prores_raw_start_frame, + .decode_slice = &vk_prores_raw_decode_slice, + .end_frame = &vk_prores_raw_end_frame, + .free_frame_priv = &vk_prores_raw_free_frame_priv, + .frame_priv_data_size = sizeof(ProResRAWVulkanDecodePicture), + .init = &vk_decode_prores_raw_init, + .update_thread_context = &ff_vk_update_thread_context, + .decode_params = &ff_vk_params_invalidate, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, +}; -- 2.49.1 From 6497f35b2ba497b088d1766e95dd0de61a01835a Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:40:21 +0000 Subject: [PATCH 087/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_source.c | 2242 +++++++++++++++++++++++++++++ 1 file changed, 2242 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_source.c diff --git a/libavcodec/vulkan/vulkan_source.c b/libavcodec/vulkan/vulkan_source.c new 
file mode 100644 index 0000000000..17a60e4b0f --- /dev/null +++ b/libavcodec/vulkan/vulkan_source.c @@ -0,0 +1,2242 @@ +#include <stddef.h> +#include "vulkan_source.h" + +const char *ff_source_common_comp = "\ +layout(buffer_reference, buffer_reference_align = 1) buffer u8buf {\n\ + uint8_t v;\n\ +};\n\ +\n\ +layout(buffer_reference, buffer_reference_align = 1) buffer u8vec2buf {\n\ + u8vec2 v;\n\ +};\n\ +\n\ +layout(buffer_reference, buffer_reference_align = 1) buffer u8vec4buf {\n\ + u8vec4 v;\n\ +};\n\ +\n\ +layout(buffer_reference, buffer_reference_align = 2) buffer u16buf {\n\ + uint16_t v;\n\ +};\n\ +\n\ +layout(buffer_reference, buffer_reference_align = 4) buffer u32buf {\n\ + uint32_t v;\n\ +};\n\ +\n\ +layout(buffer_reference, buffer_reference_align = 4) buffer u32vec2buf {\n\ + u32vec2 v;\n\ +};\n\ +\n\ +layout(buffer_reference, buffer_reference_align = 8) buffer u64buf {\n\ + uint64_t v;\n\ +};\n\ +\n\ +#define OFFBUF(type, b, l) \\\n\ + type(uint64_t(b) + uint64_t(l))\n\ +\n\ +#define zero_extend(a, p) \\\n\ + ((a) & ((1 << (p)) - 1))\n\ +\n\ +#define sign_extend(val, bits) \\\n\ + bitfieldExtract(val, 0, bits)\n\ +\n\ +#define fold(diff, bits) \\\n\ + sign_extend(diff, bits)\n\ +\n\ +#define mid_pred(a, b, c) \\\n\ + max(min((a), (b)), min(max((a), (b)), (c)))\n\ +\n\ +/* TODO: optimize */\n\ +uint align(uint src, uint a)\n\ +{\n\ + uint res = src % a;\n\ + if (res == 0)\n\ + return src;\n\ + return src + a - res;\n\ +}\n\ +\n\ +/* TODO: optimize */\n\ +uint64_t align64(uint64_t src, uint64_t a)\n\ +{\n\ + uint64_t res = src % a;\n\ + if (res == 0)\n\ + return src;\n\ + return src + a - res;\n\ +}\n\ +\n\ +#define reverse4(src) \\\n\ + (pack32(unpack8(uint32_t(src)).wzyx))\n\ +\n\ +u32vec2 reverse8(uint64_t src)\n\ +{\n\ + u32vec2 tmp = unpack32(src);\n\ + tmp.x = reverse4(tmp.x);\n\ + tmp.y = reverse4(tmp.y);\n\ + return tmp.yx;\n\ +}\n\ +\n\ +#ifdef PB_32\n\ +#define BIT_BUF_TYPE uint32_t\n\ +#define BUF_TYPE u32buf\n\ +#define BUF_REVERSE(src) 
reverse4(src)\n\ +#define BUF_BITS uint8_t(32)\n\ +#define BUF_BYTES uint8_t(4)\n\ +#define BYTE_EXTRACT(src, byte_off) \\\n\ + (uint8_t(bitfieldExtract((src), ((byte_off) << 3), 8)))\n\ +#else\n\ +#define BIT_BUF_TYPE uint64_t\n\ +#define BUF_TYPE u32vec2buf\n\ +#define BUF_REVERSE(src) reverse8(src)\n\ +#define BUF_BITS uint8_t(64)\n\ +#define BUF_BYTES uint8_t(8)\n\ +#define BYTE_EXTRACT(src, byte_off) \\\n\ + (uint8_t(((src) >> ((byte_off) << 3)) & 0xFF))\n\ +#endif\n\ +\n\ +struct PutBitContext {\n\ + uint64_t buf_start;\n\ + uint64_t buf;\n\ +\n\ + BIT_BUF_TYPE bit_buf;\n\ + uint8_t bit_left;\n\ +};\n\ +\n\ +void put_bits(inout PutBitContext pb, const uint32_t n, uint32_t value)\n\ +{\n\ + if (n < pb.bit_left) {\n\ + pb.bit_buf = (pb.bit_buf << n) | value;\n\ + pb.bit_left -= uint8_t(n);\n\ + } else {\n\ + pb.bit_buf <<= pb.bit_left;\n\ + pb.bit_buf |= (value >> (n - pb.bit_left));\n\ +\n\ +#ifdef PB_UNALIGNED\n\ + u8buf bs = u8buf(pb.buf);\n\ + [[unroll]]\n\ + for (uint8_t i = uint8_t(0); i < BUF_BYTES; i++)\n\ + bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i);\n\ +#else\n\ +#ifdef DEBUG\n\ + if ((pb.buf % BUF_BYTES) != 0)\n\ + debugPrintfEXT(\"put_bits buffer is not aligned!\");\n\ +#endif\n\ +\n\ + BUF_TYPE bs = BUF_TYPE(pb.buf);\n\ + bs.v = BUF_REVERSE(pb.bit_buf);\n\ +#endif\n\ + pb.buf = uint64_t(bs) + BUF_BYTES;\n\ +\n\ + pb.bit_left += BUF_BITS - uint8_t(n);\n\ + pb.bit_buf = value;\n\ + }\n\ +}\n\ +\n\ +uint32_t flush_put_bits(inout PutBitContext pb)\n\ +{\n\ + /* Align bits to MSBs */\n\ + if (pb.bit_left < BUF_BITS)\n\ + pb.bit_buf <<= pb.bit_left;\n\ +\n\ + if (pb.bit_left < BUF_BITS) {\n\ + uint to_write = ((BUF_BITS - pb.bit_left - 1) >> 3) + 1;\n\ +\n\ + u8buf bs = u8buf(pb.buf);\n\ + for (int i = 0; i < to_write; i++)\n\ + bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i);\n\ + pb.buf = uint64_t(bs) + to_write;\n\ + }\n\ +\n\ + pb.bit_left = BUF_BITS;\n\ + pb.bit_buf = 0x0;\n\ +\n\ + return uint32_t(pb.buf - 
pb.buf_start);\n\ +}\n\ +\n\ +void init_put_bits(out PutBitContext pb, u8buf data, uint64_t len)\n\ +{\n\ + pb.buf_start = uint64_t(data);\n\ + pb.buf = uint64_t(data);\n\ +\n\ + pb.bit_buf = 0;\n\ + pb.bit_left = BUF_BITS;\n\ +}\n\ +\n\ +uint64_t put_bits_count(in PutBitContext pb)\n\ +{\n\ + return (pb.buf - pb.buf_start)*8 + BUF_BITS - pb.bit_left;\n\ +}\n\ +\n\ +uint32_t put_bytes_count(in PutBitContext pb)\n\ +{\n\ + uint64_t num_bytes = (pb.buf - pb.buf_start) + ((BUF_BITS - pb.bit_left) >> 3);\n\ + return uint32_t(num_bytes);\n\ +}\n\ +\n\ +struct GetBitContext {\n\ + uint64_t buf_start;\n\ + uint64_t buf;\n\ + uint64_t buf_end;\n\ +\n\ + uint64_t bits;\n\ + int bits_valid;\n\ + int size_in_bits;\n\ +};\n\ +\n\ +#define LOAD64() \\\n\ + { \\\n\ + u8vec4buf ptr = u8vec4buf(gb.buf); \\\n\ + uint32_t rf1 = pack32((ptr[0].v).wzyx); \\\n\ + uint32_t rf2 = pack32((ptr[1].v).wzyx); \\\n\ + gb.buf += 8; \\\n\ + gb.bits = uint64_t(rf1) << 32 | uint64_t(rf2); \\\n\ + gb.bits_valid = 64; \\\n\ + }\n\ +\n\ +#define RELOAD32() \\\n\ + { \\\n\ + u8vec4buf ptr = u8vec4buf(gb.buf); \\\n\ + uint32_t rf = pack32((ptr[0].v).wzyx); \\\n\ + gb.buf += 4; \\\n\ + gb.bits = uint64_t(rf) << (32 - gb.bits_valid) | gb.bits; \\\n\ + gb.bits_valid += 32; \\\n\ + }\n\ +\n\ +void init_get_bits(inout GetBitContext gb, u8buf data, int len)\n\ +{\n\ + gb.buf = gb.buf_start = uint64_t(data);\n\ + gb.buf_end = uint64_t(data) + len;\n\ + gb.size_in_bits = len * 8;\n\ +\n\ + /* Preload */\n\ + LOAD64()\n\ +}\n\ +\n\ +bool get_bit(inout GetBitContext gb)\n\ +{\n\ + if (gb.bits_valid == 0)\n\ + LOAD64()\n\ +\n\ + bool val = bool(gb.bits >> (64 - 1));\n\ + gb.bits <<= 1;\n\ + gb.bits_valid--;\n\ + return val;\n\ +}\n\ +\n\ +uint get_bits(inout GetBitContext gb, int n)\n\ +{\n\ + if (n == 0)\n\ + return 0;\n\ +\n\ + if (n > gb.bits_valid)\n\ + RELOAD32()\n\ +\n\ + uint val = uint(gb.bits >> (64 - n));\n\ + gb.bits <<= n;\n\ + gb.bits_valid -= n;\n\ + return val;\n\ +}\n\ +\n\ +uint show_bits(inout 
GetBitContext gb, int n)\n\ +{\n\ + if (n > gb.bits_valid)\n\ + RELOAD32()\n\ +\n\ + return uint(gb.bits >> (64 - n));\n\ +}\n\ +\n\ +void skip_bits(inout GetBitContext gb, int n)\n\ +{\n\ + if (n > gb.bits_valid)\n\ + RELOAD32()\n\ +\n\ + gb.bits <<= n;\n\ + gb.bits_valid -= n;\n\ +}\n\ +\n\ +int tell_bits(in GetBitContext gb)\n\ +{\n\ + return int(gb.buf - gb.buf_start) * 8 - gb.bits_valid;\n\ +}\n\ +\n\ +int left_bits(in GetBitContext gb)\n\ +{\n\ + return gb.size_in_bits - int(gb.buf - gb.buf_start) * 8 + gb.bits_valid;\n\ +}"; + +const char *ff_source_rangecoder_comp = "\ +struct RangeCoder {\n\ + uint64_t bytestream_start;\n\ + uint64_t bytestream;\n\ + uint64_t bytestream_end;\n\ +\n\ + int low;\n\ + int range;\n\ + uint16_t outstanding_count;\n\ + uint8_t outstanding_byte;\n\ +};\n\ +\n\ +#ifdef FULL_RENORM\n\ +/* Full renorm version that can handle outstanding_byte == 0xFF */\n\ +void renorm_encoder(inout RangeCoder c)\n\ +{\n\ + int bs_cnt = 0;\n\ + u8buf bytestream = u8buf(c.bytestream);\n\ +\n\ + if (c.outstanding_byte == 0xFF) {\n\ + c.outstanding_byte = uint8_t(c.low >> 8);\n\ + } else if (c.low <= 0xFF00) {\n\ + bytestream[bs_cnt++].v = c.outstanding_byte;\n\ + uint16_t cnt = c.outstanding_count;\n\ + for (; cnt > 0; cnt--)\n\ + bytestream[bs_cnt++].v = uint8_t(0xFF);\n\ + c.outstanding_count = uint16_t(0);\n\ + c.outstanding_byte = uint8_t(c.low >> 8);\n\ + } else if (c.low >= 0x10000) {\n\ + bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1);\n\ + uint16_t cnt = c.outstanding_count;\n\ + for (; cnt > 0; cnt--)\n\ + bytestream[bs_cnt++].v = uint8_t(0x00);\n\ + c.outstanding_count = uint16_t(0);\n\ + c.outstanding_byte = uint8_t(bitfieldExtract(c.low, 8, 8));\n\ + } else {\n\ + c.outstanding_count++;\n\ + }\n\ +\n\ + c.bytestream += bs_cnt;\n\ + c.range <<= 8;\n\ + c.low = bitfieldInsert(0, c.low, 8, 8);\n\ +}\n\ +\n\ +#else\n\ +\n\ +/* Cannot deal with outstanding_byte == -1 in the name of speed */\n\ +void renorm_encoder(inout RangeCoder 
c)\n\ +{\n\ + uint16_t oc = c.outstanding_count + uint16_t(1);\n\ + int low = c.low;\n\ +\n\ + c.range <<= 8;\n\ + c.low = bitfieldInsert(0, low, 8, 8);\n\ +\n\ + if (low > 0xFF00 && low < 0x10000) {\n\ + c.outstanding_count = oc;\n\ + return;\n\ + }\n\ +\n\ + u8buf bs = u8buf(c.bytestream);\n\ + uint8_t outstanding_byte = c.outstanding_byte;\n\ +\n\ + c.bytestream = uint64_t(bs) + oc;\n\ + c.outstanding_count = uint16_t(0);\n\ + c.outstanding_byte = uint8_t(low >> 8);\n\ +\n\ + uint8_t obs = uint8_t(low > 0xFF00);\n\ + uint8_t fill = obs - uint8_t(1); /* unsigned underflow */\n\ +\n\ + bs[0].v = outstanding_byte + obs;\n\ + for (int i = 1; i < oc; i++)\n\ + bs[i].v = fill;\n\ +}\n\ +#endif\n\ +\n\ +void put_rac_internal(inout RangeCoder c, const int range1, bool bit)\n\ +{\n\ +#ifdef DEBUG\n\ + if (range1 >= c.range)\n\ + debugPrintfEXT(\"Error: range1 >= c.range\");\n\ + if (range1 <= 0)\n\ + debugPrintfEXT(\"Error: range1 <= 0\");\n\ +#endif\n\ +\n\ + int ranged = c.range - range1;\n\ + c.low += bit ? ranged : 0;\n\ + c.range = bit ? 
range1 : ranged;\n\ +\n\ + if (expectEXT(c.range < 0x100, false))\n\ + renorm_encoder(c);\n\ +}\n\ +\n\ +void put_rac_direct(inout RangeCoder c, inout uint8_t state, bool bit)\n\ +{\n\ + put_rac_internal(c, (c.range * state) >> 8, bit);\n\ + state = zero_one_state[(uint(bit) << 8) + state];\n\ +}\n\ +\n\ +void put_rac(inout RangeCoder c, uint64_t state, bool bit)\n\ +{\n\ + put_rac_direct(c, u8buf(state).v, bit);\n\ +}\n\ +\n\ +/* Equiprobable bit */\n\ +void put_rac_equi(inout RangeCoder c, bool bit)\n\ +{\n\ + put_rac_internal(c, c.range >> 1, bit);\n\ +}\n\ +\n\ +void put_rac_terminate(inout RangeCoder c)\n\ +{\n\ + int range1 = (c.range * 129) >> 8;\n\ +\n\ +#ifdef DEBUG\n\ + if (range1 >= c.range)\n\ + debugPrintfEXT(\"Error: range1 >= c.range\");\n\ + if (range1 <= 0)\n\ + debugPrintfEXT(\"Error: range1 <= 0\");\n\ +#endif\n\ +\n\ + c.range -= range1;\n\ + if (expectEXT(c.range < 0x100, false))\n\ + renorm_encoder(c);\n\ +}\n\ +\n\ +/* Return the number of bytes written. */\n\ +uint32_t rac_terminate(inout RangeCoder c)\n\ +{\n\ + put_rac_terminate(c);\n\ + c.range = uint16_t(0xFF);\n\ + c.low += 0xFF;\n\ + renorm_encoder(c);\n\ + c.range = uint16_t(0xFF);\n\ + renorm_encoder(c);\n\ +\n\ +#ifdef DEBUG\n\ + if (c.low != 0)\n\ + debugPrintfEXT(\"Error: c.low != 0\");\n\ + if (c.range < 0x100)\n\ + debugPrintfEXT(\"Error: range < 0x100\");\n\ +#endif\n\ +\n\ + return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start));\n\ +}\n\ +\n\ +void rac_init(out RangeCoder r, u8buf data, uint buf_size)\n\ +{\n\ + r.bytestream_start = uint64_t(data);\n\ + r.bytestream = uint64_t(data);\n\ + r.bytestream_end = uint64_t(data) + buf_size;\n\ + r.low = 0;\n\ + r.range = 0xFF00;\n\ + r.outstanding_count = uint16_t(0);\n\ + r.outstanding_byte = uint8_t(0xFF);\n\ +}\n\ +\n\ +/* Decoder */\n\ +uint overread = 0;\n\ +bool corrupt = false;\n\ +\n\ +void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size)\n\ +{\n\ + overread = 0;\n\ + corrupt = false;\n\ +\n\ + /* 
Skip priming bytes */\n\ + rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2);\n\ +\n\ + u8vec2 prime = u8vec2buf(data).v;\n\ + /* Switch endianness of the priming bytes */\n\ + r.low = pack16(prime.yx);\n\ +\n\ + if (r.low >= 0xFF00) {\n\ + r.low = 0xFF00;\n\ + r.bytestream_end = uint64_t(data) + 2;\n\ + }\n\ +}\n\ +\n\ +void refill(inout RangeCoder c)\n\ +{\n\ + c.range <<= 8;\n\ + c.low <<= 8;\n\ + if (expectEXT(c.bytestream < c.bytestream_end, false)) {\n\ + c.low |= u8buf(c.bytestream).v;\n\ + c.bytestream++;\n\ + } else {\n\ + overread++;\n\ + }\n\ +}\n\ +\n\ +bool get_rac_internal(inout RangeCoder c, const int range1)\n\ +{\n\ + int ranged = c.range - range1;\n\ + bool bit = c.low >= ranged;\n\ + c.low -= bit ? ranged : 0;\n\ + c.range = (bit ? 0 : ranged) + (bit ? range1 : 0);\n\ +\n\ + if (expectEXT(c.range < 0x100, false))\n\ + refill(c);\n\ +\n\ + return bit;\n\ +}\n\ +\n\ +bool get_rac_direct(inout RangeCoder c, inout uint8_t state)\n\ +{\n\ + bool bit = get_rac_internal(c, c.range * state >> 8);\n\ + state = zero_one_state[state + (bit ? 
256 : 0)];\n\ + return bit;\n\ +}\n\ +\n\ +bool get_rac(inout RangeCoder c, uint64_t state)\n\ +{\n\ + return get_rac_direct(c, u8buf(state).v);\n\ +}\n\ +\n\ +bool get_rac_equi(inout RangeCoder c)\n\ +{\n\ + return get_rac_internal(c, c.range >> 1);\n\ +}"; + +const char *ff_source_ffv1_vlc_comp = "\ +#define VLC_STATE_SIZE 8\n\ +layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer VlcState {\n\ + uint32_t error_sum;\n\ + int16_t drift;\n\ + int8_t bias;\n\ + uint8_t count;\n\ +};\n\ +\n\ +void update_vlc_state(inout VlcState state, const int v)\n\ +{\n\ + int drift = state.drift;\n\ + int count = state.count;\n\ + int bias = state.bias;\n\ + state.error_sum += uint16_t(abs(v));\n\ + drift += v;\n\ +\n\ + if (count == 128) { // FIXME: variable\n\ + count >>= 1;\n\ + drift >>= 1;\n\ + state.error_sum >>= 1;\n\ + }\n\ + count++;\n\ +\n\ + if (drift <= -count) {\n\ + bias = max(bias - 1, -128);\n\ + drift = max(drift + count, -count + 1);\n\ + } else if (drift > 0) {\n\ + bias = min(bias + 1, 127);\n\ + drift = min(drift - count, 0);\n\ + }\n\ +\n\ + state.bias = int8_t(bias);\n\ + state.drift = int16_t(drift);\n\ + state.count = uint8_t(count);\n\ +}\n\ +\n\ +struct Symbol {\n\ + uint32_t bits;\n\ + uint32_t val;\n\ +};\n\ +\n\ +Symbol set_ur_golomb(int i, int k, int limit, int esc_len)\n\ +{\n\ + int e;\n\ + Symbol sym;\n\ +\n\ +#ifdef DEBUG\n\ + if (i < 0)\n\ + debugPrintfEXT(\"Error: i is zero!\");\n\ +#endif\n\ +\n\ + e = i >> k;\n\ + if (e < limit) {\n\ + sym.bits = e + k + 1;\n\ + sym.val = (1 << k) + zero_extend(i, k);\n\ + } else {\n\ + sym.bits = limit + esc_len;\n\ + sym.val = i - limit + 1;\n\ + }\n\ +\n\ + return sym;\n\ +}\n\ +\n\ +/**\n\ + * write signed golomb rice code (ffv1).\n\ + */\n\ +Symbol set_sr_golomb(int i, int k, int limit, int esc_len)\n\ +{\n\ + int v;\n\ +\n\ + v = -2 * i - 1;\n\ + v ^= (v >> 31);\n\ +\n\ + return set_ur_golomb(v, k, limit, esc_len);\n\ +}\n\ +\n\ +Symbol get_vlc_symbol(inout VlcState state, int v, 
int bits)\n\ +{\n\ + int i, k, code;\n\ + Symbol sym;\n\ + v = fold(v - int(state.bias), bits);\n\ +\n\ + i = state.count;\n\ + k = 0;\n\ + while (i < state.error_sum) { // FIXME: optimize\n\ + k++;\n\ + i += i;\n\ + }\n\ +\n\ +#ifdef DEBUG\n\ + if (k > 16)\n\ + debugPrintfEXT(\"Error: k > 16!\");\n\ +#endif\n\ +\n\ + code = v ^ ((2 * state.drift + state.count) >> 31);\n\ +\n\ + update_vlc_state(state, v);\n\ +\n\ + return set_sr_golomb(code, k, 12, bits);\n\ +}\n\ +\n\ +uint get_ur_golomb(inout GetBitContext gb, int k, int limit, int esc_len)\n\ +{\n\ + for (uint i = 0; i < 12; i++)\n\ + if (get_bit(gb))\n\ + return get_bits(gb, k) + (i << k);\n\ +\n\ + return get_bits(gb, esc_len) + 11;\n\ +}\n\ +\n\ +int get_sr_golomb(inout GetBitContext gb, int k, int limit, int esc_len)\n\ +{\n\ + int v = int(get_ur_golomb(gb, k, limit, esc_len));\n\ + return (v >> 1) ^ -(v & 1);\n\ +}\n\ +\n\ +int read_vlc_symbol(inout GetBitContext gb, inout VlcState state, int bits)\n\ +{\n\ + int k, i, v, ret;\n\ +\n\ + i = state.count;\n\ + k = 0;\n\ + while (i < state.error_sum) { // FIXME: optimize\n\ + k++;\n\ + i += i;\n\ + }\n\ +\n\ + v = get_sr_golomb(gb, k, 12, bits);\n\ +\n\ + v ^= ((2 * state.drift + state.count) >> 31);\n\ +\n\ + ret = fold(v + state.bias, bits);\n\ +\n\ + update_vlc_state(state, v);\n\ +\n\ + return ret;\n\ +}"; + +const char *ff_source_ffv1_common_comp = "\ +struct SliceContext {\n\ + RangeCoder c;\n\ +\n\ +#if !defined(DECODE)\n\ + PutBitContext pb; /* 8*8 bytes */\n\ +#else\n\ + GetBitContext gb;\n\ +#endif\n\ +\n\ + ivec2 slice_dim;\n\ + ivec2 slice_pos;\n\ + ivec2 slice_rct_coef;\n\ + u8vec3 quant_table_idx;\n\ +\n\ + uint hdr_len; // only used for golomb\n\ +\n\ + uint slice_coding_mode;\n\ + bool slice_reset_contexts;\n\ +};\n\ +\n\ +/* -1, { -1, 0 } */\n\ +int predict(int L, ivec2 top)\n\ +{\n\ + return mid_pred(L, L + top[1] - top[0], top[1]);\n\ +}\n\ +\n\ +/* { -2, -1 }, { -1, 0, 1 }, 0 */\n\ +int get_context(VTYPE2 cur_l, VTYPE3 top_l, TYPE top2, 
uint8_t quant_table_idx)\n\ +{\n\ + const int LT = top_l[0]; /* -1 */\n\ + const int T = top_l[1]; /* 0 */\n\ + const int RT = top_l[2]; /* 1 */\n\ + const int L = cur_l[1]; /* -1 */\n\ +\n\ + int base = quant_table[quant_table_idx][0][(L - LT) & MAX_QUANT_TABLE_MASK] +\n\ + quant_table[quant_table_idx][1][(LT - T) & MAX_QUANT_TABLE_MASK] +\n\ + quant_table[quant_table_idx][2][(T - RT) & MAX_QUANT_TABLE_MASK];\n\ +\n\ + if ((quant_table[quant_table_idx][3][127] == 0) &&\n\ + (quant_table[quant_table_idx][4][127] == 0))\n\ + return base;\n\ +\n\ + const int TT = top2; /* -2 */\n\ + const int LL = cur_l[0]; /* -2 */\n\ + return base +\n\ + quant_table[quant_table_idx][3][(LL - L) & MAX_QUANT_TABLE_MASK] +\n\ + quant_table[quant_table_idx][4][(TT - T) & MAX_QUANT_TABLE_MASK];\n\ +}\n\ +\n\ +const uint32_t log2_run[41] = {\n\ + 0, 0, 0, 0, 1, 1, 1, 1,\n\ + 2, 2, 2, 2, 3, 3, 3, 3,\n\ + 4, 4, 5, 5, 6, 6, 7, 7,\n\ + 8, 9, 10, 11, 12, 13, 14, 15,\n\ + 16, 17, 18, 19, 20, 21, 22, 23,\n\ + 24,\n\ +};\n\ +\n\ +uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)\n\ +{\n\ + uint mpw = 1 << chroma_shift;\n\ + uint awidth = align(width, mpw);\n\ +\n\ + if ((version < 4) || ((version == 4) && (micro_version < 3)))\n\ + return width * sx / num_h_slices;\n\ +\n\ + sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw;\n\ + if (sx == awidth)\n\ + sx = width;\n\ +\n\ + return sx;\n\ +}\n\ +\n\ +#ifdef RGB\n\ +#define RGB_LBUF (RGB_LINECACHE - 1)\n\ +#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF)))\n\ +\n\ +ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,\n\ + int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)\n\ +{\n\ + const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? 
off + ivec2(1, -1) : off;\n\ +\n\ + /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */\n\ + VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, -1)))[comp]),\n\ + TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]),\n\ + TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp]));\n\ +\n\ + /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must\n\ + * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous\n\ + * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */\n\ + TYPE cur = TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, 0)))[comp]);\n\ +\n\ + int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +\n\ + quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +\n\ + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];\n\ +\n\ + if (expectEXT(extend_lookup, false)) {\n\ + TYPE cur2 = TYPE(0);\n\ + if (expectEXT(off.x > 0, true)) {\n\ + const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);\n\ + cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]);\n\ + }\n\ + base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];\n\ +\n\ + /* top-2 became current upon swap */\n\ + TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off))[comp]);\n\ + base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];\n\ + }\n\ +\n\ + /* context, prediction */\n\ + return ivec2(base, predict(cur, VTYPE2(top)));\n\ +}\n\ +\n\ +#else /* RGB */\n\ +\n\ +#define LADDR(p) (p)\n\ +\n\ +ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,\n\ + int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)\n\ +{\n\ + const ivec2 yoff_border1 = off.x == 0 ? 
ivec2(1, -1) : ivec2(0, 0);\n\ + sp += off;\n\ +\n\ + VTYPE3 top = VTYPE3(TYPE(0),\n\ + TYPE(0),\n\ + TYPE(0));\n\ + if (off.y > 0 && off != ivec2(0, 1))\n\ + top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]);\n\ + if (off.y > 0) {\n\ + top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]);\n\ + top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]);\n\ + }\n\ +\n\ + TYPE cur = TYPE(0);\n\ + if (off != ivec2(0, 0))\n\ + cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]);\n\ +\n\ + int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +\n\ + quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +\n\ + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];\n\ +\n\ + if (expectEXT(extend_lookup, false)) {\n\ + TYPE cur2 = TYPE(0);\n\ + if (off.x > 0 && off != ivec2(1, 0)) {\n\ + const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);\n\ + cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]);\n\ + }\n\ + base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];\n\ +\n\ + TYPE top2 = TYPE(0);\n\ + if (off.y > 1)\n\ + top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]);\n\ + base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];\n\ + }\n\ +\n\ + /* context, prediction */\n\ + return ivec2(base, predict(cur, VTYPE2(top)));\n\ +}\n\ +#endif"; + +const char *ff_source_ffv1_reset_comp = "\ +void main(void)\n\ +{\n\ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n\ +\n\ + if (key_frame == 0 &&\n\ + slice_ctx[slice_idx].slice_reset_contexts == false)\n\ + return;\n\ +\n\ + const uint8_t qidx = slice_ctx[slice_idx].quant_table_idx[gl_WorkGroupID.z];\n\ + uint contexts = context_count[qidx];\n\ + uint64_t slice_state_off = uint64_t(slice_state) +\n\ + slice_idx*plane_state_size*codec_planes;\n\ +\n\ +#ifdef GOLOMB\n\ + uint64_t start = 
slice_state_off +\n\ + (gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) + gl_LocalInvocationID.x)*VLC_STATE_SIZE;\n\ + for (uint x = gl_LocalInvocationID.x; x < contexts; x += gl_WorkGroupSize.x) {\n\ + VlcState sb = VlcState(start);\n\ + sb.drift = int16_t(0);\n\ + sb.error_sum = uint16_t(4);\n\ + sb.bias = int8_t(0);\n\ + sb.count = uint8_t(1);\n\ + start += gl_WorkGroupSize.x*VLC_STATE_SIZE;\n\ + }\n\ +#else\n\ + uint64_t start = slice_state_off +\n\ + gl_WorkGroupID.z*plane_state_size +\n\ + (gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */\n\ + uint count_total = contexts*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);\n\ + for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) {\n\ + u32buf(start).v = 0x80808080;\n\ + start += gl_WorkGroupSize.x*(CONTEXT_SIZE >> 3 /* 1/8th of context */);\n\ + }\n\ +#endif\n\ +}"; + +const char *ff_source_ffv1_rct_search_comp = "\ +ivec3 load_components(ivec2 pos)\n\ +{\n\ + ivec3 pix = ivec3(imageLoad(src[0], pos));\n\ + if (planar_rgb != 0) {\n\ + for (int i = 1; i < 3; i++)\n\ + pix[i] = int(imageLoad(src[i], pos)[0]);\n\ + }\n\ +\n\ + return ivec3(pix[fmt_lut[0]], pix[fmt_lut[1]], pix[fmt_lut[2]]);\n\ +}\n\ +\n\ +#define NUM_CHECKS 15\n\ +const ivec2 rct_y_coeff[NUM_CHECKS] = {\n\ + ivec2(0, 0), // 4G\n\ +\n\ + ivec2(0, 1), // 3G + B\n\ + ivec2(1, 0), // R + 3G\n\ + ivec2(1, 1), // R + 2G + B\n\ +\n\ + ivec2(0, 2), // 2G + 2B\n\ + ivec2(2, 0), // 2R + 2G\n\ + ivec2(2, 2), // 2R + 2B\n\ +\n\ + ivec2(0, 3), // 1G + 3B\n\ + ivec2(3, 0), // 3R + 1G\n\ +\n\ + ivec2(0, 4), // 4B\n\ + ivec2(4, 0), // 4R\n\ +\n\ + ivec2(1, 2), // R + G + 2B\n\ + ivec2(2, 1), // 2R + G + B\n\ +\n\ + ivec2(3, 1), // 3R + B\n\ + ivec2(1, 3), // R + 3B\n\ +};\n\ +\n\ +shared ivec3 pix_buf[gl_WorkGroupSize.x + 1][gl_WorkGroupSize.y + 1] = { };\n\ +\n\ +ivec3 transform_sample(ivec3 pix, ivec2 rct_coef)\n\ +{\n\ + pix.b -= pix.g;\n\ + pix.r -= pix.g;\n\ + pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;\n\ + pix.b += 
rct_offset;\n\ + pix.r += rct_offset;\n\ + return pix;\n\ +}\n\ +\n\ +uint get_dist(ivec3 cur)\n\ +{\n\ + ivec3 LL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 1];\n\ + ivec3 TL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 0];\n\ + ivec3 TT = pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 0];\n\ +\n\ + ivec3 pred = ivec3(predict(LL.r, ivec2(TL.r, TT.r)),\n\ + predict(LL.g, ivec2(TL.g, TT.g)),\n\ + predict(LL.b, ivec2(TL.b, TT.b)));\n\ +\n\ + uvec3 c = abs(pred - cur);\n\ + return mid_pred(c.r, c.g, c.b);\n\ +}\n\ +\n\ +shared uint score_cols[gl_WorkGroupSize.y] = { };\n\ +shared uint score_mode[16] = { };\n\ +\n\ +void process(ivec2 pos)\n\ +{\n\ + ivec3 pix = load_components(pos);\n\ +\n\ + for (int i = 0; i < NUM_CHECKS; i++) {\n\ + ivec3 tx_pix = transform_sample(pix, rct_y_coeff[i]);\n\ + pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 1] = tx_pix;\n\ + memoryBarrierShared();\n\ +\n\ + uint dist = get_dist(tx_pix);\n\ + atomicAdd(score_mode[i], dist);\n\ + }\n\ +}\n\ +\n\ +void coeff_search(inout SliceContext sc)\n\ +{\n\ + uvec2 img_size = imageSize(src[0]);\n\ + uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,\n\ + gl_NumWorkGroups.x, 0);\n\ + uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,\n\ + gl_NumWorkGroups.x, 0);\n\ + uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,\n\ + gl_NumWorkGroups.y, 0);\n\ + uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,\n\ + gl_NumWorkGroups.y, 0);\n\ +\n\ + for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += gl_WorkGroupSize.y) {\n\ + for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += gl_WorkGroupSize.x) {\n\ + process(ivec2(x, y));\n\ + }\n\ + }\n\ +\n\ + /* Every invocation accumulates into score_mode[] via atomicAdd in process().\n\ + * Wait for the whole workgroup before invocation (0, 0) reads the totals,\n\ + * otherwise the minimum is picked from partial sums. */\n\ + memoryBarrierShared();\n\ + barrier();\n\ +\n\ + if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {\n\ + uint min_score = 0xFFFFFFFF;\n\ + uint min_idx = 3;\n\ + for (int i = 0; i < NUM_CHECKS; i++) {\n\ + if (score_mode[i] < min_score) {\n\ + min_score = score_mode[i];\n\ + min_idx = i;\n\ + }\n\ + 
}\n\ + sc.slice_rct_coef = rct_y_coeff[min_idx];\n\ + }\n\ +}\n\ +\n\ +void main(void)\n\ +{\n\ + if (force_pcm == 1)\n\ + return;\n\ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n\ + coeff_search(slice_ctx[slice_idx]);\n\ +}"; + +const char *ff_source_ffv1_enc_setup_comp = "\ +uint8_t state[CONTEXT_SIZE];\n\ +\n\ +void init_slice(inout SliceContext sc, const uint slice_idx)\n\ +{\n\ + /* Set coordinates */\n\ + uvec2 img_size = imageSize(src[0]);\n\ + uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,\n\ + gl_NumWorkGroups.x, chroma_shift.x);\n\ + uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,\n\ + gl_NumWorkGroups.x, chroma_shift.x);\n\ + uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,\n\ + gl_NumWorkGroups.y, chroma_shift.y);\n\ + uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,\n\ + gl_NumWorkGroups.y, chroma_shift.y);\n\ +\n\ + sc.slice_pos = ivec2(sxs, sys);\n\ + sc.slice_dim = ivec2(sxe - sxs, sye - sys);\n\ + sc.slice_coding_mode = int(force_pcm == 1);\n\ + sc.slice_reset_contexts = sc.slice_coding_mode == 1;\n\ + sc.quant_table_idx = u8vec3(context_model);\n\ +\n\ + if ((rct_search == 0) || (sc.slice_coding_mode == 1))\n\ + sc.slice_rct_coef = ivec2(1, 1);\n\ +\n\ + rac_init(sc.c,\n\ + OFFBUF(u8buf, out_data, slice_idx * slice_size_max),\n\ + slice_size_max);\n\ +}\n\ +\n\ +void put_usymbol(inout RangeCoder c, uint v)\n\ +{\n\ + bool is_nil = (v == 0);\n\ + put_rac_direct(c, state[0], is_nil);\n\ + if (is_nil)\n\ + return;\n\ +\n\ + const int e = findMSB(v);\n\ +\n\ + for (int i = 0; i < e; i++)\n\ + put_rac_direct(c, state[1 + min(i, 9)], true);\n\ + put_rac_direct(c, state[1 + min(e, 9)], false);\n\ +\n\ + for (int i = e - 1; i >= 0; i--)\n\ + put_rac_direct(c, state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1)));\n\ +}\n\ +\n\ +void write_slice_header(inout SliceContext sc)\n\ +{\n\ + [[unroll]]\n\ + for (int i = 0; i < CONTEXT_SIZE; i++)\n\ + state[i] = uint8_t(128);\n\ +\n\ + 
put_usymbol(sc.c, gl_WorkGroupID.x);\n\ + put_usymbol(sc.c, gl_WorkGroupID.y);\n\ + put_usymbol(sc.c, 0);\n\ + put_usymbol(sc.c, 0);\n\ +\n\ + for (int i = 0; i < codec_planes; i++)\n\ + put_usymbol(sc.c, sc.quant_table_idx[i]);\n\ +\n\ + put_usymbol(sc.c, pic_mode);\n\ + put_usymbol(sc.c, sar.x);\n\ + put_usymbol(sc.c, sar.y);\n\ +\n\ + if (version >= 4) {\n\ + put_rac_direct(sc.c, state[0], sc.slice_reset_contexts);\n\ + put_usymbol(sc.c, sc.slice_coding_mode);\n\ + if (sc.slice_coding_mode != 1 && colorspace == 1) {\n\ + put_usymbol(sc.c, sc.slice_rct_coef.y);\n\ + put_usymbol(sc.c, sc.slice_rct_coef.x);\n\ + }\n\ + }\n\ +}\n\ +\n\ +void write_frame_header(inout SliceContext sc)\n\ +{\n\ + put_rac_equi(sc.c, bool(key_frame));\n\ +}\n\ +\n\ +#ifdef GOLOMB\n\ +void init_golomb(inout SliceContext sc)\n\ +{\n\ + sc.hdr_len = rac_terminate(sc.c);\n\ + init_put_bits(sc.pb,\n\ + OFFBUF(u8buf, sc.c.bytestream_start, sc.hdr_len),\n\ + slice_size_max - sc.hdr_len);\n\ +}\n\ +#endif\n\ +\n\ +void main(void)\n\ +{\n\ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n\ +\n\ + init_slice(slice_ctx[slice_idx], slice_idx);\n\ +\n\ + if (slice_idx == 0)\n\ + write_frame_header(slice_ctx[slice_idx]);\n\ +\n\ + write_slice_header(slice_ctx[slice_idx]);\n\ +\n\ +#ifdef GOLOMB\n\ + init_golomb(slice_ctx[slice_idx]);\n\ +#endif\n\ +}"; + +const char *ff_source_ffv1_enc_comp = "\ +#ifndef GOLOMB\n\ +#ifdef CACHED_SYMBOL_READER\n\ +shared uint8_t state[CONTEXT_SIZE];\n\ +#define WRITE(c, off, val) put_rac_direct(c, state[off], val)\n\ +#else\n\ +#define WRITE(c, off, val) put_rac(c, uint64_t(slice_state) + (state_off + off), val)\n\ +#endif\n\ +\n\ +/* Note - only handles signed values */\n\ +void put_symbol(inout RangeCoder c, uint state_off, int v)\n\ +{\n\ + bool is_nil = (v == 0);\n\ + WRITE(c, 0, is_nil);\n\ + if (is_nil)\n\ + return;\n\ +\n\ + const int a = abs(v);\n\ + const int e = findMSB(a);\n\ +\n\ + for (int i = 0; i < e; i++)\n\ + WRITE(c, 1 
+ min(i, 9), true);\n\ + WRITE(c, 1 + min(e, 9), false);\n\ +\n\ + for (int i = e - 1; i >= 0; i--)\n\ + WRITE(c, 22 + min(i, 9), bool(bitfieldExtract(a, i, 1)));\n\ +\n\ + WRITE(c, 22 - 11 + min(e, 10), v < 0);\n\ +}\n\ +\n\ +void encode_line_pcm(inout SliceContext sc, readonly uimage2D img,\n\ + ivec2 sp, int y, int p, int comp, int bits)\n\ +{\n\ + int w = sc.slice_dim.x;\n\ +\n\ +#ifdef CACHED_SYMBOL_READER\n\ + if (gl_LocalInvocationID.x > 0)\n\ + return;\n\ +#endif\n\ +\n\ +#ifndef RGB\n\ + if (p > 0 && p < 3) {\n\ + w >>= chroma_shift.x;\n\ + sp >>= chroma_shift;\n\ + }\n\ +#endif\n\ +\n\ + for (int x = 0; x < w; x++) {\n\ + uint v = imageLoad(img, sp + LADDR(ivec2(x, y)))[comp];\n\ + for (int i = (bits - 1); i >= 0; i--)\n\ + put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1)));\n\ + }\n\ +}\n\ +\n\ +void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off,\n\ + ivec2 sp, int y, int p, int comp, int bits,\n\ + uint8_t quant_table_idx, const int run_index)\n\ +{\n\ + int w = sc.slice_dim.x;\n\ +\n\ +#ifndef RGB\n\ + if (p > 0 && p < 3) {\n\ + w >>= chroma_shift.x;\n\ + sp >>= chroma_shift;\n\ + }\n\ +#endif\n\ +\n\ + for (int x = 0; x < w; x++) {\n\ + ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w,\n\ + quant_table_idx, extend_lookup[quant_table_idx] > 0);\n\ + d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1];\n\ +\n\ + if (d[0] < 0)\n\ + d = -d;\n\ +\n\ + d[1] = fold(d[1], bits);\n\ +\n\ + uint context_off = state_off + CONTEXT_SIZE*d[0];\n\ +#ifdef CACHED_SYMBOL_READER\n\ + u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x);\n\ + state[gl_LocalInvocationID.x] = sb.v;\n\ + barrier();\n\ + if (gl_LocalInvocationID.x == 0)\n\ +#endif\n\ +\n\ + put_symbol(sc.c, context_off, d[1]);\n\ +\n\ +#ifdef CACHED_SYMBOL_READER\n\ + barrier();\n\ + sb.v = state[gl_LocalInvocationID.x];\n\ +#endif\n\ + }\n\ +}\n\ +\n\ +#else /* GOLOMB */\n\ +\n\ +void encode_line(inout SliceContext sc, readonly uimage2D img, 
uint state_off,\n\ + ivec2 sp, int y, int p, int comp, int bits,\n\ + uint8_t quant_table_idx, inout int run_index)\n\ +{\n\ + int w = sc.slice_dim.x;\n\ +\n\ +#ifndef RGB\n\ + if (p > 0 && p < 3) {\n\ + w >>= chroma_shift.x;\n\ + sp >>= chroma_shift;\n\ + }\n\ +#endif\n\ +\n\ + int run_count = 0;\n\ + bool run_mode = false;\n\ +\n\ + for (int x = 0; x < w; x++) {\n\ + ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w,\n\ + quant_table_idx, extend_lookup[quant_table_idx] > 0);\n\ + d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1];\n\ +\n\ + if (d[0] < 0)\n\ + d = -d;\n\ +\n\ + d[1] = fold(d[1], bits);\n\ +\n\ + if (d[0] == 0)\n\ + run_mode = true;\n\ +\n\ + if (run_mode) {\n\ + if (d[1] != 0) {\n\ + /* A very unlikely loop */\n\ + while (run_count >= 1 << log2_run[run_index]) {\n\ + run_count -= 1 << log2_run[run_index];\n\ + run_index++;\n\ + put_bits(sc.pb, 1, 1);\n\ + }\n\ +\n\ + put_bits(sc.pb, 1 + log2_run[run_index], run_count);\n\ + if (run_index != 0)\n\ + run_index--;\n\ + run_count = 0;\n\ + run_mode = false;\n\ + if (d[1] > 0)\n\ + d[1]--;\n\ + } else {\n\ + run_count++;\n\ + }\n\ + }\n\ +\n\ + if (!run_mode) {\n\ + VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*d[0]);\n\ + Symbol sym = get_vlc_symbol(sb, d[1], bits);\n\ + put_bits(sc.pb, sym.bits, sym.val);\n\ + }\n\ + }\n\ +\n\ + if (run_mode) {\n\ + while (run_count >= (1 << log2_run[run_index])) {\n\ + run_count -= 1 << log2_run[run_index];\n\ + run_index++;\n\ + put_bits(sc.pb, 1, 1);\n\ + }\n\ +\n\ + if (run_count > 0)\n\ + put_bits(sc.pb, 1, 1);\n\ + }\n\ +}\n\ +#endif\n\ +\n\ +#ifdef RGB\n\ +ivec4 load_components(ivec2 pos)\n\ +{\n\ + ivec4 pix = ivec4(imageLoad(src[0], pos));\n\ + if (planar_rgb != 0) {\n\ + for (int i = 1; i < (3 + transparency); i++)\n\ + pix[i] = int(imageLoad(src[i], pos)[0]);\n\ + }\n\ +\n\ + return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],\n\ + pix[fmt_lut[2]], pix[fmt_lut[3]]);\n\ +}\n\ +\n\ +void transform_sample(inout ivec4 pix, 
ivec2 rct_coef)\n\ +{\n\ + pix.b -= pix.g;\n\ + pix.r -= pix.g;\n\ + pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;\n\ + pix.b += rct_offset;\n\ + pix.r += rct_offset;\n\ +}\n\ +\n\ +void preload_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)\n\ +{\n\ + for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) {\n\ + ivec2 lpos = sp + LADDR(ivec2(x, y));\n\ + ivec2 pos = sc.slice_pos + ivec2(x, y);\n\ +\n\ + ivec4 pix = load_components(pos);\n\ +\n\ + if (expectEXT(apply_rct, true))\n\ + transform_sample(pix, sc.slice_rct_coef);\n\ +\n\ + imageStore(tmp, lpos, pix);\n\ + }\n\ +}\n\ +#endif\n\ +\n\ +void encode_slice(inout SliceContext sc, const uint slice_idx)\n\ +{\n\ + ivec2 sp = sc.slice_pos;\n\ +\n\ +#ifndef RGB\n\ + int bits = bits_per_raw_sample;\n\ +#else\n\ + int bits = 9;\n\ + if (bits != 8 || sc.slice_coding_mode != 0)\n\ + bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);\n\ +\n\ + sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;\n\ +#endif\n\ +\n\ +#ifndef GOLOMB\n\ + if (sc.slice_coding_mode == 1) {\n\ +#ifndef RGB\n\ + for (int c = 0; c < components; c++) {\n\ +\n\ + int h = sc.slice_dim.y;\n\ + if (c > 0 && c < 3)\n\ + h >>= chroma_shift.y;\n\ +\n\ + /* Takes into account dual-plane YUV formats */\n\ + int p = min(c, planes - 1);\n\ + int comp = c - p;\n\ +\n\ + for (int y = 0; y < h; y++)\n\ + encode_line_pcm(sc, src[p], sp, y, p, comp, bits);\n\ + }\n\ +#else\n\ + for (int y = 0; y < sc.slice_dim.y; y++) {\n\ + preload_rgb(sc, sp, sc.slice_dim.x, y, false);\n\ +\n\ + encode_line_pcm(sc, tmp, sp, y, 0, 1, bits);\n\ + encode_line_pcm(sc, tmp, sp, y, 0, 2, bits);\n\ + encode_line_pcm(sc, tmp, sp, y, 0, 0, bits);\n\ + if (transparency == 1)\n\ + encode_line_pcm(sc, tmp, sp, y, 0, 3, bits);\n\ + }\n\ +#endif\n\ + } else\n\ +#endif\n\ + {\n\ + u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;\n\ + u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size;\n\ +\n\ +#ifndef RGB\n\ + for 
(int c = 0; c < components; c++) {\n\ + int run_index = 0;\n\ +\n\ + int h = sc.slice_dim.y;\n\ + if (c > 0 && c < 3)\n\ + h >>= chroma_shift.y;\n\ +\n\ + int p = min(c, planes - 1);\n\ + int comp = c - p;\n\ +\n\ + for (int y = 0; y < h; y++)\n\ + encode_line(sc, src[p], slice_state_off[c], sp, y, p,\n\ + comp, bits, quant_table_idx[c], run_index);\n\ + }\n\ +#else\n\ + int run_index = 0;\n\ + for (int y = 0; y < sc.slice_dim.y; y++) {\n\ + preload_rgb(sc, sp, sc.slice_dim.x, y, true);\n\ +\n\ + encode_line(sc, tmp, slice_state_off[0],\n\ + sp, y, 0, 1, bits, quant_table_idx[0], run_index);\n\ + encode_line(sc, tmp, slice_state_off[1],\n\ + sp, y, 0, 2, bits, quant_table_idx[1], run_index);\n\ + encode_line(sc, tmp, slice_state_off[2],\n\ + sp, y, 0, 0, bits, quant_table_idx[2], run_index);\n\ + if (transparency == 1)\n\ + encode_line(sc, tmp, slice_state_off[3],\n\ + sp, y, 0, 3, bits, quant_table_idx[3], run_index);\n\ + }\n\ +#endif\n\ + }\n\ +}\n\ +\n\ +void finalize_slice(inout SliceContext sc, const uint slice_idx)\n\ +{\n\ +#ifdef CACHED_SYMBOL_READER\n\ + if (gl_LocalInvocationID.x > 0)\n\ + return;\n\ +#endif\n\ +\n\ +#ifdef GOLOMB\n\ + uint32_t enc_len = sc.hdr_len + flush_put_bits(sc.pb);\n\ +#else\n\ + uint32_t enc_len = rac_terminate(sc.c);\n\ +#endif\n\ +\n\ + u8buf bs = u8buf(sc.c.bytestream_start);\n\ +\n\ + /* Append slice length */\n\ + u8vec4 enc_len_p = unpack8(enc_len);\n\ + bs[enc_len + 0].v = enc_len_p.z;\n\ + bs[enc_len + 1].v = enc_len_p.y;\n\ + bs[enc_len + 2].v = enc_len_p.x;\n\ + enc_len += 3;\n\ +\n\ + /* Calculate and write CRC */\n\ + if (ec != 0) {\n\ + bs[enc_len].v = uint8_t(0);\n\ + enc_len++;\n\ +\n\ + uint32_t crc = crcref;\n\ + for (int i = 0; i < enc_len; i++)\n\ + crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);\n\ +\n\ + if (crcref != 0x00000000)\n\ + crc ^= 0x8CD88196;\n\ +\n\ + u8vec4 crc_p = unpack8(crc);\n\ + bs[enc_len + 0].v = crc_p.x;\n\ + bs[enc_len + 1].v = crc_p.y;\n\ + bs[enc_len + 2].v = 
crc_p.z;\n\ + bs[enc_len + 3].v = crc_p.w;\n\ + enc_len += 4;\n\ + }\n\ +\n\ + slice_results[slice_idx*2 + 0] = enc_len;\n\ + slice_results[slice_idx*2 + 1] = uint64_t(bs) - uint64_t(out_data);\n\ +}\n\ +\n\ +void main(void)\n\ +{\n\ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n\ + encode_slice(slice_ctx[slice_idx], slice_idx);\n\ + finalize_slice(slice_ctx[slice_idx], slice_idx);\n\ +}"; + +const char *ff_source_ffv1_rct_comp = "\ +ivec4 load_components(ivec2 pos)\n\ +{\n\ + ivec4 pix = ivec4(imageLoad(src[0], pos));\n\ + if (planar_rgb != 0) {\n\ + for (int i = 1; i < (3 + transparency); i++)\n\ + pix[i] = int(imageLoad(src[i], pos)[0]);\n\ + }\n\ +\n\ + return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],\n\ + pix[fmt_lut[2]], pix[fmt_lut[3]]);\n\ +}\n\ +\n\ +void bypass_sample(ivec2 pos)\n\ +{\n\ + imageStore(dst[0], pos, load_components(pos));\n\ +}\n\ +\n\ +void bypass_block(in SliceContext sc)\n\ +{\n\ + ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos;\n\ + ivec2 end = sc.slice_pos + sc.slice_dim;\n\ + for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y)\n\ + for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x)\n\ + bypass_sample(ivec2(x, y));\n\ +}\n\ +\n\ +/* GLSL accepts only one body per function signature, so the two directions of\n\ + * the transform cannot both be compiled. Select one with DECODE.\n\ + * NOTE(review): confirm the host defines DECODE when building the decode\n\ + * pipeline for this shader. */\n\ +#ifdef DECODE\n\ +/* Inverse transform: remove the offset, undo the luma mix, add green back */\n\ +void transform_sample(ivec2 pos, ivec2 rct_coef)\n\ +{\n\ + ivec4 pix = load_components(pos);\n\ + pix.b -= offset;\n\ + pix.r -= offset;\n\ + pix.g -= (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;\n\ + pix.b += pix.g;\n\ + pix.r += pix.g;\n\ + imageStore(dst[0], pos, pix);\n\ +}\n\ +#else\n\ +/* Forward transform: subtract green, mix into luma, add the offset */\n\ +void transform_sample(ivec2 pos, ivec2 rct_coef)\n\ +{\n\ + ivec4 pix = load_components(pos);\n\ + pix.b -= pix.g;\n\ + pix.r -= pix.g;\n\ + pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;\n\ + pix.b += offset;\n\ + pix.r += offset;\n\ + imageStore(dst[0], pos, pix);\n\ +}\n\ +#endif\n\ +\n\ +void transform_block(in SliceContext sc)\n\ +{\n\ + const ivec2 rct_coef = sc.slice_rct_coef;\n\ + const ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos;\n\ + const ivec2 end = 
sc.slice_pos + sc.slice_dim;\n\ +\n\ + for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y)\n\ + for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x)\n\ + transform_sample(ivec2(x, y), rct_coef);\n\ +}\n\ +\n\ +void main()\n\ +{\n\ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n\ +\n\ + if (slice_ctx[slice_idx].slice_coding_mode == 1)\n\ + bypass_block(slice_ctx[slice_idx]);\n\ + else\n\ + transform_block(slice_ctx[slice_idx]);\n\ +}"; + +const char *ff_source_ffv1_enc_rct_comp = "\ +ivec4 load_components(ivec2 pos)\n\ +{\n\ + ivec4 pix = ivec4(imageLoad(src[0], pos));\n\ + if (planar_rgb != 0) {\n\ + for (int i = 1; i < (3 + transparency); i++)\n\ + pix[i] = int(imageLoad(src[i], pos)[0]);\n\ + }\n\ +\n\ + return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],\n\ + pix[fmt_lut[2]], pix[fmt_lut[3]]);\n\ +}\n\ +\n\ +void bypass_sample(ivec2 pos)\n\ +{\n\ + imageStore(dst[0], pos, load_components(pos));\n\ +}\n\ +\n\ +void bypass_block(in SliceContext sc)\n\ +{\n\ + ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos;\n\ + ivec2 end = sc.slice_pos + sc.slice_dim;\n\ + for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y)\n\ + for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x)\n\ + bypass_sample(ivec2(x, y));\n\ +}\n\ +\n\ +void transform_sample(ivec2 pos, ivec2 rct_coef)\n\ +{\n\ + ivec4 pix = load_components(pos);\n\ + pix.b -= pix.g;\n\ + pix.r -= pix.g;\n\ + pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;\n\ + pix.b += offset;\n\ + pix.r += offset;\n\ + imageStore(dst[0], pos, pix);\n\ +}\n\ +\n\ +void transform_block(in SliceContext sc)\n\ +{\n\ + const ivec2 rct_coef = sc.slice_rct_coef;\n\ + const ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos;\n\ + const ivec2 end = sc.slice_pos + sc.slice_dim;\n\ +\n\ + for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y)\n\ + for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x)\n\ + transform_sample(ivec2(x, y), rct_coef);\n\ +}\n\ +\n\ +void 
main()\n\ +{\n\ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n\ +\n\ + if (slice_ctx[slice_idx].slice_coding_mode == 1)\n\ + bypass_block(slice_ctx[slice_idx]);\n\ + else\n\ + transform_block(slice_ctx[slice_idx]);\n\ +}"; + +const char *ff_source_ffv1_dec_setup_comp = "\ +uint8_t setup_state[CONTEXT_SIZE];\n\ +\n\ +uint get_usymbol(inout RangeCoder c)\n\ +{\n\ + if (get_rac_direct(c, setup_state[0]))\n\ + return 0;\n\ +\n\ + int e = 0;\n\ + while (get_rac_direct(c, setup_state[1 + min(e, 9)])) { // 1..10\n\ + e++;\n\ + if (e > 31) {\n\ + corrupt = true;\n\ + return 0;\n\ + }\n\ + }\n\ +\n\ + uint a = 1;\n\ + for (int i = e - 1; i >= 0; i--) {\n\ + a <<= 1;\n\ + a |= uint(get_rac_direct(c, setup_state[22 + min(i, 9)])); // 22..31\n\ + }\n\ +\n\ + return a;\n\ +}\n\ +\n\ +/* Parses one slice header into sc. Returns true when the header is invalid. */\n\ +bool decode_slice_header(inout SliceContext sc)\n\ +{\n\ + [[unroll]]\n\ + for (int i = 0; i < CONTEXT_SIZE; i++)\n\ + setup_state[i] = uint8_t(128);\n\ +\n\ + uint sx = get_usymbol(sc.c);\n\ + uint sy = get_usymbol(sc.c);\n\ + uint sw = get_usymbol(sc.c) + 1;\n\ + uint sh = get_usymbol(sc.c) + 1;\n\ +\n\ + /* All four values are unsigned: a zero size means the + 1 wrapped, and the\n\ + * size must be range-checked before the subtraction or it wraps too. */\n\ + if (sw == 0 || sh == 0 ||\n\ + sw > gl_NumWorkGroups.x || sh > gl_NumWorkGroups.y ||\n\ + sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) ||\n\ + corrupt) {\n\ + return true;\n\ + }\n\ +\n\ + /* Set coordinates */\n\ + uint sxs = slice_coord(img_size.x, sx , gl_NumWorkGroups.x, chroma_shift.x);\n\ + uint sxe = slice_coord(img_size.x, sx + sw, gl_NumWorkGroups.x, chroma_shift.x);\n\ + uint sys = slice_coord(img_size.y, sy , gl_NumWorkGroups.y, chroma_shift.y);\n\ + uint sye = slice_coord(img_size.y, sy + sh, gl_NumWorkGroups.y, chroma_shift.y);\n\ +\n\ + sc.slice_pos = ivec2(sxs, sys);\n\ + sc.slice_dim = ivec2(sxe - sxs, sye - sys);\n\ + sc.slice_rct_coef = ivec2(1, 1);\n\ + sc.slice_coding_mode = int(0);\n\ +\n\ + for (uint i = 0; i < codec_planes; i++) {\n\ + uint idx = get_usymbol(sc.c);\n\ + if (idx >= quant_table_count)\n\ + return true;\n\ + sc.quant_table_idx[i] = 
uint8_t(idx);\n\ + }\n\ +\n\ + get_usymbol(sc.c);\n\ + get_usymbol(sc.c);\n\ + get_usymbol(sc.c);\n\ +\n\ + if (version >= 4) {\n\ + sc.slice_reset_contexts = get_rac_direct(sc.c, setup_state[0]);\n\ + sc.slice_coding_mode = get_usymbol(sc.c);\n\ + if (sc.slice_coding_mode != 1 && colorspace == 1) {\n\ + /* The encoder writes slice_rct_coef.y first, then .x; read them\n\ + * back in that order. Validate as unsigned so that huge symbols\n\ + * cannot turn negative and slip past the sum check. */\n\ + uint coef_y = get_usymbol(sc.c);\n\ + uint coef_x = get_usymbol(sc.c);\n\ + if (coef_x > 4 || coef_y > 4 || (coef_x + coef_y) > 4)\n\ + return true;\n\ + sc.slice_rct_coef = ivec2(coef_x, coef_y);\n\ + }\n\ + }\n\ +\n\ + return false;\n\ +}\n\ +\n\ +void golomb_init(inout SliceContext sc)\n\ +{\n\ + if (version == 3 && micro_version > 1 || version > 3) {\n\ + setup_state[0] = uint8_t(129);\n\ + get_rac_direct(sc.c, setup_state[0]);\n\ + }\n\ +\n\ + uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;\n\ + init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count),\n\ + int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count));\n\ +}\n\ +\n\ +void main(void)\n\ +{\n\ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n\ +\n\ + u8buf bs = u8buf(slice_data + slice_offsets[2*slice_idx + 0]);\n\ + uint32_t slice_size = slice_offsets[2*slice_idx + 1];\n\ +\n\ + rac_init_dec(slice_ctx[slice_idx].c,\n\ + bs, slice_size);\n\ +\n\ + if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1))\n\ + get_rac_equi(slice_ctx[slice_idx].c);\n\ +\n\ + /* An invalid header must be reported through slice_status below */\n\ + if (decode_slice_header(slice_ctx[slice_idx]))\n\ + corrupt = true;\n\ +\n\ + if (golomb == 1)\n\ + golomb_init(slice_ctx[slice_idx]);\n\ +\n\ + if (ec != 0 && check_crc != 0) {\n\ + uint32_t crc = crcref;\n\ + for (int i = 0; i < slice_size; i++)\n\ + crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);\n\ +\n\ + slice_status[2*slice_idx + 0] = crc;\n\ + }\n\ +\n\ + slice_status[2*slice_idx + 1] = corrupt ? 
uint32_t(corrupt) : overread;\n\ +}"; + +const char *ff_source_ffv1_dec_comp = "\ +#ifndef GOLOMB\n\ +#ifdef CACHED_SYMBOL_READER\n\ +shared uint8_t state[CONTEXT_SIZE];\n\ +#define READ(c, off) get_rac_direct(c, state[off])\n\ +#else\n\ +#define READ(c, off) get_rac(c, uint64_t(slice_state) + (state_off + off))\n\ +#endif\n\ +\n\ +int get_isymbol(inout RangeCoder c, uint state_off)\n\ +{\n\ + if (READ(c, 0))\n\ + return 0;\n\ +\n\ + uint e = 1;\n\ + for (; e < 33; e++)\n\ + if (!READ(c, min(e, 10)))\n\ + break;\n\ +\n\ + if (expectEXT(e == 1, false)) {\n\ + return READ(c, 11) ? -1 : 1;\n\ + } else if (expectEXT(e == 33, false)) {\n\ + corrupt = true;\n\ + return 0;\n\ + }\n\ +\n\ + int a = 1;\n\ + for (uint i = e + 20; i >= 22; i--) {\n\ + a <<= 1;\n\ + a |= int(READ(c, min(i, 31)));\n\ + }\n\ +\n\ + return READ(c, min(e + 10, 21)) ? -a : a;\n\ +}\n\ +\n\ +void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits)\n\ +{\n\ +#ifdef CACHED_SYMBOL_READER\n\ + if (gl_LocalInvocationID.x > 0)\n\ + return;\n\ +#endif\n\ +\n\ +#ifndef RGB\n\ + if (p > 0 && p < 3) {\n\ + w >>= chroma_shift.x;\n\ + sp >>= chroma_shift;\n\ + }\n\ +#endif\n\ +\n\ + for (int x = 0; x < w; x++) {\n\ + uint v = 0;\n\ + for (int i = (bits - 1); i >= 0; i--)\n\ + v |= uint(get_rac_equi(sc.c)) << i;\n\ +\n\ + imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));\n\ + }\n\ +}\n\ +\n\ +void decode_line(inout SliceContext sc, ivec2 sp, int w,\n\ + int y, int p, int bits, uint state_off,\n\ + uint8_t quant_table_idx, const int run_index)\n\ +{\n\ +#ifndef RGB\n\ + if (p > 0 && p < 3) {\n\ + w >>= chroma_shift.x;\n\ + sp >>= chroma_shift;\n\ + }\n\ +#endif\n\ +\n\ + for (int x = 0; x < w; x++) {\n\ + ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,\n\ + quant_table_idx, extend_lookup[quant_table_idx] > 0);\n\ +\n\ + uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]);\n\ +#ifdef CACHED_SYMBOL_READER\n\ + u8buf sb = u8buf(uint64_t(slice_state) + context_off + 
gl_LocalInvocationID.x);\n\ + state[gl_LocalInvocationID.x] = sb.v;\n\ + barrier();\n\ + if (gl_LocalInvocationID.x == 0) {\n\ +\n\ +#endif\n\ +\n\ + int diff = get_isymbol(sc.c, context_off);\n\ + if (pr[0] < 0)\n\ + diff = -diff;\n\ +\n\ + uint v = zero_extend(pr[1] + diff, bits);\n\ + imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));\n\ +\n\ +#ifdef CACHED_SYMBOL_READER\n\ + }\n\ +\n\ + barrier();\n\ + sb.v = state[gl_LocalInvocationID.x];\n\ +#endif\n\ + }\n\ +}\n\ +\n\ +#else /* GOLOMB */\n\ +\n\ +void decode_line(inout SliceContext sc, ivec2 sp, int w,\n\ + int y, int p, int bits, uint state_off,\n\ + uint8_t quant_table_idx, inout int run_index)\n\ +{\n\ +#ifndef RGB\n\ + if (p > 0 && p < 3) {\n\ + w >>= chroma_shift.x;\n\ + sp >>= chroma_shift;\n\ + }\n\ +#endif\n\ +\n\ + int run_count = 0;\n\ + int run_mode = 0;\n\ +\n\ + for (int x = 0; x < w; x++) {\n\ + ivec2 pos = sp + ivec2(x, y);\n\ + int diff;\n\ + ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,\n\ + quant_table_idx, extend_lookup[quant_table_idx] > 0);\n\ +\n\ + uint context_off = state_off + VLC_STATE_SIZE*abs(pr[0]);\n\ + VlcState sb = VlcState(uint64_t(slice_state) + context_off);\n\ +\n\ + if (pr[0] == 0 && run_mode == 0)\n\ + run_mode = 1;\n\ +\n\ + if (run_mode != 0) {\n\ + if (run_count == 0 && run_mode == 1) {\n\ + int tmp_idx = int(log2_run[run_index]);\n\ + if (get_bit(sc.gb)) {\n\ + run_count = 1 << tmp_idx;\n\ + if (x + run_count <= w)\n\ + run_index++;\n\ + } else {\n\ + if (tmp_idx != 0) {\n\ + run_count = int(get_bits(sc.gb, tmp_idx));\n\ + } else\n\ + run_count = 0;\n\ +\n\ + if (run_index != 0)\n\ + run_index--;\n\ + run_mode = 2;\n\ + }\n\ + }\n\ +\n\ + run_count--;\n\ + if (run_count < 0) {\n\ + run_mode = 0;\n\ + run_count = 0;\n\ + diff = read_vlc_symbol(sc.gb, sb, bits);\n\ + if (diff >= 0)\n\ + diff++;\n\ + } else {\n\ + diff = 0;\n\ + }\n\ + } else {\n\ + diff = read_vlc_symbol(sc.gb, sb, bits);\n\ + }\n\ +\n\ + if (pr[0] < 0)\n\ + diff = -diff;\n\ +\n\ + uint v = 
zero_extend(pr[1] + diff, bits);\n\ + imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));\n\ + }\n\ +}\n\ +#endif\n\ +\n\ +#ifdef RGB\n\ +ivec4 transform_sample(ivec4 pix, ivec2 rct_coef)\n\ +{\n\ + pix.b -= rct_offset;\n\ + pix.r -= rct_offset;\n\ + pix.g -= (pix.b*rct_coef.y + pix.r*rct_coef.x) >> 2;\n\ + pix.b += pix.g;\n\ + pix.r += pix.g;\n\ + return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],\n\ + pix[fmt_lut[2]], pix[fmt_lut[3]]);\n\ +}\n\ +\n\ +void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)\n\ +{\n\ + for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) {\n\ + ivec2 lpos = sp + LADDR(ivec2(x, y));\n\ + ivec2 pos = sc.slice_pos + ivec2(x, y);\n\ +\n\ + ivec4 pix;\n\ + pix.r = int(imageLoad(dec[2], lpos)[0]);\n\ + pix.g = int(imageLoad(dec[0], lpos)[0]);\n\ + pix.b = int(imageLoad(dec[1], lpos)[0]);\n\ + if (transparency != 0)\n\ + pix.a = int(imageLoad(dec[3], lpos)[0]);\n\ +\n\ + if (expectEXT(apply_rct, true))\n\ + pix = transform_sample(pix, sc.slice_rct_coef);\n\ +\n\ + imageStore(dst[0], pos, pix);\n\ + if (planar_rgb != 0) {\n\ + for (int i = 1; i < color_planes; i++)\n\ + imageStore(dst[i], pos, ivec4(pix[i]));\n\ + }\n\ + }\n\ +}\n\ +#endif\n\ +\n\ +void decode_slice(inout SliceContext sc, const uint slice_idx)\n\ +{\n\ + int w = sc.slice_dim.x;\n\ + ivec2 sp = sc.slice_pos;\n\ +\n\ +#ifndef RGB\n\ + int bits = bits_per_raw_sample;\n\ +#else\n\ + int bits = 9;\n\ + if (bits != 8 || sc.slice_coding_mode != 0)\n\ + bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);\n\ +\n\ + sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;\n\ +#endif\n\ +\n\ + /* PCM coding */\n\ +#ifndef GOLOMB\n\ + if (sc.slice_coding_mode == 1) {\n\ +#ifndef RGB\n\ + for (int p = 0; p < planes; p++) {\n\ + int h = sc.slice_dim.y;\n\ + if (p > 0 && p < 3)\n\ + h >>= chroma_shift.y;\n\ +\n\ + for (int y = 0; y < h; y++)\n\ + decode_line_pcm(sc, sp, w, y, p, bits);\n\ + }\n\ +#else\n\ + for (int y = 0; y < sc.slice_dim.y; y++) {\n\ + 
for (int p = 0; p < color_planes; p++)\n\ + decode_line_pcm(sc, sp, w, y, p, bits);\n\ +\n\ + writeout_rgb(sc, sp, w, y, false);\n\ + }\n\ +#endif\n\ + } else\n\ +\n\ + /* Arithmetic coding */\n\ +#endif\n\ + {\n\ + u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;\n\ + u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size;\n\ +\n\ +#ifndef RGB\n\ + for (int p = 0; p < planes; p++) {\n\ + int h = sc.slice_dim.y;\n\ + if (p > 0 && p < 3)\n\ + h >>= chroma_shift.y;\n\ +\n\ + int run_index = 0;\n\ + for (int y = 0; y < h; y++)\n\ + decode_line(sc, sp, w, y, p, bits,\n\ + slice_state_off[p], quant_table_idx[p], run_index);\n\ + }\n\ +#else\n\ + int run_index = 0;\n\ + for (int y = 0; y < sc.slice_dim.y; y++) {\n\ + for (int p = 0; p < color_planes; p++)\n\ + decode_line(sc, sp, w, y, p, bits,\n\ + slice_state_off[p], quant_table_idx[p], run_index);\n\ +\n\ + writeout_rgb(sc, sp, w, y, true);\n\ + }\n\ +#endif\n\ + }\n\ +}\n\ +\n\ +void main(void)\n\ +{\n\ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n\ + decode_slice(slice_ctx[slice_idx], slice_idx);\n\ +\n\ + uint32_t status = corrupt ? 
uint32_t(corrupt) : overread;\n\ + if (status != 0)\n\ + slice_status[2*slice_idx + 1] = status;\n\ +}"; + +const char *ff_source_prores_raw_comp = "\ +#define I16(x) (int16_t(x))\n\ +\n\ +#define COMP_ID (gl_LocalInvocationID.z)\n\ +#define BLOCK_ID (gl_LocalInvocationID.y)\n\ +#define ROW_ID (gl_LocalInvocationID.x)\n\ +\n\ +GetBitContext gb;\n\ +shared float btemp[gl_WorkGroupSize.z][16][64] = { };\n\ +shared float block[gl_WorkGroupSize.z][16][64];\n\ +\n\ +void idct8_horiz(const uint row_id)\n\ +{\n\ + float t0, t1, t2, t3, t4, t5, t6, t7, u8;\n\ + float u0, u1, u2, u3, u4, u5, u6, u7;\n\ +\n\ + /* Input */\n\ + t0 = block[COMP_ID][BLOCK_ID][8*row_id + 0];\n\ + u4 = block[COMP_ID][BLOCK_ID][8*row_id + 1];\n\ + t2 = block[COMP_ID][BLOCK_ID][8*row_id + 2];\n\ + u6 = block[COMP_ID][BLOCK_ID][8*row_id + 3];\n\ + t1 = block[COMP_ID][BLOCK_ID][8*row_id + 4];\n\ + u5 = block[COMP_ID][BLOCK_ID][8*row_id + 5];\n\ + t3 = block[COMP_ID][BLOCK_ID][8*row_id + 6];\n\ + u7 = block[COMP_ID][BLOCK_ID][8*row_id + 7];\n\ +\n\ + /* Embedded scaled inverse 4-point Type-II DCT */\n\ + u0 = t0 + t1;\n\ + u1 = t0 - t1;\n\ + u3 = t2 + t3;\n\ + u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;\n\ + t0 = u0 + u3;\n\ + t3 = u0 - u3;\n\ + t1 = u1 + u2;\n\ + t2 = u1 - u2;\n\ +\n\ + /* Embedded scaled inverse 4-point Type-IV DST */\n\ + t5 = u5 + u6;\n\ + t6 = u5 - u6;\n\ + t7 = u4 + u7;\n\ + t4 = u4 - u7;\n\ + u7 = t7 + t5;\n\ + u5 = (t7 - t5)*(1.4142135623730950488016887242097f);\n\ + u8 = (t4 + t6)*(1.8477590650225735122563663787936f);\n\ + u4 = u8 - t4*(1.0823922002923939687994464107328f);\n\ + u6 = u8 - t6*(2.6131259297527530557132863468544f);\n\ + t7 = u7;\n\ + t6 = t7 - u6;\n\ + t5 = t6 + u5;\n\ + t4 = t5 - u4;\n\ +\n\ + /* Butterflies */\n\ + u0 = t0 + t7;\n\ + u7 = t0 - t7;\n\ + u6 = t1 + t6;\n\ + u1 = t1 - t6;\n\ + u2 = t2 + t5;\n\ + u5 = t2 - t5;\n\ + u4 = t3 + t4;\n\ + u3 = t3 - t4;\n\ +\n\ + /* Output */\n\ + btemp[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;\n\ + 
btemp[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;\n\ + btemp[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;\n\ + btemp[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;\n\ + btemp[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;\n\ + btemp[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;\n\ + btemp[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;\n\ + btemp[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;\n\ +}\n\ +\n\ +void idct8_vert(const uint row_id)\n\ +{\n\ + float t0, t1, t2, t3, t4, t5, t6, t7, u8;\n\ + float u0, u1, u2, u3, u4, u5, u6, u7;\n\ +\n\ + /* Input */\n\ + t0 = btemp[COMP_ID][BLOCK_ID][8*row_id + 0] + 0.5f; // NOTE\n\ + u4 = btemp[COMP_ID][BLOCK_ID][8*row_id + 1];\n\ + t2 = btemp[COMP_ID][BLOCK_ID][8*row_id + 2];\n\ + u6 = btemp[COMP_ID][BLOCK_ID][8*row_id + 3];\n\ + t1 = btemp[COMP_ID][BLOCK_ID][8*row_id + 4];\n\ + u5 = btemp[COMP_ID][BLOCK_ID][8*row_id + 5];\n\ + t3 = btemp[COMP_ID][BLOCK_ID][8*row_id + 6];\n\ + u7 = btemp[COMP_ID][BLOCK_ID][8*row_id + 7];\n\ +\n\ + /* Embedded scaled inverse 4-point Type-II DCT */\n\ + u0 = t0 + t1;\n\ + u1 = t0 - t1;\n\ + u3 = t2 + t3;\n\ + u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;\n\ + t0 = u0 + u3;\n\ + t3 = u0 - u3;\n\ + t1 = u1 + u2;\n\ + t2 = u1 - u2;\n\ +\n\ + /* Embedded scaled inverse 4-point Type-IV DST */\n\ + t5 = u5 + u6;\n\ + t6 = u5 - u6;\n\ + t7 = u4 + u7;\n\ + t4 = u4 - u7;\n\ + u7 = t7 + t5;\n\ + u5 = (t7 - t5)*(1.4142135623730950488016887242097f);\n\ + u8 = (t4 + t6)*(1.8477590650225735122563663787936f);\n\ + u4 = u8 - t4*(1.0823922002923939687994464107328f);\n\ + u6 = u8 - t6*(2.6131259297527530557132863468544f);\n\ + t7 = u7;\n\ + t6 = t7 - u6;\n\ + t5 = t6 + u5;\n\ + t4 = t5 - u4;\n\ +\n\ + /* Butterflies */\n\ + u0 = t0 + t7;\n\ + u7 = t0 - t7;\n\ + u6 = t1 + t6;\n\ + u1 = t1 - t6;\n\ + u2 = t2 + t5;\n\ + u5 = t2 - t5;\n\ + u4 = t3 + t4;\n\ + u3 = t3 - t4;\n\ +\n\ + /* Output */\n\ + block[COMP_ID][BLOCK_ID][0*8 + row_id] = u0;\n\ + block[COMP_ID][BLOCK_ID][1*8 + row_id] = u1;\n\ + block[COMP_ID][BLOCK_ID][2*8 + row_id] = u2;\n\ + 
block[COMP_ID][BLOCK_ID][3*8 + row_id] = u3;\n\ + block[COMP_ID][BLOCK_ID][4*8 + row_id] = u4;\n\ + block[COMP_ID][BLOCK_ID][5*8 + row_id] = u5;\n\ + block[COMP_ID][BLOCK_ID][6*8 + row_id] = u6;\n\ + block[COMP_ID][BLOCK_ID][7*8 + row_id] = u7;\n\ +}\n\ +\n\ +int16_t get_value(int16_t codebook)\n\ +{\n\ + const int16_t switch_bits = codebook >> 8;\n\ + const int16_t rice_order = codebook & I16(0xf);\n\ + const int16_t exp_order = (codebook >> 4) & I16(0xf);\n\ +\n\ + uint32_t b = show_bits(gb, 32);\n\ + if (expectEXT(b == 0, false))\n\ + return I16(0);\n\ + int16_t q = I16(31) - I16(findMSB(b));\n\ +\n\ + if ((b & 0x80000000) != 0) {\n\ + skip_bits(gb, 1 + rice_order);\n\ + return I16((b & 0x7FFFFFFF) >> (31 - rice_order));\n\ + }\n\ +\n\ + if (q <= switch_bits) {\n\ + skip_bits(gb, q + rice_order + 1);\n\ + return I16((q << rice_order) +\n\ + (((b << (q + 1)) >> 1) >> (31 - rice_order)));\n\ + }\n\ +\n\ + int16_t bits = exp_order + (q << 1) - switch_bits;\n\ + skip_bits(gb, bits);\n\ + return I16((b >> (32 - bits)) +\n\ + ((switch_bits + 1) << rice_order) -\n\ + (1 << exp_order));\n\ +}\n\ +\n\ +#define TODCCODEBOOK(x) ((x + 1) >> 1)\n\ +\n\ +void read_dc_vals(const uint nb_blocks)\n\ +{\n\ + int16_t dc, dc_add;\n\ + int16_t prev_dc = I16(0), sign = I16(0);\n\ +\n\ + /* Special handling for first block */\n\ + dc = get_value(I16(700));\n\ + prev_dc = (dc >> 1) ^ -(dc & I16(1));\n\ + btemp[COMP_ID][0][0] = prev_dc;\n\ +\n\ + for (uint n = 1; n < nb_blocks; n++) {\n\ + if (expectEXT(left_bits(gb) <= 0, false))\n\ + break;\n\ +\n\ + uint8_t dc_codebook;\n\ + if ((n & 15) == 1)\n\ + dc_codebook = uint8_t(100);\n\ + else\n\ + dc_codebook = dc_cb[min(TODCCODEBOOK(dc), 13 - 1)];\n\ +\n\ + dc = get_value(dc_codebook);\n\ +\n\ + sign = sign ^ dc & int16_t(1);\n\ + dc_add = (-sign ^ I16(TODCCODEBOOK(dc))) + sign;\n\ + sign = I16(dc_add < 0);\n\ + prev_dc += dc_add;\n\ +\n\ + btemp[COMP_ID][n][0] = prev_dc;\n\ + }\n\ +}\n\ +\n\ +void read_ac_vals(const uint nb_blocks)\n\ 
+{\n\ + const uint nb_codes = nb_blocks << 6;\n\ + const uint log2_nb_blocks = findMSB(nb_blocks);\n\ + const uint block_mask = (1 << log2_nb_blocks) - 1;\n\ +\n\ + int16_t ac, rn, ln;\n\ + int16_t ac_codebook = I16(49);\n\ + int16_t rn_codebook = I16( 0);\n\ + int16_t ln_codebook = I16(66);\n\ + int16_t sign;\n\ + int16_t val;\n\ +\n\ + for (uint n = nb_blocks; n <= nb_codes;) {\n\ + if (expectEXT(left_bits(gb) <= 0, false))\n\ + break;\n\ +\n\ + ln = get_value(ln_codebook);\n\ + for (uint i = 0; i < ln; i++) {\n\ + if (expectEXT(left_bits(gb) <= 0, false))\n\ + break;\n\ +\n\ + if (expectEXT(n >= nb_codes, false))\n\ + break;\n\ +\n\ + ac = get_value(ac_codebook);\n\ + ac_codebook = ac_cb[min(ac, 95 - 1)];\n\ + sign = -int16_t(get_bit(gb));\n\ +\n\ + val = ((ac + I16(1)) ^ sign) - sign;\n\ + btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val;\n\ +\n\ + n++;\n\ + }\n\ +\n\ + if (expectEXT(n >= nb_codes, false))\n\ + break;\n\ +\n\ + rn = get_value(rn_codebook);\n\ + rn_codebook = rn_cb[min(rn, 28 - 1)];\n\ +\n\ + n += rn + 1;\n\ + if (expectEXT(n >= nb_codes, false))\n\ + break;\n\ +\n\ + if (expectEXT(left_bits(gb) <= 0, false))\n\ + break;\n\ +\n\ + ac = get_value(ac_codebook);\n\ + sign = -int16_t(get_bit(gb));\n\ +\n\ + val = ((ac + I16(1)) ^ sign) - sign;\n\ + btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val;\n\ +\n\ + ac_codebook = ac_cb[min(ac, 95 - 1)];\n\ + ln_codebook = ln_cb[min(ac, 15 - 1)];\n\ +\n\ + n++;\n\ + }\n\ +}\n\ +\n\ +void main(void)\n\ +{\n\ + const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;\n\ + TileData td = tile_data[tile_idx];\n\ +\n\ + if (expectEXT(td.pos.x >= frame_size.x, false))\n\ + return;\n\ +\n\ + uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;\n\ + u8vec2buf hdr_data = u8vec2buf(pkt_offset);\n\ + float qscale = float(pack16(hdr_data[0].v.yx)) / 2.0f;\n\ +\n\ + ivec4 size = ivec4(td.size,\n\ + pack16(hdr_data[2].v.yx),\n\ + pack16(hdr_data[1].v.yx),\n\ + 
pack16(hdr_data[3].v.yx));\n\ + size[0] = size[0] - size[1] - size[2] - size[3] - 8;\n\ + if (expectEXT(size[0] < 0, false))\n\ + return;\n\ +\n\ + const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);\n\ + const uint w = min(tile_size.x, frame_size.x - td.pos.x) / 2;\n\ + const uint nb_blocks = w / 8;\n\ +\n\ + const ivec4 comp_offset = ivec4(size[2] + size[1] + size[3],\n\ + size[2],\n\ + 0,\n\ + size[2] + size[1]);\n\ +\n\ + if (BLOCK_ID == 0 && ROW_ID == 0) {\n\ + init_get_bits(gb, u8buf(pkt_offset + 8 + comp_offset[COMP_ID]),\n\ + size[COMP_ID]);\n\ + read_dc_vals(nb_blocks);\n\ + read_ac_vals(nb_blocks);\n\ + }\n\ +\n\ + barrier();\n\ +\n\ + [[unroll]]\n\ + for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)\n\ + block[COMP_ID][BLOCK_ID][i] = (btemp[COMP_ID][BLOCK_ID][scan[i]] / 16384.0) *\n\ + (float(qmat[i]) / 295.0) *\n\ + idct_8x8_scales[i] * qscale;\n\ +\n\ + barrier();\n\ +\n\ +#ifdef PARALLEL_ROWS\n\ + idct8_horiz(ROW_ID);\n\ +\n\ + barrier();\n\ +\n\ + idct8_vert(ROW_ID);\n\ +#else\n\ + for (uint j = 0; j < 8; j++)\n\ + idct8_horiz(j);\n\ +\n\ + barrier();\n\ +\n\ + for (uint j = 0; j < 8; j++)\n\ + idct8_vert(j);\n\ +#endif\n\ +\n\ + barrier();\n\ +\n\ + [[unroll]]\n\ + for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x)\n\ + imageStore(dst,\n\ + offs + 2*ivec2(BLOCK_ID*8 + (i & 7), i >> 3),\n\ + vec4(block[COMP_ID][BLOCK_ID][i]));\n\ +}"; -- 2.49.1 From c4a23dca6cba9996ef0be2146bf866822e0a3a6f Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:40:51 +0000 Subject: [PATCH 088/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_source.h | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_source.h diff --git a/libavcodec/vulkan/vulkan_source.h b/libavcodec/vulkan/vulkan_source.h new file mode 100644 index 0000000000..35543b6d62 --- /dev/null +++ b/libavcodec/vulkan/vulkan_source.h @@ -0,0 +1,35 @@ +/* 
+ * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VULKAN_SOURCE_H +#define AVCODEC_VULKAN_SOURCE_H + +extern const char *ff_source_common_comp; +extern const char *ff_source_ffv1_common_comp; +extern const char *ff_source_ffv1_dec_comp; +extern const char *ff_source_ffv1_dec_setup_comp; +extern const char *ff_source_ffv1_enc_comp; +extern const char *ff_source_ffv1_enc_rct_comp; +extern const char *ff_source_ffv1_enc_setup_comp; +extern const char *ff_source_ffv1_rct_comp; +extern const char *ff_source_ffv1_rct_search_comp; +extern const char *ff_source_ffv1_reset_comp; +extern const char *ff_source_ffv1_vlc_comp; +extern const char *ff_source_rangecoder_comp; + +#endif /* AVCODEC_VULKAN_SOURCE_H */ -- 2.49.1 From 3c50d01c71836754d4f862ff9bd0af2445175705 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:41:33 +0000 Subject: [PATCH 089/118] Changing vulkan file directory --- libavcodec/vulkan_video.c | 482 -------------------------------------- 1 file changed, 482 deletions(-) delete mode 100644 libavcodec/vulkan_video.c diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c deleted file mode 100644 index 819940460f..0000000000 --- a/libavcodec/vulkan_video.c +++ /dev/null @@ -1,482 +0,0 @@ 
-/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/mem.h" -#include "vulkan_video.h" - -#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) -#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) - -static const struct FFVkFormatMapEntry { - VkFormat vkf; - enum AVPixelFormat pixfmt; - VkImageAspectFlags aspect; -} vk_format_map[] = { - /* Gray formats */ - { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT }, - - /* RGB formats */ - { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT }, - { 
VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, VK_IMAGE_ASPECT_COLOR_BIT }, - - /* Planar RGB */ - { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT }, - - /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */ - { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE }, - { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE }, - { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE }, - { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE }, - - /* Two-plane 422 YUV at 8, 10 and 16 bits */ - { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE }, - { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE }, - { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE }, - { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE }, - - /* Two-plane 444 YUV at 8, 10 and 16 bits */ - { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE }, - { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE }, - { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE }, - { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE }, - - /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */ - { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE }, - { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, 
ASPECT_3PLANE }, - { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE }, - { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE }, - { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE }, - { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE }, - { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE }, - { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE }, - { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE }, - { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE }, - { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE }, - { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE }, - - /* Single plane 422 at 8, 10, 12 and 16 bits */ - { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_G16B16G16R16_422_UNORM, AV_PIX_FMT_Y216, VK_IMAGE_ASPECT_COLOR_BIT }, - - /* Single plane 444 at 10 and 12 bits */ - { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_XV30, VK_IMAGE_ASPECT_COLOR_BIT }, - { VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT }, -}; -static const int nb_vk_format_map = FF_ARRAY_ELEMS(vk_format_map); - -enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf) -{ - for (int i = 0; i < nb_vk_format_map; i++) - if (vk_format_map[i].vkf == vkf) - return vk_format_map[i].pixfmt; - return AV_PIX_FMT_NONE; -} - -VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf) -{ - for (int i = 0; i < nb_vk_format_map; i++) - if (vk_format_map[i].vkf == vkf) - 
return vk_format_map[i].aspect; - return VK_IMAGE_ASPECT_NONE; -} - -VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc) -{ - if (desc->nb_components == 1) - return VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR; - else if (!desc->log2_chroma_w && !desc->log2_chroma_h) - return VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR; - else if (!desc->log2_chroma_w && desc->log2_chroma_h == 1) - return VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR; - else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1) - return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR; - return VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR; -} - -VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth) -{ - switch (depth) { - case 8: return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR; - case 10: return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR; - case 12: return VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR; - default: break; - } - return VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR; -} - -int ff_vk_h264_level_to_av(StdVideoH264LevelIdc level) -{ - switch (level) { - case STD_VIDEO_H264_LEVEL_IDC_1_0: return 10; - case STD_VIDEO_H264_LEVEL_IDC_1_1: return 11; - case STD_VIDEO_H264_LEVEL_IDC_1_2: return 12; - case STD_VIDEO_H264_LEVEL_IDC_1_3: return 13; - case STD_VIDEO_H264_LEVEL_IDC_2_0: return 20; - case STD_VIDEO_H264_LEVEL_IDC_2_1: return 21; - case STD_VIDEO_H264_LEVEL_IDC_2_2: return 22; - case STD_VIDEO_H264_LEVEL_IDC_3_0: return 30; - case STD_VIDEO_H264_LEVEL_IDC_3_1: return 31; - case STD_VIDEO_H264_LEVEL_IDC_3_2: return 32; - case STD_VIDEO_H264_LEVEL_IDC_4_0: return 40; - case STD_VIDEO_H264_LEVEL_IDC_4_1: return 41; - case STD_VIDEO_H264_LEVEL_IDC_4_2: return 42; - case STD_VIDEO_H264_LEVEL_IDC_5_0: return 50; - case STD_VIDEO_H264_LEVEL_IDC_5_1: return 51; - case STD_VIDEO_H264_LEVEL_IDC_5_2: return 52; - case STD_VIDEO_H264_LEVEL_IDC_6_0: return 60; - case STD_VIDEO_H264_LEVEL_IDC_6_1: return 61; - default: - case STD_VIDEO_H264_LEVEL_IDC_6_2: return 62; - 
} -} - -StdVideoH264LevelIdc ff_vk_h264_level_to_vk(int level_idc) -{ - switch (level_idc) { - case 10: return STD_VIDEO_H264_LEVEL_IDC_1_0; - case 11: return STD_VIDEO_H264_LEVEL_IDC_1_1; - case 12: return STD_VIDEO_H264_LEVEL_IDC_1_2; - case 13: return STD_VIDEO_H264_LEVEL_IDC_1_3; - case 20: return STD_VIDEO_H264_LEVEL_IDC_2_0; - case 21: return STD_VIDEO_H264_LEVEL_IDC_2_1; - case 22: return STD_VIDEO_H264_LEVEL_IDC_2_2; - case 30: return STD_VIDEO_H264_LEVEL_IDC_3_0; - case 31: return STD_VIDEO_H264_LEVEL_IDC_3_1; - case 32: return STD_VIDEO_H264_LEVEL_IDC_3_2; - case 40: return STD_VIDEO_H264_LEVEL_IDC_4_0; - case 41: return STD_VIDEO_H264_LEVEL_IDC_4_1; - case 42: return STD_VIDEO_H264_LEVEL_IDC_4_2; - case 50: return STD_VIDEO_H264_LEVEL_IDC_5_0; - case 51: return STD_VIDEO_H264_LEVEL_IDC_5_1; - case 52: return STD_VIDEO_H264_LEVEL_IDC_5_2; - case 60: return STD_VIDEO_H264_LEVEL_IDC_6_0; - case 61: return STD_VIDEO_H264_LEVEL_IDC_6_1; - default: - case 62: return STD_VIDEO_H264_LEVEL_IDC_6_2; - } -} - -int ff_vk_h265_level_to_av(StdVideoH265LevelIdc level) -{ - switch (level) { - case STD_VIDEO_H265_LEVEL_IDC_1_0: return 10; - case STD_VIDEO_H265_LEVEL_IDC_2_0: return 20; - case STD_VIDEO_H265_LEVEL_IDC_2_1: return 21; - case STD_VIDEO_H265_LEVEL_IDC_3_0: return 30; - case STD_VIDEO_H265_LEVEL_IDC_3_1: return 31; - case STD_VIDEO_H265_LEVEL_IDC_4_0: return 40; - case STD_VIDEO_H265_LEVEL_IDC_4_1: return 41; - case STD_VIDEO_H265_LEVEL_IDC_5_0: return 50; - case STD_VIDEO_H265_LEVEL_IDC_5_1: return 51; - case STD_VIDEO_H265_LEVEL_IDC_6_0: return 60; - case STD_VIDEO_H265_LEVEL_IDC_6_1: return 61; - default: - case STD_VIDEO_H265_LEVEL_IDC_6_2: return 62; - } -} - -StdVideoH265LevelIdc ff_vk_h265_level_to_vk(int level_idc) -{ - switch (level_idc) { - case 10: return STD_VIDEO_H265_LEVEL_IDC_1_0; - case 20: return STD_VIDEO_H265_LEVEL_IDC_2_0; - case 21: return STD_VIDEO_H265_LEVEL_IDC_2_1; - case 30: return STD_VIDEO_H265_LEVEL_IDC_3_0; - case 31: return 
STD_VIDEO_H265_LEVEL_IDC_3_1; - case 40: return STD_VIDEO_H265_LEVEL_IDC_4_0; - case 41: return STD_VIDEO_H265_LEVEL_IDC_4_1; - case 50: return STD_VIDEO_H265_LEVEL_IDC_5_0; - case 51: return STD_VIDEO_H265_LEVEL_IDC_5_1; - case 60: return STD_VIDEO_H265_LEVEL_IDC_6_0; - case 61: return STD_VIDEO_H265_LEVEL_IDC_6_1; - default: - case 62: return STD_VIDEO_H265_LEVEL_IDC_6_2; - } -} - -StdVideoAV1Level ff_vk_av1_level_to_vk(int level) -{ - switch (level) { - case 20: return STD_VIDEO_AV1_LEVEL_2_0; - case 21: return STD_VIDEO_AV1_LEVEL_2_1; - case 22: return STD_VIDEO_AV1_LEVEL_2_2; - case 23: return STD_VIDEO_AV1_LEVEL_2_3; - case 30: return STD_VIDEO_AV1_LEVEL_3_0; - case 31: return STD_VIDEO_AV1_LEVEL_3_1; - case 32: return STD_VIDEO_AV1_LEVEL_3_2; - case 33: return STD_VIDEO_AV1_LEVEL_3_3; - case 40: return STD_VIDEO_AV1_LEVEL_4_0; - case 41: return STD_VIDEO_AV1_LEVEL_4_1; - case 42: return STD_VIDEO_AV1_LEVEL_4_2; - case 43: return STD_VIDEO_AV1_LEVEL_4_3; - case 50: return STD_VIDEO_AV1_LEVEL_5_0; - case 51: return STD_VIDEO_AV1_LEVEL_5_1; - case 52: return STD_VIDEO_AV1_LEVEL_5_2; - case 53: return STD_VIDEO_AV1_LEVEL_5_3; - case 60: return STD_VIDEO_AV1_LEVEL_6_0; - case 61: return STD_VIDEO_AV1_LEVEL_6_1; - case 62: return STD_VIDEO_AV1_LEVEL_6_2; - case 63: return STD_VIDEO_AV1_LEVEL_6_3; - case 70: return STD_VIDEO_AV1_LEVEL_7_0; - case 71: return STD_VIDEO_AV1_LEVEL_7_1; - case 72: return STD_VIDEO_AV1_LEVEL_7_2; - default: - case 73: return STD_VIDEO_AV1_LEVEL_7_3; - } -} - -StdVideoH264ProfileIdc ff_vk_h264_profile_to_vk(int profile) -{ - switch (profile) { - case AV_PROFILE_H264_CONSTRAINED_BASELINE: return STD_VIDEO_H264_PROFILE_IDC_BASELINE; - case AV_PROFILE_H264_MAIN: return STD_VIDEO_H264_PROFILE_IDC_MAIN; - case AV_PROFILE_H264_HIGH: return STD_VIDEO_H264_PROFILE_IDC_HIGH; - case AV_PROFILE_H264_HIGH_444_PREDICTIVE: return STD_VIDEO_H264_PROFILE_IDC_HIGH_444_PREDICTIVE; - default: return STD_VIDEO_H264_PROFILE_IDC_INVALID; - } -} - 
-StdVideoH265ProfileIdc ff_vk_h265_profile_to_vk(int profile) -{ - switch (profile) { - case AV_PROFILE_HEVC_MAIN: return STD_VIDEO_H265_PROFILE_IDC_MAIN; - case AV_PROFILE_HEVC_MAIN_10: return STD_VIDEO_H265_PROFILE_IDC_MAIN_10; - case AV_PROFILE_HEVC_REXT: return STD_VIDEO_H265_PROFILE_IDC_FORMAT_RANGE_EXTENSIONS; - default: return STD_VIDEO_H265_PROFILE_IDC_INVALID; - } -} - -StdVideoAV1Profile ff_vk_av1_profile_to_vk(int profile) -{ - switch (profile) { - case AV_PROFILE_AV1_MAIN: return STD_VIDEO_AV1_PROFILE_MAIN; - case AV_PROFILE_AV1_HIGH: return STD_VIDEO_AV1_PROFILE_HIGH; - case AV_PROFILE_AV1_PROFESSIONAL: return STD_VIDEO_AV1_PROFILE_PROFESSIONAL; - default: return STD_VIDEO_AV1_PROFILE_INVALID; - } -} - -int ff_vk_create_view(FFVulkanContext *s, FFVkVideoCommon *common, - VkImageView *view, VkImageAspectFlags *aspect, - AVVkFrame *src, VkFormat vkf, int is_dpb) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(vkf); - - VkSamplerYcbcrConversionInfo yuv_sampler_info = { - .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, - .conversion = common->yuv_sampler, - }; - VkImageViewCreateInfo img_view_create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = &yuv_sampler_info, - .viewType = common->layered_dpb && is_dpb ? - VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D, - .format = vkf, - .image = src->img[0], - .components = (VkComponentMapping) { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = (VkImageSubresourceRange) { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseArrayLayer = 0, - .layerCount = common->layered_dpb && is_dpb ? 
- VK_REMAINING_ARRAY_LAYERS : 1, - .levelCount = 1, - }, - }; - - ret = vk->CreateImageView(s->hwctx->act_dev, &img_view_create_info, - s->hwctx->alloc, view); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; - - *aspect = aspect_mask; - - return 0; -} - -av_cold void ff_vk_video_common_uninit(FFVulkanContext *s, - FFVkVideoCommon *common) -{ - FFVulkanFunctions *vk = &s->vkfn; - - if (common->session) { - vk->DestroyVideoSessionKHR(s->hwctx->act_dev, common->session, - s->hwctx->alloc); - common->session = VK_NULL_HANDLE; - } - - if (common->nb_mem && common->mem) - for (int i = 0; i < common->nb_mem; i++) - vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc); - - av_freep(&common->mem); - - if (common->layered_view) - vk->DestroyImageView(s->hwctx->act_dev, common->layered_view, - s->hwctx->alloc); - - av_frame_free(&common->layered_frame); - - av_buffer_unref(&common->dpb_hwfc_ref); - - if (common->yuv_sampler) - vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, common->yuv_sampler, - s->hwctx->alloc); -} - -av_cold int ff_vk_video_common_init(AVCodecContext *avctx, FFVulkanContext *s, - FFVkVideoCommon *common, - VkVideoSessionCreateInfoKHR *session_create) -{ - int err; - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - VkVideoSessionMemoryRequirementsKHR *mem = NULL; - VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL; - - int cxpos = 0, cypos = 0; - VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = { - .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, - .components = ff_comp_identity_map, - .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY, - .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */ - .format = session_create->pictureFormat, - }; - - /* Create identity YUV sampler - * (VkImageViews of YUV image formats require it, even if it does nothing) */ - av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location); - yuv_sampler_info.xChromaOffset = cxpos >> 7; - 
yuv_sampler_info.yChromaOffset = cypos >> 7; - ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info, - s->hwctx->alloc, &common->yuv_sampler); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; - - /* Create session */ - ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create, - s->hwctx->alloc, &common->session); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; - - /* Get memory requirements */ - ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev, - common->session, - &common->nb_mem, - NULL); - if (ret != VK_SUCCESS) { - err = AVERROR_EXTERNAL; - goto fail; - } - - /* Allocate all memory needed to actually allocate memory */ - common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem); - if (!common->mem) { - err = AVERROR(ENOMEM); - goto fail; - } - mem = av_mallocz(sizeof(*mem)*common->nb_mem); - if (!mem) { - err = AVERROR(ENOMEM); - goto fail; - } - bind_mem = av_mallocz(sizeof(*bind_mem)*common->nb_mem); - if (!bind_mem) { - err = AVERROR(ENOMEM); - goto fail; - } - - /* Set the needed fields to get the memory requirements */ - for (int i = 0; i < common->nb_mem; i++) { - mem[i] = (VkVideoSessionMemoryRequirementsKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_MEMORY_REQUIREMENTS_KHR, - }; - } - - /* Finally get the memory requirements */ - ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev, - common->session, &common->nb_mem, - mem); - if (ret != VK_SUCCESS) { - err = AVERROR_EXTERNAL; - goto fail; - } - - /* Now allocate each requested memory. - * For ricing, could pool together memory that ends up in the same index. 
*/ - for (int i = 0; i < common->nb_mem; i++) { - err = ff_vk_alloc_mem(s, &mem[i].memoryRequirements, - UINT32_MAX, NULL, NULL, &common->mem[i]); - if (err < 0) - goto fail; - - bind_mem[i] = (VkBindVideoSessionMemoryInfoKHR) { - .sType = VK_STRUCTURE_TYPE_BIND_VIDEO_SESSION_MEMORY_INFO_KHR, - .memory = common->mem[i], - .memoryBindIndex = mem[i].memoryBindIndex, - .memoryOffset = 0, - .memorySize = mem[i].memoryRequirements.size, - }; - - av_log(avctx, AV_LOG_VERBOSE, "Allocating %"PRIu64" bytes in bind index %i for video session\n", - bind_mem[i].memorySize, bind_mem[i].memoryBindIndex); - } - - /* Bind the allocated memory */ - ret = vk->BindVideoSessionMemoryKHR(s->hwctx->act_dev, common->session, - common->nb_mem, bind_mem); - if (ret != VK_SUCCESS) { - err = AVERROR_EXTERNAL; - goto fail; - } - - av_freep(&mem); - av_freep(&bind_mem); - - return 0; - -fail: - av_freep(&mem); - av_freep(&bind_mem); - - ff_vk_video_common_uninit(s, common); - return err; -} -- 2.49.1 From 129144fc65ec1b7ec3c028265eb2efa03b3e0ea8 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:42:03 +0000 Subject: [PATCH 090/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_video.c | 482 +++++++++++++++++++++++++++++++ 1 file changed, 482 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_video.c diff --git a/libavcodec/vulkan/vulkan_video.c b/libavcodec/vulkan/vulkan_video.c new file mode 100644 index 0000000000..819940460f --- /dev/null +++ b/libavcodec/vulkan/vulkan_video.c @@ -0,0 +1,482 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/mem.h" +#include "vulkan_video.h" + +#define ASPECT_2PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT) +#define ASPECT_3PLANE (VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT) + +static const struct FFVkFormatMapEntry { + VkFormat vkf; + enum AVPixelFormat pixfmt; + VkImageAspectFlags aspect; +} vk_format_map[] = { + /* Gray formats */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GRAY8, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GRAY16, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GRAYF32, VK_IMAGE_ASPECT_COLOR_BIT }, + + /* RGB formats */ + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGRA, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGBA, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R8G8B8_UNORM, AV_PIX_FMT_RGB24, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_B8G8R8_UNORM, AV_PIX_FMT_BGR24, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R16G16B16_UNORM, AV_PIX_FMT_RGB48, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R16G16B16A16_UNORM, AV_PIX_FMT_RGBA64, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R5G6B5_UNORM_PACK16, AV_PIX_FMT_RGB565, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_B5G6R5_UNORM_PACK16, AV_PIX_FMT_BGR565, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_B8G8R8A8_UNORM, AV_PIX_FMT_BGR0, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R8G8B8A8_UNORM, AV_PIX_FMT_RGB0, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_X2RGB10, 
VK_IMAGE_ASPECT_COLOR_BIT }, + + /* Planar RGB */ + { VK_FORMAT_R8_UNORM, AV_PIX_FMT_GBRAP, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R16_UNORM, AV_PIX_FMT_GBRAP16, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRPF32, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R32_SFLOAT, AV_PIX_FMT_GBRAPF32, VK_IMAGE_ASPECT_COLOR_BIT }, + + /* Two-plane 420 YUV at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, AV_PIX_FMT_NV12, ASPECT_2PLANE }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P010, ASPECT_2PLANE }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16, AV_PIX_FMT_P012, ASPECT_2PLANE }, + { VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, AV_PIX_FMT_P016, ASPECT_2PLANE }, + + /* Two-plane 422 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, AV_PIX_FMT_NV16, ASPECT_2PLANE }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P210, ASPECT_2PLANE }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16, AV_PIX_FMT_P212, ASPECT_2PLANE }, + { VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, AV_PIX_FMT_P216, ASPECT_2PLANE }, + + /* Two-plane 444 YUV at 8, 10 and 16 bits */ + { VK_FORMAT_G8_B8R8_2PLANE_444_UNORM, AV_PIX_FMT_NV24, ASPECT_2PLANE }, + { VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P410, ASPECT_2PLANE }, + { VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16, AV_PIX_FMT_P412, ASPECT_2PLANE }, + { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, AV_PIX_FMT_P416, ASPECT_2PLANE }, + + /* Three-plane 420, 422, 444 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P10, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P12, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, AV_PIX_FMT_YUV420P16, ASPECT_3PLANE }, + { VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P, ASPECT_3PLANE }, + { 
VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P10, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P12, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, AV_PIX_FMT_YUV422P16, ASPECT_3PLANE }, + { VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P10, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P12, ASPECT_3PLANE }, + { VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, AV_PIX_FMT_YUV444P16, ASPECT_3PLANE }, + + /* Single plane 422 at 8, 10, 12 and 16 bits */ + { VK_FORMAT_G8B8G8R8_422_UNORM, AV_PIX_FMT_YUYV422, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_B8G8R8G8_422_UNORM, AV_PIX_FMT_UYVY422, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16, AV_PIX_FMT_Y210, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16, AV_PIX_FMT_Y212, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_G16B16G16R16_422_UNORM, AV_PIX_FMT_Y216, VK_IMAGE_ASPECT_COLOR_BIT }, + + /* Single plane 444 at 10 and 12 bits */ + { VK_FORMAT_A2R10G10B10_UNORM_PACK32, AV_PIX_FMT_XV30, VK_IMAGE_ASPECT_COLOR_BIT }, + { VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, AV_PIX_FMT_XV36, VK_IMAGE_ASPECT_COLOR_BIT }, +}; +static const int nb_vk_format_map = FF_ARRAY_ELEMS(vk_format_map); + +enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf) +{ + for (int i = 0; i < nb_vk_format_map; i++) + if (vk_format_map[i].vkf == vkf) + return vk_format_map[i].pixfmt; + return AV_PIX_FMT_NONE; +} + +VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf) +{ + for (int i = 0; i < nb_vk_format_map; i++) + if (vk_format_map[i].vkf == vkf) + return vk_format_map[i].aspect; + return VK_IMAGE_ASPECT_NONE; +} + +VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc) +{ + if (desc->nb_components == 1) + return VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR; + else if 
(!desc->log2_chroma_w && !desc->log2_chroma_h) + return VK_VIDEO_CHROMA_SUBSAMPLING_444_BIT_KHR; + else if (!desc->log2_chroma_w && desc->log2_chroma_h == 1) + return VK_VIDEO_CHROMA_SUBSAMPLING_422_BIT_KHR; + else if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1) + return VK_VIDEO_CHROMA_SUBSAMPLING_420_BIT_KHR; + return VK_VIDEO_CHROMA_SUBSAMPLING_INVALID_KHR; +} + +VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth) +{ + switch (depth) { + case 8: return VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR; + case 10: return VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR; + case 12: return VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR; + default: break; + } + return VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR; +} + +int ff_vk_h264_level_to_av(StdVideoH264LevelIdc level) +{ + switch (level) { + case STD_VIDEO_H264_LEVEL_IDC_1_0: return 10; + case STD_VIDEO_H264_LEVEL_IDC_1_1: return 11; + case STD_VIDEO_H264_LEVEL_IDC_1_2: return 12; + case STD_VIDEO_H264_LEVEL_IDC_1_3: return 13; + case STD_VIDEO_H264_LEVEL_IDC_2_0: return 20; + case STD_VIDEO_H264_LEVEL_IDC_2_1: return 21; + case STD_VIDEO_H264_LEVEL_IDC_2_2: return 22; + case STD_VIDEO_H264_LEVEL_IDC_3_0: return 30; + case STD_VIDEO_H264_LEVEL_IDC_3_1: return 31; + case STD_VIDEO_H264_LEVEL_IDC_3_2: return 32; + case STD_VIDEO_H264_LEVEL_IDC_4_0: return 40; + case STD_VIDEO_H264_LEVEL_IDC_4_1: return 41; + case STD_VIDEO_H264_LEVEL_IDC_4_2: return 42; + case STD_VIDEO_H264_LEVEL_IDC_5_0: return 50; + case STD_VIDEO_H264_LEVEL_IDC_5_1: return 51; + case STD_VIDEO_H264_LEVEL_IDC_5_2: return 52; + case STD_VIDEO_H264_LEVEL_IDC_6_0: return 60; + case STD_VIDEO_H264_LEVEL_IDC_6_1: return 61; + default: + case STD_VIDEO_H264_LEVEL_IDC_6_2: return 62; + } +} + +StdVideoH264LevelIdc ff_vk_h264_level_to_vk(int level_idc) +{ + switch (level_idc) { + case 10: return STD_VIDEO_H264_LEVEL_IDC_1_0; + case 11: return STD_VIDEO_H264_LEVEL_IDC_1_1; + case 12: return STD_VIDEO_H264_LEVEL_IDC_1_2; + case 13: return 
STD_VIDEO_H264_LEVEL_IDC_1_3; + case 20: return STD_VIDEO_H264_LEVEL_IDC_2_0; + case 21: return STD_VIDEO_H264_LEVEL_IDC_2_1; + case 22: return STD_VIDEO_H264_LEVEL_IDC_2_2; + case 30: return STD_VIDEO_H264_LEVEL_IDC_3_0; + case 31: return STD_VIDEO_H264_LEVEL_IDC_3_1; + case 32: return STD_VIDEO_H264_LEVEL_IDC_3_2; + case 40: return STD_VIDEO_H264_LEVEL_IDC_4_0; + case 41: return STD_VIDEO_H264_LEVEL_IDC_4_1; + case 42: return STD_VIDEO_H264_LEVEL_IDC_4_2; + case 50: return STD_VIDEO_H264_LEVEL_IDC_5_0; + case 51: return STD_VIDEO_H264_LEVEL_IDC_5_1; + case 52: return STD_VIDEO_H264_LEVEL_IDC_5_2; + case 60: return STD_VIDEO_H264_LEVEL_IDC_6_0; + case 61: return STD_VIDEO_H264_LEVEL_IDC_6_1; + default: + case 62: return STD_VIDEO_H264_LEVEL_IDC_6_2; + } +} + +int ff_vk_h265_level_to_av(StdVideoH265LevelIdc level) +{ + switch (level) { + case STD_VIDEO_H265_LEVEL_IDC_1_0: return 10; + case STD_VIDEO_H265_LEVEL_IDC_2_0: return 20; + case STD_VIDEO_H265_LEVEL_IDC_2_1: return 21; + case STD_VIDEO_H265_LEVEL_IDC_3_0: return 30; + case STD_VIDEO_H265_LEVEL_IDC_3_1: return 31; + case STD_VIDEO_H265_LEVEL_IDC_4_0: return 40; + case STD_VIDEO_H265_LEVEL_IDC_4_1: return 41; + case STD_VIDEO_H265_LEVEL_IDC_5_0: return 50; + case STD_VIDEO_H265_LEVEL_IDC_5_1: return 51; + case STD_VIDEO_H265_LEVEL_IDC_6_0: return 60; + case STD_VIDEO_H265_LEVEL_IDC_6_1: return 61; + default: + case STD_VIDEO_H265_LEVEL_IDC_6_2: return 62; + } +} + +StdVideoH265LevelIdc ff_vk_h265_level_to_vk(int level_idc) +{ + switch (level_idc) { + case 10: return STD_VIDEO_H265_LEVEL_IDC_1_0; + case 20: return STD_VIDEO_H265_LEVEL_IDC_2_0; + case 21: return STD_VIDEO_H265_LEVEL_IDC_2_1; + case 30: return STD_VIDEO_H265_LEVEL_IDC_3_0; + case 31: return STD_VIDEO_H265_LEVEL_IDC_3_1; + case 40: return STD_VIDEO_H265_LEVEL_IDC_4_0; + case 41: return STD_VIDEO_H265_LEVEL_IDC_4_1; + case 50: return STD_VIDEO_H265_LEVEL_IDC_5_0; + case 51: return STD_VIDEO_H265_LEVEL_IDC_5_1; + case 60: return 
STD_VIDEO_H265_LEVEL_IDC_6_0; + case 61: return STD_VIDEO_H265_LEVEL_IDC_6_1; + default: + case 62: return STD_VIDEO_H265_LEVEL_IDC_6_2; + } +} + +StdVideoAV1Level ff_vk_av1_level_to_vk(int level) +{ + switch (level) { + case 20: return STD_VIDEO_AV1_LEVEL_2_0; + case 21: return STD_VIDEO_AV1_LEVEL_2_1; + case 22: return STD_VIDEO_AV1_LEVEL_2_2; + case 23: return STD_VIDEO_AV1_LEVEL_2_3; + case 30: return STD_VIDEO_AV1_LEVEL_3_0; + case 31: return STD_VIDEO_AV1_LEVEL_3_1; + case 32: return STD_VIDEO_AV1_LEVEL_3_2; + case 33: return STD_VIDEO_AV1_LEVEL_3_3; + case 40: return STD_VIDEO_AV1_LEVEL_4_0; + case 41: return STD_VIDEO_AV1_LEVEL_4_1; + case 42: return STD_VIDEO_AV1_LEVEL_4_2; + case 43: return STD_VIDEO_AV1_LEVEL_4_3; + case 50: return STD_VIDEO_AV1_LEVEL_5_0; + case 51: return STD_VIDEO_AV1_LEVEL_5_1; + case 52: return STD_VIDEO_AV1_LEVEL_5_2; + case 53: return STD_VIDEO_AV1_LEVEL_5_3; + case 60: return STD_VIDEO_AV1_LEVEL_6_0; + case 61: return STD_VIDEO_AV1_LEVEL_6_1; + case 62: return STD_VIDEO_AV1_LEVEL_6_2; + case 63: return STD_VIDEO_AV1_LEVEL_6_3; + case 70: return STD_VIDEO_AV1_LEVEL_7_0; + case 71: return STD_VIDEO_AV1_LEVEL_7_1; + case 72: return STD_VIDEO_AV1_LEVEL_7_2; + default: + case 73: return STD_VIDEO_AV1_LEVEL_7_3; + } +} + +StdVideoH264ProfileIdc ff_vk_h264_profile_to_vk(int profile) +{ + switch (profile) { + case AV_PROFILE_H264_CONSTRAINED_BASELINE: return STD_VIDEO_H264_PROFILE_IDC_BASELINE; + case AV_PROFILE_H264_MAIN: return STD_VIDEO_H264_PROFILE_IDC_MAIN; + case AV_PROFILE_H264_HIGH: return STD_VIDEO_H264_PROFILE_IDC_HIGH; + case AV_PROFILE_H264_HIGH_444_PREDICTIVE: return STD_VIDEO_H264_PROFILE_IDC_HIGH_444_PREDICTIVE; + default: return STD_VIDEO_H264_PROFILE_IDC_INVALID; + } +} + +StdVideoH265ProfileIdc ff_vk_h265_profile_to_vk(int profile) +{ + switch (profile) { + case AV_PROFILE_HEVC_MAIN: return STD_VIDEO_H265_PROFILE_IDC_MAIN; + case AV_PROFILE_HEVC_MAIN_10: return STD_VIDEO_H265_PROFILE_IDC_MAIN_10; + case 
AV_PROFILE_HEVC_REXT: return STD_VIDEO_H265_PROFILE_IDC_FORMAT_RANGE_EXTENSIONS; + default: return STD_VIDEO_H265_PROFILE_IDC_INVALID; + } +} + +StdVideoAV1Profile ff_vk_av1_profile_to_vk(int profile) +{ + switch (profile) { + case AV_PROFILE_AV1_MAIN: return STD_VIDEO_AV1_PROFILE_MAIN; + case AV_PROFILE_AV1_HIGH: return STD_VIDEO_AV1_PROFILE_HIGH; + case AV_PROFILE_AV1_PROFESSIONAL: return STD_VIDEO_AV1_PROFILE_PROFESSIONAL; + default: return STD_VIDEO_AV1_PROFILE_INVALID; + } +} + +int ff_vk_create_view(FFVulkanContext *s, FFVkVideoCommon *common, + VkImageView *view, VkImageAspectFlags *aspect, + AVVkFrame *src, VkFormat vkf, int is_dpb) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(vkf); + + VkSamplerYcbcrConversionInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, + .conversion = common->yuv_sampler, + }; + VkImageViewCreateInfo img_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &yuv_sampler_info, + .viewType = common->layered_dpb && is_dpb ? + VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D, + .format = vkf, + .image = src->img[0], + .components = (VkComponentMapping) { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseArrayLayer = 0, + .layerCount = common->layered_dpb && is_dpb ? 
+ VK_REMAINING_ARRAY_LAYERS : 1, + .levelCount = 1, + }, + }; + + ret = vk->CreateImageView(s->hwctx->act_dev, &img_view_create_info, + s->hwctx->alloc, view); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + *aspect = aspect_mask; + + return 0; +} + +av_cold void ff_vk_video_common_uninit(FFVulkanContext *s, + FFVkVideoCommon *common) +{ + FFVulkanFunctions *vk = &s->vkfn; + + if (common->session) { + vk->DestroyVideoSessionKHR(s->hwctx->act_dev, common->session, + s->hwctx->alloc); + common->session = VK_NULL_HANDLE; + } + + if (common->nb_mem && common->mem) + for (int i = 0; i < common->nb_mem; i++) + vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc); + + av_freep(&common->mem); + + if (common->layered_view) + vk->DestroyImageView(s->hwctx->act_dev, common->layered_view, + s->hwctx->alloc); + + av_frame_free(&common->layered_frame); + + av_buffer_unref(&common->dpb_hwfc_ref); + + if (common->yuv_sampler) + vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, common->yuv_sampler, + s->hwctx->alloc); +} + +av_cold int ff_vk_video_common_init(AVCodecContext *avctx, FFVulkanContext *s, + FFVkVideoCommon *common, + VkVideoSessionCreateInfoKHR *session_create) +{ + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkVideoSessionMemoryRequirementsKHR *mem = NULL; + VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL; + + int cxpos = 0, cypos = 0; + VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, + .components = ff_comp_identity_map, + .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY, + .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */ + .format = session_create->pictureFormat, + }; + + /* Create identity YUV sampler + * (VkImageViews of YUV image formats require it, even if it does nothing) */ + av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location); + yuv_sampler_info.xChromaOffset = cxpos >> 7; + 
yuv_sampler_info.yChromaOffset = cypos >> 7; + ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info, + s->hwctx->alloc, &common->yuv_sampler); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + /* Create session */ + ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create, + s->hwctx->alloc, &common->session); + if (ret != VK_SUCCESS) + return AVERROR_EXTERNAL; + + /* Get memory requirements */ + ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev, + common->session, + &common->nb_mem, + NULL); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Allocate all memory needed to actually allocate memory */ + common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem); + if (!common->mem) { + err = AVERROR(ENOMEM); + goto fail; + } + mem = av_mallocz(sizeof(*mem)*common->nb_mem); + if (!mem) { + err = AVERROR(ENOMEM); + goto fail; + } + bind_mem = av_mallocz(sizeof(*bind_mem)*common->nb_mem); + if (!bind_mem) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Set the needed fields to get the memory requirements */ + for (int i = 0; i < common->nb_mem; i++) { + mem[i] = (VkVideoSessionMemoryRequirementsKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_MEMORY_REQUIREMENTS_KHR, + }; + } + + /* Finally get the memory requirements */ + ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev, + common->session, &common->nb_mem, + mem); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Now allocate each requested memory. + * For ricing, could pool together memory that ends up in the same index. 
*/ + for (int i = 0; i < common->nb_mem; i++) { + err = ff_vk_alloc_mem(s, &mem[i].memoryRequirements, + UINT32_MAX, NULL, NULL, &common->mem[i]); + if (err < 0) + goto fail; + + bind_mem[i] = (VkBindVideoSessionMemoryInfoKHR) { + .sType = VK_STRUCTURE_TYPE_BIND_VIDEO_SESSION_MEMORY_INFO_KHR, + .memory = common->mem[i], + .memoryBindIndex = mem[i].memoryBindIndex, + .memoryOffset = 0, + .memorySize = mem[i].memoryRequirements.size, + }; + + av_log(avctx, AV_LOG_VERBOSE, "Allocating %"PRIu64" bytes in bind index %i for video session\n", + bind_mem[i].memorySize, bind_mem[i].memoryBindIndex); + } + + /* Bind the allocated memory */ + ret = vk->BindVideoSessionMemoryKHR(s->hwctx->act_dev, common->session, + common->nb_mem, bind_mem); + if (ret != VK_SUCCESS) { + err = AVERROR_EXTERNAL; + goto fail; + } + + av_freep(&mem); + av_freep(&bind_mem); + + return 0; + +fail: + av_freep(&mem); + av_freep(&bind_mem); + + ff_vk_video_common_uninit(s, common); + return err; +} -- 2.49.1 From 3e4da93daae961ba6b9e853c2e8f43b5a9f9c109 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:42:25 +0000 Subject: [PATCH 091/118] Changing vulkan file directory --- libavcodec/vulkan_vp9.c | 371 ---------------------------------------- 1 file changed, 371 deletions(-) delete mode 100644 libavcodec/vulkan_vp9.c diff --git a/libavcodec/vulkan_vp9.c b/libavcodec/vulkan_vp9.c deleted file mode 100644 index 7b852a29a5..0000000000 --- a/libavcodec/vulkan_vp9.c +++ /dev/null @@ -1,371 +0,0 @@ -/* - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "vp9dec.h" - -#include "vulkan_decode.h" - -const FFVulkanDecodeDescriptor ff_vk_dec_vp9_desc = { - .codec_id = AV_CODEC_ID_VP9, - .decode_extension = FF_VK_EXT_VIDEO_DECODE_VP9, - .queue_flags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, - .decode_op = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR, - .ext_props = { - .extensionName = VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, - .specVersion = VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION, - }, -}; - -typedef struct VP9VulkanDecodePicture { - FFVulkanDecodePicture vp; - - /* TODO: investigate if this can be removed to make decoding completely - * independent. 
*/ - FFVulkanDecodeContext *dec; - - /* Current picture */ - StdVideoVP9ColorConfig color_config; - StdVideoVP9Segmentation segmentation; - StdVideoVP9LoopFilter loop_filter; - StdVideoDecodeVP9PictureInfo std_pic_info; - VkVideoDecodeVP9PictureInfoKHR vp9_pic_info; - - const VP9Frame *ref_src[8]; - - uint8_t frame_id_set; - uint8_t frame_id; - uint8_t ref_frame_sign_bias_mask; -} VP9VulkanDecodePicture; - -static int vk_vp9_fill_pict(AVCodecContext *avctx, const VP9Frame **ref_src, - VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ - VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ - const VP9Frame *pic, int is_current) -{ - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - FFVulkanDecodeShared *ctx = dec->shared_ctx; - VP9VulkanDecodePicture *hp = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vkpic = &hp->vp; - - int err = ff_vk_decode_prepare_frame(dec, pic->tf.f, vkpic, is_current, - dec->dedicated_dpb); - if (err < 0) - return err; - - *ref = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - .codedOffset = (VkOffset2D){ 0, 0 }, - .codedExtent = (VkExtent2D){ pic->tf.f->width, pic->tf.f->height }, - .baseArrayLayer = (dec->dedicated_dpb && ctx->common.layered_dpb) ? 
- hp->frame_id : 0, - .imageViewBinding = vkpic->view.ref[0], - }; - - *ref_slot = (VkVideoReferenceSlotInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, - .slotIndex = hp->frame_id, - .pPictureResource = ref, - }; - - if (ref_src) - *ref_src = pic; - - return 0; -} - -static enum StdVideoVP9InterpolationFilter remap_interp(uint8_t is_filter_switchable, - uint8_t raw_interpolation_filter_type) -{ - static const enum StdVideoVP9InterpolationFilter remap[] = { - STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SMOOTH, - STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP, - STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SHARP, - STD_VIDEO_VP9_INTERPOLATION_FILTER_BILINEAR, - }; - if (is_filter_switchable) - return STD_VIDEO_VP9_INTERPOLATION_FILTER_SWITCHABLE; - return remap[raw_interpolation_filter_type]; -} - -static int vk_vp9_start_frame(AVCodecContext *avctx, - av_unused const AVBufferRef *buffer_ref, - av_unused const uint8_t *buffer, - av_unused uint32_t size) -{ - int err; - int ref_count = 0; - const VP9Context *priv = avctx->priv_data; - const CodedBitstreamVP9Context *vp9 = priv->cbc->priv_data; - const VP9SharedContext *s = &priv->s; - uint32_t frame_id_alloc_mask = 0; - - const VP9Frame *pic = &s->frames[CUR_FRAME]; - FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; - uint8_t profile = (pic->frame_header->profile_high_bit << 1) | pic->frame_header->profile_low_bit; - - VP9VulkanDecodePicture *ap = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &ap->vp; - - /* Use the current frame_ids in ref_frames[] to decide occupied frame_ids */ - for (int i = 0; i < STD_VIDEO_VP9_NUM_REF_FRAMES; i++) { - const VP9VulkanDecodePicture* rp = s->ref_frames[i].hwaccel_picture_private; - if (rp) - frame_id_alloc_mask |= 1 << rp->frame_id; - } - - if (!ap->frame_id_set) { - unsigned slot_idx = 0; - for (unsigned i = 0; i < 32; i++) { - if (!(frame_id_alloc_mask & (1 << i))) { - slot_idx = i; - break; - } - } - ap->frame_id = slot_idx; - 
ap->frame_id_set = 1; - frame_id_alloc_mask |= (1 << slot_idx); - } - - for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { - const int idx = pic->frame_header->ref_frame_idx[i]; - const VP9Frame *ref_frame = &s->ref_frames[idx]; - VP9VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private; - int found = 0; - - if (!ref_frame->tf.f) - continue; - - for (int j = 0; j < ref_count; j++) { - if (vp->ref_slots[j].slotIndex == hp->frame_id) { - found = 1; - break; - } - } - if (found) - continue; - - err = vk_vp9_fill_pict(avctx, &ap->ref_src[ref_count], - &vp->ref_slots[ref_count], &vp->refs[ref_count], - ref_frame, 0); - if (err < 0) - return err; - - ref_count++; - } - - err = vk_vp9_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, - pic, 1); - if (err < 0) - return err; - - ap->loop_filter = (StdVideoVP9LoopFilter) { - .flags = (StdVideoVP9LoopFilterFlags) { - .loop_filter_delta_enabled = pic->frame_header->loop_filter_delta_enabled, - .loop_filter_delta_update = pic->frame_header->loop_filter_delta_update, - }, - .loop_filter_level = pic->frame_header->loop_filter_level, - .loop_filter_sharpness = pic->frame_header->loop_filter_sharpness, - .update_ref_delta = 0x0, - .update_mode_delta = 0x0, - }; - - for (int i = 0; i < STD_VIDEO_VP9_MAX_REF_FRAMES; i++) { - ap->loop_filter.loop_filter_ref_deltas[i] = vp9->loop_filter_ref_deltas[i]; - ap->loop_filter.update_ref_delta |= pic->frame_header->update_ref_delta[i]; - } - for (int i = 0; i < STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS; i++) { - ap->loop_filter.loop_filter_mode_deltas[i] = vp9->loop_filter_mode_deltas[i]; - ap->loop_filter.update_mode_delta |= pic->frame_header->update_mode_delta[i]; - } - - ap->segmentation = (StdVideoVP9Segmentation) { - .flags = (StdVideoVP9SegmentationFlags) { - .segmentation_update_map = pic->frame_header->segmentation_update_map, - .segmentation_temporal_update = pic->frame_header->segmentation_temporal_update, - .segmentation_update_data = 
pic->frame_header->segmentation_update_data, - .segmentation_abs_or_delta_update = pic->frame_header->segmentation_abs_or_delta_update, - }, - }; - - for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_TREE_PROBS; i++) - ap->segmentation.segmentation_tree_probs[i] = vp9->segmentation_tree_probs[i]; - for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_PRED_PROB; i++) - ap->segmentation.segmentation_pred_prob[i] = vp9->segmentation_pred_prob[i]; - for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTS; i++) { - ap->segmentation.FeatureEnabled[i] = 0x0; - for (int j = 0; j < STD_VIDEO_VP9_SEG_LVL_MAX; j++) { - ap->segmentation.FeatureEnabled[i] |= vp9->feature_enabled[i][j] << j; - ap->segmentation.FeatureData[i][j] = vp9->feature_sign[i][j] ? - -vp9->feature_value[i][j] : - +vp9->feature_value[i][j]; - } - } - - ap->color_config = (StdVideoVP9ColorConfig) { - .flags = (StdVideoVP9ColorConfigFlags) { - .color_range = pic->frame_header->color_range, - }, - .BitDepth = profile < 2 ? 8 : - pic->frame_header->ten_or_twelve_bit ? 
12 : 10, - .subsampling_x = pic->frame_header->subsampling_x, - .subsampling_y = pic->frame_header->subsampling_y, - .color_space = pic->frame_header->color_space, - }; - - ap->std_pic_info = (StdVideoDecodeVP9PictureInfo) { - .flags = (StdVideoDecodeVP9PictureInfoFlags) { - .error_resilient_mode = pic->frame_header->error_resilient_mode, - .intra_only = pic->frame_header->intra_only, - .allow_high_precision_mv = pic->frame_header->allow_high_precision_mv, - .refresh_frame_context = pic->frame_header->refresh_frame_context, - .frame_parallel_decoding_mode = pic->frame_header->frame_parallel_decoding_mode, - .segmentation_enabled = pic->frame_header->segmentation_enabled, - .show_frame = pic->frame_header->segmentation_enabled, - .UsePrevFrameMvs = s->h.use_last_frame_mvs, - }, - .profile = profile, - .frame_type = pic->frame_header->frame_type, - .frame_context_idx = pic->frame_header->frame_context_idx, - .reset_frame_context = pic->frame_header->reset_frame_context, - .refresh_frame_flags = pic->frame_header->refresh_frame_flags, - .ref_frame_sign_bias_mask = 0x0, - .interpolation_filter = remap_interp(pic->frame_header->is_filter_switchable, - pic->frame_header->raw_interpolation_filter_type), - .base_q_idx = pic->frame_header->base_q_idx, - .delta_q_y_dc = pic->frame_header->delta_q_y_dc, - .delta_q_uv_dc = pic->frame_header->delta_q_uv_dc, - .delta_q_uv_ac = pic->frame_header->delta_q_uv_ac, - .tile_cols_log2 = pic->frame_header->tile_cols_log2, - .tile_rows_log2 = pic->frame_header->tile_rows_log2, - /* Reserved */ - .pColorConfig = &ap->color_config, - .pLoopFilter = &ap->loop_filter, - .pSegmentation = &ap->segmentation, - }; - - for (int i = VP9_LAST_FRAME; i <= VP9_ALTREF_FRAME; i++) - ap->std_pic_info.ref_frame_sign_bias_mask |= pic->frame_header->ref_frame_sign_bias[i] << i; - - ap->vp9_pic_info = (VkVideoDecodeVP9PictureInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR, - .pStdPictureInfo = &ap->std_pic_info, - 
.uncompressedHeaderOffset = 0, - .compressedHeaderOffset = s->h.uncompressed_header_size, - .tilesOffset = s->h.uncompressed_header_size + - s->h.compressed_header_size, - }; - - for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { - const int idx = pic->frame_header->ref_frame_idx[i]; - const VP9Frame *ref_frame = &s->ref_frames[idx]; - VP9VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private; - - if (!ref_frame->tf.f) - ap->vp9_pic_info.referenceNameSlotIndices[i] = -1; - else - ap->vp9_pic_info.referenceNameSlotIndices[i] = hp->frame_id; - } - - vp->decode_info = (VkVideoDecodeInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, - .pNext = &ap->vp9_pic_info, - .flags = 0x0, - .pSetupReferenceSlot = &vp->ref_slot, - .referenceSlotCount = ref_count, - .pReferenceSlots = vp->ref_slots, - .dstPictureResource = (VkVideoPictureResourceInfoKHR) { - .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, - .codedOffset = (VkOffset2D){ 0, 0 }, - .codedExtent = (VkExtent2D){ pic->tf.f->width, pic->tf.f->height }, - .baseArrayLayer = 0, - .imageViewBinding = vp->view.out[0], - }, - }; - - ap->dec = dec; - - return 0; -} - -static int vk_vp9_decode_slice(AVCodecContext *avctx, - const uint8_t *data, - uint32_t size) -{ - int err; - const VP9SharedContext *s = avctx->priv_data; - VP9VulkanDecodePicture *ap = s->frames[CUR_FRAME].hwaccel_picture_private; - FFVulkanDecodePicture *vp = &ap->vp; - - err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, NULL, NULL); - if (err < 0) - return err; - - return 0; -} - -static int vk_vp9_end_frame(AVCodecContext *avctx) -{ - const VP9SharedContext *s = avctx->priv_data; - - const VP9Frame *pic = &s->frames[CUR_FRAME]; - VP9VulkanDecodePicture *ap = pic->hwaccel_picture_private; - FFVulkanDecodePicture *vp = &ap->vp; - FFVulkanDecodePicture *rvp[STD_VIDEO_VP9_REFS_PER_FRAME] = { 0 }; - AVFrame *rav[STD_VIDEO_VP9_REFS_PER_FRAME] = { 0 }; - - for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { - const 
VP9Frame *rp = ap->ref_src[i]; - VP9VulkanDecodePicture *rhp = rp->hwaccel_picture_private; - - rvp[i] = &rhp->vp; - rav[i] = ap->ref_src[i]->tf.f; - } - - av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %"SIZE_SPECIFIER" bytes\n", - vp->slices_size); - - return ff_vk_decode_frame(avctx, pic->tf.f, vp, rav, rvp); -} - -static void vk_vp9_free_frame_priv(AVRefStructOpaque _hwctx, void *data) -{ - AVHWDeviceContext *hwctx = _hwctx.nc; - VP9VulkanDecodePicture *ap = data; - - /* Free frame resources, this also destroys the session parameters. */ - ff_vk_decode_free_frame(hwctx, &ap->vp); -} - -const FFHWAccel ff_vp9_vulkan_hwaccel = { - .p.name = "vp9_vulkan", - .p.type = AVMEDIA_TYPE_VIDEO, - .p.id = AV_CODEC_ID_VP9, - .p.pix_fmt = AV_PIX_FMT_VULKAN, - .start_frame = &vk_vp9_start_frame, - .decode_slice = &vk_vp9_decode_slice, - .end_frame = &vk_vp9_end_frame, - .free_frame_priv = &vk_vp9_free_frame_priv, - .frame_priv_data_size = sizeof(VP9VulkanDecodePicture), - .init = &ff_vk_decode_init, - .update_thread_context = &ff_vk_update_thread_context, - .flush = &ff_vk_decode_flush, - .uninit = &ff_vk_decode_uninit, - .frame_params = &ff_vk_frame_params, - .priv_data_size = sizeof(FFVulkanDecodeContext), - .caps_internal = HWACCEL_CAP_ASYNC_SAFE, -}; -- 2.49.1 From 35b069c88ef3463985f3327388c577cb54cdfb78 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:42:57 +0000 Subject: [PATCH 092/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_vp9.c | 371 +++++++++++++++++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_vp9.c diff --git a/libavcodec/vulkan/vulkan_vp9.c b/libavcodec/vulkan/vulkan_vp9.c new file mode 100644 index 0000000000..b150d25160 --- /dev/null +++ b/libavcodec/vulkan/vulkan_vp9.c @@ -0,0 +1,371 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/vp9dec.h" + +#include "vulkan_decode.h" + +const FFVulkanDecodeDescriptor ff_vk_dec_vp9_desc = { + .codec_id = AV_CODEC_ID_VP9, + .decode_extension = FF_VK_EXT_VIDEO_DECODE_VP9, + .queue_flags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, + .decode_op = VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR, + .ext_props = { + .extensionName = VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_EXTENSION_NAME, + .specVersion = VK_STD_VULKAN_VIDEO_CODEC_VP9_DECODE_SPEC_VERSION, + }, +}; + +typedef struct VP9VulkanDecodePicture { + FFVulkanDecodePicture vp; + + /* TODO: investigate if this can be removed to make decoding completely + * independent. 
*/ + FFVulkanDecodeContext *dec; + + /* Current picture */ + StdVideoVP9ColorConfig color_config; + StdVideoVP9Segmentation segmentation; + StdVideoVP9LoopFilter loop_filter; + StdVideoDecodeVP9PictureInfo std_pic_info; + VkVideoDecodeVP9PictureInfoKHR vp9_pic_info; + + const VP9Frame *ref_src[8]; + + uint8_t frame_id_set; + uint8_t frame_id; + uint8_t ref_frame_sign_bias_mask; +} VP9VulkanDecodePicture; + +static int vk_vp9_fill_pict(AVCodecContext *avctx, const VP9Frame **ref_src, + VkVideoReferenceSlotInfoKHR *ref_slot, /* Main structure */ + VkVideoPictureResourceInfoKHR *ref, /* Goes in ^ */ + const VP9Frame *pic, int is_current) +{ + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + VP9VulkanDecodePicture *hp = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vkpic = &hp->vp; + + int err = ff_vk_decode_prepare_frame(dec, pic->tf.f, vkpic, is_current, + dec->dedicated_dpb); + if (err < 0) + return err; + + *ref = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->tf.f->width, pic->tf.f->height }, + .baseArrayLayer = (dec->dedicated_dpb && ctx->common.layered_dpb) ? 
+ hp->frame_id : 0, + .imageViewBinding = vkpic->view.ref[0], + }; + + *ref_slot = (VkVideoReferenceSlotInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR, + .slotIndex = hp->frame_id, + .pPictureResource = ref, + }; + + if (ref_src) + *ref_src = pic; + + return 0; +} + +static enum StdVideoVP9InterpolationFilter remap_interp(uint8_t is_filter_switchable, + uint8_t raw_interpolation_filter_type) +{ + static const enum StdVideoVP9InterpolationFilter remap[] = { + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SMOOTH, + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP, + STD_VIDEO_VP9_INTERPOLATION_FILTER_EIGHTTAP_SHARP, + STD_VIDEO_VP9_INTERPOLATION_FILTER_BILINEAR, + }; + if (is_filter_switchable) + return STD_VIDEO_VP9_INTERPOLATION_FILTER_SWITCHABLE; + return remap[raw_interpolation_filter_type]; +} + +static int vk_vp9_start_frame(AVCodecContext *avctx, + av_unused const AVBufferRef *buffer_ref, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + int ref_count = 0; + const VP9Context *priv = avctx->priv_data; + const CodedBitstreamVP9Context *vp9 = priv->cbc->priv_data; + const VP9SharedContext *s = &priv->s; + uint32_t frame_id_alloc_mask = 0; + + const VP9Frame *pic = &s->frames[CUR_FRAME]; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + uint8_t profile = (pic->frame_header->profile_high_bit << 1) | pic->frame_header->profile_low_bit; + + VP9VulkanDecodePicture *ap = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + + /* Use the current frame_ids in ref_frames[] to decide occupied frame_ids */ + for (int i = 0; i < STD_VIDEO_VP9_NUM_REF_FRAMES; i++) { + const VP9VulkanDecodePicture* rp = s->ref_frames[i].hwaccel_picture_private; + if (rp) + frame_id_alloc_mask |= 1 << rp->frame_id; + } + + if (!ap->frame_id_set) { + unsigned slot_idx = 0; + for (unsigned i = 0; i < 32; i++) { + if (!(frame_id_alloc_mask & (1 << i))) { + slot_idx = i; + break; + } + } + ap->frame_id = slot_idx; + 
ap->frame_id_set = 1; + frame_id_alloc_mask |= (1 << slot_idx); + } + + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { + const int idx = pic->frame_header->ref_frame_idx[i]; + const VP9Frame *ref_frame = &s->ref_frames[idx]; + VP9VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private; + int found = 0; + + if (!ref_frame->tf.f) + continue; + + for (int j = 0; j < ref_count; j++) { + if (vp->ref_slots[j].slotIndex == hp->frame_id) { + found = 1; + break; + } + } + if (found) + continue; + + err = vk_vp9_fill_pict(avctx, &ap->ref_src[ref_count], + &vp->ref_slots[ref_count], &vp->refs[ref_count], + ref_frame, 0); + if (err < 0) + return err; + + ref_count++; + } + + err = vk_vp9_fill_pict(avctx, NULL, &vp->ref_slot, &vp->ref, + pic, 1); + if (err < 0) + return err; + + ap->loop_filter = (StdVideoVP9LoopFilter) { + .flags = (StdVideoVP9LoopFilterFlags) { + .loop_filter_delta_enabled = pic->frame_header->loop_filter_delta_enabled, + .loop_filter_delta_update = pic->frame_header->loop_filter_delta_update, + }, + .loop_filter_level = pic->frame_header->loop_filter_level, + .loop_filter_sharpness = pic->frame_header->loop_filter_sharpness, + .update_ref_delta = 0x0, + .update_mode_delta = 0x0, + }; + + for (int i = 0; i < STD_VIDEO_VP9_MAX_REF_FRAMES; i++) { + ap->loop_filter.loop_filter_ref_deltas[i] = vp9->loop_filter_ref_deltas[i]; + ap->loop_filter.update_ref_delta |= pic->frame_header->update_ref_delta[i]; + } + for (int i = 0; i < STD_VIDEO_VP9_LOOP_FILTER_ADJUSTMENTS; i++) { + ap->loop_filter.loop_filter_mode_deltas[i] = vp9->loop_filter_mode_deltas[i]; + ap->loop_filter.update_mode_delta |= pic->frame_header->update_mode_delta[i]; + } + + ap->segmentation = (StdVideoVP9Segmentation) { + .flags = (StdVideoVP9SegmentationFlags) { + .segmentation_update_map = pic->frame_header->segmentation_update_map, + .segmentation_temporal_update = pic->frame_header->segmentation_temporal_update, + .segmentation_update_data = 
pic->frame_header->segmentation_update_data, + .segmentation_abs_or_delta_update = pic->frame_header->segmentation_abs_or_delta_update, + }, + }; + + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_TREE_PROBS; i++) + ap->segmentation.segmentation_tree_probs[i] = vp9->segmentation_tree_probs[i]; + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTATION_PRED_PROB; i++) + ap->segmentation.segmentation_pred_prob[i] = vp9->segmentation_pred_prob[i]; + for (int i = 0; i < STD_VIDEO_VP9_MAX_SEGMENTS; i++) { + ap->segmentation.FeatureEnabled[i] = 0x0; + for (int j = 0; j < STD_VIDEO_VP9_SEG_LVL_MAX; j++) { + ap->segmentation.FeatureEnabled[i] |= vp9->feature_enabled[i][j] << j; + ap->segmentation.FeatureData[i][j] = vp9->feature_sign[i][j] ? + -vp9->feature_value[i][j] : + +vp9->feature_value[i][j]; + } + } + + ap->color_config = (StdVideoVP9ColorConfig) { + .flags = (StdVideoVP9ColorConfigFlags) { + .color_range = pic->frame_header->color_range, + }, + .BitDepth = profile < 2 ? 8 : + pic->frame_header->ten_or_twelve_bit ? 
12 : 10, + .subsampling_x = pic->frame_header->subsampling_x, + .subsampling_y = pic->frame_header->subsampling_y, + .color_space = pic->frame_header->color_space, + }; + + ap->std_pic_info = (StdVideoDecodeVP9PictureInfo) { + .flags = (StdVideoDecodeVP9PictureInfoFlags) { + .error_resilient_mode = pic->frame_header->error_resilient_mode, + .intra_only = pic->frame_header->intra_only, + .allow_high_precision_mv = pic->frame_header->allow_high_precision_mv, + .refresh_frame_context = pic->frame_header->refresh_frame_context, + .frame_parallel_decoding_mode = pic->frame_header->frame_parallel_decoding_mode, + .segmentation_enabled = pic->frame_header->segmentation_enabled, + .show_frame = pic->frame_header->segmentation_enabled, + .UsePrevFrameMvs = s->h.use_last_frame_mvs, + }, + .profile = profile, + .frame_type = pic->frame_header->frame_type, + .frame_context_idx = pic->frame_header->frame_context_idx, + .reset_frame_context = pic->frame_header->reset_frame_context, + .refresh_frame_flags = pic->frame_header->refresh_frame_flags, + .ref_frame_sign_bias_mask = 0x0, + .interpolation_filter = remap_interp(pic->frame_header->is_filter_switchable, + pic->frame_header->raw_interpolation_filter_type), + .base_q_idx = pic->frame_header->base_q_idx, + .delta_q_y_dc = pic->frame_header->delta_q_y_dc, + .delta_q_uv_dc = pic->frame_header->delta_q_uv_dc, + .delta_q_uv_ac = pic->frame_header->delta_q_uv_ac, + .tile_cols_log2 = pic->frame_header->tile_cols_log2, + .tile_rows_log2 = pic->frame_header->tile_rows_log2, + /* Reserved */ + .pColorConfig = &ap->color_config, + .pLoopFilter = &ap->loop_filter, + .pSegmentation = &ap->segmentation, + }; + + for (int i = VP9_LAST_FRAME; i <= VP9_ALTREF_FRAME; i++) + ap->std_pic_info.ref_frame_sign_bias_mask |= pic->frame_header->ref_frame_sign_bias[i] << i; + + ap->vp9_pic_info = (VkVideoDecodeVP9PictureInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_VP9_PICTURE_INFO_KHR, + .pStdPictureInfo = &ap->std_pic_info, + 
.uncompressedHeaderOffset = 0, + .compressedHeaderOffset = s->h.uncompressed_header_size, + .tilesOffset = s->h.uncompressed_header_size + + s->h.compressed_header_size, + }; + + for (int i = 0; i < STD_VIDEO_VP9_REFS_PER_FRAME; i++) { + const int idx = pic->frame_header->ref_frame_idx[i]; + const VP9Frame *ref_frame = &s->ref_frames[idx]; + VP9VulkanDecodePicture *hp = ref_frame->hwaccel_picture_private; + + if (!ref_frame->tf.f) + ap->vp9_pic_info.referenceNameSlotIndices[i] = -1; + else + ap->vp9_pic_info.referenceNameSlotIndices[i] = hp->frame_id; + } + + vp->decode_info = (VkVideoDecodeInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_INFO_KHR, + .pNext = &ap->vp9_pic_info, + .flags = 0x0, + .pSetupReferenceSlot = &vp->ref_slot, + .referenceSlotCount = ref_count, + .pReferenceSlots = vp->ref_slots, + .dstPictureResource = (VkVideoPictureResourceInfoKHR) { + .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR, + .codedOffset = (VkOffset2D){ 0, 0 }, + .codedExtent = (VkExtent2D){ pic->tf.f->width, pic->tf.f->height }, + .baseArrayLayer = 0, + .imageViewBinding = vp->view.out[0], + }, + }; + + ap->dec = dec; + + return 0; +} + +static int vk_vp9_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + int err; + const VP9SharedContext *s = avctx->priv_data; + VP9VulkanDecodePicture *ap = s->frames[CUR_FRAME].hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + + err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, NULL, NULL); + if (err < 0) + return err; + + return 0; +} + +static int vk_vp9_end_frame(AVCodecContext *avctx) +{ + const VP9SharedContext *s = avctx->priv_data; + + const VP9Frame *pic = &s->frames[CUR_FRAME]; + VP9VulkanDecodePicture *ap = pic->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &ap->vp; + FFVulkanDecodePicture *rvp[STD_VIDEO_VP9_REFS_PER_FRAME] = { 0 }; + AVFrame *rav[STD_VIDEO_VP9_REFS_PER_FRAME] = { 0 }; + + for (int i = 0; i < vp->decode_info.referenceSlotCount; i++) { + const 
VP9Frame *rp = ap->ref_src[i]; + VP9VulkanDecodePicture *rhp = rp->hwaccel_picture_private; + + rvp[i] = &rhp->vp; + rav[i] = ap->ref_src[i]->tf.f; + } + + av_log(avctx, AV_LOG_VERBOSE, "Decoding frame, %"SIZE_SPECIFIER" bytes\n", + vp->slices_size); + + return ff_vk_decode_frame(avctx, pic->tf.f, vp, rav, rvp); +} + +static void vk_vp9_free_frame_priv(AVRefStructOpaque _hwctx, void *data) +{ + AVHWDeviceContext *hwctx = _hwctx.nc; + VP9VulkanDecodePicture *ap = data; + + /* Free frame resources, this also destroys the session parameters. */ + ff_vk_decode_free_frame(hwctx, &ap->vp); +} + +const FFHWAccel ff_vp9_vulkan_hwaccel = { + .p.name = "vp9_vulkan", + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_VP9, + .p.pix_fmt = AV_PIX_FMT_VULKAN, + .start_frame = &vk_vp9_start_frame, + .decode_slice = &vk_vp9_decode_slice, + .end_frame = &vk_vp9_end_frame, + .free_frame_priv = &vk_vp9_free_frame_priv, + .frame_priv_data_size = sizeof(VP9VulkanDecodePicture), + .init = &ff_vk_decode_init, + .update_thread_context = &ff_vk_update_thread_context, + .flush = &ff_vk_decode_flush, + .uninit = &ff_vk_decode_uninit, + .frame_params = &ff_vk_frame_params, + .priv_data_size = sizeof(FFVulkanDecodeContext), + .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +}; -- 2.49.1 From 59a745ed0eb1ae1171b0e8aa06aafd276cb76097 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:43:49 +0000 Subject: [PATCH 093/118] Changing vulkan file directory --- libavcodec/vulkan_video.h | 102 -------------------------------------- 1 file changed, 102 deletions(-) delete mode 100644 libavcodec/vulkan_video.h diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h deleted file mode 100644 index 1b29f7adc7..0000000000 --- a/libavcodec/vulkan_video.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_VULKAN_VIDEO_H -#define AVCODEC_VULKAN_VIDEO_H - -#include "avcodec.h" -#include "libavutil/vulkan.h" - -#include <vk_video/vulkan_video_codecs_common.h> - -#define CODEC_VER_MAJ(ver) (ver >> 22) -#define CODEC_VER_MIN(ver) ((ver >> 12) & ((1 << 10) - 1)) -#define CODEC_VER_PAT(ver) (ver & ((1 << 12) - 1)) -#define CODEC_VER(ver) CODEC_VER_MAJ(ver), CODEC_VER_MIN(ver), CODEC_VER_PAT(ver) - -typedef struct FFVkVideoSession { - VkVideoSessionKHR session; - VkDeviceMemory *mem; - uint32_t nb_mem; - - VkSamplerYcbcrConversion yuv_sampler; - - AVBufferRef *dpb_hwfc_ref; - int layered_dpb; - AVFrame *layered_frame; - VkImageView layered_view; - VkImageAspectFlags layered_aspect; -} FFVkVideoCommon; - -/** - * Get pixfmt from a Vulkan format. - */ -enum AVPixelFormat ff_vk_pix_fmt_from_vkfmt(VkFormat vkf); - -/** - * Get aspect bits which include all planes from a VkFormat. - */ -VkImageAspectFlags ff_vk_aspect_bits_from_vkfmt(VkFormat vkf); - -/** - * Get Vulkan's chroma subsampling from a pixfmt descriptor. - */ -VkVideoChromaSubsamplingFlagBitsKHR ff_vk_subsampling_from_av_desc(const AVPixFmtDescriptor *desc); - -/** - * Get Vulkan's bit depth from an [8:12] integer. 
- */ -VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth); - -/** - * Convert level from Vulkan to AV. - */ -int ff_vk_h264_level_to_av(StdVideoH264LevelIdc level); -int ff_vk_h265_level_to_av(StdVideoH265LevelIdc level); - -StdVideoH264LevelIdc ff_vk_h264_level_to_vk(int level_idc); -StdVideoH265LevelIdc ff_vk_h265_level_to_vk(int level_idc); -StdVideoAV1Level ff_vk_av1_level_to_vk(int level); - -/** - * Convert profile from/to AV to Vulkan - */ -StdVideoH264ProfileIdc ff_vk_h264_profile_to_vk(int profile); -StdVideoH265ProfileIdc ff_vk_h265_profile_to_vk(int profile); -StdVideoAV1Profile ff_vk_av1_profile_to_vk(int profile); - -/** - * Creates image views for video frames. - */ -int ff_vk_create_view(FFVulkanContext *s, FFVkVideoCommon *common, - VkImageView *view, VkImageAspectFlags *aspect, - AVVkFrame *src, VkFormat vkf, int is_dpb); - -/** - * Initialize video session, allocating and binding necessary memory. - */ -int ff_vk_video_common_init(AVCodecContext *avctx, FFVulkanContext *s, - FFVkVideoCommon *common, - VkVideoSessionCreateInfoKHR *session_create); - -/** - * Free video session and required resources. - */ -void ff_vk_video_common_uninit(FFVulkanContext *s, FFVkVideoCommon *common); - -#endif /* AVCODEC_VULKAN_VIDEO_H */ -- 2.49.1 From 8035b576c288705df92e6c71a4c35dc4b08b1630 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:44:11 +0000 Subject: [PATCH 094/118] Changing vulkan file directory --- libavcodec/vulkan/vulkan_video.h | 102 +++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 libavcodec/vulkan/vulkan_video.h diff --git a/libavcodec/vulkan/vulkan_video.h b/libavcodec/vulkan/vulkan_video.h new file mode 100644 index 0000000000..e460fe78b1 --- /dev/null +++ b/libavcodec/vulkan/vulkan_video.h @@ -0,0 +1,102 @@ +/* + * This file is part of FFmpeg. 
/*
 * Split a packed version number into its components:
 *   major - everything from bit 22 upwards
 *   minor - the 10 bits starting at bit 12
 *   patch - the low 12 bits
 * The argument is parenthesized so that an expression such as
 * CODEC_VER_MAJ(a | b) is evaluated as a whole before shifting/masking.
 */
#define CODEC_VER_MAJ(ver) ((ver) >> 22)
#define CODEC_VER_MIN(ver) (((ver) >> 12) & ((1 << 10) - 1))
#define CODEC_VER_PAT(ver) ((ver) & ((1 << 12) - 1))
/* Expands to the three components as a comma-separated list. */
#define CODEC_VER(ver) CODEC_VER_MAJ(ver), CODEC_VER_MIN(ver), CODEC_VER_PAT(ver)
+ */ +VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth); + +/** + * Convert level from Vulkan to AV. + */ +int ff_vk_h264_level_to_av(StdVideoH264LevelIdc level); +int ff_vk_h265_level_to_av(StdVideoH265LevelIdc level); + +StdVideoH264LevelIdc ff_vk_h264_level_to_vk(int level_idc); +StdVideoH265LevelIdc ff_vk_h265_level_to_vk(int level_idc); +StdVideoAV1Level ff_vk_av1_level_to_vk(int level); + +/** + * Convert profile from/to AV to Vulkan + */ +StdVideoH264ProfileIdc ff_vk_h264_profile_to_vk(int profile); +StdVideoH265ProfileIdc ff_vk_h265_profile_to_vk(int profile); +StdVideoAV1Profile ff_vk_av1_profile_to_vk(int profile); + +/** + * Creates image views for video frames. + */ +int ff_vk_create_view(FFVulkanContext *s, FFVkVideoCommon *common, + VkImageView *view, VkImageAspectFlags *aspect, + AVVkFrame *src, VkFormat vkf, int is_dpb); + +/** + * Initialize video session, allocating and binding necessary memory. + */ +int ff_vk_video_common_init(AVCodecContext *avctx, FFVulkanContext *s, + FFVkVideoCommon *common, + VkVideoSessionCreateInfoKHR *session_create); + +/** + * Free video session and required resources. + */ +void ff_vk_video_common_uninit(FFVulkanContext *s, FFVkVideoCommon *common); + +#endif /* AVCODEC_VULKAN_VIDEO_H */ -- 2.49.1 From dde06811eeee2711405f8097b7c28aea38a80860 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:44:32 +0000 Subject: [PATCH 095/118] Changing vulkan file directory --- libavcodec/vulkan/common.comp | 279 ---------------------------------- 1 file changed, 279 deletions(-) delete mode 100644 libavcodec/vulkan/common.comp diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp deleted file mode 100644 index 6825693fa3..0000000000 --- a/libavcodec/vulkan/common.comp +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -layout(buffer_reference, buffer_reference_align = 1) buffer u8buf { - uint8_t v; -}; - -layout(buffer_reference, buffer_reference_align = 1) buffer u8vec2buf { - u8vec2 v; -}; - -layout(buffer_reference, buffer_reference_align = 1) buffer u8vec4buf { - u8vec4 v; -}; - -layout(buffer_reference, buffer_reference_align = 2) buffer u16buf { - uint16_t v; -}; - -layout(buffer_reference, buffer_reference_align = 4) buffer u32buf { - uint32_t v; -}; - -layout(buffer_reference, buffer_reference_align = 4) buffer u32vec2buf { - u32vec2 v; -}; - -layout(buffer_reference, buffer_reference_align = 8) buffer u64buf { - uint64_t v; -}; - -#define OFFBUF(type, b, l) \ - type(uint64_t(b) + uint64_t(l)) - -#define zero_extend(a, p) \ - ((a) & ((1 << (p)) - 1)) - -#define sign_extend(val, bits) \ - bitfieldExtract(val, 0, bits) - -#define fold(diff, bits) \ - sign_extend(diff, bits) - -#define mid_pred(a, b, c) \ - max(min((a), (b)), min(max((a), (b)), (c))) - -/* TODO: optimize */ -uint align(uint src, uint a) -{ - uint res = src % a; - if (res == 0) - return src; - return src + a - res; -} - -/* TODO: optimize */ -uint64_t align64(uint64_t src, uint64_t a) -{ - uint64_t res = src % a; - if (res == 0) - return src; - return src + a - res; -} - 
-#define reverse4(src) \ - (pack32(unpack8(uint32_t(src)).wzyx)) - -u32vec2 reverse8(uint64_t src) -{ - u32vec2 tmp = unpack32(src); - tmp.x = reverse4(tmp.x); - tmp.y = reverse4(tmp.y); - return tmp.yx; -} - -#ifdef PB_32 -#define BIT_BUF_TYPE uint32_t -#define BUF_TYPE u32buf -#define BUF_REVERSE(src) reverse4(src) -#define BUF_BITS uint8_t(32) -#define BUF_BYTES uint8_t(4) -#define BYTE_EXTRACT(src, byte_off) \ - (uint8_t(bitfieldExtract((src), ((byte_off) << 3), 8))) -#else -#define BIT_BUF_TYPE uint64_t -#define BUF_TYPE u32vec2buf -#define BUF_REVERSE(src) reverse8(src) -#define BUF_BITS uint8_t(64) -#define BUF_BYTES uint8_t(8) -#define BYTE_EXTRACT(src, byte_off) \ - (uint8_t(((src) >> ((byte_off) << 3)) & 0xFF)) -#endif - -struct PutBitContext { - uint64_t buf_start; - uint64_t buf; - - BIT_BUF_TYPE bit_buf; - uint8_t bit_left; -}; - -void put_bits(inout PutBitContext pb, const uint32_t n, uint32_t value) -{ - if (n < pb.bit_left) { - pb.bit_buf = (pb.bit_buf << n) | value; - pb.bit_left -= uint8_t(n); - } else { - pb.bit_buf <<= pb.bit_left; - pb.bit_buf |= (value >> (n - pb.bit_left)); - -#ifdef PB_UNALIGNED - u8buf bs = u8buf(pb.buf); - [[unroll]] - for (uint8_t i = uint8_t(0); i < BUF_BYTES; i++) - bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i); -#else -#ifdef DEBUG - if ((pb.buf % BUF_BYTES) != 0) - debugPrintfEXT("put_bits buffer is not aligned!"); -#endif - - BUF_TYPE bs = BUF_TYPE(pb.buf); - bs.v = BUF_REVERSE(pb.bit_buf); -#endif - pb.buf = uint64_t(bs) + BUF_BYTES; - - pb.bit_left += BUF_BITS - uint8_t(n); - pb.bit_buf = value; - } -} - -uint32_t flush_put_bits(inout PutBitContext pb) -{ - /* Align bits to MSBs */ - if (pb.bit_left < BUF_BITS) - pb.bit_buf <<= pb.bit_left; - - if (pb.bit_left < BUF_BITS) { - uint to_write = ((BUF_BITS - pb.bit_left - 1) >> 3) + 1; - - u8buf bs = u8buf(pb.buf); - for (int i = 0; i < to_write; i++) - bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i); - pb.buf = uint64_t(bs) + to_write; 
- } - - pb.bit_left = BUF_BITS; - pb.bit_buf = 0x0; - - return uint32_t(pb.buf - pb.buf_start); -} - -void init_put_bits(out PutBitContext pb, u8buf data, uint64_t len) -{ - pb.buf_start = uint64_t(data); - pb.buf = uint64_t(data); - - pb.bit_buf = 0; - pb.bit_left = BUF_BITS; -} - -uint64_t put_bits_count(in PutBitContext pb) -{ - return (pb.buf - pb.buf_start)*8 + BUF_BITS - pb.bit_left; -} - -uint32_t put_bytes_count(in PutBitContext pb) -{ - uint64_t num_bytes = (pb.buf - pb.buf_start) + ((BUF_BITS - pb.bit_left) >> 3); - return uint32_t(num_bytes); -} - -struct GetBitContext { - uint64_t buf_start; - uint64_t buf; - uint64_t buf_end; - - uint64_t bits; - int bits_valid; - int size_in_bits; -}; - -#define LOAD64() \ - { \ - u8vec4buf ptr = u8vec4buf(gb.buf); \ - uint32_t rf1 = pack32((ptr[0].v).wzyx); \ - uint32_t rf2 = pack32((ptr[1].v).wzyx); \ - gb.buf += 8; \ - gb.bits = uint64_t(rf1) << 32 | uint64_t(rf2); \ - gb.bits_valid = 64; \ - } - -#define RELOAD32() \ - { \ - u8vec4buf ptr = u8vec4buf(gb.buf); \ - uint32_t rf = pack32((ptr[0].v).wzyx); \ - gb.buf += 4; \ - gb.bits = uint64_t(rf) << (32 - gb.bits_valid) | gb.bits; \ - gb.bits_valid += 32; \ - } - -void init_get_bits(inout GetBitContext gb, u8buf data, int len) -{ - gb.buf = gb.buf_start = uint64_t(data); - gb.buf_end = uint64_t(data) + len; - gb.size_in_bits = len * 8; - - /* Preload */ - LOAD64() -} - -bool get_bit(inout GetBitContext gb) -{ - if (gb.bits_valid == 0) - LOAD64() - - bool val = bool(gb.bits >> (64 - 1)); - gb.bits <<= 1; - gb.bits_valid--; - return val; -} - -uint get_bits(inout GetBitContext gb, int n) -{ - if (n == 0) - return 0; - - if (n > gb.bits_valid) - RELOAD32() - - uint val = uint(gb.bits >> (64 - n)); - gb.bits <<= n; - gb.bits_valid -= n; - return val; -} - -uint show_bits(inout GetBitContext gb, int n) -{ - if (n > gb.bits_valid) - RELOAD32() - - return uint(gb.bits >> (64 - n)); -} - -void skip_bits(inout GetBitContext gb, int n) -{ - if (n > gb.bits_valid) - RELOAD32() 
- - gb.bits <<= n; - gb.bits_valid -= n; -} - -int tell_bits(in GetBitContext gb) -{ - return int(gb.buf - gb.buf_start) * 8 - gb.bits_valid; -} - -int left_bits(in GetBitContext gb) -{ - return gb.size_in_bits - int(gb.buf - gb.buf_start) * 8 + gb.bits_valid; -} -- 2.49.1 From f241968840f63c796955f65c57591201100b6a3b Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:44:41 +0000 Subject: [PATCH 096/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_common.comp | 181 ----------------------------- 1 file changed, 181 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_common.comp diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp deleted file mode 100644 index 3d40592739..0000000000 --- a/libavcodec/vulkan/ffv1_common.comp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -struct SliceContext { - RangeCoder c; - -#if !defined(DECODE) - PutBitContext pb; /* 8*8 bytes */ -#else - GetBitContext gb; -#endif - - ivec2 slice_dim; - ivec2 slice_pos; - ivec2 slice_rct_coef; - u8vec3 quant_table_idx; - - uint hdr_len; // only used for golomb - - uint slice_coding_mode; - bool slice_reset_contexts; -}; - -/* -1, { -1, 0 } */ -int predict(int L, ivec2 top) -{ - return mid_pred(L, L + top[1] - top[0], top[1]); -} - -/* { -2, -1 }, { -1, 0, 1 }, 0 */ -int get_context(VTYPE2 cur_l, VTYPE3 top_l, TYPE top2, uint8_t quant_table_idx) -{ - const int LT = top_l[0]; /* -1 */ - const int T = top_l[1]; /* 0 */ - const int RT = top_l[2]; /* 1 */ - const int L = cur_l[1]; /* -1 */ - - int base = quant_table[quant_table_idx][0][(L - LT) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][1][(LT - T) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][2][(T - RT) & MAX_QUANT_TABLE_MASK]; - - if ((quant_table[quant_table_idx][3][127] == 0) && - (quant_table[quant_table_idx][4][127] == 0)) - return base; - - const int TT = top2; /* -2 */ - const int LL = cur_l[0]; /* -2 */ - return base + - quant_table[quant_table_idx][3][(LL - L) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][4][(TT - T) & MAX_QUANT_TABLE_MASK]; -} - -const uint32_t log2_run[41] = { - 0, 0, 0, 0, 1, 1, 1, 1, - 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 5, 5, 6, 6, 7, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, -}; - -uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) -{ - uint mpw = 1 << chroma_shift; - uint awidth = align(width, mpw); - - if ((version < 4) || ((version == 4) && (micro_version < 3))) - return width * sx / num_h_slices; - - sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * 
mpw) * mpw; - if (sx == awidth) - sx = width; - - return sx; -} - -#ifdef RGB -#define RGB_LBUF (RGB_LINECACHE - 1) -#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF))) - -ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, - int comp, int sw, uint8_t quant_table_idx, bool extend_lookup) -{ - const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? off + ivec2(1, -1) : off; - - /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */ - VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, -1)))[comp]), - TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]), - TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp])); - - /* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must - * return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous - * row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */ - TYPE cur = TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, 0)))[comp]); - - int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; - - if (expectEXT(extend_lookup, false)) { - TYPE cur2 = TYPE(0); - if (expectEXT(off.x > 0, true)) { - const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? 
ivec2(-1, -1) : ivec2(-2, 0); - cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]); - } - base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; - - /* top-2 became current upon swap */ - TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(off))[comp]); - base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; - } - - /* context, prediction */ - return ivec2(base, predict(cur, VTYPE2(top))); -} - -#else /* RGB */ - -#define LADDR(p) (p) - -ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off, - int comp, int sw, uint8_t quant_table_idx, bool extend_lookup) -{ - const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); - sp += off; - - VTYPE3 top = VTYPE3(TYPE(0), - TYPE(0), - TYPE(0)); - if (off.y > 0 && off != ivec2(0, 1)) - top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]); - if (off.y > 0) { - top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]); - top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]); - } - - TYPE cur = TYPE(0); - if (off != ivec2(0, 0)) - cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]); - - int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + - quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; - - if (expectEXT(extend_lookup, false)) { - TYPE cur2 = TYPE(0); - if (off.x > 0 && off != ivec2(1, 0)) { - const ivec2 yoff_border2 = off.x == 1 ? 
ivec2(1, -1) : ivec2(0, 0); - cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]); - } - base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; - - TYPE top2 = TYPE(0); - if (off.y > 1) - top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]); - base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK]; - } - - /* context, prediction */ - return ivec2(base, predict(cur, VTYPE2(top))); -} -#endif -- 2.49.1 From 568a7db96c8ab8035b6082a4791611b79f9f6443 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:44:50 +0000 Subject: [PATCH 097/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_dec.comp | 302 -------------------------------- 1 file changed, 302 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_dec.comp diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp deleted file mode 100644 index eb795dcba4..0000000000 --- a/libavcodec/vulkan/ffv1_dec.comp +++ /dev/null @@ -1,302 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef GOLOMB -#ifdef CACHED_SYMBOL_READER -shared uint8_t state[CONTEXT_SIZE]; -#define READ(c, off) get_rac_direct(c, state[off]) -#else -#define READ(c, off) get_rac(c, uint64_t(slice_state) + (state_off + off)) -#endif - -int get_isymbol(inout RangeCoder c, uint state_off) -{ - if (READ(c, 0)) - return 0; - - uint e = 1; - for (; e < 33; e++) - if (!READ(c, min(e, 10))) - break; - - if (expectEXT(e == 1, false)) { - return READ(c, 11) ? -1 : 1; - } else if (expectEXT(e == 33, false)) { - corrupt = true; - return 0; - } - - int a = 1; - for (uint i = e + 20; i >= 22; i--) { - a <<= 1; - a |= int(READ(c, min(i, 31))); - } - - return READ(c, min(e + 10, 21)) ? -a : a; -} - -void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits) -{ -#ifdef CACHED_SYMBOL_READER - if (gl_LocalInvocationID.x > 0) - return; -#endif - -#ifndef RGB - if (p > 0 && p < 3) { - w >>= chroma_shift.x; - sp >>= chroma_shift; - } -#endif - - for (int x = 0; x < w; x++) { - uint v = 0; - for (int i = (bits - 1); i >= 0; i--) - v |= uint(get_rac_equi(sc.c)) << i; - - imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v)); - } -} - -void decode_line(inout SliceContext sc, ivec2 sp, int w, - int y, int p, int bits, uint state_off, - uint8_t quant_table_idx, const int run_index) -{ -#ifndef RGB - if (p > 0 && p < 3) { - w >>= chroma_shift.x; - sp >>= chroma_shift; - } -#endif - - for (int x = 0; x < w; x++) { - ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, - quant_table_idx, extend_lookup[quant_table_idx] > 0); - - uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]); -#ifdef CACHED_SYMBOL_READER - u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x); - state[gl_LocalInvocationID.x] = sb.v; - barrier(); - if 
(gl_LocalInvocationID.x == 0) { - -#endif - - int diff = get_isymbol(sc.c, context_off); - if (pr[0] < 0) - diff = -diff; - - uint v = zero_extend(pr[1] + diff, bits); - imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v)); - -#ifdef CACHED_SYMBOL_READER - } - - barrier(); - sb.v = state[gl_LocalInvocationID.x]; -#endif - } -} - -#else /* GOLOMB */ - -void decode_line(inout SliceContext sc, ivec2 sp, int w, - int y, int p, int bits, uint state_off, - uint8_t quant_table_idx, inout int run_index) -{ -#ifndef RGB - if (p > 0 && p < 3) { - w >>= chroma_shift.x; - sp >>= chroma_shift; - } -#endif - - int run_count = 0; - int run_mode = 0; - - for (int x = 0; x < w; x++) { - ivec2 pos = sp + ivec2(x, y); - int diff; - ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w, - quant_table_idx, extend_lookup[quant_table_idx] > 0); - - uint context_off = state_off + VLC_STATE_SIZE*abs(pr[0]); - VlcState sb = VlcState(uint64_t(slice_state) + context_off); - - if (pr[0] == 0 && run_mode == 0) - run_mode = 1; - - if (run_mode != 0) { - if (run_count == 0 && run_mode == 1) { - int tmp_idx = int(log2_run[run_index]); - if (get_bit(sc.gb)) { - run_count = 1 << tmp_idx; - if (x + run_count <= w) - run_index++; - } else { - if (tmp_idx != 0) { - run_count = int(get_bits(sc.gb, tmp_idx)); - } else - run_count = 0; - - if (run_index != 0) - run_index--; - run_mode = 2; - } - } - - run_count--; - if (run_count < 0) { - run_mode = 0; - run_count = 0; - diff = read_vlc_symbol(sc.gb, sb, bits); - if (diff >= 0) - diff++; - } else { - diff = 0; - } - } else { - diff = read_vlc_symbol(sc.gb, sb, bits); - } - - if (pr[0] < 0) - diff = -diff; - - uint v = zero_extend(pr[1] + diff, bits); - imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v)); - } -} -#endif - -#ifdef RGB -ivec4 transform_sample(ivec4 pix, ivec2 rct_coef) -{ - pix.b -= rct_offset; - pix.r -= rct_offset; - pix.g -= (pix.b*rct_coef.y + pix.r*rct_coef.x) >> 2; - pix.b += pix.g; - pix.r += pix.g; - return ivec4(pix[fmt_lut[0]], 
pix[fmt_lut[1]], - pix[fmt_lut[2]], pix[fmt_lut[3]]); -} - -void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct) -{ - for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) { - ivec2 lpos = sp + LADDR(ivec2(x, y)); - ivec2 pos = sc.slice_pos + ivec2(x, y); - - ivec4 pix; - pix.r = int(imageLoad(dec[2], lpos)[0]); - pix.g = int(imageLoad(dec[0], lpos)[0]); - pix.b = int(imageLoad(dec[1], lpos)[0]); - if (transparency != 0) - pix.a = int(imageLoad(dec[3], lpos)[0]); - - if (expectEXT(apply_rct, true)) - pix = transform_sample(pix, sc.slice_rct_coef); - - imageStore(dst[0], pos, pix); - if (planar_rgb != 0) { - for (int i = 1; i < color_planes; i++) - imageStore(dst[i], pos, ivec4(pix[i])); - } - } -} -#endif - -void decode_slice(inout SliceContext sc, const uint slice_idx) -{ - int w = sc.slice_dim.x; - ivec2 sp = sc.slice_pos; - -#ifndef RGB - int bits = bits_per_raw_sample; -#else - int bits = 9; - if (bits != 8 || sc.slice_coding_mode != 0) - bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1); - - sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE; -#endif - - /* PCM coding */ -#ifndef GOLOMB - if (sc.slice_coding_mode == 1) { -#ifndef RGB - for (int p = 0; p < planes; p++) { - int h = sc.slice_dim.y; - if (p > 0 && p < 3) - h >>= chroma_shift.y; - - for (int y = 0; y < h; y++) - decode_line_pcm(sc, sp, w, y, p, bits); - } -#else - for (int y = 0; y < sc.slice_dim.y; y++) { - for (int p = 0; p < color_planes; p++) - decode_line_pcm(sc, sp, w, y, p, bits); - - writeout_rgb(sc, sp, w, y, false); - } -#endif - } else - - /* Arithmetic coding */ -#endif - { - u8vec4 quant_table_idx = sc.quant_table_idx.xyyz; - u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size; - -#ifndef RGB - for (int p = 0; p < planes; p++) { - int h = sc.slice_dim.y; - if (p > 0 && p < 3) - h >>= chroma_shift.y; - - int run_index = 0; - for (int y = 0; y < h; y++) - decode_line(sc, sp, w, y, p, bits, - 
slice_state_off[p], quant_table_idx[p], run_index); - } -#else - int run_index = 0; - for (int y = 0; y < sc.slice_dim.y; y++) { - for (int p = 0; p < color_planes; p++) - decode_line(sc, sp, w, y, p, bits, - slice_state_off[p], quant_table_idx[p], run_index); - - writeout_rgb(sc, sp, w, y, true); - } -#endif - } -} - -void main(void) -{ - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - decode_slice(slice_ctx[slice_idx], slice_idx); - - uint32_t status = corrupt ? uint32_t(corrupt) : overread; - if (status != 0) - slice_status[2*slice_idx + 1] = status; -} -- 2.49.1 From 9fa42bc2aa39806085cf6093a280f4e8b038e0ab Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:45:00 +0000 Subject: [PATCH 098/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_dec_setup.comp | 140 -------------------------- 1 file changed, 140 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_dec_setup.comp diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp deleted file mode 100644 index 5da09df21c..0000000000 --- a/libavcodec/vulkan/ffv1_dec_setup.comp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -uint8_t setup_state[CONTEXT_SIZE]; - -uint get_usymbol(inout RangeCoder c) -{ - if (get_rac_direct(c, setup_state[0])) - return 0; - - int e = 0; - while (get_rac_direct(c, setup_state[1 + min(e, 9)])) { // 1..10 - e++; - if (e > 31) { - corrupt = true; - return 0; - } - } - - uint a = 1; - for (int i = e - 1; i >= 0; i--) { - a <<= 1; - a |= uint(get_rac_direct(c, setup_state[22 + min(i, 9)])); // 22..31 - } - - return a; -} - -bool decode_slice_header(inout SliceContext sc) -{ - [[unroll]] - for (int i = 0; i < CONTEXT_SIZE; i++) - setup_state[i] = uint8_t(128); - - uint sx = get_usymbol(sc.c); - uint sy = get_usymbol(sc.c); - uint sw = get_usymbol(sc.c) + 1; - uint sh = get_usymbol(sc.c) + 1; - - if (sx < 0 || sy < 0 || sw <= 0 || sh <= 0 || - sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) || - corrupt) { - return true; - } - - /* Set coordinates */ - uint sxs = slice_coord(img_size.x, sx , gl_NumWorkGroups.x, chroma_shift.x); - uint sxe = slice_coord(img_size.x, sx + sw, gl_NumWorkGroups.x, chroma_shift.x); - uint sys = slice_coord(img_size.y, sy , gl_NumWorkGroups.y, chroma_shift.y); - uint sye = slice_coord(img_size.y, sy + sh, gl_NumWorkGroups.y, chroma_shift.y); - - sc.slice_pos = ivec2(sxs, sys); - sc.slice_dim = ivec2(sxe - sxs, sye - sys); - sc.slice_rct_coef = ivec2(1, 1); - sc.slice_coding_mode = int(0); - - for (uint i = 0; i < codec_planes; i++) { - uint idx = get_usymbol(sc.c); - if (idx >= quant_table_count) - return true; - sc.quant_table_idx[i] = uint8_t(idx); - } - - get_usymbol(sc.c); - get_usymbol(sc.c); - get_usymbol(sc.c); - - if (version >= 4) { - sc.slice_reset_contexts = get_rac_direct(sc.c, setup_state[0]); - sc.slice_coding_mode = get_usymbol(sc.c); - if (sc.slice_coding_mode != 1 && colorspace 
== 1) { - sc.slice_rct_coef.x = int(get_usymbol(sc.c)); - sc.slice_rct_coef.y = int(get_usymbol(sc.c)); - if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4) - return true; - } - } - - return false; -} - -void golomb_init(inout SliceContext sc) -{ - if (version == 3 && micro_version > 1 || version > 3) { - setup_state[0] = uint8_t(129); - get_rac_direct(sc.c, setup_state[0]); - } - - uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1; - init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count), - int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count)); -} - -void main(void) -{ - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - - u8buf bs = u8buf(slice_data + slice_offsets[2*slice_idx + 0]); - uint32_t slice_size = slice_offsets[2*slice_idx + 1]; - - rac_init_dec(slice_ctx[slice_idx].c, - bs, slice_size); - - if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1)) - get_rac_equi(slice_ctx[slice_idx].c); - - decode_slice_header(slice_ctx[slice_idx]); - - if (golomb == 1) - golomb_init(slice_ctx[slice_idx]); - - if (ec != 0 && check_crc != 0) { - uint32_t crc = crcref; - for (int i = 0; i < slice_size; i++) - crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8); - - slice_status[2*slice_idx + 0] = crc; - } - - slice_status[2*slice_idx + 1] = corrupt ? 
uint32_t(corrupt) : overread; -} -- 2.49.1 From e05343395790ad698ea8bcae0ff95be1a1199568 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:45:09 +0000 Subject: [PATCH 099/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_enc.comp | 358 -------------------------------- 1 file changed, 358 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_enc.comp diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp deleted file mode 100644 index 78372f5b3a..0000000000 --- a/libavcodec/vulkan/ffv1_enc.comp +++ /dev/null @@ -1,358 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef GOLOMB -#ifdef CACHED_SYMBOL_READER -shared uint8_t state[CONTEXT_SIZE]; -#define WRITE(c, off, val) put_rac_direct(c, state[off], val) -#else -#define WRITE(c, off, val) put_rac(c, uint64_t(slice_state) + (state_off + off), val) -#endif - -/* Note - only handles signed values */ -void put_symbol(inout RangeCoder c, uint state_off, int v) -{ - bool is_nil = (v == 0); - WRITE(c, 0, is_nil); - if (is_nil) - return; - - const int a = abs(v); - const int e = findMSB(a); - - for (int i = 0; i < e; i++) - WRITE(c, 1 + min(i, 9), true); - WRITE(c, 1 + min(e, 9), false); - - for (int i = e - 1; i >= 0; i--) - WRITE(c, 22 + min(i, 9), bool(bitfieldExtract(a, i, 1))); - - WRITE(c, 22 - 11 + min(e, 10), v < 0); -} - -void encode_line_pcm(inout SliceContext sc, readonly uimage2D img, - ivec2 sp, int y, int p, int comp, int bits) -{ - int w = sc.slice_dim.x; - -#ifdef CACHED_SYMBOL_READER - if (gl_LocalInvocationID.x > 0) - return; -#endif - -#ifndef RGB - if (p > 0 && p < 3) { - w >>= chroma_shift.x; - sp >>= chroma_shift; - } -#endif - - for (int x = 0; x < w; x++) { - uint v = imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]; - for (int i = (bits - 1); i >= 0; i--) - put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1))); - } -} - -void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off, - ivec2 sp, int y, int p, int comp, int bits, - uint8_t quant_table_idx, const int run_index) -{ - int w = sc.slice_dim.x; - -#ifndef RGB - if (p > 0 && p < 3) { - w >>= chroma_shift.x; - sp >>= chroma_shift; - } -#endif - - for (int x = 0; x < w; x++) { - ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w, - quant_table_idx, extend_lookup[quant_table_idx] > 0); - d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1]; - - if (d[0] < 0) 
- d = -d; - - d[1] = fold(d[1], bits); - - uint context_off = state_off + CONTEXT_SIZE*d[0]; -#ifdef CACHED_SYMBOL_READER - u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x); - state[gl_LocalInvocationID.x] = sb.v; - barrier(); - if (gl_LocalInvocationID.x == 0) -#endif - - put_symbol(sc.c, context_off, d[1]); - -#ifdef CACHED_SYMBOL_READER - barrier(); - sb.v = state[gl_LocalInvocationID.x]; -#endif - } -} - -#else /* GOLOMB */ - -void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off, - ivec2 sp, int y, int p, int comp, int bits, - uint8_t quant_table_idx, inout int run_index) -{ - int w = sc.slice_dim.x; - -#ifndef RGB - if (p > 0 && p < 3) { - w >>= chroma_shift.x; - sp >>= chroma_shift; - } -#endif - - int run_count = 0; - bool run_mode = false; - - for (int x = 0; x < w; x++) { - ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w, - quant_table_idx, extend_lookup[quant_table_idx] > 0); - d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1]; - - if (d[0] < 0) - d = -d; - - d[1] = fold(d[1], bits); - - if (d[0] == 0) - run_mode = true; - - if (run_mode) { - if (d[1] != 0) { - /* A very unlikely loop */ - while (run_count >= 1 << log2_run[run_index]) { - run_count -= 1 << log2_run[run_index]; - run_index++; - put_bits(sc.pb, 1, 1); - } - - put_bits(sc.pb, 1 + log2_run[run_index], run_count); - if (run_index != 0) - run_index--; - run_count = 0; - run_mode = false; - if (d[1] > 0) - d[1]--; - } else { - run_count++; - } - } - - if (!run_mode) { - VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*d[0]); - Symbol sym = get_vlc_symbol(sb, d[1], bits); - put_bits(sc.pb, sym.bits, sym.val); - } - } - - if (run_mode) { - while (run_count >= (1 << log2_run[run_index])) { - run_count -= 1 << log2_run[run_index]; - run_index++; - put_bits(sc.pb, 1, 1); - } - - if (run_count > 0) - put_bits(sc.pb, 1, 1); - } -} -#endif - -#ifdef RGB -ivec4 load_components(ivec2 pos) -{ - ivec4 pix = 
ivec4(imageLoad(src[0], pos)); - if (planar_rgb != 0) { - for (int i = 1; i < (3 + transparency); i++) - pix[i] = int(imageLoad(src[i], pos)[0]); - } - - return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]], - pix[fmt_lut[2]], pix[fmt_lut[3]]); -} - -void transform_sample(inout ivec4 pix, ivec2 rct_coef) -{ - pix.b -= pix.g; - pix.r -= pix.g; - pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2; - pix.b += rct_offset; - pix.r += rct_offset; -} - -void preload_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct) -{ - for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) { - ivec2 lpos = sp + LADDR(ivec2(x, y)); - ivec2 pos = sc.slice_pos + ivec2(x, y); - - ivec4 pix = load_components(pos); - - if (expectEXT(apply_rct, true)) - transform_sample(pix, sc.slice_rct_coef); - - imageStore(tmp, lpos, pix); - } -} -#endif - -void encode_slice(inout SliceContext sc, const uint slice_idx) -{ - ivec2 sp = sc.slice_pos; - -#ifndef RGB - int bits = bits_per_raw_sample; -#else - int bits = 9; - if (bits != 8 || sc.slice_coding_mode != 0) - bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1); - - sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE; -#endif - -#ifndef GOLOMB - if (sc.slice_coding_mode == 1) { -#ifndef RGB - for (int c = 0; c < components; c++) { - - int h = sc.slice_dim.y; - if (c > 0 && c < 3) - h >>= chroma_shift.y; - - /* Takes into account dual-plane YUV formats */ - int p = min(c, planes - 1); - int comp = c - p; - - for (int y = 0; y < h; y++) - encode_line_pcm(sc, src[p], sp, y, p, comp, bits); - } -#else - for (int y = 0; y < sc.slice_dim.y; y++) { - preload_rgb(sc, sp, sc.slice_dim.x, y, false); - - encode_line_pcm(sc, tmp, sp, y, 0, 1, bits); - encode_line_pcm(sc, tmp, sp, y, 0, 2, bits); - encode_line_pcm(sc, tmp, sp, y, 0, 0, bits); - if (transparency == 1) - encode_line_pcm(sc, tmp, sp, y, 0, 3, bits); - } -#endif - } else -#endif - { - u8vec4 quant_table_idx = sc.quant_table_idx.xyyz; - u32vec4 slice_state_off = 
(slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size; - -#ifndef RGB - for (int c = 0; c < components; c++) { - int run_index = 0; - - int h = sc.slice_dim.y; - if (c > 0 && c < 3) - h >>= chroma_shift.y; - - int p = min(c, planes - 1); - int comp = c - p; - - for (int y = 0; y < h; y++) - encode_line(sc, src[p], slice_state_off[c], sp, y, p, - comp, bits, quant_table_idx[c], run_index); - } -#else - int run_index = 0; - for (int y = 0; y < sc.slice_dim.y; y++) { - preload_rgb(sc, sp, sc.slice_dim.x, y, true); - - encode_line(sc, tmp, slice_state_off[0], - sp, y, 0, 1, bits, quant_table_idx[0], run_index); - encode_line(sc, tmp, slice_state_off[1], - sp, y, 0, 2, bits, quant_table_idx[1], run_index); - encode_line(sc, tmp, slice_state_off[2], - sp, y, 0, 0, bits, quant_table_idx[2], run_index); - if (transparency == 1) - encode_line(sc, tmp, slice_state_off[3], - sp, y, 0, 3, bits, quant_table_idx[3], run_index); - } -#endif - } -} - -void finalize_slice(inout SliceContext sc, const uint slice_idx) -{ -#ifdef CACHED_SYMBOL_READER - if (gl_LocalInvocationID.x > 0) - return; -#endif - -#ifdef GOLOMB - uint32_t enc_len = sc.hdr_len + flush_put_bits(sc.pb); -#else - uint32_t enc_len = rac_terminate(sc.c); -#endif - - u8buf bs = u8buf(sc.c.bytestream_start); - - /* Append slice length */ - u8vec4 enc_len_p = unpack8(enc_len); - bs[enc_len + 0].v = enc_len_p.z; - bs[enc_len + 1].v = enc_len_p.y; - bs[enc_len + 2].v = enc_len_p.x; - enc_len += 3; - - /* Calculate and write CRC */ - if (ec != 0) { - bs[enc_len].v = uint8_t(0); - enc_len++; - - uint32_t crc = crcref; - for (int i = 0; i < enc_len; i++) - crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8); - - if (crcref != 0x00000000) - crc ^= 0x8CD88196; - - u8vec4 crc_p = unpack8(crc); - bs[enc_len + 0].v = crc_p.x; - bs[enc_len + 1].v = crc_p.y; - bs[enc_len + 2].v = crc_p.z; - bs[enc_len + 3].v = crc_p.w; - enc_len += 4; - } - - slice_results[slice_idx*2 + 0] = enc_len; - slice_results[slice_idx*2 + 
1] = uint64_t(bs) - uint64_t(out_data); -} - -void main(void) -{ - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - encode_slice(slice_ctx[slice_idx], slice_idx); - finalize_slice(slice_ctx[slice_idx], slice_idx); -} -- 2.49.1 From ee72295e48e812d416d1865b6336292d7a6f42b2 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:45:26 +0000 Subject: [PATCH 100/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_enc_rct.comp | 79 ----------------------------- 1 file changed, 79 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_enc_rct.comp diff --git a/libavcodec/vulkan/ffv1_enc_rct.comp b/libavcodec/vulkan/ffv1_enc_rct.comp deleted file mode 100644 index b611f4be98..0000000000 --- a/libavcodec/vulkan/ffv1_enc_rct.comp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -ivec4 load_components(ivec2 pos) -{ - ivec4 pix = ivec4(imageLoad(src[0], pos)); - if (planar_rgb != 0) { - for (int i = 1; i < (3 + transparency); i++) - pix[i] = int(imageLoad(src[i], pos)[0]); - } - - return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]], - pix[fmt_lut[2]], pix[fmt_lut[3]]); -} - -void bypass_sample(ivec2 pos) -{ - imageStore(dst[0], pos, load_components(pos)); -} - -void bypass_block(in SliceContext sc) -{ - ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos; - ivec2 end = sc.slice_pos + sc.slice_dim; - for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y) - for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x) - bypass_sample(ivec2(x, y)); -} - -void transform_sample(ivec2 pos, ivec2 rct_coef) -{ - ivec4 pix = load_components(pos); - pix.b -= pix.g; - pix.r -= pix.g; - pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2; - pix.b += offset; - pix.r += offset; - imageStore(dst[0], pos, pix); -} - -void transform_block(in SliceContext sc) -{ - const ivec2 rct_coef = sc.slice_rct_coef; - const ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos; - const ivec2 end = sc.slice_pos + sc.slice_dim; - - for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y) - for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x) - transform_sample(ivec2(x, y), rct_coef); -} - -void main() -{ - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - - if (slice_ctx[slice_idx].slice_coding_mode == 1) - bypass_block(slice_ctx[slice_idx]); - else - transform_block(slice_ctx[slice_idx]); -} -- 2.49.1 From 24394e99ef5cef3d53e68498149ea7ebca4ea64a Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:45:42 +0000 Subject: [PATCH 101/118] Changing vulkan file directory --- 
libavcodec/vulkan/ffv1_enc_setup.comp | 126 -------------------------- 1 file changed, 126 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_enc_setup.comp diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp deleted file mode 100644 index 5f8e6704b0..0000000000 --- a/libavcodec/vulkan/ffv1_enc_setup.comp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -uint8_t state[CONTEXT_SIZE]; - -void init_slice(inout SliceContext sc, const uint slice_idx) -{ - /* Set coordinates */ - uvec2 img_size = imageSize(src[0]); - uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0, - gl_NumWorkGroups.x, chroma_shift.x); - uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1, - gl_NumWorkGroups.x, chroma_shift.x); - uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0, - gl_NumWorkGroups.y, chroma_shift.y); - uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1, - gl_NumWorkGroups.y, chroma_shift.y); - - sc.slice_pos = ivec2(sxs, sys); - sc.slice_dim = ivec2(sxe - sxs, sye - sys); - sc.slice_coding_mode = int(force_pcm == 1); - sc.slice_reset_contexts = sc.slice_coding_mode == 1; - sc.quant_table_idx = u8vec3(context_model); - - if ((rct_search == 0) || (sc.slice_coding_mode == 1)) - sc.slice_rct_coef = ivec2(1, 1); - - rac_init(sc.c, - OFFBUF(u8buf, out_data, slice_idx * slice_size_max), - slice_size_max); -} - -void put_usymbol(inout RangeCoder c, uint v) -{ - bool is_nil = (v == 0); - put_rac_direct(c, state[0], is_nil); - if (is_nil) - return; - - const int e = findMSB(v); - - for (int i = 0; i < e; i++) - put_rac_direct(c, state[1 + min(i, 9)], true); - put_rac_direct(c, state[1 + min(e, 9)], false); - - for (int i = e - 1; i >= 0; i--) - put_rac_direct(c, state[22 + min(i, 9)], bool(bitfieldExtract(v, i, 1))); -} - -void write_slice_header(inout SliceContext sc) -{ - [[unroll]] - for (int i = 0; i < CONTEXT_SIZE; i++) - state[i] = uint8_t(128); - - put_usymbol(sc.c, gl_WorkGroupID.x); - put_usymbol(sc.c, gl_WorkGroupID.y); - put_usymbol(sc.c, 0); - put_usymbol(sc.c, 0); - - for (int i = 0; i < codec_planes; i++) - put_usymbol(sc.c, sc.quant_table_idx[i]); - - put_usymbol(sc.c, pic_mode); - 
put_usymbol(sc.c, sar.x); - put_usymbol(sc.c, sar.y); - - if (version >= 4) { - put_rac_direct(sc.c, state[0], sc.slice_reset_contexts); - put_usymbol(sc.c, sc.slice_coding_mode); - if (sc.slice_coding_mode != 1 && colorspace == 1) { - put_usymbol(sc.c, sc.slice_rct_coef.y); - put_usymbol(sc.c, sc.slice_rct_coef.x); - } - } -} - -void write_frame_header(inout SliceContext sc) -{ - put_rac_equi(sc.c, bool(key_frame)); -} - -#ifdef GOLOMB -void init_golomb(inout SliceContext sc) -{ - sc.hdr_len = rac_terminate(sc.c); - init_put_bits(sc.pb, - OFFBUF(u8buf, sc.c.bytestream_start, sc.hdr_len), - slice_size_max - sc.hdr_len); -} -#endif - -void main(void) -{ - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - - init_slice(slice_ctx[slice_idx], slice_idx); - - if (slice_idx == 0) - write_frame_header(slice_ctx[slice_idx]); - - write_slice_header(slice_ctx[slice_idx]); - -#ifdef GOLOMB - init_golomb(slice_ctx[slice_idx]); -#endif -} -- 2.49.1 From e007175b4b786003be145c307de81c6690a3a7d0 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:45:58 +0000 Subject: [PATCH 102/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_rct.comp | 90 --------------------------------- 1 file changed, 90 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_rct.comp diff --git a/libavcodec/vulkan/ffv1_rct.comp b/libavcodec/vulkan/ffv1_rct.comp deleted file mode 100644 index b10bb47132..0000000000 --- a/libavcodec/vulkan/ffv1_rct.comp +++ /dev/null @@ -1,90 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -ivec4 load_components(ivec2 pos) -{ - ivec4 pix = ivec4(imageLoad(src[0], pos)); - if (planar_rgb != 0) { - for (int i = 1; i < (3 + transparency); i++) - pix[i] = int(imageLoad(src[i], pos)[0]); - } - - return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]], - pix[fmt_lut[2]], pix[fmt_lut[3]]); -} - -void bypass_sample(ivec2 pos) -{ - imageStore(dst[0], pos, load_components(pos)); -} - -void bypass_block(in SliceContext sc) -{ - ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos; - ivec2 end = sc.slice_pos + sc.slice_dim; - for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y) - for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x) - bypass_sample(ivec2(x, y)); -} - -void transform_sample(ivec2 pos, ivec2 rct_coef) -{ - ivec4 pix = load_components(pos); - pix.b -= offset; - pix.r -= offset; - pix.g -= (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2; - pix.b += pix.g; - pix.r += pix.g; - imageStore(dst[0], pos, pix); -} - -void transform_sample(ivec2 pos, ivec2 rct_coef) -{ - ivec4 pix = load_components(pos); - pix.b -= pix.g; - pix.r -= pix.g; - pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2; - pix.b += offset; - pix.r += offset; - imageStore(dst[0], pos, pix); -} - -void transform_block(in SliceContext sc) -{ - const ivec2 rct_coef = sc.slice_rct_coef; - const ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos; - const ivec2 end = sc.slice_pos + sc.slice_dim; - - for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y) - for (uint x = start.x; x < end.x; x += 
gl_WorkGroupSize.x) - transform_sample(ivec2(x, y), rct_coef); -} - -void main() -{ - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - - if (slice_ctx[slice_idx].slice_coding_mode == 1) - bypass_block(slice_ctx[slice_idx]); - else - transform_block(slice_ctx[slice_idx]); -} -- 2.49.1 From 1b7b18b6595d5ca5d27af05b4c203ecbe5034255 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:46:14 +0000 Subject: [PATCH 103/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_rct_search.comp | 139 ------------------------- 1 file changed, 139 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_rct_search.comp diff --git a/libavcodec/vulkan/ffv1_rct_search.comp b/libavcodec/vulkan/ffv1_rct_search.comp deleted file mode 100644 index 055bde46c4..0000000000 --- a/libavcodec/vulkan/ffv1_rct_search.comp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -ivec3 load_components(ivec2 pos) -{ - ivec3 pix = ivec3(imageLoad(src[0], pos)); - if (planar_rgb != 0) { - for (int i = 1; i < 3; i++) - pix[i] = int(imageLoad(src[i], pos)[0]); - } - - return ivec3(pix[fmt_lut[0]], pix[fmt_lut[1]], pix[fmt_lut[2]]); -} - -#define NUM_CHECKS 15 -const ivec2 rct_y_coeff[NUM_CHECKS] = { - ivec2(0, 0), // 4G - - ivec2(0, 1), // 3G + B - ivec2(1, 0), // R + 3G - ivec2(1, 1), // R + 2G + B - - ivec2(0, 2), // 2G + 2B - ivec2(2, 0), // 2R + 2G - ivec2(2, 2), // 2R + 2B - - ivec2(0, 3), // 1G + 3B - ivec2(3, 0), // 3R + 1G - - ivec2(0, 4), // 4B - ivec2(4, 0), // 4R - - ivec2(1, 2), // R + G + 2B - ivec2(2, 1), // 2R + G + B - - ivec2(3, 1), // 3R + B - ivec2(1, 3), // R + 3B -}; - -shared ivec3 pix_buf[gl_WorkGroupSize.x + 1][gl_WorkGroupSize.y + 1] = { }; - -ivec3 transform_sample(ivec3 pix, ivec2 rct_coef) -{ - pix.b -= pix.g; - pix.r -= pix.g; - pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2; - pix.b += rct_offset; - pix.r += rct_offset; - return pix; -} - -uint get_dist(ivec3 cur) -{ - ivec3 LL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 1]; - ivec3 TL = pix_buf[gl_LocalInvocationID.x + 0][gl_LocalInvocationID.y + 0]; - ivec3 TT = pix_buf[gl_LocalInvocationID.x + 1][gl_LocalInvocationID.y + 0]; - - ivec3 pred = ivec3(predict(LL.r, ivec2(TL.r, TT.r)), - predict(LL.g, ivec2(TL.g, TT.g)), - predict(LL.b, ivec2(TL.b, TT.b))); - - uvec3 c = abs(pred - cur); - return mid_pred(c.r, c.g, c.b); -} - -shared uint score_cols[gl_WorkGroupSize.y] = { }; -shared uint score_mode[16] = { }; - -void process(ivec2 pos) -{ - ivec3 pix = load_components(pos); - - for (int i = 0; i < NUM_CHECKS; i++) { - ivec3 tx_pix = transform_sample(pix, rct_y_coeff[i]); - pix_buf[gl_LocalInvocationID.x + 
1][gl_LocalInvocationID.y + 1] = tx_pix; - memoryBarrierShared(); - - uint dist = get_dist(tx_pix); - atomicAdd(score_mode[i], dist); - } -} - -void coeff_search(inout SliceContext sc) -{ - uvec2 img_size = imageSize(src[0]); - uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0, - gl_NumWorkGroups.x, 0); - uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1, - gl_NumWorkGroups.x, 0); - uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0, - gl_NumWorkGroups.y, 0); - uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1, - gl_NumWorkGroups.y, 0); - - for (uint y = sys + gl_LocalInvocationID.y; y < sye; y += gl_WorkGroupSize.y) { - for (uint x = sxs + gl_LocalInvocationID.x; x < sxe; x += gl_WorkGroupSize.x) { - process(ivec2(x, y)); - } - } - - if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) { - uint min_score = 0xFFFFFFFF; - uint min_idx = 3; - for (int i = 0; i < NUM_CHECKS; i++) { - if (score_mode[i] < min_score) { - min_score = score_mode[i]; - min_idx = i; - } - } - sc.slice_rct_coef = rct_y_coeff[min_idx]; - } -} - -void main(void) -{ - if (force_pcm == 1) - return; - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - coeff_search(slice_ctx[slice_idx]); -} -- 2.49.1 From 10545089bfaf3a97f270af1c337f0f5b1e08fc8a Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:46:29 +0000 Subject: [PATCH 104/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_reset.comp | 57 ------------------------------- 1 file changed, 57 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_reset.comp diff --git a/libavcodec/vulkan/ffv1_reset.comp b/libavcodec/vulkan/ffv1_reset.comp deleted file mode 100644 index cfb7dcc444..0000000000 --- a/libavcodec/vulkan/ffv1_reset.comp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -void main(void) -{ - const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - - if (key_frame == 0 && - slice_ctx[slice_idx].slice_reset_contexts == false) - return; - - const uint8_t qidx = slice_ctx[slice_idx].quant_table_idx[gl_WorkGroupID.z]; - uint contexts = context_count[qidx]; - uint64_t slice_state_off = uint64_t(slice_state) + - slice_idx*plane_state_size*codec_planes; - -#ifdef GOLOMB - uint64_t start = slice_state_off + - (gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) + gl_LocalInvocationID.x)*VLC_STATE_SIZE; - for (uint x = gl_LocalInvocationID.x; x < contexts; x += gl_WorkGroupSize.x) { - VlcState sb = VlcState(start); - sb.drift = int16_t(0); - sb.error_sum = uint16_t(4); - sb.bias = int8_t(0); - sb.count = uint8_t(1); - start += gl_WorkGroupSize.x*VLC_STATE_SIZE; - } -#else - uint64_t start = slice_state_off + - gl_WorkGroupID.z*plane_state_size + - (gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */ - uint count_total = contexts*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */); - for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) { - u32buf(start).v = 0x80808080; - start += gl_WorkGroupSize.x*(CONTEXT_SIZE >> 3 /* 1/8th of context */); - } -#endif -} -- 2.49.1 
From 9ad230315c5911e06966ecae75170a126489fc5b Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:46:39 +0000 Subject: [PATCH 105/118] Changing vulkan file directory --- libavcodec/vulkan/ffv1_vlc.comp | 159 -------------------------------- 1 file changed, 159 deletions(-) delete mode 100644 libavcodec/vulkan/ffv1_vlc.comp diff --git a/libavcodec/vulkan/ffv1_vlc.comp b/libavcodec/vulkan/ffv1_vlc.comp deleted file mode 100644 index 32a6ca9f37..0000000000 --- a/libavcodec/vulkan/ffv1_vlc.comp +++ /dev/null @@ -1,159 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#define VLC_STATE_SIZE 8 -layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer VlcState { - uint32_t error_sum; - int16_t drift; - int8_t bias; - uint8_t count; -}; - -void update_vlc_state(inout VlcState state, const int v) -{ - int drift = state.drift; - int count = state.count; - int bias = state.bias; - state.error_sum += uint16_t(abs(v)); - drift += v; - - if (count == 128) { // FIXME: variable - count >>= 1; - drift >>= 1; - state.error_sum >>= 1; - } - count++; - - if (drift <= -count) { - bias = max(bias - 1, -128); - drift = max(drift + count, -count + 1); - } else if (drift > 0) { - bias = min(bias + 1, 127); - drift = min(drift - count, 0); - } - - state.bias = int8_t(bias); - state.drift = int16_t(drift); - state.count = uint8_t(count); -} - -struct Symbol { - uint32_t bits; - uint32_t val; -}; - -Symbol set_ur_golomb(int i, int k, int limit, int esc_len) -{ - int e; - Symbol sym; - -#ifdef DEBUG - if (i < 0) - debugPrintfEXT("Error: i is zero!"); -#endif - - e = i >> k; - if (e < limit) { - sym.bits = e + k + 1; - sym.val = (1 << k) + zero_extend(i, k); - } else { - sym.bits = limit + esc_len; - sym.val = i - limit + 1; - } - - return sym; -} - -/** - * write signed golomb rice code (ffv1). 
- */ -Symbol set_sr_golomb(int i, int k, int limit, int esc_len) -{ - int v; - - v = -2 * i - 1; - v ^= (v >> 31); - - return set_ur_golomb(v, k, limit, esc_len); -} - -Symbol get_vlc_symbol(inout VlcState state, int v, int bits) -{ - int i, k, code; - Symbol sym; - v = fold(v - int(state.bias), bits); - - i = state.count; - k = 0; - while (i < state.error_sum) { // FIXME: optimize - k++; - i += i; - } - -#ifdef DEBUG - if (k > 16) - debugPrintfEXT("Error: k > 16!"); -#endif - - code = v ^ ((2 * state.drift + state.count) >> 31); - - update_vlc_state(state, v); - - return set_sr_golomb(code, k, 12, bits); -} - -uint get_ur_golomb(inout GetBitContext gb, int k, int limit, int esc_len) -{ - for (uint i = 0; i < 12; i++) - if (get_bit(gb)) - return get_bits(gb, k) + (i << k); - - return get_bits(gb, esc_len) + 11; -} - -int get_sr_golomb(inout GetBitContext gb, int k, int limit, int esc_len) -{ - int v = int(get_ur_golomb(gb, k, limit, esc_len)); - return (v >> 1) ^ -(v & 1); -} - -int read_vlc_symbol(inout GetBitContext gb, inout VlcState state, int bits) -{ - int k, i, v, ret; - - i = state.count; - k = 0; - while (i < state.error_sum) { // FIXME: optimize - k++; - i += i; - } - - v = get_sr_golomb(gb, k, 12, bits); - - v ^= ((2 * state.drift + state.count) >> 31); - - ret = fold(v + state.bias, bits); - - update_vlc_state(state, v); - - return ret; -} -- 2.49.1 From 8c5ff2dfea361bda7859ac22a0c39b109cf29e64 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:46:57 +0000 Subject: [PATCH 106/118] Changing vulkan file directory --- libavcodec/vulkan/prores_raw.comp | 347 ------------------------------ 1 file changed, 347 deletions(-) delete mode 100644 libavcodec/vulkan/prores_raw.comp diff --git a/libavcodec/vulkan/prores_raw.comp b/libavcodec/vulkan/prores_raw.comp deleted file mode 100644 index 89eece3c7e..0000000000 --- a/libavcodec/vulkan/prores_raw.comp +++ /dev/null @@ -1,347 +0,0 @@ -/* - * ProRes RAW decoder - * - * Copyright 
(c) 2025 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#define I16(x) (int16_t(x)) - -#define COMP_ID (gl_LocalInvocationID.z) -#define BLOCK_ID (gl_LocalInvocationID.y) -#define ROW_ID (gl_LocalInvocationID.x) - -GetBitContext gb; -shared float btemp[gl_WorkGroupSize.z][16][64] = { }; -shared float block[gl_WorkGroupSize.z][16][64]; - -void idct8_horiz(const uint row_id) -{ - float t0, t1, t2, t3, t4, t5, t6, t7, u8; - float u0, u1, u2, u3, u4, u5, u6, u7; - - /* Input */ - t0 = block[COMP_ID][BLOCK_ID][8*row_id + 0]; - u4 = block[COMP_ID][BLOCK_ID][8*row_id + 1]; - t2 = block[COMP_ID][BLOCK_ID][8*row_id + 2]; - u6 = block[COMP_ID][BLOCK_ID][8*row_id + 3]; - t1 = block[COMP_ID][BLOCK_ID][8*row_id + 4]; - u5 = block[COMP_ID][BLOCK_ID][8*row_id + 5]; - t3 = block[COMP_ID][BLOCK_ID][8*row_id + 6]; - u7 = block[COMP_ID][BLOCK_ID][8*row_id + 7]; - - /* Embedded scaled inverse 4-point Type-II DCT */ - u0 = t0 + t1; - u1 = t0 - t1; - u3 = t2 + t3; - u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3; - t0 = u0 + u3; - t3 = u0 - u3; - t1 = u1 + u2; - t2 = u1 - u2; - - /* Embedded scaled inverse 4-point Type-IV DST */ - t5 = u5 + u6; - t6 = u5 - u6; - t7 = u4 + u7; - t4 = u4 - u7; - u7 = t7 + t5; - u5 = (t7 - 
t5)*(1.4142135623730950488016887242097f); - u8 = (t4 + t6)*(1.8477590650225735122563663787936f); - u4 = u8 - t4*(1.0823922002923939687994464107328f); - u6 = u8 - t6*(2.6131259297527530557132863468544f); - t7 = u7; - t6 = t7 - u6; - t5 = t6 + u5; - t4 = t5 - u4; - - /* Butterflies */ - u0 = t0 + t7; - u7 = t0 - t7; - u6 = t1 + t6; - u1 = t1 - t6; - u2 = t2 + t5; - u5 = t2 - t5; - u4 = t3 + t4; - u3 = t3 - t4; - - /* Output */ - btemp[COMP_ID][BLOCK_ID][0*8 + row_id] = u0; - btemp[COMP_ID][BLOCK_ID][1*8 + row_id] = u1; - btemp[COMP_ID][BLOCK_ID][2*8 + row_id] = u2; - btemp[COMP_ID][BLOCK_ID][3*8 + row_id] = u3; - btemp[COMP_ID][BLOCK_ID][4*8 + row_id] = u4; - btemp[COMP_ID][BLOCK_ID][5*8 + row_id] = u5; - btemp[COMP_ID][BLOCK_ID][6*8 + row_id] = u6; - btemp[COMP_ID][BLOCK_ID][7*8 + row_id] = u7; -} - -void idct8_vert(const uint row_id) -{ - float t0, t1, t2, t3, t4, t5, t6, t7, u8; - float u0, u1, u2, u3, u4, u5, u6, u7; - - /* Input */ - t0 = btemp[COMP_ID][BLOCK_ID][8*row_id + 0] + 0.5f; // NOTE - u4 = btemp[COMP_ID][BLOCK_ID][8*row_id + 1]; - t2 = btemp[COMP_ID][BLOCK_ID][8*row_id + 2]; - u6 = btemp[COMP_ID][BLOCK_ID][8*row_id + 3]; - t1 = btemp[COMP_ID][BLOCK_ID][8*row_id + 4]; - u5 = btemp[COMP_ID][BLOCK_ID][8*row_id + 5]; - t3 = btemp[COMP_ID][BLOCK_ID][8*row_id + 6]; - u7 = btemp[COMP_ID][BLOCK_ID][8*row_id + 7]; - - /* Embedded scaled inverse 4-point Type-II DCT */ - u0 = t0 + t1; - u1 = t0 - t1; - u3 = t2 + t3; - u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3; - t0 = u0 + u3; - t3 = u0 - u3; - t1 = u1 + u2; - t2 = u1 - u2; - - /* Embedded scaled inverse 4-point Type-IV DST */ - t5 = u5 + u6; - t6 = u5 - u6; - t7 = u4 + u7; - t4 = u4 - u7; - u7 = t7 + t5; - u5 = (t7 - t5)*(1.4142135623730950488016887242097f); - u8 = (t4 + t6)*(1.8477590650225735122563663787936f); - u4 = u8 - t4*(1.0823922002923939687994464107328f); - u6 = u8 - t6*(2.6131259297527530557132863468544f); - t7 = u7; - t6 = t7 - u6; - t5 = t6 + u5; - t4 = t5 - u4; - - /* Butterflies */ - 
u0 = t0 + t7; - u7 = t0 - t7; - u6 = t1 + t6; - u1 = t1 - t6; - u2 = t2 + t5; - u5 = t2 - t5; - u4 = t3 + t4; - u3 = t3 - t4; - - /* Output */ - block[COMP_ID][BLOCK_ID][0*8 + row_id] = u0; - block[COMP_ID][BLOCK_ID][1*8 + row_id] = u1; - block[COMP_ID][BLOCK_ID][2*8 + row_id] = u2; - block[COMP_ID][BLOCK_ID][3*8 + row_id] = u3; - block[COMP_ID][BLOCK_ID][4*8 + row_id] = u4; - block[COMP_ID][BLOCK_ID][5*8 + row_id] = u5; - block[COMP_ID][BLOCK_ID][6*8 + row_id] = u6; - block[COMP_ID][BLOCK_ID][7*8 + row_id] = u7; -} - -int16_t get_value(int16_t codebook) -{ - const int16_t switch_bits = codebook >> 8; - const int16_t rice_order = codebook & I16(0xf); - const int16_t exp_order = (codebook >> 4) & I16(0xf); - - uint32_t b = show_bits(gb, 32); - if (expectEXT(b == 0, false)) - return I16(0); - int16_t q = I16(31) - I16(findMSB(b)); - - if ((b & 0x80000000) != 0) { - skip_bits(gb, 1 + rice_order); - return I16((b & 0x7FFFFFFF) >> (31 - rice_order)); - } - - if (q <= switch_bits) { - skip_bits(gb, q + rice_order + 1); - return I16((q << rice_order) + - (((b << (q + 1)) >> 1) >> (31 - rice_order))); - } - - int16_t bits = exp_order + (q << 1) - switch_bits; - skip_bits(gb, bits); - return I16((b >> (32 - bits)) + - ((switch_bits + 1) << rice_order) - - (1 << exp_order)); -} - -#define TODCCODEBOOK(x) ((x + 1) >> 1) - -void read_dc_vals(const uint nb_blocks) -{ - int16_t dc, dc_add; - int16_t prev_dc = I16(0), sign = I16(0); - - /* Special handling for first block */ - dc = get_value(I16(700)); - prev_dc = (dc >> 1) ^ -(dc & I16(1)); - btemp[COMP_ID][0][0] = prev_dc; - - for (uint n = 1; n < nb_blocks; n++) { - if (expectEXT(left_bits(gb) <= 0, false)) - break; - - uint8_t dc_codebook; - if ((n & 15) == 1) - dc_codebook = uint8_t(100); - else - dc_codebook = dc_cb[min(TODCCODEBOOK(dc), 13 - 1)]; - - dc = get_value(dc_codebook); - - sign = sign ^ dc & int16_t(1); - dc_add = (-sign ^ I16(TODCCODEBOOK(dc))) + sign; - sign = I16(dc_add < 0); - prev_dc += dc_add; - - 
btemp[COMP_ID][n][0] = prev_dc; - } -} - -void read_ac_vals(const uint nb_blocks) -{ - const uint nb_codes = nb_blocks << 6; - const uint log2_nb_blocks = findMSB(nb_blocks); - const uint block_mask = (1 << log2_nb_blocks) - 1; - - int16_t ac, rn, ln; - int16_t ac_codebook = I16(49); - int16_t rn_codebook = I16( 0); - int16_t ln_codebook = I16(66); - int16_t sign; - int16_t val; - - for (uint n = nb_blocks; n <= nb_codes;) { - if (expectEXT(left_bits(gb) <= 0, false)) - break; - - ln = get_value(ln_codebook); - for (uint i = 0; i < ln; i++) { - if (expectEXT(left_bits(gb) <= 0, false)) - break; - - if (expectEXT(n >= nb_codes, false)) - break; - - ac = get_value(ac_codebook); - ac_codebook = ac_cb[min(ac, 95 - 1)]; - sign = -int16_t(get_bit(gb)); - - val = ((ac + I16(1)) ^ sign) - sign; - btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val; - - n++; - } - - if (expectEXT(n >= nb_codes, false)) - break; - - rn = get_value(rn_codebook); - rn_codebook = rn_cb[min(rn, 28 - 1)]; - - n += rn + 1; - if (expectEXT(n >= nb_codes, false)) - break; - - if (expectEXT(left_bits(gb) <= 0, false)) - break; - - ac = get_value(ac_codebook); - sign = -int16_t(get_bit(gb)); - - val = ((ac + I16(1)) ^ sign) - sign; - btemp[COMP_ID][n & block_mask][n >> log2_nb_blocks] = val; - - ac_codebook = ac_cb[min(ac, 95 - 1)]; - ln_codebook = ln_cb[min(ac, 15 - 1)]; - - n++; - } -} - -void main(void) -{ - const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - TileData td = tile_data[tile_idx]; - - if (expectEXT(td.pos.x >= frame_size.x, false)) - return; - - uint64_t pkt_offset = uint64_t(pkt_data) + td.offset; - u8vec2buf hdr_data = u8vec2buf(pkt_offset); - float qscale = float(pack16(hdr_data[0].v.yx)) / 2.0f; - - ivec4 size = ivec4(td.size, - pack16(hdr_data[2].v.yx), - pack16(hdr_data[1].v.yx), - pack16(hdr_data[3].v.yx)); - size[0] = size[0] - size[1] - size[2] - size[3] - 8; - if (expectEXT(size[0] < 0, false)) - return; - - const ivec2 offs = td.pos + 
ivec2(COMP_ID & 1, COMP_ID >> 1); - const uint w = min(tile_size.x, frame_size.x - td.pos.x) / 2; - const uint nb_blocks = w / 8; - - const ivec4 comp_offset = ivec4(size[2] + size[1] + size[3], - size[2], - 0, - size[2] + size[1]); - - if (BLOCK_ID == 0 && ROW_ID == 0) { - init_get_bits(gb, u8buf(pkt_offset + 8 + comp_offset[COMP_ID]), - size[COMP_ID]); - read_dc_vals(nb_blocks); - read_ac_vals(nb_blocks); - } - - barrier(); - - [[unroll]] - for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x) - block[COMP_ID][BLOCK_ID][i] = (btemp[COMP_ID][BLOCK_ID][scan[i]] / 16384.0) * - (float(qmat[i]) / 295.0) * - idct_8x8_scales[i] * qscale; - - barrier(); - -#ifdef PARALLEL_ROWS - idct8_horiz(ROW_ID); - - barrier(); - - idct8_vert(ROW_ID); -#else - for (uint j = 0; j < 8; j++) - idct8_horiz(j); - - barrier(); - - for (uint j = 0; j < 8; j++) - idct8_vert(j); -#endif - - barrier(); - - [[unroll]] - for (uint i = gl_LocalInvocationID.x; i < 64; i += gl_WorkGroupSize.x) - imageStore(dst, - offs + 2*ivec2(BLOCK_ID*8 + (i & 7), i >> 3), - vec4(block[COMP_ID][BLOCK_ID][i])); -} -- 2.49.1 From 1968ad0cfccd3c08ee9ef591eb60a04e2ef8e503 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 13:47:07 +0000 Subject: [PATCH 107/118] Changing vulkan file directory --- libavcodec/vulkan/rangecoder.comp | 241 ------------------------------ 1 file changed, 241 deletions(-) delete mode 100644 libavcodec/vulkan/rangecoder.comp diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp deleted file mode 100644 index b6b6c0490f..0000000000 --- a/libavcodec/vulkan/rangecoder.comp +++ /dev/null @@ -1,241 +0,0 @@ -/* - * FFv1 codec - * - * Copyright (c) 2024 Lynne <dev@lynne.ee> - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -struct RangeCoder { - uint64_t bytestream_start; - uint64_t bytestream; - uint64_t bytestream_end; - - int low; - int range; - uint16_t outstanding_count; - uint8_t outstanding_byte; -}; - -#ifdef FULL_RENORM -/* Full renorm version that can handle outstanding_byte == 0xFF */ -void renorm_encoder(inout RangeCoder c) -{ - int bs_cnt = 0; - u8buf bytestream = u8buf(c.bytestream); - - if (c.outstanding_byte == 0xFF) { - c.outstanding_byte = uint8_t(c.low >> 8); - } else if (c.low <= 0xFF00) { - bytestream[bs_cnt++].v = c.outstanding_byte; - uint16_t cnt = c.outstanding_count; - for (; cnt > 0; cnt--) - bytestream[bs_cnt++].v = uint8_t(0xFF); - c.outstanding_count = uint16_t(0); - c.outstanding_byte = uint8_t(c.low >> 8); - } else if (c.low >= 0x10000) { - bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1); - uint16_t cnt = c.outstanding_count; - for (; cnt > 0; cnt--) - bytestream[bs_cnt++].v = uint8_t(0x00); - c.outstanding_count = uint16_t(0); - c.outstanding_byte = uint8_t(bitfieldExtract(c.low, 8, 8)); - } else { - c.outstanding_count++; - } - - c.bytestream += bs_cnt; - c.range <<= 8; - c.low = bitfieldInsert(0, c.low, 8, 8); -} - -#else - -/* Cannot deal with outstanding_byte == -1 in the name of speed */ -void 
renorm_encoder(inout RangeCoder c) -{ - uint16_t oc = c.outstanding_count + uint16_t(1); - int low = c.low; - - c.range <<= 8; - c.low = bitfieldInsert(0, low, 8, 8); - - if (low > 0xFF00 && low < 0x10000) { - c.outstanding_count = oc; - return; - } - - u8buf bs = u8buf(c.bytestream); - uint8_t outstanding_byte = c.outstanding_byte; - - c.bytestream = uint64_t(bs) + oc; - c.outstanding_count = uint16_t(0); - c.outstanding_byte = uint8_t(low >> 8); - - uint8_t obs = uint8_t(low > 0xFF00); - uint8_t fill = obs - uint8_t(1); /* unsigned underflow */ - - bs[0].v = outstanding_byte + obs; - for (int i = 1; i < oc; i++) - bs[i].v = fill; -} -#endif - -void put_rac_internal(inout RangeCoder c, const int range1, bool bit) -{ -#ifdef DEBUG - if (range1 >= c.range) - debugPrintfEXT("Error: range1 >= c.range"); - if (range1 <= 0) - debugPrintfEXT("Error: range1 <= 0"); -#endif - - int ranged = c.range - range1; - c.low += bit ? ranged : 0; - c.range = bit ? range1 : ranged; - - if (expectEXT(c.range < 0x100, false)) - renorm_encoder(c); -} - -void put_rac_direct(inout RangeCoder c, inout uint8_t state, bool bit) -{ - put_rac_internal(c, (c.range * state) >> 8, bit); - state = zero_one_state[(uint(bit) << 8) + state]; -} - -void put_rac(inout RangeCoder c, uint64_t state, bool bit) -{ - put_rac_direct(c, u8buf(state).v, bit); -} - -/* Equiprobable bit */ -void put_rac_equi(inout RangeCoder c, bool bit) -{ - put_rac_internal(c, c.range >> 1, bit); -} - -void put_rac_terminate(inout RangeCoder c) -{ - int range1 = (c.range * 129) >> 8; - -#ifdef DEBUG - if (range1 >= c.range) - debugPrintfEXT("Error: range1 >= c.range"); - if (range1 <= 0) - debugPrintfEXT("Error: range1 <= 0"); -#endif - - c.range -= range1; - if (expectEXT(c.range < 0x100, false)) - renorm_encoder(c); -} - -/* Return the number of bytes written. 
*/ -uint32_t rac_terminate(inout RangeCoder c) -{ - put_rac_terminate(c); - c.range = uint16_t(0xFF); - c.low += 0xFF; - renorm_encoder(c); - c.range = uint16_t(0xFF); - renorm_encoder(c); - -#ifdef DEBUG - if (c.low != 0) - debugPrintfEXT("Error: c.low != 0"); - if (c.range < 0x100) - debugPrintfEXT("Error: range < 0x100"); -#endif - - return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start)); -} - -void rac_init(out RangeCoder r, u8buf data, uint buf_size) -{ - r.bytestream_start = uint64_t(data); - r.bytestream = uint64_t(data); - r.bytestream_end = uint64_t(data) + buf_size; - r.low = 0; - r.range = 0xFF00; - r.outstanding_count = uint16_t(0); - r.outstanding_byte = uint8_t(0xFF); -} - -/* Decoder */ -uint overread = 0; -bool corrupt = false; - -void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size) -{ - overread = 0; - corrupt = false; - - /* Skip priming bytes */ - rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2); - - u8vec2 prime = u8vec2buf(data).v; - /* Switch endianness of the priming bytes */ - r.low = pack16(prime.yx); - - if (r.low >= 0xFF00) { - r.low = 0xFF00; - r.bytestream_end = uint64_t(data) + 2; - } -} - -void refill(inout RangeCoder c) -{ - c.range <<= 8; - c.low <<= 8; - if (expectEXT(c.bytestream < c.bytestream_end, false)) { - c.low |= u8buf(c.bytestream).v; - c.bytestream++; - } else { - overread++; - } -} - -bool get_rac_internal(inout RangeCoder c, const int range1) -{ - int ranged = c.range - range1; - bool bit = c.low >= ranged; - c.low -= bit ? ranged : 0; - c.range = (bit ? 0 : ranged) + (bit ? range1 : 0); - - if (expectEXT(c.range < 0x100, false)) - refill(c); - - return bit; -} - -bool get_rac_direct(inout RangeCoder c, inout uint8_t state) -{ - bool bit = get_rac_internal(c, c.range * state >> 8); - state = zero_one_state[state + (bit ? 
256 : 0)]; - return bit; -} - -bool get_rac(inout RangeCoder c, uint64_t state) -{ - return get_rac_direct(c, u8buf(state).v); -} - -bool get_rac_equi(inout RangeCoder c) -{ - return get_rac_internal(c, c.range >> 1); -} -- 2.49.1 From 31b2af9037bc07922a9fa8f4b122496adb678861 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:07:07 +0000 Subject: [PATCH 108/118] Changing vulkan file directory --- libavfilter/vulkan_glslang.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/vulkan_glslang.c b/libavfilter/vulkan_glslang.c index 9aa41567a3..31e7414e90 100644 --- a/libavfilter/vulkan_glslang.c +++ b/libavfilter/vulkan_glslang.c @@ -16,4 +16,4 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/vulkan_glslang.c" +#include "libavutil/vulkan/vulkan_glslang.c" -- 2.49.1 From 7301c8ecbb489974974f195b33f4f5dd7b28ea97 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:07:55 +0000 Subject: [PATCH 109/118] Changing vulkan file directory --- libavfilter/vulkan_shaderc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c index 9f60bf4dfd..6b80cdb111 100644 --- a/libavfilter/vulkan_shaderc.c +++ b/libavfilter/vulkan_shaderc.c @@ -16,4 +16,4 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/vulkan_shaderc.c" +#include "libavutil/vulkan/vulkan_shaderc.c" -- 2.49.1 From a8adcc58e28fa81d1ae3b8f5fecf73516b524af6 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:08:43 +0000 Subject: [PATCH 110/118] Changing vulkan file directory --- libavfilter/vulkan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c index fc8a1fa47b..f13a59e6d3 100644 --- a/libavfilter/vulkan.c +++ b/libavfilter/vulkan.c @@ -16,4 +16,4 @@ * 
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/vulkan.c" +#include "libavutil/vulkan/vulkan.c" -- 2.49.1 From d7e2e987998eabd0f5b37f8e6d48acbd722cfce3 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:09:23 +0000 Subject: [PATCH 111/118] Changing vulkan file directory --- libavcodec/vulkan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/vulkan.c b/libavcodec/vulkan.c index fc8a1fa47b..f13a59e6d3 100644 --- a/libavcodec/vulkan.c +++ b/libavcodec/vulkan.c @@ -16,4 +16,4 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/vulkan.c" +#include "libavutil/vulkan/vulkan.c" -- 2.49.1 From ddd6d34d3f8d3cf6e7ba158c9108bb79f9cbb0dd Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:09:55 +0000 Subject: [PATCH 112/118] Changing vulkan file directory --- libavcodec/vulkan_glslang.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/vulkan_glslang.c b/libavcodec/vulkan_glslang.c index 9aa41567a3..31e7414e90 100644 --- a/libavcodec/vulkan_glslang.c +++ b/libavcodec/vulkan_glslang.c @@ -16,4 +16,4 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/vulkan_glslang.c" +#include "libavutil/vulkan/vulkan_glslang.c" -- 2.49.1 From 11c667d644a273439210626809821532f1867def Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:10:40 +0000 Subject: [PATCH 113/118] Changing vulkan file directory --- libavcodec/vulkan_shaderc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/vulkan_shaderc.c b/libavcodec/vulkan_shaderc.c index 9f60bf4dfd..6b80cdb111 100644 --- a/libavcodec/vulkan_shaderc.c +++ b/libavcodec/vulkan_shaderc.c @@ -16,4 +16,4 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include 
"libavutil/vulkan_shaderc.c" +#include "libavutil/vulkan/vulkan_shaderc.c" -- 2.49.1 From c19c4c852935bcc1a455b34368d0d0c21e61e5e4 Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:16:36 +0000 Subject: [PATCH 114/118] Changing vulkan file directory --- libavcodec/vulkan/Makefile | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index d8e1471fa6..bf6c7c81f3 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -3,19 +3,11 @@ GEN_CLEANSUFFIXES = *.o *.c *.d clean:: $(RM) $(GEN_CLEANSUFFIXES:%=libavcodec/vulkan/%) -OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \ - vulkan/rangecoder.o vulkan/ffv1_vlc.o \ - vulkan/ffv1_common.o vulkan/ffv1_reset.o \ - vulkan/ffv1_enc_rct.o vulkan/ffv1_enc_setup.o \ - vulkan/ffv1_rct_search.o vulkan/ffv1_enc.o +OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/vulkan_source.o -OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ - vulkan/rangecoder.o vulkan/ffv1_vlc.o \ - vulkan/ffv1_common.o vulkan/ffv1_reset.o \ - vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o +OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/vulkan_source.o -OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/common.o \ - vulkan/prores_raw.o +OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/vulkan_source.o VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp)) .SECONDARY: $(VULKAN:.comp=.c) -- 2.49.1 From 12548b1e6166631ddaf16c55fc1febaa7f8cce5f Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:20:17 +0000 Subject: [PATCH 115/118] Changing vulkan file directory --- libavcodec/Makefile | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 3d036de4b6..5463dc8b1d 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -276,7 +276,7 @@ OBJS-$(CONFIG_AV1_MEDIACODEC_ENCODER) += 
mediacodecenc.o OBJS-$(CONFIG_AV1_NVENC_ENCODER) += nvenc_av1.o nvenc.o OBJS-$(CONFIG_AV1_QSV_ENCODER) += qsvenc_av1.o OBJS-$(CONFIG_AV1_VAAPI_ENCODER) += vaapi_encode_av1.o av1_levels.o -OBJS-$(CONFIG_AV1_VULKAN_ENCODER) += vulkan_encode.o vulkan_encode_av1.o \ +OBJS-$(CONFIG_AV1_VULKAN_ENCODER) += vulkan/vulkan_encode.o vulkan/vulkan_encode_av1.o \ hw_base_encode.o av1_levels.o OBJS-$(CONFIG_AVRN_DECODER) += avrndec.o OBJS-$(CONFIG_AVRP_DECODER) += r210dec.o @@ -378,7 +378,7 @@ OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o OBJS-$(CONFIG_FASTAUDIO_DECODER) += fastaudio.o OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1_parse.o ffv1.o OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1_parse.o ffv1.o -OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1_vulkan.o ffv1enc_vulkan.o +OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o vulkan/ffv1_vulkan.o vulkan/ffv1enc_vulkan.o OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o OBJS-$(CONFIG_FIC_DECODER) += fic.o OBJS-$(CONFIG_FITS_DECODER) += fitsdec.o fits.o @@ -442,7 +442,7 @@ OBJS-$(CONFIG_H264_QSV_ENCODER) += qsvenc_h264.o OBJS-$(CONFIG_H264_RKMPP_DECODER) += rkmppdec.o OBJS-$(CONFIG_H264_VAAPI_ENCODER) += vaapi_encode_h264.o h264_levels.o \ h2645data.o hw_base_encode_h264.o -OBJS-$(CONFIG_H264_VULKAN_ENCODER) += vulkan_encode.o vulkan_encode_h264.o \ +OBJS-$(CONFIG_H264_VULKAN_ENCODER) += vulkan/vulkan_encode.o vulkan/vulkan_encode_h264.o \ hw_base_encode.o hw_base_encode_h264.o \ h264_levels.o h2645data.o OBJS-$(CONFIG_H264_VIDEOTOOLBOX_ENCODER) += videotoolboxenc.o @@ -471,7 +471,7 @@ OBJS-$(CONFIG_HEVC_QSV_ENCODER) += qsvenc_hevc.o hevc/ps_enc.o OBJS-$(CONFIG_HEVC_RKMPP_DECODER) += rkmppdec.o OBJS-$(CONFIG_HEVC_VAAPI_ENCODER) += vaapi_encode_h265.o h265_profile_level.o \ h2645data.o hw_base_encode_h265.o -OBJS-$(CONFIG_HEVC_VULKAN_ENCODER) += vulkan_encode.o vulkan_encode_h265.o \ +OBJS-$(CONFIG_HEVC_VULKAN_ENCODER) += vulkan/vulkan_encode.o vulkan/vulkan_encode_h265.o \ hw_base_encode.o hw_base_encode_h265.o \ 
h265_profile_level.o h2645data.o OBJS-$(CONFIG_HEVC_V4L2M2M_DECODER) += v4l2_m2m_dec.o @@ -1025,7 +1025,7 @@ OBJS-$(CONFIG_NVDEC) += nvdec.o OBJS-$(CONFIG_VAAPI) += vaapi_decode.o OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o OBJS-$(CONFIG_VDPAU) += vdpau.o -OBJS-$(CONFIG_VULKAN) += vulkan.o vulkan_video.o +OBJS-$(CONFIG_VULKAN) += vulkan.o vulkan/vulkan_video.o OBJS-$(CONFIG_AV1_D3D11VA_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_DXVA2_HWACCEL) += dxva2_av1.o @@ -1034,8 +1034,8 @@ OBJS-$(CONFIG_AV1_NVDEC_HWACCEL) += nvdec_av1.o OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL) += videotoolbox_av1.o -OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o -OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o +OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan/vulkan_decode.o vulkan/vulkan_av1.o +OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/vulkan_decode.o vulkan/ffv1_vulkan.o vulkan/vulkan_ffv1.o OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o @@ -1046,7 +1046,7 @@ OBJS-$(CONFIG_H264_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o -OBJS-$(CONFIG_H264_VULKAN_HWACCEL) += vulkan_decode.o vulkan_h264.o +OBJS-$(CONFIG_H264_VULKAN_HWACCEL) += vulkan/vulkan_decode.o vulkan/vulkan_h264.o OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o OBJS-$(CONFIG_HEVC_D3D12VA_HWACCEL) += dxva2_hevc.o d3d12va_hevc.o @@ -1054,7 +1054,7 @@ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o 
-OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL) += vulkan_decode.o vulkan_hevc.o +OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL) += vulkan/vulkan_decode.o vulkan/vulkan_hevc.o OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o OBJS-$(CONFIG_MJPEG_VAAPI_HWACCEL) += vaapi_mjpeg.o OBJS-$(CONFIG_MPEG1_NVDEC_HWACCEL) += nvdec_mpeg12.o @@ -1088,10 +1088,10 @@ OBJS-$(CONFIG_VP9_NVDEC_HWACCEL) += nvdec_vp9.o OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += vaapi_vp9.o OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o -OBJS-$(CONFIG_VP9_VULKAN_HWACCEL) += vulkan_decode.o vulkan_vp9.o +OBJS-$(CONFIG_VP9_VULKAN_HWACCEL) += vulkan/vulkan_decode.o vulkan/vulkan_vp9.o OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o OBJS-$(CONFIG_VVC_VAAPI_HWACCEL) += vaapi_vvc.o -OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan_decode.o vulkan_prores_raw.o +OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/vulkan_decode.o vulkan/vulkan_prores_raw.o # Objects duplicated from other libraries for shared builds SHLIBOBJS += log2_tab.o reverse.o @@ -1334,8 +1334,8 @@ SKIPHEADERS-$(CONFIG_QSVENC) += qsvenc.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_decode.h vaapi_hevc.h vaapi_encode.h SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h vdpau_internal.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h -SKIPHEADERS-$(CONFIG_VULKAN) += ffv1_vulkan.h vulkan_video.h \ - vulkan_encode.h vulkan_decode.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan/ffv1_vulkan.h vulkan/vulkan_video.h \ + vulkan/vulkan_encode.h vulkan/vulkan_decode.h SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h v4l2_m2m.h SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h -- 2.49.1 From ac50d7bc58d1ef067ddd407fc52621306a609a2f Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:21:43 +0000 Subject: [PATCH 116/118] Changing vulkan file directory --- libavfilter/vulkan/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavfilter/vulkan/Makefile 
b/libavfilter/vulkan/Makefile index c77aaf4f6b..6170982bda 100644 --- a/libavfilter/vulkan/Makefile +++ b/libavfilter/vulkan/Makefile @@ -3,8 +3,8 @@ GEN_CLEANSUFFIXES = *.o *.c *.d clean:: $(RM) $(GEN_CLEANSUFFIXES:%=libavfilter/vulkan/%) -OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/bwdif.o -OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vulkan/debayer.o +OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/vulkan_source.o +OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vulkan/vulkan_source.o VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavfilter/vulkan/*.comp)) .SECONDARY: $(VULKAN:.comp=.c) -- 2.49.1 From bfdd56ab328925bed2e7de0b1dd00ebf6f4c390a Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:27:32 +0000 Subject: [PATCH 117/118] Changing vulkan file directory --- libavfilter/Makefile | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 07fb4c3d6c..7d59fe9e56 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -202,7 +202,7 @@ OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o OBJS-$(CONFIG_AVGBLUR_FILTER) += vf_avgblur.o OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ opencl/avgblur.o boxblur.o -OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vf_avgblur_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vulkan/vf_avgblur_vulkan.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_BACKGROUNDKEY_FILTER) += vf_backgroundkey.o OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o @@ -210,10 +210,10 @@ OBJS-$(CONFIG_BILATERAL_FILTER) += vf_bilateral.o OBJS-$(CONFIG_BILATERAL_CUDA_FILTER) += vf_bilateral_cuda.o vf_bilateral_cuda.ptx.o OBJS-$(CONFIG_BITPLANENOISE_FILTER) += vf_bitplanenoise.o OBJS-$(CONFIG_BLACKDETECT_FILTER) += vf_blackdetect.o -OBJS-$(CONFIG_BLACKDETECT_VULKAN_FILTER) += vf_blackdetect_vulkan.o +OBJS-$(CONFIG_BLACKDETECT_VULKAN_FILTER) += 
vulkan/vf_blackdetect_vulkan.o OBJS-$(CONFIG_BLACKFRAME_FILTER) += vf_blackframe.o OBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o framesync.o -OBJS-$(CONFIG_BLEND_VULKAN_FILTER) += vf_blend_vulkan.o framesync.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_BLEND_VULKAN_FILTER) += vulkan/vf_blend_vulkan.o framesync.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_BLOCKDETECT_FILTER) += vf_blockdetect.o OBJS-$(CONFIG_BLURDETECT_FILTER) += vf_blurdetect.o edge_common.o OBJS-$(CONFIG_BM3D_FILTER) += vf_bm3d.o framesync.o @@ -223,10 +223,10 @@ OBJS-$(CONFIG_BOXBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \ OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o bwdifdsp.o yadif_common.o OBJS-$(CONFIG_BWDIF_CUDA_FILTER) += vf_bwdif_cuda.o vf_bwdif_cuda.ptx.o \ yadif_common.o -OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vf_bwdif_vulkan.o yadif_common.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_BWDIF_VULKAN_FILTER) += vulkan/vf_bwdif_vulkan.o yadif_common.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_CAS_FILTER) += vf_cas.o OBJS-$(CONFIG_CCREPACK_FILTER) += vf_ccrepack.o -OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vf_chromaber_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_CHROMABER_VULKAN_FILTER) += vulkan/vf_chromaber_vulkan.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_CHROMAHOLD_FILTER) += vf_chromakey.o OBJS-$(CONFIG_CHROMAKEY_FILTER) += vf_chromakey.o OBJS-$(CONFIG_CHROMAKEY_CUDA_FILTER) += vf_chromakey_cuda.o vf_chromakey_cuda.ptx.o @@ -331,7 +331,7 @@ OBJS-$(CONFIG_FREI0R_FILTER) += vf_frei0r.o OBJS-$(CONFIG_FSPP_FILTER) += vf_fspp.o qp_table.o OBJS-$(CONFIG_FSYNC_FILTER) += vf_fsync.o OBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o -OBJS-$(CONFIG_GBLUR_VULKAN_FILTER) += vf_gblur_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_GBLUR_VULKAN_FILTER) += vulkan/vf_gblur_vulkan.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_GEQ_FILTER) += vf_geq.o OBJS-$(CONFIG_GRADFUN_FILTER) += vf_gradfun.o OBJS-$(CONFIG_GRAPHMONITOR_FILTER) += f_graphmonitor.o @@ -340,7 +340,7 @@ 
OBJS-$(CONFIG_GREYEDGE_FILTER) += vf_colorconstancy.o OBJS-$(CONFIG_GUIDED_FILTER) += vf_guided.o framesync.o OBJS-$(CONFIG_HALDCLUT_FILTER) += vf_lut3d.o framesync.o OBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o -OBJS-$(CONFIG_HFLIP_VULKAN_FILTER) += vf_flip_vulkan.o vulkan.o +OBJS-$(CONFIG_HFLIP_VULKAN_FILTER) += vulkan/vf_flip_vulkan.o vulkan.o OBJS-$(CONFIG_HISTEQ_FILTER) += vf_histeq.o OBJS-$(CONFIG_HISTOGRAM_FILTER) += vf_histogram.o OBJS-$(CONFIG_HQDN3D_FILTER) += vf_hqdn3d.o @@ -362,7 +362,7 @@ OBJS-$(CONFIG_IDET_FILTER) += vf_idet.o OBJS-$(CONFIG_IL_FILTER) += vf_il.o OBJS-$(CONFIG_INFLATE_FILTER) += vf_neighbor.o OBJS-$(CONFIG_INTERLACE_FILTER) += vf_tinterlace.o -OBJS-$(CONFIG_INTERLACE_VULKAN_FILTER) += vf_interlace_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_INTERLACE_VULKAN_FILTER) += vulkan/vf_interlace_vulkan.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_INTERLEAVE_FILTER) += f_interleave.o OBJS-$(CONFIG_KERNDEINT_FILTER) += vf_kerndeint.o OBJS-$(CONFIG_KIRSCH_FILTER) += vf_convolution.o @@ -371,7 +371,7 @@ OBJS-$(CONFIG_LCEVC_FILTER) += vf_lcevc.o OBJS-$(CONFIG_LATENCY_FILTER) += f_latency.o OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o OBJS-$(CONFIG_LENSFUN_FILTER) += vf_lensfun.o -OBJS-$(CONFIG_LIBPLACEBO_FILTER) += vf_libplacebo.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_LIBPLACEBO_FILTER) += vf_libplacebo.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_LIBVMAF_FILTER) += vf_libvmaf.o framesync.o OBJS-$(CONFIG_LIBVMAF_CUDA_FILTER) += vf_libvmaf.o framesync.o OBJS-$(CONFIG_LIMITDIFF_FILTER) += vf_limitdiff.o framesync.o @@ -406,7 +406,7 @@ OBJS-$(CONFIG_MULTIPLY_FILTER) += vf_multiply.o framesync.o OBJS-$(CONFIG_NEGATE_FILTER) += vf_negate.o OBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o opencl/nlmeans.o -OBJS-$(CONFIG_NLMEANS_VULKAN_FILTER) += vf_nlmeans_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_NLMEANS_VULKAN_FILTER) += vulkan/vf_nlmeans_vulkan.o vulkan.o 
vulkan/vulkan_filter.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o @@ -422,7 +422,7 @@ OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ opencl/overlay.o framesync.o OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o OBJS-$(CONFIG_OVERLAY_VAAPI_FILTER) += vf_overlay_vaapi.o framesync.o vaapi_vpp.o -OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vulkan/vf_overlay_vulkan.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o OBJS-$(CONFIG_PAD_CUDA_FILTER) += vf_pad_cuda.o vf_pad_cuda.ptx.o cuda/load_helper.o @@ -473,11 +473,11 @@ OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_vpp_qsv.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o OBJS-$(CONFIG_SCALE_VT_FILTER) += vf_scale_vt.o scale_eval.o -OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vulkan/vf_scale_vulkan.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale_eval.o framesync.o OBJS-$(CONFIG_SCALE2REF_NPP_FILTER) += vf_scale_npp.o scale_eval.o OBJS-$(CONFIG_SCDET_FILTER) += vf_scdet.o -OBJS-$(CONFIG_SCDET_VULKAN_FILTER) += vf_scdet_vulkan.o +OBJS-$(CONFIG_SCDET_VULKAN_FILTER) += vulkan/vf_scdet_vulkan.o OBJS-$(CONFIG_SCHARR_FILTER) += vf_convolution.o OBJS-$(CONFIG_SCROLL_FILTER) += vf_scroll.o OBJS-$(CONFIG_SEGMENT_FILTER) += f_segment.o @@ -544,7 +544,7 @@ OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER) += vf_transpose_opencl.o opencl.o opencl/transpose.o OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER) += vf_transpose_vaapi.o vaapi_vpp.o OBJS-$(CONFIG_TRANSPOSE_VT_FILTER) += vf_transpose_vt.o 
-OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vf_transpose_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vulkan/vf_transpose_vulkan.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o @@ -557,7 +557,7 @@ OBJS-$(CONFIG_VAGUEDENOISER_FILTER) += vf_vaguedenoiser.o OBJS-$(CONFIG_VARBLUR_FILTER) += vf_varblur.o framesync.o OBJS-$(CONFIG_VECTORSCOPE_FILTER) += vf_vectorscope.o OBJS-$(CONFIG_VFLIP_FILTER) += vf_vflip.o -OBJS-$(CONFIG_VFLIP_VULKAN_FILTER) += vf_flip_vulkan.o vulkan.o +OBJS-$(CONFIG_VFLIP_VULKAN_FILTER) += vulkan/vf_flip_vulkan.o vulkan.o OBJS-$(CONFIG_VFRDET_FILTER) += vf_vfrdet.o OBJS-$(CONFIG_VIBRANCE_FILTER) += vf_vibrance.o OBJS-$(CONFIG_VIDSTABDETECT_FILTER) += vidstabutils.o vf_vidstabdetect.o @@ -575,7 +575,7 @@ OBJS-$(CONFIG_XBR_FILTER) += vf_xbr.o OBJS-$(CONFIG_XCORRELATE_FILTER) += vf_convolve.o framesync.o OBJS-$(CONFIG_XFADE_FILTER) += vf_xfade.o OBJS-$(CONFIG_XFADE_OPENCL_FILTER) += vf_xfade_opencl.o opencl.o opencl/xfade.o -OBJS-$(CONFIG_XFADE_VULKAN_FILTER) += vf_xfade_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_XFADE_VULKAN_FILTER) += vulkan/vf_xfade_vulkan.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_XMEDIAN_FILTER) += vf_xmedian.o framesync.o OBJS-$(CONFIG_XPSNR_FILTER) += vf_xpsnr.o framesync.o psnr.o OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o @@ -624,7 +624,7 @@ OBJS-$(CONFIG_RGBTESTSRC_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_SIERPINSKI_FILTER) += vsrc_sierpinski.o OBJS-$(CONFIG_SMPTEBARS_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_SMPTEHDBARS_FILTER) += vsrc_testsrc.o -OBJS-$(CONFIG_COLOR_VULKAN_FILTER) += vsrc_testsrc_vulkan.o vulkan.o vulkan_filter.o +OBJS-$(CONFIG_COLOR_VULKAN_FILTER) += vulkan/vsrc_testsrc_vulkan.o vulkan.o vulkan/vulkan_filter.o OBJS-$(CONFIG_TESTSRC_FILTER) += vsrc_testsrc.o OBJS-$(CONFIG_TESTSRC2_FILTER) += vsrc_testsrc.o 
OBJS-$(CONFIG_YUVTESTSRC_FILTER) += vsrc_testsrc.o @@ -674,7 +674,7 @@ SKIPHEADERS-$(CONFIG_AMF) += vf_amf_common.h SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h stack_internal.h SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h -SKIPHEADERS-$(CONFIG_VULKAN) += vulkan_filter.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan/vulkan_filter.h TOOLS = graph2dot TESTPROGS = drawutils filtfmts formats integral -- 2.49.1 From cc103ac919c759c2d7514bf811548e520b7b3bac Mon Sep 17 00:00:00 2001 From: Jamaika1 <lukaszcz18@wp.pl> Date: Wed, 3 Sep 2025 14:30:38 +0000 Subject: [PATCH 118/118] Changing vulkan file directory --- libavutil/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libavutil/Makefile b/libavutil/Makefile index ee77e51c08..33b3c2cc9c 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -217,7 +217,7 @@ OBJS-$(CONFIG_QSV) += hwcontext_qsv.o OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o -OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o vulkan.o +OBJS-$(CONFIG_VULKAN) += vulkan/hwcontext_vulkan.o vulkan/vulkan.o OBJS-$(!CONFIG_VULKAN) += hwcontext_stub.o @@ -247,11 +247,11 @@ SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h -SKIPHEADERS-$(CONFIG_VULKAN) += hwcontext_vulkan.h vulkan.h \ - vulkan_functions.h \ - vulkan_loader.h -SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h -SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h +SKIPHEADERS-$(CONFIG_VULKAN) += vulkan/hwcontext_vulkan.h vulkan/vulkan.h \ + vulkan/vulkan_functions.h \ + vulkan/vulkan_loader.h +SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan/vulkan_spirv.h +SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan/vulkan_spirv.h TESTPROGS = adler32 \ aes \ -- 2.49.1 
_______________________________________________ ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-09-05 12:06 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=175691058908.25.12192413182319126055@463a07221176 \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To
  header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git