From: Petro Mozil <mozil.petryk@gmail.com> To: ffmpeg-devel@ffmpeg.org Subject: Re: [FFmpeg-devel] [PATCH 2/2] Add vulkan dirac hwaccel Date: Thu, 15 Aug 2024 08:45:40 +0000 Message-ID: <CAP664DVJJ6=Dt0y0SCBymMvVpmvf1bQQ3VE9NYqFrsAwVDequQ@mail.gmail.com> (raw) In-Reply-To: <20240814121856.757459-3-mozil.petryk@gmail.com> RE: [PATCH] Move vulkan_spirv.h and deps to lavutil Move spirv_compiler (vulkan_spirv.h and its glslang/shaderc backends) into lavutil to avoid linking errors. It is now used by both lavc and lavfi, so it belongs in lavutil. Signed-off-by: Petro Mozil <mozil.petryk@gmail.com> --- configure | 2 + libavcodec/Makefile | 6 + libavcodec/vulkan_dirac.c | 2 +- libavcodec/vulkan_glslang.c | 19 +++ libavcodec/vulkan_shaderc.c | 19 +++ libavcodec/vulkan_spirv.h | 19 +++ libavfilter/vulkan_glslang.c | 266 +------------------------------- libavfilter/vulkan_shaderc.c | 111 +------------- libavfilter/vulkan_spirv.h | 28 +--- libavutil/Makefile | 5 + libavutil/vulkan_glslang.c | 283 +++++++++++++++++++++++++++++++++++ libavutil/vulkan_shaderc.c | 128 ++++++++++++++++ libavutil/vulkan_spirv.h | 45 ++++++ 13 files changed, 530 insertions(+), 403 deletions(-) create mode 100644 libavcodec/vulkan_glslang.c create mode 100644 libavcodec/vulkan_shaderc.c create mode 100644 libavcodec/vulkan_spirv.h create mode 100644 libavutil/vulkan_glslang.c create mode 100644 libavutil/vulkan_shaderc.c create mode 100644 libavutil/vulkan_spirv.h diff --git a/configure b/configure index 37178d7d81..8c83ab6511 100755 --- a/configure +++ b/configure @@ -3170,6 +3170,8 @@ av1_vdpau_hwaccel_deps="vdpau VdpPictureInfoAV1" av1_vdpau_hwaccel_select="av1_decoder" av1_vulkan_hwaccel_deps="vulkan" av1_vulkan_hwaccel_select="av1_decoder" +dirac_vulkan_hwaccel_deps="vulkan spirv_compiler" +dirac_vulkan_hwaccel_select="dirac_decoder" h263_vaapi_hwaccel_deps="vaapi" h263_vaapi_hwaccel_select="h263_decoder" h263_videotoolbox_hwaccel_deps="videotoolbox" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 85af31f4f0..4c912103a0 
100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -994,6 +994,10 @@ OBJS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.o OBJS-$(CONFIG_VDPAU) += vdpau.o OBJS-$(CONFIG_VULKAN) += vulkan.o vulkan_video.o +# vulkan libs +OBJS-$(CONFIG_LIBGLSLANG) += vulkan_glslang.o +OBJS-$(CONFIG_LIBSHADERC) += vulkan_shaderc.o + OBJS-$(CONFIG_AV1_D3D11VA_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_DXVA2_HWACCEL) += dxva2_av1.o OBJS-$(CONFIG_AV1_D3D12VA_HWACCEL) += dxva2_av1.o d3d12va_av1.o @@ -1263,6 +1267,8 @@ SKIPHEADERS += %_tablegen.h \ aacenc_quantization_misc.h \ bitstream_template.h \ $(ARCH)/vpx_arith.h \ +SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h +SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h SKIPHEADERS-$(CONFIG_AMF) += amfenc.h SKIPHEADERS-$(CONFIG_D3D11VA) += d3d11va.h dxva2_internal.h diff --git a/libavcodec/vulkan_dirac.c b/libavcodec/vulkan_dirac.c index bb8b75957d..b5785f7fc0 100644 --- a/libavcodec/vulkan_dirac.c +++ b/libavcodec/vulkan_dirac.c @@ -19,7 +19,7 @@ #include "diracdec.h" #include "vulkan.h" #include "vulkan_decode.h" -#include "libavfilter/vulkan_spirv.h" +#include "libavcodec/vulkan_spirv.h" #include "libavutil/vulkan_loader.h" #include "libavcodec/pthread_internal.h" #include "libavcodec/dirac_vlc.c" diff --git a/libavcodec/vulkan_glslang.c b/libavcodec/vulkan_glslang.c new file mode 100644 index 0000000000..9aa41567a3 --- /dev/null +++ b/libavcodec/vulkan_glslang.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/vulkan_glslang.c" diff --git a/libavcodec/vulkan_shaderc.c b/libavcodec/vulkan_shaderc.c new file mode 100644 index 0000000000..9f60bf4dfd --- /dev/null +++ b/libavcodec/vulkan_shaderc.c @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/vulkan_shaderc.c" diff --git a/libavcodec/vulkan_spirv.h b/libavcodec/vulkan_spirv.h new file mode 100644 index 0000000000..93584720ee --- /dev/null +++ b/libavcodec/vulkan_spirv.h @@ -0,0 +1,19 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/vulkan_spirv.h" diff --git a/libavfilter/vulkan_glslang.c b/libavfilter/vulkan_glslang.c index 845a530ee0..9aa41567a3 100644 --- a/libavfilter/vulkan_glslang.c +++ b/libavfilter/vulkan_glslang.c @@ -16,268 +16,4 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <pthread.h> - -#include <glslang/build_info.h> -#include <glslang/Include/glslang_c_interface.h> - -#include "vulkan_spirv.h" -#include "libavutil/mem.h" -#include "libavutil/avassert.h" - -static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER; -static int glslc_refcount = 0; - -static const glslang_resource_t glslc_resource_limits = { - .max_lights = 32, - .max_clip_planes = 6, - .max_texture_units = 32, - .max_texture_coords = 32, - .max_vertex_attribs = 64, - .max_vertex_uniform_components = 4096, - .max_varying_floats = 64, - .max_vertex_texture_image_units = 32, - .max_combined_texture_image_units = 80, - .max_texture_image_units = 32, - .max_fragment_uniform_components = 4096, - .max_draw_buffers = 32, - .max_vertex_uniform_vectors = 128, - .max_varying_vectors = 8, - .max_fragment_uniform_vectors = 16, - .max_vertex_output_vectors = 16, - .max_fragment_input_vectors = 15, - .min_program_texel_offset = -8, - .max_program_texel_offset = 7, - .max_clip_distances = 8, - .max_compute_work_group_count_x = 65535, - .max_compute_work_group_count_y = 65535, - .max_compute_work_group_count_z = 65535, - .max_compute_work_group_size_x = 1024, - .max_compute_work_group_size_y = 1024, - .max_compute_work_group_size_z = 64, - .max_compute_uniform_components = 1024, - .max_compute_texture_image_units = 16, - .max_compute_image_uniforms = 8, - 
.max_compute_atomic_counters = 8, - .max_compute_atomic_counter_buffers = 1, - .max_varying_components = 60, - .max_vertex_output_components = 64, - .max_geometry_input_components = 64, - .max_geometry_output_components = 128, - .max_fragment_input_components = 128, - .max_image_units = 8, - .max_combined_image_units_and_fragment_outputs = 8, - .max_combined_shader_output_resources = 8, - .max_image_samples = 0, - .max_vertex_image_uniforms = 0, - .max_tess_control_image_uniforms = 0, - .max_tess_evaluation_image_uniforms = 0, - .max_geometry_image_uniforms = 0, - .max_fragment_image_uniforms = 8, - .max_combined_image_uniforms = 8, - .max_geometry_texture_image_units = 16, - .max_geometry_output_vertices = 256, - .max_geometry_total_output_components = 1024, - .max_geometry_uniform_components = 1024, - .max_geometry_varying_components = 64, - .max_tess_control_input_components = 128, - .max_tess_control_output_components = 128, - .max_tess_control_texture_image_units = 16, - .max_tess_control_uniform_components = 1024, - .max_tess_control_total_output_components = 4096, - .max_tess_evaluation_input_components = 128, - .max_tess_evaluation_output_components = 128, - .max_tess_evaluation_texture_image_units = 16, - .max_tess_evaluation_uniform_components = 1024, - .max_tess_patch_components = 120, - .max_patch_vertices = 32, - .max_tess_gen_level = 64, - .max_viewports = 16, - .max_vertex_atomic_counters = 0, - .max_tess_control_atomic_counters = 0, - .max_tess_evaluation_atomic_counters = 0, - .max_geometry_atomic_counters = 0, - .max_fragment_atomic_counters = 8, - .max_combined_atomic_counters = 8, - .max_atomic_counter_bindings = 1, - .max_vertex_atomic_counter_buffers = 0, - .max_tess_control_atomic_counter_buffers = 0, - .max_tess_evaluation_atomic_counter_buffers = 0, - .max_geometry_atomic_counter_buffers = 0, - .max_fragment_atomic_counter_buffers = 1, - .max_combined_atomic_counter_buffers = 1, - .max_atomic_counter_buffer_size = 16384, - 
.max_transform_feedback_buffers = 4, - .max_transform_feedback_interleaved_components = 64, - .max_cull_distances = 8, - .max_combined_clip_and_cull_distances = 8, - .max_samples = 4, - .max_mesh_output_vertices_nv = 256, - .max_mesh_output_primitives_nv = 512, - .max_mesh_work_group_size_x_nv = 32, - .max_mesh_work_group_size_y_nv = 1, - .max_mesh_work_group_size_z_nv = 1, - .max_task_work_group_size_x_nv = 32, - .max_task_work_group_size_y_nv = 1, - .max_task_work_group_size_z_nv = 1, - .max_mesh_view_count_nv = 4, - .maxDualSourceDrawBuffersEXT = 1, - - .limits = { - .non_inductive_for_loops = 1, - .while_loops = 1, - .do_while_loops = 1, - .general_uniform_indexing = 1, - .general_attribute_matrix_vector_indexing = 1, - .general_varying_indexing = 1, - .general_sampler_indexing = 1, - .general_variable_indexing = 1, - .general_constant_matrix_vector_indexing = 1, - } -}; - -static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, - FFVkSPIRVShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, - void **opaque) -{ - const char *messages; - glslang_shader_t *glslc_shader; - glslang_program_t *glslc_program; - - static const glslang_stage_t glslc_stage[] = { - [VK_SHADER_STAGE_VERTEX_BIT] = GLSLANG_STAGE_VERTEX, - [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_STAGE_FRAGMENT, - [VK_SHADER_STAGE_COMPUTE_BIT] = GLSLANG_STAGE_COMPUTE, - }; - - const glslang_input_t glslc_input = { - .language = GLSLANG_SOURCE_GLSL, - .stage = glslc_stage[shd->shader.stage], - .client = GLSLANG_CLIENT_VULKAN, - /* GLSLANG_TARGET_VULKAN_1_2 before 11.6 resulted in targeting 1.0 */ -#if (((GLSLANG_VERSION_MAJOR) > 11) || ((GLSLANG_VERSION_MAJOR) == 11 && \ - (((GLSLANG_VERSION_MINOR) > 6) || ((GLSLANG_VERSION_MINOR) == 6 && \ - ((GLSLANG_VERSION_PATCH) > 0))))) - .client_version = GLSLANG_TARGET_VULKAN_1_2, - .target_language_version = GLSLANG_TARGET_SPV_1_5, -#else - .client_version = GLSLANG_TARGET_VULKAN_1_1, - .target_language_version = GLSLANG_TARGET_SPV_1_3, 
-#endif - .target_language = GLSLANG_TARGET_SPV, - .code = shd->src.str, - .default_version = 460, - .default_profile = GLSLANG_NO_PROFILE, - .force_default_version_and_profile = false, - .forward_compatible = false, - .messages = GLSLANG_MSG_DEFAULT_BIT, - .resource = &glslc_resource_limits, - }; - - av_assert0(glslc_refcount); - - *opaque = NULL; - - if (!(glslc_shader = glslang_shader_create(&glslc_input))) - return AVERROR(ENOMEM); - - if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) { - ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); - av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n", - glslang_shader_get_info_log(glslc_shader), - glslang_shader_get_info_debug_log(glslc_shader)); - glslang_shader_delete(glslc_shader); - return AVERROR(EINVAL); - } - - if (!glslang_shader_parse(glslc_shader, &glslc_input)) { - ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); - av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n", - glslang_shader_get_info_log(glslc_shader), - glslang_shader_get_info_debug_log(glslc_shader)); - glslang_shader_delete(glslc_shader); - return AVERROR(EINVAL); - } - - if (!(glslc_program = glslang_program_create())) { - glslang_shader_delete(glslc_shader); - return AVERROR(EINVAL); - } - - glslang_program_add_shader(glslc_program, glslc_shader); - - if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT | - GLSLANG_MSG_VULKAN_RULES_BIT)) { - ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); - av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n", - glslang_program_get_info_log(glslc_program), - glslang_program_get_info_debug_log(glslc_program)); - glslang_program_delete(glslc_program); - glslang_shader_delete(glslc_shader); - return AVERROR(EINVAL); - } - - glslang_program_SPIRV_generate(glslc_program, glslc_input.stage); - - messages = glslang_program_SPIRV_get_messages(glslc_program); - if (messages) { - ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); - av_log(avctx, AV_LOG_WARNING, 
"%s\n", messages); - } else { - ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE); - } - - glslang_shader_delete(glslc_shader); - - *size = glslang_program_SPIRV_get_size(glslc_program) * sizeof(unsigned int); - *data = (void *)glslang_program_SPIRV_get_ptr(glslc_program); - *opaque = glslc_program; - - return 0; -} - -static void glslc_shader_free(FFVkSPIRVCompiler *ctx, void **opaque) -{ - if (!opaque || !*opaque) - return; - - av_assert0(glslc_refcount); - glslang_program_delete(*opaque); - *opaque = NULL; -} - -static void glslc_uninit(FFVkSPIRVCompiler **ctx) -{ - if (!ctx || !*ctx) - return; - - pthread_mutex_lock(&glslc_mutex); - if (glslc_refcount && (--glslc_refcount == 0)) - glslang_finalize_process(); - pthread_mutex_unlock(&glslc_mutex); - - av_freep(ctx); -} - -FFVkSPIRVCompiler *ff_vk_glslang_init(void) -{ - FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); - if (!ret) - return NULL; - - ret->compile_shader = glslc_shader_compile; - ret->free_shader = glslc_shader_free; - ret->uninit = glslc_uninit; - - pthread_mutex_lock(&glslc_mutex); - if (!glslc_refcount++) { - if (!glslang_initialize_process()) { - av_freep(&ret); - glslc_refcount--; - } - } - pthread_mutex_unlock(&glslc_mutex); - - return ret; -} +#include "libavutil/vulkan_glslang.c" diff --git a/libavfilter/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c index 9e8a3d17ac..9f60bf4dfd 100644 --- a/libavfilter/vulkan_shaderc.c +++ b/libavfilter/vulkan_shaderc.c @@ -16,113 +16,4 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <shaderc/shaderc.h> - -#include "libavutil/mem.h" -#include "vulkan_spirv.h" - -static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, - FFVkSPIRVShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, - void **opaque) -{ - int loglevel, err, warn, ret; - const char *status, *message; - shaderc_compilation_result_t res; - static const char *shdc_result[] = { - [shaderc_compilation_status_success] = 
"success", - [shaderc_compilation_status_invalid_stage] = "invalid stage", - [shaderc_compilation_status_compilation_error] = "error", - [shaderc_compilation_status_internal_error] = "internal error", - [shaderc_compilation_status_null_result_object] = "no result", - [shaderc_compilation_status_invalid_assembly] = "invalid assembly", - }; - static const shaderc_shader_kind shdc_kind[] = { - [VK_SHADER_STAGE_VERTEX_BIT] = shaderc_glsl_vertex_shader, - [VK_SHADER_STAGE_FRAGMENT_BIT] = shaderc_glsl_fragment_shader, - [VK_SHADER_STAGE_COMPUTE_BIT] = shaderc_glsl_compute_shader, - }; - - shaderc_compile_options_t opts = shaderc_compile_options_initialize(); - *opaque = NULL; - if (!opts) - return AVERROR(ENOMEM); - - shaderc_compile_options_set_target_env(opts, shaderc_target_env_vulkan, - shaderc_env_version_vulkan_1_2); - shaderc_compile_options_set_target_spirv(opts, shaderc_spirv_version_1_5); - shaderc_compile_options_set_generate_debug_info(opts); - shaderc_compile_options_set_optimization_level(opts, - shaderc_optimization_level_performance); - - res = shaderc_compile_into_spv((shaderc_compiler_t)ctx->priv, - shd->src.str, strlen(shd->src.str), - shdc_kind[shd->shader.stage], - shd->name, entrypoint, opts); - shaderc_compile_options_release(opts); - - ret = shaderc_result_get_compilation_status(res); - err = shaderc_result_get_num_errors(res); - warn = shaderc_result_get_num_warnings(res); - message = shaderc_result_get_error_message(res); - - if (ret != shaderc_compilation_status_success && !err) - err = 1; - - loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE; - - ff_vk_shader_print(avctx, shd, loglevel); - if (message && (err || warn)) - av_log(avctx, loglevel, "%s\n", message); - status = ret < FF_ARRAY_ELEMS(shdc_result) ? 
shdc_result[ret] : "unknown"; - av_log(avctx, loglevel, "shaderc compile status '%s' (%d errors, %d warnings)\n", - status, err, warn); - - if (err > 0) - return AVERROR(EINVAL); - - *data = (uint8_t *)shaderc_result_get_bytes(res); - *size = shaderc_result_get_length(res); - *opaque = res; - - return 0; -} - -static void shdc_shader_free(FFVkSPIRVCompiler *ctx, void **opaque) -{ - if (!opaque || !*opaque) - return; - - shaderc_result_release((shaderc_compilation_result_t)*opaque); - *opaque = NULL; -} - -static void shdc_uninit(FFVkSPIRVCompiler **ctx) -{ - FFVkSPIRVCompiler *s; - - if (!ctx || !*ctx) - return; - - s = *ctx; - - shaderc_compiler_release((shaderc_compiler_t)s->priv); - av_freep(ctx); -} - -FFVkSPIRVCompiler *ff_vk_shaderc_init(void) -{ - FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); - if (!ret) - return NULL; - - ret->compile_shader = shdc_shader_compile; - ret->free_shader = shdc_shader_free; - ret->uninit = shdc_uninit; - - ret->priv = (void *)shaderc_compiler_initialize(); - if (!ret->priv) - av_freep(&ret); - - return ret; -} +#include "libavutil/vulkan_shaderc.c" diff --git a/libavfilter/vulkan_spirv.h b/libavfilter/vulkan_spirv.h index 5638cd9696..93584720ee 100644 --- a/libavfilter/vulkan_spirv.h +++ b/libavfilter/vulkan_spirv.h @@ -16,30 +16,4 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AVFILTER_VULKAN_SPIRV_H -#define AVFILTER_VULKAN_SPIRV_H - -#include "libavutil/vulkan.h" - -#include "vulkan.h" -#include "config.h" - -typedef struct FFVkSPIRVCompiler { - void *priv; - int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, - struct FFVkSPIRVShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, void **opaque); - void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); - void (*uninit)(struct FFVkSPIRVCompiler **ctx); -} FFVkSPIRVCompiler; - -#if CONFIG_LIBGLSLANG -FFVkSPIRVCompiler *ff_vk_glslang_init(void); -#define ff_vk_spirv_init 
ff_vk_glslang_init -#endif -#if CONFIG_LIBSHADERC -FFVkSPIRVCompiler *ff_vk_shaderc_init(void); -#define ff_vk_spirv_init ff_vk_shaderc_init -#endif - -#endif /* AVFILTER_VULKAN_H */ +#include "libavutil/vulkan_spirv.h" diff --git a/libavutil/Makefile b/libavutil/Makefile index 6e6fa8d800..4c73663fe7 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -208,6 +208,9 @@ OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o vulkan.o OBJS-$(!CONFIG_VULKAN) += hwcontext_stub.o +OBJS-$(CONFIG_LIBGLSLANG) += vulkan_glslang.o +OBJS-$(CONFIG_LIBSHADERC) += vulkan_shaderc.o + OBJS += $(COMPAT_OBJS:%=../compat/%) # Windows resource file @@ -228,6 +231,8 @@ SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h SKIPHEADERS-$(CONFIG_VULKAN) += hwcontext_vulkan.h vulkan.h \ vulkan_functions.h \ vulkan_loader.h +SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h +SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h TESTPROGS = adler32 \ aes \ diff --git a/libavutil/vulkan_glslang.c b/libavutil/vulkan_glslang.c new file mode 100644 index 0000000000..845a530ee0 --- /dev/null +++ b/libavutil/vulkan_glslang.c @@ -0,0 +1,283 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <pthread.h> + +#include <glslang/build_info.h> +#include <glslang/Include/glslang_c_interface.h> + +#include "vulkan_spirv.h" +#include "libavutil/mem.h" +#include "libavutil/avassert.h" + +static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER; +static int glslc_refcount = 0; + +static const glslang_resource_t glslc_resource_limits = { + .max_lights = 32, + .max_clip_planes = 6, + .max_texture_units = 32, + .max_texture_coords = 32, + .max_vertex_attribs = 64, + .max_vertex_uniform_components = 4096, + .max_varying_floats = 64, + .max_vertex_texture_image_units = 32, + .max_combined_texture_image_units = 80, + .max_texture_image_units = 32, + .max_fragment_uniform_components = 4096, + .max_draw_buffers = 32, + .max_vertex_uniform_vectors = 128, + .max_varying_vectors = 8, + .max_fragment_uniform_vectors = 16, + .max_vertex_output_vectors = 16, + .max_fragment_input_vectors = 15, + .min_program_texel_offset = -8, + .max_program_texel_offset = 7, + .max_clip_distances = 8, + .max_compute_work_group_count_x = 65535, + .max_compute_work_group_count_y = 65535, + .max_compute_work_group_count_z = 65535, + .max_compute_work_group_size_x = 1024, + .max_compute_work_group_size_y = 1024, + .max_compute_work_group_size_z = 64, + .max_compute_uniform_components = 1024, + .max_compute_texture_image_units = 16, + .max_compute_image_uniforms = 8, + .max_compute_atomic_counters = 8, + .max_compute_atomic_counter_buffers = 1, + .max_varying_components = 60, + .max_vertex_output_components = 64, + .max_geometry_input_components = 64, + .max_geometry_output_components = 128, + .max_fragment_input_components = 128, + .max_image_units = 8, + .max_combined_image_units_and_fragment_outputs = 8, + .max_combined_shader_output_resources = 8, + 
.max_image_samples = 0, + .max_vertex_image_uniforms = 0, + .max_tess_control_image_uniforms = 0, + .max_tess_evaluation_image_uniforms = 0, + .max_geometry_image_uniforms = 0, + .max_fragment_image_uniforms = 8, + .max_combined_image_uniforms = 8, + .max_geometry_texture_image_units = 16, + .max_geometry_output_vertices = 256, + .max_geometry_total_output_components = 1024, + .max_geometry_uniform_components = 1024, + .max_geometry_varying_components = 64, + .max_tess_control_input_components = 128, + .max_tess_control_output_components = 128, + .max_tess_control_texture_image_units = 16, + .max_tess_control_uniform_components = 1024, + .max_tess_control_total_output_components = 4096, + .max_tess_evaluation_input_components = 128, + .max_tess_evaluation_output_components = 128, + .max_tess_evaluation_texture_image_units = 16, + .max_tess_evaluation_uniform_components = 1024, + .max_tess_patch_components = 120, + .max_patch_vertices = 32, + .max_tess_gen_level = 64, + .max_viewports = 16, + .max_vertex_atomic_counters = 0, + .max_tess_control_atomic_counters = 0, + .max_tess_evaluation_atomic_counters = 0, + .max_geometry_atomic_counters = 0, + .max_fragment_atomic_counters = 8, + .max_combined_atomic_counters = 8, + .max_atomic_counter_bindings = 1, + .max_vertex_atomic_counter_buffers = 0, + .max_tess_control_atomic_counter_buffers = 0, + .max_tess_evaluation_atomic_counter_buffers = 0, + .max_geometry_atomic_counter_buffers = 0, + .max_fragment_atomic_counter_buffers = 1, + .max_combined_atomic_counter_buffers = 1, + .max_atomic_counter_buffer_size = 16384, + .max_transform_feedback_buffers = 4, + .max_transform_feedback_interleaved_components = 64, + .max_cull_distances = 8, + .max_combined_clip_and_cull_distances = 8, + .max_samples = 4, + .max_mesh_output_vertices_nv = 256, + .max_mesh_output_primitives_nv = 512, + .max_mesh_work_group_size_x_nv = 32, + .max_mesh_work_group_size_y_nv = 1, + .max_mesh_work_group_size_z_nv = 1, + .max_task_work_group_size_x_nv 
= 32, + .max_task_work_group_size_y_nv = 1, + .max_task_work_group_size_z_nv = 1, + .max_mesh_view_count_nv = 4, + .maxDualSourceDrawBuffersEXT = 1, + + .limits = { + .non_inductive_for_loops = 1, + .while_loops = 1, + .do_while_loops = 1, + .general_uniform_indexing = 1, + .general_attribute_matrix_vector_indexing = 1, + .general_varying_indexing = 1, + .general_sampler_indexing = 1, + .general_variable_indexing = 1, + .general_constant_matrix_vector_indexing = 1, + } +}; + +static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, + FFVkSPIRVShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, + void **opaque) +{ + const char *messages; + glslang_shader_t *glslc_shader; + glslang_program_t *glslc_program; + + static const glslang_stage_t glslc_stage[] = { + [VK_SHADER_STAGE_VERTEX_BIT] = GLSLANG_STAGE_VERTEX, + [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_STAGE_FRAGMENT, + [VK_SHADER_STAGE_COMPUTE_BIT] = GLSLANG_STAGE_COMPUTE, + }; + + const glslang_input_t glslc_input = { + .language = GLSLANG_SOURCE_GLSL, + .stage = glslc_stage[shd->shader.stage], + .client = GLSLANG_CLIENT_VULKAN, + /* GLSLANG_TARGET_VULKAN_1_2 before 11.6 resulted in targeting 1.0 */ +#if (((GLSLANG_VERSION_MAJOR) > 11) || ((GLSLANG_VERSION_MAJOR) == 11 && \ + (((GLSLANG_VERSION_MINOR) > 6) || ((GLSLANG_VERSION_MINOR) == 6 && \ + ((GLSLANG_VERSION_PATCH) > 0))))) + .client_version = GLSLANG_TARGET_VULKAN_1_2, + .target_language_version = GLSLANG_TARGET_SPV_1_5, +#else + .client_version = GLSLANG_TARGET_VULKAN_1_1, + .target_language_version = GLSLANG_TARGET_SPV_1_3, +#endif + .target_language = GLSLANG_TARGET_SPV, + .code = shd->src.str, + .default_version = 460, + .default_profile = GLSLANG_NO_PROFILE, + .force_default_version_and_profile = false, + .forward_compatible = false, + .messages = GLSLANG_MSG_DEFAULT_BIT, + .resource = &glslc_resource_limits, + }; + + av_assert0(glslc_refcount); + + *opaque = NULL; + + if (!(glslc_shader = 
glslang_shader_create(&glslc_input))) + return AVERROR(ENOMEM); + + if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) { + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); + av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n", + glslang_shader_get_info_log(glslc_shader), + glslang_shader_get_info_debug_log(glslc_shader)); + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + if (!glslang_shader_parse(glslc_shader, &glslc_input)) { + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); + av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n", + glslang_shader_get_info_log(glslc_shader), + glslang_shader_get_info_debug_log(glslc_shader)); + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + if (!(glslc_program = glslang_program_create())) { + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + glslang_program_add_shader(glslc_program, glslc_shader); + + if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT | + GLSLANG_MSG_VULKAN_RULES_BIT)) { + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); + av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n", + glslang_program_get_info_log(glslc_program), + glslang_program_get_info_debug_log(glslc_program)); + glslang_program_delete(glslc_program); + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + glslang_program_SPIRV_generate(glslc_program, glslc_input.stage); + + messages = glslang_program_SPIRV_get_messages(glslc_program); + if (messages) { + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); + av_log(avctx, AV_LOG_WARNING, "%s\n", messages); + } else { + ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE); + } + + glslang_shader_delete(glslc_shader); + + *size = glslang_program_SPIRV_get_size(glslc_program) * sizeof(unsigned int); + *data = (void *)glslang_program_SPIRV_get_ptr(glslc_program); + *opaque = glslc_program; + + return 0; +} + +static void glslc_shader_free(FFVkSPIRVCompiler *ctx, void 
**opaque) +{ + if (!opaque || !*opaque) + return; + + av_assert0(glslc_refcount); + glslang_program_delete(*opaque); + *opaque = NULL; +} + +static void glslc_uninit(FFVkSPIRVCompiler **ctx) +{ + if (!ctx || !*ctx) + return; + + pthread_mutex_lock(&glslc_mutex); + if (glslc_refcount && (--glslc_refcount == 0)) + glslang_finalize_process(); + pthread_mutex_unlock(&glslc_mutex); + + av_freep(ctx); +} + +FFVkSPIRVCompiler *ff_vk_glslang_init(void) +{ + FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); + if (!ret) + return NULL; + + ret->compile_shader = glslc_shader_compile; + ret->free_shader = glslc_shader_free; + ret->uninit = glslc_uninit; + + pthread_mutex_lock(&glslc_mutex); + if (!glslc_refcount++) { + if (!glslang_initialize_process()) { + av_freep(&ret); + glslc_refcount--; + } + } + pthread_mutex_unlock(&glslc_mutex); + + return ret; +} diff --git a/libavutil/vulkan_shaderc.c b/libavutil/vulkan_shaderc.c new file mode 100644 index 0000000000..9e8a3d17ac --- /dev/null +++ b/libavutil/vulkan_shaderc.c @@ -0,0 +1,128 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <shaderc/shaderc.h> + +#include "libavutil/mem.h" +#include "vulkan_spirv.h" + +static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, + FFVkSPIRVShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, + void **opaque) +{ + int loglevel, err, warn, ret; + const char *status, *message; + shaderc_compilation_result_t res; + static const char *shdc_result[] = { + [shaderc_compilation_status_success] = "success", + [shaderc_compilation_status_invalid_stage] = "invalid stage", + [shaderc_compilation_status_compilation_error] = "error", + [shaderc_compilation_status_internal_error] = "internal error", + [shaderc_compilation_status_null_result_object] = "no result", + [shaderc_compilation_status_invalid_assembly] = "invalid assembly", + }; + static const shaderc_shader_kind shdc_kind[] = { + [VK_SHADER_STAGE_VERTEX_BIT] = shaderc_glsl_vertex_shader, + [VK_SHADER_STAGE_FRAGMENT_BIT] = shaderc_glsl_fragment_shader, + [VK_SHADER_STAGE_COMPUTE_BIT] = shaderc_glsl_compute_shader, + }; + + shaderc_compile_options_t opts = shaderc_compile_options_initialize(); + *opaque = NULL; + if (!opts) + return AVERROR(ENOMEM); + + shaderc_compile_options_set_target_env(opts, shaderc_target_env_vulkan, + shaderc_env_version_vulkan_1_2); + shaderc_compile_options_set_target_spirv(opts, shaderc_spirv_version_1_5); + shaderc_compile_options_set_generate_debug_info(opts); + shaderc_compile_options_set_optimization_level(opts, + shaderc_optimization_level_performance); + + res = shaderc_compile_into_spv((shaderc_compiler_t)ctx->priv, + shd->src.str, strlen(shd->src.str), + shdc_kind[shd->shader.stage], + shd->name, entrypoint, opts); + shaderc_compile_options_release(opts); + + ret = shaderc_result_get_compilation_status(res); + err = 
shaderc_result_get_num_errors(res); + warn = shaderc_result_get_num_warnings(res); + message = shaderc_result_get_error_message(res); + + if (ret != shaderc_compilation_status_success && !err) + err = 1; + + loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE; + + ff_vk_shader_print(avctx, shd, loglevel); + if (message && (err || warn)) + av_log(avctx, loglevel, "%s\n", message); + status = ret < FF_ARRAY_ELEMS(shdc_result) ? shdc_result[ret] : "unknown"; + av_log(avctx, loglevel, "shaderc compile status '%s' (%d errors, %d warnings)\n", + status, err, warn); + + if (err > 0) + return AVERROR(EINVAL); + + *data = (uint8_t *)shaderc_result_get_bytes(res); + *size = shaderc_result_get_length(res); + *opaque = res; + + return 0; +} + +static void shdc_shader_free(FFVkSPIRVCompiler *ctx, void **opaque) +{ + if (!opaque || !*opaque) + return; + + shaderc_result_release((shaderc_compilation_result_t)*opaque); + *opaque = NULL; +} + +static void shdc_uninit(FFVkSPIRVCompiler **ctx) +{ + FFVkSPIRVCompiler *s; + + if (!ctx || !*ctx) + return; + + s = *ctx; + + shaderc_compiler_release((shaderc_compiler_t)s->priv); + av_freep(ctx); +} + +FFVkSPIRVCompiler *ff_vk_shaderc_init(void) +{ + FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); + if (!ret) + return NULL; + + ret->compile_shader = shdc_shader_compile; + ret->free_shader = shdc_shader_free; + ret->uninit = shdc_uninit; + + ret->priv = (void *)shaderc_compiler_initialize(); + if (!ret->priv) + av_freep(&ret); + + return ret; +} diff --git a/libavutil/vulkan_spirv.h b/libavutil/vulkan_spirv.h new file mode 100644 index 0000000000..5638cd9696 --- /dev/null +++ b/libavutil/vulkan_spirv.h @@ -0,0 +1,45 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VULKAN_SPIRV_H +#define AVFILTER_VULKAN_SPIRV_H + +#include "libavutil/vulkan.h" + +#include "vulkan.h" +#include "config.h" + +typedef struct FFVkSPIRVCompiler { + void *priv; + int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, + struct FFVkSPIRVShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, void **opaque); + void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); + void (*uninit)(struct FFVkSPIRVCompiler **ctx); +} FFVkSPIRVCompiler; + +#if CONFIG_LIBGLSLANG +FFVkSPIRVCompiler *ff_vk_glslang_init(void); +#define ff_vk_spirv_init ff_vk_glslang_init +#endif +#if CONFIG_LIBSHADERC +FFVkSPIRVCompiler *ff_vk_shaderc_init(void); +#define ff_vk_spirv_init ff_vk_shaderc_init +#endif + +#endif /* AVFILTER_VULKAN_H */ -- 2.46.0 On Wed, Aug 14, 2024 at 12:19 PM Petro Mozil <mozil.petryk@gmail.com> wrote: > Signed-off-by: Petro Mozil <mozil.petryk@gmail.com> > --- > libavcodec/Makefile | 1 + > libavcodec/hwaccels.h | 1 + > libavcodec/vulkan_dirac.c | 3615 +++++++++++++++++++++++++++++++++++++ > 3 files changed, 3617 insertions(+) > create mode 100644 libavcodec/vulkan_dirac.c > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index 262d0a3d3e..85af31f4f0 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -1001,6 +1001,7 @@ OBJS-$(CONFIG_AV1_NVDEC_HWACCEL) += > nvdec_av1.o > OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o > OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o > OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += 
vulkan_decode.o vulkan_av1.o > +OBJS-$(CONFIG_DIRAC_VULKAN_HWACCEL) += vulkan_dirac.o > OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o > OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o > OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o > diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h > index 5171e4c7d7..f6d148b169 100644 > --- a/libavcodec/hwaccels.h > +++ b/libavcodec/hwaccels.h > @@ -27,6 +27,7 @@ extern const struct FFHWAccel ff_av1_nvdec_hwaccel; > extern const struct FFHWAccel ff_av1_vaapi_hwaccel; > extern const struct FFHWAccel ff_av1_vdpau_hwaccel; > extern const struct FFHWAccel ff_av1_vulkan_hwaccel; > +extern const struct FFHWAccel ff_dirac_vulkan_hwaccel; > extern const struct FFHWAccel ff_h263_vaapi_hwaccel; > extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel; > extern const struct FFHWAccel ff_h264_d3d11va_hwaccel; > diff --git a/libavcodec/vulkan_dirac.c b/libavcodec/vulkan_dirac.c > new file mode 100644 > index 0000000000..bb8b75957d > --- /dev/null > +++ b/libavcodec/vulkan_dirac.c > @@ -0,0 +1,3615 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA > + */ > + > +#include "diracdec.h" > +#include "vulkan.h" > +#include "vulkan_decode.h" > +#include "libavfilter/vulkan_spirv.h" > +#include "libavutil/vulkan_loader.h" > +#include "libavcodec/pthread_internal.h" > +#include "libavcodec/dirac_vlc.c" > + > + > +typedef struct SubbandOffset { > + int32_t base_off; > + int32_t stride; > + int32_t pad0; > + int32_t pad1; > +} SubbandOffset; > + > +typedef struct SliceCoeffVk { > + int32_t left; > + int32_t top; > + int32_t tot_h; > + int32_t tot_v; > + int32_t tot; > + int32_t offs; > + int32_t pad0; > + int32_t pad1; > +} SliceCoeffVk; > + > +typedef struct WaveletPushConst { > + int real_plane_dims[6]; > + int plane_offs[3]; > + int plane_strides[3]; > + int dw[3]; > + int wavelet_depth; > +} WaveletPushConst; > + > +typedef struct DiracVulkanDecodeContext { > + FFVulkanContext vkctx; > + VkSamplerYcbcrConversion yuv_sampler; > + VkSampler sampler; > + > + FFVulkanPipeline vert_wavelet_pl[9]; > + FFVkSPIRVShader vert_wavelet_shd[9]; > + > + FFVulkanPipeline horiz_wavelet_pl[9]; > + FFVkSPIRVShader horiz_wavelet_shd[9]; > + > + FFVulkanPipeline cpy_to_image_pl[3]; > + FFVkSPIRVShader cpy_to_image_shd[3]; > + > + FFVulkanPipeline quant_pl; > + FFVkSPIRVShader quant_shd; > + > + FFVkQueueFamilyCtx qf; > + FFVkExecPool exec_pool; > + > + int quant_val_buf_size; > + int thread_buf_size; > + int32_t *quant_val_buf_vk_ptr; > + FFVkBuffer *quant_val_buf; > + AVBufferRef *av_quant_val_buf; > + size_t quant_val_buf_offs; > + > + int n_slice_bufs; > + int slice_buf_size; > + SliceCoeffVk *slice_buf_vk_ptr; > + FFVkBuffer *quant_buf; > + AVBufferRef *av_quant_buf; > + size_t quant_buf_offs; > + > + int32_t *quant_buf_vk_ptr; > + int quant_buf_size; > + FFVkBuffer *slice_buf; > + AVBufferRef 
*av_slice_buf; > + size_t slice_buf_offs; > + > + FFVkBuffer tmp_buf; > + FFVkBuffer tmp_interleave_buf; > + > + FFVkBuffer subband_info; > + SubbandOffset *subband_info_ptr; > + > + int slice_vals_size; > + > + WaveletPushConst pConst; > +} DiracVulkanDecodeContext; > + > +typedef struct DiracVulkanDecodePicture { > + DiracFrame *frame; > +} DiracVulkanDecodePicture; > + > +static void free_common(AVCodecContext *avctx) > +{ > + DiracVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; > + DiracContext *ctx = avctx->priv_data; > + FFVulkanContext *s = &dec->vkctx; > + FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + > + if (ctx->hwaccel_picture_private) { > + av_free(ctx->hwaccel_picture_private); > + } > + > + /* Wait on and free execution pool */ > + if (dec->exec_pool.cmd_bufs) { > + ff_vk_exec_pool_free(s, &dec->exec_pool); > + } > + > + ff_vk_pipeline_free(s, &dec->quant_pl); > + ff_vk_shader_free(s, &dec->quant_shd); > + > + for (int i = 0; i < 3; i++) { > + ff_vk_pipeline_free(s, &dec->cpy_to_image_pl[i]); > + ff_vk_shader_free(s, &dec->cpy_to_image_shd[i]); > + } > + > + for (int i = 0; i < 9; i++) { > + ff_vk_pipeline_free(s, &dec->vert_wavelet_pl[i]); > + ff_vk_shader_free(s, &dec->vert_wavelet_shd[i]); > + > + ff_vk_pipeline_free(s, &dec->horiz_wavelet_pl[i]); > + ff_vk_shader_free(s, &dec->horiz_wavelet_shd[i]); > + } > + // TODO: Add freeing all pipelines and shaders for wavelets > + // > + > + // if (dec->yuv_sampler) > + // vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, > dec->yuv_sampler, > + // s->hwctx->alloc); > + if (dec->sampler) > + vk->DestroySampler(s->hwctx->act_dev, dec->sampler, > s->hwctx->alloc); > + > + av_buffer_unref(&dec->av_quant_val_buf); > + av_buffer_unref(&dec->av_quant_buf); > + av_buffer_unref(&dec->av_slice_buf); > + av_buffer_unref(&dec->av_slice_buf); > + > + ff_vk_free_buf(&dec->vkctx, &dec->subband_info); > + > + ff_vk_free_buf(&dec->vkctx, &dec->tmp_buf); > + ff_vk_free_buf(&dec->vkctx, 
&dec->tmp_interleave_buf); > + > + ff_vk_uninit(s); > +} > + > +static av_always_inline inline void bar_read(VkBufferMemoryBarrier2 > *buf_bar, > + int *nb_buf_bar, > + FFVkBuffer *buf) { > + buf_bar[(*nb_buf_bar)++] = (VkBufferMemoryBarrier2) { > + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, > + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, > + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, > + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, > + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT, > + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .buffer = buf->buf, > + .size = buf->size, > + .offset = 0, > + }; > +} > + > +static av_always_inline inline void bar_write(VkBufferMemoryBarrier2 > *buf_bar, > + int *nb_buf_bar, > + FFVkBuffer *buf) { > + buf_bar[(*nb_buf_bar)++] = (VkBufferMemoryBarrier2) { > + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, > + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, > + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, > + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, > + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, > + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .buffer = buf->buf, > + .size = buf->size, > + .offset = 0, > + }; > +} > + > +static inline int alloc_tmp_bufs(DiracContext *ctx, > DiracVulkanDecodeContext *dec) { > + int err, plane_size; > + > + plane_size = sizeof(int32_t) * > + (ctx->plane[0].idwt.width * ctx->plane[0].idwt.height + > + ctx->plane[1].idwt.width * ctx->plane[1].idwt.height + > + ctx->plane[2].idwt.width * ctx->plane[2].idwt.height); > + > + if (dec->tmp_buf.buf != NULL) { > + ff_vk_free_buf(&dec->vkctx, &dec->tmp_buf); > + ff_vk_free_buf(&dec->vkctx, &dec->tmp_interleave_buf); > + } > + > + err = ff_vk_create_buf(&dec->vkctx, &dec->tmp_buf, plane_size, > + NULL, > + NULL, > + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | > + 
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, > + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); > + if (err < 0) > + return err; > + > + err = ff_vk_create_buf(&dec->vkctx, &dec->tmp_interleave_buf, > plane_size, > + NULL, > + NULL, > + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | > + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, > + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); > + if (err < 0) > + return err; > + > + return 0; > +} > + > +static inline int alloc_host_mapped_buf(DiracVulkanDecodeContext *dec, > size_t req_size, > + void **mem, AVBufferRef **avbuf, > FFVkBuffer **buf) { > + // FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + // VkResult ret; > + int err; > + > + err = ff_vk_create_avbuf(&dec->vkctx, avbuf, req_size, > + NULL, > + NULL, > + // &create_desc, > + // &import_desc, > + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | > + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, > + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | > + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); > + if (err < 0) > + return err; > + > + *buf = (FFVkBuffer*)(*avbuf)->data; > + err = ff_vk_map_buffer(&dec->vkctx, *buf, > + (uint8_t **)mem, 0); > + if (err < 0) > + return err; > + > + return 0; > +} > + > +static int alloc_slices_buf(DiracContext *ctx, DiracVulkanDecodeContext > *dec) { > + int err, length = ctx->num_y * ctx->num_x; > + > + dec->n_slice_bufs = length; > + > + if (dec->slice_buf_vk_ptr) { > + av_buffer_unref(&dec->av_slice_buf); > + } > + > + dec->slice_buf_size = sizeof(SliceCoeffVk) * length * 3 * > MAX_DWT_LEVELS; > + err = alloc_host_mapped_buf(dec, dec->slice_buf_size, > + (void **)&dec->slice_buf_vk_ptr, > + &dec->av_slice_buf, > + &dec->slice_buf); > + if (err < 0) > + return err; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, &dec->quant_pl, > + NULL, 1, 2, 0, > + dec->slice_buf->address, > + dec->slice_buf->size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + return err; > + > + return 0; > +} > + > +static int alloc_dequant_buf(DiracContext *ctx, DiracVulkanDecodeContext > *dec) { > + int err, length = 
ctx->num_y * ctx->num_x; > + > + if (dec->quant_buf_vk_ptr) { > + av_buffer_unref(&dec->av_quant_buf); > + } > + > + dec->n_slice_bufs = length; > + > + dec->quant_buf_size = sizeof(int32_t) * MAX_DWT_LEVELS * 8 * length; > + err = alloc_host_mapped_buf(dec, dec->quant_buf_size, > + (void **)&dec->quant_buf_vk_ptr, > + &dec->av_quant_buf, > + &dec->quant_buf); > + if (err < 0) > + return err; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, &dec->quant_pl, > + NULL, 1, 1, 0, > + dec->quant_buf->address, > + dec->quant_buf->size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + return err; > + > + return 0; > +} > + > +static int subband_coeffs(const DiracContext *s, int x, int y, int p, int > off, > + SliceCoeffVk *c) > +{ > + int level, coef = 0; > + for (level = 0; level <= s->wavelet_depth; level++) { > + SliceCoeffVk *o = &c[level]; > + const SubBand *b = &s->plane[p].band[level][3]; /* orientation > doens't matter */ > + o->top = b->height * y / s->num_y; > + o->left = b->width * x / s->num_x; > + o->tot_h = ((b->width * (x + 1)) / s->num_x) - o->left; > + o->tot_v = ((b->height * (y + 1)) / s->num_y) - o->top; > + o->tot = o->tot_h * o->tot_v; > + o->offs = off + coef; > + coef += o->tot * (4 - !!level); > + } > + return coef; > +} > + > +static int alloc_quant_buf(DiracContext *ctx, DiracVulkanDecodeContext > *dec) { > + int err, length = ctx->num_y * ctx->num_x, coef_buf_size; > + SliceCoeffVk tmp[MAX_DWT_LEVELS]; > + coef_buf_size = subband_coeffs(ctx, ctx->num_x - 1, ctx->num_y - 1, > 0, 0, tmp) + 8; > + coef_buf_size = coef_buf_size + 512; > + dec->slice_vals_size = coef_buf_size; > + coef_buf_size *= sizeof(int32_t); > + > + if (dec->quant_val_buf_vk_ptr) { > + av_buffer_unref(&dec->av_quant_val_buf); > + } > + > + dec->thread_buf_size = coef_buf_size; > + > + dec->quant_val_buf_size = dec->thread_buf_size * 3 * length; > + err = alloc_host_mapped_buf(dec, dec->quant_val_buf_size, > + (void **)&dec->quant_val_buf_vk_ptr, > + &dec->av_quant_val_buf, 
> + &dec->quant_val_buf); > + if (err < 0) > + return err; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, &dec->quant_pl, > + NULL, 1, 0, 0, > + dec->quant_val_buf->address, > + dec->quant_val_buf->size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + return err; > + > + return 0; > +} > + > +/* ----- Copy Shader init and pipeline pass ----- */ > + > +static int init_cpy_shd(DiracVulkanDecodeContext *s, FFVkSPIRVCompiler > *spv, int idx) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->cpy_to_image_shd[idx]; > + FFVulkanPipeline *pl = &s->cpy_to_image_pl[idx]; > + FFVkExecPool *exec = &s->exec_pool; > + const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); > + > + RET(ff_vk_shader_init(pl, shd, "cpy_to_image", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->cpy_to_image_shd[idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_debug_printf : enable); > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_layout = "std430", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_img", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, > + .mem_quali = "writeonly", > + .mem_layout = ff_vk_shader_rep_fmt(vkctx->output_format), > + /*.mem_layout = "rgba32f",*/ > + .dimensions = 2, > + .elems = planes, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + 
GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, int plane = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, if (!IS_WITHIN(ivec2(x, y), imageSize(out_img[plane]))) > return; ); > + GLSLC(1, int idx = plane_offs[plane] + y * plane_strides[plane] + > x; ); > + if (idx == 2) { > + GLSLC(1, int32_t ival = inBuf[idx] + 2048; > ); > + GLSLC(1, float val = float(clamp(ival, 0, 4096)) / 65535.0; > ); > + } else if (idx == 1) { > + GLSLC(1, int32_t ival = inBuf[idx] + 512; > ); > + GLSLC(1, float val = float(clamp(ival, 0, 1024)) / 65535.0; > ); > + } else { > + GLSLC(1, int32_t ival = inBuf[idx] + 128; > ); > + GLSLC(1, float val = float(clamp(ival, 0, 256)) / 256.0; > ); > + } > + GLSLC(1, imageStore(out_img[plane], ivec2(x, y), vec4(val)); > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static av_always_inline int inline > cpy_to_image_pass(DiracVulkanDecodeContext *dec, > + DiracContext *ctx, > + FFVkExecContext *exec, > + VkImageView *views, > + VkBufferMemoryBarrier2 *buf_bar, > + int *nb_buf_bar, > + VkImageMemoryBarrier2 *img_bar, > + int *nb_img_bar, int idx) { > + int err, prev_nb_bar = *nb_buf_bar, prev_nb_img_bar = *nb_img_bar; > + FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + DiracVulkanDecodePicture *pic = 
ctx->hwaccel_picture_private; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, > &dec->cpy_to_image_pl[idx], > + exec, 0, 0, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + return err; > + > + ff_vk_update_descriptor_img_array(&dec->vkctx, > &dec->cpy_to_image_pl[idx], > + exec, pic->frame->avframe, views, > 0, 1, > + VK_IMAGE_LAYOUT_GENERAL, > + dec->sampler); > + > + dec->pConst.real_plane_dims[0] = ctx->plane[0].idwt.width; > + dec->pConst.real_plane_dims[1] = ctx->plane[0].idwt.height; > + dec->pConst.real_plane_dims[2] = ctx->plane[1].idwt.width; > + dec->pConst.real_plane_dims[3] = ctx->plane[1].idwt.height; > + dec->pConst.real_plane_dims[4] = ctx->plane[2].idwt.width; > + dec->pConst.real_plane_dims[5] = ctx->plane[2].idwt.height; > + > + dec->pConst.plane_strides[0] = ctx->plane[0].idwt.width; > + dec->pConst.plane_strides[1] = ctx->plane[1].idwt.width; > + dec->pConst.plane_strides[2] = ctx->plane[2].idwt.width; > + > + dec->pConst.plane_offs[0] = 0; > + dec->pConst.plane_offs[1] = ctx->plane[0].idwt.width * > ctx->plane[0].idwt.height; > + dec->pConst.plane_offs[2] = dec->pConst.plane_offs[1] > + + ctx->plane[1].idwt.width * ctx->plane[1].idwt.height; > + > + ff_vk_update_push_exec(&dec->vkctx, exec, &dec->cpy_to_image_pl[idx], > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + > + ff_vk_frame_barrier(&dec->vkctx, exec, pic->frame->avframe, > + img_bar, nb_img_bar, > + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, > + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, > + VK_ACCESS_SHADER_READ_BIT, > + VK_IMAGE_LAYOUT_GENERAL, > + VK_QUEUE_FAMILY_IGNORED); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + prev_nb_bar, > + .bufferMemoryBarrierCount = *nb_buf_bar - prev_nb_bar, > + .pImageMemoryBarriers = img_bar + prev_nb_img_bar, 
> + .imageMemoryBarrierCount = *nb_img_bar - prev_nb_img_bar, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, > &dec->cpy_to_image_pl[idx]); > + > + vk->CmdDispatch(exec->buf, > + ctx->plane[0].width >> 3, > + ctx->plane[0].height >> 3, > + 1); > + > + return 0; > +} > + > +/* ----- LeGall Wavelet init and pipeline pass ----- */ > + > +static const char get_idx[] = { > + C(0, int getIdx(int plane, int x, int y) { > ) > + C(1, return plane_offs[plane] + plane_strides[plane] * y + x; > ) > + C(0, } > ) > +}; > + > +static const char legall_low_y[] = { > + C(0, int32_t legall_low_y(int plane, int x, int y) { > ) > + C(1, const int h = plane_sizes[plane].y; > ) > + C(1, > ) > + C(1, const int y_1 = ((y - 1) > 0) ? (y - 1) : 1; > ) > + C(1, const int32_t val_1 = inBuf[getIdx(plane, x, y_1)]; > ) > + C(1, const int y0 = y; > ) > + C(1, const int32_t val0 = inBuf[getIdx(plane, x, y0)]; > ) > + C(1, const int y1 = y + 1; > ) > + C(1, const int32_t val1 = inBuf[getIdx(plane, x, y1)]; > ) > + C(1, return val0 - ((val1 + val_1 + 2) >> 2); > ) > + C(0, } > ) > +}; > + > +static const char legall_high[] = { > + C(0, int32_t legall_high(int32_t v1, int32_t v2, int32_t v3) { > ) > + C(1, return v1 + ((v2 + v3 + 1) >> 1); > ) > + C(0, } > ) > +}; > + > +static const char legall_vert[] = { > + C(0, void idwt_vert(int plane, int x, int y) { > ) > + C(1, const int h = plane_sizes[plane].y; > ) > + C(1, > ) > + C(1, const int32_t out0 = legall_low_y(plane, x, y); > ) > + C(1, const int32_t yy = ((y + 2) < h) ? 
(y + 2) : (h - 2); > ) > + C(1, const int32_t tmp1 = legall_low_y(plane, x, yy); > ) > + C(1, > ) > + C(1, const int y1 = y + 1; > ) > + C(1, const int32_t val1 = inBuf[getIdx(plane, x, y1)]; > ) > + C(1, > ) > + C(1, const int32_t out1 = legall_high(val1, out0, tmp1); > ) > + C(1, > ) > + C(1, outBuf[getIdx(plane, x, y)] = out0; > ) > + C(1, outBuf[getIdx(plane, x, y + 1)] = out1; > ) > + C(0, } > ) > +}; > + > +static const char legall_low_x[] = { > + C(0, int32_t legall_low_x(int plane, int x, int y) { > ) > + C(1, const int w = plane_sizes[plane].x; > ) > + C(1, const int dw = w / 2; > ) > + C(1, > ) > + C(1, const int x_1 = (x > 0) ? x : 0; > ) > + C(1, const int32_t val_1 = inBuf[getIdx(plane, x_1, y)]; > ) > + C(1, > ) > + C(1, const int x1 = (x > 0) ? (x + dw) : dw; > ) > + C(1, const int32_t val1 = inBuf[getIdx(plane, x1, y)]; > ) > + C(1, > ) > + C(1, const int x0 = (x > 0) ? (x + dw - 1) : dw; > ) > + C(1, const int32_t val0 = inBuf[getIdx(plane, x0, y)]; > ) > + C(1, > ) > + C(1, return val_1 - ((val0 + val1 + 2) >> 2); > ) > + C(0, } > ) > +}; > + > +static const char legall_horiz[] = { > + C(0, void idwt_horiz(int plane, int x, int y) { > ) > + C(1, const int w = plane_sizes[plane].x; > ) > + C(1, const int dw = w / 2 - 1; > ) > + C(1, > ) > + C(1, const int32_t out0 = legall_low_x(plane, x, y); > ) > + C(1, const int32_t tmp1 = (x == dw) ? 
out0 : legall_low_x(plane, x > + 1, y); ) > + C(1, > ) > + C(1, const int x1 = x + dw + 1; > ) > + C(1, const int32_t val1 = inBuf[getIdx(plane, x1, y)]; > ) > + C(1, > ) > + C(1, const int32_t out1 = legall_high(val1, out0, tmp1); > ) > + C(1, > ) > + C(1, outBuf[getIdx(plane, 2 * x, y)] = (out0 + 1) >> 1; > ) > + C(1, outBuf[getIdx(plane, 2 * x + 1, y)] = (out1 + 1) >> 1; > ) > + C(0, } > ) > +}; > + > +static int init_wavelet_shd_legall_vert(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_LEGALL5_3; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->vert_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->vert_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "legall_vert", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->vert_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + 
GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + GLSLD(legall_low_y); > + GLSLD(legall_high); > + GLSLD(legall_vert); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; 2 * y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; x < w; x += off_x) { > ); > + GLSLC(3, idwt_vert(pic_z, x, 2 * y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static int init_wavelet_shd_legall_horiz(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_LEGALL5_3; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->horiz_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->horiz_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "legall_horiz", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->horiz_wavelet_shd[wavelet_idx]; > + 
ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_debug_printf : enable); > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + GLSLD(legall_low_x); > + GLSLD(legall_high); > + GLSLD(legall_horiz); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; 2 * x < w; x += off_x) { > ); > + GLSLC(3, idwt_horiz(pic_z, x, y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + 
RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static av_always_inline int inline > wavelet_legall_pass(DiracVulkanDecodeContext *dec, > + DiracContext *ctx, > + FFVkExecContext *exec, > + VkBufferMemoryBarrier2 *buf_bar, > + int *nb_buf_bar) { > + int err; > + int barrier_num = *nb_buf_bar; > + int wavelet_idx = DWT_DIRAC_LEGALL5_3; > + FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + > + FFVulkanPipeline *pl_hor = &dec->horiz_wavelet_pl[wavelet_idx]; > + FFVulkanPipeline *pl_vert = &dec->vert_wavelet_pl[wavelet_idx]; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 0, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 1, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 0, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 1, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + > + for (int i = ctx->wavelet_depth - 1; i >= 0; i--) { > + dec->pConst.plane_strides[0] = ctx->plane[0].idwt.width << i; > + dec->pConst.plane_strides[1] = ctx->plane[1].idwt.width << i; > + dec->pConst.plane_strides[2] = ctx->plane[2].idwt.width << i; > + > + dec->pConst.dw[0] = ctx->plane[0].idwt.width >> (i + 1); > + dec->pConst.dw[1] = 
ctx->plane[1].idwt.width >> (i + 1); > + dec->pConst.dw[2] = ctx->plane[2].idwt.width >> (i + 1); > + > + dec->pConst.real_plane_dims[0] = (ctx->plane[0].idwt.width) >> i; > + dec->pConst.real_plane_dims[1] = (ctx->plane[0].idwt.height) >> i; > + dec->pConst.real_plane_dims[2] = (ctx->plane[1].idwt.width) >> i; > + dec->pConst.real_plane_dims[3] = (ctx->plane[1].idwt.height) >> i; > + dec->pConst.real_plane_dims[4] = (ctx->plane[2].idwt.width) >> i; > + dec->pConst.real_plane_dims[5] = (ctx->plane[2].idwt.height) >> i; > + > + /* Vertical wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_vert, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_vert); > + vk->CmdDispatch(exec->buf, > + dec->pConst.real_plane_dims[0] >> 3, > + dec->pConst.real_plane_dims[1] >> 4, > + 1); > + > + /* Horizontal wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_hor, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = 
*nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_hor); > + vk->CmdDispatch(exec->buf, > + dec->pConst.real_plane_dims[0] >> 4, > + dec->pConst.real_plane_dims[1] >> 3, > + 1); > + } > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + return 0; > +fail: > + ff_vk_exec_discard_deps(&dec->vkctx, exec); > + return err; > +} > + > +/* ----- Fidelity init and pipeline pass ----- */ > + > +static const char fidelity_low[] = { > + C(0, int32_t fidelity_low(int32_t v0, int32_t v1, int32_t v2, > int32_t v3, int32_t v4, int32_t v5, int32_t v6, int32_t v7) {) > + C(1, return (-2 * v0 + 10 * v1 - 25 * v2 + 81 * v3 + 81 * v4 > - 25 * v5 + 10 * v6 - 2 * v7 + 128) >> 8;) > + C(0, } > ) > +}; > + > +static const char fidelity_high[] = { > + C(0, int32_t fidelity_high(int32_t v0, int32_t v1, int32_t v2, > int32_t v3, int32_t v4, int32_t v5, int32_t v6, int32_t v7) {) > + C(1, return (-8 * v0 + 21 * v1 - 46 * v2 + 161 * v3 + 161 * > v4 - 46 * v5 + 21 * v6 - 8 * v7 + 128) >> 8;) > + C(0, } > ) > +}; > + > +static const char fidelity_low_y[] = { > + C(0, int32_t fidelity_low_y(int plane, int x, int y) { > ) > + C(1, const int h = plane_sizes[plane].y; > ) > + C(1, > ) > + C(1, const int32_t v1 = inBuf[getIdx(plane, x, y + 1)]; > ) > + C(1, > ) > + C(1, const int y_6 = ((y - 6) > 0) ? (y - 6) : 0; > ) > + C(1, const int32_t v_6 = inBuf[getIdx(plane, x, y_6)]; > ) > + C(1, > ) > + C(1, const int y_4 = ((y - 4) > 0) ? 
(y - 4) : 0; > ) > + C(1, const int32_t v_4 = inBuf[getIdx(plane, x, y_4)]; > ) > + C(1, > ) > + C(1, const int y_2 = ((y - 2) > 0) ? (y - 2) : 0; > ) > + C(1, const int32_t v_2 = inBuf[getIdx(plane, x, y_2)]; > ) > + C(1, > ) > + C(1, const int32_t v0 = inBuf[getIdx(plane, x, y)]; > ) > + C(1, > ) > + C(1, const int y2 = ((y + 2) < h) ? (y + 2) : (h - 2); > ) > + C(1, const int32_t v2 = inBuf[getIdx(plane, x, y2)]; > ) > + C(1, > ) > + C(1, const int y4 = ((y + 4) < h) ? (y + 4) : (h - 2); > ) > + C(1, const int32_t v4 = inBuf[getIdx(plane, x, y4)]; > ) > + C(1, > ) > + C(1, const int y6 = ((y + 6) < h) ? (y + 6) : (h - 2); > ) > + C(1, const int32_t v6 = inBuf[getIdx(plane, x, y6)]; > ) > + C(1, > ) > + C(1, const int y8 = ((y + 8) < h) ? (y + 8) : (h - 2); > ) > + C(1, const int32_t v8 = inBuf[getIdx(plane, x, y8)]; > ) > + C(1, > ) > + C(1, return v1 + fidelity_low(v_6, v_4, v_2, v0, v2, v4, v6, v8); > ) > + C(0, } > ) > +}; > + > +static const char fidelity_vert[] = { > + C(0, void idwt_vert(int plane, int x, int y) { > ) > + C(1, const int h = plane_sizes[plane].y; > ) > + C(1, > ) > + C(1, const int32_t v0 = inBuf[getIdx(plane, x, y)]; > ) > + C(1, const int32_t v1 = fidelity_low_y(plane, x, y); > ) > + C(1, const int32_t v_7 = (y - 8 > 0) ? fidelity_low_y(plane, x, y > - 8) : v1; ) > + C(1, const int32_t v_5 = (y - 6 > 0) ? fidelity_low_y(plane, x, y > - 6) : v1; ) > + C(1, const int32_t v_3 = (y - 4 > 0) ? fidelity_low_y(plane, x, y > - 4) : v1; ) > + C(1, const int32_t v_1 = (y - 2 > 0) ? fidelity_low_y(plane, x, y > - 2) : v1; ) > + C(1, const int32_t v3 = (y + 2 < h) ? fidelity_low_y(plane, x, y > + 2) : ) > + C(1, fidelity_low_y(plane, x, > h - 2); ) > + C(1, const int32_t v5 = (y + 4 < h) ? fidelity_low_y(plane, x, y > + 4) : ) > + C(1, fidelity_low_y(plane, x, > h - 2); ) > + C(1, const int32_t v7 = (y + 6 < h) ? 
fidelity_low_y(plane, x, y > + 6) : ) > + C(1, fidelity_low_y(plane, x, > h - 2); ) > + C(1, outBuf[getIdx(plane, x, y)] = v0 - fidelity_high(v_7, v_5, > v_3, v_1, v1, v3, v5, v7);) > + C(1, outBuf[getIdx(plane, x, y + 1)] = v1; > ) > + C(0, } > ) > +}; > + > +static const char fidelity_low_x[] = { > + C(0, int32_t fidelity_low_x(int plane, int x, int y) { > ) > + C(1, const int w = plane_sizes[plane].x; > ) > + C(1, const int dw = w / 2 - 1; > ) > + C(1, > ) > + C(1, const int x_3 = clamp(x - 3, 0, dw); > ) > + C(1, const int32_t v_3 = inBuf[getIdx(plane, x_3, y)]; > ) > + C(1, > ) > + C(1, const int x_2 = clamp(x - 2, 0, dw); > ) > + C(1, const int32_t v_2 = inBuf[getIdx(plane, x_2, y)]; > ) > + C(1, > ) > + C(1, const int x_1 = clamp(x - 1, 0, dw); > ) > + C(1, const int32_t v_1 = inBuf[getIdx(plane, x_1, y)]; > ) > + C(1, > ) > + C(1, const int32_t v0 = inBuf[getIdx(plane, x, y)]; > ) > + C(1, > ) > + C(1, const int x_w = x + dw + 1; > ) > + C(1, const int32_t v_w = inBuf[getIdx(plane, x_w, y)]; > ) > + C(1, > ) > + C(1, const int x1 = clamp(x + 1, 0, dw); > ) > + C(1, const int32_t v1 = inBuf[getIdx(plane, x1, y)]; > ) > + C(1, > ) > + C(1, const int x2 = clamp(x + 2, 0, dw); > ) > + C(1, const int32_t v2 = inBuf[getIdx(plane, x2, y)]; > ) > + C(1, > ) > + C(1, const int x3 = clamp(x + 3, 0, dw); > ) > + C(1, const int32_t v3 = inBuf[getIdx(plane, x3, y)]; > ) > + C(1, > ) > + C(1, const int x4 = clamp(x + 4, 0, dw); > ) > + C(1, const int32_t v4 = inBuf[getIdx(plane, x4, y)]; > ) > + C(1, > ) > + C(1, return v_w + fidelity_low(v_3, v_2, v_1, v0, v1, v2, v3, > v4); ) > + C(0, } > ) > +}; > + > +static const char fidelity_horiz[] = { > + C(0, void idwt_horiz(int plane, int x, int y) { > ) > + C(1, const int w = plane_sizes[plane].x; > ) > + C(1, const int dw = w / 2 - 1; > ) > + C(1, > ) > + C(1, const int32_t vo0 = inBuf[getIdx(plane, x, y)]; > ) > + C(1, > ) > + C(1, const int x_4 = clamp(x - 4, 0, dw); > ) > + C(1, const int32_t v_4 = fidelity_low_x(plane, 
x_4, y); > ) > + C(1, const int x_3 = clamp(x - 3, 0, dw); > ) > + C(1, const int32_t v_3 = fidelity_low_x(plane, x_3, y); > ) > + C(1, const int x_2 = clamp(x - 2, 0, dw); > ) > + C(1, const int32_t v_2 = fidelity_low_x(plane, x_2, y); > ) > + C(1, const int x_1 = clamp(x - 1, 0, dw); > ) > + C(1, const int32_t v_1 = fidelity_low_x(plane, x_1, y); > ) > + C(1, const int x0 = clamp(x, 0, dw); > ) > + C(1, const int32_t v0 = fidelity_low_x(plane, x0, y); > ) > + C(1, const int x1 = clamp(x + 1, 0, dw); > ) > + C(1, const int32_t v1 = fidelity_low_x(plane, x1, y); > ) > + C(1, const int x2 = clamp(x + 2, 0, dw); > ) > + C(1, const int32_t v2 = fidelity_low_x(plane, x2, y); > ) > + C(1, const int x3 = clamp(x + 3, 0, dw); > ) > + C(1, const int32_t v3 = fidelity_low_x(plane, x3, y); > ) > + C(1, > ) > + C(1, outBuf[getIdx(plane, 2 * x, y)] = vo0 - fidelity_high(v_4, > v_3, v_2, v_1, v0, v1, v2, v3);) > + C(1, outBuf[getIdx(plane, 2 * x + 1, y)] = v0; > ) > + C(0, } > ) > +}; > + > +static int init_wavelet_shd_fidelity_vert(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_FIDELITY; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->vert_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->vert_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "fidelity_vert", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->vert_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = 
"int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + GLSLD(fidelity_low); > + GLSLD(fidelity_high); > + GLSLD(fidelity_low_y); > + GLSLD(fidelity_vert); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; 2 * y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; x < w; x += off_x) { > ); > + GLSLC(3, idwt_vert(pic_z, x, 2 * y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static int 
init_wavelet_shd_fidelity_horiz(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_FIDELITY; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->horiz_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->horiz_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "fidelity_horiz", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->horiz_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_debug_printf : enable); > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + GLSLD(fidelity_low); > + GLSLD(fidelity_high); > + GLSLD(fidelity_low_x); > + GLSLD(fidelity_horiz); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = 
int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; 2 * x < w; x += off_x) { > ); > + GLSLC(3, idwt_horiz(pic_z, x, y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static av_always_inline int inline > wavelet_fidelity_pass(DiracVulkanDecodeContext *dec, > + DiracContext *ctx, > + FFVkExecContext *exec, > + VkBufferMemoryBarrier2 *buf_bar, > + int *nb_buf_bar) { > + int err; > + int barrier_num = *nb_buf_bar; > + int wavelet_idx = DWT_DIRAC_FIDELITY; > + FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + > + FFVulkanPipeline *pl_hor = &dec->horiz_wavelet_pl[wavelet_idx]; > + FFVulkanPipeline *pl_vert = &dec->vert_wavelet_pl[wavelet_idx]; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 0, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 1, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 0, 0, > + 
dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 1, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + > + for (int i = ctx->wavelet_depth - 1; i >= 0; i--) { > + dec->pConst.plane_strides[0] = ctx->plane[0].idwt.width << i; > + dec->pConst.plane_strides[1] = ctx->plane[1].idwt.width << i; > + dec->pConst.plane_strides[2] = ctx->plane[2].idwt.width << i; > + > + dec->pConst.dw[0] = ctx->plane[0].idwt.width >> (i + 1); > + dec->pConst.dw[1] = ctx->plane[1].idwt.width >> (i + 1); > + dec->pConst.dw[2] = ctx->plane[2].idwt.width >> (i + 1); > + > + dec->pConst.real_plane_dims[0] = (ctx->plane[0].idwt.width) >> i; > + dec->pConst.real_plane_dims[1] = (ctx->plane[0].idwt.height) >> i; > + dec->pConst.real_plane_dims[2] = (ctx->plane[1].idwt.width) >> i; > + dec->pConst.real_plane_dims[3] = (ctx->plane[1].idwt.height) >> i; > + dec->pConst.real_plane_dims[4] = (ctx->plane[2].idwt.width) >> i; > + dec->pConst.real_plane_dims[5] = (ctx->plane[2].idwt.height) >> i; > + > + /* Vertical wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_vert, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_vert); > + vk->CmdDispatch(exec->buf, > + dec->pConst.real_plane_dims[0] >> 3, > + 
dec->pConst.real_plane_dims[1] >> 4, > + 1); > + > + /* Horizontal wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_hor, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_hor); > + vk->CmdDispatch(exec->buf, > + dec->pConst.real_plane_dims[0] >> 4, > + dec->pConst.real_plane_dims[1] >> 3, > + 1); > + } > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + return 0; > +fail: > + ff_vk_exec_discard_deps(&dec->vkctx, exec); > + return err; > +} > + > +/* ----- Daubechies(9, 7) init and pipeline pass ----- */ > +/* daub97_low1 / daub97_high1: first two lifting steps. v2 is the sample being updated, v1 and v3 are its two neighbours; the scaled, rounded sum of both neighbours is subtracted from v2, as COMPOSE_DAUB97iL1 / COMPOSE_DAUB97iH1 do in libavcodec/dirac_dwt.h. */ > +static const char daub97_low1[] = { > + C(0, int32_t daub97_low1(int32_t v1, int32_t v2, int32_t v3) { ) > + C(1, return v2 - ((1817 * (v1 + v3) + 2048) >> 12); ) > + C(0, } ) > +}; > + > +static const char daub97_high1[] = { > + C(0, int32_t daub97_high1(int32_t v1, int32_t v2, int32_t v3) { ) > + C(1, return v2 - ((113 * (v1 + v3) + 64) >> 7); ) > + C(0, } ) > +}; > + > +static const char daub97_low0[] = { > + C(0, int32_t 
daub97_low0(int32_t v1, int32_t v2, int32_t v3) { ) > + C(1, return v2 + ((217 * (v1 + v3) + 2048) >> 12); ) > + C(0, } ) > +}; > +/* daub97_low0 / daub97_high0: last two lifting steps. They add the scaled, rounded sum of both neighbours (v1, v3) to the centre sample v2, with gains 217 and 6497, as COMPOSE_DAUB97iL0 / COMPOSE_DAUB97iH0 do in libavcodec/dirac_dwt.h. */ > +static const char daub97_high0[] = { > + C(0, int32_t daub97_high0(int32_t v1, int32_t v2, int32_t v3) { ) > + C(1, return v2 + ((6497 * (v1 + v3) + 2048) >> 12); ) > + C(0, } ) > +}; > +/* daub97_low_x0: applies daub97_low1 to the sample at column x using samples from the second half of the row (offset dw). NOTE(review): x0 and x2 both evaluate to x + dw, so v0 and v2 read the same sample and x2 is unused - confirm the intended left neighbour. */ > +static const char daub97_low_x0[] = { > + C(0, int32_t daub97_low_x0(int plane, int x, int y) { ) > + C(1, int w = plane_sizes[plane].x; ) > + C(1, int dw = plane_sizes[plane].x / 2; ) > + C(1, ) > + C(1, int x0 = (x == 0) ? dw : x + dw; ) > + C(1, int32_t v0 = inBuf[getIdx(plane, x0, y)]; ) > + C(1, ) > + C(1, int32_t v1 = inBuf[getIdx(plane, x, y)]; ) > + C(1, ) > + C(1, int x2 = x + dw; ) > + C(1, int32_t v2 = inBuf[getIdx(plane, x0, y)]; ) > + C(1, ) > + C(1, return daub97_low1(v0, v1, v2); ) > + C(0, } ) > +}; > + > +static const char daub97_high_x0[] = { > + C(0, int32_t daub97_high_x0(int plane, int x, int y) { ) > + C(1, int w = plane_sizes[plane].x; ) > + C(1, int dw = plane_sizes[plane].x / 2; ) > + C(1, ) > + C(1, int x0 = (x == dw - 1) ? 
(dw - 1) : (x - 1); ) > + C(1, int32_t v0 = daub97_low_x0(plane, x0, y); ) > + C(1, ) > + C(1, int32_t v1 = inBuf[getIdx(plane, x + dw - 1, y)]; ) > + C(1, ) > + C(1, int32_t v2 = daub97_low_x0(plane, x, y); ) > + C(1, ) > + C(1, return daub97_high1(v0, v1, v2); ) > + C(0, } ) > +}; > + /* Shader snippet: returns daub97_low0() of daub97_high_x0 at x, daub97_low_x0 at x and daub97_high_x0 at x + 1. */ > +static const char daub97_low_x1[] = { > + C(0, int32_t daub97_low_x1(int plane, int x, int y) { ) > + C(1, int w = plane_sizes[plane].x; ) > + C(1, int dw = plane_sizes[plane].x / 2; ) > + C(1, ) > + C(1, int32_t v0 = daub97_high_x0(plane, x, y); ) > + C(1, ) > + C(1, int32_t v1 = daub97_low_x0(plane, x, y); ) > + C(1, ) > + C(1, int32_t v2 = daub97_high_x0(plane, x + 1, y); ) > + C(1, ) > + C(1, return daub97_low0(v0, v1, v2); ) > + C(0, } ) > +}; > + /* Shader snippet: returns daub97_high0() of daub97_low_x1 at clamp(x - 1, 0, dw), daub97_high_x0 at x + 1 and daub97_low_x1 at clamp(x, 0, dw). */ > +static const char daub97_high_x1[] = { > + C(0, int32_t daub97_high_x1(int plane, int x, int y) { ) > + C(1, int w = plane_sizes[plane].x; ) > + C(1, int dw = plane_sizes[plane].x / 2; ) > + C(1, ) > + C(1, int x0 = clamp(x - 1, 0, dw); ) > + C(1, int32_t v0 = daub97_low_x1(plane, x0, y); ) > + C(1, ) > + C(1, int32_t v1 = daub97_high_x0(plane, x + 1, y); ) > + C(1, ) > + C(1, int x2 = clamp(x, 0, dw); ) > + C(1, int32_t v2 = daub97_low_x1(plane, x2, y); ) > + C(1, ) > + C(1, return daub97_high0(v0, v1, v2); ) > + C(0, } ) > +}; > + /* Shader snippet defining idwt_horiz(): evaluates daub97_low_x1 and daub97_high_x1 at column x and stores them, each as ~((~v) >> 1), to output columns 2 * x and 2 * x + 1 of row y. */ > +static const char daub97_horiz[] = { > + C(0, void idwt_horiz(int plane, int x, int y) { ) > + C(1, int w = plane_sizes[plane].x; ) > + C(1, int dw = plane_sizes[plane].x / 2; ) > + C(1, ) > + C(1, int32_t v0 = daub97_low_x1(plane, x, y); ) > + C(1, int32_t v1 = daub97_high_x1(plane, x, y); ) > + C(1, ) > + C(1, outBuf[getIdx(plane, 2 * x, y)] = ~((~v0) >> 1); ) > + C(1, outBuf[getIdx(plane, 2 * x + 1, y)] = ~((~v1) >> 1); ) > + C(0, } ) > +}; > + /* Sets up the vertical-pass shader and pipeline kept at index DWT_DIRAC_DAUB9_7; returns err (0 unless a RET() step fails, presumably - RET is defined elsewhere). */ > +static int init_wavelet_shd_daub97_vert(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_DAUB9_7; > + FFVulkanContext 
*vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->vert_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->vert_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "daub97_vert", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->vert_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 1, 3); > + > + GLSLC(0, #extension GL_EXT_debug_printf : enable); > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + /* Two storage-buffer bindings holding int32_t arrays: in_buf, then out_buf. */ > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + /* no "readonly" qualifier here: the shader body emitted further down stores back into inBuf */ > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + /* GLSL declaration of the push-constant block; NOTE(review): its layout presumably has to match WaveletPushConst - confirm against the struct definition. */ > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + /* main(): each invocation handles plane pic_z and walks columns x, x + off_x, ... while x < w. */ > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(1, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(1, for (; x < w; x += off_x) { > ); 
> + /* Four lifting steps down each column, following COMPOSE_DAUB97iL1, iH1, iL0 and iH0 in libavcodec/dirac_dwt.h: the rounding offset is added after the multiply, and the shift is applied before the result is combined with the centre sample. 3616 = 113 << 5, so step two equals (113 * s + 64) >> 7. NOTE(review): step four reads inBuf at row y although step three stored its result in outBuf - confirm which buffer the last step should read. */ > + GLSLC(2, for (int y = 0; y < h; y += 2) { > ); > + GLSLC(3, int32_t v0 = inBuf[getIdx(pic_z, x, int(clamp(y - > 1, 0, h)))]; ); > + GLSLC(3, int32_t v1 = inBuf[getIdx(pic_z, x, y + 1)]; > ); > + GLSLC(3, inBuf[getIdx(pic_z, x, y)] -= (1817 * (v0 + v1) + > 2048) >> 12; ); > + GLSLC(2, } > ); > + GLSLC(2, for (int y = 0; y < h; y += 2) { > ); > + GLSLC(3, int32_t v0 = inBuf[getIdx(pic_z, x, y)]; > ); > + GLSLC(3, int32_t v1 = inBuf[getIdx(pic_z, x, int(clamp(y + > 2, 0, h - 2)))]; ); > + GLSLC(3, inBuf[getIdx(pic_z, x, y + 1)] -= (3616 * (v0 + > v1) + 2048) >> 12; ); > + GLSLC(2, } > ); > + GLSLC(2, for (int y = 0; y < h; y += 2) { > ); > + GLSLC(3, int32_t v0 = inBuf[getIdx(pic_z, x, int(clamp(y - > 1, 0, h)))]; ); > + GLSLC(3, int32_t v1 = inBuf[getIdx(pic_z, x, y + 1)]; > ); > + GLSLC(3, int32_t v2 = inBuf[getIdx(pic_z, x, y)]; > ); > + GLSLC(3, outBuf[getIdx(pic_z, x, y)] = v2 + ((217 * (v0 + > v1) + 2048) >> 12); ); > + GLSLC(2, } > ); > + GLSLC(2, for (int y = 0; y < h; y += 2) { > ); > + GLSLC(3, int32_t v0 = inBuf[getIdx(pic_z, x, y)]; > ); > + GLSLC(3, int32_t v1 = inBuf[getIdx(pic_z, x, int(clamp(y + > 2, 0, h - 2)))]; ); > + GLSLC(3, int32_t v2 = inBuf[getIdx(pic_z, x, y + 1)]; > ); > + GLSLC(3, outBuf[getIdx(pic_z, x, y + 1)] = v2 + ((6497 * > (v0 + v1) + 2048) >> 12);); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static int init_wavelet_shd_daub97_horiz(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_DAUB9_7; > + FFVulkanContext *vkctx 
= &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->horiz_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->horiz_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "daub97_horiz", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->horiz_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_debug_printf : enable); > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + GLSLD(daub97_low1); > + GLSLD(daub97_low0); > + GLSLD(daub97_high1); > + GLSLD(daub97_high0); > + GLSLD(daub97_low_x0); > + GLSLD(daub97_high_x0); > + GLSLD(daub97_low_x1); > + GLSLD(daub97_high_x1); > + GLSLD(daub97_horiz); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + 
GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; 2 * x < w; x += off_x) { > ); > + GLSLC(3, idwt_horiz(pic_z, x, y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static av_always_inline int inline > wavelet_daub97_pass(DiracVulkanDecodeContext *dec, > + DiracContext *ctx, > + FFVkExecContext *exec, > + VkBufferMemoryBarrier2 *buf_bar, > + int *nb_buf_bar) { > + int err; > + int barrier_num = *nb_buf_bar; > + int wavelet_idx = DWT_DIRAC_DAUB9_7; > + FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + > + FFVulkanPipeline *pl_hor = &dec->horiz_wavelet_pl[wavelet_idx]; > + FFVulkanPipeline *pl_vert = &dec->vert_wavelet_pl[wavelet_idx]; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 0, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 1, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 0, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = 
ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 1, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + > + for (int i = ctx->wavelet_depth - 1; i >= 0; i--) { > + dec->pConst.plane_strides[0] = ctx->plane[0].idwt.width << i; > + dec->pConst.plane_strides[1] = ctx->plane[1].idwt.width << i; > + dec->pConst.plane_strides[2] = ctx->plane[2].idwt.width << i; > + > + dec->pConst.dw[0] = ctx->plane[0].idwt.width >> (i + 1); > + dec->pConst.dw[1] = ctx->plane[1].idwt.width >> (i + 1); > + dec->pConst.dw[2] = ctx->plane[2].idwt.width >> (i + 1); > + > + dec->pConst.real_plane_dims[0] = (ctx->plane[0].idwt.width) >> i; > + dec->pConst.real_plane_dims[1] = (ctx->plane[0].idwt.height) >> i; > + dec->pConst.real_plane_dims[2] = (ctx->plane[1].idwt.width) >> i; > + dec->pConst.real_plane_dims[3] = (ctx->plane[1].idwt.height) >> i; > + dec->pConst.real_plane_dims[4] = (ctx->plane[2].idwt.width) >> i; > + dec->pConst.real_plane_dims[5] = (ctx->plane[2].idwt.height) >> i; > + > + /* Vertical wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_vert, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_vert); > + vk->CmdDispatch(exec->buf, > + dec->pConst.real_plane_dims[0], > + 1, > + 1); > + > + /* Horizontal wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_hor, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, 
sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_hor); > + vk->CmdDispatch(exec->buf, > + dec->pConst.real_plane_dims[0] >> 4, > + dec->pConst.real_plane_dims[1] >> 3, > + 1); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + } > + > + return 0; > +fail: > + ff_vk_exec_discard_deps(&dec->vkctx, exec); > + return err; > +} > + > +/* ----- Deslauriers-Dubuc(9, 7) init and pipeline pass ----- */ > + > +static const char dd97_high[] = { > + C(0, int32_t dd97_high(int32_t v1, int32_t v2, int32_t v3, int32_t > v4, int32_t v5) { ) > + C(1, return v3 + ((9 * v4 + 9 * v2 - v5 - v1 + 8) >> 4); > ) > + C(0, } > ) > +}; > + > +static const char dd97_vert[] = { > + C(0, void idwt_vert(int plane, int x, int y) { > ) > + C(1, const int h = plane_sizes[plane].y; > ) > + C(1, > ) > + C(1, const int32_t out0 = legall_low_y(plane, x, y); > ) > + C(1, const int32_t out_2 = (y - 2 > 0) ? legall_low_y(plane, x, y > - 2) : ) > + C(1, legall_low_y(plane, x, > 0); ) > + C(1, const int32_t out2 = (y + 2 < h) ? 
legall_low_y(plane, x, y + > 2) : ) > + C(1, legall_low_y(plane, x, h > - 2); ) > + C(1, const int32_t out4 = (y + 4 < h) ? legall_low_y(plane, x, y + > 4) : ) > + C(1, legall_low_y(plane, x, h > - 2); ) > + C(1, const int32_t val1 = inBuf[getIdx(plane, x, y + 1)]; > ) > + C(1, > ) > + C(1, outBuf[getIdx(plane, x, y)] = out0; > ) > + C(1, outBuf[getIdx(plane, x, y + 1)] = dd97_high(out_2, out0, > val1, out2, out4); ) > + C(1, } > ) > +}; > + > +static const char dd97_horiz[] = { > + C(0, void idwt_horiz(int plane, int x, int y) { > ) > + C(1, const int w = plane_sizes[plane].x; > ) > + C(1, const int dw = w / 2 - 1; > ) > + C(1, > ) > + C(1, const int32_t out0 = legall_low_x(plane, x, y); > ) > + C(1, > ) > + C(1, const int32_t out_1 = ((x - 1) > 0) ? legall_low_x(plane, x - > 1, y) : out0; ) > + C(1, const int32_t val3 = inBuf[getIdx(plane, x + dw + 1, y)]; > ) > + C(1, const int32_t out1 = ((x + 1) <= dw) ? legall_low_x(plane, x > + 1, y) : ) > + C(1, legall_low_x(plane, > dw, y); ) > + C(1, const int32_t out2 = ((x + 2) <= dw) ? 
legall_low_x(plane, x > + 2, y) : ) > + C(1, legall_low_x(plane, > dw, y); ) > + C(1, const int32_t res = dd97_high(out_1, out0, val3, out1, > out2); ) > + C(1, > ) > + C(1, outBuf[getIdx(plane, 2 * x, y)] = (out0 + 1) >> 1; > ) > + C(1, outBuf[getIdx(plane, 2 * x + 1, y)] = (res + 1) >> 1; > ) > + C(0, } > ) > +}; > + > +static int init_wavelet_shd_dd97_vert(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_DD9_7; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->vert_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->vert_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "dd97_vert", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->vert_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > 
+ GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + GLSLD(legall_low_y); > + GLSLD(dd97_high); > + GLSLD(dd97_vert); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; 2 * y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; x < w; x += off_x) { > ); > + GLSLC(3, idwt_vert(pic_z, x, 2 * y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static int init_wavelet_shd_dd97_horiz(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_DD9_7; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->horiz_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->horiz_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "dd97_horiz", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->horiz_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_debug_printf : enable); > + GLSLC(0, #extension 
GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + GLSLD(legall_low_x); > + GLSLD(dd97_high); > + GLSLD(dd97_horiz); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; 2 * x < w; x += off_x) { > ); > + GLSLC(3, idwt_horiz(pic_z, x, y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, 
spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static av_always_inline int inline > wavelet_dd97_pass(DiracVulkanDecodeContext *dec, > + DiracContext *ctx, > + FFVkExecContext *exec, > + VkBufferMemoryBarrier2 *buf_bar, > + int *nb_buf_bar) { > + int err; > + int barrier_num = *nb_buf_bar; > + int wavelet_idx = DWT_DIRAC_DD9_7; > + FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + > + FFVulkanPipeline *pl_hor = &dec->horiz_wavelet_pl[wavelet_idx]; > + FFVulkanPipeline *pl_vert = &dec->vert_wavelet_pl[wavelet_idx]; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 0, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 1, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 0, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 1, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + > + for (int i = ctx->wavelet_depth - 1; i >= 0; i--) { > + dec->pConst.plane_strides[0] = ctx->plane[0].idwt.width << i; > + dec->pConst.plane_strides[1] = ctx->plane[1].idwt.width << i; > + dec->pConst.plane_strides[2] = ctx->plane[2].idwt.width << i; > + > + dec->pConst.dw[0] = ctx->plane[0].idwt.width >> (i + 1); > + dec->pConst.dw[1] = ctx->plane[1].idwt.width >> (i + 1); > + dec->pConst.dw[2] = ctx->plane[2].idwt.width >> (i + 1); > + > + dec->pConst.real_plane_dims[0] = 
(ctx->plane[0].idwt.width) >> i; > + dec->pConst.real_plane_dims[1] = (ctx->plane[0].idwt.height) >> i; > + dec->pConst.real_plane_dims[2] = (ctx->plane[1].idwt.width) >> i; > + dec->pConst.real_plane_dims[3] = (ctx->plane[1].idwt.height) >> i; > + dec->pConst.real_plane_dims[4] = (ctx->plane[2].idwt.width) >> i; > + dec->pConst.real_plane_dims[5] = (ctx->plane[2].idwt.height) >> i; > + > + /* Vertical wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_vert, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_vert); > + vk->CmdDispatch(exec->buf, > + dec->pConst.real_plane_dims[0] >> 3, > + dec->pConst.real_plane_dims[1] >> 4, > + 1); > + > + /* Horizontal wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_hor, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_hor); > + vk->CmdDispatch(exec->buf, > + 
dec->pConst.real_plane_dims[0] >> 4, > + dec->pConst.real_plane_dims[1] >> 3, > + 1); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + } > + > + return 0; > +fail: > + ff_vk_exec_discard_deps(&dec->vkctx, exec); > + return err; > +} > + > +/* ----- Deslauriers-Dubuc(13, 7) init and pipeline pass ----- */ > +static const char dd137_low[] = { > + C(0, int32_t dd137_low(int32_t v0, int32_t v1, int32_t v2, int32_t > v3, int32_t v4) { ) > + C(0, return v2 - ((9 * v1 + 9 * v3 - v4 - v0 + 16) >> 5); > ) > + C(0, } > ) > +}; > + > +static const char dd137_low_y[] = { > + C(0, int32_t dd137_low_y(int plane, int x, int y) { > ) > + C(1, const int h = plane_sizes[plane].y; > ) > + C(1, > ) > + C(1, const int y0 = (x > 3) ? (y - 3) : 1; > ) > + C(1, const int32_t v0 = inBuf[getIdx(plane, x, y0)]; > ) > + C(1, > ) > + C(1, const int y1 = (y > 1) ? (y - 1) : 1; > ) > + C(1, const int32_t v1 = inBuf[getIdx(plane, x, y1)]; > ) > + C(1, > ) > + C(1, const int y2 = y; > ) > + C(1, const int32_t v2 = inBuf[getIdx(plane, x, y2)]; > ) > + C(1, > ) > + C(1, const int y3 = y + 1; > ) > + C(1, const int32_t v3 = inBuf[getIdx(plane, x, y3)]; > ) > + C(1, > ) > + C(1, const int y4 = (y + 3 < h) ? 
(y + 3) : (h - 1); > ) > + C(1, const int32_t v4 = inBuf[getIdx(plane, x, y4)]; > ) > + C(1, > ) > + C(1, return dd137_low(v0, v1, v2, v3, v4); > ) > + C(0, } > ) > +}; > + > +static const char dd137_vert[] = { > + C(0, void idwt_vert(int plane, int x, int y) { > ) > + C(1, const int h = plane_sizes[plane].y; > ) > + C(1, > ) > + C(1, const int32_t out0 = dd137_low_y(plane, x, y); > ) > + C(1, const int32_t out_2 = (y - 2 > 0) ? dd137_low_y(plane, x, y - > 2) : ) > + C(1, dd137_low_y(plane, x, > 0); ) > + C(1, const int32_t out2 = (y + 2 < h) ? dd137_low_y(plane, x, y + > 2) : ) > + C(1, dd137_low_y(plane, x, h - > 2); ) > + C(1, const int32_t out4 = (y + 4 < h) ? dd137_low_y(plane, x, y + > 4) : ) > + C(1, dd137_low_y(plane, x, h - > 2); ) > + C(1, const int32_t val1 = inBuf[getIdx(plane, x, y + 1)]; > ) > + C(1, > ) > + C(1, outBuf[getIdx(plane, x, y)] = out0; > ) > + C(1, outBuf[getIdx(plane, x, y + 1)] = dd97_high(out_2, out0, > val1, out2, out4); ) > + C(1, } > ) > +}; > + > +static const char dd137_low_x[] = { > + C(0, int32_t dd137_low_x(int plane, int x, int y) { > ) > + C(1, const int w = plane_sizes[plane].x; > ) > + C(1, const int dw = w / 2; > ) > + C(1, > ) > + C(1, const int x0 = (x > 1) ? x : dw; > ) > + C(1, const int32_t v0 = inBuf[getIdx(plane, x0, y)]; > ) > + C(1, > ) > + C(1, const int x1 = (x > 1) ? (x + dw - 2) : dw; > ) > + C(1, const int32_t v1 = inBuf[getIdx(plane, x1, y)]; > ) > + C(1, > ) > + C(1, const int x2 = x; > ) > + C(1, const int32_t v2 = inBuf[getIdx(plane, x2, y)]; > ) > + C(1, > ) > + C(1, const int x3 = x + dw; > ) > + C(1, const int32_t v3 = inBuf[getIdx(plane, x3, y)]; > ) > + C(1, > ) > + C(1, const int x4 = (x != dw - 1) ? 
(x + dw + 1) : (dw - 1); > ) > + C(1, const int32_t v4 = inBuf[getIdx(plane, x4, y)]; > ) > + C(1, > ) > + C(1, return dd137_low(v0, v1, v2, v3, v4); > ) > + C(0, } > ) > +}; > + > +static const char dd137_horiz[] = { > + C(0, void idwt_horiz(int plane, int x, int y) { > ) > + C(1, const int w = plane_sizes[plane].x; > ) > + C(1, const int dw = w / 2 - 1; > ) > + C(1, > ) > + C(1, const int32_t out0 = dd137_low_x(plane, x, y); > ) > + C(1, > ) > + C(1, const int32_t out_1 = ((x - 1) > 0) ? dd137_low_x(plane, x - > 1, y) : out0; ) > + C(1, const int32_t val3 = inBuf[getIdx(plane, x + dw + 1, y)]; > ) > + C(1, const int32_t out1 = ((x + 1) <= dw) ? dd137_low_x(plane, x + > 1, y) : ) > + C(1, dd137_low_x(plane, > dw, y); ) > + C(1, const int32_t out2 = ((x + 2) <= dw) ? dd137_low_x(plane, x + > 2, y) : ) > + C(1, dd137_low_x(plane, > dw, y); ) > + C(1, const int32_t res = dd97_high(out_1, out0, val3, out1, > out2); ) > + C(1, > ) > + C(1, outBuf[getIdx(plane, 2 * x, y)] = (out0 + 1) >> 1; > ) > + C(1, outBuf[getIdx(plane, 2 * x + 1, y)] = (res + 1) >> 1; > ) > + C(0, } > ) > +}; > + > +static int init_wavelet_shd_dd137_vert(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_DD13_7; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->vert_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->vert_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "dd137_vert", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->vert_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = 
VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + GLSLD(dd97_high); > + GLSLD(dd137_low); > + GLSLD(dd137_low_y); > + GLSLD(dd137_vert); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; 2 * y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; x < w; x += off_x) { > ); > + GLSLC(3, idwt_vert(pic_z, x, 2 * y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + 
spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static int init_wavelet_shd_dd137_horiz(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_DD13_7; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->horiz_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->horiz_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "dd137_horiz", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->horiz_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_debug_printf : enable); > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(get_idx); > + GLSLD(dd97_high); > + GLSLD(dd137_low); > + GLSLD(dd137_low_x); > + GLSLD(dd137_horiz); > + > + 
GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; 2 * x < w; x += off_x) { > ); > + GLSLC(3, idwt_horiz(pic_z, x, y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static av_always_inline int inline > wavelet_dd137_pass(DiracVulkanDecodeContext *dec, > + DiracContext *ctx, > + FFVkExecContext *exec, > + VkBufferMemoryBarrier2 *buf_bar, > + int *nb_buf_bar) { > + int err; > + int barrier_num = *nb_buf_bar; > + int wavelet_idx = DWT_DIRAC_DD13_7; > + FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + > + FFVulkanPipeline *pl_hor = &dec->horiz_wavelet_pl[wavelet_idx]; > + FFVulkanPipeline *pl_vert = &dec->vert_wavelet_pl[wavelet_idx]; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 0, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 1, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, 
pl_hor, exec, > + 0, 0, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 1, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + > + for (int i = ctx->wavelet_depth - 1; i >= 0; i--) { > + dec->pConst.plane_strides[0] = ctx->plane[0].idwt.width << i; > + dec->pConst.plane_strides[1] = ctx->plane[1].idwt.width << i; > + dec->pConst.plane_strides[2] = ctx->plane[2].idwt.width << i; > + > + dec->pConst.dw[0] = ctx->plane[0].idwt.width >> (i + 1); > + dec->pConst.dw[1] = ctx->plane[1].idwt.width >> (i + 1); > + dec->pConst.dw[2] = ctx->plane[2].idwt.width >> (i + 1); > + > + dec->pConst.real_plane_dims[0] = (ctx->plane[0].idwt.width) >> i; > + dec->pConst.real_plane_dims[1] = (ctx->plane[0].idwt.height) >> i; > + dec->pConst.real_plane_dims[2] = (ctx->plane[1].idwt.width) >> i; > + dec->pConst.real_plane_dims[3] = (ctx->plane[1].idwt.height) >> i; > + dec->pConst.real_plane_dims[4] = (ctx->plane[2].idwt.width) >> i; > + dec->pConst.real_plane_dims[5] = (ctx->plane[2].idwt.height) >> i; > + > + /* Vertical wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_vert, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_vert); > + vk->CmdDispatch(exec->buf, > + 
/* ----- Haar Wavelet init and pipeline pass ----- */

/*
 * GLSL source of idwt_horiz() for the Haar wavelet without output shift.
 * Reads the sample at column x and the one half a plane width further right
 * in row y, computes new0 = a - ((b + 1) >> 1) and new1 = b + new0, and
 * stores the pair at columns 2 * x and 2 * x + 1 of the same row.
 */
static const char haari_horiz[] = {
    C(0, void idwt_horiz(int plane, int x, int y) {                                     )
    C(1, int offs0 = plane_offs[plane] + plane_strides[plane] * y + x;                  )
    C(1, int offs1 = offs0 + plane_sizes[plane].x / 2;                                  )
    C(1, int outIdx = plane_offs[plane] + plane_strides[plane] * y + x * 2;             )
    C(1, int32_t val_orig0 = inBuf[offs0];                                              )
    C(1, int32_t val_orig1 = inBuf[offs1];                                              )
    C(1, int32_t val_new0 = val_orig0 - ((val_orig1 + 1) >> 1);                         )
    C(1, int32_t val_new1 = val_orig1 + val_new0;                                       )
    C(1, outBuf[outIdx] = val_new0;                                                     )
    C(1, outBuf[outIdx + 1] = val_new1;                                                 )
    C(0, }                                                                              )
};

/*
 * GLSL source of idwt_horiz() for the Haar wavelet with output shift.
 * Identical to haari_horiz except that both results are stored as
 * (value + 1) >> 1.
 */
static const char haari_shift_horiz[] = {
    C(0, void idwt_horiz(int plane, int x, int y) {                                     )
    C(1, int offs0 = plane_offs[plane] + plane_strides[plane] * y + x;                  )
    C(1, int offs1 = offs0 + plane_sizes[plane].x / 2;                                  )
    C(1, int outIdx = plane_offs[plane] + plane_strides[plane] * y + x * 2;             )
    C(1, int32_t val_orig0 = inBuf[offs0];                                              )
    C(1, int32_t val_orig1 = inBuf[offs1];                                              )
    C(1, int32_t val_new0 = val_orig0 - ((val_orig1 + 1) >> 1);                         )
    C(1, int32_t val_new1 = val_orig1 + val_new0;                                       )
    C(1, outBuf[outIdx] = (val_new0 + 1) >> 1;                                          )
    C(1, outBuf[outIdx + 1] = (val_new1 + 1) >> 1;                                      )
    C(0, }                                                                              )
};

/*
 * GLSL source of idwt_vert() for the Haar wavelet.
 * Applies the same two-step update as the horizontal variant to the samples
 * of column x in rows y and y + 1, and writes each result to the same offset
 * in outBuf that it was read from in inBuf.
 */
static const char haari_vert[] = {
    C(0, void idwt_vert(int plane, int x, int y) {                                      )
    C(1, int offs0 = plane_offs[plane] + plane_strides[plane] * y + x;                  )
    C(1, int offs1 = plane_offs[plane] + plane_strides[plane] * (y + 1) + x;            )
    C(2, int32_t val_orig0 = inBuf[offs0];                                              )
    C(1, int32_t val_orig1 = inBuf[offs1];                                              )
    C(1, int32_t val_new0 = val_orig0 - ((val_orig1 + 1) >> 1);                         )
    C(1, int32_t val_new1 = val_orig1 + val_new0;                                       )
    C(1, outBuf[offs0] = val_new0;                                                      )
    C(1, outBuf[offs1] = val_new1;                                                      )
    C(0, }                                                                              )
};
"haari_vert", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->vert_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(haari_vert); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(2, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; 2 * y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; x < w; x += off_x) { > ); > + GLSLC(3, idwt_vert(pic_z, x, 2 * y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + 
RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static int init_wavelet_shd_haari_horiz(DiracVulkanDecodeContext *s, > FFVkSPIRVCompiler *spv, int shift) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + int wavelet_idx = DWT_DIRAC_HAAR0 + shift; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->horiz_wavelet_shd[wavelet_idx]; > + FFVulkanPipeline *pl = &s->horiz_wavelet_pl[wavelet_idx]; > + FFVkExecPool *exec = &s->exec_pool; > + > + RET(ff_vk_shader_init(pl, shd, "haari_horiz", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->horiz_wavelet_shd[wavelet_idx]; > + ff_vk_shader_set_compute_sizes(shd, 8, 8, 3); > + > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "in_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t inBuf[];", > + .mem_quali = "readonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf[];", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int 
plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLD(shift ? haari_shift_horiz : haari_horiz); > + > + GLSLC(0, void main() { > ); > + GLSLC(1, int off_y = int(gl_WorkGroupSize.y * > gl_NumWorkGroups.y); ); > + GLSLC(1, int off_x = int(gl_WorkGroupSize.x * > gl_NumWorkGroups.x); ); > + GLSLC(1, int pic_z = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, > ); > + GLSLC(1, uint w = int(plane_sizes[pic_z].x); > ); > + GLSLC(1, uint h = int(plane_sizes[pic_z].y); > ); > + GLSLC(1, > ); > + GLSLC(1, int y = int(gl_GlobalInvocationID.y); > ); > + GLSLC(1, for (; y < h; y += off_y) { > ); > + GLSLC(2, int x = int(gl_GlobalInvocationID.x); > ); > + GLSLC(2, for (; 2 * x < w; x += off_x) { > ); > + GLSLC(3, idwt_horiz(pic_z, x, y); > ); > + GLSLC(2, } > ); > + GLSLC(1, } > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static av_always_inline int inline > wavelet_haari_pass(DiracVulkanDecodeContext *dec, > + DiracContext *ctx, > + FFVkExecContext *exec, > + VkBufferMemoryBarrier2 *buf_bar, > + int *nb_buf_bar, int shift) { > + int err; > + int barrier_num = *nb_buf_bar; > + > + const int wavelet_idx = DWT_DIRAC_HAAR0 + shift; > + FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + > + FFVulkanPipeline *pl_hor = &dec->horiz_wavelet_pl[wavelet_idx]; > + FFVulkanPipeline *pl_vert = &dec->vert_wavelet_pl[wavelet_idx]; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 0, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = 
ff_vk_set_descriptor_buffer(&dec->vkctx, pl_vert, exec, > + 0, 1, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 0, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, pl_hor, exec, > + 0, 1, 0, > + dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + goto fail; > + > + for (int i = ctx->wavelet_depth - 1; i >= 0; i--) { > + dec->pConst.plane_strides[0] = ctx->plane[0].idwt.width << i; > + dec->pConst.plane_strides[1] = ctx->plane[1].idwt.width << i; > + dec->pConst.plane_strides[2] = ctx->plane[2].idwt.width << i; > + > + dec->pConst.dw[0] = ctx->plane[0].idwt.width >> (i + 1); > + dec->pConst.dw[1] = ctx->plane[1].idwt.width >> (i + 1); > + dec->pConst.dw[2] = ctx->plane[2].idwt.width >> (i + 1); > + > + dec->pConst.real_plane_dims[0] = ctx->plane[0].idwt.width >> i; > + dec->pConst.real_plane_dims[1] = ctx->plane[0].idwt.height >> i; > + dec->pConst.real_plane_dims[2] = ctx->plane[1].idwt.width >> i; > + dec->pConst.real_plane_dims[3] = ctx->plane[1].idwt.height >> i; > + dec->pConst.real_plane_dims[4] = ctx->plane[2].idwt.width >> i; > + dec->pConst.real_plane_dims[5] = ctx->plane[2].idwt.height >> i; > + > + dec->pConst.wavelet_depth = ctx->wavelet_depth; > + > + /* Vertical wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_vert, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, 
&(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_vert); > + vk->CmdDispatch(exec->buf, > + dec->pConst.real_plane_dims[0] >> 3, > + dec->pConst.real_plane_dims[1] >> 4, > + 1); > + > + /* Horizontal wavelet pass */ > + ff_vk_update_push_exec(&dec->vkctx, exec, pl_hor, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, pl_hor); > + vk->CmdDispatch(exec->buf, > + dec->pConst.real_plane_dims[0] >> 4, > + dec->pConst.real_plane_dims[1] >> 3, > + 1); > + > + } > + > + barrier_num = *nb_buf_bar; > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar + barrier_num, > + .bufferMemoryBarrierCount = *nb_buf_bar - barrier_num, > + }); > + > + > + return 0; > +fail: > + ff_vk_exec_discard_deps(&dec->vkctx, exec); > + return err; > +} > + > +/* ----- Dequant Shader init and pipeline pass ----- */ > + > +static const char dequant[] = { > + C(0, void dequant(int outIdx, int idx, int qf, int qs) { ) > + C(1, int32_t 
val = inBuffer[idx]; ) > + C(1, val = sign(val) * ((abs(val) * qf + qs) >> 2); ) > + C(1, outBuf0[outIdx] = outBuf1[outIdx] = val; ) > + C(0, } ) > +}; > + > +static const char proc_slice[] = { > + C(0, void proc_slice(int slice_idx) { > ) > + C(1, const int plane = int(gl_GlobalInvocationID.x); > ) > + C(1, const int level = int(gl_GlobalInvocationID.y); > ) > + /*C(1, if (level >= wavelet_depth) return; > )*/ > + C(1, const int base_idx = slice_idx * DWT_LEVELS * 8; > ) > + C(1, const int base_slice_idx = slice_idx * DWT_LEVELS * 3 + plane > * DWT_LEVELS; ) > + C(1, > ) > + C(1, const Slice s = slices[base_slice_idx + level]; > ) > + C(1, int offs = s.offs; > ) > + C(1, > ) > + C(1, for(int orient = int(bool(level)); orient < 4; orient++) { > ) > + C(2, int32_t qf = quantMatrix[base_idx + level * 8 + orient]; > ) > + C(2, int32_t qs = quantMatrix[base_idx + level * 8 + orient + > 4]; ) > + C(2, > ) > + C(2, const int subband_idx = plane * DWT_LEVELS * 4 > ) > + C(2, + 4 * level + orient; > ) > + C(2, > ) > + C(2, const SubbandOffset sub_off = subband_offs[subband_idx]; > ) > + C(2, int img_idx = plane_offs[plane] + sub_off.base_off > ) > + C(2, + s.top * sub_off.stride > + s.left; ) > + C(2, > ) > + C(2, for(int y = 0; y < s.tot_v; y++) { > ) > + C(3, int img_x = img_idx; > ) > + C(3, for(int x = 0; x < s.tot_h; x++) { > ) > + C(4, dequant(img_x, offs, qf, qs); > ) > + C(4, img_x++; > ) > + C(4, offs++; > ) > + C(3, } > ) > + C(3, img_idx += sub_off.stride; > ) > + C(2, } > ) > + C(1, } > ) > + C(0, } > ) > +}; > + > +static int init_quant_shd(DiracVulkanDecodeContext *s, FFVkSPIRVCompiler > *spv) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + // const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + FFVkSPIRVShader *shd = &s->quant_shd; > + FFVulkanPipeline *pl = &s->quant_pl; > + FFVkExecPool *exec = &s->exec_pool; 
> + > + RET(ff_vk_shader_init(pl, shd, "dequant", > VK_SHADER_STAGE_COMPUTE_BIT, 0)); > + > + shd = &s->quant_shd; > + ff_vk_shader_set_compute_sizes(shd, 3, 1, 1); > + > + GLSLC(0, #extension GL_EXT_debug_printf : enable); > + GLSLC(0, #extension GL_EXT_scalar_block_layout : enable); > + GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : enable); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "out_buf_0", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf0[];", > + .mem_layout = "std430", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + { > + .name = "out_buf_1", > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .buf_content = "int32_t outBuf1[];", > + .mem_layout = "std430", > + .mem_quali = "writeonly", > + .dimensions = 1, > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 2, 0, 0)); > + > + GLSLC(0, struct Slice { ); > + GLSLC(1, int32_t left; ); > + GLSLC(1, int32_t top; ); > + GLSLC(1, int32_t tot_h; ); > + GLSLC(1, int32_t tot_v; ); > + GLSLC(1, int32_t tot; ); > + GLSLC(1, int32_t offs; ); > + GLSLC(1, int32_t pad0; ); > + GLSLC(1, int32_t pad1; ); > + GLSLC(0, }; ); > + > + GLSLC(0, struct SubbandOffset { ); > + GLSLC(1, int32_t base_off; ); > + GLSLC(1, int32_t stride; ); > + GLSLC(1, int32_t pad0; ); > + GLSLC(1, int32_t pad1; ); > + GLSLC(0, }; ); > + > + desc = (FFVulkanDescriptorSetBinding[]) > + { > + { > + .name = "quant_in_buf", > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .buf_content = "int32_t inBuffer[];", > + .mem_quali = "readonly", > + .mem_layout = "std430", > + }, > + { > + .name = "quant_vals_buf", > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .buf_content = "int32_t quantMatrix[];", > + .mem_quali = "readonly", > + .mem_layout = "std430", > + }, > + { 
> + .name = "slices_buf", > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .buf_content = "Slice slices[];", > + .mem_quali = "readonly", > + .mem_layout = "std430", > + }, > + { > + .name = "subband_buf", > + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .buf_content = "SubbandOffset subband_offs[60];", > + .mem_quali = "readonly", > + .mem_layout = "std430", > + }, > + }; > + RET(ff_vk_pipeline_descriptor_set_add(vkctx, pl, shd, desc, 4, 1, 0)); > + > + ff_vk_add_push_constant(pl, 0, sizeof(WaveletPushConst), > VK_SHADER_STAGE_COMPUTE_BIT); > + > + GLSLC(0, layout(push_constant, std430) uniform pushConstants { ); > + GLSLC(1, ivec2 plane_sizes[3]; ); > + GLSLC(1, int plane_offs[3]; ); > + GLSLC(1, int plane_strides[3]; ); > + GLSLC(1, int dw[3]; ); > + GLSLC(1, int wavelet_depth; ); > + GLSLC(0, }; ); > + GLSLC(0, ); > + > + GLSLF(0, #define DWT_LEVELS %i, MAX_DWT_LEVELS ); > + > + GLSLD(dequant); > + GLSLD(proc_slice); > + GLSLC(0, void main() > ); > + GLSLC(0, { > ); > + GLSLC(1, int idx = int(gl_GlobalInvocationID.z); > ); > + GLSLC(1, proc_slice(idx); > ); > + GLSLC(0, } > ); > + > + RET(spv->compile_shader(spv, vkctx, shd, &spv_data, &spv_len, "main", > &spv_opaque)); > + RET(ff_vk_shader_create(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_init_compute_pipeline(vkctx, pl, shd)); > + RET(ff_vk_exec_pipeline_register(vkctx, exec, pl)); > + > +fail: > + if (spv_opaque) > + spv->free_shader(spv, &spv_opaque); > + > + return err; > +} > + > +static av_always_inline int inline quant_pl_pass(DiracVulkanDecodeContext > *dec, > + DiracContext *ctx, > + FFVkExecContext *exec, > + VkBufferMemoryBarrier2 *buf_bar, > + int *nb_buf_bar) { > + int err; > + FFVulkanFunctions *vk = &dec->vkctx.vkfn; > + > + ff_vk_exec_bind_pipeline(&dec->vkctx, exec, &dec->quant_pl); > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, &dec->quant_pl, > + exec, 0, 0, 0, > + 
dec->tmp_buf.address, > + dec->tmp_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + return err; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, &dec->quant_pl, > + exec, 0, 1, 0, > + dec->tmp_interleave_buf.address, > + dec->tmp_interleave_buf.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + return err; > + > + ff_vk_update_push_exec(&dec->vkctx, exec, &dec->quant_pl, > + VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(WaveletPushConst), &dec->pConst); > + > + bar_read(buf_bar, nb_buf_bar, dec->quant_val_buf); > + bar_read(buf_bar, nb_buf_bar, dec->slice_buf); > + bar_read(buf_bar, nb_buf_bar, dec->quant_buf); > + bar_read(buf_bar, nb_buf_bar, &dec->subband_info); > + > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_buf); > + bar_write(buf_bar, nb_buf_bar, &dec->tmp_interleave_buf); > + > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = buf_bar, > + .bufferMemoryBarrierCount = *nb_buf_bar, > + }); > + > + vk->CmdDispatch(exec->buf, 1, ctx->wavelet_depth, ctx->num_x * > ctx->num_y); > + > + return 0; > +} > + > +static int vulkan_dirac_uninit(AVCodecContext *avctx) { > + // DiracContext *d = avctx->priv_data; > + // if (d->hwaccel_picture_private) { > + // av_freep(d->hwaccel_picture_private); > + // } > + > + free_common(avctx); > + > + return 0; > +} > + > +static inline int wavelet_init(DiracVulkanDecodeContext *dec, > + FFVkSPIRVCompiler *spv) { > + int err; > + > + err = init_wavelet_shd_daub97_horiz(dec, spv); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_daub97_vert(dec, spv); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_haari_vert(dec, spv, 0); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_haari_horiz(dec, spv, 0); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_haari_vert(dec, spv, 1); > + if (err < 0) { > + return err; > + } > + > + err = 
init_wavelet_shd_haari_horiz(dec, spv, 1); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_legall_vert(dec, spv); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_legall_horiz(dec, spv); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_dd97_vert(dec, spv); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_dd97_horiz(dec, spv); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_fidelity_vert(dec, spv); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_fidelity_horiz(dec, spv); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_dd137_vert(dec, spv); > + if (err < 0) { > + return err; > + } > + > + err = init_wavelet_shd_dd137_horiz(dec, spv); > + if (err < 0) { > + return err; > + } > + > + return 0; > +} > + > +static int vulkan_dirac_init(AVCodecContext *avctx) > +{ > + int err = 0; > + DiracVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; > + FFVulkanContext *s; > + FFVkSPIRVCompiler *spv; > + > + spv = ff_vk_spirv_init(); > + if (!spv) { > + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V > compiler!\n"); > + return AVERROR_EXTERNAL; > + } > + > + err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN); > + if (err < 0) > + goto fail; > + > + /* Initialize contexts */ > + s = &dec->vkctx; > + > + s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx); > + s->frames = (AVHWFramesContext *)s->frames_ref->data; > + s->hwfc = s->frames->hwctx; > + > + s->device = (AVHWDeviceContext *)s->frames->device_ref->data; > + s->hwctx = s->device->hwctx; > + > + err = ff_vk_load_props(s); > + if (err < 0) > + goto fail; > + > + /* Create queue context */ > + ff_vk_qf_init(s, &dec->qf, VK_QUEUE_COMPUTE_BIT); > + > + err = ff_vk_exec_pool_init(s, &dec->qf, &dec->exec_pool, 8, 0, 0, 0, > NULL); > + > + err = ff_vk_init_sampler(&dec->vkctx, &dec->sampler, 1, > VK_FILTER_LINEAR); > + if (err < 0) { 
> + goto fail; > + } > + > + av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization > sucessful\n"); > + > + err = init_quant_shd(dec, spv); > + if (err < 0) { > + goto fail; > + } > + > + err = init_cpy_shd(dec, spv, 0); > + if (err < 0) { > + goto fail; > + } > + > + err = init_cpy_shd(dec, spv, 1); > + if (err < 0) { > + goto fail; > + } > + > + err = init_cpy_shd(dec, spv, 2); > + if (err < 0) { > + goto fail; > + } > + > + err = wavelet_init(dec, spv); > + if (err < 0) { > + goto fail; > + } > + > + dec->quant_val_buf_vk_ptr = NULL; > + dec->slice_buf_vk_ptr = NULL; > + dec->quant_buf_vk_ptr = NULL; > + > + dec->av_quant_val_buf = NULL; > + dec->av_quant_buf = NULL; > + dec->av_slice_buf = NULL; > + > + dec->thread_buf_size = 0; > + dec->n_slice_bufs = 0; > + > + err = ff_vk_create_buf(&dec->vkctx, &dec->subband_info, > + sizeof(SubbandOffset) * MAX_DWT_LEVELS * 12, > NULL, NULL, > + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | > + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, > + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | > + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); > + if (err < 0) > + return err; > + > + err = ff_vk_map_buffer(&dec->vkctx, &dec->subband_info, > + (uint8_t **)&dec->subband_info_ptr, 0); > + if (err < 0) > + return err; > + > + err = ff_vk_set_descriptor_buffer(&dec->vkctx, &dec->quant_pl, > + NULL, 1, 3, 0, > + dec->subband_info.address, > + dec->subband_info.size, > + VK_FORMAT_UNDEFINED); > + if (err < 0) > + return err; > + > + return 0; > + > +fail: > + if (spv) > + { > + spv->uninit(&spv); > + } > + vulkan_dirac_uninit(avctx); > + > + return err; > +} > + > +static int vulkan_decode_bootstrap(AVCodecContext *avctx, AVBufferRef > *frames_ref) > +{ > + int err; > + DiracVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; > + AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data; > + AVHWDeviceContext *device = (AVHWDeviceContext > *)frames->device_ref->data; > + AVVulkanDeviceContext *hwctx = device->hwctx; > + > + 
dec->vkctx.extensions = > ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions, > + > hwctx->nb_enabled_dev_extensions); > + > + err = ff_vk_load_functions(device, &dec->vkctx.vkfn, > dec->vkctx.extensions, 1, 1); > + if (err < 0) { > + free_common(avctx); > + return err; > + } > + > + return 0; > +} > + > +static int vulkan_dirac_frame_params(AVCodecContext *avctx, AVBufferRef > *hw_frames_ctx) > +{ > + int err; > + AVHWFramesContext *frames_ctx = > (AVHWFramesContext*)hw_frames_ctx->data; > + AVVulkanFramesContext *hwfc = frames_ctx->hwctx; > + DiracContext *s = avctx->priv_data; > + > + frames_ctx->sw_format = s->sof_pix_fmt; > + > + err = vulkan_decode_bootstrap(avctx, hw_frames_ctx); > + if (err < 0) > + return err; > + > + frames_ctx->width = avctx->coded_width; > + frames_ctx->height = avctx->coded_height; > + frames_ctx->format = AV_PIX_FMT_VULKAN; > + > + for (int i = 0; i < AV_NUM_DATA_POINTERS; i++) { > + hwfc->format[i] = > av_vkfmt_from_pixfmt(frames_ctx->sw_format)[i]; > + } > + hwfc->tiling = VK_IMAGE_TILING_LINEAR; > + hwfc->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | > + VK_IMAGE_USAGE_STORAGE_BIT; > + > + return err; > +} > + > + > +static void vulkan_dirac_free_frame_priv(FFRefStructOpaque _hwctx, void > *data) > +{ > + // AVHWDeviceContext *hwctx = _hwctx.nc; > + DiracVulkanDecodePicture *dp = data; > + > + /* Free frame resources */ > + av_free(dp); > +} > + > + > +static void setup_subbands(DiracContext *ctx, DiracVulkanDecodeContext > *dec) { > + SubbandOffset *offs = dec->subband_info_ptr; > + memset(offs, 0, dec->subband_info.size); > + > + for (int plane = 0; plane < 3; plane++) { > + Plane *p = &ctx->plane[plane]; > + int w = p->idwt.width; > + int s = FFALIGN(p->idwt.width, 8); > + > + for (int level = ctx->wavelet_depth - 1; level >= 0; level--) { > + w >>= 1; > + s <<= 1; > + for (int orient = 0; orient < 4; orient++) { > + const int idx = plane * MAX_DWT_LEVELS * 4 + level * 4 + > orient; > + SubbandOffset *off = &offs[idx]; > + 
off->stride = s; > + off->base_off = 0; > + > + if (orient & 1) > + off->base_off += w; > + if (orient > 1) > + off->base_off += (s>>1); > + > + /*SubBand *b = &p->band[level][orient];*/ > + /*int w = (b->ibuf - p->idwt.buf) >> (1 + b->pshift);*/ > + /*off->stride = b->stride >> (1 + b->pshift);*/ > + /*off->base_off = w;*/ > + } > + } > + } > +} > + > +static int vulkan_dirac_start_frame(AVCodecContext *avctx, > + av_unused const uint8_t *buffer, > + av_unused uint32_t size) > +{ > + int err; > + DiracVulkanDecodeContext *s = avctx->internal->hwaccel_priv_data; > + DiracContext *c = avctx->priv_data; > + DiracVulkanDecodePicture *pic = c->hwaccel_picture_private; > + WaveletPushConst *pConst = &s->pConst; > + > + pic->frame = c->current_picture; > + setup_subbands(c, s); > + > + pConst->real_plane_dims[0] = c->plane[0].idwt.width; > + pConst->real_plane_dims[1] = c->plane[0].idwt.height; > + pConst->real_plane_dims[2] = c->plane[1].idwt.width; > + pConst->real_plane_dims[3] = c->plane[1].idwt.height; > + pConst->real_plane_dims[4] = c->plane[2].idwt.width; > + pConst->real_plane_dims[5] = c->plane[2].idwt.height; > + > + pConst->plane_strides[0] = c->plane[0].idwt.width; > + pConst->plane_strides[1] = c->plane[1].idwt.width; > + pConst->plane_strides[0] = c->plane[0].idwt.width; > + > + pConst->plane_offs[0] = 0; > + pConst->plane_offs[1] = c->plane[0].idwt.width * > c->plane[0].idwt.height; > + pConst->plane_offs[2] = pConst->plane_offs[1] > + + c->plane[1].idwt.width * c->plane[1].idwt.height; > + > + pConst->wavelet_depth = c->wavelet_depth; > + > + if (s->quant_buf_vk_ptr == NULL || > + s->slice_buf_vk_ptr == NULL || > + s->quant_val_buf_vk_ptr == NULL || > + c->num_x * c->num_y != s->n_slice_bufs) { > + err = alloc_quant_buf(c, s); > + if (err < 0) > + return err; > + err = alloc_dequant_buf(c, s); > + if (err < 0) > + return err; > + err = alloc_slices_buf(c, s); > + if (err < 0) > + return err; > + err = alloc_tmp_bufs(c, s); > + if (err < 0) > + return 
err; > + } > + > + return 0; > +} > + > +static int vulkan_dirac_end_frame(AVCodecContext *avctx) { > + int err, nb_img_bar = 0, nb_buf_bar = 0; > + DiracVulkanDecodeContext*dec = avctx->internal->hwaccel_priv_data; > + DiracContext *ctx = avctx->priv_data; > + VkImageView views[AV_NUM_DATA_POINTERS]; > + VkBufferMemoryBarrier2 buf_bar[80]; > + VkImageMemoryBarrier2 img_bar[80]; > + DiracVulkanDecodePicture *pic = ctx->hwaccel_picture_private; > + FFVkExecContext *exec = ff_vk_exec_get(&dec->exec_pool); > + enum dwt_type wavelet_idx = ctx->wavelet_idx + 2; > + > + ff_vk_exec_start(&dec->vkctx, exec); > + > + err = ff_vk_exec_add_dep_frame(&dec->vkctx, exec, pic->frame->avframe, > + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, > + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT); > + if (err < 0) > + goto fail; > + > + err = ff_vk_create_imageviews(&dec->vkctx, exec, views, > pic->frame->avframe); > + if (err < 0) > + goto fail; > + > + err = quant_pl_pass(dec, ctx, exec, buf_bar, &nb_buf_bar); > + if (err < 0) > + goto fail; > + > + switch (wavelet_idx) { > + case DWT_DIRAC_DAUB9_7: > + err = wavelet_daub97_pass(dec, ctx, exec, buf_bar, &nb_buf_bar); > + break; > + > + case DWT_DIRAC_FIDELITY: > + err = wavelet_fidelity_pass(dec, ctx, exec, buf_bar, &nb_buf_bar); > + break; > + > + case DWT_DIRAC_DD9_7: > + err = wavelet_dd97_pass(dec, ctx, exec, buf_bar, &nb_buf_bar); > + break; > + > + case DWT_DIRAC_DD13_7: > + err = wavelet_dd137_pass(dec, ctx, exec, buf_bar, &nb_buf_bar); > + break; > + > + case DWT_DIRAC_LEGALL5_3: > + err = wavelet_legall_pass(dec, ctx, exec, buf_bar, &nb_buf_bar); > + break; > + > + case DWT_DIRAC_HAAR0: > + err = wavelet_haari_pass(dec, ctx, exec, buf_bar, &nb_buf_bar, 0); > + break; > + > + case DWT_DIRAC_HAAR1: > + err = wavelet_haari_pass(dec, ctx, exec, buf_bar, &nb_buf_bar, 1); > + break; > + > + default: > + err = AVERROR_PATCHWELCOME; > + break; > + } > + > + err = cpy_to_image_pass(dec, ctx, exec, views, > + buf_bar, &nb_buf_bar, img_bar, 
&nb_img_bar, > + (ctx->bit_depth - 8) >> 1); > + if (err < 0) > + goto fail; > + > + return ff_vk_exec_submit(&dec->vkctx, exec); > + /*if (err < 0)*/ > + /* return err;*/ > + > + /*ff_vk_exec_wait(&dec->vkctx, exec);*/ > + > +fail: > + ff_vk_exec_discard_deps(&dec->vkctx, exec); > + return err; > +} > + > +static int vulkan_dirac_update_thread_context(AVCodecContext *dst, const > AVCodecContext *src) > +{ > + // int err; > + DiracVulkanDecodeContext *src_ctx = src->internal->hwaccel_priv_data; > + DiracVulkanDecodeContext *dst_ctx = dst->internal->hwaccel_priv_data; > + > + dst_ctx->vkctx = src_ctx->vkctx; > + dst_ctx->yuv_sampler = src_ctx->yuv_sampler; > + dst_ctx->sampler = src_ctx->sampler; > + dst_ctx->qf = src_ctx->qf; > + dst_ctx->quant_pl = src_ctx->quant_pl; > + > + return 0; > +} > + > +static inline int decode_hq_slice(const DiracContext *s, int jobnr) > +{ > + int i, level, orientation, quant_idx; > + DiracVulkanDecodeContext *dec = s->avctx->internal->hwaccel_priv_data; > + int32_t *qfactor = &dec->quant_buf_vk_ptr[jobnr * 8 * MAX_DWT_LEVELS]; > + int32_t *qoffset = &dec->quant_buf_vk_ptr[jobnr * 8 * MAX_DWT_LEVELS > + 4]; > + int32_t *quant_val_base = dec->quant_val_buf_vk_ptr; > + DiracSlice *slice = &s->slice_params_buf[jobnr]; > + SliceCoeffVk *slice_vk = &dec->slice_buf_vk_ptr[jobnr * 3 * > MAX_DWT_LEVELS]; > + GetBitContext *gb = &slice->gb; > + > + skip_bits_long(gb, 8*s->highquality.prefix_bytes); > + quant_idx = get_bits(gb, 8); > + > + if (quant_idx > DIRAC_MAX_QUANT_INDEX - 1) { > + av_log(s->avctx, AV_LOG_ERROR, "Invalid quantization index - > %i\n", quant_idx); > + return AVERROR_INVALIDDATA; > + } > + > + /* Slice quantization (slice_quantizers() in the specs) */ > + for (level = 0; level < s->wavelet_depth; level++) { > + for (orientation = !!level; orientation < 4; orientation++) { > + const int quant = FFMAX(quant_idx - > s->lowdelay.quant[level][orientation], 0); > + qfactor[level * 8 + orientation] = ff_dirac_qscale_tab[quant]; > + 
qoffset[level * 8 + orientation] = > ff_dirac_qoffset_intra_tab[quant] + 2; > + } > + } > + > + /* Luma + 2 Chroma planes */ > + for (i = 0; i < 3; i++) { > + int coef_num, coef_par; > + int64_t length = s->highquality.size_scaler*get_bits(gb, 8); > + int64_t bits_end = get_bits_count(gb) + 8*length; > + const uint8_t *addr = align_get_bits(gb); > + int offs = dec->slice_vals_size * (3 * jobnr + i); > + uint8_t *tmp_buf = (uint8_t *)&quant_val_base[offs]; > + > + if (length*8 > get_bits_left(gb)) { > + av_log(s->avctx, AV_LOG_ERROR, "end too far away\n"); > + return AVERROR_INVALIDDATA; > + } > + > + coef_num = subband_coeffs(s, slice->slice_x, slice->slice_y, > + i, offs, &slice_vk[MAX_DWT_LEVELS * > i]); > + > + coef_par = ff_dirac_golomb_read_32bit(addr, length, > + tmp_buf, coef_num); > + > + if (coef_num > coef_par) { > + const int start_b = coef_par * sizeof(int32_t); > + const int end_b = coef_num * sizeof(int32_t); > + memset(&tmp_buf[start_b], 0, end_b - start_b); > + } > + > + skip_bits_long(gb, bits_end - get_bits_count(gb)); > + } > + > + return 0; > +} > + > +static int decode_hq_slice_row(AVCodecContext *avctx, void *arg, int > jobnr, int threadnr) > +{ > + const DiracContext *s = avctx->priv_data; > + int i, jobn = s->num_x * jobnr; > + > + for (i = 0; i < s->num_x; i++) { > + decode_hq_slice(s, jobn); > + jobn++; > + } > + > + return 0; > +} > + > +static int vulkan_dirac_decode_slice(AVCodecContext *avctx, > + const uint8_t *data, > + uint32_t size) > +{ > + DiracContext *s = avctx->priv_data; > + > + /*avctx->execute2(avctx, decode_hq_slice_row, NULL, NULL, s->num_y);*/ > + for (int i = 0; i < s->num_y; i++) { > + decode_hq_slice_row(avctx, NULL, i, 0); > + } > + > + return 0; > +} > + > +const FFHWAccel ff_dirac_vulkan_hwaccel = { > + .p.name = "dirac_vulkan", > + .p.type = AVMEDIA_TYPE_VIDEO, > + .p.id = AV_CODEC_ID_DIRAC, > + .p.pix_fmt = AV_PIX_FMT_VULKAN, > + .start_frame = &vulkan_dirac_start_frame, > + .end_frame = &vulkan_dirac_end_frame, 
> + .decode_slice = &vulkan_dirac_decode_slice, > + .free_frame_priv = &vulkan_dirac_free_frame_priv, > + .uninit = &vulkan_dirac_uninit, > + .init = &vulkan_dirac_init, > + .frame_params = &vulkan_dirac_frame_params, > + .frame_priv_data_size = sizeof(DiracVulkanDecodePicture), > + .decode_params = &ff_vk_params_invalidate, > + .flush = &ff_vk_decode_flush, > + .update_thread_context = &vulkan_dirac_update_thread_context, > + .priv_data_size = sizeof(DiracVulkanDecodeContext), > + .caps_internal = HWACCEL_CAP_ASYNC_SAFE | > HWACCEL_CAP_THREAD_SAFE, > +}; > -- > 2.46.0 > > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
prev parent reply other threads:[~2024-08-15 8:46 UTC|newest] Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top [not found] <dirac-hwaccel-0@ffmpeg.org> 2024-08-14 12:18 ` [FFmpeg-devel] [PATCH 1/2] Add dirac vulkan hwaccel usage to diracdec.c Petro Mozil 2024-08-14 12:18 ` [FFmpeg-devel] [PATCH 2/2] Add vulkan dirac hwaccel Petro Mozil 2024-08-15 8:45 ` Petro Mozil [this message]
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to='CAP664DVJJ6=Dt0y0SCBymMvVpmvf1bQQ3VE9NYqFrsAwVDequQ@mail.gmail.com' \ --to=mozil.petryk@gmail.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git