From: Lynne <dev@lynne.ee> To: ffmpeg-devel@ffmpeg.org Cc: Lynne <dev@lynne.ee> Subject: [FFmpeg-devel] [PATCH 12/12] [RFC] ffv1dec_vulkan: add a Vulkan compute-based hardware decoding implementation Date: Sun, 19 Jan 2025 19:38:16 +0900 Message-ID: <20250119103826.1225044-2-dev@lynne.ee> (raw) In-Reply-To: <20250119103826.1225044-1-dev@lynne.ee> This commit adds a Vulkan-based accelerated decoding of FFv1. Currently, only monochrome, arithmetic coding, version 3 and 4 are supported. Posting this more as an RFC. --- configure | 2 + libavcodec/Makefile | 1 + libavcodec/ffv1dec.c | 6 + libavcodec/hwaccels.h | 1 + libavcodec/vulkan/Makefile | 5 + libavcodec/vulkan/ffv1_dec.comp | 174 +++++ libavcodec/vulkan/ffv1_dec_setup.comp | 114 +++ libavcodec/vulkan/rangecoder.comp | 75 ++ libavcodec/vulkan_decode.c | 6 + libavcodec/vulkan_ffv1.c | 969 ++++++++++++++++++++++++++ 10 files changed, 1353 insertions(+) create mode 100644 libavcodec/vulkan/ffv1_dec.comp create mode 100644 libavcodec/vulkan/ffv1_dec_setup.comp create mode 100644 libavcodec/vulkan_ffv1.c diff --git a/configure b/configure index 3a1e72e1c6..94c14ee583 100755 --- a/configure +++ b/configure @@ -3192,6 +3192,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox" av1_videotoolbox_hwaccel_select="av1_decoder" av1_vulkan_hwaccel_deps="vulkan" av1_vulkan_hwaccel_select="av1_decoder" +ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler" +ffv1_vulkan_hwaccel_select="ffv1_decoder" h263_vaapi_hwaccel_deps="vaapi" h263_vaapi_hwaccel_select="h263_decoder" h263_videotoolbox_hwaccel_deps="videotoolbox" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 6ed0fbc705..20fc4ca0cf 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -1014,6 +1014,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL) += vaapi_av1.o OBJS-$(CONFIG_AV1_VDPAU_HWACCEL) += vdpau_av1.o OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL) += videotoolbox_av1.o OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o +OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += 
vulkan_decode.o ffv1_vulkan.o vulkan_ffv1.o OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o OBJS-$(CONFIG_H264_D3D11VA_HWACCEL) += dxva2_h264.o diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index 2dac905cf5..f09e7ea163 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -549,6 +549,9 @@ static int read_extra_header(FFV1Context *f) static enum AVPixelFormat get_pixel_format(FFV1Context *f) { enum AVPixelFormat pix_fmts[] = { +#if CONFIG_FFV1_VULKAN_HWACCEL + AV_PIX_FMT_VULKAN, +#endif f->pix_fmt, AV_PIX_FMT_NONE, }; @@ -1234,6 +1237,9 @@ const FFCodec ff_ffv1_decoder = { .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_USES_PROGRESSFRAMES, .hw_configs = (const AVCodecHWConfigInternal *const []) { +#if CONFIG_FFV1_VULKAN_HWACCEL + HWACCEL_VULKAN(ffv1), +#endif NULL }, }; diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h index 910a024032..0b2c725247 100644 --- a/libavcodec/hwaccels.h +++ b/libavcodec/hwaccels.h @@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel; extern const struct FFHWAccel ff_av1_vdpau_hwaccel; extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel; extern const struct FFHWAccel ff_av1_vulkan_hwaccel; +extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel; extern const struct FFHWAccel ff_h263_vaapi_hwaccel; extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel; extern const struct FFHWAccel ff_h264_d3d11va_hwaccel; diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile index 351332ee44..feb5d2ea51 100644 --- a/libavcodec/vulkan/Makefile +++ b/libavcodec/vulkan/Makefile @@ -11,6 +11,11 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \ vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \ vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o +OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \ + vulkan/rangecoder.o vulkan/ffv1_vlc.o \ + vulkan/ffv1_common.o vulkan/ffv1_reset.o \ + vulkan/ffv1_dec_setup.o 
vulkan/ffv1_dec.o + VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp)) .SECONDARY: $(VULKAN:.comp=.c) libavcodec/vulkan/%.c: TAG = VULKAN diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp new file mode 100644 index 0000000000..dffa0e0d2b --- /dev/null +++ b/libavcodec/vulkan/ffv1_dec.comp @@ -0,0 +1,174 @@ +/* + * FFv1 codec + * + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef GOLOMB +int get_isymbol(inout RangeCoder c, uint64_t state) +{ + if (get_rac(c, state + 0)) + return 0; + + int e = 0; + while (get_rac(c, state + 1 + min(e, 9))) { // 1..10 + e++; + if (e > 31) { + corrupt = true; + return 0; + } + } + + int a = 1; + for (int i = e - 1; i >= 0; i--) + a += a + int(get_rac(c, state + 22 + min(i, 9))); // 22..31 + + e = -int(get_rac(c, state + 11 + min(e, 10))); // 11..21 sign + return (a ^ e) - e; +} + +void decode_line_pcm(inout SliceContext sc, int y, int p, int comp, + int bits) +{ + ivec2 sp = sc.slice_pos; + int w = sc.slice_dim.x; + if (p > 0 && p < 3) { + w >>= chroma_shift.x; + sp >>= chroma_shift; + } + + for (int x = 0; x < w; x++) { + uint v = 0; + for (int i = (bits - 1); i >= 0; i--) + v |= uint(get_rac_equi(sc.c)) << 
i;
+        imageStore(dst[p], (sp + ivec2(x, y)), uvec4(v));
+    }
+}
+
+ivec2 get_pred(ivec2 pos, ivec2 off, int p, int comp, int sw,
+               uint8_t context_model)
+{
+    const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
+    const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
+
+    TYPE top2 = TYPE(0);
+    if (off.y > 1)
+        top2 = TYPE(imageLoad(dst[p], pos + ivec2(0, -2))[comp]);
+
+    VTYPE3 top = VTYPE3(TYPE(0),
+                        TYPE(0),
+                        TYPE(0));
+    if (off.y > 0 && off != ivec2(0, 1))
+        top[0] = TYPE(imageLoad(dst[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
+    if (off.y > 0) {
+        top[1] = TYPE(imageLoad(dst[p], pos + ivec2(0, -1))[comp]);
+        top[2] = TYPE(imageLoad(dst[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
+    }
+
+    VTYPE2 cur = VTYPE2(TYPE(0),
+                        TYPE(0));
+    if (off.x > 0 && off != ivec2(1, 0))
+        cur[0] = TYPE(imageLoad(dst[p], pos + ivec2(-2, 0) + yoff_border2)[comp]);
+    if (off != ivec2(0, 0))
+        cur[1] = TYPE(imageLoad(dst[p], pos + ivec2(-1, 0) + yoff_border1)[comp]);
+
+    int ctx = get_context(cur, top, top2, context_model);
+    int pred = predict(cur[1], VTYPE2(top));
+    int csum = top.x ^ top.y ^ top.z ^ cur.x ^ cur.y;
+
+    /* context, prediction */
+    return ivec2(ctx,
+                 pred);
+}
+
+void decode_line(inout SliceContext sc, uint64_t state,
+                 int y, int p, int comp, int bits, const int run_index)
+{
+    ivec2 sp = sc.slice_pos; /* top-left corner of this slice in the plane */
+
+    int w = sc.slice_dim.x;
+    if (p > 0 && p < 3) { /* planes 1 and 2 are scaled by chroma_shift */
+        w >>= chroma_shift.x;
+        sp >>= chroma_shift;
+    }
+
+    for (int x = 0; x < w; x++) {
+        ivec2 pr = get_pred(sp + ivec2(x, y), ivec2(x, y), p, comp, w,
+                            sc.quant_table_idx[p]); /* pr[0] = context, pr[1] = prediction */
+
+        bool neg = pr[0] < 0; /* negative context: use its mirror, negate the residual */
+        if (neg)
+            pr[0] = -pr[0];
+
+        int range = sc.c.range;
+        int diff = get_isymbol(sc.c, state + CONTEXT_SIZE*pr[0]); /* one CONTEXT_SIZE block per context */
+
+        if (neg)
+            diff = -diff;
+
+        int pix = zero_extend(pr[1] + diff, bits); /* sample = prediction + residual, not context + residual */
+
+        imageStore(dst[p], (sp + ivec2(x, y)), uvec4(pix));
+    }
+}
+#endif
+
+void decode_slice(inout SliceContext sc, const uint slice_idx)
+{
+    int bits = bits_per_raw_sample;
+
+#ifndef 
GOLOMB + if (sc.slice_coding_mode == 1) { + for (int p = 0; p < planes; p++) { + + int h = sc.slice_dim.y; + if (p > 0 && p < 3) + h >>= chroma_shift.y; + + for (int y = 0; y < h; y++) + decode_line_pcm(sc, y, p, 0, bits); + } + } else +#endif + { + uint64_t slice_state_off = uint64_t(slice_state) + + slice_idx*plane_state_size*codec_planes; + + for (int p = 0; p < planes; p++) { + int run_index = 0; + + int h = sc.slice_dim.y; + if (p > 0 && p < 3) + h >>= chroma_shift.y; + + for (int y = 0; y < h; y++) + decode_line(sc, slice_state_off, y, p, 0, bits, run_index); + + /* For the second chroma plane, reuse the first plane's state */ + if (p != 1) + slice_state_off += plane_state_size; + } + } +} + +void main(void) +{ + const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; + decode_slice(slice_ctx[slice_idx], slice_idx); +} diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp new file mode 100644 index 0000000000..6b88274829 --- /dev/null +++ b/libavcodec/vulkan/ffv1_dec_setup.comp @@ -0,0 +1,114 @@ +/* + * FFv1 codec + * + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+uint get_usymbol(inout RangeCoder c, uint64_t state)
+{
+    if (get_rac(c, state + 0)) /* state 0: "value is zero" flag */
+        return 0;
+
+    int e = 0; /* number of mantissa bits that follow */
+    while (get_rac(c, state + 1 + min(e, 9))) { // 1..10
+        e++;
+        if (e > 31) {
+            corrupt = true;
+            return 0;
+        }
+    }
+
+    uint a = 1;
+    for (int i = e - 1; i >= 0; i--)
+        a += a + uint(get_rac(c, state + 22 + min(i, 9))); // 22..31
+
+    // Unsigned symbol: no sign bit is read, so e (still the bit count) must not be folded in
+    return a;
+}
+
+bool decode_slice_header(inout SliceContext sc, uint64_t state)
+{
+    u8buf sb = u8buf(state);
+
+    [[unroll]]
+    for (int i = 0; i < CONTEXT_SIZE; i++)
+        sb[i].v = uint8_t(128);
+
+    uint sx = get_usymbol(sc.c, state);
+    uint sy = get_usymbol(sc.c, state);
+    uint sw = get_usymbol(sc.c, state) + 1;
+    uint sh = get_usymbol(sc.c, state) + 1;
+
+    if (sx < 0 || sy < 0 || sw <= 0 || sh <= 0 ||
+        sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) ||
+        corrupt) {
+        return true;
+    }
+
+    /* Set coordinates */
+    uint sxs = slice_coord(img_size.x, sx     , gl_NumWorkGroups.x, chroma_shift.x);
+    uint sxe = slice_coord(img_size.x, sx + sw, gl_NumWorkGroups.x, chroma_shift.x);
+    uint sys = slice_coord(img_size.y, sy     , gl_NumWorkGroups.y, chroma_shift.y);
+    uint sye = slice_coord(img_size.y, sy + sh, gl_NumWorkGroups.y, chroma_shift.y);
+
+    sc.slice_pos = ivec2(sxs, sys);
+    sc.slice_dim = ivec2(sxe - sxs, sye - sys);
+    sc.slice_rct_coef = ivec2(1, 1);
+    sc.slice_coding_mode = int(0);
+
+    for (uint i = 0; i < codec_planes; i++) {
+        uint idx = get_usymbol(sc.c, state);
+        if (idx >= quant_table_count)
+            return true;
+        sc.quant_table_idx[i] = uint8_t(idx);
+        sc.context_count = context_count[idx];
+    }
+
+    get_usymbol(sc.c, state);
+    get_usymbol(sc.c, state);
+    get_usymbol(sc.c, state);
+
+    if (version >= 4) {
+        
sc.slice_reset_contexts = get_rac(sc.c, state);
+        sc.slice_coding_mode = get_usymbol(sc.c, state);
+        if (sc.slice_coding_mode != 1 && colorspace == 1) {
+            sc.slice_rct_coef.x = int(get_usymbol(sc.c, state));
+            sc.slice_rct_coef.y = int(get_usymbol(sc.c, state));
+            if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4)
+                return true;
+        }
+    }
+
+    return false;
+}
+
+void main(void)
+{
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; /* CONTEXT_SIZE scratch bytes per slice */
+
+    rac_init_dec(slice_ctx[slice_idx].c,
+                 u8buf(slice_data + slice_offsets[slice_idx]),
+                 slice_offsets[slice_idx + 1] - slice_offsets[slice_idx]);
+
+    if (slice_idx == 0)
+        get_rac_equi(slice_ctx[slice_idx].c);
+
+    decode_slice_header(slice_ctx[slice_idx], scratch_state);
+}
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index 6e3b9c1238..ff59cbd289 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -191,3 +191,78 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size)
     r.outstanding_count = uint16_t(0);
     r.outstanding_byte = uint8_t(0xFF);
 }
+
+/* Decoder */
+uint overread;
+bool corrupt;
+
+void rac_init_dec(out RangeCoder r, u8buf data, uint buf_size)
+{
+    overread = 0;
+    corrupt = false;
+
+    /* Skip priming bytes */
+    rac_init(r, OFFBUF(u8buf, data, 2), buf_size - 2);
+
+    uint16_t prime = u16buf(data).v;
+    /* Switch endianness of the priming bytes */
+    prime = pack16(unpack8(prime).yx);
+
+    r.low = prime;
+    if (prime >= 0xFF00) { /* clamp low and end the stream right after the priming bytes */
+        r.low = 0xFF00;
+        r.bytestream_end = uint64_t(data) + 2;
+    }
+}
+
+void refill(inout RangeCoder c)
+{
+    c.range <<= 8;
+    c.low <<= 8;
+    if (c.bytestream < c.bytestream_end) {
+        c.low += u8buf(c.bytestream).v;
+        c.bytestream++;
+    } else {
+        overread++; /* no input left: count the overread instead of fetching a byte */
+    }
+}
+
+bool get_rac(inout RangeCoder c, uint64_t state)
+{
+    u8buf sb = u8buf(state);
+    uint val = uint(sb.v);
+    uint16_t range1 = uint16_t((uint(c.range) * val) >> 8);
+
+    
c.range -= range1; + + bool bit = c.low >= c.range; + sb.v = zero_one_state[(uint(bit) << 8) + val]; + + if (bit) { + c.low -= c.range; + c.range = range1; + } + + if (c.range < 0x100) + refill(c); + + return bit; +} + +bool get_rac_equi(inout RangeCoder c) +{ + uint16_t range1 = c.range >> 1; + + c.range -= range1; + + bool bit = c.low >= c.range; + if (bit) { + c.low -= c.range; + c.range = range1; + } + + if (c.range < 0x100) + refill(c); + + return bit; +} diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c index 87132651e2..010e1fadb4 100644 --- a/libavcodec/vulkan_decode.c +++ b/libavcodec/vulkan_decode.c @@ -36,6 +36,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_hevc_desc; #if CONFIG_AV1_VULKAN_HWACCEL extern const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc; #endif +#if CONFIG_FFV1_VULKAN_HWACCEL +extern const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc; +#endif static const FFVulkanDecodeDescriptor *dec_descs[] = { #if CONFIG_H264_VULKAN_HWACCEL @@ -47,6 +50,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = { #if CONFIG_AV1_VULKAN_HWACCEL &ff_vk_dec_av1_desc, #endif +#if CONFIG_FFV1_VULKAN_HWACCEL + &ff_vk_dec_ffv1_desc, +#endif }; static const FFVulkanDecodeDescriptor *get_codecdesc(enum AVCodecID codec_id) diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c new file mode 100644 index 0000000000..477d2b38b0 --- /dev/null +++ b/libavcodec/vulkan_ffv1.c @@ -0,0 +1,969 @@ +/* + * Copyright (c) 2024 Lynne <dev@lynne.ee> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "vulkan_decode.h" +#include "hwaccel_internal.h" + +#include "ffv1.h" +#include "ffv1_vulkan.h" +#include "libavutil/vulkan_spirv.h" +#include "libavutil/mem.h" + +extern const char *ff_source_common_comp; +extern const char *ff_source_rangecoder_comp; +extern const char *ff_source_ffv1_vlc_comp; +extern const char *ff_source_ffv1_common_comp; +extern const char *ff_source_ffv1_dec_setup_comp; +extern const char *ff_source_ffv1_reset_comp; +extern const char *ff_source_ffv1_dec_comp; +extern const char *ff_source_ffv1_dec_rct_comp; + +const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = { + .codec_id = AV_CODEC_ID_FFV1, + .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR, + .queue_flags = VK_QUEUE_COMPUTE_BIT, +}; + +typedef struct FFv1VulkanDecodePicture { + FFVulkanDecodePicture vp; + + VkImageView img_view_rct; + AVFrame *rct; + + AVBufferRef *tmp_data; + + AVBufferRef *slice_state; + uint32_t plane_state_size; + uint32_t slice_state_size; + uint32_t slice_data_size; + uint32_t max_context_count; + + AVBufferRef *slice_offset_buf; + uint32_t *slice_offset; + int slice_num; +} FFv1VulkanDecodePicture; + +typedef struct FFv1VulkanDecodeContext { + AVBufferRef *intermediate_frames_ref; + + FFVulkanShader setup; + FFVulkanShader reset[2]; /* AC/Golomb */ + FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */ + FFVulkanShader rct[2]; /* 16/32 bit */ + + FFVkBuffer rangecoder_static_buf; + FFVkBuffer quant_buf; + FFVkBuffer crc_tab_buf; + + AVBufferPool *slice_state_pool; + AVBufferPool *tmp_data_pool; + AVBufferPool *slice_offset_pool; +} FFv1VulkanDecodeContext; + +typedef struct FFv1VkResetParameters { + VkDeviceAddress slice_state; + uint32_t 
plane_state_size; + uint32_t context_count; + uint8_t codec_planes; + uint8_t key_frame; + uint8_t version; + uint8_t micro_version; + uint8_t padding[1]; +} FFv1VkResetParameters; + +typedef struct FFv1VkParameters { + uint32_t context_count[MAX_QUANT_TABLES]; + + VkDeviceAddress slice_data; + VkDeviceAddress slice_state; + VkDeviceAddress scratch_data; + + uint32_t img_size[2]; + uint32_t chroma_shift[2]; + + uint32_t plane_state_size; + uint32_t crcref; + + uint8_t bits_per_raw_sample; + uint8_t quant_table_count; + uint8_t version; + uint8_t micro_version; + uint8_t key_frame; + uint8_t planes; + uint8_t codec_planes; + uint8_t transparency; + uint8_t colorspace; + uint8_t ec; + uint8_t padding[2]; +} FFv1VkParameters; + +static void add_push_data(FFVulkanShader *shd) +{ + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES); + GLSLC(0, ); + GLSLC(1, u8buf slice_data; ); + GLSLC(1, u8buf slice_state; ); + GLSLC(1, u8buf scratch_data; ); + GLSLC(0, ); + GLSLC(1, uvec2 img_size; ); + GLSLC(1, uvec2 chroma_shift; ); + GLSLC(0, ); + GLSLC(1, uint plane_state_size; ); + GLSLC(1, uint32_t crcref; ); + GLSLC(0, ); + GLSLC(1, uint8_t bits_per_raw_sample; ); + GLSLC(1, uint8_t quant_table_count; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t planes; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t transparency; ); + GLSLC(1, uint8_t colorspace; ); + GLSLC(1, uint8_t ec; ); + GLSLC(1, uint8_t padding[2]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters), + VK_SHADER_STAGE_COMPUTE_BIT); +} + +static int vk_ffv1_start_frame(AVCodecContext *avctx, + av_unused const uint8_t *buffer, + av_unused uint32_t size) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFv1VulkanDecodeContext *fv = ctx->sd_ctx; + FFV1Context 
*f = avctx->priv_data; + size_t tmp_data_size; + + FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &fp->vp; + + for (int i = 0; i < f->quant_table_count; i++) + fp->max_context_count = FFMAX(f->context_count[i], fp->max_context_count); + + /* Allocate slice buffer data */ + if (f->ac == AC_GOLOMB_RICE) + fp->plane_state_size = 8; + else + fp->plane_state_size = CONTEXT_SIZE; + + fp->plane_state_size *= fp->max_context_count; + fp->slice_state_size = fp->plane_state_size*f->plane_count; + + fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */ + fp->slice_state_size += fp->slice_data_size; + fp->slice_state_size = FFALIGN(fp->slice_state_size, 8); + + /* Allocate slice state data */ + if (f->key_frame) { + err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool, + &fp->slice_state, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, fp->slice_state_size*f->slice_count, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + } else { + FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private; + fp->slice_state = av_buffer_ref(fpl->slice_state); + if (!fp->slice_state) + return AVERROR(ENOMEM); + } + + /* Allocate temporary data buffer */ + tmp_data_size = f->slice_count*CONTEXT_SIZE; + err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool, + &fp->tmp_data, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, tmp_data_size, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (err < 0) + return err; + + /* Allocate slice offsets buffer */ + err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool, + &fp->slice_offset_buf, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + NULL, (f->max_slice_count + 8)*sizeof(uint32_t), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) + return err; + + /* Prepare frame to be used */ + err = 
ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1, + FF_VK_REP_NATIVE, 0); + if (err < 0) + return err; + + return 0; +} + +static int vk_ffv1_decode_slice(AVCodecContext *avctx, + const uint8_t *data, + uint32_t size) +{ + FFV1Context *f = avctx->priv_data; + + FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &fp->vp; + FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; + + int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0, + &fp->slice_num, + (const uint32_t **)&fp->slice_offset); + if (err < 0) + return err; + + AV_WN32(slice_offset->mapped_mem + fp->slice_num*sizeof(uint32_t), + fp->slice_offset[fp->slice_num]); + AV_WN32(slice_offset->mapped_mem + (fp->slice_num + 1)*sizeof(uint32_t), + fp->slice_offset[fp->slice_num] + size); + + return 0; +} + +static int vk_ffv1_end_frame(AVCodecContext *avctx) +{ + int err; + FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data; + FFVulkanDecodeShared *ctx = dec->shared_ctx; + FFVulkanFunctions *vk = &ctx->s.vkfn; + + FFV1Context *f = avctx->priv_data; + FFv1VulkanDecodeContext *fv = ctx->sd_ctx; + FFv1VkParameters pd; + FFv1VkResetParameters pd_reset; + + int is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) && + !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8); + + FFVulkanShader *decode_shader = &fv->decode[f->use32bit] + [f->ac == AC_GOLOMB_RICE] + [is_rgb]; + + FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private; + FFVulkanDecodePicture *vp = &fp->vp; + + FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data; + FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data; + FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; + + FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data; + + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + VkBufferMemoryBarrier2 buf_bar[8]; + int nb_buf_bar = 0; + + FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool); + ff_vk_exec_start(&ctx->s, 
exec); + + /* Prepare deps */ + RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1)); + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0)); + vp->slices_buf = NULL; + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0)); + fp->slice_offset_buf = NULL; + RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0)); + fp->tmp_data = NULL; + + /* Input frame barrier */ + ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + /* Entry barrier */ + if (!f->key_frame) { + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_state->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_state->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_state->buf, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + } + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + nb_img_bar = 0; + if (nb_buf_bar) { + slice_state->stage = buf_bar[1].dstStageMask; + slice_state->access = buf_bar[1].dstAccessMask; + nb_buf_bar = 0; + } + + /* Update descriptors */ + ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, + 1, 0, 0, + slice_state, + 0, fp->slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup, 
+ 1, 1, 0, + slice_offset, + 0, (fp->slice_num + 8)*sizeof(uint32_t), + VK_FORMAT_UNDEFINED); + + ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup); + pd = (FFv1VkParameters) { + /* context_count */ + + .slice_data = slices_buf->address, + .slice_state = slice_state->address, + .scratch_data = tmp_data->address, + + .img_size[0] = f->picture.f->width, + .img_size[1] = f->picture.f->height, + .chroma_shift[0] = f->chroma_h_shift, + .chroma_shift[1] = f->chroma_v_shift, + + .plane_state_size = fp->plane_state_size, + .crcref = f->crcref, + + .bits_per_raw_sample = avctx->bits_per_raw_sample, + .quant_table_count = f->quant_table_count, + .version = f->version, + .micro_version = f->micro_version, + .key_frame = f->key_frame, + .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt), + .codec_planes = f->plane_count, + .transparency = f->transparency, + .colorspace = f->colorspace, + .ec = f->ec, + }; + for (int i = 0; i < MAX_QUANT_TABLES; i++) + pd.context_count[i] = f->context_count[i]; + + ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1); + + /* Reset shader */ + ff_vk_exec_bind_shader(&ctx->s, exec, &fv->reset[f->ac == AC_GOLOMB_RICE]); + + pd_reset = (FFv1VkResetParameters) { + .slice_state = slice_state->address + f->slice_count*256, + .plane_state_size = fp->plane_state_size, + .context_count = fp->max_context_count, + .codec_planes = f->plane_count, + .key_frame = f->key_frame, + .version = f->version, + .micro_version = f->micro_version, + }; + ff_vk_shader_update_push_const(&ctx->s, exec, + &fv->reset[f->ac == AC_GOLOMB_RICE], + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd_reset), &pd_reset); + + /* Sync between setup and reset shaders */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_state->stage, + .dstStageMask = 
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_state->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_state->buf, + .offset = 0, + .size = fp->slice_data_size*f->slice_count, + }; + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = buf_bar, + .bufferMemoryBarrierCount = nb_buf_bar, + }); + slice_state->stage = buf_bar[0].dstStageMask; + slice_state->access = buf_bar[0].dstAccessMask; + nb_buf_bar = 0; + + vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, + f->plane_count); + + /* Decode */ + ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader, + 1, 0, 0, + slice_state, + 0, fp->slice_data_size*f->slice_count, + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader, + f->picture.f, &vp->img_view_out, + 1, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader); + ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader, + VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + + /* Sync between reset and decode shaders */ + buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = slice_state->stage, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = slice_state->access, + .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT | + VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = slice_state->buf, + .offset = fp->slice_data_size*f->slice_count, + .size = slice_state->size - fp->slice_data_size*f->slice_count, + }; + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = 
VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    slice_state->stage = buf_bar[0].dstStageMask;
+    slice_state->access = buf_bar[0].dstAccessMask;
+    nb_buf_bar = 0;
+
+    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
+
+    err = ff_vk_exec_submit(&ctx->s, exec);
+    if (err < 0)
+        return err;
+
+fail:
+    return 0;
+}
+
+/**
+ * Append the GLSL preamble shared by all FFv1 decode shaders to shd->src.
+ *
+ * Emits the CONTEXT_SIZE and MAX_QUANT_TABLE_MASK defines, the sample type
+ * aliases (TYPE, VTYPE2, VTYPE3) and then the range coder and FFv1 common
+ * shader sources.
+ *
+ * @param shd      shader whose source buffer is appended to
+ * @param use32bit non-zero selects 32-bit sample types, zero selects 16-bit
+ */
+static void define_shared_code(FFVulkanShader *shd, int use32bit)
+{
+    int smp_bits = use32bit ? 32 : 16;
+
+    av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
+
+    GLSLF(0, #define TYPE int%i_t ,smp_bits);
+    GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits);
+    GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits);
+    GLSLD(ff_source_rangecoder_comp);
+    GLSLD(ff_source_ffv1_common_comp);
+}
+
+/**
+ * Build, compile, link and register the "ffv1_dec_setup" compute shader.
+ *
+ * The first descriptor set holds the static tables (range coder state,
+ * quantization tables, CRC table); the second holds the slice context
+ * array and the slice offset table.
+ *
+ * @param s    Vulkan context
+ * @param pool execution pool the shader is registered with
+ * @param spv  SPIR-V compiler used to compile the generated GLSL
+ * @param shd  shader to initialize
+ * @return the (non-negative) result of the last step on success,
+ *         a negative error code on failure
+ */
+static int init_setup_shader(FFVulkanContext *s, FFVkExecPool *pool,
+                             FFVkSPIRVCompiler *spv, FFVulkanShader *shd)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    /* Static tables */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+        {
+            .name        = "crc_ieee_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint32_t crc_ieee[256];",
+        },
+    };
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0));
+
+    define_shared_code(shd, 0 /* Irrelevant */);
+
+    /* Per-frame slice data */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx[1024];",
+        },
+        {
+            .name        = "slice_offsets_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali   = "readonly",
+            .buf_content = "uint32_t slice_offsets[1025];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_setup_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    /* The compiled blob is only needed until the shader has been linked. */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/**
+ * Build, compile, link and register the "ffv1_dec_reset" compute shader.
+ *
+ * The workgroup width is the device's maxComputeWorkGroupSize[0], capped
+ * at 1024.
+ *
+ * @param s    Vulkan context
+ * @param pool execution pool the shader is registered with
+ * @param spv  SPIR-V compiler used to compile the generated GLSL
+ * @param shd  shader to initialize
+ * @param ac   coder type; AC_GOLOMB_RICE additionally defines PB_UNALIGNED
+ *             and GOLOMB and pulls in the VLC shader source
+ * @return the (non-negative) result of the last step on success,
+ *         a negative error code on failure
+ */
+static int init_reset_shader(FFVulkanContext *s, FFVkExecPool *pool,
+                             FFVkSPIRVCompiler *spv, FFVulkanShader *shd,
+                             int ac)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+    int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          wg_dim, 1, 1,
+                          0));
+
+    if (ac == AC_GOLOMB_RICE) {
+        av_bprintf(&shd->src, "#define PB_UNALIGNED\n");
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+    }
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    /* Push constant block; its size is taken from FFv1VkResetParameters. */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+    GLSLC(1, u8buf slice_state; );
+    GLSLC(1, uint plane_state_size; );
+    GLSLC(1, uint context_count; );
+    GLSLC(1, uint8_t codec_planes; );
+    GLSLC(1, uint8_t key_frame; );
+    GLSLC(1, uint8_t version; );
+    GLSLC(1, uint8_t micro_version; );
+    GLSLC(1, uint8_t padding[1]; );
+    GLSLC(0, }; );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    /* Static tables */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
+
+    define_shared_code(shd, 0 /* Irrelevant */);
+    if (ac == AC_GOLOMB_RICE)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    /* Only the slice context array is registered for this shader; the
+     * array previously carried two further entries (slice_addr_buf,
+     * slice_size_buf) that were never passed on because the count is 1. */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx[1024];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0));
+
+    GLSLD(ff_source_ffv1_reset_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/**
+ * Build, compile, link and register the "ffv1_dec" compute shader.
+ *
+ * @param s          Vulkan context
+ * @param pool       execution pool the shader is registered with
+ * @param spv        SPIR-V compiler used to compile the generated GLSL
+ * @param shd        shader to initialize
+ * @param frames_ctx frames context whose sw_format determines the layout
+ *                   and plane count of the "dst" storage image binding
+ * @param use32bit   forwarded to define_shared_code() to pick the sample
+ *                   type width
+ * @param ac         coder type; AC_GOLOMB_RICE additionally defines
+ *                   PB_UNALIGNED and GOLOMB and pulls in the VLC source
+ * @param rgb        currently not referenced by this function
+ * @return the (non-negative) result of the last step on success,
+ *         a negative error code on failure
+ */
+static int init_decode_shader(FFVulkanContext *s, FFVkExecPool *pool,
+                              FFVkSPIRVCompiler *spv, FFVulkanShader *shd,
+                              AVHWFramesContext *frames_ctx,
+                              int use32bit, int ac, int rgb)
+{
+    int err;
+    FFVulkanDescriptorSetBinding *desc_set;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+
+    RET(ff_vk_shader_init(s, shd, "ffv1_dec",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          1, 1, 1,
+                          0));
+
+    if (ac == AC_GOLOMB_RICE) {
+        av_bprintf(&shd->src, "#define PB_UNALIGNED\n");
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+    }
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    add_push_data(shd);
+
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+    av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+    /* Static tables */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "rangecoder_static_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint8_t zero_one_state[512];",
+        },
+        {
+            .name        = "quant_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+                           "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+        },
+        {
+            .name        = "crc_ieee_buf",
+            .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout  = "scalar",
+            .buf_content = "uint32_t crc_ieee[256];",
+        },
+    };
+
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0));
+
+    define_shared_code(shd, use32bit);
+    if (ac == AC_GOLOMB_RICE)
+        GLSLD(ff_source_ffv1_vlc_comp);
+
+    /* Per-frame slice data and the output image planes */
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name        = "slice_data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .buf_content = "SliceContext slice_ctx[1024];",
+        },
+        {
+            .name        = "dst",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions  = 2,
+            .mem_layout  = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
+                                                FF_VK_REP_NATIVE),
+            .elems       = av_pix_fmt_count_planes(frames_ctx->sw_format),
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
+
+    GLSLD(ff_source_ffv1_dec_comp);
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/**
+ * hwaccel init callback: set up the shared Vulkan decode context, build
+ * all shaders, create the static table buffers and bind them to the
+ * shaders' global descriptor set.
+ *
+ * Every failure after the SPIR-V compiler has been created goes through
+ * the fail label so that the compiler is always released.
+ *
+ * @param avctx codec context
+ * @return a non-negative value on success, a negative error code on failure
+ */
+static int vk_decode_ffv1_init(AVCodecContext *avctx)
+{
+    int err;
+    FFV1Context *f = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = NULL;
+    FFv1VulkanDecodeContext *fv;
+    FFVkSPIRVCompiler *spv;
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    RET(ff_vk_decode_init(avctx));
+    ctx = dec->shared_ctx;
+
+    fv = ctx->sd_ctx = av_mallocz(sizeof(*fv));
+    if (!fv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Setup shader */
+    RET(init_setup_shader(&ctx->s, &ctx->exec_pool, spv, &fv->setup));
+
+    /* Reset shaders */
+    for (int i = 0; i < 2; i++)
+        RET(init_reset_shader(&ctx->s, &ctx->exec_pool,
+                              spv, &fv->reset[i],
+                              !i ? AC_RANGE_CUSTOM_TAB : 0));
+
+    /* Decode shaders; only the [0][0][0] variant is built for now */
+    for (int i = 0; i < 1; i++) {
+        for (int j = 0; j < 1; j++) {
+            for (int k = 0; k < 1; k++) {
+                AVHWFramesContext *frames_ctx;
+                frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref->data :
+                                 (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+
+                RET(init_decode_shader(&ctx->s, &ctx->exec_pool,
+                                       spv, &fv->decode[i][j][k],
+                                       frames_ctx,
+                                       i,
+                                       !j ? AC_RANGE_CUSTOM_TAB : 0,
+                                       k));
+            }
+        }
+    }
+
+    /* Range coder data */
+    RET(ff_ffv1_vk_init_state_transition_data(&ctx->s,
+                                              &fv->rangecoder_static_buf,
+                                              f));
+
+    /* Quantization table data */
+    RET(ff_ffv1_vk_init_quant_table_data(&ctx->s,
+                                         &fv->quant_buf,
+                                         f));
+
+    /* CRC table buffer */
+    RET(ff_ffv1_vk_init_crc_table_data(&ctx->s,
+                                       &fv->crc_tab_buf,
+                                       f));
+
+    /* Update setup global descriptors. init_setup_shader() declares three
+     * bindings in this set, so write all three rather than only the first. */
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 1, 0,
+                                        &fv->quant_buf,
+                                        0, fv->quant_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                        &fv->setup, 0, 2, 0,
+                                        &fv->crc_tab_buf,
+                                        0, fv->crc_tab_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+    /* Update decode global descriptors */
+    for (int i = 0; i < 1; i++) {
+        for (int j = 0; j < 1; j++) {
+            for (int k = 0; k < 1; k++) {
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 0, 0,
+                                                    &fv->rangecoder_static_buf,
+                                                    0, fv->rangecoder_static_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 1, 0,
+                                                    &fv->quant_buf,
+                                                    0, fv->quant_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+                RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
+                                                    &fv->decode[i][j][k], 0, 2, 0,
+                                                    &fv->crc_tab_buf,
+                                                    0, fv->crc_tab_buf.size,
+                                                    VK_FORMAT_UNDEFINED));
+            }
+        }
+    }
+
+fail:
+    /* The compiler is only needed while building shaders; release it on
+     * success and on every error path. */
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/**
+ * Release the per-frame private data: the generic Vulkan decode picture,
+ * the per-frame buffers and the rct frame.
+ *
+ * @param _hwctx opaque carrying the AVHWDeviceContext
+ * @param data   the FFv1VulkanDecodePicture to release
+ */
+static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    AVHWDeviceContext *hwctx = _hwctx.nc;
+
+    FFv1VulkanDecodePicture *fp = data;
+    FFVulkanDecodePicture *vp = &fp->vp;
+
+    ff_vk_decode_free_frame(hwctx, vp);
+
+    av_buffer_unref(&vp->slices_buf);
+    av_buffer_unref(&fp->slice_state);
+    av_buffer_unref(&fp->slice_offset_buf);
+    av_buffer_unref(&fp->tmp_data);
+
+//    FFVulkanFunctions *vk = &ctx->s.vkfn;
+//    vk->DestroyImageView(hwctx->act_dev, fp->img_view_rct, hwctx->alloc);
+
+    av_frame_free(&fp->rct);
+}
+
+/**
+ * hwaccel uninit callback: tear down the generic Vulkan decode state, then
+ * free all shaders, buffer pools and static table buffers.
+ *
+ * @param avctx codec context
+ * @return 0
+ */
+static int vk_decode_ffv1_uninit(AVCodecContext *avctx)
+{
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
+
+    /* NOTE(review): ctx and fv are still dereferenced below, after this
+     * call. Confirm that ff_vk_decode_uninit() cannot release the shared
+     * context (and with it ctx->s / ctx->sd_ctx) before these frees run. */
+    ff_vk_decode_uninit(avctx);
+
+    ff_vk_shader_free(&ctx->s, &fv->setup);
+
+    for (int i = 0; i < 2; i++)
+        ff_vk_shader_free(&ctx->s, &fv->reset[i]);
+
+    /* Covers more slots than init currently fills; fv is allocated with
+     * av_mallocz(), so the remaining entries are zeroed.
+     * NOTE(review): presumably ff_vk_shader_free() accepts a zeroed
+     * shader - confirm. */
+    for (int i = 0; i < 2; i++)
+        for (int j = 0; j < 2; j++)
+            for (int k = 0; k < 2; k++)
+                ff_vk_shader_free(&ctx->s, &fv->decode[i][j][k]);
+
+    for (int i = 0; i < 2; i++)
+        ff_vk_shader_free(&ctx->s, &fv->rct[i]);
+
+    av_buffer_pool_uninit(&fv->tmp_data_pool);
+    av_buffer_pool_uninit(&fv->slice_state_pool);
+    av_buffer_pool_uninit(&fv->slice_offset_pool);
+
+    ff_vk_free_buf(&ctx->s, &fv->quant_buf);
+    ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
+    ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf);
+
+    return 0;
+}
+
+/* Vulkan compute-based FFv1 hwaccel descriptor. */
+const FFHWAccel ff_ffv1_vulkan_hwaccel = {
+    .p.name                = "ffv1_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_FFV1,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+    .start_frame           = &vk_ffv1_start_frame,
+    .decode_slice          = &vk_ffv1_decode_slice,
+    .end_frame             = &vk_ffv1_end_frame,
+    .free_frame_priv       = &vk_ffv1_free_frame_priv,
+    .frame_priv_data_size  = sizeof(FFv1VulkanDecodePicture),
+    .init                  = &vk_decode_ffv1_init,
+    .update_thread_context = &ff_vk_update_thread_context,
+    .decode_params         = &ff_vk_params_invalidate,
+    .flush                 = &ff_vk_decode_flush,
+    .uninit                = &vk_decode_ffv1_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
--
2.47.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
prev parent reply other threads:[~2025-01-19 10:39 UTC|newest] Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top 2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 02/12] ffv1dec: use dedicated pix_fmt field and call ff_get_format Lynne 2025-01-20 3:13 ` Michael Niedermayer 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 03/12] ffv1dec: move slice start finding into a function Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 04/12] ffv1dec: move header parsing into a separate function Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 05/12] ffv1dec: move slice decoding " Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 06/12] ffv1dec: set f->state_transition for default range coder table Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 07/12] ffv1dec: add support for hwaccels Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 09/12] ffv1_vulkan: move global buffer creation to a shared file Lynne 2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 10/12] ffv1enc_vulkan: support default range coder tables Lynne 2025-01-19 10:38 ` [FFmpeg-devel] [PATCH 11/12] vulkan_decode: support software-defined decoders Lynne 2025-01-19 10:38 ` Lynne [this message]
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20250119103826.1225044-2-dev@lynne.ee \ --to=dev@lynne.ee \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git