From: Lynne <dev@lynne.ee> To: ffmpeg-devel@ffmpeg.org Cc: Lynne <dev@lynne.ee> Subject: [FFmpeg-devel] [PATCH 08/12] ffv1enc_vulkan: refactor shaders slightly to support sharing Date: Sun, 19 Jan 2025 19:36:26 +0900 Message-ID: <20250119103640.1224690-8-dev@lynne.ee> (raw) In-Reply-To: <20250119103640.1224690-1-dev@lynne.ee> The shaders were written to support sharing, but needed slight tweaking. --- libavcodec/ffv1enc_vulkan.c | 75 +++++++++++++++------------ libavcodec/vulkan/ffv1_common.comp | 24 +++++++-- libavcodec/vulkan/ffv1_enc_setup.comp | 18 +------ libavcodec/vulkan/ffv1_reset.comp | 3 +- libavcodec/vulkan/rangecoder.comp | 27 +++++----- 5 files changed, 82 insertions(+), 65 deletions(-) diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c index 243f472568..53d648bcec 100644 --- a/libavcodec/ffv1enc_vulkan.c +++ b/libavcodec/ffv1enc_vulkan.c @@ -136,14 +136,15 @@ typedef struct FFv1VkResetParameters { uint32_t context_count; uint8_t codec_planes; uint8_t key_frame; - uint8_t padding[3]; + uint8_t version; + uint8_t micro_version; + uint8_t padding[1]; } FFv1VkResetParameters; typedef struct FFv1VkParameters { VkDeviceAddress slice_state; VkDeviceAddress scratch_data; VkDeviceAddress out_data; - uint64_t slice_size_max; int32_t sar[2]; uint32_t chroma_shift[2]; @@ -151,6 +152,7 @@ typedef struct FFv1VkParameters { uint32_t plane_state_size; uint32_t context_count; uint32_t crcref; + uint32_t slice_size_max; uint8_t bits_per_raw_sample; uint8_t context_model; @@ -175,7 +177,6 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(1, u8buf slice_state; ); GLSLC(1, u8buf scratch_data; ); GLSLC(1, u8buf out_data; ); - GLSLC(1, uint64_t slice_size_max; ); GLSLC(0, ); GLSLC(1, ivec2 sar; ); GLSLC(1, uvec2 chroma_shift; ); @@ -183,6 +184,7 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(1, uint plane_state_size; ); GLSLC(1, uint context_count; ); GLSLC(1, uint32_t crcref; ); + GLSLC(1, uint32_t slice_size_max; ); GLSLC(0, ); GLSLC(1, 
uint8_t bits_per_raw_sample; ); GLSLC(1, uint8_t context_model; ); @@ -492,7 +494,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .slice_state = slice_data_buf->address + f->slice_count*256, .scratch_data = tmp_data_buf->address, .out_data = out_data_buf->address, - .slice_size_max = out_data_buf->size / f->slice_count, .bits_per_raw_sample = f->bits_per_raw_sample, .sar[0] = pict->sample_aspect_ratio.num, .sar[1] = pict->sample_aspect_ratio.den, @@ -501,6 +502,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx, .plane_state_size = plane_state_size, .context_count = context_count, .crcref = f->crcref, + .slice_size_max = out_data_buf->size / f->slice_count, .context_model = fv->ctx.context_model, .version = f->version, .micro_version = f->micro_version, @@ -966,7 +968,6 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd) GLSLF(0, #define TYPE int%i_t ,smp_bits); GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits); GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits); - GLSLD(ff_source_common_comp); GLSLD(ff_source_rangecoder_comp); if (f->ac == AC_GOLOMB_RICE) @@ -993,6 +994,10 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) 1, 1, 1, 0)); + /* Common codec header */ + GLSLD(ff_source_common_comp); + add_push_data(shd); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1038,8 +1043,6 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0)); - add_push_data(shd); - GLSLD(ff_source_ffv1_enc_setup_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", @@ -1074,6 +1077,22 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) wg_dim, 1, 1, 0)); + /* Common codec 
header */ + GLSLD(ff_source_common_comp); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, u8buf slice_state; ); + GLSLC(1, uint plane_state_size; ); + GLSLC(1, uint context_count; ); + GLSLC(1, uint8_t codec_planes; ); + GLSLC(1, uint8_t key_frame; ); + GLSLC(1, uint8_t version; ); + GLSLC(1, uint8_t micro_version; ); + GLSLC(1, uint8_t padding[1]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1110,17 +1129,6 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0)); - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, u8buf slice_state; ); - GLSLC(1, uint plane_state_size; ); - GLSLC(1, uint context_count; ); - GLSLC(1, uint8_t codec_planes; ); - GLSLC(1, uint8_t key_frame; ); - GLSLC(1, uint8_t padding[3]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - GLSLD(ff_source_ffv1_reset_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", @@ -1164,6 +1172,18 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) wg_count, wg_count, 1, 0)); + define_shared_code(avctx, shd); + + GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); + GLSLC(1, int offset; ); + GLSLC(1, uint8_t bits; ); + GLSLC(1, uint8_t planar_rgb; ); + GLSLC(1, uint8_t transparency; ); + GLSLC(1, uint8_t padding[1]; ); + GLSLC(0, }; ); + ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters), + VK_SHADER_STAGE_COMPUTE_BIT); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); 
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1187,8 +1207,6 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0)); - define_shared_code(avctx, shd); - desc_set = (FFVulkanDescriptorSetBinding []) { { .name = "slice_data_buf", @@ -1220,16 +1238,6 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0)); - GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); - GLSLC(1, int offset; ); - GLSLC(1, uint8_t bits; ); - GLSLC(1, uint8_t planar_rgb; ); - GLSLC(1, uint8_t transparency; ); - GLSLC(1, uint8_t padding[1]; ); - GLSLC(0, }; ); - ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters), - VK_SHADER_STAGE_COMPUTE_BIT); - GLSLD(ff_source_ffv1_enc_rct_comp); RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main", @@ -1268,6 +1276,11 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) 1, 1, 1, 0)); + /* Common codec header */ + GLSLD(ff_source_common_comp); + + add_push_data(shd); + av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES); av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS); av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE); @@ -1328,8 +1341,6 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv) }; RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0)); - add_push_data(shd); - /* Assemble the shader body */ GLSLD(ff_source_ffv1_enc_common_comp); diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp index 5b4a882367..604d03b2de 100644 --- a/libavcodec/vulkan/ffv1_common.comp +++ b/libavcodec/vulkan/ffv1_common.comp @@ -22,17 +22,18 @@ struct 
SliceContext { RangeCoder c; - -#ifdef GOLOMB PutBitContext pb; /* 8*8 bytes */ -#endif ivec2 slice_dim; ivec2 slice_pos; ivec2 slice_rct_coef; + u8vec4 quant_table_idx; + uint context_count; uint hdr_len; // only used for golomb - int slice_coding_mode; + + uint slice_coding_mode; + bool slice_reset_contexts; }; /* -1, { -1, 0 } */ @@ -72,3 +73,18 @@ const uint32_t log2_run[41] = { 16, 17, 18, 19, 20, 21, 22, 23, 24, }; + +uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) +{ + uint mpw = 1 << chroma_shift; + uint awidth = align(width, mpw); + + if ((version < 4) || ((version == 4) && (micro_version < 3))) + return width * sx / num_h_slices; + + sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw; + if (sx == awidth) + sx = width; + + return sx; +} diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp index b861e25f74..23f09b2af6 100644 --- a/libavcodec/vulkan/ffv1_enc_setup.comp +++ b/libavcodec/vulkan/ffv1_enc_setup.comp @@ -20,21 +20,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift) -{ - uint mpw = 1 << chroma_shift; - uint awidth = align(width, mpw); - - if ((version < 4) || ((version == 4) && (micro_version < 3))) - return width * sx / num_h_slices; - - sx = (2 * awidth * sx + num_h_slices * mpw) / (2 * num_h_slices * mpw) * mpw; - if (sx == awidth) - sx = width; - - return sx; -} - void init_slice(out SliceContext sc, const uint slice_idx) { /* Set coordinates */ @@ -52,6 +37,7 @@ void init_slice(out SliceContext sc, const uint slice_idx) sc.slice_dim = ivec2(sxe - sxs, sye - sys); sc.slice_rct_coef = ivec2(1, 1); sc.slice_coding_mode = int(force_pcm == 1); + sc.slice_reset_contexts = sc.slice_coding_mode == 1; rac_init(sc.c, OFFBUF(u8buf, out_data, slice_idx * slice_size_max), @@ -105,7 +91,7 @@ void write_slice_header(inout SliceContext sc, uint64_t state) 
put_symbol_unsigned(sc.c, state, sar.y); if (version >= 4) { - put_rac_full(sc.c, state, sc.slice_coding_mode == 1); + put_rac_full(sc.c, state, sc.slice_reset_contexts); put_symbol_unsigned(sc.c, state, sc.slice_coding_mode); if (sc.slice_coding_mode != 1 && colorspace == 1) { put_symbol_unsigned(sc.c, state, sc.slice_rct_coef.y); diff --git a/libavcodec/vulkan/ffv1_reset.comp b/libavcodec/vulkan/ffv1_reset.comp index c7c7962850..1b87ca754e 100644 --- a/libavcodec/vulkan/ffv1_reset.comp +++ b/libavcodec/vulkan/ffv1_reset.comp @@ -24,7 +24,8 @@ void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - if (slice_ctx[slice_idx].slice_coding_mode == 0 && key_frame == 0) + if (key_frame == 0 && + slice_ctx[slice_idx].slice_reset_contexts == false) return; uint64_t slice_state_off = uint64_t(slice_state) + diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index 848a056fb1..6e3b9c1238 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -21,8 +21,9 @@ */ struct RangeCoder { - u8buf bytestream_start; - u8buf bytestream; + uint64_t bytestream_start; + uint64_t bytestream; + uint64_t bytestream_end; uint low; uint16_t range; @@ -34,28 +35,29 @@ struct RangeCoder { void renorm_encoder_full(inout RangeCoder c) { int bs_cnt = 0; + u8buf bytestream = u8buf(c.bytestream); if (c.outstanding_byte == 0xFF) { c.outstanding_byte = uint8_t(c.low >> 8); } else if (c.low <= 0xFF00) { - c.bytestream[bs_cnt++].v = c.outstanding_byte; + bytestream[bs_cnt++].v = c.outstanding_byte; uint16_t cnt = c.outstanding_count; for (; cnt > 0; cnt--) - c.bytestream[bs_cnt++].v = uint8_t(0xFF); + bytestream[bs_cnt++].v = uint8_t(0xFF); c.outstanding_count = uint16_t(0); c.outstanding_byte = uint8_t(c.low >> 8); } else if (c.low >= 0x10000) { - c.bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1); + bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1); uint16_t cnt = 
c.outstanding_count; for (; cnt > 0; cnt--) - c.bytestream[bs_cnt++].v = uint8_t(0x00); + bytestream[bs_cnt++].v = uint8_t(0x00); c.outstanding_count = uint16_t(0); c.outstanding_byte = uint8_t(bitfieldExtract(c.low, 8, 8)); } else { c.outstanding_count++; } - c.bytestream = OFFBUF(u8buf, c.bytestream, bs_cnt); + c.bytestream += bs_cnt; c.range <<= 8; c.low = bitfieldInsert(0, c.low, 8, 8); } @@ -74,10 +76,10 @@ void renorm_encoder(inout RangeCoder c) return; } - u8buf bs = c.bytestream; + u8buf bs = u8buf(c.bytestream); uint8_t outstanding_byte = c.outstanding_byte; - c.bytestream = OFFBUF(u8buf, bs, oc); + c.bytestream = uint64_t(bs) + oc; c.outstanding_count = uint16_t(0); c.outstanding_byte = uint8_t(low >> 8); @@ -179,10 +181,11 @@ uint32_t rac_terminate(inout RangeCoder c) return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start)); } -void rac_init(out RangeCoder r, u8buf data, uint64_t buf_size) +void rac_init(out RangeCoder r, u8buf data, uint buf_size) { - r.bytestream_start = data; - r.bytestream = data; + r.bytestream_start = uint64_t(data); + r.bytestream = uint64_t(data); + r.bytestream_end = uint64_t(data) + buf_size; r.low = 0; r.range = uint16_t(0xFF00); r.outstanding_count = uint16_t(0); -- 2.47.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2025-01-19 10:38 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-01-19 10:36 [FFmpeg-devel] [PATCH 01/12] vulkan: add ff_vk_create_imageview Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 02/12] ffv1dec: use dedicated pix_fmt field and call ff_get_format Lynne
2025-01-20 3:13 ` Michael Niedermayer
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 03/12] ffv1dec: move slice start finding into a function Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 04/12] ffv1dec: move header parsing into a separate function Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 05/12] ffv1dec: move slice decoding " Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 06/12] ffv1dec: set f->state_transition for default range coder table Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 07/12] ffv1dec: add support for hwaccels Lynne
2025-01-19 10:36 ` Lynne [this message]
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 09/12] ffv1_vulkan: move global buffer creation to a shared file Lynne
2025-01-19 10:36 ` [FFmpeg-devel] [PATCH 10/12] ffv1enc_vulkan: support default range coder tables Lynne
2025-01-19 10:38 ` [FFmpeg-devel] [PATCH 11/12] vulkan_decode: support software-defined decoders Lynne
2025-01-19 10:38 ` [FFmpeg-devel] [PATCH 12/12] [RFC] ffv1dec_vulkan: add a Vulkan compute-based hardware decoding implementation Lynne
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250119103640.1224690-8-dev@lynne.ee \
    --to=dev@lynne.ee \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git