From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <ffmpeg-devel-bounces@ffmpeg.org> Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id 804254CC95 for <ffmpegdev@gitmailbox.com>; Sat, 12 Apr 2025 07:25:58 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 3538E68C77C; Sat, 12 Apr 2025 10:23:27 +0300 (EEST) Received: from vidala.pars.ee (vidala.pars.ee [116.203.72.101]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 27FA768C595 for <ffmpeg-devel@ffmpeg.org>; Sat, 12 Apr 2025 10:23:03 +0300 (EEST) DKIM-Signature: v=1; a=rsa-sha256; s=202405r; d=lynne.ee; c=relaxed/relaxed; h=Message-ID:Date:Subject:To:From; t=1744442582; bh=DCw0cJ7c5Ah3GGqbA4+3B8v DVQ/CnUQ+ASBUCpn7RL8=; b=HKOrWXNfZNA4HroOmZhfpOjDw1L82xkB7Dr0Vop776T+JpljiQ 1nflSedtbJp5rBSbJVDDin98VllnLqqCZIsgzVp5KZT0g/SsxDj6KcuufKJQZzuLi9nrAhwm1GL sWUc8iwG3YjxMgGtOhj1FMt5Lg5hYrYoMpYFh65pGWbztUQYBP/zesQk8Wq5jDqi8ywlmt9o0a7 Yi8Ss5aq2OFRSEjkW3VnE79Fscql8sEZadKsJ7BzGeR4XwycBHnD4cUsgJOuac6uOGIYsxX4t4T ypcpSL8r45NwCP+qbBjIM2vsszllMVbr+8x4x7QlBOL2cpANBKWdnFKOVh2I6Lpwg2Q==; DKIM-Signature: v=1; a=ed25519-sha256; s=202405e; d=lynne.ee; c=relaxed/relaxed; h=Message-ID:Date:Subject:To:From; t=1744442582; bh=DCw0cJ7c5Ah3GGqbA4+3B8v DVQ/CnUQ+ASBUCpn7RL8=; b=OUwnL4N1AwjzVGo4LKJ1m4O5CKDu2EPk63PhLJ/kcLp0QhWVh9 9ltVx+6A4Mvy5tUKOBhCLom6txeShcBWazCg==; From: Lynne <dev@lynne.ee> To: ffmpeg-devel@ffmpeg.org Date: Sat, 12 Apr 2025 09:22:46 +0200 Message-ID: <20250412072256.77815-15-dev@lynne.ee> X-Mailer: git-send-email 2.49.0 In-Reply-To: <20250412072256.77815-1-dev@lynne.ee> References: <20250412072256.77815-1-dev@lynne.ee> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 15/18] vulkan_ffv1: remove need for scratch data during setup X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org> List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>, <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe> List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel> List-Post: <mailto:ffmpeg-devel@ffmpeg.org> List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help> List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>, <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe> Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Lynne <dev@lynne.ee> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org> Archived-At: <https://master.gitmailbox.com/ffmpegdev/20250412072256.77815-15-dev@lynne.ee/> List-Archive: <https://master.gitmailbox.com/ffmpegdev/> List-Post: <mailto:ffmpegdev@gitmailbox.com> This saves on some VRAM, but mainly allows for a more unified path. --- libavcodec/vulkan/ffv1_dec_setup.comp | 55 ++++++++++++++------------- libavcodec/vulkan/rangecoder.comp | 17 +++++++++ libavcodec/vulkan_ffv1.c | 23 +---------- 3 files changed, 46 insertions(+), 49 deletions(-) diff --git a/libavcodec/vulkan/ffv1_dec_setup.comp b/libavcodec/vulkan/ffv1_dec_setup.comp index 5da63be56d..a27a878927 100644 --- a/libavcodec/vulkan/ffv1_dec_setup.comp +++ b/libavcodec/vulkan/ffv1_dec_setup.comp @@ -20,13 +20,15 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -uint get_usymbol(inout RangeCoder c, uint64_t state) +uint8_t setup_state[CONTEXT_SIZE]; + +uint get_usymbol(inout RangeCoder c) { - if (get_rac(c, state + 0)) + if (get_rac_direct(c, setup_state[0])) return 0; int e = 0; - while (get_rac(c, state + 1 + min(e, 9))) { // 1..10 + while (get_rac_direct(c, setup_state[1 + min(e, 9)])) { // 1..10 e++; if (e > 31) { corrupt = true; @@ -35,24 +37,24 @@ uint get_usymbol(inout RangeCoder c, uint64_t state) } uint a = 1; - for (int i = e - 1; i >= 0; i--) - a += a + uint(get_rac(c, state + 22 + min(i, 9))); // 22..31 + for (int i = e - 1; i >= 0; i--) { + a <<= 1; + a |= uint(get_rac_direct(c, setup_state[22 + min(i, 9)])); // 22..31 + } return a; } -bool decode_slice_header(inout SliceContext sc, uint64_t state) +bool decode_slice_header(inout SliceContext sc) { - u8buf sb = u8buf(state); - [[unroll]] for (int i = 0; i < CONTEXT_SIZE; i++) - sb[i].v = uint8_t(128); + setup_state[i] = uint8_t(128); - uint sx = get_usymbol(sc.c, state); - uint sy = get_usymbol(sc.c, state); - uint sw = get_usymbol(sc.c, state) + 1; - uint sh = get_usymbol(sc.c, state) + 1; + uint sx = get_usymbol(sc.c); + uint sy = get_usymbol(sc.c); + uint sw = get_usymbol(sc.c) + 1; + uint sh = get_usymbol(sc.c) + 1; if (sx < 0 || sy < 0 || sw <= 0 || sh <= 0 || sx > (gl_NumWorkGroups.x - sw) || sy > (gl_NumWorkGroups.y - sh) || @@ -72,22 +74,22 @@ bool decode_slice_header(inout SliceContext sc, uint64_t state) sc.slice_coding_mode = int(0); for (uint i = 0; i < codec_planes; i++) { - uint idx = get_usymbol(sc.c, state); + uint idx = get_usymbol(sc.c); if (idx >= quant_table_count) return true; sc.quant_table_idx[i] = uint8_t(idx); } - get_usymbol(sc.c, state); - get_usymbol(sc.c, state); - get_usymbol(sc.c, state); + get_usymbol(sc.c); + get_usymbol(sc.c); + get_usymbol(sc.c); if (version >= 4) { - sc.slice_reset_contexts = get_rac(sc.c, state); - sc.slice_coding_mode = get_usymbol(sc.c, state); + sc.slice_reset_contexts = get_rac_direct(sc.c, setup_state[0]); + sc.slice_coding_mode = get_usymbol(sc.c); if (sc.slice_coding_mode != 1 && colorspace == 1) { - sc.slice_rct_coef.x = int(get_usymbol(sc.c, state)); - sc.slice_rct_coef.y = int(get_usymbol(sc.c, state)); + sc.slice_rct_coef.x = int(get_usymbol(sc.c)); + sc.slice_rct_coef.y = int(get_usymbol(sc.c)); if (sc.slice_rct_coef.x + sc.slice_rct_coef.y > 4) return true; } @@ -96,11 +98,11 @@ bool decode_slice_header(inout SliceContext sc, uint64_t state) return false; } -void golomb_init(inout SliceContext sc, uint64_t state) +void golomb_init(inout SliceContext sc) { if (version == 3 && micro_version > 1 || version > 3) { - u8buf(state).v = uint8_t(129); - get_rac(sc.c, state); + setup_state[0] = uint8_t(129); + get_rac_direct(sc.c, setup_state[0]); } uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1; @@ -111,7 +113,6 @@ void golomb_init(inout SliceContext sc, uint64_t state) void main(void) { const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x; - uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE; u8buf bs = u8buf(slice_data + slice_offsets[2*slice_idx + 0]); uint32_t slice_size = slice_offsets[2*slice_idx + 1]; @@ -122,10 +123,10 @@ void main(void) if (slice_idx == (gl_NumWorkGroups.x*gl_NumWorkGroups.y - 1)) get_rac_equi(slice_ctx[slice_idx].c); - decode_slice_header(slice_ctx[slice_idx], scratch_state); + decode_slice_header(slice_ctx[slice_idx]); if (golomb == 1) - golomb_init(slice_ctx[slice_idx], scratch_state); + golomb_init(slice_ctx[slice_idx]); if (ec != 0 && check_crc != 0) { uint32_t crc = crcref; diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index e332bce8a5..ff0432511d 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -245,6 +245,23 @@ bool get_rac(inout RangeCoder c, uint64_t state) return bit; } +bool get_rac_direct(inout RangeCoder c, inout uint8_t state) +{ + int range1 = -int(c.range * state >> 8); + int ranged = c.range + range1; + + bool bit = c.low >= ranged; + state = zero_one_state[state + (bit ? 256 : 0)]; + + c.low = c.low - (bit ? ranged : 0); + c.range = (bit ? 0 : ranged) - (bit ? range1 : 0); + + if (c.range < 0x100) + refill(c); + + return bit; +} + bool get_rac_equi(inout RangeCoder c) { int range1 = c.range >> 1; diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c index 72cacb1678..c1875711bc 100644 --- a/libavcodec/vulkan_ffv1.c +++ b/libavcodec/vulkan_ffv1.c @@ -43,8 +43,6 @@ const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = { typedef struct FFv1VulkanDecodePicture { FFVulkanDecodePicture vp; - AVBufferRef *tmp_data; - AVBufferRef *slice_state; uint32_t plane_state_size; uint32_t slice_state_size; @@ -70,7 +68,6 @@ typedef struct FFv1VulkanDecodeContext { FFVkBuffer crc_tab_buf; AVBufferPool *slice_state_pool; - AVBufferPool *tmp_data_pool; AVBufferPool *slice_offset_pool; AVBufferPool *slice_status_pool; } FFv1VulkanDecodeContext; @@ -78,7 +75,6 @@ typedef struct FFv1VulkanDecodeContext { typedef struct FFv1VkParameters { VkDeviceAddress slice_data; VkDeviceAddress slice_state; - VkDeviceAddress scratch_data; int fmt_lut[4]; uint32_t img_size[2]; @@ -111,7 +107,6 @@ static void add_push_data(FFVulkanShader *shd) GLSLC(0, layout(push_constant, scalar) uniform pushConstants { ); GLSLC(1, u8buf slice_data; ); GLSLC(1, u8buf slice_state; ); - GLSLC(1, u8buf scratch_data; ); GLSLC(0, ); GLSLC(1, ivec4 fmt_lut; ); GLSLC(1, uvec2 img_size; ); @@ -208,16 +203,6 @@ static int vk_ffv1_start_frame(AVCodecContext *avctx, return AVERROR(ENOMEM); } - /* Allocate temporary data buffer */ - err = ff_vk_get_pooled_buffer(&ctx->s, &fv->tmp_data_pool, - &fp->tmp_data, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - NULL, f->slice_count*CONTEXT_SIZE, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (err < 0) - return err; - /* Allocate slice offsets buffer */ err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_offset_pool, &fp->slice_offset_buf, @@ -327,7 +312,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_offset_buf->data; FFVkBuffer *slice_status = (FFVkBuffer *)fp->slice_status_buf->data; - FFVkBuffer *tmp_data = (FFVkBuffer *)fp->tmp_data->data; VkImageView rct_image_views[AV_NUM_DATA_POINTERS]; AVFrame *decode_dst = is_rgb ? vp->dpb_frame : f->picture.f; @@ -380,8 +364,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) vp->slices_buf = NULL; RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_offset_buf, 1, 0)); fp->slice_offset_buf = NULL; - RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->tmp_data, 1, 0)); - fp->tmp_data = NULL; /* Entry barrier for the slice state */ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { @@ -430,8 +412,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx) ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup); pd = (FFv1VkParameters) { .slice_data = slices_buf->address, - .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, - .scratch_data = tmp_data->address, + .slice_state = slice_state->address + f->slice_count*fp->slice_data_size, .img_size[0] = f->picture.f->width, .img_size[1] = f->picture.f->height, @@ -990,7 +971,6 @@ static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx) ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf); ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf); - av_buffer_pool_uninit(&fv->tmp_data_pool); av_buffer_pool_uninit(&fv->slice_state_pool); av_buffer_pool_uninit(&fv->slice_offset_pool); av_buffer_pool_uninit(&fv->slice_status_pool); @@ -1148,7 +1128,6 @@ static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data) av_buffer_unref(&fp->slice_state); av_buffer_unref(&fp->slice_offset_buf); av_buffer_unref(&fp->slice_status_buf); - av_buffer_unref(&fp->tmp_data); } const FFHWAccel ff_ffv1_vulkan_hwaccel = { -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".