From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.ffmpeg.org (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id 34EB84C48D for ; Fri, 5 Sep 2025 17:25:15 +0000 (UTC) Authentication-Results: ffbox; dkim=fail (body hash mismatch (got b'vp4cSfbhmwEoCEVP0JWD7tZmvucek9OtkszFBei9DEY=', expected b'kP2MEUhVrc6ivY+xAEpEsTFvgbMFCr9TVzQLnWqMgdw=')) header.d=ffmpeg.org header.i=@ffmpeg.org header.a=rsa-sha256 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=ffmpeg.org; i=@ffmpeg.org; q=dns/txt; s=mail; t=1757093110; h=mime-version : to : date : message-id : reply-to : subject : list-id : list-archive : list-archive : list-help : list-owner : list-post : list-subscribe : list-unsubscribe : from : cc : content-type : content-transfer-encoding : from; bh=vp4cSfbhmwEoCEVP0JWD7tZmvucek9OtkszFBei9DEY=; b=Wnwxd8Ne/PXq60DdVcZyS0fKrr9LG8P1qRxc8jm5zsDJp09Js1B1hGCmT5T2+r01lfO7B sj9b6+OQ6BpqSOf6laB2bkBfCX/Nif0PPb2VPozsUG7+nnArA4E5tu9bTS/wLByAM47EFhv Ic4mm4R1TB3IhmYBRLwjuJxTxXvcn9dVt4itpeqkN5xhXwlBqVAmvPfEBEoBzV5mS/Ra+g9 eDwzZ2thjGfL0OdWM170KT0JYjCajfayowXhzlqp1KscKwVCndEjzBMBsFxf2TWXMkFA6VV VSAc7NcITbIvyZuOHps1I2Jc9CqQ4pbjk7iiZMBYSWEYmLVFhuE4wG9CJMjw== Received: from [172.19.0.4] (unknown [172.19.0.4]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTP id 0DEDC68E75B; Fri, 5 Sep 2025 20:25:10 +0300 (EEST) ARC-Seal: i=1; cv=none; a=rsa-sha256; d=ffmpeg.org; s=arc; t=1757093109; b=Hbgdow/iZT2GwM9UT9hC7KJw0FVhy9u2qx/ye02zn6vAfaIbCSgWdRJYUd3i+ad/r5NNe 0wCvQdP+QMv6JHO2uxU/FinvP/+8v81ttXA9bdvF9IJcc8UMbxQ1lzeVWLKVHOe7a0lT1xm 3ilYc1gpu+U0Sc4i5QdZUZtM6+XlEgnQvSAIXSKUHlURE9iG47af/li7bGIuHsLoeDDLPa1 Ae5v6QmznOqDygluK5azDlRUqXfETA10QesH79IKbgapZqt3aSjhkQNEDTfhoS4oZEwFdOT BGuU+pjMA+aqX0fJp5S7M5Q44ENMorUhkFpUUjALslq3oS3lH6UZbe9OaIEQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=ffmpeg.org; s=arc; t=1757093109; h=from : sender : reply-to : subject : date : message-id : to : cc : mime-version : content-type : 
content-transfer-encoding : content-id : content-description : resent-date : resent-from : resent-sender : resent-to : resent-cc : resent-message-id : in-reply-to : references : list-id : list-help : list-unsubscribe : list-subscribe : list-post : list-owner : list-archive; bh=vLhToOspW+0GaFpaBPGyvYeUb6G8+IttNJnqGd16Gbs=; b=klhFpagOCzkEi+u9ZNcNsuLzJOb1xRdECCdLfNBRIe1DlR8AfBYHke2sSIfFToDOLhQ75 lvMj+j0QrxQPPc+eMu8WbTcvXtmWXLCmmAEntfLqYsYbZ7DhKdkGn3az7Of8STQWNjfXE/t djCAwlk9P/elY89sHd5bx8Rr9RPxk8gZ0C0LQFGHrnUUei+u2ZWxbGKA+D9Jf5mSCPLxDzr zvibytpsjg+6LBJ2fV1iLjLVH7ovr1k6Tf2Fo6nn9cC1fg/QcEF8Dq7K8e9koBAgsxImNN4 H/snAOC+7RviaYBifQiLMYFcDc+S4g5piBkLPgUgOQ5sS9UluQXMBN4s1KGA== ARC-Authentication-Results: i=1; ffmpeg.org; dkim=pass header.d=ffmpeg.org header.i=@ffmpeg.org; arc=none; dmarc=none Authentication-Results: ffmpeg.org; dkim=pass header.d=ffmpeg.org header.i=@ffmpeg.org; arc=none (Message is not ARC signed); dmarc=none DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=ffmpeg.org; i=@ffmpeg.org; q=dns/txt; s=mail; t=1757093098; h=content-type : mime-version : content-transfer-encoding : from : to : reply-to : subject : date : from; bh=kP2MEUhVrc6ivY+xAEpEsTFvgbMFCr9TVzQLnWqMgdw=; b=QoQFU0vpesqqKKbvbfwpImJSId3yopWZ4Vy4rJyqgipE01ExA+rw1cMhqLqivYJ1Gyntv xjpe3do+SGPB2v4S07mNhbuy9ST/HBnANK3HuhnuhaIo4gyKHc8s7VmdzZR/F/r37yO+/MS oKgEDlJoGI1ica1odYIG7FTaS8ArqgYDxkIaxskecDYQQmOx193byFBeBuqi8lEbKZX7rQr 2qJ5EOaDXWmkmmY8LMkxJwjxHeGSHA8a+BsrEl1bZV/xbsp0X5edby9WpD9E/7B79QKVebM Vn9tYuuJl/il1nvWfU1CONqQkUdiVs5syrW6PBilO60zZNNTupHJPSysVjmw== Received: from 95ff7801cf76 (code.ffmpeg.org [188.245.149.3]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTPS id 9ED33687B8E for ; Fri, 5 Sep 2025 20:24:58 +0300 (EEST) MIME-Version: 1.0 To: ffmpeg-devel@ffmpeg.org Date: Fri, 05 Sep 2025 17:24:58 -0000 Message-ID: <175709309882.25.14586465106777414812@463a07221176> Message-ID-Hash: PANJQFXKPLRBOVPYFDGAYO2PXHO7E2UO X-Message-ID-Hash: PANJQFXKPLRBOVPYFDGAYO2PXHO7E2UO X-MailFrom: code@ffmpeg.org 
X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; loop; banned-address; header-match-ffmpeg-devel.ffmpeg.org-0; header-match-ffmpeg-devel.ffmpeg.org-1; header-match-ffmpeg-devel.ffmpeg.org-2; header-match-ffmpeg-devel.ffmpeg.org-3; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list Reply-To: FFmpeg development discussions and patches Subject: [FFmpeg-devel] [PATCH] Optimize vvc_apply_bdof_block_8x (PR #20448) List-Id: FFmpeg development discussions and patches Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: From: welder via ffmpeg-devel Cc: welder Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Archived-At: List-Archive: List-Post: PR #20448 opened by welder URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20448 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20448.patch The speed improvement is included in the commit message. The count of arithmetic operations is down from 10 to 6 and some cruft is cleaned up. 
>>From 8967e3a6a725358494307b51add9349d3e7dd075 Mon Sep 17 00:00:00 2001 From: Krzysztof Pyrkosz Date: Fri, 5 Sep 2025 19:17:48 +0200 Subject: [PATCH] Optimize vvc_apply_bdof_block_8x Before and after: A53: apply_bdof_8_8x16_neon: 3320.5 ( 4.02x) apply_bdof_10_8x16_neon: 3317.8 ( 3.90x) apply_bdof_12_8x16_neon: 3303.6 ( 3.91x) apply_bdof_8_8x16_neon: 3216.2 ( 4.18x) apply_bdof_10_8x16_neon: 3181.0 ( 4.09x) apply_bdof_12_8x16_neon: 3172.1 ( 4.09x) A72: apply_bdof_8_8x16_neon: 1827.4 ( 5.02x) apply_bdof_10_8x16_neon: 1838.5 ( 4.89x) apply_bdof_12_8x16_neon: 1841.1 ( 4.83x) apply_bdof_8_8x16_neon: 1691.6 ( 5.46x) apply_bdof_10_8x16_neon: 1695.9 ( 5.23x) apply_bdof_12_8x16_neon: 1695.4 ( 5.29x) A78 apply_bdof_8_8x16_neon: 648.9 ( 7.43x) apply_bdof_10_8x16_neon: 646.1 ( 7.04x) apply_bdof_12_8x16_neon: 643.8 ( 7.04x) apply_bdof_8_8x16_neon: 603.2 ( 7.97x) apply_bdof_10_8x16_neon: 604.1 ( 7.52x) apply_bdof_12_8x16_neon: 604.5 ( 7.52x) --- libavcodec/aarch64/vvc/inter.S | 39 ++++++++++++---------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/libavcodec/aarch64/vvc/inter.S b/libavcodec/aarch64/vvc/inter.S index a6648b64fc..f27b5a47f4 100644 --- a/libavcodec/aarch64/vvc/inter.S +++ b/libavcodec/aarch64/vvc/inter.S @@ -802,43 +802,33 @@ endfunc vy .req x7 ldr w8, [sp] - movi v7.4s, #(1 << (14 - \bit_depth)) mov x12, #(BDOF_BLOCK_SIZE * 2) mov x14, #(VVC_MAX_PB_SIZE * 2) .if \bit_depth >= 10 // clip pixel mov w15, #((1 << \bit_depth) - 1) - movi v18.8h, #0 dup v19.8h, w15 .endif 0: - ld1r {v0.8h}, [vx], #2 - ld1r {v1.8h}, [vy], #2 - ld1r {v2.8h}, [vx] - ld1r {v3.8h}, [vy] + ldr s0, [vx], #(2 * BDOF_MIN_BLOCK_SIZE) + ldr s1, [vy], #(2 * BDOF_MIN_BLOCK_SIZE) mov w13, #(BDOF_MIN_BLOCK_SIZE) - ins v0.d[1], v2.d[1] - ins v1.d[1], v3.d[1] 1: - ld1 {v2.8h}, [gh], x12 - ld1 {v4.8h}, [gv], x12 - smull v3.4s, v0.4h, v2.4h - smull2 v16.4s, v0.8h, v2.8h - smlal v3.4s, v1.4h, v4.4h - smlal2 v16.4s, v1.8h, v4.8h - ld1 {v5.8h}, [src0], x14 ld1 {v6.8h}, [src1], 
x14 - saddl v2.4s, v5.4h, v6.4h - add v2.4s, v2.4s, v7.4s - add v2.4s, v2.4s, v3.4s - saddl2 v4.4s, v5.8h, v6.8h - add v4.4s, v4.4s, v7.4s - add v4.4s, v4.4s, v16.4s + ld1 {v2.8h}, [gh], x12 + ld1 {v4.8h}, [gv], x12 - sqshrn v5.4h, v2.4s, #(15 - \bit_depth) - sqshrn2 v5.8h, v4.4s, #(15 - \bit_depth) + saddl v17.4s, v5.4h, v6.4h + saddl2 v16.4s, v5.8h, v6.8h + smlal v17.4s, v4.4h, v1.h[0] + smlal2 v16.4s, v4.8h, v1.h[1] + smlal v17.4s, v2.4h, v0.h[0] + smlal2 v16.4s, v2.8h, v0.h[1] + + sqrshrun v5.4h, v17.4s, #(15 - \bit_depth) + sqrshrun2 v5.8h, v16.4s, #(15 - \bit_depth) subs w13, w13, #1 .if \bit_depth == 8 sqxtun v5.8b, v5.8h @@ -846,14 +836,11 @@ endfunc add dst, dst, dst_stride .else smin v5.8h, v5.8h, v19.8h - smax v5.8h, v5.8h, v18.8h st1 {v5.8h}, [dst], dst_stride .endif b.ne 1b subs w8, w8, #(BDOF_MIN_BLOCK_SIZE) - add vx, vx, #(2 * BDOF_MIN_BLOCK_SIZE - 2) - add vy, vy, #(2 * BDOF_MIN_BLOCK_SIZE - 2) b.ne 0b ret -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org