From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.ffmpeg.org (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id B75484C3F4 for ; Thu, 4 Sep 2025 21:16:18 +0000 (UTC) Authentication-Results: ffbox; dkim=fail (body hash mismatch (got b'SV1JqNmWvr2965YdZM5lfxnZtmf2FNlKVQrKFHjqYGc=', expected b'4ILe+ymEMJD3yIe0MN9KjExEtuUZ6DIJGfgP2NZ3sd4=')) header.d=ffmpeg.org header.i=@ffmpeg.org header.a=rsa-sha256 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=ffmpeg.org; i=@ffmpeg.org; q=dns/txt; s=mail; t=1757020570; h=mime-version : to : message-id : reply-to : subject : list-id : list-archive : list-archive : list-help : list-owner : list-post : list-subscribe : list-unsubscribe : from : cc : content-type : content-transfer-encoding : from; bh=SV1JqNmWvr2965YdZM5lfxnZtmf2FNlKVQrKFHjqYGc=; b=byCOwLlNyy2YWjCIF7Ws3JPG3tShHFaGafifdkbohHtRSKxwLeYHNOOc56dMiINS+7k1j RzqlKXrD69Y1SVsMEPiIGDRpmWwaULzPKKInTMaht3m1a4iC3AAy9bwfj77T+UEQna6ti9S ye1VcUPcZP4UCFJzbARdm/LWzzL9Jj1GEcHDj5BOGWTQt0eyjz3jJPW33upziiLKA6c2eE2 NyyaolgMMF2/RRszKgArBFaY++koUWkAKDRKtvyjqkTtsN/mLhP3kP4jcQgFJRuUkWh5Z5+ WAh1rxuwPionRqbVaAwDMSH/VOTq9caCuuBUa7I09RVgXw+TfE0dFzxDYaQA== Received: from [172.19.0.4] (unknown [172.19.0.4]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTP id 442C768D506; Fri, 5 Sep 2025 00:16:10 +0300 (EEST) ARC-Seal: i=1; cv=none; a=rsa-sha256; d=ffmpeg.org; s=arc; t=1757020569; b=KCpkYqssndfVrcjKuHdw390Gs6M5Zf6N8S2tW3JU/YUhfRGfc5asyB1HMfyRmpfCGast2 kiTbnAaT1PLntXHb5wzsxx+gEeym7lCmWfIpESoXc28bX2/jBtU3c6l+EQdRIZ2AhrOqBvc xYq7viEBQmHiwdHoNZqYWnGnDC+uLo+W8iPTK6an0jgySYgtim17nsDB0XcxxKd4S5/5xzz yQUPy4AFwSZhANjss+RUXEMW7QgYDRBlSLxbBgKEucxZzQTI+jhqrmWYXlrPIqPFu15xB+r 2y7cC9nim6sLfBZMVlV5ZqR2jKmXs7pXR6LaJVNrALFjZ92Wbt9EMD3+e1EA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=ffmpeg.org; s=arc; t=1757020569; h=from : sender : reply-to : subject : date : message-id : to : cc : mime-version : content-type : 
content-transfer-encoding : content-id : content-description : resent-date : resent-from : resent-sender : resent-to : resent-cc : resent-message-id : in-reply-to : references : list-id : list-help : list-unsubscribe : list-subscribe : list-post : list-owner : list-archive; bh=DfHQzWPvPaUCAikC5alsI82CFURJkoIpbbXMeQ/5/KQ=; b=sfVUrjOiMm/+0I1WFqhp+RqIYLG9Y/YpPfuemYeOpe4DMGPTxn59rBFKl2LbdwfN/WlQK nZbxmUrktr3iflqaGQn8nAl8oD4fcSsF9S0VC5Dq1T9iRCX8c6+5o31Lq106mj+WyJ1DhkS lEcy6y70nag3MKl2EUm3/SeNxEqdT0TXCMgdxJA4iKycyYiAKqzxFXsY1agf6acmGgsLhew ZHLqKTv3vzdU6Wy6ZFW1933JYlHVoW97/wFU15J5l7aJCrcuDysOqr9I/pC2AtsnXJXCWiP QGScNLu9o7ike3qx7S+sWIknp1hBUu/bLCY9N/LptK6QITb7UA2BG59YSLmQ== ARC-Authentication-Results: i=1; ffmpeg.org; dkim=pass header.d=ffmpeg.org header.i=@ffmpeg.org; arc=none; dmarc=none Authentication-Results: ffmpeg.org; dkim=pass header.d=ffmpeg.org header.i=@ffmpeg.org; arc=none (Message is not ARC signed); dmarc=none DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=ffmpeg.org; i=@ffmpeg.org; q=dns/txt; s=mail; t=1757020558; h=content-type : mime-version : content-transfer-encoding : from : to : reply-to : subject : from; bh=4ILe+ymEMJD3yIe0MN9KjExEtuUZ6DIJGfgP2NZ3sd4=; b=Y2KtbjNCki9fTPJyjPvQhL75iTBcrTA76v1tIKhMczfcs2+6tJA/lLioug621UEymOU8N 43hVOo5LfMWH/IntRJftfGbru8FJ0E08Nh+Y5Or20mOyz2h0ozC+7RM61RnxkOM0NNGMQN5 n6U71r4Mq19YL+IT/CUNnEDUb7WmYxsC5i0ETV8Jk0vPWcvnZp2U1SkcyFWV8IOQCmFFY8K WDGxzWv1EHh3FbL0ggz+DczJef3ix2P9n6R1lXfPRNos8oi/ZNSDVFkPno4UIFImeKHJP0+ jBPEI80BcJU63RaCsy+M7GUPTCXpDXRs/nNfJPQ+PECRb4E/QSH4PoYcTegQ== Received: from 5d8f51c41678 (code.ffmpeg.org [188.245.149.3]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTPS id C8F0768D3E8 for ; Fri, 5 Sep 2025 00:15:58 +0300 (EEST) MIME-Version: 1.0 To: ffmpeg-devel@ffmpeg.org Message-ID: <175702055901.25.14087152834822354223@463a07221176> Message-ID-Hash: ZXXQN4RK36ZVA7BG6AERAHQ3R4CXVQ65 X-Message-ID-Hash: ZXXQN4RK36ZVA7BG6AERAHQ3R4CXVQ65 X-MailFrom: code@ffmpeg.org X-Mailman-Rule-Misses: dmarc-mitigation; 
no-senders; approved; loop; banned-address; header-match-ffmpeg-devel.ffmpeg.org-0; header-match-ffmpeg-devel.ffmpeg.org-1; header-match-ffmpeg-devel.ffmpeg.org-2; header-match-ffmpeg-devel.ffmpeg.org-3; emergency; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header X-Mailman-Version: 3.3.10 Precedence: list Reply-To: FFmpeg development discussions and patches Subject: [FFmpeg-devel] [PATCH] Replace uxtl with umull in dmvr_hv_8 (PR #20442) List-Id: FFmpeg development discussions and patches Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: From: welder via ffmpeg-devel Cc: welder Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Archived-At: List-Archive: List-Post: PR #20442 opened by welder URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20442 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20442.patch Low-hanging fruit: the widening multiplies (umull/umlal) consume the 8-bit inputs directly, so the separate uxtl instructions are no longer needed. Before (A78): dmvr_hv_8_12x20_neon: 205.3 ( 5.21x) dmvr_hv_8_20x12_neon: 171.8 ( 3.15x) dmvr_hv_8_20x20_neon: 282.7 ( 3.11x) After (A78): dmvr_hv_8_12x20_neon: 172.7 ( 5.58x) dmvr_hv_8_20x12_neon: 133.3 ( 3.36x) dmvr_hv_8_20x20_neon: 214.6 ( 3.40x) >>From 55e2f5d2661e23e5adab5351effec892294fd708 Mon Sep 17 00:00:00 2001 From: Krzysztof Pyrkosz Date: Thu, 4 Sep 2025 22:56:43 +0200 Subject: [PATCH] Replace uxtl with umull in dmvr_hv_8 Before (A78): dmvr_hv_8_12x20_neon: 205.3 ( 5.21x) dmvr_hv_8_20x12_neon: 171.8 ( 3.15x) dmvr_hv_8_20x20_neon: 282.7 ( 3.11x) After (A78): dmvr_hv_8_12x20_neon: 172.7 ( 5.58x) dmvr_hv_8_20x12_neon: 133.3 ( 3.36x) dmvr_hv_8_20x20_neon: 214.6 ( 3.40x) --- libavcodec/aarch64/vvc/inter.S | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/libavcodec/aarch64/vvc/inter.S b/libavcodec/aarch64/vvc/inter.S index 50fc073dc9..a6648b64fc 100644 --- a/libavcodec/aarch64/vvc/inter.S +++
b/libavcodec/aarch64/vvc/inter.S @@ -393,13 +393,10 @@ function ff_vvc_dmvr_hv_8_neon, export=1 movrel x9, X(ff_vvc_inter_luma_dmvr_filters) add x12, x9, mx, lsl #1 - ldrb w10, [x12] - ldrb w11, [x12, #1] mov tmp0, sp add tmp1, tmp0, #(VVC_MAX_PB_SIZE * 2) // We know the value are positive - dup v0.8h, w10 // filter_x[0] - dup v1.8h, w11 // filter_x[1] + ld2r {v0.16b, v1.16b}, [x12] add x12, x9, my, lsl #1 ldrb w10, [x12] @@ -424,14 +421,10 @@ function ff_vvc_dmvr_hv_8_neon, export=1 // width > 16 ldur q5, [src, #1] ldr q4, [src], #16 - uxtl v7.8h, v5.8b - uxtl2 v17.8h, v5.16b - uxtl v6.8h, v4.8b - uxtl2 v16.8h, v4.16b - mul v6.8h, v6.8h, v0.8h - mul v16.8h, v16.8h, v0.8h - mla v6.8h, v7.8h, v1.8h - mla v16.8h, v17.8h, v1.8h + umull v6.8h, v4.8b, v0.8b + umull2 v16.8h, v4.16b, v0.16b + umlal v6.8h, v5.8b, v1.8b + umlal2 v16.8h, v5.16b, v1.16b urshr v6.8h, v6.8h, #(8 - 6) urshr v7.8h, v16.8h, #(8 - 6) stp q6, q7, [x13], #32 @@ -451,10 +444,8 @@ function ff_vvc_dmvr_hv_8_neon, export=1 // width > 8 ldur d5, [src, #1] ldr d4, [src], #8 - uxtl v7.8h, v5.8b - uxtl v6.8h, v4.8b - mul v6.8h, v6.8h, v0.8h - mla v6.8h, v7.8h, v1.8h + umull v6.8h, v4.8b, v0.8b + umlal v6.8h, v5.8b, v1.8b urshr v6.8h, v6.8h, #(8 - 6) str q6, [x13], #16 @@ -468,10 +459,8 @@ function ff_vvc_dmvr_hv_8_neon, export=1 3: ldur s5, [src, #1] ldr s4, [src], #4 - uxtl v7.8h, v5.8b - uxtl v6.8h, v4.8b - mul v6.4h, v6.4h, v0.4h - mla v6.4h, v7.4h, v1.4h + umull v6.8h, v4.8b, v0.8b + umlal v6.8h, v5.8b, v1.8b urshr v6.4h, v6.4h, #(8 - 6) str d6, [x13], #8 -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org