Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: welder via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: welder <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] Replace uxtl with umull in dmvr_hv_8 (PR #20442)
Message-ID: <175702055901.25.14087152834822354223@463a07221176> (raw)

PR #20442 opened by welder
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20442
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20442.patch

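Low-hanging fruit: use widening multiplies (umull/umlal) directly on the 8-bit source pixels instead of widening them with uxtl first and then using mul/mla.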

Before (first group) and after (second group) on A78:
dmvr_hv_8_12x20_neon:                                  205.3 ( 5.21x)
dmvr_hv_8_20x12_neon:                                  171.8 ( 3.15x)
dmvr_hv_8_20x20_neon:                                  282.7 ( 3.11x)

dmvr_hv_8_12x20_neon:                                  172.7 ( 5.58x)
dmvr_hv_8_20x12_neon:                                  133.3 ( 3.36x)
dmvr_hv_8_20x20_neon:                                  214.6 ( 3.40x)


From 55e2f5d2661e23e5adab5351effec892294fd708 Mon Sep 17 00:00:00 2001
From: Krzysztof Pyrkosz <ffmpeg@szaka.eu>
Date: Thu, 4 Sep 2025 22:56:43 +0200
Subject: [PATCH] Replace uxtl with umull in dmvr_hv_8

Before (first group) and after (second group) on A78:
dmvr_hv_8_12x20_neon:                                  205.3 ( 5.21x)
dmvr_hv_8_20x12_neon:                                  171.8 ( 3.15x)
dmvr_hv_8_20x20_neon:                                  282.7 ( 3.11x)

dmvr_hv_8_12x20_neon:                                  172.7 ( 5.58x)
dmvr_hv_8_20x12_neon:                                  133.3 ( 3.36x)
dmvr_hv_8_20x20_neon:                                  214.6 ( 3.40x)
---
 libavcodec/aarch64/vvc/inter.S | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/libavcodec/aarch64/vvc/inter.S b/libavcodec/aarch64/vvc/inter.S
index 50fc073dc9..a6648b64fc 100644
--- a/libavcodec/aarch64/vvc/inter.S
+++ b/libavcodec/aarch64/vvc/inter.S
@@ -393,13 +393,10 @@ function ff_vvc_dmvr_hv_8_neon, export=1
 
         movrel          x9, X(ff_vvc_inter_luma_dmvr_filters)
         add             x12, x9, mx, lsl #1
-        ldrb            w10, [x12]
-        ldrb            w11, [x12, #1]
         mov             tmp0, sp
         add             tmp1, tmp0, #(VVC_MAX_PB_SIZE * 2)
         // We know the value are positive
-        dup             v0.8h, w10                  // filter_x[0]
-        dup             v1.8h, w11                  // filter_x[1]
+        ld2r            {v0.16b, v1.16b}, [x12]
 
         add             x12, x9, my, lsl #1
         ldrb            w10, [x12]
@@ -424,14 +421,10 @@ function ff_vvc_dmvr_hv_8_neon, export=1
         // width > 16
         ldur            q5, [src, #1]
         ldr             q4, [src], #16
-        uxtl            v7.8h, v5.8b
-        uxtl2           v17.8h, v5.16b
-        uxtl            v6.8h, v4.8b
-        uxtl2           v16.8h, v4.16b
-        mul             v6.8h, v6.8h, v0.8h
-        mul             v16.8h, v16.8h, v0.8h
-        mla             v6.8h, v7.8h, v1.8h
-        mla             v16.8h, v17.8h, v1.8h
+        umull           v6.8h, v4.8b, v0.8b
+        umull2          v16.8h, v4.16b, v0.16b
+        umlal           v6.8h, v5.8b, v1.8b
+        umlal2          v16.8h, v5.16b, v1.16b
         urshr           v6.8h, v6.8h, #(8 - 6)
         urshr           v7.8h, v16.8h, #(8 - 6)
         stp             q6, q7, [x13], #32
@@ -451,10 +444,8 @@ function ff_vvc_dmvr_hv_8_neon, export=1
         // width > 8
         ldur            d5, [src, #1]
         ldr             d4, [src], #8
-        uxtl            v7.8h, v5.8b
-        uxtl            v6.8h, v4.8b
-        mul             v6.8h, v6.8h, v0.8h
-        mla             v6.8h, v7.8h, v1.8h
+        umull           v6.8h, v4.8b, v0.8b
+        umlal           v6.8h, v5.8b, v1.8b
         urshr           v6.8h, v6.8h, #(8 - 6)
         str             q6, [x13], #16
 
@@ -468,10 +459,8 @@ function ff_vvc_dmvr_hv_8_neon, export=1
 3:
         ldur            s5, [src, #1]
         ldr             s4, [src], #4
-        uxtl            v7.8h, v5.8b
-        uxtl            v6.8h, v4.8b
-        mul             v6.4h, v6.4h, v0.4h
-        mla             v6.4h, v7.4h, v1.4h
+        umull           v6.8h, v4.8b, v0.8b
+        umlal           v6.8h, v5.8b, v1.8b
         urshr           v6.4h, v6.4h, #(8 - 6)
         str             d6, [x13], #8
 
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2025-09-04 21:16 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=175702055901.25.14087152834822354223@463a07221176 \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git