* [FFmpeg-devel] [PATCH] Replace uxtl with umull in dmvr_hv_8 (PR #20442)
@ 2025-09-04 21:16 welder via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: welder via ffmpeg-devel @ 2025-09-04 21:16 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: welder
PR #20442 opened by welder
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20442
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20442.patch
A low-hanging fruit.
Results on A78.
Before:
dmvr_hv_8_12x20_neon: 205.3 ( 5.21x)
dmvr_hv_8_20x12_neon: 171.8 ( 3.15x)
dmvr_hv_8_20x20_neon: 282.7 ( 3.11x)
After:
dmvr_hv_8_12x20_neon: 172.7 ( 5.58x)
dmvr_hv_8_20x12_neon: 133.3 ( 3.36x)
dmvr_hv_8_20x20_neon: 214.6 ( 3.40x)
From 55e2f5d2661e23e5adab5351effec892294fd708 Mon Sep 17 00:00:00 2001
From: Krzysztof Pyrkosz <ffmpeg@szaka.eu>
Date: Thu, 4 Sep 2025 22:56:43 +0200
Subject: [PATCH] Replace uxtl with umull in dmvr_hv_8
Results on A78.
Before:
dmvr_hv_8_12x20_neon: 205.3 ( 5.21x)
dmvr_hv_8_20x12_neon: 171.8 ( 3.15x)
dmvr_hv_8_20x20_neon: 282.7 ( 3.11x)
After:
dmvr_hv_8_12x20_neon: 172.7 ( 5.58x)
dmvr_hv_8_20x12_neon: 133.3 ( 3.36x)
dmvr_hv_8_20x20_neon: 214.6 ( 3.40x)
---
libavcodec/aarch64/vvc/inter.S | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/libavcodec/aarch64/vvc/inter.S b/libavcodec/aarch64/vvc/inter.S
index 50fc073dc9..a6648b64fc 100644
--- a/libavcodec/aarch64/vvc/inter.S
+++ b/libavcodec/aarch64/vvc/inter.S
@@ -393,13 +393,10 @@ function ff_vvc_dmvr_hv_8_neon, export=1
movrel x9, X(ff_vvc_inter_luma_dmvr_filters)
add x12, x9, mx, lsl #1
- ldrb w10, [x12]
- ldrb w11, [x12, #1]
mov tmp0, sp
add tmp1, tmp0, #(VVC_MAX_PB_SIZE * 2)
// We know the value are positive
- dup v0.8h, w10 // filter_x[0]
- dup v1.8h, w11 // filter_x[1]
+ ld2r {v0.16b, v1.16b}, [x12]
add x12, x9, my, lsl #1
ldrb w10, [x12]
@@ -424,14 +421,10 @@ function ff_vvc_dmvr_hv_8_neon, export=1
// width > 16
ldur q5, [src, #1]
ldr q4, [src], #16
- uxtl v7.8h, v5.8b
- uxtl2 v17.8h, v5.16b
- uxtl v6.8h, v4.8b
- uxtl2 v16.8h, v4.16b
- mul v6.8h, v6.8h, v0.8h
- mul v16.8h, v16.8h, v0.8h
- mla v6.8h, v7.8h, v1.8h
- mla v16.8h, v17.8h, v1.8h
+ umull v6.8h, v4.8b, v0.8b
+ umull2 v16.8h, v4.16b, v0.16b
+ umlal v6.8h, v5.8b, v1.8b
+ umlal2 v16.8h, v5.16b, v1.16b
urshr v6.8h, v6.8h, #(8 - 6)
urshr v7.8h, v16.8h, #(8 - 6)
stp q6, q7, [x13], #32
@@ -451,10 +444,8 @@ function ff_vvc_dmvr_hv_8_neon, export=1
// width > 8
ldur d5, [src, #1]
ldr d4, [src], #8
- uxtl v7.8h, v5.8b
- uxtl v6.8h, v4.8b
- mul v6.8h, v6.8h, v0.8h
- mla v6.8h, v7.8h, v1.8h
+ umull v6.8h, v4.8b, v0.8b
+ umlal v6.8h, v5.8b, v1.8b
urshr v6.8h, v6.8h, #(8 - 6)
str q6, [x13], #16
@@ -468,10 +459,8 @@ function ff_vvc_dmvr_hv_8_neon, export=1
3:
ldur s5, [src, #1]
ldr s4, [src], #4
- uxtl v7.8h, v5.8b
- uxtl v6.8h, v4.8b
- mul v6.4h, v6.4h, v0.4h
- mla v6.4h, v7.4h, v1.4h
+ umull v6.8h, v4.8b, v0.8b
+ umlal v6.8h, v5.8b, v1.8b
urshr v6.4h, v6.4h, #(8 - 6)
str d6, [x13], #8
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-09-04 21:16 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-04 21:16 [FFmpeg-devel] [PATCH] Replace uxtl with umull in dmvr_hv_8 (PR #20442) welder via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git