* [FFmpeg-devel] [PATCH] avcodec/aarch64/vvc: Implement dmvr_v_8 (PR #20563)
@ 2025-09-20 20:01 welder via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: welder via ffmpeg-devel @ 2025-09-20 20:01 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: welder
PR #20563 opened by welder
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20563
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20563.patch
The primary optimization is to load the first row before entering the loop instead of loading two rows each iteration.
From 832f354be2ae0e63e8c47dd1805225bdfff21851 Mon Sep 17 00:00:00 2001
From: Krzysztof Pyrkosz <ffmpeg@szaka.eu>
Date: Mon, 8 Sep 2025 20:56:24 +0200
Subject: [PATCH] avcodec/aarch64/vvc: Implement dmvr_v_8
A72
dmvr_v_8_12x20_neon: 207.0 ( 4.15x)
dmvr_v_8_20x12_neon: 170.4 ( 4.37x)
dmvr_v_8_20x20_neon: 273.4 ( 4.58x)
A53
dmvr_v_8_12x20_neon: 450.6 ( 4.21x)
dmvr_v_8_20x12_neon: 342.8 ( 3.70x)
dmvr_v_8_20x20_neon: 550.9 ( 3.79x)
---
libavcodec/aarch64/vvc/dsp_init.c | 2 ++
libavcodec/aarch64/vvc/inter.S | 56 +++++++++++++++++++++++++++++++
2 files changed, 58 insertions(+)
diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c
index bdfa142a5a..b7dc1d89f8 100644
--- a/libavcodec/aarch64/vvc/dsp_init.c
+++ b/libavcodec/aarch64/vvc/dsp_init.c
@@ -101,6 +101,7 @@ DMVR_FUN(, 12)
DMVR_FUN(h_, 8)
DMVR_FUN(h_, 10)
DMVR_FUN(h_, 12)
+DMVR_FUN(v_, 8)
DMVR_FUN(hv_, 8)
DMVR_FUN(hv_, 10)
DMVR_FUN(hv_, 12)
@@ -195,6 +196,7 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
c->inter.w_avg = vvc_w_avg_8;
c->inter.dmvr[0][0] = ff_vvc_dmvr_8_neon;
c->inter.dmvr[0][1] = ff_vvc_dmvr_h_8_neon;
+ c->inter.dmvr[1][0] = ff_vvc_dmvr_v_8_neon;
c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_8_neon;
c->inter.apply_bdof = ff_vvc_apply_bdof_8_neon;
diff --git a/libavcodec/aarch64/vvc/inter.S b/libavcodec/aarch64/vvc/inter.S
index 01d2ff155c..d9c545ccb5 100644
--- a/libavcodec/aarch64/vvc/inter.S
+++ b/libavcodec/aarch64/vvc/inter.S
@@ -385,6 +385,62 @@ function ff_vvc_dmvr_12_neon, export=1
ret
endfunc
+function ff_vvc_dmvr_v_8_neon, export=1                 // 2-tap blend of each 8-bit row with the row below it; dst = x0, src = x1, stride = x2, rows = w3
+        movrel          x7, X(ff_vvc_inter_luma_dmvr_filters)
+        add             x7, x7, x5, lsl #1              // x7 = table + 2 * x5: one pair of byte coefficients per index
+        ld2r            {v0.16b, v1.16b}, [x7]          // v0 = first coefficient in every lane, v1 = second coefficient
+        tbz             w6, #4, 12f                     // bit 4 of w6 clear -> 8 + 4 path (presumably w6 is the width, 12 or 20 - confirm)
+        // 16 + 4 samples per row. Only the new row is loaded per iteration; the previous one is kept in v2 / v16.
+        ldr             s16, [x1, #16]                  // first row, bytes 16..19 (upper lanes of v16 are zeroed)
+        ld1             {v2.16b}, [x1], x2              // first row, bytes 0..15; src += stride
+20:
+        ldr             s17, [x1, #16]                  // next row, bytes 16..19
+        umull           v4.8h, v0.8b, v2.8b             // c0 * current row, samples 0..7
+        umull2          v5.8h, v0.16b, v2.16b           // c0 * current row, samples 8..15
+        ld1             {v3.16b}, [x1], x2              // next row, bytes 0..15; src += stride
+        umull           v16.8h, v0.8b, v16.8b           // c0 * current row, samples 16..19 (v16 is refilled from v17 below)
+        umull           v6.8h, v1.8b, v3.8b             // c1 * next row, samples 0..7
+        umull2          v7.8h, v1.16b, v3.16b           // c1 * next row, samples 8..15
+        add             v4.8h, v4.8h, v6.8h
+        umull           v18.8h, v1.8b, v17.8b           // c1 * next row, samples 16..19
+        add             v5.8h, v5.8h, v7.8h
+        urshr           v4.8h, v4.8h, #2                // unsigned rounding shift: (sum + 2) >> 2
+        add             v19.4h, v16.4h, v18.4h
+        urshr           v5.8h, v5.8h, #2
+        urshr           v19.4h, v19.4h, #2
+        st1             {v4.8h, v5.8h}, [x0], #32       // 16 results, 16 bits each
+        subs            w3, w3, #1                      // sets the flags for b.ne; nothing below alters them
+        mov             v2.16b, v3.16b                  // next row becomes the current row
+        st1             {v19.4h}, [x0], #8              // remaining 4 results
+        mov             v16.16b, v17.16b
+        add             x0, x0, #(VVC_MAX_PB_SIZE * 2 - 32 - 8) // dst advances VVC_MAX_PB_SIZE * 2 bytes per row in total
+        b.ne            20b
+        ret
+        // 8 + 4 samples per row, same scheme as above.
+12:
+        ldr             s16, [x1, #8]                   // first row, bytes 8..11
+        ld1             {v2.8b}, [x1], x2               // first row, bytes 0..7; src += stride
+2:
+        ldr             s17, [x1, #8]                   // next row, bytes 8..11
+        umull           v4.8h, v0.8b, v2.8b             // c0 * current row, samples 0..7
+        ld1             {v3.8b}, [x1], x2               // next row, bytes 0..7; src += stride
+        umull           v16.8h, v0.8b, v16.8b           // c0 * current row, samples 8..11 (v16 is refilled from v17 below)
+        umull           v6.8h, v1.8b, v3.8b             // c1 * next row, samples 0..7
+        add             v4.8h, v4.8h, v6.8h
+        umull           v18.8h, v1.8b, v17.8b           // c1 * next row, samples 8..11
+        urshr           v4.8h, v4.8h, #2                // urshr, not srshr: the sums come from unsigned umull products
+        add             v19.4h, v16.4h, v18.4h
+        urshr           v19.4h, v19.4h, #2
+        st1             {v4.8h}, [x0], #16              // 8 results, 16 bits each
+        subs            w3, w3, #1                      // sets the flags for b.ne; nothing below alters them
+        mov             v2.16b, v3.16b                  // next row becomes the current row
+        st1             {v19.4h}, [x0], #8              // remaining 4 results
+        mov             v16.16b, v17.16b
+        add             x0, x0, #(VVC_MAX_PB_SIZE * 2 - 16 - 8) // dst advances VVC_MAX_PB_SIZE * 2 bytes per row in total
+        b.ne            2b
+        ret
+endfunc
+
function ff_vvc_dmvr_h_8_neon, export=1
movrel x7, X(ff_vvc_inter_luma_dmvr_filters)
add x7, x7, x4, lsl #1
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-09-20 20:01 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-20 20:01 [FFmpeg-devel] [PATCH] avcodec/aarch64/vvc: Implement dmvr_v_8 (PR #20563) welder via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git