From: welder via ffmpeg-devel <ffmpeg-devel@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: welder <code@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] avcodec/aarch64/vvc: Implement dmvr_v_8 (PR #20563) Date: Sat, 20 Sep 2025 20:01:11 -0000 Message-ID: <175839847284.25.14950100753767509775@463a07221176> (raw) PR #20563 opened by welder URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20563 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20563.patch The primary optimization is to load the first row before entering the loop instead of loading two rows each iteration. From 832f354be2ae0e63e8c47dd1805225bdfff21851 Mon Sep 17 00:00:00 2001 From: Krzysztof Pyrkosz <ffmpeg@szaka.eu> Date: Mon, 8 Sep 2025 20:56:24 +0200 Subject: [PATCH] avcodec/aarch64/vvc: Implement dmvr_v_8 A72 dmvr_v_8_12x20_neon: 207.0 ( 4.15x) dmvr_v_8_20x12_neon: 170.4 ( 4.37x) dmvr_v_8_20x20_neon: 273.4 ( 4.58x) A53 dmvr_v_8_12x20_neon: 450.6 ( 4.21x) dmvr_v_8_20x12_neon: 342.8 ( 3.70x) dmvr_v_8_20x20_neon: 550.9 ( 3.79x) --- libavcodec/aarch64/vvc/dsp_init.c | 2 ++ libavcodec/aarch64/vvc/inter.S | 56 +++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c index bdfa142a5a..b7dc1d89f8 100644 --- a/libavcodec/aarch64/vvc/dsp_init.c +++ b/libavcodec/aarch64/vvc/dsp_init.c @@ -101,6 +101,7 @@ DMVR_FUN(, 12) DMVR_FUN(h_, 8) DMVR_FUN(h_, 10) DMVR_FUN(h_, 12) +DMVR_FUN(v_, 8) DMVR_FUN(hv_, 8) DMVR_FUN(hv_, 10) DMVR_FUN(hv_, 12) @@ -195,6 +196,7 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd) c->inter.w_avg = vvc_w_avg_8; c->inter.dmvr[0][0] = ff_vvc_dmvr_8_neon; c->inter.dmvr[0][1] = ff_vvc_dmvr_h_8_neon; + c->inter.dmvr[1][0] = ff_vvc_dmvr_v_8_neon; c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_8_neon; c->inter.apply_bdof = ff_vvc_apply_bdof_8_neon; diff --git a/libavcodec/aarch64/vvc/inter.S b/libavcodec/aarch64/vvc/inter.S index 01d2ff155c..d9c545ccb5 100644 --- a/libavcodec/aarch64/vvc/inter.S 
+++ b/libavcodec/aarch64/vvc/inter.S @@ -385,6 +385,62 @@ function ff_vvc_dmvr_12_neon, export=1 ret endfunc +function ff_vvc_dmvr_v_8_neon, export=1 + movrel x7, X(ff_vvc_inter_luma_dmvr_filters) + add x7, x7, x5, lsl #1 + ld2r {v0.16b, v1.16b}, [x7] + tbz w6, #4, 12f + + ldr s16, [x1, #16] + ld1 {v2.16b}, [x1], x2 +20: + ldr s17, [x1, #16] + umull v4.8h, v0.8b, v2.8b + umull2 v5.8h, v0.16b, v2.16b + ld1 {v3.16b}, [x1], x2 + umull v16.8h, v0.8b, v16.8b + umull v6.8h, v1.8b, v3.8b + umull2 v7.8h, v1.16b, v3.16b + add v4.8h, v4.8h, v6.8h + umull v18.8h, v1.8b, v17.8b + add v5.8h, v5.8h, v7.8h + urshr v4.8h, v4.8h, #2 + add v19.4h, v16.4h, v18.4h + urshr v5.8h, v5.8h, #2 + urshr v19.4h, v19.4h, #2 + st1 {v4.8h, v5.8h}, [x0], #32 + subs w3, w3, #1 + mov v2.16b, v3.16b + st1 {v19.4h}, [x0], #8 + mov v16.16b, v17.16b + add x0, x0, #(VVC_MAX_PB_SIZE * 2 - 32 - 8) + b.ne 20b + ret + +12: + ldr s16, [x1, #8] + ld1 {v2.8b}, [x1], x2 +2: + ldr s17, [x1, #8] + umull v4.8h, v0.8b, v2.8b + ld1 {v3.8b}, [x1], x2 + umull v16.8h, v0.8b, v16.8b + umull v6.8h, v1.8b, v3.8b + add v4.8h, v4.8h, v6.8h + umull v18.8h, v1.8b, v17.8b + srshr v4.8h, v4.8h, #2 + add v19.4h, v16.4h, v18.4h + srshr v19.4h, v19.4h, #2 + st1 {v4.8h}, [x0], #16 + subs w3, w3, #1 + mov v2.16b, v3.16b + st1 {v19.4h}, [x0], #8 + mov v16.16b, v17.16b + add x0, x0, #(VVC_MAX_PB_SIZE * 2 - 16 - 8) + b.ne 2b + ret +endfunc + function ff_vvc_dmvr_h_8_neon, export=1 movrel x7, X(ff_vvc_inter_luma_dmvr_filters) add x7, x7, x4, lsl #1 -- 2.49.1 _______________________________________________ ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-09-20 20:01 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=175839847284.25.14950100753767509775@463a07221176 \ --to=ffmpeg-devel@ffmpeg.org \ --cc=code@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git