From: Krzysztof Pyrkosz via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Krzysztof Pyrkosz <ffmpeg@szaka.eu>
Subject: [FFmpeg-devel] [PATCH 1/2] avcodec/aarch64/vvc: Optimize vvc_avg{8, 10, 12}
Date: Thu, 20 Feb 2025 19:49:28 +0100
Message-ID: <20250220184929.1943-1-ffmpeg@szaka.eu> (raw)
In-Reply-To: <tencent_7B45D309A84A73FFD37DD5DFE907EEA8C505@qq.com>
---
libavcodec/aarch64/vvc/inter.S | 125 ++++++++++++++++++++++++++++++++-
1 file changed, 122 insertions(+), 3 deletions(-)
diff --git a/libavcodec/aarch64/vvc/inter.S b/libavcodec/aarch64/vvc/inter.S
index 0edc861f97..b65920e640 100644
--- a/libavcodec/aarch64/vvc/inter.S
+++ b/libavcodec/aarch64/vvc/inter.S
@@ -217,13 +217,132 @@ function ff_vvc_\type\()_\bit_depth\()_neon, export=1
endfunc
.endm
-vvc_avg avg, 8
-vvc_avg avg, 10
-vvc_avg avg, 12
vvc_avg w_avg, 8
vvc_avg w_avg, 10
vvc_avg w_avg, 12
+.macro vvc_avg2 bit_depth // emits ff_vvc_avg_<bit_depth>_neon: rounded, clamped average of two blocks of 16-bit samples
+function ff_vvc_avg_\bit_depth\()_neon, export=1 // as used below: x0 = dst, x1 = dst stride (bytes), x2/x3 = the two sources, w4 = width, w5 = row count
+ mov x10, #(VVC_MAX_PB_SIZE * 2) // source row pitch in bytes; x10 is only consumed by the width >= 16 path
+.if \bit_depth != 8 // the 8-bit paths need no clamp vectors: sqrshrun already saturates to 0..255
+ movi v16.8h, #0 // v16 = lower clamp bound (0) in every 16-bit lane
+ movi v17.16b, #255 // all bits set ...
+ ushr v17.8h, v17.8h, #(16 - \bit_depth) // ... shifted down so v17 = (1 << bit_depth) - 1: upper clamp bound
+.endif
+ cmp w4, #8 // dispatch on width
+ b.gt 16f // width > 8: generic loop handling 16 samples per step
+ b.eq 8f
+ cmp w4, #4
+ b.eq 4f // every other width falls through to the width == 2 loop
+
+2: // width == 2
+ ldr s0, [x2] // 4 bytes = two 16-bit samples from the first source
+ subs w5, w5, #1 // one row fewer; nothing below touches the flags until the b.ne that ends this loop
+ ldr s1, [x3] // the matching two samples from the second source
+.if \bit_depth == 8
+ shadd v0.4h, v0.4h, v1.4h // signed halving add: (a + b) >> 1 without overflowing the 16-bit lanes
+ sqrshrun v0.8b, v0.8h, #(15 - 1 - \bit_depth) // rounding shift by the remaining 6 bits, saturating narrow to unsigned 8-bit
+ str h0, [x0] // 2 bytes = two 8-bit pixels
+.else
+ shadd v0.4h, v0.4h, v1.4h // signed halving add: (a + b) >> 1 without overflowing the 16-bit lanes
+ srshr v0.4h, v0.4h, #(15 - 1 - \bit_depth) // rounding shift by the remaining 14 - bit_depth bits
+ smax v0.4h, v0.4h, v16.4h // clamp below at 0
+ smin v0.4h, v0.4h, v17.4h // clamp above at (1 << bit_depth) - 1
+ str s0, [x0] // 4 bytes = two 16-bit pixels
+.endif
+ add x2, x2, #(VVC_MAX_PB_SIZE * 2) // next source rows (fixed pitch)
+ add x3, x3, #(VVC_MAX_PB_SIZE * 2)
+ add x0, x0, x1 // next destination row
+ b.ne 2b // loop until the row counter reached zero (flags from the subs above)
+ ret
+
+4: // width == 4
+ ldr d0, [x2] // 8 bytes = four 16-bit samples from the first source
+ subs w5, w5, #1 // one row fewer; flags stay live until the b.ne that ends this loop
+ ldr d1, [x3] // the matching four samples from the second source
+.if \bit_depth == 8
+ shadd v0.4h, v0.4h, v1.4h // (a + b) >> 1
+ sqrshrun v0.8b, v0.8h, #(15 - 1 - \bit_depth) // rounding shift by 6, saturating narrow to unsigned 8-bit
+ str s0, [x0] // 4 bytes = four 8-bit pixels
+.else
+ shadd v0.4h, v0.4h, v1.4h // (a + b) >> 1
+ srshr v0.4h, v0.4h, #(15 - 1 - \bit_depth) // rounding shift by 14 - bit_depth
+ smax v0.4h, v0.4h, v16.4h // clamp to [0, (1 << bit_depth) - 1]
+ smin v0.4h, v0.4h, v17.4h
+ str d0, [x0] // 8 bytes = four 16-bit pixels
+.endif
+ add x2, x2, #(VVC_MAX_PB_SIZE * 2) // next source rows (fixed pitch)
+ add x3, x3, #(VVC_MAX_PB_SIZE * 2)
+ add x0, x0, x1 // next destination row
+ b.ne 4b // loop until the row counter reached zero
+ ret
+
+8: // width == 8
+ ldr q0, [x2] // 16 bytes = eight 16-bit samples from the first source
+ subs w5, w5, #1 // one row fewer; flags stay live until the b.ne that ends this loop
+ ldr q1, [x3] // the matching eight samples from the second source
+.if \bit_depth == 8
+ shadd v0.8h, v0.8h, v1.8h // (a + b) >> 1
+ sqrshrun v0.8b, v0.8h, #(15 - 1 - \bit_depth) // rounding shift by 6, saturating narrow to unsigned 8-bit
+ str d0, [x0] // 8 bytes = eight 8-bit pixels
+.else
+ shadd v0.8h, v0.8h, v1.8h // (a + b) >> 1
+ srshr v0.8h, v0.8h, #(15 - 1 - \bit_depth) // rounding shift by 14 - bit_depth
+ smax v0.8h, v0.8h, v16.8h // clamp to [0, (1 << bit_depth) - 1]
+ smin v0.8h, v0.8h, v17.8h
+ str q0, [x0] // 16 bytes = eight 16-bit pixels
+.endif
+ add x2, x2, #(VVC_MAX_PB_SIZE * 2) // next source rows (fixed pitch)
+ add x3, x3, #(VVC_MAX_PB_SIZE * 2)
+ add x0, x0, x1 // next destination row
+ b.ne 8b // loop until the row counter reached zero
+ ret
+
+16: // width >= 16
+.if \bit_depth == 8
+ sub x1, x1, w4, sxtw // the post-incremented stores advance x0 by width bytes per row, so keep only the remainder of the stride
+.else
+ sub x1, x1, w4, sxtw #1 // same, but each output pixel is 2 bytes wide here
+.endif
+ sub x10, x10, w4, sxtw #1 // likewise for the sources: pitch minus the 2 * width bytes the post-indexed loads consume
+3:
+ mov w6, w4 // w6 = samples left in the current row (starts at width)
+1:
+ ldp q0, q1, [x2], #32 // sixteen 16-bit samples from the first source, pointer post-incremented
+ subs w6, w6, #16 // flags stay live until the b.ne 1b below
+ ldp q2, q3, [x3], #32 // the matching sixteen samples from the second source
+.if \bit_depth == 8
+ shadd v4.8h, v0.8h, v2.8h // (a + b) >> 1, low eight samples
+ shadd v5.8h, v1.8h, v3.8h // (a + b) >> 1, high eight samples
+ sqrshrun v0.8b, v4.8h, #6 // 6 = 15 - 1 - 8, the same shift the narrower 8-bit paths spell out symbolically
+ sqrshrun2 v0.16b, v5.8h, #6 // narrow the second half into the upper 8 bytes of v0
+ st1 {v0.16b}, [x0], #16 // sixteen 8-bit pixels, pointer post-incremented
+.else
+ shadd v4.8h, v0.8h, v2.8h // (a + b) >> 1, low eight samples
+ shadd v5.8h, v1.8h, v3.8h // (a + b) >> 1, high eight samples
+ srshr v0.8h, v4.8h, #(15 - 1 - \bit_depth) // rounding shift by 14 - bit_depth
+ srshr v1.8h, v5.8h, #(15 - 1 - \bit_depth)
+ smax v0.8h, v0.8h, v16.8h // clamp below at 0
+ smax v1.8h, v1.8h, v16.8h
+ smin v0.8h, v0.8h, v17.8h // clamp above at (1 << bit_depth) - 1
+ smin v1.8h, v1.8h, v17.8h
+ stp q0, q1, [x0], #32 // sixteen 16-bit pixels, pointer post-incremented
+.endif
+ b.ne 1b // NOTE(review): exits only when w6 hits exactly 0, so this presumes width is a multiple of 16 - confirm with callers
+
+ subs w5, w5, #1 // one row fewer; the adds below do not alter the flags
+ add x2, x2, x10 // skip the rest of the source pitch to reach the next row
+ add x3, x3, x10
+ add x0, x0, x1 // skip the rest of the destination stride
+ b.ne 3b // loop until the row counter reached zero
+ ret
+endfunc
+.endm
+
+vvc_avg2 8 // defines ff_vvc_avg_8_neon
+vvc_avg2 10 // defines ff_vvc_avg_10_neon
+vvc_avg2 12 // defines ff_vvc_avg_12_neon
+
/* x0: int16_t *dst
* x1: const uint8_t *_src
* x2: ptrdiff_t _src_stride
--
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2025-02-20 18:49 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-19 17:40 Krzysztof Pyrkosz via ffmpeg-devel
2025-02-19 17:40 ` [FFmpeg-devel] [PATCH 2/2] avcodec/aarch64/vvc: Use rounding shift NEON instruction Krzysztof Pyrkosz via ffmpeg-devel
2025-02-20 8:08 ` Zhao Zhili
2025-03-01 22:34 ` Martin Storsjö
2025-03-03 21:32 ` [FFmpeg-devel] [PATCH v2] avcodec/aarch64/vvc: Optimize NEON version of vvc_dmvr Krzysztof Pyrkosz via ffmpeg-devel
2025-03-04 8:36 ` Martin Storsjö
2025-02-20 7:20 ` [FFmpeg-devel] [PATCH 1/2] avcodec/aarch64/vvc: Optimize vvc_avg{8, 10, 12} Zhao Zhili
2025-02-20 18:49 ` Krzysztof Pyrkosz via ffmpeg-devel [this message]
2025-02-20 18:49 ` [FFmpeg-devel] [PATCH 2/2] avcodec/aarch64/vvc: Use rounding shift NEON instruction Krzysztof Pyrkosz via ffmpeg-devel
2025-02-26 8:54 ` [FFmpeg-devel] [PATCH 1/2] avcodec/aarch64/vvc: Optimize vvc_avg{8, 10, 12} Zhao Zhili
2025-03-01 22:21 ` Martin Storsjö
2025-03-03 21:18 ` [FFmpeg-devel] [PATCH v2] " Krzysztof Pyrkosz via ffmpeg-devel
2025-03-07 13:56 ` Martin Storsjö
2025-03-09 13:43 ` Nuo Mi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250220184929.1943-1-ffmpeg@szaka.eu \
--to=ffmpeg-devel@ffmpeg.org \
--cc=ffmpeg@szaka.eu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git