From: Krzysztof Pyrkosz via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Krzysztof Pyrkosz <ffmpeg@szaka.eu>
Subject: [FFmpeg-devel] [PATCH 1/2] avcodec/aarch64/vvc: Optimize vvc_avg{8, 10, 12}
Date: Thu, 20 Feb 2025 19:49:28 +0100
Message-ID: <20250220184929.1943-1-ffmpeg@szaka.eu> (raw)
In-Reply-To: <tencent_7B45D309A84A73FFD37DD5DFE907EEA8C505@qq.com>

---
 libavcodec/aarch64/vvc/inter.S | 125 ++++++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 3 deletions(-)

diff --git a/libavcodec/aarch64/vvc/inter.S b/libavcodec/aarch64/vvc/inter.S
index 0edc861f97..b65920e640 100644
--- a/libavcodec/aarch64/vvc/inter.S
+++ b/libavcodec/aarch64/vvc/inter.S
@@ -217,13 +217,132 @@ function ff_vvc_\type\()_\bit_depth\()_neon, export=1
 endfunc
 .endm
 
-vvc_avg avg, 8
-vvc_avg avg, 10
-vvc_avg avg, 12
 vvc_avg w_avg, 8
 vvc_avg w_avg, 10
 vvc_avg w_avg, 12
 
+.macro vvc_avg2 bit_depth
+function ff_vvc_avg_\bit_depth\()_neon, export=1
+        mov             x10, #(VVC_MAX_PB_SIZE * 2)
+.if \bit_depth != 8
+        movi            v16.8h, #0
+        movi            v17.16b, #255
+        ushr            v17.8h, v17.8h, #(16 - \bit_depth)
+.endif
+        cmp             w4, #8
+        b.gt            16f
+        b.eq            8f
+        cmp             w4, #4
+        b.eq            4f
+
+2: // width == 2
+        ldr             s0, [x2]
+        subs            w5, w5, #1
+        ldr             s1, [x3]
+.if \bit_depth == 8
+        shadd           v0.4h, v0.4h, v1.4h
+        sqrshrun        v0.8b, v0.8h, #(15 - 1 - \bit_depth)
+        str             h0, [x0]
+.else
+        shadd           v0.4h, v0.4h, v1.4h
+        srshr           v0.4h, v0.4h, #(15 - 1 - \bit_depth)
+        smax            v0.4h, v0.4h, v16.4h
+        smin            v0.4h, v0.4h, v17.4h
+        str             s0, [x0]
+.endif
+        add             x2, x2, #(VVC_MAX_PB_SIZE * 2)
+        add             x3, x3, #(VVC_MAX_PB_SIZE * 2)
+        add             x0, x0, x1
+        b.ne            2b
+        ret
+
+4: // width == 4
+        ldr             d0, [x2]
+        subs            w5, w5, #1
+        ldr             d1, [x3]
+.if \bit_depth == 8
+        shadd           v0.4h, v0.4h, v1.4h
+        sqrshrun        v0.8b, v0.8h, #(15 - 1 - \bit_depth)
+        str             s0, [x0]
+.else
+        shadd           v0.4h, v0.4h, v1.4h
+        srshr           v0.4h, v0.4h, #(15 - 1 - \bit_depth)
+        smax            v0.4h, v0.4h, v16.4h
+        smin            v0.4h, v0.4h, v17.4h
+        str             d0, [x0]
+.endif
+        add             x2, x2, #(VVC_MAX_PB_SIZE * 2)
+        add             x3, x3, #(VVC_MAX_PB_SIZE * 2)
+        add             x0, x0, x1
+        b.ne            4b
+        ret
+
+8: // width == 8
+        ldr             q0, [x2]
+        subs            w5, w5, #1
+        ldr             q1, [x3]
+.if \bit_depth == 8
+        shadd           v0.8h, v0.8h, v1.8h
+        sqrshrun        v0.8b, v0.8h, #(15 - 1 - \bit_depth)
+        str             d0, [x0]
+.else
+        shadd           v0.8h, v0.8h, v1.8h
+        srshr           v0.8h, v0.8h, #(15 - 1 - \bit_depth)
+        smax            v0.8h, v0.8h, v16.8h
+        smin            v0.8h, v0.8h, v17.8h
+        str             q0, [x0]
+.endif
+        add             x2, x2, #(VVC_MAX_PB_SIZE * 2)
+        add             x3, x3, #(VVC_MAX_PB_SIZE * 2)
+        add             x0, x0, x1
+        b.ne            8b
+        ret
+
+16: // width >= 16
+.if \bit_depth == 8
+        sub             x1, x1, w4, sxtw
+.else
+        sub             x1, x1, w4, sxtw #1
+.endif
+        sub             x10, x10, w4, sxtw #1
+3:
+        mov             w6, w4 // width
+1:
+        ldp             q0, q1, [x2], #32
+        subs            w6, w6, #16
+        ldp             q2, q3, [x3], #32
+.if \bit_depth == 8
+        shadd           v4.8h, v0.8h, v2.8h
+        shadd           v5.8h, v1.8h, v3.8h
+        sqrshrun        v0.8b, v4.8h, #6
+        sqrshrun2       v0.16b, v5.8h, #6
+        st1             {v0.16b}, [x0], #16
+.else
+        shadd           v4.8h, v0.8h, v2.8h
+        shadd           v5.8h, v1.8h, v3.8h
+        srshr           v0.8h, v4.8h, #(15 - 1 - \bit_depth)
+        srshr           v1.8h, v5.8h, #(15 - 1 - \bit_depth)
+        smax            v0.8h, v0.8h, v16.8h
+        smax            v1.8h, v1.8h, v16.8h
+        smin            v0.8h, v0.8h, v17.8h
+        smin            v1.8h, v1.8h, v17.8h
+        stp             q0, q1, [x0], #32
+.endif
+        b.ne            1b
+
+        subs            w5, w5, #1
+        add             x2, x2, x10
+        add             x3, x3, x10
+        add             x0, x0, x1
+        b.ne            3b
+        ret
+endfunc
+.endm
+
+vvc_avg2 8
+vvc_avg2 10
+vvc_avg2 12
+
 /* x0: int16_t *dst
  * x1: const uint8_t *_src
  * x2: ptrdiff_t _src_stride
-- 
2.47.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2025-02-20 18:49 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-19 17:40 Krzysztof Pyrkosz via ffmpeg-devel
2025-02-19 17:40 ` [FFmpeg-devel] [PATCH 2/2] avcodec/aarch64/vvc: Use rounding shift NEON instruction Krzysztof Pyrkosz via ffmpeg-devel
2025-02-20 8:08 ` Zhao Zhili
2025-02-20 7:20 ` [FFmpeg-devel] [PATCH 1/2] avcodec/aarch64/vvc: Optimize vvc_avg{8, 10, 12} Zhao Zhili
2025-02-20 18:49 ` Krzysztof Pyrkosz via ffmpeg-devel [this message]
2025-02-20 18:49 ` [FFmpeg-devel] [PATCH 2/2] avcodec/aarch64/vvc: Use rounding shift NEON instruction Krzysztof Pyrkosz via ffmpeg-devel
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250220184929.1943-1-ffmpeg@szaka.eu \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=ffmpeg@szaka.eu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

    git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

    # If you have public-inbox 1.1+ installed, you may
    # initialize and index your mirror using the following commands:
    public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
        ffmpegdev@gitmailbox.com
    public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git