From: "J. Dekker" <jdek@itanimul.li>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH 2/3] lavc/aarch64: reformat add_res funcs
Date: Thu, 23 Jun 2022 20:04:06 +0200
Message-ID: <20220623180407.21081-2-jdek@itanimul.li> (raw)
In-Reply-To: <20220623180407.21081-1-jdek@itanimul.li>
Signed-off-by: J. Dekker <jdek@itanimul.li>
---
libavcodec/aarch64/hevcdsp_idct_neon.S | 216 ++++++++++++-------------
1 file changed, 108 insertions(+), 108 deletions(-)
diff --git a/libavcodec/aarch64/hevcdsp_idct_neon.S b/libavcodec/aarch64/hevcdsp_idct_neon.S
index 0869431294..484eea8437 100644
--- a/libavcodec/aarch64/hevcdsp_idct_neon.S
+++ b/libavcodec/aarch64/hevcdsp_idct_neon.S
@@ -27,21 +27,21 @@
#include "libavutil/aarch64/asm.S"
const trans, align=4
- .short 64, 83, 64, 36
- .short 89, 75, 50, 18
- .short 90, 87, 80, 70
- .short 57, 43, 25, 9
- .short 90, 90, 88, 85
- .short 82, 78, 73, 67
- .short 61, 54, 46, 38
- .short 31, 22, 13, 4
+ .short 64, 83, 64, 36
+ .short 89, 75, 50, 18
+ .short 90, 87, 80, 70
+ .short 57, 43, 25, 9
+ .short 90, 90, 88, 85
+ .short 82, 78, 73, 67
+ .short 61, 54, 46, 38
+ .short 31, 22, 13, 4
endconst
.macro clip10 in1, in2, c1, c2
- smax \in1, \in1, \c1
- smax \in2, \in2, \c1
- smin \in1, \in1, \c2
- smin \in2, \in2, \c2
+ smax \in1, \in1, \c1
+ smax \in2, \in2, \c1
+ smin \in1, \in1, \c2
+ smin \in2, \in2, \c2
.endm
function ff_hevc_add_residual_4x4_8_neon, export=1
@@ -50,13 +50,13 @@ function ff_hevc_add_residual_4x4_8_neon, export=1
ld1 {v2.s}[1], [x0], x2
ld1 {v2.s}[2], [x0], x2
ld1 {v2.s}[3], [x0], x2
- sub x0, x0, x2, lsl #2
- uxtl v6.8h, v2.8b
- uxtl2 v7.8h, v2.16b
- sqadd v0.8h, v0.8h, v6.8h
- sqadd v1.8h, v1.8h, v7.8h
- sqxtun v0.8b, v0.8h
- sqxtun2 v0.16b, v1.8h
+ sub x0, x0, x2, lsl #2
+ uxtl v6.8h, v2.8b
+ uxtl2 v7.8h, v2.16b
+ sqadd v0.8h, v0.8h, v6.8h
+ sqadd v1.8h, v1.8h, v7.8h
+ sqxtun v0.8b, v0.8h
+ sqxtun2 v0.16b, v1.8h
st1 {v0.s}[0], [x0], x2
st1 {v0.s}[1], [x0], x2
st1 {v0.s}[2], [x0], x2
@@ -70,63 +70,63 @@ function ff_hevc_add_residual_4x4_10_neon, export=1
ld1 {v2.d}[0], [x12], x2
ld1 {v2.d}[1], [x12], x2
ld1 {v3.d}[0], [x12], x2
- sqadd v0.8h, v0.8h, v2.8h
+ sqadd v0.8h, v0.8h, v2.8h
ld1 {v3.d}[1], [x12], x2
- movi v4.8h, #0
- sqadd v1.8h, v1.8h, v3.8h
- mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF
- clip10 v0.8h, v1.8h, v4.8h, v5.8h
- st1 {v0.d}[0], [x0], x2
- st1 {v0.d}[1], [x0], x2
- st1 {v1.d}[0], [x0], x2
- st1 {v1.d}[1], [x0], x2
+ movi v4.8h, #0
+ sqadd v1.8h, v1.8h, v3.8h
+ mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF
+ clip10 v0.8h, v1.8h, v4.8h, v5.8h
+ st1 {v0.d}[0], [x0], x2
+ st1 {v0.d}[1], [x0], x2
+ st1 {v1.d}[0], [x0], x2
+ st1 {v1.d}[1], [x0], x2
ret
endfunc
function ff_hevc_add_residual_8x8_8_neon, export=1
- add x12, x0, x2
- add x2, x2, x2
- mov x3, #8
-1: subs x3, x3, #2
- ld1 {v2.d}[0], [x0]
- ld1 {v2.d}[1], [x12]
- uxtl v3.8h, v2.8b
+ add x12, x0, x2
+ add x2, x2, x2
+ mov x3, #8
+1: subs x3, x3, #2
+ ld1 {v2.d}[0], [x0]
+ ld1 {v2.d}[1], [x12]
+ uxtl v3.8h, v2.8b
ld1 {v0.8h-v1.8h}, [x1], #32
- uxtl2 v2.8h, v2.16b
- sqadd v0.8h, v0.8h, v3.8h
- sqadd v1.8h, v1.8h, v2.8h
- sqxtun v0.8b, v0.8h
- sqxtun2 v0.16b, v1.8h
- st1 {v0.d}[0], [x0], x2
- st1 {v0.d}[1], [x12], x2
- bne 1b
+ uxtl2 v2.8h, v2.16b
+ sqadd v0.8h, v0.8h, v3.8h
+ sqadd v1.8h, v1.8h, v2.8h
+ sqxtun v0.8b, v0.8h
+ sqxtun2 v0.16b, v1.8h
+ st1 {v0.d}[0], [x0], x2
+ st1 {v0.d}[1], [x12], x2
+ bne 1b
ret
endfunc
function ff_hevc_add_residual_8x8_10_neon, export=1
- add x12, x0, x2
- add x2, x2, x2
- mov x3, #8
- movi v4.8h, #0
- mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF
-1: subs x3, x3, #2
+ add x12, x0, x2
+ add x2, x2, x2
+ mov x3, #8
+ movi v4.8h, #0
+ mvni v5.8h, #0xFC, lsl #8 // movi #0x3FF
+1: subs x3, x3, #2
ld1 {v0.8h-v1.8h}, [x1], #32
- ld1 {v2.8h}, [x0]
- sqadd v0.8h, v0.8h, v2.8h
- ld1 {v3.8h}, [x12]
- sqadd v1.8h, v1.8h, v3.8h
- clip10 v0.8h, v1.8h, v4.8h, v5.8h
- st1 {v0.8h}, [x0], x2
- st1 {v1.8h}, [x12], x2
- bne 1b
+ ld1 {v2.8h}, [x0]
+ sqadd v0.8h, v0.8h, v2.8h
+ ld1 {v3.8h}, [x12]
+ sqadd v1.8h, v1.8h, v3.8h
+ clip10 v0.8h, v1.8h, v4.8h, v5.8h
+ st1 {v0.8h}, [x0], x2
+ st1 {v1.8h}, [x12], x2
+ bne 1b
ret
endfunc
function ff_hevc_add_residual_16x16_8_neon, export=1
- mov x3, #16
+ mov x3, #16
add x12, x0, x2
- add x2, x2, x2
-1: subs x3, x3, #2
+ add x2, x2, x2
+1: subs x3, x3, #2
ld1 {v16.16b}, [x0]
ld1 {v0.8h-v3.8h}, [x1], #64
ld1 {v19.16b}, [x12]
@@ -134,47 +134,47 @@ function ff_hevc_add_residual_16x16_8_neon, export=1
uxtl2 v18.8h, v16.16b
uxtl v20.8h, v19.8b
uxtl2 v21.8h, v19.16b
- sqadd v0.8h, v0.8h, v17.8h
- sqadd v1.8h, v1.8h, v18.8h
- sqadd v2.8h, v2.8h, v20.8h
- sqadd v3.8h, v3.8h, v21.8h
- sqxtun v0.8b, v0.8h
+ sqadd v0.8h, v0.8h, v17.8h
+ sqadd v1.8h, v1.8h, v18.8h
+ sqadd v2.8h, v2.8h, v20.8h
+ sqadd v3.8h, v3.8h, v21.8h
+ sqxtun v0.8b, v0.8h
sqxtun2 v0.16b, v1.8h
- sqxtun v1.8b, v2.8h
+ sqxtun v1.8b, v2.8h
sqxtun2 v1.16b, v3.8h
st1 {v0.16b}, [x0], x2
st1 {v1.16b}, [x12], x2
- bne 1b
+ bne 1b
ret
endfunc
function ff_hevc_add_residual_16x16_10_neon, export=1
- mov x3, #16
+ mov x3, #16
movi v20.8h, #0
mvni v21.8h, #0xFC, lsl #8 // movi #0x3FF
add x12, x0, x2
- add x2, x2, x2
-1: subs x3, x3, #2
+ add x2, x2, x2
+1: subs x3, x3, #2
ld1 {v16.8h-v17.8h}, [x0]
- ld1 {v0.8h-v3.8h}, [x1], #64
- sqadd v0.8h, v0.8h, v16.8h
+ ld1 {v0.8h-v3.8h}, [x1], #64
+ sqadd v0.8h, v0.8h, v16.8h
ld1 {v18.8h-v19.8h}, [x12]
- sqadd v1.8h, v1.8h, v17.8h
- sqadd v2.8h, v2.8h, v18.8h
- sqadd v3.8h, v3.8h, v19.8h
- clip10 v0.8h, v1.8h, v20.8h, v21.8h
- clip10 v2.8h, v3.8h, v20.8h, v21.8h
- st1 {v0.8h-v1.8h}, [x0], x2
- st1 {v2.8h-v3.8h}, [x12], x2
- bne 1b
+ sqadd v1.8h, v1.8h, v17.8h
+ sqadd v2.8h, v2.8h, v18.8h
+ sqadd v3.8h, v3.8h, v19.8h
+ clip10 v0.8h, v1.8h, v20.8h, v21.8h
+ clip10 v2.8h, v3.8h, v20.8h, v21.8h
+ st1 {v0.8h-v1.8h}, [x0], x2
+ st1 {v2.8h-v3.8h}, [x12], x2
+ bne 1b
ret
endfunc
function ff_hevc_add_residual_32x32_8_neon, export=1
add x12, x0, x2
- add x2, x2, x2
- mov x3, #32
-1: subs x3, x3, #2
+ add x2, x2, x2
+ mov x3, #32
+1: subs x3, x3, #2
ld1 {v20.16b, v21.16b}, [x0]
uxtl v16.8h, v20.8b
uxtl2 v17.8h, v20.16b
@@ -187,43 +187,43 @@ function ff_hevc_add_residual_32x32_8_neon, export=1
uxtl2 v21.8h, v22.16b
uxtl v22.8h, v23.8b
uxtl2 v23.8h, v23.16b
- sqadd v0.8h, v0.8h, v16.8h
- sqadd v1.8h, v1.8h, v17.8h
- sqadd v2.8h, v2.8h, v18.8h
- sqadd v3.8h, v3.8h, v19.8h
- sqadd v4.8h, v4.8h, v20.8h
- sqadd v5.8h, v5.8h, v21.8h
- sqadd v6.8h, v6.8h, v22.8h
- sqadd v7.8h, v7.8h, v23.8h
- sqxtun v0.8b, v0.8h
+ sqadd v0.8h, v0.8h, v16.8h
+ sqadd v1.8h, v1.8h, v17.8h
+ sqadd v2.8h, v2.8h, v18.8h
+ sqadd v3.8h, v3.8h, v19.8h
+ sqadd v4.8h, v4.8h, v20.8h
+ sqadd v5.8h, v5.8h, v21.8h
+ sqadd v6.8h, v6.8h, v22.8h
+ sqadd v7.8h, v7.8h, v23.8h
+ sqxtun v0.8b, v0.8h
sqxtun2 v0.16b, v1.8h
- sqxtun v1.8b, v2.8h
+ sqxtun v1.8b, v2.8h
sqxtun2 v1.16b, v3.8h
- sqxtun v2.8b, v4.8h
+ sqxtun v2.8b, v4.8h
sqxtun2 v2.16b, v5.8h
- st1 {v0.16b, v1.16b}, [x0], x2
- sqxtun v3.8b, v6.8h
+ st1 {v0.16b, v1.16b}, [x0], x2
+ sqxtun v3.8b, v6.8h
sqxtun2 v3.16b, v7.8h
st1 {v2.16b, v3.16b}, [x12], x2
- bne 1b
+ bne 1b
ret
endfunc
function ff_hevc_add_residual_32x32_10_neon, export=1
- mov x3, #32
+ mov x3, #32
movi v20.8h, #0
mvni v21.8h, #0xFC, lsl #8 // movi #0x3FF
-1: subs x3, x3, #1
- ld1 {v0.8h-v3.8h}, [x1], #64
+1: subs x3, x3, #1
+ ld1 {v0.8h -v3.8h}, [x1], #64
ld1 {v16.8h-v19.8h}, [x0]
- sqadd v0.8h, v0.8h, v16.8h
- sqadd v1.8h, v1.8h, v17.8h
- sqadd v2.8h, v2.8h, v18.8h
- sqadd v3.8h, v3.8h, v19.8h
- clip10 v0.8h, v1.8h, v20.8h, v21.8h
- clip10 v2.8h, v3.8h, v20.8h, v21.8h
- st1 {v0.8h-v3.8h}, [x0], x2
- bne 1b
+ sqadd v0.8h, v0.8h, v16.8h
+ sqadd v1.8h, v1.8h, v17.8h
+ sqadd v2.8h, v2.8h, v18.8h
+ sqadd v3.8h, v3.8h, v19.8h
+ clip10 v0.8h, v1.8h, v20.8h, v21.8h
+ clip10 v2.8h, v3.8h, v20.8h, v21.8h
+ st1 {v0.8h-v3.8h}, [x0], x2
+ bne 1b
ret
endfunc
--
2.32.0 (Apple Git-132)
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads: [~2022-06-23 18:04 UTC | newest]
Thread overview: 12+ messages / expand [flat | nested] | mbox.gz | Atom feed | top
2022-06-23 18:04 [FFmpeg-devel] [PATCH 1/3] checkasm/hevc_add_res: add 12bit test J. Dekker
2022-06-23 18:04 ` J. Dekker [this message]
2022-08-09 11:04 ` [FFmpeg-devel] [PATCH 2/3] lavc/aarch64: reformat add_res funcs Martin Storsjö
2022-06-23 18:04 ` [FFmpeg-devel] [PATCH 3/3] lavc/aarch64: hevc_add_res add 12bit variants J. Dekker
2022-08-09 11:13 ` Martin Storsjö
2022-08-09 11:21 ` Martin Storsjö
2022-08-16 5:01 ` [FFmpeg-devel] [PATCH v2] " J. Dekker
2022-08-16 11:38 ` Martin Storsjö
2022-08-16 12:12 ` [FFmpeg-devel] [PATCH v3] " J. Dekker
2022-08-16 12:46 ` Martin Storsjö
2022-08-18 13:07 ` J. Dekker
2022-08-09 11:02 ` [FFmpeg-devel] [PATCH 1/3] checkasm/hevc_add_res: add 12bit test Martin Storsjö
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220623180407.21081-2-jdek@itanimul.li \
--to=jdek@itanimul.li \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git