From: Geoff Hill <geoff@geoffhill.org>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH v3 4/5] avcodec/ac3: Implement sum_square_butterfly_int32 for aarch64 NEON
Date: Tue, 2 Apr 2024 23:43:34 -0700
Message-ID: <17305bcf-364b-4f60-8e60-633466da4918@geoffhill.org> (raw)
Signed-off-by: Geoff Hill <geoff@geoffhill.org>
---
libavcodec/aarch64/ac3dsp_init_aarch64.c | 5 +++++
libavcodec/aarch64/ac3dsp_neon.S | 24 +++++++++++++++++++++
tests/checkasm/ac3dsp.c | 27 ++++++++++++++++++++++++
3 files changed, 56 insertions(+)
diff --git a/libavcodec/aarch64/ac3dsp_init_aarch64.c b/libavcodec/aarch64/ac3dsp_init_aarch64.c
index 1bdc215b51..e95436c651 100644
--- a/libavcodec/aarch64/ac3dsp_init_aarch64.c
+++ b/libavcodec/aarch64/ac3dsp_init_aarch64.c
@@ -28,6 +28,10 @@
void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
+void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
+ const int32_t *coef0,
+ const int32_t *coef1,
+ int len);
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
{
@@ -37,4 +41,5 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
c->ac3_exponent_min = ff_ac3_exponent_min_neon;
c->extract_exponents = ff_ac3_extract_exponents_neon;
c->float_to_fixed24 = ff_float_to_fixed24_neon;
+ c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
}
diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
index b26f71a3f6..fa8fcf2e47 100644
--- a/libavcodec/aarch64/ac3dsp_neon.S
+++ b/libavcodec/aarch64/ac3dsp_neon.S
@@ -64,3 +64,27 @@ function ff_float_to_fixed24_neon, export=1
b.ne 0b
ret
endfunc
+
+function ff_ac3_sum_square_butterfly_int32_neon, export=1 // x0 = sum[4], x1 = coef0, x2 = coef1, w3 = len
+ cbz w3, 1f // len == 0: return immediately; the st1 below is skipped, so sum[] is left unwritten. NOTE(review): confirm callers accept that
+ movi v0.2d, #0 // v0 accumulates coef0[i]^2
+ movi v1.2d, #0 // v1 accumulates coef1[i]^2
+ movi v2.2d, #0 // v2 accumulates (coef0[i] + coef1[i])^2
+ movi v3.2d, #0 // v3 accumulates (coef0[i] - coef1[i])^2
+0: ld1 {v4.2s}, [x1], #8 // load two int32 from coef0, advance the pointer by 8 bytes
+ ld1 {v5.2s}, [x2], #8 // load two int32 from coef1, advance the pointer by 8 bytes
+ add v6.2s, v4.2s, v5.2s // lane-wise sum, computed in 32 bits. NOTE(review): presumably inputs are small enough not to wrap; confirm
+ sub v7.2s, v4.2s, v5.2s // lane-wise difference, computed in 32 bits
+ smlal v0.2d, v4.2s, v4.2s // signed widening multiply-accumulate: 32x32 -> 64-bit lanes
+ smlal v1.2d, v5.2s, v5.2s
+ smlal v2.2d, v6.2s, v6.2s
+ smlal v3.2d, v7.2s, v7.2s
+ subs w3, w3, #2 // two elements are consumed per iteration
+ b.gt 0b // loop while the remaining count is > 0. NOTE(review): an odd len still loads a full pair; confirm len is always even
+ addp d0, v0.2d // fold the two 64-bit lanes of each accumulator into one scalar
+ addp d1, v1.2d
+ addp d2, v2.2d
+ addp d3, v3.2d
+ st1 {v0.1d-v3.1d}, [x0] // store the four 64-bit totals to sum[0..3]
+1: ret
+endfunc
diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
index a8a20349f9..c920dc9eb0 100644
--- a/tests/checkasm/ac3dsp.c
+++ b/tests/checkasm/ac3dsp.c
@@ -136,6 +136,32 @@ static void check_float_to_fixed24(AC3DSPContext *c) {
report("float_to_fixed24");
}
+static void check_ac3_sum_square_butterfly_int32(AC3DSPContext *c) {
+#define ELEMS 240 /* number of coefficients passed per channel in each call */
+    LOCAL_ALIGNED_16(int32_t, lt, [ELEMS]);
+    LOCAL_ALIGNED_16(int32_t, rt, [ELEMS]);
+    LOCAL_ALIGNED_16(int64_t, v1, [4]); /* output of call_ref; int64_t to match the int64_t[4] argument */
+    LOCAL_ALIGNED_16(int64_t, v2, [4]); /* output of call_new */
+    /* Runs c->sum_square_butterfly_int32 via call_ref and call_new on the same lt/rt and fails on any difference. */
+    declare_func(void, int64_t[4], const int32_t *, const int32_t *, int);
+    /* lt/rt are filled once and shared by both calls. */
+    randomize_i24(lt, ELEMS);
+    randomize_i24(rt, ELEMS);
+
+    if (check_func(c->sum_square_butterfly_int32,
+                   "ac3_sum_square_butterfly_int32")) {
+        call_ref(v1, lt, rt, ELEMS);
+        call_new(v2, lt, rt, ELEMS);
+        /* All four 64-bit sums must match byte for byte. */
+        if (memcmp(v1, v2, sizeof(int64_t[4])) != 0)
+            fail();
+
+        bench_new(v2, lt, rt, ELEMS);
+    }
+
+    report("ac3_sum_square_butterfly_int32");
+}
+
void checkasm_check_ac3dsp(void)
{
AC3DSPContext c;
@@ -144,4 +170,5 @@ void checkasm_check_ac3dsp(void)
check_ac3_exponent_min(&c);
check_ac3_extract_exponents(&c);
check_float_to_fixed24(&c);
+ check_ac3_sum_square_butterfly_int32(&c);
}
--
2.44.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2024-04-03 6:44 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-03 6:43 Geoff Hill [this message]
2024-04-04 12:58 ` Martin Storsjö
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=17305bcf-364b-4f60-8e60-633466da4918@geoffhill.org \
--to=geoff@geoffhill.org \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git