* [FFmpeg-devel] [PATCH v3 4/5] avcodec/ac3: Implement sum_square_butterfly_int32 for aarch64 NEON
@ 2024-04-03 6:43 Geoff Hill
2024-04-04 12:58 ` Martin Storsjö
0 siblings, 1 reply; 2+ messages in thread
From: Geoff Hill @ 2024-04-03 6:43 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: Geoff Hill <geoff@geoffhill.org>
---
libavcodec/aarch64/ac3dsp_init_aarch64.c | 5 +++++
libavcodec/aarch64/ac3dsp_neon.S | 24 +++++++++++++++++++++
tests/checkasm/ac3dsp.c | 27 ++++++++++++++++++++++++
3 files changed, 56 insertions(+)
diff --git a/libavcodec/aarch64/ac3dsp_init_aarch64.c b/libavcodec/aarch64/ac3dsp_init_aarch64.c
index 1bdc215b51..e95436c651 100644
--- a/libavcodec/aarch64/ac3dsp_init_aarch64.c
+++ b/libavcodec/aarch64/ac3dsp_init_aarch64.c
@@ -28,6 +28,10 @@
void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
+void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4], /* out: sums of coef0^2, coef1^2, (coef0+coef1)^2, (coef0-coef1)^2 */
+ const int32_t *coef0, /* first input array */
+ const int32_t *coef1, /* second input array */
+ int len); /* element count; the NEON loop consumes two elements per iteration */
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
{
@@ -37,4 +41,5 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
c->ac3_exponent_min = ff_ac3_exponent_min_neon;
c->extract_exponents = ff_ac3_extract_exponents_neon;
c->float_to_fixed24 = ff_float_to_fixed24_neon;
+ c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
}
diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
index b26f71a3f6..fa8fcf2e47 100644
--- a/libavcodec/aarch64/ac3dsp_neon.S
+++ b/libavcodec/aarch64/ac3dsp_neon.S
@@ -64,3 +64,27 @@ function ff_float_to_fixed24_neon, export=1
b.ne 0b
ret
endfunc
+
+function ff_ac3_sum_square_butterfly_int32_neon, export=1
+        movi            v0.2d, #0                       // accumulator: coef0^2
+        movi            v1.2d, #0                       // accumulator: coef1^2
+        movi            v2.2d, #0                       // accumulator: (coef0 + coef1)^2
+        movi            v3.2d, #0                       // accumulator: (coef0 - coef1)^2
+        cbz             w3, 1f                          // len == 0: skip the loop but still store zeroed sums
+0:      ld1             {v4.2s}, [x1], #8               // next two int32 from coef0 (x1), post-increment
+        ld1             {v5.2s}, [x2], #8               // next two int32 from coef1 (x2), post-increment
+        add             v6.2s, v4.2s, v5.2s             // coef0 + coef1 (32-bit lanes)
+        sub             v7.2s, v4.2s, v5.2s             // coef0 - coef1 (32-bit lanes)
+        smlal           v0.2d, v4.2s, v4.2s             // signed widening multiply-accumulate into 64-bit lanes
+        smlal           v1.2d, v5.2s, v5.2s
+        smlal           v2.2d, v6.2s, v6.2s
+        smlal           v3.2d, v7.2s, v7.2s
+        subs            w3, w3, #2                      // two elements consumed per iteration
+        b.gt            0b
+1:      addp            d0, v0.2d                       // fold the two 64-bit lanes of each accumulator
+        addp            d1, v1.2d
+        addp            d2, v2.2d
+        addp            d3, v3.2d
+        st1             {v0.1d-v3.1d}, [x0]             // write sum[0..3] through x0
+        ret
+endfunc
diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
index a8a20349f9..c920dc9eb0 100644
--- a/tests/checkasm/ac3dsp.c
+++ b/tests/checkasm/ac3dsp.c
@@ -136,6 +136,32 @@ static void check_float_to_fixed24(AC3DSPContext *c) {
report("float_to_fixed24");
}
+static void check_ac3_sum_square_butterfly_int32(AC3DSPContext *c) {
+#define ELEMS 240
+    LOCAL_ALIGNED_16(int32_t, lt, [ELEMS]);
+    LOCAL_ALIGNED_16(int32_t, rt, [ELEMS]);
+    LOCAL_ALIGNED_16(int64_t, v1, [4]); /* output of call_ref() */
+    LOCAL_ALIGNED_16(int64_t, v2, [4]); /* output of call_new() */
+    /* Runs c->sum_square_butterfly_int32 via call_ref()/call_new() on the same input and compares the four sums. */
+    declare_func(void, int64_t[4], const int32_t *, const int32_t *, int);
+    /* v1/v2 are int64_t so they match the int64_t[4] parameter declared above. */
+    randomize_i24(lt, ELEMS);
+    randomize_i24(rt, ELEMS);
+    /* The name is spelled the same way as in report() below. */
+    if (check_func(c->sum_square_butterfly_int32,
+                   "ac3_sum_square_butterfly_int32")) {
+        call_ref(v1, lt, rt, ELEMS);
+        call_new(v2, lt, rt, ELEMS);
+        /* Any byte difference across the four 64-bit sums fails the test. */
+        if (memcmp(v1, v2, sizeof(int64_t[4])) != 0)
+            fail();
+        /* Benchmark call; reuses v2 as scratch output. */
+        bench_new(v2, lt, rt, ELEMS);
+    }
+
+    report("ac3_sum_square_butterfly_int32");
+}
+
void checkasm_check_ac3dsp(void)
{
AC3DSPContext c;
@@ -144,4 +170,5 @@ void checkasm_check_ac3dsp(void)
check_ac3_exponent_min(&c);
check_ac3_extract_exponents(&c);
check_float_to_fixed24(&c);
+ check_ac3_sum_square_butterfly_int32(&c);
}
--
2.44.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 4/5] avcodec/ac3: Implement sum_square_butterfly_int32 for aarch64 NEON
2024-04-03 6:43 [FFmpeg-devel] [PATCH v3 4/5] avcodec/ac3: Implement sum_square_butterfly_int32 for aarch64 NEON Geoff Hill
@ 2024-04-04 12:58 ` Martin Storsjö
0 siblings, 0 replies; 2+ messages in thread
From: Martin Storsjö @ 2024-04-04 12:58 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Tue, 2 Apr 2024, Geoff Hill wrote:
> Signed-off-by: Geoff Hill <geoff@geoffhill.org>
> ---
> libavcodec/aarch64/ac3dsp_init_aarch64.c | 5 +++++
> libavcodec/aarch64/ac3dsp_neon.S | 24 +++++++++++++++++++++
> tests/checkasm/ac3dsp.c | 27 ++++++++++++++++++++++++
> 3 files changed, 56 insertions(+)
>
> diff --git a/libavcodec/aarch64/ac3dsp_init_aarch64.c b/libavcodec/aarch64/ac3dsp_init_aarch64.c
> index 1bdc215b51..e95436c651 100644
> --- a/libavcodec/aarch64/ac3dsp_init_aarch64.c
> +++ b/libavcodec/aarch64/ac3dsp_init_aarch64.c
> @@ -28,6 +28,10 @@
> void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
> void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
> void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
> +void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
> + const int32_t *coef0,
> + const int32_t *coef1,
> + int len);
>
> av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
> {
> @@ -37,4 +41,5 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
> c->ac3_exponent_min = ff_ac3_exponent_min_neon;
> c->extract_exponents = ff_ac3_extract_exponents_neon;
> c->float_to_fixed24 = ff_float_to_fixed24_neon;
> + c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
> }
> diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
> index b26f71a3f6..fa8fcf2e47 100644
> --- a/libavcodec/aarch64/ac3dsp_neon.S
> +++ b/libavcodec/aarch64/ac3dsp_neon.S
> @@ -64,3 +64,27 @@ function ff_float_to_fixed24_neon, export=1
> b.ne 0b
> ret
> endfunc
> +
> +function ff_ac3_sum_square_butterfly_int32_neon, export=1
> + cbz w3, 1f
The arm version of this patch doesn't have any corresponding check for
whether this parameter is zero, and the checkasm test doesn't test that
behaviour either. Is that never feasible (and we could leave it out here)
or should we test that and fix it in other assembly versions? In the
latter case, it's of course ok to defer that to a separate later patch,
not holding up this one.
// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-04-04 12:59 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-03 6:43 [FFmpeg-devel] [PATCH v3 4/5] avcodec/ac3: Implement sum_square_butterfly_int32 for aarch64 NEON Geoff Hill
2024-04-04 12:58 ` Martin Storsjö
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git