Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Geoff Hill <geoff@geoffhill.org>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH v4 5/5] avcodec/ac3: Implement sum_square_butterfly_float for aarch64 NEON
Date: Sat, 6 Apr 2024 07:26:13 -0700
Message-ID: <70079918-e9e5-42de-a3b1-2c34496e57fa@geoffhill.org> (raw)
In-Reply-To: <51f7be0a-4267-47bf-ab0b-bd6585806da7@geoffhill.org>

Signed-off-by: Geoff Hill <geoff@geoffhill.org>
---
 libavcodec/aarch64/ac3dsp_init_aarch64.c |  5 ++++
 libavcodec/aarch64/ac3dsp_neon.S         | 30 ++++++++++++++++++++++++
 tests/checkasm/ac3dsp.c                  | 26 ++++++++++++++++++++
 3 files changed, 61 insertions(+)

diff --git a/libavcodec/aarch64/ac3dsp_init_aarch64.c b/libavcodec/aarch64/ac3dsp_init_aarch64.c
index e95436c651..e367353e11 100644
--- a/libavcodec/aarch64/ac3dsp_init_aarch64.c
+++ b/libavcodec/aarch64/ac3dsp_init_aarch64.c
@@ -32,6 +32,10 @@ void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
                                             const int32_t *coef0,
                                             const int32_t *coef1,
                                             int len);
+void ff_ac3_sum_square_butterfly_float_neon(float sum[4],       /* out: the NEON routine stores 4 floats here */
+                                            const float *coef0, /* first input array  */
+                                            const float *coef1, /* second input array */
+                                            int len);           /* element count; the NEON loop consumes 4 per iteration */
 
 av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
 {
@@ -42,4 +46,5 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
     c->extract_exponents = ff_ac3_extract_exponents_neon;
     c->float_to_fixed24 = ff_float_to_fixed24_neon;
     c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
+    c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
 }
diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
index 77f9d20275..20beb6cc50 100644
--- a/libavcodec/aarch64/ac3dsp_neon.S
+++ b/libavcodec/aarch64/ac3dsp_neon.S
@@ -87,3 +87,33 @@ function ff_ac3_sum_square_butterfly_int32_neon, export=1
         st1             {v0.1d-v3.1d}, [x0]
         ret
 endfunc
+
+function ff_ac3_sum_square_butterfly_float_neon, export=1
+        movi            v0.4s, #0                       // v0-v3: four vector accumulators, cleared
+        movi            v1.4s, #0                       // args (AAPCS64 order of the C prototype): x0=sum, x1=coef0, x2=coef1, w3=len
+        movi            v2.4s, #0
+        movi            v3.4s, #0
+1:      ld1             {v30.4s}, [x1], #16             // load 4 floats via x1, then advance x1 by 16 bytes
+        ld1             {v31.4s}, [x2], #16             // load 4 floats via x2, then advance x2 by 16 bytes
+        fadd            v16.4s, v30.4s, v31.4s          // v16 = v30 + v31
+        fsub            v17.4s, v30.4s, v31.4s          // v17 = v30 - v31
+        fmla            v0.4s, v30.4s, v30.4s           // v0 += v30 * v30
+        fmla            v1.4s, v31.4s, v31.4s           // v1 += v31 * v31
+        fmla            v2.4s, v16.4s, v16.4s           // v2 += (v30 + v31)^2
+        fmla            v3.4s, v17.4s, v17.4s           // v3 += (v30 - v31)^2
+        subs            w3, w3, #4                      // 4 elements consumed per iteration
+        b.gt            1b                              // body always runs once; repeat while w3 > 0
+        faddp           v0.4s, v0.4s, v0.4s             // pairwise add: 4 lanes -> 2 partial sums
+        faddp           v0.2s, v0.2s, v0.2s             // 2 partial sums -> total in lane 0
+        st1             {v0.s}[0], [x0], #4             // store lane 0 at x0, then advance x0 by 4 bytes
+        faddp           v1.4s, v1.4s, v1.4s             // same horizontal reduction for v1
+        faddp           v1.2s, v1.2s, v1.2s
+        st1             {v1.s}[0], [x0], #4             // second output float
+        faddp           v2.4s, v2.4s, v2.4s             // same horizontal reduction for v2
+        faddp           v2.2s, v2.2s, v2.2s
+        st1             {v2.s}[0], [x0], #4             // third output float
+        faddp           v3.4s, v3.4s, v3.4s             // same horizontal reduction for v3
+        faddp           v3.2s, v3.2s, v3.2s
+        st1             {v3.s}[0], [x0]                 // fourth output float; no post-increment needed
+        ret
+endfunc
diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
index 573a76c764..442e965f3b 100644
--- a/tests/checkasm/ac3dsp.c
+++ b/tests/checkasm/ac3dsp.c
@@ -165,6 +165,31 @@ static void check_ac3_sum_square_butterfly_int32(AC3DSPContext *c) {
     report("ac3_sum_square_butterfly_int32");
 }
 
+static void check_ac3_sum_square_butterfly_float(AC3DSPContext *c) {
+    LOCAL_ALIGNED_32(float, lt, [ELEMS]); /* first input array  */
+    LOCAL_ALIGNED_32(float, rt, [ELEMS]); /* second input array */
+    LOCAL_ALIGNED_16(float, v1, [4]);     /* output of the reference call */
+    LOCAL_ALIGNED_16(float, v2, [4]);     /* output of the implementation under test */
+
+    declare_func(void, float[4], const float *, const float *, int);
+    /* Both inputs are filled by randomize_float() before any comparison. */
+    randomize_float(lt, ELEMS);
+    randomize_float(rt, ELEMS);
+    /* Run reference and new implementation on identical input. */
+    if (check_func(c->sum_square_butterfly_float,
+                   "ac3_sum_square_butterfly_float")) {
+        call_ref(v1, lt, rt, ELEMS);
+        call_new(v2, lt, rt, ELEMS);
+        /* The 4 results are compared with float_near_ulp_array() using a bound of 10. */
+        if (!float_near_ulp_array(v1, v2, 10, 4))
+            fail();
+        /* Only the new implementation is benchmarked. */
+        bench_new(v2, lt, rt, ELEMS);
+    }
+
+    report("ac3_sum_square_butterfly_float");
+}
+
 void checkasm_check_ac3dsp(void)
 {
     AC3DSPContext c;
@@ -174,4 +199,5 @@ void checkasm_check_ac3dsp(void)
     check_ac3_extract_exponents(&c);
     check_float_to_fixed24(&c);
     check_ac3_sum_square_butterfly_int32(&c);
+    check_ac3_sum_square_butterfly_float(&c);
 }
-- 
2.42.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2024-04-06 14:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-06 14:23 [FFmpeg-devel] [PATCH v4 0/5] avcodec/ac3: Add aarch64 NEON DSP Geoff Hill
2024-04-06 14:25 ` [FFmpeg-devel] [PATCH v4 1/5] avcodec/ac3: Implement float_to_fixed24 for aarch64 NEON Geoff Hill
2024-04-06 14:25 ` [FFmpeg-devel] [PATCH v4 2/5] avcodec/ac3: Implement ac3_exponent_min " Geoff Hill
2024-04-06 14:26 ` [FFmpeg-devel] [PATCH v4 3/5] avcodec/ac3: Implement ac3_extract_exponents " Geoff Hill
2024-04-06 14:26 ` [FFmpeg-devel] [PATCH v4 4/5] avcodec/ac3: Implement sum_square_butterfly_int32 " Geoff Hill
2024-04-06 14:26 ` Geoff Hill [this message]
2024-04-08 10:47 ` [FFmpeg-devel] [PATCH v4 0/5] avcodec/ac3: Add aarch64 NEON DSP Martin Storsjö

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=70079918-e9e5-42de-a3b1-2c34496e57fa@geoffhill.org \
    --to=geoff@geoffhill.org \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git