Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: "Rémi Denis-Courmont" <remi@remlab.net>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH 2/2] lavc/lpc: R-V V compute_autocorr
Date: Tue, 12 Dec 2023 23:02:40 +0200
Message-ID: <20231212210240.19886-2-remi@remlab.net> (raw)
In-Reply-To: <20231212210240.19886-1-remi@remlab.net>

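The loop iterates over the samples of the input data, not over the lag
values (order). This is to avoid reloading the same data for each lag
value. However, this means the loop only works if the maximum order is no
larger than VLENB, i.e. the number of 64-bit elements that fit in a group
of 8 vector registers (LMUL=8).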

The loop is roughly equivalent to:

    for (size_t j = 0; j < lag; j++)
        autoc[j] = 1.;

    while (len > lag) {
        for (ptrdiff_t j = 0; j < lag; j++)
            autoc[j] += data[j] * *data;
        data++;
        len--;
    }

    while (len > 0) {
        for (ptrdiff_t j = 0; j < len; j++)
            autoc[j] += data[j] * *data;
        data++;
        len--;
    }

Since only half of the vector registers are in use (two groups of 8), it
should be possible to implement the same loop for orders up to 2xVLENB,
but this is left for future work.

Measured speedups vary widely, from ~1.25x to ~4x, but they are always a
noticeable improvement over the existing non-vectorized code.
---
 libavcodec/riscv/lpc_init.c |  8 +++++++-
 libavcodec/riscv/lpc_rvv.S  | 29 +++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/libavcodec/riscv/lpc_init.c b/libavcodec/riscv/lpc_init.c
index c16e5745f0..ab91956f2d 100644
--- a/libavcodec/riscv/lpc_init.c
+++ b/libavcodec/riscv/lpc_init.c
@@ -22,16 +22,22 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
 #include "libavcodec/lpc.h"
 
 void ff_lpc_apply_welch_window_rvv(const int32_t *, ptrdiff_t, double *);
+void ff_lpc_compute_autocorr_rvv(const double *, ptrdiff_t, int, double *);
 
 av_cold void ff_lpc_init_riscv(LPCContext *c)
 {
 #if HAVE_RVV && (__riscv_xlen >= 64)
     int flags = av_get_cpu_flags();
 
-    if ((flags & AV_CPU_FLAG_RVV_F64) && (flags & AV_CPU_FLAG_RVB_ADDR))
+    if ((flags & AV_CPU_FLAG_RVV_F64) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
         c->lpc_apply_welch_window = ff_lpc_apply_welch_window_rvv;
+
+        if (ff_get_rv_vlenb() >= c->max_order)
+            c->lpc_compute_autocorr = ff_lpc_compute_autocorr_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S
index f81a2392c1..654156bf12 100644
--- a/libavcodec/riscv/lpc_rvv.S
+++ b/libavcodec/riscv/lpc_rvv.S
@@ -85,4 +85,33 @@ func ff_lpc_apply_welch_window_rvv, zve64d
 
         ret
 endfunc
+
+func ff_lpc_compute_autocorr_rvv, zve64d
+        li        t0, 1
+        vsetvli   t1, a2, e64, m8, ta, ma
+        fcvt.d.l  ft0, t0
+        vle64.v   v0, (a0)
+        sh3add    a0, a2, a0   # data += lag
+        vfmv.v.f  v16, ft0
+        bge       a2, a1, 2f
+1:
+        vfmv.f.s  ft0, v0
+        fld       ft1, (a0)    # ft1 = data[lag + i]
+        vfmacc.vf v16, ft0, v0 # v16[j] += data[i] * data[i + j]
+        addi      a1, a1, -1
+        vfslide1down.vf v0, v0, ft1
+        addi      a0, a0, 8
+        bgt       a1, a2, 1b   # while (len > lag);
+2:
+        vfmv.f.s  ft0, v0
+        vsetvli   zero, a1, e64, m8, tu, ma
+        vfmacc.vf v16, ft0, v0
+        addi      a1, a1, -1
+        vslide1down.vx v0, v0, zero
+        bnez      a1, 2b       # while (len > 0);
+
+        vsetvli   zero, a2, e64, m8, ta, ma
+        vse64.v   v16, (a3)
+        ret
+endfunc
 #endif
-- 
2.43.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  reply	other threads:[~2023-12-12 21:02 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-12 21:02 [FFmpeg-devel] [PATCH 1/2] checkasm/lpc: test compute_autocorr Rémi Denis-Courmont
2023-12-12 21:02 ` Rémi Denis-Courmont [this message]
2023-12-12 21:05   ` [FFmpeg-devel] [PATCH 2/2] lavc/lpc: R-V V compute_autocorr Rémi Denis-Courmont
2023-12-14 16:41 ` [FFmpeg-devel] [PATCH 1/2] checkasm/lpc: test compute_autocorr Michael Niedermayer
2023-12-14 16:56   ` Rémi Denis-Courmont

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231212210240.19886-2-remi@remlab.net \
    --to=remi@remlab.net \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git