Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: "Rémi Denis-Courmont" <remi@remlab.net>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH] lavc/vp8dsp: R-V V vp8_idct_add
Date: Wed,  5 Jun 2024 22:45:46 +0300
Message-ID: <20240605194546.19151-1-remi@remlab.net> (raw)

T-Head C908 (cycles):
vp8_idct_add_c:       312.2
vp8_idct_add_rvv_i32: 117.0
---
 libavcodec/riscv/vp8dsp_init.c |  2 ++
 libavcodec/riscv/vp8dsp_rvv.S  | 59 ++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index 5911d195ba..d9e2beb237 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -27,6 +27,7 @@
 #include "vp8dsp.h"
 
 void ff_vp8_luma_dc_wht_rvv(int16_t block[4][4][16], int16_t dc[16]);
+void ff_vp8_idct_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
 void ff_vp8_idct_dc_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
 void ff_vp8_idct_dc_add4y_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
 void ff_vp8_idct_dc_add4uv_rvv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
@@ -129,6 +130,7 @@ av_cold void ff_vp8dsp_init_riscv(VP8DSPContext *c)
         if (flags & AV_CPU_FLAG_RVV_I64)
             c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_rvv;
 #endif
+        c->vp8_idct_add = ff_vp8_idct_add_rvv;
         c->vp8_idct_dc_add = ff_vp8_idct_dc_add_rvv;
         c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_rvv;
         if (flags & AV_CPU_FLAG_RVV_I64)
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 02eeb4cc2c..b4b7b63195 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -100,6 +100,65 @@ func ff_vp8_luma_dc_wht_rvv, zve64x
 endfunc
 #endif
 
+func ff_vp8_idct_add_rvv, zve32x  # VP8 4x4 inverse DCT added to dst; per the C prototype: a0 = dst, a1 = block[16], a2 = stride
+        csrwi       vxrm, 0  # fixed-point rounding mode 0 (round-to-nearest-up), used by vssra below
+        vsetivli    zero, 4, e16, mf2, ta, ma  # vl = 4 elements of 16 bits
+        addi        a3, a1, 1 * 4 * 2  # a3 = &block[4]  (4 coefficients * 2 bytes)
+        addi        a4, a1, 2 * 4 * 2  # a4 = &block[8]
+        addi        a5, a1, 3 * 4 * 2  # a5 = &block[12]
+        li          t1, 20091  # multiplier: x + ((x * 20091) >> 16)
+        li          t2, 35468  # multiplier: (x * 35468) >> 16
+        jal         t0, 1f  # first 1-D pass; t0 (not ra) holds the return address
+        vsseg4e16.v v0, (a1)  # interleaved store of v0-v3: writes the 4x4 intermediate back transposed
+        jal         t0, 1f  # second 1-D pass on the transposed data
+        vlsseg4e8.v v4, (a0), a2  # strided segment load: 4 bytes at dst + i * stride go to element i of v4-v7
+        vssra.vi    v0, v0, 3  # (x + 4) >> 3 with the rounding mode set above
+        sd          zero,   (a1)  # clear block[0..3]; NOTE(review): sd is RV64-only, confirm RV32 builds are guarded
+        vssra.vi    v1, v1, 3
+        sd          zero,  8(a1)  # clear block[4..7]
+        vssra.vi    v2, v2, 3
+        sd          zero, 16(a1)  # clear block[8..11]
+        vssra.vi    v3, v3, 3
+        sd          zero, 24(a1)  # clear block[12..15]
+        vsetvli     zero, zero, e8, mf4, ta, ma  # SEW = 8 so the .wv form sees 16-bit v0-v3 and 8-bit v4-v7; vl unchanged
+        vwaddu.wv   v0, v0, v4  # 16-bit residual += zero-extended 8-bit dst pixels
+        vwaddu.wv   v1, v1, v5
+        vwaddu.wv   v2, v2, v6
+        vwaddu.wv   v3, v3, v7
+        vsetvli     zero, zero, e16, mf2, ta, ma  # back to 16-bit elements for the signed clamp
+        vmax.vx     v0, v0, zero  # clamp negative sums to 0
+        vmax.vx     v1, v1, zero
+        vmax.vx     v2, v2, zero
+        vmax.vx     v3, v3, zero
+        vsetvli     zero, zero, e8, mf4, ta, ma  # SEW = 8 again for the narrowing clip
+        vnclipu.wi  v4, v0, 0  # narrow 16 -> 8 bits with unsigned saturation (caps at 255), no shift
+        vnclipu.wi  v5, v1, 0
+        vnclipu.wi  v6, v2, 0
+        vnclipu.wi  v7, v3, 0
+        vssseg4e8.v v4, (a0), a2  # strided segment store: write the 4 rows of 4 pixels back to dst
+        ret
+1:  # local subroutine: one 1-D transform pass over block, results in v0-v3, returns via t0
+        vle16.v    v0, (a1)  # v0 = block[0..3]
+        vle16.v    v2, (a4)  # v2 = block[8..11]
+        vle16.v    v1, (a3)  # v1 = block[4..7]
+        vle16.v    v3, (a5)  # v3 = block[12..15]
+        vadd.vv    v4, v0, v2 # t0 = v0 + v2 (t0-t3 here name temporaries, not the scalar registers)
+        vsub.vv    v5, v0, v2 # t1 = v0 - v2
+        vmulhsu.vx v8, v3, t1  # v8 = (v3 * 20091) >> 16  (signed vector * unsigned scalar, high half)
+        vmulhsu.vx v6, v1, t2  # v6 = (v1 * 35468) >> 16
+        vadd.vv    v8, v8, v3  # v8 = v3 + ((v3 * 20091) >> 16)
+        vmulhsu.vx v7, v1, t1  # v7 = (v1 * 20091) >> 16
+        vmulhsu.vx v9, v3, t2  # v9 = (v3 * 35468) >> 16
+        vadd.vv    v7, v7, v1  # v7 = v1 + ((v1 * 20091) >> 16)
+        vsub.vv    v6, v6, v8 # t2 = v6 - v8
+        vadd.vv    v7, v7, v9 # t3 = v7 + v9
+        vadd.vv    v1, v5, v6  # out1 = t1 + t2
+        vsub.vv    v2, v5, v6  # out2 = t1 - t2
+        vadd.vv    v0, v4, v7  # out0 = t0 + t3
+        vsub.vv    v3, v4, v7  # out3 = t0 - t3
+        jr         t0  # return to the instruction after the jal
+endfunc
+
 func ff_vp8_idct_dc_add_rvv, zve32x
         lh      a3, (a1)
         addi    a3, a3, 4
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

                 reply	other threads:[~2024-06-05 19:45 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240605194546.19151-1-remi@remlab.net \
    --to=remi@remlab.net \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git