From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by master.gitmailbox.com (Postfix) with ESMTP id C323E4B4E2
	for <ffmpegdev@gitmailbox.com>; Sun,  7 Jul 2024 18:49:35 +0000 (UTC)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id AE1E968DBF5;
	Sun,  7 Jul 2024 21:49:33 +0300 (EEST)
Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 508A568D9AF
 for <ffmpeg-devel@ffmpeg.org>; Sun,  7 Jul 2024 21:49:27 +0300 (EEST)
Received: from basile.remlab.net (localhost [IPv6:::1])
 by ursule.remlab.net (Postfix) with ESMTP id ED4AAC006F
 for <ffmpeg-devel@ffmpeg.org>; Sun,  7 Jul 2024 21:49:26 +0300 (EEST)
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi@remlab.net>
To: ffmpeg-devel@ffmpeg.org
Date: Sun,  7 Jul 2024 21:49:26 +0300
Message-ID: <20240707184926.139234-1-remi@remlab.net>
X-Mailer: git-send-email 2.45.2
MIME-Version: 1.0
Subject: [FFmpeg-devel] [PATCH] lavc/h264dsp: R-V V 8-bit
 luma_dc_dequant_idct
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Archived-At: <https://master.gitmailbox.com/ffmpegdev/20240707184926.139234-1-remi@remlab.net/>
List-Archive: <https://master.gitmailbox.com/ffmpegdev/>
List-Post: <mailto:ffmpegdev@gitmailbox.com>

Performance gain is around 0.5% on the K230 cycle counter, and not
measurable with the architecture timer, so probably not worth merging.
---
 libavcodec/riscv/h264dsp_init.c |  4 +++
 libavcodec/riscv/h264idct_rvv.S | 57 +++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 88afec8df0..8a83e2dee2 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -51,6 +51,7 @@ void ff_h264_idct_add16intra_8_rvv(uint8_t *dst, const int *blockoffset,
 void ff_h264_idct8_add4_8_rvv(uint8_t *dst, const int *blockoffset,
                               int16_t *block, int stride,
                               const uint8_t nnzc[5 * 8]);
+void ff_h264_luma_dc_dequant_idct_8_rvv(int16_t *out, int16_t *in, int qmul);
 
 extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
 extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@@ -84,6 +85,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
             dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv;
             dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
             dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
+            if (flags & AV_CPU_FLAG_RVV_I64)
+                dsp->h264_luma_dc_dequant_idct =
+                    ff_h264_luma_dc_dequant_idct_8_rvv;
 #  endif
         }
         dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index 370d162bf2..eeeba0c7eb 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -452,4 +452,61 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
         ret
 endfunc
 .endr
+
+func ff_h264_luma_dc_dequant_idct_8_rvv, zve64x
+        csrwi   vxrm, 0  # fixed-point rounding mode 0 (round-to-nearest-up), used by vnclip below
+        vsetivli    zero, 4, e16, mf2, ta, ma  # vl = 4, 16-bit elements
+        vlseg4e16.v v0, (a1)  # de-interleave: v0..v3 element i = in[4 * i + 0..3]
+        addi    sp, sp, -16 * 2  # reserve a 16 x 16-bit scratch buffer on the stack
+        vadd.vv     v8, v0, v1    # z0
+        vsub.vv     v10, v0, v1   # z1
+        addi    t1, sp, 1 * 4 * 2  # address of scratch[4]
+        vsub.vv     v12, v2, v3   # z2
+        addi    t2, sp, 2 * 4 * 2  # address of scratch[8]
+        vadd.vv     v14, v2, v3   # z3
+        vadd.vv     v0, v8, v14  # z0 + z3
+        addi    t3, sp, 3 * 4 * 2  # address of scratch[12]
+        vsub.vv     v1, v8, v14  # z0 - z3
+        vsub.vv     v2, v10, v12  # z1 - z2
+        vadd.vv     v3, v10, v12  # z1 + z2
+        vsseg4e16.v v0, (sp)  # interleave v0..v3 into scratch[4 * i + 0..3]
+        vle16.v     v0, (sp)  # scratch[0..3]
+        vle16.v     v2, (t2)  # scratch[8..11]
+        vle16.v     v1, (t1)  # scratch[4..7]
+        # same as 4-point iDCT but without right shifts
+        vadd.vv     v8, v0, v2    # z0
+        vle16.v     v3, (t3)  # scratch[12..15]
+        vsub.vv     v10, v0, v2   # z1
+        addi    sp, sp, 16 * 2  # release the scratch buffer (all four loads are done)
+        vsub.vv     v12, v1, v3   # z2
+        lui     t0, 0x50004             # 0x5000_4000
+        vadd.vv     v14, v1, v3   # z3
+        vwadd.vv    v0, v8, v14  # z0 + z3, widened to 32 bits
+        slli    t0, t0, 16 - 12         # 0x5_0004_0000
+        vwadd.vv    v2, v10, v12  # z1 + z2, widened to 32 bits
+        vwsub.vv    v4, v10, v12  # z1 - z2, widened to 32 bits
+        ori     t0, t0, 1               # 0x5_0004_0001
+        vwsub.vv    v6, v8, v14  # z0 - z3, widened to 32 bits
+        vsetvli     zero, zero, e32, m1, ta, ma  # keep vl, switch to 32-bit elements
+        vmul.vx     v0, v0, a2  # multiply by qmul
+        slli    t0, t0, 1 + 16 + 4 + 1  # 0x0140_0100_0040_0000: four 16-bit byte offsets 0, 0x40, 0x100, 0x140
+        vmul.vx     v2, v2, a2  # multiply by qmul
+        vmul.vx     v4, v4, a2  # multiply by qmul
+        addi    t1, a0, 1 * 16 * 2  # out + 16 elements
+        vmul.vx     v6, v6, a2  # multiply by qmul
+        vsetvli     zero, zero, e64, m2, ta, ma  # keep vl, 64-bit elements for the scalar move below
+        vmv.s.x     v12, t0  # v12 = packed 16-bit store offsets; NOTE(review): needs a 64-bit t0 - confirm RV32 handling
+        vsetvli     zero, zero, e16, mf2, ta, ma  # keep vl, back to 16-bit elements
+        vnclip.wi   v8, v0, 8  # rounding right shift by 8, narrowed to 16 bits with signed saturation
+        addi    t2, a0, 4 * 16 * 2  # out + 64 elements
+        vnclip.wi   v9, v2, 8  # same narrowing for z1 + z2
+        vnclip.wi   v10, v4, 8  # same narrowing for z1 - z2
+        addi    t3, a0, 5 * 16 * 2  # out + 80 elements
+        vnclip.wi   v11, v6, 8  # same narrowing for z0 - z3
+        vsuxei16.v  v8, (a0), v12  # scatter 4 results to base + byte offsets held in v12
+        vsuxei16.v  v9, (t1), v12  # same offsets from out + 16 elements
+        vsuxei16.v  v10, (t2), v12  # same offsets from out + 64 elements
+        vsuxei16.v  v11, (t3), v12  # same offsets from out + 80 elements
+        ret
+endfunc
 #endif
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".