From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by master.gitmailbox.com (Postfix) with ESMTP id 233DF4AC31
	for <ffmpegdev@gitmailbox.com>; Thu, 16 May 2024 16:48:50 +0000 (UTC)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 94E4E68D4A5;
	Thu, 16 May 2024 19:48:47 +0300 (EEST)
Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 4493F68C72F
 for <ffmpeg-devel@ffmpeg.org>; Thu, 16 May 2024 19:48:41 +0300 (EEST)
Received: from basile.remlab.net (localhost [IPv6:::1])
 by ursule.remlab.net (Postfix) with ESMTP id 96CD2C013B
 for <ffmpeg-devel@ffmpeg.org>; Thu, 16 May 2024 19:48:40 +0300 (EEST)
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi@remlab.net>
To: ffmpeg-devel@ffmpeg.org
Date: Thu, 16 May 2024 19:48:37 +0300
Message-ID: <20240516164840.19025-1-remi@remlab.net>
X-Mailer: git-send-email 2.43.0
MIME-Version: 1.0
Subject: [FFmpeg-devel] [PATCHv4 1/4] lavu/riscv: add assembler macros for
 adjusting vector LMUL
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Archived-At: <https://master.gitmailbox.com/ffmpegdev/20240516164840.19025-1-remi@remlab.net/>
List-Archive: <https://master.gitmailbox.com/ffmpegdev/>
List-Post: <mailto:ffmpegdev@gitmailbox.com>

vtype_vli computes the VTYPE value with the optimal LMUL for a given
element width, tail and mask policies and a run-time vector length.

vtype_ivli does the same, but with a compile-time constant vector
length.

vwtypei and vntypei can be used to widen or narrow a VTYPE value for
use in mixed-width vector-optimised functions.
---
 libavutil/riscv/asm.S | 166 +++++++++++++++++++++++++++++-------------
 1 file changed, 117 insertions(+), 49 deletions(-)

diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
index 14be5055f5..1e6358dcb5 100644
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@@ -96,77 +96,145 @@
         .endm
 #endif
 
-        /* Convenience macro to load a Vector type (vtype) as immediate */
-        .macro  lvtypei rd, e, m=m1, tp=tu, mp=mu
+#if defined (__riscv_v_elen)
+# define RV_V_ELEN __riscv_v_elen /* ELEN as advertised by the compiler */
+#else
+/* Run-time detection of the V extension implies ELEN >= 64. */
+# define RV_V_ELEN 64
+#endif
+#if RV_V_ELEN == 32
+# define VSEW_MAX 2 /* widest element is e32, i.e. vsew encoding 2 */
+#else
+# define VSEW_MAX 3 /* otherwise assume e64, i.e. vsew encoding 3 */
+#endif
 
-        .ifc \e,e8
-        .equ ei, 0
+        .macro  parse_vtype ew, tp, mp /* defines symbols vsew, tp and mp */
+        .ifc    \ew,e8
+        .equ    vsew, 0 // vsew = log2(element width in bytes)
         .else
-        .ifc \e,e16
-        .equ ei, 8
+        .ifc    \ew,e16
+        .equ    vsew, 1
         .else
-        .ifc \e,e32
-        .equ ei, 16
+        .ifc    \ew,e32
+        .equ    vsew, 2
         .else
-        .ifc \e,e64
-        .equ ei, 24
+        .ifc    \ew,e64
+        .equ    vsew, 3
         .else
-        .error "Unknown element type"
+        .error  "Unknown element width \ew"
         .endif
         .endif
         .endif
         .endif
 
-        .ifc \m,m1
-        .equ mi, 0
-        .else
-        .ifc \m,m2
-        .equ mi, 1
-        .else
-        .ifc \m,m4
-        .equ mi, 2
+        .ifc    \tp,tu
+        .equ    tp, 0 // tail undisturbed: tail policy bit clear
         .else
-        .ifc \m,m8
-        .equ mi, 3
+        .ifc    \tp,ta
+        .equ    tp, 1 // tail agnostic: tail policy bit set
         .else
-        .ifc \m,mf8
-        .equ mi, 5
-        .else
-        .ifc \m,mf4
-        .equ mi, 6
-        .else
-        .ifc \m,mf2
-        .equ mi, 7
-        .else
-        .error "Unknown multiplier"
-        .equ mi, 3
-        .endif
-        .endif
-        .endif
-        .endif
-        .endif
+        .error  "Unknown tail policy \tp"
         .endif
         .endif
 
-        .ifc \tp,tu
-        .equ tpi, 0
+        .ifc    \mp,mu
+        .equ    mp, 0 // mask undisturbed: mask policy bit clear
         .else
-        .ifc \tp,ta
-        .equ tpi, 64
+        .ifc    \mp,ma
+        .equ    mp, 1 // mask agnostic: mask policy bit set
         .else
-        .error "Unknown tail policy"
+        .error  "Unknown mask policy \mp"
         .endif
         .endif
+        .endm
 
-        .ifc \mp,mu
-        .equ mpi, 0
-        .else
-        .ifc \mp,ma
-        .equ mpi, 128
+        /**
+         * Gets the vector type with the smallest suitable LMUL value.
+         * @param[out] rd vector type destination register
+         * @param avl vector length constant (at most 128)
+         * @param ew element width: e8, e16, e32 or e64
+         * @param tp tail policy: tu or ta
+         * @param mp mask policy: mu or ma
+         */
+        .macro  vtype_ivli rd, avl, ew, tp=tu, mp=mu
+        .if     \avl <= 1
+        .equ    log2vl, 0 // log2vl = ceil(log2(avl)), computed at build time
+        .elseif \avl <= 2
+        .equ    log2vl, 1
+        .elseif \avl <= 4
+        .equ    log2vl, 2
+        .elseif \avl <= 8
+        .equ    log2vl, 3
+        .elseif \avl <= 16
+        .equ    log2vl, 4
+        .elseif \avl <= 32
+        .equ    log2vl, 5
+        .elseif \avl <= 64
+        .equ    log2vl, 6
+        .elseif \avl <= 128
+        .equ    log2vl, 7
         .else
-        .error "Unknown mask policy"
+        .error  "Vector length \avl out of range"
         .endif
+        parse_vtype \ew, \tp, \mp
+        csrr    \rd, vlenb // rd = vector register size in bytes
+        clz     \rd, \rd // rd = XLEN - 1 - log2(VLENB)
+        addi    \rd, \rd, log2vl + 1 + VSEW_MAX - __riscv_xlen // log2(LMUL) at widest SEW
+        max     \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
+        .if     vsew < VSEW_MAX
+        addi    \rd, \rd, vsew - VSEW_MAX // narrower elements need less LMUL
+        andi    \rd, \rd, 7 // keep the 3-bit signed vlmul field
         .endif
+        ori     \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7) // merge SEW and policy bits
+        .endm
+
+        /**
+         * Gets the vector type with the smallest suitable LMUL value.
+         * @param[out] rd vector type destination register
+         * @param rs vector length source register (must not be zero)
+         * @param[out] tmp temporary register to be clobbered
+         * @param ew element width: e8, e16, e32 or e64
+         * @param tp tail policy: tu or ta
+         * @param mp mask policy: mu or ma
+         */
+        .macro  vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu
+        parse_vtype \ew, \tp, \mp
+        /*
+         * The difference between the CLZ's notionally equals the VLMUL value
+         * for 4-bit elements. But we want the value for SEW_MAX-bit elements.
+         */
+        slli    \tmp, \rs, 1 + VSEW_MAX
+        csrr    \rd, vlenb // rd = vector register size in bytes
+        addi    \tmp, \tmp, -1 // so that a power-of-two length is not rounded up
+        clz     \rd, \rd
+        clz     \tmp, \tmp
+        sub     \rd, \rd, \tmp // rd = log2(LMUL) at widest SEW, rounded up
+        max     \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX
+        .if     vsew < VSEW_MAX
+        addi    \rd, \rd, vsew - VSEW_MAX // narrower elements need less LMUL
+        andi    \rd, \rd, 7 // keep the 3-bit signed vlmul field
+        .endif
+        ori     \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7) // merge SEW and policy bits
+        .endm
+
+        /**
+         * Widens a vector type: adds n to both the vsew and vlmul fields.
+         * @param[out] rd widened vector type destination register
+         * @param rs vector type source register
+         * @param n number of times to widen (once by default)
+         */
+        .macro  vwtypei rd, rs, n=1
+        xori    \rd, \rs, 4 // bias vlmul so that mf8..m8 become 1..7
+        addi    \rd, \rd, (\n) * 011 // octal 011: one vlmul step plus one vsew step
+        xori    \rd, \rd, 4 // back to the signed vlmul encoding
+        .endm
 
-        li      \rd, (ei | mi | tpi | mpi)
+        /**
+         * Narrows a vector type: subtracts n from both the vsew and vlmul fields.
+         * @param[out] rd narrowed vector type destination register
+         * @param rs vector type source register
+         * @param n number of times to narrow (once by default)
+         */
+        .macro  vntypei rd, rs, n=1
+        vwtypei \rd, \rs, -(\n) // narrowing is widening by a negative count
         .endm
-- 
2.43.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".