From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTP id 233DF4AC31 for ; Thu, 16 May 2024 16:48:50 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 94E4E68D4A5; Thu, 16 May 2024 19:48:47 +0300 (EEST) Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 4493F68C72F for ; Thu, 16 May 2024 19:48:41 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id 96CD2C013B for ; Thu, 16 May 2024 19:48:40 +0300 (EEST) From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Thu, 16 May 2024 19:48:37 +0300 Message-ID: <20240516164840.19025-1-remi@remlab.net> X-Mailer: git-send-email 2.43.0 MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCHv4 1/4] lavu/riscv: add assembler macros for adjusting vector LMUL X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: vtype_vli computes the VTYPE value with the optimal LMUL for a given element width, tail and mask policies and a run-time vector length. vtype_ivli does the same, but with the compile-time constant vector length. vwtypei and vntypei can be used to widen or narrow a VTYPE value for use in mixed-width vector-optimised functions. 
--- libavutil/riscv/asm.S | 166 +++++++++++++++++++++++++++++------------- 1 file changed, 117 insertions(+), 49 deletions(-) diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S index 14be5055f5..1e6358dcb5 100644 --- a/libavutil/riscv/asm.S +++ b/libavutil/riscv/asm.S @@ -96,77 +96,145 @@ .endm #endif - /* Convenience macro to load a Vector type (vtype) as immediate */ - .macro lvtypei rd, e, m=m1, tp=tu, mp=mu +#if defined (__riscv_v_elen) +# define RV_V_ELEN __riscv_v_elen +#else +/* Run-time detection of the V extension implies ELEN >= 64. */ +# define RV_V_ELEN 64 +#endif +#if RV_V_ELEN == 32 +# define VSEW_MAX 2 +#else +# define VSEW_MAX 3 +#endif - .ifc \e,e8 - .equ ei, 0 + .macro parse_vtype ew, tp, mp + .ifc \ew,e8 + .equ vsew, 0 .else - .ifc \e,e16 - .equ ei, 8 + .ifc \ew,e16 + .equ vsew, 1 .else - .ifc \e,e32 - .equ ei, 16 + .ifc \ew,e32 + .equ vsew, 2 .else - .ifc \e,e64 - .equ ei, 24 + .ifc \ew,e64 + .equ vsew, 3 .else - .error "Unknown element type" + .error "Unknown element width \ew" .endif .endif .endif .endif - .ifc \m,m1 - .equ mi, 0 - .else - .ifc \m,m2 - .equ mi, 1 - .else - .ifc \m,m4 - .equ mi, 2 + .ifc \tp,tu + .equ tp, 0 .else - .ifc \m,m8 - .equ mi, 3 + .ifc \tp,ta + .equ tp, 1 .else - .ifc \m,mf8 - .equ mi, 5 - .else - .ifc \m,mf4 - .equ mi, 6 - .else - .ifc \m,mf2 - .equ mi, 7 - .else - .error "Unknown multiplier" - .equ mi, 3 - .endif - .endif - .endif - .endif - .endif + .error "Unknown tail policy \tp" .endif .endif - .ifc \tp,tu - .equ tpi, 0 + .ifc \mp,mu + .equ mp, 0 .else - .ifc \tp,ta - .equ tpi, 64 + .ifc \mp,ma + .equ mp, 1 .else - .error "Unknown tail policy" + .error "Unknown mask policy \mp" .endif .endif + .endm - .ifc \mp,mu - .equ mpi, 0 - .else - .ifc \mp,ma - .equ mpi, 128 + /** + * Gets the vector type with the smallest suitable LMUL value. 
+ * @param[out] rd vector type destination register + * @param avl vector length constant + * @param ew element width: e8, e16, e32 or e64 + * @param tp tail policy: tu or ta + * @param mp mask policy: mu or ma + */ + .macro vtype_ivli rd, avl, ew, tp=tu, mp=mu + .if \avl <= 1 + .equ log2vl, 0 + .elseif \avl <= 2 + .equ log2vl, 1 + .elseif \avl <= 4 + .equ log2vl, 2 + .elseif \avl <= 8 + .equ log2vl, 3 + .elseif \avl <= 16 + .equ log2vl, 4 + .elseif \avl <= 32 + .equ log2vl, 5 + .elseif \avl <= 64 + .equ log2vl, 6 + .elseif \avl <= 128 + .equ log2vl, 7 .else - .error "Unknown mask policy" + .error "Vector length \avl out of range" .endif + parse_vtype \ew, \tp, \mp + csrr \rd, vlenb + clz \rd, \rd + addi \rd, \rd, log2vl + 1 + VSEW_MAX - __riscv_xlen + max \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX + .if vsew < VSEW_MAX + addi \rd, \rd, vsew - VSEW_MAX + andi \rd, \rd, 7 .endif + ori \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7) + .endm + + /** + * Gets the vector type with the smallest suitable LMUL value. + * @param[out] rd vector type destination register + * @param rs vector length source register + * @param[out] tmp temporary register to be clobbered + * @param ew element width: e8, e16, e32 or e64 + * @param tp tail policy: tu or ta + * @param mp mask policy: mu or ma + */ + .macro vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu + parse_vtype \ew, \tp, \mp + /* + * The difference between the CLZ's notionally equals the VLMUL value + * for 4-bit elements. But we want the value for SEW_MAX-bit elements. + */ + slli \tmp, \rs, 1 + VSEW_MAX + csrr \rd, vlenb + addi \tmp, \tmp, -1 + clz \rd, \rd + clz \tmp, \tmp + sub \rd, \rd, \tmp + max \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX + .if vsew < VSEW_MAX + addi \rd, \rd, vsew - VSEW_MAX + andi \rd, \rd, 7 + .endif + ori \rd, \rd, (vsew << 3) | (tp << 6) | (mp << 7) + .endm + + /** + * Widens a vector type. 
+ * @param[out] rd widened vector type destination register + * @param rs vector type source register + * @param n number of times to widen (once by default) + */ + .macro vwtypei rd, rs, n=1 + xori \rd, \rs, 4 + addi \rd, \rd, (\n) * 011 + xori \rd, \rd, 4 + .endm - li \rd, (ei | mi | tpi | mpi) + /** + * Narrows a vector type. + * @param[out] rd narrowed vector type destination register + * @param rs vector type source register + * @param n number of times to narrow (once by default) + */ + .macro vntypei rd, rs, n=1 + vwtypei \rd, \rs, -(\n) .endm -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".