From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTP id 9278B4AC97 for ; Sun, 19 May 2024 08:18:27 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id A316A68CABD; Sun, 19 May 2024 11:18:24 +0300 (EEST) Received: from out203-205-221-221.mail.qq.com (out203-205-221-221.mail.qq.com [203.205.221.221]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 18C1D68C10E for ; Sun, 19 May 2024 11:18:16 +0300 (EEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=foxmail.com; s=s201512; t=1716106686; bh=MVKZfDwjYKnWNuVqYpzemsvgddCjOGwg2fjf4uJigNg=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=Sd6MQjcnBL7pVOzI01AZqjOmdO5AI19s8u0AtkvChemAI30E+4S7QrTaY4FWTuwBu ioWSHBnX+OECCLEhWTj/wH8WKf2Ko2yGr/9AfySx6teFfHQtg8CDJm5LZiujXASOya EB7eQKYXZaJVe8sa0nHg6VGBodFjZKoijkaNWcmY= Received: from localhost.localdomain ([42.56.223.122]) by newxmesmtplogicsvrszc19-0.qq.com (NewEsmtp) with SMTP id 48536C52; Sun, 19 May 2024 16:18:05 +0800 X-QQ-mid: xmsmtpt1716106685tb6dl8guy Message-ID: X-QQ-XMAILINFO: MxUBm8splV+pjFmparWp9jLT68t9Cpigw6lY26sVoti8znAEt5Hk5l8+CG2Tmt lnOJDgvXbru9LhMUXyDHjc9mPt+7KkqAWuceDeL8pyeZYRXqs6d6teKhCfvoBuDEtkSd78t1biMl 6I/uJyKFV8w4h3sh7XfhMa2e/+AKJNrBZLgQqEdlTwNdAcKD0OtIHOCgQc1Lw3LRXTre06+x3cul OvUjhNAG0emTebT3+zTbZKakf0Ihf2DzvVyspZajUa8ap40T1G6x5tNHR9iBCu3lVIgWFP+lSTH8 mwxivxAyG3eWPZQwJW7cWvx0jS3h3tEf86+KU62Cz+Lq170rrJ645Q2+T6gnjXsHFxZlDGLrzbdN yFCWRj/izMdtqvzYufud81K5RbdcRmqIAYX5cO6oze2DJuTOi9tFNZvbt0vUd6aae8kYCkSt5zdM DpPeQ3ugGZ8/FRDtrabRgFuEsIYUnxTcnECywQNjlUP/Xvd9yuSItJ+GrzTkrpdsMf5Ye2aEWzoJ 9Jod1qyQ59fl7X4FfqVa6HD43H1UG4/BwfWVoFSwO/pvrinMSK8KJa792I6zOp4ZPvtxJJovS5mt su/NWlQ994l0vzQNB5ZysMg2scLh3OPoi/AOUwuDShDbBeDktQXCtsmEhx3LY/Yuqggv7TPpveOx bNHEF38sGeYNkYEqnIHY0OnSWLKivUcmEsJuWumV+/rgxOzqtqEyn5sAcKjP8+ljTmH1tN7V6v9u sBAMJ76kDweAeGqRKHxjbcwzdIdTETvcA+ikbcFqK15vDoiTETl8GyhK6hRnqM7OuxdcfbZy1xwc RiGx2jHmTQD9qGTveUE55QUKV324fNpT2FLFIO12m11c9CUsdRm+8G2cQl2KsWf9hS7gaHs/Zjk9 3Q7NUl0tS7tbSndVDp3Kj01TEJz7baWl9pGsnKBh7tGX0Vyg9JK8LxqrxQtkANLo9EApTZHoQw X-QQ-XMRINFO: MSVp+SPm3vtS1Vd6Y4Mggwc= From: uk7b@foxmail.com To: ffmpeg-devel@ffmpeg.org Date: Sun, 19 May 2024 16:18:03 +0800 X-OQ-MSGID: <20240519081803.2590358-1-uk7b@foxmail.com> X-Mailer: git-send-email 2.45.1 In-Reply-To: References: MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 1/4] lavc/vp8dsp: R-V V put_epel hv X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: sunyuechi Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: From: sunyuechi C908: vp8_put_epel4_h4v4_c: 20.0 vp8_put_epel4_h4v4_rvv_i32: 11.0 vp8_put_epel4_h4v6_c: 25.2 vp8_put_epel4_h4v6_rvv_i32: 13.5 vp8_put_epel4_h6v4_c: 22.2 vp8_put_epel4_h6v4_rvv_i32: 14.5 vp8_put_epel4_h6v6_c: 29.0 vp8_put_epel4_h6v6_rvv_i32: 15.7 vp8_put_epel8_h4v4_c: 73.0 vp8_put_epel8_h4v4_rvv_i32: 22.2 vp8_put_epel8_h4v6_c: 90.5 vp8_put_epel8_h4v6_rvv_i32: 26.7 vp8_put_epel8_h6v4_c: 85.0 vp8_put_epel8_h6v4_rvv_i32: 27.2 vp8_put_epel8_h6v6_c: 104.7 vp8_put_epel8_h6v6_rvv_i32: 29.5 vp8_put_epel16_h4v4_c: 145.5 vp8_put_epel16_h4v4_rvv_i32: 26.5 vp8_put_epel16_h4v6_c: 190.7 vp8_put_epel16_h4v6_rvv_i32: 47.5 vp8_put_epel16_h6v4_c: 173.7 vp8_put_epel16_h6v4_rvv_i32: 33.2 vp8_put_epel16_h6v6_c: 222.2 vp8_put_epel16_h6v6_rvv_i32: 35.5 --- libavcodec/riscv/vp8dsp_init.c | 13 ++++ libavcodec/riscv/vp8dsp_rvv.S | 123 +++++++++++++++++++++++++++------ 2 files changed, 115 insertions(+), 21 deletions(-) diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c index 31e8227fa4..86927907e0 100644 --- a/libavcodec/riscv/vp8dsp_init.c +++ b/libavcodec/riscv/vp8dsp_init.c @@ -97,6 +97,19 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c) c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv; c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv; c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv; + + c->put_vp8_epel_pixels_tab[0][2][2] = ff_put_vp8_epel16_h6v6_rvv; + c->put_vp8_epel_pixels_tab[1][2][2] = ff_put_vp8_epel8_h6v6_rvv; + c->put_vp8_epel_pixels_tab[2][2][2] = ff_put_vp8_epel4_h6v6_rvv; + c->put_vp8_epel_pixels_tab[0][2][1] = ff_put_vp8_epel16_h4v6_rvv; + c->put_vp8_epel_pixels_tab[1][2][1] = ff_put_vp8_epel8_h4v6_rvv; + c->put_vp8_epel_pixels_tab[2][2][1] = ff_put_vp8_epel4_h4v6_rvv; + c->put_vp8_epel_pixels_tab[0][1][1] = ff_put_vp8_epel16_h4v4_rvv; + c->put_vp8_epel_pixels_tab[1][1][1] = ff_put_vp8_epel8_h4v4_rvv; + c->put_vp8_epel_pixels_tab[2][1][1] = ff_put_vp8_epel4_h4v4_rvv; + c->put_vp8_epel_pixels_tab[0][1][2] = ff_put_vp8_epel16_h6v4_rvv; + c->put_vp8_epel_pixels_tab[1][1][2] = ff_put_vp8_epel8_h6v4_rvv; + c->put_vp8_epel_pixels_tab[2][1][2] = ff_put_vp8_epel4_h6v4_rvv; } #endif #endif diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S index 0ba9fa443d..c79a8afacf 100644 --- a/libavcodec/riscv/vp8dsp_rvv.S +++ b/libavcodec/riscv/vp8dsp_rvv.S @@ -161,26 +161,26 @@ const subpel_filters .byte 0, -1, 12, 123, -6, 0 endconst -.macro epel_filter size type - lla t2, subpel_filters +.macro epel_filter size type regtype + lla \regtype\()2, subpel_filters .ifc \type,v - addi t0, a6, -1 + addi \regtype\()0, a6, -1 .else - addi t0, a5, -1 + addi \regtype\()0, a5, -1 .endif - li t1, 6 - mul t0, t0, t1 - add t0, t0, t2 + li \regtype\()1, 6 + mul \regtype\()0, \regtype\()0, \regtype\()1 + add \regtype\()0, \regtype\()0, \regtype\()2 .irp n,1,2,3,4 - lb t\n, \n(t0) + lb \regtype\n, \n(\regtype\()0) .endr .ifc \size,6 - lb t5, 5(t0) - lb t0, (t0) + lb \regtype\()5, 5(\regtype\()0) + lb \regtype\()0, (\regtype\()0) .endif .endm -.macro epel_load dst len size type +.macro epel_load dst len size type from_mem regtype .ifc \type,v mv a5, a3 .else @@ -189,24 +189,35 @@ endconst sub t6, a2, a5 add a7, a2, a5 +.if \from_mem vle8.v v24, (a2) vle8.v v22, (t6) vle8.v v26, (a7) add a7, a7, a5 vle8.v v28, (a7) - vwmulu.vx v16, v24, t2 - vwmulu.vx v20, v26, t3 + vwmulu.vx v16, v24, \regtype\()2 + vwmulu.vx v20, v26, \regtype\()3 .ifc \size,6 sub t6, t6, a5 add a7, a7, a5 vle8.v v24, (t6) vle8.v v26, (a7) - vwmaccu.vx v16, t0, v24 - vwmaccu.vx v16, t5, v26 + vwmaccu.vx v16, \regtype\()0, v24 + vwmaccu.vx v16, \regtype\()5, v26 +.endif + vwmaccsu.vx v16, \regtype\()1, v22 + vwmaccsu.vx v16, \regtype\()4, v28 +.else + vwmulu.vx v16, v4, \regtype\()2 + vwmulu.vx v20, v6, \regtype\()3 + .ifc \size,6 + vwmaccu.vx v16, \regtype\()0, v0 + vwmaccu.vx v16, \regtype\()5, v10 + .endif + vwmaccsu.vx v16, \regtype\()1, v2 + vwmaccsu.vx v16, \regtype\()4, v8 .endif li t6, 64 - vwmaccsu.vx v16, t1, v22 - vwmaccsu.vx v16, t4, v28 vwadd.wx v16, v16, t6 vsetvlstatic16 \len vwadd.vv v24, v16, v20 @@ -216,18 +227,18 @@ endconst vnclipu.wi \dst, v24, 0 .endm -.macro epel_load_inc dst len size type - epel_load \dst \len \size \type +.macro epel_load_inc dst len size type from_mem regtype + epel_load \dst \len \size \type \from_mem \regtype add a2, a2, a3 .endm .macro epel len size type func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x - epel_filter \size \type + epel_filter \size \type t vsetvlstatic8 \len 1: addi a4, a4, -1 - epel_load_inc v30 \len \size \type + epel_load_inc v30 \len \size \type 1 t vse8.v v30, (a0) add a0, a0, a1 bnez a4, 1b @@ -236,6 +247,72 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x endfunc .endm +.macro epel_hv len hsize vsize +func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x +#if __riscv_xlen == 64 + addi sp, sp, -48 + .irp n,0,1,2,3,4,5 + sd s\n, \n\()<<3(sp) + .endr +#else + addi sp, sp, -24 + .irp n,0,1,2,3,4,5 + sw s\n, \n\()<<2(sp) + .endr +#endif + sub a2, a2, a3 + epel_filter \hsize h t + epel_filter \vsize v s + vsetvlstatic8 \len +.if \hsize == 6 || \vsize == 6 + sub a2, a2, a3 + epel_load_inc v0 \len \hsize h 1 t +.endif + epel_load_inc v2 \len \hsize h 1 t + epel_load_inc v4 \len \hsize h 1 t + epel_load_inc v6 \len \hsize h 1 t + epel_load_inc v8 \len \hsize h 1 t +.if \hsize == 6 || \vsize == 6 + epel_load_inc v10 \len \hsize h 1 t +.endif + addi a4, a4, -1 +1: + addi a4, a4, -1 + epel_load v30 \len \vsize v 0 s + vse8.v v30, (a0) +.if \hsize == 6 || \vsize == 6 + vmv.v.v v0, v2 +.endif + vmv.v.v v2, v4 + vmv.v.v v4, v6 + vmv.v.v v6, v8 +.if \hsize == 6 || \vsize == 6 + vmv.v.v v8, v10 + epel_load_inc v10 \len \hsize h 1 t +.else + epel_load_inc v8 \len 4 h 1 t +.endif + add a0, a0, a1 + bnez a4, 1b + epel_load v30 \len \vsize v 0 s + vse8.v v30, (a0) + +#if __riscv_xlen == 64 + .irp n,0,1,2,3,4,5 + ld s\n, \n\()<<3(sp) + .endr + addi sp, sp, 48 +#else + .irp n,0,1,2,3,4,5 + lw s\n, \n\()<<2(sp) + .endr + addi sp, sp, 24 +#endif + + ret +endfunc +.endm + .irp len,16,8,4 put_vp8_bilin_h_v \len h a5 put_vp8_bilin_h_v \len v a6 @@ -244,4 +321,8 @@ epel \len 6 h epel \len 4 h epel \len 6 v epel \len 4 v +epel_hv \len 6 6 +epel_hv \len 4 4 +epel_hv \len 6 4 +epel_hv \len 4 6 .endr -- 2.45.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".