Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/5] lavc/vp8dsp: avoid one multiplication on RISC-V
@ 2024-05-25 15:38 Rémi Denis-Courmont
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 2/5] lavc/vp8dsp: expand single use R-V macros Rémi Denis-Courmont
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-25 15:38 UTC (permalink / raw)
  To: ffmpeg-devel

Use shifts rather than multiply, and save one instruction.
---
 libavcodec/riscv/vp8dsp_init.c | 26 ++++++++++++++------------
 libavcodec/riscv/vp8dsp_rvv.S  |  7 +++----
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index 31e8227fa4..2413fbf449 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -84,19 +84,21 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
         c->put_vp8_bilinear_pixels_tab[2][2][1] = ff_put_vp8_bilin4_hv_rvv;
         c->put_vp8_bilinear_pixels_tab[2][2][2] = ff_put_vp8_bilin4_hv_rvv;
 
-        c->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_rvv;
-        c->put_vp8_epel_pixels_tab[1][0][2] = ff_put_vp8_epel8_h6_rvv;
-        c->put_vp8_epel_pixels_tab[2][0][2] = ff_put_vp8_epel4_h6_rvv;
-        c->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_rvv;
-        c->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_rvv;
-        c->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_rvv;
+        if (flags & AV_CPU_FLAG_RVB_ADDR) {
+            c->put_vp8_epel_pixels_tab[0][0][2] = ff_put_vp8_epel16_h6_rvv;
+            c->put_vp8_epel_pixels_tab[1][0][2] = ff_put_vp8_epel8_h6_rvv;
+            c->put_vp8_epel_pixels_tab[2][0][2] = ff_put_vp8_epel4_h6_rvv;
+            c->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_rvv;
+            c->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_rvv;
+            c->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_rvv;
 
-        c->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_rvv;
-        c->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_rvv;
-        c->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_rvv;
-        c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv;
-        c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv;
-        c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv;
+            c->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_rvv;
+            c->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_rvv;
+            c->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_rvv;
+            c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv;
+            c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv;
+            c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv;
+        }
     }
 #endif
 #endif
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 9c84b1503e..cb9b0b8b5f 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -162,15 +162,14 @@ const subpel_filters
 endconst
 
 .macro epel_filter size type
-        lla             t2, subpel_filters
 .ifc \type,v
         addi            t0, a6, -1
 .else
         addi            t0, a5, -1
 .endif
-        li              t1, 6
-        mul             t0, t0, t1
-        add             t0, t0, t2
+        lla             t2, subpel_filters
+        sh1add          t0, t0, t0
+        sh1add          t0, t0, t2
         .irp n,1,2,3,4
         lb              t\n, \n(t0)
         .endr
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [FFmpeg-devel] [PATCH 2/5] lavc/vp8dsp: expand single use R-V macros
  2024-05-25 15:38 [FFmpeg-devel] [PATCH 1/5] lavc/vp8dsp: avoid one multiplication on RISC-V Rémi Denis-Courmont
@ 2024-05-25 15:38 ` Rémi Denis-Courmont
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 3/5] lavc/vp8dsp: factor R-V V bilin functions Rémi Denis-Courmont
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-25 15:38 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavcodec/riscv/vp8dsp_rvv.S | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index cb9b0b8b5f..bb0c7bf02a 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -161,7 +161,8 @@ const subpel_filters
         .byte 0,  -1,  12, 123,  -6, 0
 endconst
 
-.macro epel_filter size type
+.macro epel len size type
+func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
 .ifc \type,v
         addi            t0, a6, -1
 .else
@@ -177,9 +178,9 @@ endconst
         lb              t5, 5(t0)
         lb              t0, (t0)
 .endif
-.endm
-
-.macro epel_load dst len size type
+        vsetvlstatic8   \len
+1:
+        addi            a4, a4, -1
 .ifc \type,v
         mv              a5, a3
 .else
@@ -212,21 +213,8 @@ endconst
         vnsra.wi        v24, v24, 7
         vmax.vx         v24, v24, zero
         vsetvlstatic8   \len
-        vnclipu.wi      \dst, v24, 0
-.endm
-
-.macro epel_load_inc dst len size type
-        epel_load       \dst \len \size \type
+        vnclipu.wi      v30, v24, 0
         add             a2, a2, a3
-.endm
-
-.macro epel len size type
-func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
-        epel_filter     \size \type
-        vsetvlstatic8   \len
-1:
-        addi            a4, a4, -1
-        epel_load_inc   v30 \len \size \type
         vse8.v          v30, (a0)
         add             a0, a0, a1
         bnez            a4, 1b
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [FFmpeg-devel] [PATCH 3/5] lavc/vp8dsp: factor R-V V bilin functions
  2024-05-25 15:38 [FFmpeg-devel] [PATCH 1/5] lavc/vp8dsp: avoid one multiplication on RISC-V Rémi Denis-Courmont
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 2/5] lavc/vp8dsp: expand single use R-V macros Rémi Denis-Courmont
@ 2024-05-25 15:38 ` Rémi Denis-Courmont
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 4/5] lavc/vp8dsp: save one R-V GPR Rémi Denis-Courmont
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths Rémi Denis-Courmont
  3 siblings, 0 replies; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-25 15:38 UTC (permalink / raw)
  To: ffmpeg-devel

For a given type, only the first VSETVLI instruction varies depending
on the size.
---
 libavcodec/riscv/vp8dsp_rvv.S | 37 +++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index bb0c7bf02a..545c2e9728 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -108,9 +108,10 @@ endfunc
         vnsra.wi        \dst, v24, 3
 .endm
 
-.macro put_vp8_bilin_h_v len type mn
-func ff_put_vp8_bilin\len\()_\type\()_rvv, zve32x
-        vsetvlstatic8   \len
+.macro put_vp8_bilin_h_v type mn
+func ff_put_vp8_bilin4_\type\()_rvv, zve32x
+        vsetvlstatic8   4
+.Lbilin_\type:
         li              t1, 8
         li              t4, 4
         sub             t1, t1, \mn
@@ -126,9 +127,12 @@ func ff_put_vp8_bilin\len\()_\type\()_rvv, zve32x
 endfunc
 .endm
 
-.macro put_vp8_bilin_hv len
-func ff_put_vp8_bilin\len\()_hv_rvv, zve32x
-        vsetvlstatic8   \len
+put_vp8_bilin_h_v h a5
+put_vp8_bilin_h_v v a6
+
+func ff_put_vp8_bilin4_hv_rvv, zve32x
+        vsetvlstatic8   4
+.Lbilin_hv:
         li              t3, 8
         sub             t1, t3, a5
         sub             t2, t3, a6
@@ -149,7 +153,23 @@ func ff_put_vp8_bilin\len\()_hv_rvv, zve32x
 
         ret
 endfunc
-.endm
+
+.irp len,16,8
+func ff_put_vp8_bilin\len\()_h_rvv, zve32x
+        vsetvlstatic8 \len
+        j             .Lbilin_h
+endfunc
+
+func ff_put_vp8_bilin\len\()_v_rvv, zve32x
+        vsetvlstatic8 \len
+        j             .Lbilin_v
+endfunc
+
+func ff_put_vp8_bilin\len\()_hv_rvv, zve32x
+        vsetvlstatic8 \len
+        j             .Lbilin_hv
+endfunc
+.endr
 
 const subpel_filters
         .byte 0,  -6, 123,  12,  -1, 0
@@ -224,9 +244,6 @@ endfunc
 .endm
 
 .irp len,16,8,4
-put_vp8_bilin_h_v \len h a5
-put_vp8_bilin_h_v \len v a6
-put_vp8_bilin_hv \len
 epel \len 6 h
 epel \len 4 h
 epel \len 6 v
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [FFmpeg-devel] [PATCH 4/5] lavc/vp8dsp: save one R-V GPR
  2024-05-25 15:38 [FFmpeg-devel] [PATCH 1/5] lavc/vp8dsp: avoid one multiplication on RISC-V Rémi Denis-Courmont
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 2/5] lavc/vp8dsp: expand single use R-V macros Rémi Denis-Courmont
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 3/5] lavc/vp8dsp: factor R-V V bilin functions Rémi Denis-Courmont
@ 2024-05-25 15:38 ` Rémi Denis-Courmont
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths Rémi Denis-Courmont
  3 siblings, 0 replies; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-25 15:38 UTC (permalink / raw)
  To: ffmpeg-devel

This saves one instruction and frees up A5, which will be repurposed in
later changes. Unfortunately, we need to add quite a lot of alternative
code for this.
---
 libavcodec/riscv/vp8dsp_rvv.S | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 545c2e9728..a4fcd158a5 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -202,23 +202,31 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
 1:
         addi            a4, a4, -1
 .ifc \type,v
-        mv              a5, a3
+        sub             t6, a2, a3
+        add             a7, a2, a3
 .else
-        li              a5, 1
+        addi            t6, a2, -1
+        addi            a7, a2, 1
 .endif
-        sub             t6, a2, a5
-        add             a7, a2, a5
-
         vle8.v          v24, (a2)
         vle8.v          v22, (t6)
         vle8.v          v26, (a7)
-        add             a7, a7, a5
+.ifc \type,v
+        add             a7, a7, a3
+.else
+        addi            a7, a7, 1
+.endif
         vle8.v          v28, (a7)
         vwmulu.vx       v16, v24, t2
         vwmulu.vx       v20, v26, t3
 .ifc \size,6
-        sub             t6, t6, a5
-        add             a7, a7, a5
+.ifc \type,v
+        sub             t6, t6, a3
+        add             a7, a7, a3
+.else
+        addi            t6, t6, -1
+        addi            a7, a7, 1
+.endif
         vle8.v          v24, (t6)
         vle8.v          v26, (a7)
         vwmaccu.vx      v16, t0, v24
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths
  2024-05-25 15:38 [FFmpeg-devel] [PATCH 1/5] lavc/vp8dsp: avoid one multiplication on RISC-V Rémi Denis-Courmont
                   ` (2 preceding siblings ...)
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 4/5] lavc/vp8dsp: save one R-V GPR Rémi Denis-Courmont
@ 2024-05-25 15:38 ` Rémi Denis-Courmont
  2024-05-25 18:16   ` flow gg
  3 siblings, 1 reply; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-25 15:38 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavcodec/riscv/vp8dsp_rvv.S | 56 ++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index a4fcd158a5..002e7f3174 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -32,16 +32,6 @@
 .endif
 .endm
 
-.macro vsetvlstatic16 len
-.if \len <= 4
-        vsetivli        zero, \len, e16, mf2, ta, ma
-.elseif \len <= 8
-        vsetivli        zero, \len, e16, m1, ta, ma
-.elseif \len <= 16
-        vsetivli        zero, \len, e16, m2, ta, ma
-.endif
-.endm
-
 .macro vp8_idct_dc_add
         vlse32.v      v0, (a0), a2
         lh            a5, 0(a1)
@@ -181,13 +171,8 @@ const subpel_filters
         .byte 0,  -1,  12, 123,  -6, 0
 endconst
 
-.macro epel len size type
-func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
-.ifc \type,v
-        addi            t0, a6, -1
-.else
-        addi            t0, a5, -1
-.endif
+.macro epel_common size, type
+func ff_put_vp8_epel_\type\()\size\().rvv, zve32x
         lla             t2, subpel_filters
         sh1add          t0, t0, t0
         sh1add          t0, t0, t2
@@ -198,7 +183,6 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
         lb              t5, 5(t0)
         lb              t0, (t0)
 .endif
-        vsetvlstatic8   \len
 1:
         addi            a4, a4, -1
 .ifc \type,v
@@ -236,11 +220,11 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
         vwmaccsu.vx     v16, t1, v22
         vwmaccsu.vx     v16, t4, v28
         vwadd.wx        v16, v16, t6
-        vsetvlstatic16  \len
+        vsetvl          zero, zero, a6 # e16
         vwadd.vv        v24, v16, v20
         vnsra.wi        v24, v24, 7
         vmax.vx         v24, v24, zero
-        vsetvlstatic8   \len
+        vsetvl          zero, zero, a5 # e8
         vnclipu.wi      v30, v24, 0
         add             a2, a2, a3
         vse8.v          v30, (a0)
@@ -251,9 +235,33 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
 endfunc
 .endm
 
+.macro epel len, size, type
+func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
+.ifc \type,v
+        addi    t0, a6, -1
+.else
+        addi    t0, a5, -1
+.endif
+.if \len <= 4
+        li      a5, 0306 # e8, mf4, ta, ma
+        li      a6, 0317 # e16, mf2, ta, ma
+.elseif \len <= 8
+        li      a5, 0307 # e8, mf2, ta, ma
+        li      a6, 0310 # e16, m1, ta, ma
+.else # if len <= 16
+        li      a5, 0300 # e8, m1, ta, ma
+        li      a6, 0311 # e16, m2, ta, ma
+.endif
+        vsetvlstatic8 \len
+        j       ff_put_vp8_epel_\type\()\size\().rvv
+endfunc
+.endm
+
+.irp type,h,v
+.irp size,4,6
+epel_common \size, \type
 .irp len,16,8,4
-epel \len 6 h
-epel \len 4 h
-epel \len 6 v
-epel \len 4 v
+epel \len, \size, \type
+.endr
+.endr
 .endr
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths
  2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths Rémi Denis-Courmont
@ 2024-05-25 18:16   ` flow gg
  2024-05-25 18:29     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 9+ messages in thread
From: flow gg @ 2024-05-25 18:16 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Would it be better to replace the two vsetvlstatic8 and vsetvlstatic16 with
two vsetvl? This would require the previous patch and this one to work
together, increasing the number of lines of code and making the code a bit
harder to read.
Additionally, I have a question about patch 4 'save one R-V GPR' and patch
5. Should they be submitted as a single patch? Because patch 4 looks
similar to what I initially submitted, and you suggested changing it to
save lines of code. If it is only for patch 5, shouldn't they be combined
together?

Rémi Denis-Courmont <remi@remlab.net> 于2024年5月25日周六 23:39写道:

> ---
>  libavcodec/riscv/vp8dsp_rvv.S | 56 ++++++++++++++++++++---------------
>  1 file changed, 32 insertions(+), 24 deletions(-)
>
> diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
> index a4fcd158a5..002e7f3174 100644
> --- a/libavcodec/riscv/vp8dsp_rvv.S
> +++ b/libavcodec/riscv/vp8dsp_rvv.S
> @@ -32,16 +32,6 @@
>  .endif
>  .endm
>
> -.macro vsetvlstatic16 len
> -.if \len <= 4
> -        vsetivli        zero, \len, e16, mf2, ta, ma
> -.elseif \len <= 8
> -        vsetivli        zero, \len, e16, m1, ta, ma
> -.elseif \len <= 16
> -        vsetivli        zero, \len, e16, m2, ta, ma
> -.endif
> -.endm
> -
>  .macro vp8_idct_dc_add
>          vlse32.v      v0, (a0), a2
>          lh            a5, 0(a1)
> @@ -181,13 +171,8 @@ const subpel_filters
>          .byte 0,  -1,  12, 123,  -6, 0
>  endconst
>
> -.macro epel len size type
> -func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
> -.ifc \type,v
> -        addi            t0, a6, -1
> -.else
> -        addi            t0, a5, -1
> -.endif
> +.macro epel_common size, type
> +func ff_put_vp8_epel_\type\()\size\().rvv, zve32x
>          lla             t2, subpel_filters
>          sh1add          t0, t0, t0
>          sh1add          t0, t0, t2
> @@ -198,7 +183,6 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv,
> zve32x
>          lb              t5, 5(t0)
>          lb              t0, (t0)
>  .endif
> -        vsetvlstatic8   \len
>  1:
>          addi            a4, a4, -1
>  .ifc \type,v
> @@ -236,11 +220,11 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv,
> zve32x
>          vwmaccsu.vx     v16, t1, v22
>          vwmaccsu.vx     v16, t4, v28
>          vwadd.wx        v16, v16, t6
> -        vsetvlstatic16  \len
> +        vsetvl          zero, zero, a6 # e16
>          vwadd.vv        v24, v16, v20
>          vnsra.wi        v24, v24, 7
>          vmax.vx         v24, v24, zero
> -        vsetvlstatic8   \len
> +        vsetvl          zero, zero, a5 # e8
>          vnclipu.wi      v30, v24, 0
>          add             a2, a2, a3
>          vse8.v          v30, (a0)
> @@ -251,9 +235,33 @@ func ff_put_vp8_epel\len\()_\type\()\size\()_rvv,
> zve32x
>  endfunc
>  .endm
>
> +.macro epel len, size, type
> +func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
> +.ifc \type,v
> +        addi    t0, a6, -1
> +.else
> +        addi    t0, a5, -1
> +.endif
> +.if \len <= 4
> +        li      a5, 0306 # e8, mf4, ta, ma
> +        li      a6, 0317 # e16, mf2, ta, ma
> +.elseif \len <= 8
> +        li      a5, 0307 # e8, mf2, ta, ma
> +        li      a6, 0310 # e16, m1, ta, ma
> +.else # if len <= 16
> +        li      a5, 0300 # e8, m1, ta, ma
> +        li      a6, 0311 # e16, m2, ta, ma
> +.endif
> +        vsetvlstatic8 \len
> +        j       ff_put_vp8_epel_\type\()\size\().rvv
> +endfunc
> +.endm
> +
> +.irp type,h,v
> +.irp size,4,6
> +epel_common \size, \type
>  .irp len,16,8,4
> -epel \len 6 h
> -epel \len 4 h
> -epel \len 6 v
> -epel \len 4 v
> +epel \len, \size, \type
> +.endr
> +.endr
>  .endr
> --
> 2.45.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths
  2024-05-25 18:16   ` flow gg
@ 2024-05-25 18:29     ` Rémi Denis-Courmont
  2024-05-25 18:56       ` flow gg
  0 siblings, 1 reply; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-25 18:29 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le lauantaina 25. toukokuuta 2024, 21.16.22 EEST flow gg a écrit :
> Would it be better to replace the two vsetvlstatic8 and vsetvlstatic16 with
> two vsetvl?

The other option is to hard-code the most pessimistic multiplier. That would
be easier to read and save two instructions in the head, but it would most
likely end up slower overall, due to increased latency from the vector unit in
the main loop.

On the other hand, with vsetvl, we have the option to adjust the multiplier at 
run-time depending on hardware vector size. That will not be possible with 
vsetvli unless we patch the code live (yikes).

> This would require the previous patch and this one to work
> together,

Yes, patch order matters.

> increasing the number of lines of code

This reduces code size by over 2 KiB, or several hundred instructions.

> Additionally, I have a question about patch 4 'save one R-V GPR' and patch
> 5. Should they be submitted as a single patch? Because patch 4 looks
> similar to what I initially submitted, and you suggested changing it to
> save lines of code. If it is only for patch 5, shouldn't they be combined
> together?

I think people here like to have as many small patches as possible, as is
generally considered the right way to use Git. Since patch 4 is a very minor
but still independent (from patch 5) improvement, it should be separate, as
far as I understand FFmpeg's practices.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths
  2024-05-25 18:29     ` Rémi Denis-Courmont
@ 2024-05-25 18:56       ` flow gg
  2024-05-25 19:15         ` Rémi Denis-Courmont
  0 siblings, 1 reply; 9+ messages in thread
From: flow gg @ 2024-05-25 18:56 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Well, I'm mainly considering that we have added some vset related lines,
but they haven't played a new role for the time being. If it's for future
modifications, it does make sense.

> This is reducing code size by over 2 kib of code, or several hundreds of
instructions.

The reduction in code size seems to be due to switching to using j labels,
doesn't seem to be about vset, but another issue. j labels are indeed
better. I will make similar modifications.

Rémi Denis-Courmont <remi@remlab.net> 于2024年5月26日周日 02:29写道:

> Le lauantaina 25. toukokuuta 2024, 21.16.22 EEST flow gg a écrit :
> > Would it be better to replace the two vsetvlstatic8 and vsetvlstatic16
> with
> > two vsetvl?
>
> The other option is to hard-code the most pessimistic multiplier. That
> would
> be easier to read and save two instructions in the head, it would most
> likely
> end up slower overall, due to increased latency from the vector unit in
> the
> main loop.
>
> On the other hand, with vsetvl, we have the option to adjust the
> multiplier at
> run-time depending on hardware vector size. That will not be possible with
> vsetvli unless we patch the code live (yikes).
>
> > This would require the previous patch and this one to work
> > together,
>
> Yes, patch order matters.
>
> > increasing the number of lines of code
>
> This is reducing code size by over 2 kib of code, or several hundreds of
> instructions.
>
> > Additionally, I have a question about patch 4 'save one R-V GPR' and
> patch
> > 5. Should they be submitted as a single patch? Because patch 4 looks
> > similar to what I initially submitted, and you suggested changing it to
> > save lines of code. If it is only for patch 5, shouldn't they be combined
> > together?
>
> I think people here like to have as small and many patches as possible, as
> is
> generally considered the right way to use Git. Since patch 4 is a very
> minor
> but still independent (from patch 5) improvement, it should be separate,
> as
> far as I understand FFmpeg's practices.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths
  2024-05-25 18:56       ` flow gg
@ 2024-05-25 19:15         ` Rémi Denis-Courmont
  0 siblings, 0 replies; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-25 19:15 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le lauantaina 25. toukokuuta 2024, 21.56.58 EEST flow gg a écrit :
> The reduction in code size seems to be due to switching to using j labels,
> doesn't seem to be about vset, but another issue. j labels are indeed
> better. I will make similar modifications.

If we don't use vsetvl, then we have to hard-code the worst-case multiplier for
all lengths, which should be slower. That or we can't share the code with
jumps at all.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-05-25 19:15 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-25 15:38 [FFmpeg-devel] [PATCH 1/5] lavc/vp8dsp: avoid one multiplication on RISC-V Rémi Denis-Courmont
2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 2/5] lavc/vp8dsp: expand single use R-V macros Rémi Denis-Courmont
2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 3/5] lavc/vp8dsp: factor R-V V bilin functions Rémi Denis-Courmont
2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 4/5] lavc/vp8dsp: save one R-V GPR Rémi Denis-Courmont
2024-05-25 15:38 ` [FFmpeg-devel] [PATCH 5/5] lavc/vp8dsp: factor R-V V EPEL functions for all lengths Rémi Denis-Courmont
2024-05-25 18:16   ` flow gg
2024-05-25 18:29     ` Rémi Denis-Courmont
2024-05-25 18:56       ` flow gg
2024-05-25 19:15         ` Rémi Denis-Courmont

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git