Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v
@ 2024-03-22  6:01 flow gg
  2024-03-27 15:36 ` Rémi Denis-Courmont
  0 siblings, 1 reply; 3+ messages in thread
From: flow gg @ 2024-03-22  6:01 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0002-lavc-vp8dsp-R-V-V-put_epel-v.patch --]
[-- Type: text/x-patch, Size: 4379 bytes --]

From a59509c554a319f8271ad4175da40788445f7a56 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Thu, 21 Mar 2024 17:49:54 +0800
Subject: [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v

C908:
vp8_put_epel4_v4_c: 11.0
vp8_put_epel4_v4_rvv_i32: 5.0
vp8_put_epel4_v6_c: 16.5
vp8_put_epel4_v6_rvv_i32: 6.2
vp8_put_epel8_v4_c: 43.7
vp8_put_epel8_v4_rvv_i32: 11.2
vp8_put_epel8_v6_c: 68.7
vp8_put_epel8_v6_rvv_i32: 13.2
vp8_put_epel16_v4_c: 92.5
vp8_put_epel16_v4_rvv_i32: 13.7
vp8_put_epel16_v6_c: 135.7
vp8_put_epel16_v6_rvv_i32: 16.5
---
 libavcodec/riscv/vp8dsp_init.c |  7 ++++++
 libavcodec/riscv/vp8dsp_rvv.S  | 44 +++++++++++++++++++++++++++-------
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index 6614d661f7..2f123b67fe 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -85,6 +85,13 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
         c->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_rvv;
         c->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_rvv;
         c->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_rvv;
+
+        c->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_rvv;
+        c->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_rvv;
+        c->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_rvv;
+        c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv;
+        c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv;
+        c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index a0dd46e3a8..134154acfc 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -233,9 +233,13 @@ subpel_filters:
         .byte 1,  -8,  36, 108, -11, 2
         .byte 0,  -1,  12, 123,  -6, 0
 
-.macro epel_filter size
+.macro epel_filter size type
         lla             t2, subpel_filters
+.ifc \type,v
+        addi            t0, a6, -1
+.elseif \type == h
         addi            t0, a5, -1
+.endif
         li              t1, 6
         mul             t0, t0, t1
         add             t0, t0, t2
@@ -248,19 +252,33 @@ subpel_filters:
 .endif
 .endm
 
-.macro epel_load dst len size
+.macro epel_load dst len size type
+.ifc \type,v
+        sub             t6, a2, a3
+        add             a7, a2, a3
+.elseif \type == h
         addi            t6, a2, -1
         addi            a7, a2, 1
+.endif
         vle8.v          v24, (a2)
         vle8.v          v22, (t6)
         vle8.v          v26, (a7)
+.ifc \type,v
+        add             a7, a7, a3
+.elseif \type == h
         addi            a7, a7, 1
+.endif
         vle8.v          v28, (a7)
         vwmulu.vx       v16, v24, t2
         vwmulu.vx       v20, v26, t3
 .ifc \size,6
+.ifc \type,v
+        sub             t6, t6, a3
+        add             a7, a7, a3
+.elseif \type == h
         addi            t6, t6, -1
         addi            a7, a7, 1
+.endif
         vle8.v          v24, (t6)
         vle8.v          v26, (a7)
         vwmaccu.vx      v16, t0, v24
@@ -292,13 +310,13 @@ subpel_filters:
         vnclipu.wi      \dst, v24, 0
 .endm
 
-.macro epel_load_inc dst len size
-        epel_load       \dst \len \size
+.macro epel_load_inc dst len size type
+        epel_load       \dst \len \size \type
         add             a2, a2, a3
 .endm
 
-.macro epel len size
-        epel_filter     \size
+.macro epel len size type
+        epel_filter     \size \type
 
 .ifc \len,4
         vsetivli        zero, 4, e8, mf4, ta, ma
@@ -310,7 +328,7 @@ subpel_filters:
 
 1:
         addi            a4, a4, -1
-        epel_load_inc   v30 \len \size
+        epel_load_inc   v30 \len \size \type
         vse8.v          v30, (a0)
         add             a0, a0, a1
         bnez            a4, 1b
@@ -320,10 +338,18 @@ subpel_filters:
 
 .irp len 16,8,4
 func ff_put_vp8_epel\len\()_h6_rvv, zve32x
-        epel \len 6
+        epel \len 6 h
 endfunc
 
 func ff_put_vp8_epel\len\()_h4_rvv, zve32x
-        epel \len 4
+        epel \len 4 h
+endfunc
+
+func ff_put_vp8_epel\len\()_v6_rvv, zve32x
+        epel \len 6 v
+endfunc
+
+func ff_put_vp8_epel\len\()_v4_rvv, zve32x
+        epel \len 4 v
 endfunc
 .endr
-- 
2.44.0


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v
  2024-03-22  6:01 [FFmpeg-devel] [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v flow gg
@ 2024-03-27 15:36 ` Rémi Denis-Courmont
  2024-03-28  2:16   ` flow gg
  0 siblings, 1 reply; 3+ messages in thread
From: Rémi Denis-Courmont @ 2024-03-27 15:36 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le perjantaina 22. maaliskuuta 2024, 8.01.21 EET flow gg a écrit :
> 

IMO, you could just as well share the code and avoid most if's. Not like one 
additional `li a3, 1` per function call is going to matter in the grand scheme 
of things. It might even help by reducing I-cache pressure.

-- 
雷米‧德尼-库尔蒙
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v
  2024-03-27 15:36 ` Rémi Denis-Courmont
@ 2024-03-28  2:16   ` flow gg
  0 siblings, 0 replies; 3+ messages in thread
From: flow gg @ 2024-03-28  2:16 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1066 bytes --]

Okay, changed in this reply and on GitHub. (Another reason for not doing so
initially was that I thought there weren't enough registers available, and
that other changes, which could cause side effects, would have been needed.
It turns out that the registers are sufficient for vp8; it's only vp9 that
doesn't have enough.)

Rémi Denis-Courmont <remi@remlab.net> 于2024年3月27日周三 23:36写道:

> Le perjantaina 22. maaliskuuta 2024, 8.01.21 EET flow gg a écrit :
> >
>
> IMO, you could just as well share the code and avoid most if's. Not like
> one
> additional `li a3, 1` per function call is going to matter in the grand
> scheme
> of things. It might even help by reducing I-cache pressure.
>
> --
> 雷米‧德尼-库尔蒙
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>

[-- Attachment #2: 0002-lavc-vp8dsp-R-V-V-put_epel-v.patch --]
[-- Type: text/x-patch, Size: 4308 bytes --]

From 920e5274b9fb98fc1ac97d0644a9bb7c890e8f39 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Thu, 21 Mar 2024 17:49:54 +0800
Subject: [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v

C908:
vp8_put_epel4_v4_c: 11.0
vp8_put_epel4_v4_rvv_i32: 5.0
vp8_put_epel4_v6_c: 16.5
vp8_put_epel4_v6_rvv_i32: 6.2
vp8_put_epel8_v4_c: 43.7
vp8_put_epel8_v4_rvv_i32: 11.2
vp8_put_epel8_v6_c: 68.7
vp8_put_epel8_v6_rvv_i32: 13.2
vp8_put_epel16_v4_c: 92.5
vp8_put_epel16_v4_rvv_i32: 13.7
vp8_put_epel16_v6_c: 135.7
vp8_put_epel16_v6_rvv_i32: 16.5
---
 libavcodec/riscv/vp8dsp_init.c |  7 ++++++
 libavcodec/riscv/vp8dsp_rvv.S  | 46 +++++++++++++++++++++++-----------
 2 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/libavcodec/riscv/vp8dsp_init.c b/libavcodec/riscv/vp8dsp_init.c
index 6614d661f7..2f123b67fe 100644
--- a/libavcodec/riscv/vp8dsp_init.c
+++ b/libavcodec/riscv/vp8dsp_init.c
@@ -85,6 +85,13 @@ av_cold void ff_vp78dsp_init_riscv(VP8DSPContext *c)
         c->put_vp8_epel_pixels_tab[0][0][1] = ff_put_vp8_epel16_h4_rvv;
         c->put_vp8_epel_pixels_tab[1][0][1] = ff_put_vp8_epel8_h4_rvv;
         c->put_vp8_epel_pixels_tab[2][0][1] = ff_put_vp8_epel4_h4_rvv;
+
+        c->put_vp8_epel_pixels_tab[0][2][0] = ff_put_vp8_epel16_v6_rvv;
+        c->put_vp8_epel_pixels_tab[1][2][0] = ff_put_vp8_epel8_v6_rvv;
+        c->put_vp8_epel_pixels_tab[2][2][0] = ff_put_vp8_epel4_v6_rvv;
+        c->put_vp8_epel_pixels_tab[0][1][0] = ff_put_vp8_epel16_v4_rvv;
+        c->put_vp8_epel_pixels_tab[1][1][0] = ff_put_vp8_epel8_v4_rvv;
+        c->put_vp8_epel_pixels_tab[2][1][0] = ff_put_vp8_epel4_v4_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index 84e8ec61de..440a965ddd 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -234,9 +234,13 @@ const subpel_filters
         .byte 0,  -1,  12, 123,  -6, 0
 endconst
 
-.macro epel_filter size
+.macro epel_filter size type
         lla             t2, subpel_filters
+.ifc \type,v
+        addi            t0, a6, -1
+.elseif \type == h
         addi            t0, a5, -1
+.endif
         li              t1, 6
         mul             t0, t0, t1
         add             t0, t0, t2
@@ -249,19 +253,25 @@ endconst
 .endif
 .endm
 
-.macro epel_load dst len size
-        addi            t6, a2, -1
-        addi            a7, a2, 1
+.macro epel_load dst len size type
+.ifc \type,v
+        mv              a5, a3
+.else
+        li              a5, 1
+.endif
+        sub             t6, a2, a5
+        add             a7, a2, a5
+
         vle8.v          v24, (a2)
         vle8.v          v22, (t6)
         vle8.v          v26, (a7)
-        addi            a7, a7, 1
+        add             a7, a7, a5
         vle8.v          v28, (a7)
         vwmulu.vx       v16, v24, t2
         vwmulu.vx       v20, v26, t3
 .ifc \size,6
-        addi            t6, t6, -1
-        addi            a7, a7, 1
+        sub             t6, t6, a5
+        add             a7, a7, a5
         vle8.v          v24, (t6)
         vle8.v          v26, (a7)
         vwmaccu.vx      v16, t0, v24
@@ -293,13 +303,13 @@ endconst
         vnclipu.wi      \dst, v24, 0
 .endm
 
-.macro epel_load_inc dst len size
-        epel_load       \dst \len \size
+.macro epel_load_inc dst len size type
+        epel_load       \dst \len \size \type
         add             a2, a2, a3
 .endm
 
-.macro epel len size
-        epel_filter     \size
+.macro epel len size type
+        epel_filter     \size \type
 
 .ifc \len,4
         vsetivli        zero, 4, e8, mf4, ta, ma
@@ -311,7 +321,7 @@ endconst
 
 1:
         addi            a4, a4, -1
-        epel_load_inc   v30 \len \size
+        epel_load_inc   v30 \len \size \type
         vse8.v          v30, (a0)
         add             a0, a0, a1
         bnez            a4, 1b
@@ -321,10 +331,18 @@ endconst
 
 .irp len 16,8,4
 func ff_put_vp8_epel\len\()_h6_rvv, zve32x
-        epel \len 6
+        epel \len 6 h
 endfunc
 
 func ff_put_vp8_epel\len\()_h4_rvv, zve32x
-        epel \len 4
+        epel \len 4 h
+endfunc
+
+func ff_put_vp8_epel\len\()_v6_rvv, zve32x
+        epel \len 6 v
+endfunc
+
+func ff_put_vp8_epel\len\()_v4_rvv, zve32x
+        epel \len 4 v
 endfunc
 .endr
-- 
2.44.0


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-03-28  2:16 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-22  6:01 [FFmpeg-devel] [PATCH 2/3] lavc/vp8dsp: R-V V put_epel v flow gg
2024-03-27 15:36 ` Rémi Denis-Courmont
2024-03-28  2:16   ` flow gg

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git