Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/2] lavu/riscv: assembler macros for VTYPE fields
@ 2024-05-14 19:35 Rémi Denis-Courmont
  2024-05-14 19:35 ` [FFmpeg-devel] [PATCH 2/2] lavc/flacdsp: optimise RVV vector type for lpc16 Rémi Denis-Courmont
  0 siblings, 1 reply; 3+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-14 19:35 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavutil/riscv/asm.S | 48 +++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
index 14be5055f5..ecf3081e61 100644
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@@ -96,20 +96,38 @@
         .endm
 #endif
 
+#define VTYPE_E8   000
+#define VTYPE_E16  010
+#define VTYPE_E32  020
+#define VTYPE_E64  030
+
+#define VTYPE_MF8   05
+#define VTYPE_MF4   06
+#define VTYPE_MF2   07
+#define VTYPE_M1    00
+#define VTYPE_M2    01
+#define VTYPE_M4    02
+#define VTYPE_M8    03
+
+#define VTYPE_TU  0000
+#define VTYPE_TA  0100
+#define VTYPE_MU  0000
+#define VTYPE_MA  0200
+
         /* Convenience macro to load a Vector type (vtype) as immediate */
         .macro  lvtypei rd, e, m=m1, tp=tu, mp=mu
 
         .ifc \e,e8
-        .equ ei, 0
+        .equ ei, VTYPE_E8
         .else
         .ifc \e,e16
-        .equ ei, 8
+        .equ ei, VTYPE_E16
         .else
         .ifc \e,e32
-        .equ ei, 16
+        .equ ei, VTYPE_E32
         .else
         .ifc \e,e64
-        .equ ei, 24
+        .equ ei, VTYPE_E64
         .else
         .error "Unknown element type"
         .endif
@@ -118,25 +136,25 @@
         .endif
 
         .ifc \m,m1
-        .equ mi, 0
+        .equ mi, VTYPE_M1
         .else
         .ifc \m,m2
-        .equ mi, 1
+        .equ mi, VTYPE_M2
         .else
         .ifc \m,m4
-        .equ mi, 2
+        .equ mi, VTYPE_M4
         .else
         .ifc \m,m8
-        .equ mi, 3
+        .equ mi, VTYPE_M8
         .else
         .ifc \m,mf8
-        .equ mi, 5
+        .equ mi, VTYPE_MF8
         .else
         .ifc \m,mf4
-        .equ mi, 6
+        .equ mi, VTYPE_MF4
         .else
         .ifc \m,mf2
-        .equ mi, 7
+        .equ mi, VTYPE_MF2
         .else
         .error "Unknown multiplier"
         .equ mi, 3
@@ -149,20 +167,20 @@
         .endif
 
         .ifc \tp,tu
-        .equ tpi, 0
+        .equ tpi, VTYPE_TU
         .else
         .ifc \tp,ta
-        .equ tpi, 64
+        .equ tpi, VTYPE_TA
         .else
         .error "Unknown tail policy"
         .endif
         .endif
 
         .ifc \mp,mu
-        .equ mpi, 0
+        .equ mpi, VTYPE_MU
         .else
         .ifc \mp,ma
-        .equ mpi, 128
+        .equ mpi, VTYPE_MA
         .else
         .error "Unknown mask policy"
         .endif
-- 
2.43.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [FFmpeg-devel] [PATCH 2/2] lavc/flacdsp: optimise RVV vector type for lpc16
  2024-05-14 19:35 [FFmpeg-devel] [PATCH 1/2] lavu/riscv: assembler macros for VTYPE fields Rémi Denis-Courmont
@ 2024-05-14 19:35 ` Rémi Denis-Courmont
  2024-05-14 19:42   ` Rémi Denis-Courmont
  0 siblings, 1 reply; 3+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-14 19:35 UTC (permalink / raw)
  To: ffmpeg-devel

This calculates the optimal vector type value at run-time based on the
hardware vector length and the FLAC LPC prediction order. In this
particular case, the additional computation is easily amortised over
the loop iterations:

T-Head C908:       C        V before   V after
flac_lpc_16_13:     14180.2  11229.0     7338.5
flac_lpc_16_16:     16833.2  11091.0     7248.5
flac_lpc_16_29:     28817.2  11455.7    10506.5
flac_lpc_16_32:     31059.7  10368.5    11305.2

With 128-bit vectors, improvements are expected for the first two
test cases only. For the other two, there is some overhead, but it is
below the measurement noise. Improvements should be more noticeable
with prediction orders of 8 or less, or on hardware with larger vector
sizes.

The same optimisation strategy should be applicable to LPC32
(and work-in-progress LPC33), but is left as a future exercise.
---
 libavcodec/riscv/flacdsp_init.c |  2 +-
 libavcodec/riscv/flacdsp_rvv.S  | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
index 77ffd09244..097f938f04 100644
--- a/libavcodec/riscv/flacdsp_init.c
+++ b/libavcodec/riscv/flacdsp_init.c
@@ -71,7 +71,7 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
     if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
         int vlenb = ff_get_rv_vlenb();
 
-        if (vlenb >= 16)
+        if ((flags & AV_CPU_FLAG_RVB_BASIC) && vlenb >= 16)
             c->lpc16 = ff_flac_lpc16_rvv;
 
         c->wasted32 = ff_flac_wasted32_rvv;
diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
index 8b9c626198..42cece9786 100644
--- a/libavcodec/riscv/flacdsp_rvv.S
+++ b/libavcodec/riscv/flacdsp_rvv.S
@@ -20,8 +20,14 @@
 
 #include "libavutil/riscv/asm.S"
 
-func ff_flac_lpc16_rvv, zve32x
-        vsetvli zero, a2, e32, m8, ta, ma
+func ff_flac_lpc16_rvv, zve32x, zbb
+        csrr    t0, vlenb
+        addi    t2, a2, -1
+        clz     t0, t0
+        clz     t2, t2
+        addi    t0, t0, VTYPE_E32 | VTYPE_M8 | VTYPE_TA | VTYPE_MA
+        sub     t0, t0, t2 // t0 += log2(next_power_of_two(len) / vlenb) - 1
+        vsetvl  zero, a2, t0
         vle32.v v8, (a1)
         sub     a4, a4, a2
         vle32.v v16, (a0)
-- 
2.43.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] lavc/flacdsp: optimise RVV vector type for lpc16
  2024-05-14 19:35 ` [FFmpeg-devel] [PATCH 2/2] lavc/flacdsp: optimise RVV vector type for lpc16 Rémi Denis-Courmont
@ 2024-05-14 19:42   ` Rémi Denis-Courmont
  0 siblings, 0 replies; 3+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-14 19:42 UTC (permalink / raw)
  To: ffmpeg-devel

Le tiistaina 14. toukokuuta 2024, 22.35.57 EEST Rémi Denis-Courmont a écrit :
> This calculates the optimal vector type value at run-time based on the
> hardware vector length and the FLAC LPC prediction order. In this
> particular case, the additional computation is easily amortised over
> the loop iterations:
> 
> T-Head C908:       C        V before   V after
> flac_lpc_16_13:     14180.2  11229.0     7338.5
> flac_lpc_16_16:     16833.2  11091.0     7248.5
> flac_lpc_16_29:     28817.2  11455.7    10506.5
> flac_lpc_16_32:     31059.7  10368.5    11305.2
> 
> With 128-bit vectors, improvements are expected for the first two
> test cases only. For the other two, there is overhead but below noise.
> Improvements should be better observable with prediction order of 8
> and less, or on hardware with larger vector sizes.
> 
> The same optimisation strategy should be applicable to LPC32
> (and work-in-progress LPC33), but is left as a future exercise.
> ---
>  libavcodec/riscv/flacdsp_init.c |  2 +-
>  libavcodec/riscv/flacdsp_rvv.S  | 10 ++++++++--
>  2 files changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
> index 77ffd09244..097f938f04 100644
> --- a/libavcodec/riscv/flacdsp_init.c
> +++ b/libavcodec/riscv/flacdsp_init.c
> @@ -71,7 +71,7 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
>      if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
>          int vlenb = ff_get_rv_vlenb();
> 
> -        if (vlenb >= 16)
> +        if ((flags & AV_CPU_FLAG_RVB_BASIC) && vlenb >= 16)
>              c->lpc16 = ff_flac_lpc16_rvv;
> 
>          c->wasted32 = ff_flac_wasted32_rvv;
> diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
> index 8b9c626198..42cece9786 100644
> --- a/libavcodec/riscv/flacdsp_rvv.S
> +++ b/libavcodec/riscv/flacdsp_rvv.S
> @@ -20,8 +20,14 @@
> 
>  #include "libavutil/riscv/asm.S"
> 
> -func ff_flac_lpc16_rvv, zve32x
> -        vsetvli zero, a2, e32, m8, ta, ma
> +func ff_flac_lpc16_rvv, zve32x, zbb
> +        csrr    t0, vlenb
> +        addi    t2, a2, -1
> +        clz     t0, t0
> +        clz     t2, t2
> +        addi    t0, t0, VTYPE_E32 | VTYPE_M8 | VTYPE_TA | VTYPE_MA
> +        sub     t0, t0, t2 // t0 += log2(next_power_of_two(len) / vlenb) - 1

OK, so checkasm cannot detect this, since we do not test those cases,
but I suspect that this might crash due to an illegal vector
configuration if:
- pred_order <= 2 with 128-bit vectors,
- pred_order <= 4 with 256-bit vectors,
- and so on.

This needs a little more work.

> +        vsetvl  zero, a2, t0
>          vle32.v v8, (a1)
>          sub     a4, a4, a2
>          vle32.v v16, (a0)


-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-05-14 19:42 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-14 19:35 [FFmpeg-devel] [PATCH 1/2] lavu/riscv: assembler macros for VTYPE fields Rémi Denis-Courmont
2024-05-14 19:35 ` [FFmpeg-devel] [PATCH 2/2] lavc/flacdsp: optimise RVV vector type for lpc16 Rémi Denis-Courmont
2024-05-14 19:42   ` Rémi Denis-Courmont

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git