Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert
@ 2024-05-13 16:59 uk7b
  2024-05-13 17:01 ` flow gg
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: uk7b @ 2024-05-13 16:59 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: sunyuechi

From: sunyuechi <sunyuechi@iscas.ac.cn>

C908:
vp9_vert_8x8_8bpp_c: 22.0
vp9_vert_8x8_8bpp_rvi: 15.7
vp9_vert_16x16_8bpp_c: 71.2
vp9_vert_16x16_8bpp_rvi: 39.0
vp9_vert_32x32_8bpp_c: 300.2
vp9_vert_32x32_8bpp_rvi: 135.2
---
 libavcodec/riscv/Makefile        |  1 +
 libavcodec/riscv/vp9_intra_rvi.S | 71 ++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp.h        |  6 +++
 libavcodec/riscv/vp9dsp_init.c   | 15 +++++--
 4 files changed, 90 insertions(+), 3 deletions(-)
 create mode 100644 libavcodec/riscv/vp9_intra_rvi.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 89273b1cad..ccd060c666 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -62,6 +62,7 @@ OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
 RV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvi.o
 RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
 OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
+RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o
 RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
 OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
 RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vp9_intra_rvi.S b/libavcodec/riscv/vp9_intra_rvi.S
new file mode 100644
index 0000000000..16b6bdb25a
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvi.S
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+#if __riscv_xlen >= 64
+func ff_v_32x32_rvi
+        ld           t0, (a3)
+        ld           t1, 8(a3)
+        ld           t2, 16(a3)
+        ld           t3, 24(a3)
+        .rept 16
+        add          a7, a0, a1
+        sd           t0, (a0)
+        sd           t1, 8(a0)
+        sd           t2, 16(a0)
+        sd           t3, 24(a0)
+        sh1add       a0, a1, a0
+        sd           t0, (a7)
+        sd           t1, 8(a7)
+        sd           t2, 16(a7)
+        sd           t3, 24(a7)
+        .endr
+
+        ret
+endfunc
+
+func ff_v_16x16_rvi
+        ld           t0, (a3)
+        ld           t1, 8(a3)
+        .rept 8
+        add          a7, a0, a1
+        sd           t0, (a0)
+        sd           t1, 8(a0)
+        sh1add       a0, a1, a0
+        sd           t0, (a7)
+        sd           t1, 8(a7)
+        .endr
+
+        ret
+endfunc
+
+func ff_v_8x8_rvi
+        ld           t0, (a3)
+        .rept 4
+        add          a7, a0, a1
+        sd           t0, (a0)
+        sh1add       a0, a1, a0
+        sd           t0, (a7)
+        .endr
+
+        ret
+endfunc
+#endif
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
index 25047ed507..f8bc6563a5 100644
--- a/libavcodec/riscv/vp9dsp.h
+++ b/libavcodec/riscv/vp9dsp.h
@@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                          const uint8_t *a);
 void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                        const uint8_t *a);
+void ff_v_32x32_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);
+void ff_v_16x16_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);
+void ff_v_8x8_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                  const uint8_t *a);
 
 #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)                         \
 void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,   \
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index dd418bd5bf..0f64afc6d2 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -24,11 +24,19 @@
 #include "libavcodec/vp9dsp.h"
 #include "vp9dsp.h"
 
-static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
+static av_cold void vp9dsp_intrapred_init_riscv(VP9DSPContext *dsp, int bpp)
 {
-#if HAVE_RVV
+#if HAVE_RV
     int flags = av_get_cpu_flags();
 
+    if (bpp == 8 && (flags & AV_CPU_FLAG_RV_MISALIGNED) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
+# if __riscv_xlen >= 64
+        dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvi;
+        dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvi;
+        dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvi;
+# endif
+    }
+#if HAVE_RVV
     if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_rv_vlen_least(128)) {
         dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
         dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
@@ -53,9 +61,10 @@ static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
         dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
     }
 #endif
+#endif
 }
 
 av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
 {
-    vp9dsp_intrapred_init_rvv(dsp, bpp);
+    vp9dsp_intrapred_init_riscv(dsp, bpp);
 }
-- 
2.45.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert
  2024-05-13 16:59 [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert uk7b
@ 2024-05-13 17:01 ` flow gg
  2024-05-13 19:47 ` Rémi Denis-Courmont
  2024-05-13 19:53 ` Rémi Denis-Courmont
  2 siblings, 0 replies; 9+ messages in thread
From: flow gg @ 2024-05-13 17:01 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

just rebase

<uk7b@foxmail.com> 于2024年5月14日周二 01:00写道:

> From: sunyuechi <sunyuechi@iscas.ac.cn>
>
> C908:
> vp9_vert_8x8_8bpp_c: 22.0
> vp9_vert_8x8_8bpp_rvi: 15.7
> vp9_vert_16x16_8bpp_c: 71.2
> vp9_vert_16x16_8bpp_rvi: 39.0
> vp9_vert_32x32_8bpp_c: 300.2
> vp9_vert_32x32_8bpp_rvi: 135.2
> ---
>  libavcodec/riscv/Makefile        |  1 +
>  libavcodec/riscv/vp9_intra_rvi.S | 71 ++++++++++++++++++++++++++++++++
>  libavcodec/riscv/vp9dsp.h        |  6 +++
>  libavcodec/riscv/vp9dsp_init.c   | 15 +++++--
>  4 files changed, 90 insertions(+), 3 deletions(-)
>  create mode 100644 libavcodec/riscv/vp9_intra_rvi.S
>
> diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> index 89273b1cad..ccd060c666 100644
> --- a/libavcodec/riscv/Makefile
> +++ b/libavcodec/riscv/Makefile
> @@ -62,6 +62,7 @@ OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
>  RV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvi.o
>  RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
>  OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
> +RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o
>  RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
>  OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
>  RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
> diff --git a/libavcodec/riscv/vp9_intra_rvi.S
> b/libavcodec/riscv/vp9_intra_rvi.S
> new file mode 100644
> index 0000000000..16b6bdb25a
> --- /dev/null
> +++ b/libavcodec/riscv/vp9_intra_rvi.S
> @@ -0,0 +1,71 @@
> +/*
> + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> (ISCAS).
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#include "libavutil/riscv/asm.S"
> +
> +#if __riscv_xlen >= 64
> +func ff_v_32x32_rvi
> +        ld           t0, (a3)
> +        ld           t1, 8(a3)
> +        ld           t2, 16(a3)
> +        ld           t3, 24(a3)
> +        .rept 16
> +        add          a7, a0, a1
> +        sd           t0, (a0)
> +        sd           t1, 8(a0)
> +        sd           t2, 16(a0)
> +        sd           t3, 24(a0)
> +        sh1add       a0, a1, a0
> +        sd           t0, (a7)
> +        sd           t1, 8(a7)
> +        sd           t2, 16(a7)
> +        sd           t3, 24(a7)
> +        .endr
> +
> +        ret
> +endfunc
> +
> +func ff_v_16x16_rvi
> +        ld           t0, (a3)
> +        ld           t1, 8(a3)
> +        .rept 8
> +        add          a7, a0, a1
> +        sd           t0, (a0)
> +        sd           t1, 8(a0)
> +        sh1add       a0, a1, a0
> +        sd           t0, (a7)
> +        sd           t1, 8(a7)
> +        .endr
> +
> +        ret
> +endfunc
> +
> +func ff_v_8x8_rvi
> +        ld           t0, (a3)
> +        .rept 4
> +        add          a7, a0, a1
> +        sd           t0, (a0)
> +        sh1add       a0, a1, a0
> +        sd           t0, (a7)
> +        .endr
> +
> +        ret
> +endfunc
> +#endif
> diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
> index 25047ed507..f8bc6563a5 100644
> --- a/libavcodec/riscv/vp9dsp.h
> +++ b/libavcodec/riscv/vp9dsp.h
> @@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t
> stride, const uint8_t *l,
>                           const uint8_t *a);
>  void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
>                         const uint8_t *a);
> +void ff_v_32x32_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_16x16_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_8x8_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                  const uint8_t *a);
>
>  #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)
>      \
>  void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t
> dststride,   \
> diff --git a/libavcodec/riscv/vp9dsp_init.c
> b/libavcodec/riscv/vp9dsp_init.c
> index dd418bd5bf..0f64afc6d2 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -24,11 +24,19 @@
>  #include "libavcodec/vp9dsp.h"
>  #include "vp9dsp.h"
>
> -static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
> +static av_cold void vp9dsp_intrapred_init_riscv(VP9DSPContext *dsp, int
> bpp)
>  {
> -#if HAVE_RVV
> +#if HAVE_RV
>      int flags = av_get_cpu_flags();
>
> +    if (bpp == 8 && (flags & AV_CPU_FLAG_RV_MISALIGNED) && (flags &
> AV_CPU_FLAG_RVB_ADDR)) {
> +# if __riscv_xlen >= 64
> +        dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvi;
> +        dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvi;
> +        dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvi;
> +# endif
> +    }
> +#if HAVE_RVV
>      if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_rv_vlen_least(128))
> {
>          dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
>          dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
> @@ -53,9 +61,10 @@ static av_cold void
> vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
>          dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
>      }
>  #endif
> +#endif
>  }
>
>  av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int
> bitexact)
>  {
> -    vp9dsp_intrapred_init_rvv(dsp, bpp);
> +    vp9dsp_intrapred_init_riscv(dsp, bpp);
>  }
> --
> 2.45.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert
  2024-05-13 16:59 [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert uk7b
  2024-05-13 17:01 ` flow gg
@ 2024-05-13 19:47 ` Rémi Denis-Courmont
  2024-05-13 19:53 ` Rémi Denis-Courmont
  2 siblings, 0 replies; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-13 19:47 UTC (permalink / raw)
  To: ffmpeg-devel

Le maanantaina 13. toukokuuta 2024, 19.59.18 EEST uk7b@foxmail.com a écrit :
> From: sunyuechi <sunyuechi@iscas.ac.cn>
> 
> C908:
> vp9_vert_8x8_8bpp_c: 22.0
> vp9_vert_8x8_8bpp_rvi: 15.7
> vp9_vert_16x16_8bpp_c: 71.2
> vp9_vert_16x16_8bpp_rvi: 39.0
> vp9_vert_32x32_8bpp_c: 300.2
> vp9_vert_32x32_8bpp_rvi: 135.2
> ---
>  libavcodec/riscv/Makefile        |  1 +
>  libavcodec/riscv/vp9_intra_rvi.S | 71 ++++++++++++++++++++++++++++++++
>  libavcodec/riscv/vp9dsp.h        |  6 +++
>  libavcodec/riscv/vp9dsp_init.c   | 15 +++++--
>  4 files changed, 90 insertions(+), 3 deletions(-)
>  create mode 100644 libavcodec/riscv/vp9_intra_rvi.S
> 
> diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> index 89273b1cad..ccd060c666 100644
> --- a/libavcodec/riscv/Makefile
> +++ b/libavcodec/riscv/Makefile
> @@ -62,6 +62,7 @@ OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
>  RV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvi.o
>  RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
>  OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
> +RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o
>  RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
>  OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
>  RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
> diff --git a/libavcodec/riscv/vp9_intra_rvi.S
> b/libavcodec/riscv/vp9_intra_rvi.S new file mode 100644
> index 0000000000..16b6bdb25a
> --- /dev/null
> +++ b/libavcodec/riscv/vp9_intra_rvi.S
> @@ -0,0 +1,71 @@
> +/*
> + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS).
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/riscv/asm.S"
> +
> +#if __riscv_xlen >= 64
> +func ff_v_32x32_rvi
> +        ld           t0, (a3)
> +        ld           t1, 8(a3)
> +        ld           t2, 16(a3)
> +        ld           t3, 24(a3)
> +        .rept 16
> +        add          a7, a0, a1
> +        sd           t0, (a0)
> +        sd           t1, 8(a0)
> +        sd           t2, 16(a0)
> +        sd           t3, 24(a0)
> +        sh1add       a0, a1, a0
> +        sd           t0, (a7)
> +        sd           t1, 8(a7)
> +        sd           t2, 16(a7)
> +        sd           t3, 24(a7)
> +        .endr
> +
> +        ret
> +endfunc
> +
> +func ff_v_16x16_rvi
> +        ld           t0, (a3)
> +        ld           t1, 8(a3)
> +        .rept 8
> +        add          a7, a0, a1
> +        sd           t0, (a0)
> +        sd           t1, 8(a0)
> +        sh1add       a0, a1, a0
> +        sd           t0, (a7)
> +        sd           t1, 8(a7)
> +        .endr
> +
> +        ret
> +endfunc
> +
> +func ff_v_8x8_rvi
> +        ld           t0, (a3)
> +        .rept 4
> +        add          a7, a0, a1
> +        sd           t0, (a0)
> +        sh1add       a0, a1, a0
> +        sd           t0, (a7)
> +        .endr
> +
> +        ret
> +endfunc
> +#endif
> diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
> index 25047ed507..f8bc6563a5 100644
> --- a/libavcodec/riscv/vp9dsp.h
> +++ b/libavcodec/riscv/vp9dsp.h
> @@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride,
> const uint8_t *l, const uint8_t *a);
>  void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
>                         const uint8_t *a);
> +void ff_v_32x32_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_16x16_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_8x8_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                  const uint8_t *a);
> 
>  #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)                         \
>  void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,   \
> diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
> index dd418bd5bf..0f64afc6d2 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -24,11 +24,19 @@
>  #include "libavcodec/vp9dsp.h"
>  #include "vp9dsp.h"
> 
> -static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
> +static av_cold void vp9dsp_intrapred_init_riscv(VP9DSPContext *dsp, int
> bpp) {
> -#if HAVE_RVV
> +#if HAVE_RV
>      int flags = av_get_cpu_flags();
> 
> +    if (bpp == 8 && (flags & AV_CPU_FLAG_RV_MISALIGNED) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
> +# if __riscv_xlen >= 64
> +        dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvi;
> +        dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvi;
> +        dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvi;
> +# endif

AFAICT, those are aligned accesses? At least in checkasm, they are?

> +    }
> +#if HAVE_RVV
>      if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_rv_vlen_least(128)) {
> dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
>          dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
> @@ -53,9 +61,10 @@ static av_cold void
> vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
> dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv; }
>  #endif
> +#endif
>  }
> 
>  av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int
> bitexact) {
> -    vp9dsp_intrapred_init_rvv(dsp, bpp);
> +    vp9dsp_intrapred_init_riscv(dsp, bpp);
>  }


-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert
  2024-05-13 16:59 [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert uk7b
  2024-05-13 17:01 ` flow gg
  2024-05-13 19:47 ` Rémi Denis-Courmont
@ 2024-05-13 19:53 ` Rémi Denis-Courmont
  2024-05-14  4:45   ` flow gg
  2 siblings, 1 reply; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-13 19:53 UTC (permalink / raw)
  To: ffmpeg-devel

Le maanantaina 13. toukokuuta 2024, 19.59.18 EEST uk7b@foxmail.com a écrit :
> From: sunyuechi <sunyuechi@iscas.ac.cn>
> 
> C908:
> vp9_vert_8x8_8bpp_c: 22.0
> vp9_vert_8x8_8bpp_rvi: 15.7
> vp9_vert_16x16_8bpp_c: 71.2
> vp9_vert_16x16_8bpp_rvi: 39.0
> vp9_vert_32x32_8bpp_c: 300.2
> vp9_vert_32x32_8bpp_rvi: 135.2

Not sure how you get that. It should be reported under rvb_a (Zba) or 
misaligned, AFAIU.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert
  2024-05-13 19:53 ` Rémi Denis-Courmont
@ 2024-05-14  4:45   ` flow gg
  2024-05-14 17:00     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 9+ messages in thread
From: flow gg @ 2024-05-14  4:45 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Locally, I am using:
    if (bpp == 8 && (flags & AV_CPU_FLAG_RVI) && (flags &
AV_CPU_FLAG_RVB_ADDR)) {
With this condition the code performs better than C on k230/banana_f3.
For the patch sent by email, I followed [FFmpeg-devel] [PATCH 2/2]
lavc/vp8dsp: restrict RVI optimisations, and changed the condition to:
    if (bpp == 8 && (flags & AV_CPU_FLAG_RV_MISALIGNED) && (flags &
AV_CPU_FLAG_RVB_ADDR)) {
With that condition there is no benchmark output, but I think the same
modification should be made here?

Rémi Denis-Courmont <remi@remlab.net> 于2024年5月14日周二 03:54写道:

> Le maanantaina 13. toukokuuta 2024, 19.59.18 EEST uk7b@foxmail.com a
> écrit :
> > From: sunyuechi <sunyuechi@iscas.ac.cn>
> >
> > C908:
> > vp9_vert_8x8_8bpp_c: 22.0
> > vp9_vert_8x8_8bpp_rvi: 15.7
> > vp9_vert_16x16_8bpp_c: 71.2
> > vp9_vert_16x16_8bpp_rvi: 39.0
> > vp9_vert_32x32_8bpp_c: 300.2
> > vp9_vert_32x32_8bpp_rvi: 135.2
>
> Not sure how you get that. It should be reported under rvb_a (Zba) or
> misaligned, AFAIU.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert
  2024-05-14  4:45   ` flow gg
@ 2024-05-14 17:00     ` Rémi Denis-Courmont
  2024-05-14 17:22       ` flow gg
  0 siblings, 1 reply; 9+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-14 17:00 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le tiistaina 14. toukokuuta 2024, 7.45.29 EEST flow gg a écrit :
> I am locally using:
>     if (bpp == 8 && (flags & AV_CPU_FLAG_RVI) && (flags &
> AV_CPU_FLAG_RVB_ADDR)) {

There is no point testing the I flag if you test any other flag. The I flag is 
always set (since we don't, and probably never will, support RV32E) and only 
exists for the benefit of checkasm.

> this performs better on k230/banana_f3 than C.

It also performs better than C on SiFive U74, even though that design has 
veeeeeery slow unaligned access (emulated in SBI). Of course, it could just be 
that checkasm only tests aligned accesses and unaligned accesses are legal, 
hence my earlier question.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert
  2024-05-14 17:00     ` Rémi Denis-Courmont
@ 2024-05-14 17:22       ` flow gg
  0 siblings, 0 replies; 9+ messages in thread
From: flow gg @ 2024-05-14 17:22 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Okay, learned it

Rémi Denis-Courmont <remi@remlab.net> 于2024年5月15日周三 01:00写道:

> Le tiistaina 14. toukokuuta 2024, 7.45.29 EEST flow gg a écrit :
> > I am locally using:
> >     if (bpp == 8 && (flags & AV_CPU_FLAG_RVI) && (flags &
> > AV_CPU_FLAG_RVB_ADDR)) {
>
> There is no point testing the I flag if you test any other flag. The I
> flag is
> always set (since we don't, and probably never will, support RV32E) and
> only
> exists for the benefit of checkasm.
>
> > this performs better on k230/banana_f3 than C.
>
> It also performs better than C on SiFive U74, even though that design has
> veeeeeery slow unaligned access (emulated in SBI). Of course, it could
> just be
> that checkasm only tests aligned accesses and unaligned accesses are
> legal,
> hence my earlier question.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert
  2024-05-12 10:03 uk7b
@ 2024-05-12 10:06 ` flow gg
  0 siblings, 0 replies; 9+ messages in thread
From: flow gg @ 2024-05-12 10:06 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

> It should be possible to improve ordering to avoid immediate dependency
> from ADD to SD

Okay, updated it.

Additionally improved the mc-tap_64 on vlen>=256 and something

<uk7b@foxmail.com> 于2024年5月12日周日 18:04写道:

> From: sunyuechi <sunyuechi@iscas.ac.cn>
>
> C908:
> vp9_vert_8x8_8bpp_c: 22.0
> vp9_vert_8x8_8bpp_rvi: 15.7
> vp9_vert_16x16_8bpp_c: 71.2
> vp9_vert_16x16_8bpp_rvi: 39.0
> vp9_vert_32x32_8bpp_c: 300.2
> vp9_vert_32x32_8bpp_rvi: 135.2
> ---
>  libavcodec/riscv/Makefile        |  1 +
>  libavcodec/riscv/vp9_intra_rvi.S | 71 ++++++++++++++++++++++++++++++++
>  libavcodec/riscv/vp9dsp.h        |  6 +++
>  libavcodec/riscv/vp9dsp_init.c   | 63 ++++++++++++++++------------
>  4 files changed, 114 insertions(+), 27 deletions(-)
>  create mode 100644 libavcodec/riscv/vp9_intra_rvi.S
>
> diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
> index 89273b1cad..ccd060c666 100644
> --- a/libavcodec/riscv/Makefile
> +++ b/libavcodec/riscv/Makefile
> @@ -62,6 +62,7 @@ OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
>  RV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvi.o
>  RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
>  OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
> +RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o
>  RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
>  OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
>  RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
> diff --git a/libavcodec/riscv/vp9_intra_rvi.S
> b/libavcodec/riscv/vp9_intra_rvi.S
> new file mode 100644
> index 0000000000..16b6bdb25a
> --- /dev/null
> +++ b/libavcodec/riscv/vp9_intra_rvi.S
> @@ -0,0 +1,71 @@
> +/*
> + * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences
> (ISCAS).
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> + */
> +
> +#include "libavutil/riscv/asm.S"
> +
> +#if __riscv_xlen >= 64
> +func ff_v_32x32_rvi
> +        ld           t0, (a3)
> +        ld           t1, 8(a3)
> +        ld           t2, 16(a3)
> +        ld           t3, 24(a3)
> +        .rept 16
> +        add          a7, a0, a1
> +        sd           t0, (a0)
> +        sd           t1, 8(a0)
> +        sd           t2, 16(a0)
> +        sd           t3, 24(a0)
> +        sh1add       a0, a1, a0
> +        sd           t0, (a7)
> +        sd           t1, 8(a7)
> +        sd           t2, 16(a7)
> +        sd           t3, 24(a7)
> +        .endr
> +
> +        ret
> +endfunc
> +
> +func ff_v_16x16_rvi
> +        ld           t0, (a3)
> +        ld           t1, 8(a3)
> +        .rept 8
> +        add          a7, a0, a1
> +        sd           t0, (a0)
> +        sd           t1, 8(a0)
> +        sh1add       a0, a1, a0
> +        sd           t0, (a7)
> +        sd           t1, 8(a7)
> +        .endr
> +
> +        ret
> +endfunc
> +
> +func ff_v_8x8_rvi
> +        ld           t0, (a3)
> +        .rept 4
> +        add          a7, a0, a1
> +        sd           t0, (a0)
> +        sh1add       a0, a1, a0
> +        sd           t0, (a7)
> +        .endr
> +
> +        ret
> +endfunc
> +#endif
> diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
> index 25047ed507..f8bc6563a5 100644
> --- a/libavcodec/riscv/vp9dsp.h
> +++ b/libavcodec/riscv/vp9dsp.h
> @@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t
> stride, const uint8_t *l,
>                           const uint8_t *a);
>  void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
>                         const uint8_t *a);
> +void ff_v_32x32_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_16x16_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_v_8x8_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                  const uint8_t *a);
>
>  #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)
>      \
>  void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t
> dststride,   \
> diff --git a/libavcodec/riscv/vp9dsp_init.c
> b/libavcodec/riscv/vp9dsp_init.c
> index 69ab39004c..e377d377e3 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -24,38 +24,47 @@
>  #include "libavcodec/vp9dsp.h"
>  #include "vp9dsp.h"
>
> -static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
> +static av_cold void vp9dsp_intrapred_init_riscv(VP9DSPContext *dsp, int
> bpp)
>  {
> -    #if HAVE_RVV
> -        int flags = av_get_cpu_flags();
> +#if HAVE_RV
> +    int flags = av_get_cpu_flags();
>
> -        if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb()
> >= 16) {
> -            dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
> -            dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
> -            dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
> -            dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
> -            dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
> -            dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
> -        }
> +    if (bpp == 8 && (flags & AV_CPU_FLAG_RVI) && (flags &
> AV_CPU_FLAG_RVB_ADDR)) {
> +# if __riscv_xlen >= 64
> +        dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvi;
> +        dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvi;
> +        dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvi;
> +# endif
> +    }
> +#if HAVE_RVV
> +    if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >=
> 16) {
> +        dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
> +        dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
> +        dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
> +        dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
> +        dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
> +        dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
> +    }
>
> -        if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb()
> >= 16) {
> -            dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
> -            dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
> -            dsp->intra_pred[TX_32X32][LEFT_DC_PRED] =
> ff_dc_left_32x32_rvv;
> -            dsp->intra_pred[TX_16X16][LEFT_DC_PRED] =
> ff_dc_left_16x16_rvv;
> -            dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
> -            dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
> -            dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
> -            dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
> -            dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
> -            dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
> -            dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
> -            dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
> -        }
> -    #endif
> +    if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >=
> 16) {
> +        dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
> +        dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
> +        dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
> +        dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
> +        dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
> +        dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
> +        dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
> +        dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
> +        dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
> +        dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
> +        dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
> +        dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
> +    }
> +#endif
> +#endif
>  }
>
>  av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int
> bitexact)
>  {
> -    vp9dsp_intrapred_init_rvv(dsp, bpp);
> +    vp9dsp_intrapred_init_riscv(dsp, bpp);
>  }
> --
> 2.45.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert
@ 2024-05-12 10:03 uk7b
  2024-05-12 10:06 ` flow gg
  0 siblings, 1 reply; 9+ messages in thread
From: uk7b @ 2024-05-12 10:03 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: sunyuechi

From: sunyuechi <sunyuechi@iscas.ac.cn>

C908:
vp9_vert_8x8_8bpp_c: 22.0
vp9_vert_8x8_8bpp_rvi: 15.7
vp9_vert_16x16_8bpp_c: 71.2
vp9_vert_16x16_8bpp_rvi: 39.0
vp9_vert_32x32_8bpp_c: 300.2
vp9_vert_32x32_8bpp_rvi: 135.2
---
 libavcodec/riscv/Makefile        |  1 +
 libavcodec/riscv/vp9_intra_rvi.S | 71 ++++++++++++++++++++++++++++++++
 libavcodec/riscv/vp9dsp.h        |  6 +++
 libavcodec/riscv/vp9dsp_init.c   | 63 ++++++++++++++++------------
 4 files changed, 114 insertions(+), 27 deletions(-)
 create mode 100644 libavcodec/riscv/vp9_intra_rvi.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 89273b1cad..ccd060c666 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -62,6 +62,7 @@ OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
 RV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvi.o
 RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
 OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
+RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o
 RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
 OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
 RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vp9_intra_rvi.S b/libavcodec/riscv/vp9_intra_rvi.S
new file mode 100644
index 0000000000..16b6bdb25a
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvi.S
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+#if __riscv_xlen >= 64
+func ff_v_32x32_rvi
+        ld           t0, (a3)
+        ld           t1, 8(a3)
+        ld           t2, 16(a3)
+        ld           t3, 24(a3)
+        .rept 16
+        add          a7, a0, a1
+        sd           t0, (a0)
+        sd           t1, 8(a0)
+        sd           t2, 16(a0)
+        sd           t3, 24(a0)
+        sh1add       a0, a1, a0
+        sd           t0, (a7)
+        sd           t1, 8(a7)
+        sd           t2, 16(a7)
+        sd           t3, 24(a7)
+        .endr
+
+        ret
+endfunc
+
+func ff_v_16x16_rvi
+        ld           t0, (a3)
+        ld           t1, 8(a3)
+        .rept 8
+        add          a7, a0, a1
+        sd           t0, (a0)
+        sd           t1, 8(a0)
+        sh1add       a0, a1, a0
+        sd           t0, (a7)
+        sd           t1, 8(a7)
+        .endr
+
+        ret
+endfunc
+
+func ff_v_8x8_rvi
+        ld           t0, (a3)
+        .rept 4
+        add          a7, a0, a1
+        sd           t0, (a0)
+        sh1add       a0, a1, a0
+        sd           t0, (a7)
+        .endr
+
+        ret
+endfunc
+#endif
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
index 25047ed507..f8bc6563a5 100644
--- a/libavcodec/riscv/vp9dsp.h
+++ b/libavcodec/riscv/vp9dsp.h
@@ -60,6 +60,12 @@ void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                          const uint8_t *a);
 void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
                        const uint8_t *a);
+void ff_v_32x32_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);
+void ff_v_16x16_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                    const uint8_t *a);
+void ff_v_8x8_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+                  const uint8_t *a);
 
 #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)                         \
 void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,   \
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 69ab39004c..e377d377e3 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -24,38 +24,47 @@
 #include "libavcodec/vp9dsp.h"
 #include "vp9dsp.h"
 
-static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
+static av_cold void vp9dsp_intrapred_init_riscv(VP9DSPContext *dsp, int bpp)
 {
-    #if HAVE_RVV
-        int flags = av_get_cpu_flags();
+#if HAVE_RV
+    int flags = av_get_cpu_flags();
 
-        if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
-            dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
-            dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
-            dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
-            dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
-            dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
-            dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
-        }
+    if (bpp == 8 && (flags & AV_CPU_FLAG_RVI) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
+# if __riscv_xlen >= 64
+        dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvi;
+        dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvi;
+        dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvi;
+# endif
+    }
+#if HAVE_RVV
+    if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
+        dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
+        dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
+        dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
+        dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
+        dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
+        dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+    }
 
-        if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
-            dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
-            dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
-            dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
-            dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
-            dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
-            dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
-            dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
-            dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
-            dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
-            dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
-            dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
-            dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
-        }
-    #endif
+    if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
+        dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
+        dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
+        dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
+        dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
+        dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
+        dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
+        dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
+        dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
+        dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
+        dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
+        dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
+        dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+    }
+#endif
+#endif
 }
 
 av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
 {
-    vp9dsp_intrapred_init_rvv(dsp, bpp);
+    vp9dsp_intrapred_init_riscv(dsp, bpp);
 }
-- 
2.45.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-05-14 17:22 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-13 16:59 [FFmpeg-devel] [PATCH v3 1/9] lavc/vp9dsp: R-V ipred vert uk7b
2024-05-13 17:01 ` flow gg
2024-05-13 19:47 ` Rémi Denis-Courmont
2024-05-13 19:53 ` Rémi Denis-Courmont
2024-05-14  4:45   ` flow gg
2024-05-14 17:00     ` Rémi Denis-Courmont
2024-05-14 17:22       ` flow gg
  -- strict thread matches above, loose matches on Subject: below --
2024-05-12 10:03 uk7b
2024-05-12 10:06 ` flow gg

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git