* [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
@ 2024-03-02 7:42 flow gg
2024-03-02 9:03 ` Rémi Denis-Courmont
0 siblings, 1 reply; 12+ messages in thread
From: flow gg @ 2024-03-02 7:42 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: 0001-lavc-vp9dsp-R-V-V-ipred-dc.patch --]
[-- Type: text/x-patch, Size: 15048 bytes --]
From adaae06a3e18bccec1772a3134334cbea652ae77 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Mon, 26 Feb 2024 14:42:17 +0800
Subject: [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
C908:
vp9_dc_8x8_8bpp_c: 46.0
vp9_dc_8x8_8bpp_rvv_i64: 41.0
vp9_dc_16x16_8bpp_c: 109.2
vp9_dc_16x16_8bpp_rvv_i32: 72.7
vp9_dc_32x32_8bpp_c: 365.2
vp9_dc_32x32_8bpp_rvv_i32: 165.5
vp9_dc_127_8x8_8bpp_c: 23.0
vp9_dc_127_8x8_8bpp_rvv_i64: 22.0
vp9_dc_127_16x16_8bpp_c: 70.2
vp9_dc_127_16x16_8bpp_rvv_i32: 51.7
vp9_dc_127_32x32_8bpp_c: 295.2
vp9_dc_127_32x32_8bpp_rvv_i32: 140.2
vp9_dc_128_8x8_8bpp_c: 23.0
vp9_dc_128_8x8_8bpp_rvv_i64: 22.0
vp9_dc_128_16x16_8bpp_c: 70.2
vp9_dc_128_16x16_8bpp_rvv_i32: 51.7
vp9_dc_128_32x32_8bpp_c: 295.2
vp9_dc_128_32x32_8bpp_rvv_i32: 140.2
vp9_dc_129_8x8_8bpp_c: 23.0
vp9_dc_129_8x8_8bpp_rvv_i64: 22.0
vp9_dc_129_16x16_8bpp_c: 70.2
vp9_dc_129_16x16_8bpp_rvv_i32: 51.7
vp9_dc_129_32x32_8bpp_c: 295.2
vp9_dc_129_32x32_8bpp_rvv_i32: 140.2
vp9_dc_left_8x8_8bpp_c: 38.0
vp9_dc_left_8x8_8bpp_rvv_i64: 36.0
vp9_dc_left_16x16_8bpp_c: 93.2
vp9_dc_left_16x16_8bpp_rvv_i32: 67.7
vp9_dc_left_32x32_8bpp_c: 333.2
vp9_dc_left_32x32_8bpp_rvv_i32: 158.5
vp9_dc_top_8x8_8bpp_c: 38.7
vp9_dc_top_8x8_8bpp_rvv_i64: 36.0
vp9_dc_top_16x16_8bpp_c: 93.2
vp9_dc_top_16x16_8bpp_rvv_i32: 67.7
vp9_dc_top_32x32_8bpp_c: 333.2
vp9_dc_top_32x32_8bpp_rvv_i32: 156.2
---
libavcodec/riscv/Makefile | 2 +
libavcodec/riscv/vp9_intra_rvv.S | 201 +++++++++++++++++++++++++++++++
libavcodec/riscv/vp9dsp.h | 64 ++++++++++
libavcodec/riscv/vp9dsp_init.c | 61 ++++++++++
libavcodec/vp9dsp.c | 2 +
libavcodec/vp9dsp.h | 1 +
6 files changed, 331 insertions(+)
create mode 100644 libavcodec/riscv/vp9_intra_rvv.S
create mode 100644 libavcodec/riscv/vp9dsp.h
create mode 100644 libavcodec/riscv/vp9dsp_init.c
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index dff8784102..c237e60800 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -60,5 +60,7 @@ OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
+OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
+RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
new file mode 100644
index 0000000000..b3b0470cfc
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro getdc type size
+ vmv.v.x v16, zero
+.ifc \type,top
+ vle8.v v8, (a3)
+ vwredsumu.vs v16, v8, v16
+ vsetivli zero, 1, e16, m1, ta, ma
+ vmv.x.s t1, v16
+ .ifc \size,32
+ addi t1, t1, 16
+ srai t1, t1, 5
+ .elseif \size == 16
+ addi t1, t1, 8
+ srai t1, t1, 4
+ .elseif \size == 8
+ addi t1, t1, 4
+ srai t1, t1, 3
+ .endif
+.elseif \type == left
+ vle8.v v8, (a2)
+ vwredsumu.vs v16, v8, v16
+ vsetivli zero, 1, e16, m1, ta, ma
+ vmv.x.s t1, v16
+ .ifc \size,32
+ addi t1, t1, 16
+ srai t1, t1, 5
+ .elseif \size == 16
+ addi t1, t1, 8
+ srai t1, t1, 4
+ .elseif \size == 8
+ addi t1, t1, 4
+ srai t1, t1, 3
+ .endif
+.elseif \type == 127
+ li t1, 127
+.elseif \type == 128
+ li t1, 128
+.elseif \type == 129
+ li t1, 129
+.elseif \type == none
+ vle8.v v8, (a2)
+ vwredsumu.vs v16, v8, v16
+ vle8.v v8, (a3)
+ vwredsumu.vs v16, v8, v16
+ vsetivli zero, 1, e16, m1, ta, ma
+ vmv.x.s t1, v16
+ .ifc \size,32
+ addi t1, t1, 32
+ srai t1, t1, 6
+ .elseif \size == 16
+ addi t1, t1, 16
+ srai t1, t1, 5
+ .elseif \size == 8
+ addi t1, t1, 8
+ srai t1, t1, 4
+ .endif
+.endif
+.endm
+
+.macro dc32x32 type restore
+ li t0, 32
+ vsetvli zero, t0, e8, m2, ta, ma
+ getdc \type 32
+
+ .ifc \restore,1
+ vsetvli zero, t0, e8, m2, ta, ma
+ .endif
+ vmv.v.x v0, t1
+
+ .rept 31
+ vse8.v v0, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v0, (a0)
+
+ ret
+.endm
+
+.macro dc16x16 type restore
+ vsetivli zero, 16, e8, m1, ta, ma
+ getdc \type 16
+
+ .ifc \restore,1
+ vsetivli zero, 16, e8, m1, ta, ma
+ .endif
+ vmv.v.x v0, t1
+
+ .rept 15
+ vse8.v v0, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v0, (a0)
+
+ ret
+.endm
+
+.macro dc8x8 type
+ vsetivli zero, 8, e8, mf2, ta, ma
+ getdc \type 8
+
+ li t0, 64
+ vsetvli zero, t0, e8, m4, ta, ma
+ vmv.v.x v0, t1
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vsse64.v v0, (a0), a1
+
+ ret
+.endm
+
+func ff_dc_127_32x32_rvv, zve32x
+ dc32x32 127 0
+endfunc
+
+func ff_dc_127_16x16_rvv, zve32x
+ dc16x16 127 0
+endfunc
+
+func ff_dc_127_8x8_rvv, zve64x
+ dc8x8 127
+endfunc
+
+func ff_dc_128_32x32_rvv, zve32x
+ dc32x32 128 0
+endfunc
+
+func ff_dc_128_16x16_rvv, zve32x
+ dc16x16 128 0
+endfunc
+
+func ff_dc_128_8x8_rvv, zve64x
+ dc8x8 128
+endfunc
+
+func ff_dc_129_32x32_rvv, zve32x
+ dc32x32 129 0
+endfunc
+
+func ff_dc_129_16x16_rvv, zve32x
+ dc16x16 129 0
+endfunc
+
+func ff_dc_129_8x8_rvv, zve64x
+ dc8x8 129
+endfunc
+
+func ff_dc_32x32_rvv, zve32x
+ dc32x32 none 1
+endfunc
+
+func ff_dc_16x16_rvv, zve32x
+ dc16x16 none 1
+endfunc
+
+func ff_dc_8x8_rvv, zve64x
+ dc8x8 none
+endfunc
+
+func ff_dc_left_32x32_rvv, zve32x
+ dc32x32 left 1
+endfunc
+
+func ff_dc_left_16x16_rvv, zve32x
+ dc16x16 left 1
+endfunc
+
+func ff_dc_left_8x8_rvv, zve64x
+ dc8x8 left
+endfunc
+
+func ff_dc_top_32x32_rvv, zve32x
+ dc32x32 top 1
+endfunc
+
+func ff_dc_top_16x16_rvv, zve32x
+ dc16x16 top 1
+endfunc
+
+func ff_dc_top_8x8_rvv, zve64x
+ dc8x8 top
+endfunc
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
new file mode 100644
index 0000000000..abd57bd836
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
+#define AVCODEC_RISCV_VP9DSP_RISCV_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+
+#endif // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
new file mode 100644
index 0000000000..69ab39004c
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp.h"
+
+static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
+{
+ #if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
+ dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
+ dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
+ dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+ }
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
+ dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
+ dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
+ dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
+ dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
+ dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+ }
+ #endif
+}
+
+av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
+{
+ vp9dsp_intrapred_init_rvv(dsp, bpp);
+}
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index d8ddf74d4f..967e6e1e1a 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -100,6 +100,8 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
ff_vp9dsp_init_aarch64(dsp, bpp);
#elif ARCH_ARM
ff_vp9dsp_init_arm(dsp, bpp);
+#elif ARCH_RISCV
+ ff_vp9dsp_init_riscv(dsp, bpp, bitexact);
#elif ARCH_X86
ff_vp9dsp_init_x86(dsp, bpp, bitexact);
#elif ARCH_MIPS
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
index be0ac0b181..772848e349 100644
--- a/libavcodec/vp9dsp.h
+++ b/libavcodec/vp9dsp.h
@@ -131,6 +131,7 @@ void ff_vp9dsp_init_12(VP9DSPContext *dsp);
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);
--
2.44.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-02 7:42 [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc flow gg
@ 2024-03-02 9:03 ` Rémi Denis-Courmont
2024-03-02 9:48 ` flow gg
0 siblings, 1 reply; 12+ messages in thread
From: Rémi Denis-Courmont @ 2024-03-02 9:03 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le lauantaina 2. maaliskuuta 2024, 9.42.06 EET flow gg a écrit :
>
You would need a lot fewer if/else if you passed the order/bit-width instead
of the size as macro parameter.
Similarly, this can be folded as a single .else:
+.elseif \type == 127
+ li t1, 127
+.elseif \type == 128
+ li t1, 128
+.elseif \type == 129
+ li t1, 129
--
レミ・デニ-クールモン
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-02 9:03 ` Rémi Denis-Courmont
@ 2024-03-02 9:48 ` flow gg
2024-03-03 1:59 ` flow gg
0 siblings, 1 reply; 12+ messages in thread
From: flow gg @ 2024-03-02 9:48 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 904 bytes --]
Okay, I have reduced the if/else chains in the patch attached to this reply.
Rémi Denis-Courmont <remi@remlab.net> 于2024年3月2日周六 17:03写道:
> Le lauantaina 2. maaliskuuta 2024, 9.42.06 EET flow gg a écrit :
> >
>
> You would need a lot fewer if/else if you passed the order/bit-width
> instead
> of the size as macro parameter.
>
> Similarly, this can be folded as a single .else:
>
> +.elseif \type == 127
> + li t1, 127
> +.elseif \type == 128
> + li t1, 128
> +.elseif \type == 129
> + li t1, 129
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
[-- Attachment #2: 0001-lavc-vp9dsp-R-V-V-ipred-dc.patch --]
[-- Type: text/x-patch, Size: 14146 bytes --]
From 7a4cc34b2345e330e1e43efa720bcbafb4e9a2f2 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Mon, 26 Feb 2024 14:42:17 +0800
Subject: [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
C908:
vp9_dc_8x8_8bpp_c: 46.0
vp9_dc_8x8_8bpp_rvv_i64: 41.0
vp9_dc_16x16_8bpp_c: 109.2
vp9_dc_16x16_8bpp_rvv_i32: 72.7
vp9_dc_32x32_8bpp_c: 365.2
vp9_dc_32x32_8bpp_rvv_i32: 165.5
vp9_dc_127_8x8_8bpp_c: 23.0
vp9_dc_127_8x8_8bpp_rvv_i64: 22.0
vp9_dc_127_16x16_8bpp_c: 70.2
vp9_dc_127_16x16_8bpp_rvv_i32: 51.7
vp9_dc_127_32x32_8bpp_c: 295.2
vp9_dc_127_32x32_8bpp_rvv_i32: 140.2
vp9_dc_128_8x8_8bpp_c: 23.0
vp9_dc_128_8x8_8bpp_rvv_i64: 22.0
vp9_dc_128_16x16_8bpp_c: 70.2
vp9_dc_128_16x16_8bpp_rvv_i32: 51.7
vp9_dc_128_32x32_8bpp_c: 295.2
vp9_dc_128_32x32_8bpp_rvv_i32: 140.2
vp9_dc_129_8x8_8bpp_c: 23.0
vp9_dc_129_8x8_8bpp_rvv_i64: 22.0
vp9_dc_129_16x16_8bpp_c: 70.2
vp9_dc_129_16x16_8bpp_rvv_i32: 51.7
vp9_dc_129_32x32_8bpp_c: 295.2
vp9_dc_129_32x32_8bpp_rvv_i32: 140.2
vp9_dc_left_8x8_8bpp_c: 38.0
vp9_dc_left_8x8_8bpp_rvv_i64: 36.0
vp9_dc_left_16x16_8bpp_c: 93.2
vp9_dc_left_16x16_8bpp_rvv_i32: 67.7
vp9_dc_left_32x32_8bpp_c: 333.2
vp9_dc_left_32x32_8bpp_rvv_i32: 158.5
vp9_dc_top_8x8_8bpp_c: 38.7
vp9_dc_top_8x8_8bpp_rvv_i64: 36.0
vp9_dc_top_16x16_8bpp_c: 93.2
vp9_dc_top_16x16_8bpp_rvv_i32: 67.7
vp9_dc_top_32x32_8bpp_c: 333.2
vp9_dc_top_32x32_8bpp_rvv_i32: 156.2
---
libavcodec/riscv/Makefile | 2 +
libavcodec/riscv/vp9_intra_rvv.S | 171 +++++++++++++++++++++++++++++++
libavcodec/riscv/vp9dsp.h | 64 ++++++++++++
libavcodec/riscv/vp9dsp_init.c | 61 +++++++++++
libavcodec/vp9dsp.c | 2 +
libavcodec/vp9dsp.h | 1 +
6 files changed, 301 insertions(+)
create mode 100644 libavcodec/riscv/vp9_intra_rvv.S
create mode 100644 libavcodec/riscv/vp9dsp.h
create mode 100644 libavcodec/riscv/vp9dsp_init.c
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index dff8784102..c237e60800 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -60,5 +60,7 @@ OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
+OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
+RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
new file mode 100644
index 0000000000..be0c8a8094
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro avgdc size size2
+ vsetivli zero, 1, e16, m1, ta, ma
+ vmv.x.s t1, v16
+ addi t1, t1, \size
+ srai t1, t1, \size2
+.endm
+
+.macro getdc type size size2
+ vmv.v.x v16, zero
+.ifc \type,top
+ vle8.v v8, (a3)
+ vwredsumu.vs v16, v8, v16
+ avgdc \size \size2
+.elseif \type == left
+ vle8.v v8, (a2)
+ vwredsumu.vs v16, v8, v16
+ avgdc \size \size2
+.elseif \type == none
+ vle8.v v8, (a2)
+ vwredsumu.vs v16, v8, v16
+ vle8.v v8, (a3)
+ vwredsumu.vs v16, v8, v16
+ avgdc \size \size2
+.else
+ li t1, \type
+.endif
+.endm
+
+.macro dc32x32 type size size2 restore
+ li t0, 32
+ vsetvli zero, t0, e8, m2, ta, ma
+ getdc \type \size \size2
+
+ .ifc \restore,1
+ vsetvli zero, t0, e8, m2, ta, ma
+ .endif
+ vmv.v.x v0, t1
+
+ .rept 31
+ vse8.v v0, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v0, (a0)
+
+ ret
+.endm
+
+.macro dc16x16 type size size2 restore
+ vsetivli zero, 16, e8, m1, ta, ma
+ getdc \type \size \size2
+
+ .ifc \restore,1
+ vsetivli zero, 16, e8, m1, ta, ma
+ .endif
+ vmv.v.x v0, t1
+
+ .rept 15
+ vse8.v v0, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v0, (a0)
+
+ ret
+.endm
+
+.macro dc8x8 type size size2 restore
+ vsetivli zero, 8, e8, mf2, ta, ma
+ getdc \type \size \size2
+
+ li t0, 64
+ vsetvli zero, t0, e8, m4, ta, ma
+ vmv.v.x v0, t1
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vsse64.v v0, (a0), a1
+
+ ret
+.endm
+
+func ff_dc_127_32x32_rvv, zve32x
+ dc32x32 127 0 0 0
+endfunc
+
+func ff_dc_127_16x16_rvv, zve32x
+ dc16x16 127 0 0 0
+endfunc
+
+func ff_dc_127_8x8_rvv, zve64x
+ dc8x8 127 0 0 0
+endfunc
+
+func ff_dc_128_32x32_rvv, zve32x
+ dc32x32 128 0 0 0
+endfunc
+
+func ff_dc_128_16x16_rvv, zve32x
+ dc16x16 128 0 0 0
+endfunc
+
+func ff_dc_128_8x8_rvv, zve64x
+ dc8x8 128 0 0 0
+endfunc
+
+func ff_dc_129_32x32_rvv, zve32x
+ dc32x32 129 0 0 0
+endfunc
+
+func ff_dc_129_16x16_rvv, zve32x
+ dc16x16 129 0 0 0
+endfunc
+
+func ff_dc_129_8x8_rvv, zve64x
+ dc8x8 129 0 0 0
+endfunc
+
+func ff_dc_32x32_rvv, zve32x
+ dc32x32 none 32 6 1
+endfunc
+
+func ff_dc_16x16_rvv, zve32x
+ dc16x16 none 16 5 1
+endfunc
+
+func ff_dc_8x8_rvv, zve64x
+ dc8x8 none 8 4 0
+endfunc
+
+func ff_dc_left_32x32_rvv, zve32x
+ dc32x32 left 16 5 1
+endfunc
+
+func ff_dc_left_16x16_rvv, zve32x
+ dc16x16 left 8 4 1
+endfunc
+
+func ff_dc_left_8x8_rvv, zve64x
+ dc8x8 left 4 3 0
+endfunc
+
+func ff_dc_top_32x32_rvv, zve32x
+ dc32x32 top 16 5 1
+endfunc
+
+func ff_dc_top_16x16_rvv, zve32x
+ dc16x16 top 8 4 1
+endfunc
+
+func ff_dc_top_8x8_rvv, zve64x
+ dc8x8 top 4 3 0
+endfunc
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
new file mode 100644
index 0000000000..abd57bd836
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
+#define AVCODEC_RISCV_VP9DSP_RISCV_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+
+#endif // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
new file mode 100644
index 0000000000..69ab39004c
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp.h"
+
+static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
+{
+ #if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
+ dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
+ dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
+ dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+ }
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
+ dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
+ dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
+ dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
+ dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
+ dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+ }
+ #endif
+}
+
+av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
+{
+ vp9dsp_intrapred_init_rvv(dsp, bpp);
+}
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index d8ddf74d4f..967e6e1e1a 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -100,6 +100,8 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
ff_vp9dsp_init_aarch64(dsp, bpp);
#elif ARCH_ARM
ff_vp9dsp_init_arm(dsp, bpp);
+#elif ARCH_RISCV
+ ff_vp9dsp_init_riscv(dsp, bpp, bitexact);
#elif ARCH_X86
ff_vp9dsp_init_x86(dsp, bpp, bitexact);
#elif ARCH_MIPS
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
index be0ac0b181..772848e349 100644
--- a/libavcodec/vp9dsp.h
+++ b/libavcodec/vp9dsp.h
@@ -131,6 +131,7 @@ void ff_vp9dsp_init_12(VP9DSPContext *dsp);
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);
--
2.44.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-02 9:48 ` flow gg
@ 2024-03-03 1:59 ` flow gg
2024-03-03 14:46 ` Rémi Denis-Courmont
0 siblings, 1 reply; 12+ messages in thread
From: flow gg @ 2024-03-03 1:59 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1063 bytes --]
The patch attached to this reply includes a small additional improvement.
flow gg <hlefthleft@gmail.com> 于2024年3月2日周六 17:48写道:
> Okay, reduced if/else in the response.
>
> Rémi Denis-Courmont <remi@remlab.net> 于2024年3月2日周六 17:03写道:
>
>> Le lauantaina 2. maaliskuuta 2024, 9.42.06 EET flow gg a écrit :
>> >
>>
>> You would need a lot fewer if/else if you passed the order/bit-width
>> instead
>> of the size as macro parameter.
>>
>> Similarly, this can be folded as a single .else:
>>
>> +.elseif \type == 127
>> + li t1, 127
>> +.elseif \type == 128
>> + li t1, 128
>> +.elseif \type == 129
>> + li t1, 129
>>
>> --
>> レミ・デニ-クールモン
>> http://www.remlab.net/
>>
>>
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
[-- Attachment #2: 0001-lavc-vp9dsp-R-V-V-ipred-dc.patch --]
[-- Type: text/x-patch, Size: 14140 bytes --]
From 38089c897fb3776de776e28630eb5b21b1484012 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Mon, 26 Feb 2024 14:42:17 +0800
Subject: [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
C908:
vp9_dc_8x8_8bpp_c: 46.0
vp9_dc_8x8_8bpp_rvv_i64: 41.0
vp9_dc_16x16_8bpp_c: 109.2
vp9_dc_16x16_8bpp_rvv_i32: 72.7
vp9_dc_32x32_8bpp_c: 365.2
vp9_dc_32x32_8bpp_rvv_i32: 165.5
vp9_dc_127_8x8_8bpp_c: 23.0
vp9_dc_127_8x8_8bpp_rvv_i64: 22.0
vp9_dc_127_16x16_8bpp_c: 70.2
vp9_dc_127_16x16_8bpp_rvv_i32: 50.2
vp9_dc_127_32x32_8bpp_c: 295.2
vp9_dc_127_32x32_8bpp_rvv_i32: 136.7
vp9_dc_128_8x8_8bpp_c: 23.0
vp9_dc_128_8x8_8bpp_rvv_i64: 22.0
vp9_dc_128_16x16_8bpp_c: 70.2
vp9_dc_128_16x16_8bpp_rvv_i32: 50.2
vp9_dc_128_32x32_8bpp_c: 295.2
vp9_dc_128_32x32_8bpp_rvv_i32: 136.7
vp9_dc_129_8x8_8bpp_c: 23.0
vp9_dc_129_8x8_8bpp_rvv_i64: 22.0
vp9_dc_129_16x16_8bpp_c: 70.2
vp9_dc_129_16x16_8bpp_rvv_i32: 50.2
vp9_dc_129_32x32_8bpp_c: 295.2
vp9_dc_129_32x32_8bpp_rvv_i32: 136.7
vp9_dc_left_8x8_8bpp_c: 38.0
vp9_dc_left_8x8_8bpp_rvv_i64: 36.0
vp9_dc_left_16x16_8bpp_c: 93.2
vp9_dc_left_16x16_8bpp_rvv_i32: 67.7
vp9_dc_left_32x32_8bpp_c: 333.2
vp9_dc_left_32x32_8bpp_rvv_i32: 158.5
vp9_dc_top_8x8_8bpp_c: 38.7
vp9_dc_top_8x8_8bpp_rvv_i64: 36.0
vp9_dc_top_16x16_8bpp_c: 93.2
vp9_dc_top_16x16_8bpp_rvv_i32: 67.7
vp9_dc_top_32x32_8bpp_c: 333.2
vp9_dc_top_32x32_8bpp_rvv_i32: 156.2
---
libavcodec/riscv/Makefile | 2 +
libavcodec/riscv/vp9_intra_rvv.S | 171 +++++++++++++++++++++++++++++++
libavcodec/riscv/vp9dsp.h | 64 ++++++++++++
libavcodec/riscv/vp9dsp_init.c | 61 +++++++++++
libavcodec/vp9dsp.c | 2 +
libavcodec/vp9dsp.h | 1 +
6 files changed, 301 insertions(+)
create mode 100644 libavcodec/riscv/vp9_intra_rvv.S
create mode 100644 libavcodec/riscv/vp9dsp.h
create mode 100644 libavcodec/riscv/vp9dsp_init.c
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index dff8784102..c237e60800 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -60,5 +60,7 @@ OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
+OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
+RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
new file mode 100644
index 0000000000..bc4beb9c07
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro avgdc size size2
+ vwredsumu.vs v16, v8, v16
+ vsetivli zero, 1, e16, m1, ta, ma
+ vmv.x.s t1, v16
+ addi t1, t1, \size
+ srai t1, t1, \size2
+.endm
+
+.macro getdc type size size2
+.ifc \type,top
+ vmv.v.x v16, zero
+ vle8.v v8, (a3)
+ avgdc \size \size2
+.elseif \type == left
+ vmv.v.x v16, zero
+ vle8.v v8, (a2)
+ avgdc \size \size2
+.elseif \type == none
+ vmv.v.x v16, zero
+ vle8.v v8, (a2)
+ vwredsumu.vs v16, v8, v16
+ vle8.v v8, (a3)
+ avgdc \size \size2
+.else
+ li t1, \type
+.endif
+.endm
+
+.macro dc32x32 type size size2 restore
+ li t0, 32
+ vsetvli zero, t0, e8, m2, ta, ma
+ getdc \type \size \size2
+
+ .ifc \restore,1
+ vsetvli zero, t0, e8, m2, ta, ma
+ .endif
+ vmv.v.x v0, t1
+
+ .rept 31
+ vse8.v v0, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v0, (a0)
+
+ ret
+.endm
+
+.macro dc16x16 type size size2 restore
+ vsetivli zero, 16, e8, m1, ta, ma
+ getdc \type \size \size2
+
+ .ifc \restore,1
+ vsetivli zero, 16, e8, m1, ta, ma
+ .endif
+ vmv.v.x v0, t1
+
+ .rept 15
+ vse8.v v0, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v0, (a0)
+
+ ret
+.endm
+
+.macro dc8x8 type size size2 restore
+ vsetivli zero, 8, e8, mf2, ta, ma
+ getdc \type \size \size2
+
+ li t0, 64
+ vsetvli zero, t0, e8, m4, ta, ma
+ vmv.v.x v0, t1
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vsse64.v v0, (a0), a1
+
+ ret
+.endm
+
+func ff_dc_127_32x32_rvv, zve32x
+ dc32x32 127 0 0 0
+endfunc
+
+func ff_dc_127_16x16_rvv, zve32x
+ dc16x16 127 0 0 0
+endfunc
+
+func ff_dc_127_8x8_rvv, zve64x
+ dc8x8 127 0 0 0
+endfunc
+
+func ff_dc_128_32x32_rvv, zve32x
+ dc32x32 128 0 0 0
+endfunc
+
+func ff_dc_128_16x16_rvv, zve32x
+ dc16x16 128 0 0 0
+endfunc
+
+func ff_dc_128_8x8_rvv, zve64x
+ dc8x8 128 0 0 0
+endfunc
+
+func ff_dc_129_32x32_rvv, zve32x
+ dc32x32 129 0 0 0
+endfunc
+
+func ff_dc_129_16x16_rvv, zve32x
+ dc16x16 129 0 0 0
+endfunc
+
+func ff_dc_129_8x8_rvv, zve64x
+ dc8x8 129 0 0 0
+endfunc
+
+func ff_dc_32x32_rvv, zve32x
+ dc32x32 none 32 6 1
+endfunc
+
+func ff_dc_16x16_rvv, zve32x
+ dc16x16 none 16 5 1
+endfunc
+
+func ff_dc_8x8_rvv, zve64x
+ dc8x8 none 8 4 0
+endfunc
+
+func ff_dc_left_32x32_rvv, zve32x
+ dc32x32 left 16 5 1
+endfunc
+
+func ff_dc_left_16x16_rvv, zve32x
+ dc16x16 left 8 4 1
+endfunc
+
+func ff_dc_left_8x8_rvv, zve64x
+ dc8x8 left 4 3 0
+endfunc
+
+func ff_dc_top_32x32_rvv, zve32x
+ dc32x32 top 16 5 1
+endfunc
+
+func ff_dc_top_16x16_rvv, zve32x
+ dc16x16 top 8 4 1
+endfunc
+
+func ff_dc_top_8x8_rvv, zve64x
+ dc8x8 top 4 3 0
+endfunc
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
new file mode 100644
index 0000000000..abd57bd836
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
+#define AVCODEC_RISCV_VP9DSP_RISCV_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+
+#endif // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
new file mode 100644
index 0000000000..69ab39004c
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp.h"
+
+static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
+{
+ #if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
+ dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
+ dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
+ dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+ }
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
+ dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
+ dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
+ dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
+ dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
+ dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+ }
+ #endif
+}
+
+av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
+{
+ vp9dsp_intrapred_init_rvv(dsp, bpp);
+}
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index d8ddf74d4f..967e6e1e1a 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -100,6 +100,8 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
ff_vp9dsp_init_aarch64(dsp, bpp);
#elif ARCH_ARM
ff_vp9dsp_init_arm(dsp, bpp);
+#elif ARCH_RISCV
+ ff_vp9dsp_init_riscv(dsp, bpp, bitexact);
#elif ARCH_X86
ff_vp9dsp_init_x86(dsp, bpp, bitexact);
#elif ARCH_MIPS
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
index be0ac0b181..772848e349 100644
--- a/libavcodec/vp9dsp.h
+++ b/libavcodec/vp9dsp.h
@@ -131,6 +131,7 @@ void ff_vp9dsp_init_12(VP9DSPContext *dsp);
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);
--
2.44.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-03 1:59 ` flow gg
@ 2024-03-03 14:46 ` Rémi Denis-Courmont
2024-03-03 15:31 ` flow gg
0 siblings, 1 reply; 12+ messages in thread
From: Rémi Denis-Courmont @ 2024-03-03 14:46 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le sunnuntaina 3. maaliskuuta 2024, 3.59.00 EET flow gg a écrit :
> updated a little improve in this reply
As noted earlier, I don't understand why you have two size parameters. It
seems that \size is always either the same as (1 << (\size2 - 1)) a.k.a. ((1
<< \size2) / 2), or unused. The assembler *can* compute arithmetic constants.
Similarly, you can use \restore as a truth value directly: `.if \restore`.
FWIW, it seems that you could just as well include func/endfunc inside the
macros.
--
レミ・デニ-クールモン
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-03 14:46 ` Rémi Denis-Courmont
@ 2024-03-03 15:31 ` flow gg
2024-03-07 11:20 ` flow gg
0 siblings, 1 reply; 12+ messages in thread
From: flow gg @ 2024-03-03 15:31 UTC (permalink / raw)
To: FFmpeg development discussions and patches
> As noted earlier, I don't understand why you have two size parameters. It
> seems that \size is always either the same as (1 << (\size2 - 1)) a.k.a. ((1
> << \size2) / 2), or unused. The assembler *can* compute arithmetic
> constants.
Thanks, I didn't know that before.
> Similarly, you can use \restore as a truth value directly: `.if \restore`.
Okay.
> FWIW, it seems that you could just as well include func/endfunc inside the
> macros.
Do you mean to generate func/endfunc using macros?
Rémi Denis-Courmont <remi@remlab.net> 于2024年3月3日周日 22:46写道:
> Le sunnuntaina 3. maaliskuuta 2024, 3.59.00 EET flow gg a écrit :
> > updated a little improve in this reply
>
> As noted earlier, I don't understand why you have two size parameters. It
> seems that \size is always either the same as (1 << (\size2 - 1)) a.k.a.
> ((1
> << \size2) / 2), or unused. The assembler *can* compute arithmetic
> constants.
>
> Similarly, you can use \restore as a truth value directly: `.if \restore`.
>
> FWIW, it seems that you could just as well include func/endfunc inside the
> macros.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-03 15:31 ` flow gg
@ 2024-03-07 11:20 ` flow gg
2024-03-22 6:02 ` flow gg
0 siblings, 1 reply; 12+ messages in thread
From: flow gg @ 2024-03-07 11:20 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1653 bytes --]
updated it in the reply
flow gg <hlefthleft@gmail.com> 于2024年3月3日周日 23:31写道:
> > As noted earlier, I don't understand why you have two size parameters.
> It
> seems that \size is always either the same as (1 << (\size2 - 1)) a.k.a.
> ((1
> << \size2) / 2), or unused. The assembler *can* compute arithmetic
> constants.
>
> Thanks , I didn't know that before
>
> > Similarly, you can use \restore as a truth value directly: `.if
> \restore`.
>
> Okay
>
> FWIW, it seems that you could just as well include func/endfunc inside the
> macros.
>
> Do you mean to generate func/endfunc using macros?
>
> Rémi Denis-Courmont <remi@remlab.net> 于2024年3月3日周日 22:46写道:
>
>> Le sunnuntaina 3. maaliskuuta 2024, 3.59.00 EET flow gg a écrit :
>> > updated a little improve in this reply
>>
>> As noted earlier, I don't understand why you have two size parameters.
>> It
>> seems that \size is always either the same as (1 << (\size2 - 1)) a.k.a.
>> ((1
>> << \size2) / 2), or unused. The assembler *can* compute arithmetic
>> constants.
>>
>> Similarly, you can use \restore as a truth value directly: `.if \restore`.
>>
>> FWIW, it seems that you could just as well include func/endfunc inside
>> the
>> macros.
>>
>> --
>> レミ・デニ-クールモン
>> http://www.remlab.net/
>>
>>
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
[-- Attachment #2: 0001-lavc-vp9dsp-R-V-V-ipred-dc.patch --]
[-- Type: text/x-patch, Size: 14032 bytes --]
From ba101f21d108a906103c144bf0e67bd5f36fd6f1 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Mon, 26 Feb 2024 14:42:17 +0800
Subject: [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
C908:
vp9_dc_8x8_8bpp_c: 46.0
vp9_dc_8x8_8bpp_rvv_i64: 41.0
vp9_dc_16x16_8bpp_c: 109.2
vp9_dc_16x16_8bpp_rvv_i32: 72.7
vp9_dc_32x32_8bpp_c: 365.2
vp9_dc_32x32_8bpp_rvv_i32: 165.5
vp9_dc_127_8x8_8bpp_c: 23.0
vp9_dc_127_8x8_8bpp_rvv_i64: 22.0
vp9_dc_127_16x16_8bpp_c: 70.2
vp9_dc_127_16x16_8bpp_rvv_i32: 50.2
vp9_dc_127_32x32_8bpp_c: 295.2
vp9_dc_127_32x32_8bpp_rvv_i32: 136.7
vp9_dc_128_8x8_8bpp_c: 23.0
vp9_dc_128_8x8_8bpp_rvv_i64: 22.0
vp9_dc_128_16x16_8bpp_c: 70.2
vp9_dc_128_16x16_8bpp_rvv_i32: 50.2
vp9_dc_128_32x32_8bpp_c: 295.2
vp9_dc_128_32x32_8bpp_rvv_i32: 136.7
vp9_dc_129_8x8_8bpp_c: 23.0
vp9_dc_129_8x8_8bpp_rvv_i64: 22.0
vp9_dc_129_16x16_8bpp_c: 70.2
vp9_dc_129_16x16_8bpp_rvv_i32: 50.2
vp9_dc_129_32x32_8bpp_c: 295.2
vp9_dc_129_32x32_8bpp_rvv_i32: 136.7
vp9_dc_left_8x8_8bpp_c: 38.0
vp9_dc_left_8x8_8bpp_rvv_i64: 36.0
vp9_dc_left_16x16_8bpp_c: 93.2
vp9_dc_left_16x16_8bpp_rvv_i32: 67.7
vp9_dc_left_32x32_8bpp_c: 333.2
vp9_dc_left_32x32_8bpp_rvv_i32: 158.5
vp9_dc_top_8x8_8bpp_c: 38.7
vp9_dc_top_8x8_8bpp_rvv_i64: 36.0
vp9_dc_top_16x16_8bpp_c: 93.2
vp9_dc_top_16x16_8bpp_rvv_i32: 67.7
vp9_dc_top_32x32_8bpp_c: 333.2
vp9_dc_top_32x32_8bpp_rvv_i32: 156.2
---
libavcodec/riscv/Makefile | 2 +
libavcodec/riscv/vp9_intra_rvv.S | 171 +++++++++++++++++++++++++++++++
libavcodec/riscv/vp9dsp.h | 64 ++++++++++++
libavcodec/riscv/vp9dsp_init.c | 61 +++++++++++
libavcodec/vp9dsp.c | 2 +
libavcodec/vp9dsp.h | 1 +
6 files changed, 301 insertions(+)
create mode 100644 libavcodec/riscv/vp9_intra_rvv.S
create mode 100644 libavcodec/riscv/vp9dsp.h
create mode 100644 libavcodec/riscv/vp9dsp_init.c
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index dff8784102..c237e60800 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -60,5 +60,7 @@ OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
+OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
+RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
new file mode 100644
index 0000000000..b66f466f4b
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro avgdc size
+ vwredsumu.vs v16, v8, v16
+ vsetivli zero, 1, e16, m1, ta, ma
+ vmv.x.s t1, v16
+ addi t1, t1, 1 << (\size - 1)
+ srai t1, t1, \size
+.endm
+
+.macro getdc type size
+.ifc \type,top
+ vmv.v.x v16, zero
+ vle8.v v8, (a3)
+ avgdc \size
+.elseif \type == left
+ vmv.v.x v16, zero
+ vle8.v v8, (a2)
+ avgdc \size
+.elseif \type == none
+ vmv.v.x v16, zero
+ vle8.v v8, (a2)
+ vwredsumu.vs v16, v8, v16
+ vle8.v v8, (a3)
+ avgdc \size
+.else
+ li t1, \type
+.endif
+.endm
+
+.macro dc32x32 type size restore
+ li t0, 32
+ vsetvli zero, t0, e8, m2, ta, ma
+ getdc \type \size
+
+ .if \restore
+ vsetvli zero, t0, e8, m2, ta, ma
+ .endif
+ vmv.v.x v0, t1
+
+ .rept 31
+ vse8.v v0, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v0, (a0)
+
+ ret
+.endm
+
+.macro dc16x16 type size restore
+ vsetivli zero, 16, e8, m1, ta, ma
+ getdc \type \size
+
+ .if \restore
+ vsetivli zero, 16, e8, m1, ta, ma
+ .endif
+ vmv.v.x v0, t1
+
+ .rept 15
+ vse8.v v0, (a0)
+ add a0, a0, a1
+ .endr
+ vse8.v v0, (a0)
+
+ ret
+.endm
+
+.macro dc8x8 type size restore
+ vsetivli zero, 8, e8, mf2, ta, ma
+ getdc \type \size
+
+ li t0, 64
+ vsetvli zero, t0, e8, m4, ta, ma
+ vmv.v.x v0, t1
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vsse64.v v0, (a0), a1
+
+ ret
+.endm
+
+func ff_dc_127_32x32_rvv, zve32x
+ dc32x32 127 0 0
+endfunc
+
+func ff_dc_127_16x16_rvv, zve32x
+ dc16x16 127 0 0
+endfunc
+
+func ff_dc_127_8x8_rvv, zve64x
+ dc8x8 127 0 0
+endfunc
+
+func ff_dc_128_32x32_rvv, zve32x
+ dc32x32 128 0 0
+endfunc
+
+func ff_dc_128_16x16_rvv, zve32x
+ dc16x16 128 0 0
+endfunc
+
+func ff_dc_128_8x8_rvv, zve64x
+ dc8x8 128 0 0
+endfunc
+
+func ff_dc_129_32x32_rvv, zve32x
+ dc32x32 129 0 0
+endfunc
+
+func ff_dc_129_16x16_rvv, zve32x
+ dc16x16 129 0 0
+endfunc
+
+func ff_dc_129_8x8_rvv, zve64x
+ dc8x8 129 0 0
+endfunc
+
+func ff_dc_32x32_rvv, zve32x
+ dc32x32 none 6 1
+endfunc
+
+func ff_dc_16x16_rvv, zve32x
+ dc16x16 none 5 1
+endfunc
+
+func ff_dc_8x8_rvv, zve64x
+ dc8x8 none 4 0
+endfunc
+
+func ff_dc_left_32x32_rvv, zve32x
+ dc32x32 left 5 1
+endfunc
+
+func ff_dc_left_16x16_rvv, zve32x
+ dc16x16 left 4 1
+endfunc
+
+func ff_dc_left_8x8_rvv, zve64x
+ dc8x8 left 3 0
+endfunc
+
+func ff_dc_top_32x32_rvv, zve32x
+ dc32x32 top 5 1
+endfunc
+
+func ff_dc_top_16x16_rvv, zve32x
+ dc16x16 top 4 1
+endfunc
+
+func ff_dc_top_8x8_rvv, zve64x
+ dc8x8 top 3 0
+endfunc
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
new file mode 100644
index 0000000000..abd57bd836
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
+#define AVCODEC_RISCV_VP9DSP_RISCV_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+
+#endif // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
new file mode 100644
index 0000000000..69ab39004c
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp.h"
+
+static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
+{
+ #if HAVE_RVV
+ int flags = av_get_cpu_flags();
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) {
+ dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
+ dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
+ dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+ }
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) {
+ dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
+ dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
+ dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
+ dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
+ dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+ }
+ #endif
+}
+
+av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
+{
+ vp9dsp_intrapred_init_rvv(dsp, bpp);
+}
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index d8ddf74d4f..967e6e1e1a 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -100,6 +100,8 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
ff_vp9dsp_init_aarch64(dsp, bpp);
#elif ARCH_ARM
ff_vp9dsp_init_arm(dsp, bpp);
+#elif ARCH_RISCV
+ ff_vp9dsp_init_riscv(dsp, bpp, bitexact);
#elif ARCH_X86
ff_vp9dsp_init_x86(dsp, bpp, bitexact);
#elif ARCH_MIPS
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
index be0ac0b181..772848e349 100644
--- a/libavcodec/vp9dsp.h
+++ b/libavcodec/vp9dsp.h
@@ -131,6 +131,7 @@ void ff_vp9dsp_init_12(VP9DSPContext *dsp);
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);
--
2.44.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-07 11:20 ` flow gg
@ 2024-03-22 6:02 ` flow gg
2024-03-27 15:41 ` Rémi Denis-Courmont
0 siblings, 1 reply; 12+ messages in thread
From: flow gg @ 2024-03-22 6:02 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1864 bytes --]
Using macros to shorten function definitions, updated in this response
flow gg <hlefthleft@gmail.com> 于2024年3月7日周四 19:20写道:
> updated it in the reply
>
> flow gg <hlefthleft@gmail.com> 于2024年3月3日周日 23:31写道:
>
>> > As noted earlier, I don't understand why you have two size parameters.
>> It
>> seems that \size is always either the same as (1 << (\size2 - 1)) a.k.a.
>> ((1
>> << \size2) / 2), or unused. The assembler *can* compute arithmetic
>> constants.
>>
>> Thanks , I didn't know that before
>>
>> > Similarly, you can use \restore as a truth value directly: `.if
>> \restore`.
>>
>> Okay
>>
>> FWIW, it seems that you could just as well include func/endfunc inside the
>> macros.
>>
>> Do you mean to generate func/endfunc using macros?
>>
>> Rémi Denis-Courmont <remi@remlab.net> 于2024年3月3日周日 22:46写道:
>>
>>> Le sunnuntaina 3. maaliskuuta 2024, 3.59.00 EET flow gg a écrit :
>>> > updated a little improve in this reply
>>>
>>> As noted earlier, I don't understand why you have two size parameters.
>>> It
>>> seems that \size is always either the same as (1 << (\size2 - 1)) a.k.a.
>>> ((1
>>> << \size2) / 2), or unused. The assembler *can* compute arithmetic
>>> constants.
>>>
>>> Similarly, you can use \restore as a truth value directly: `.if
>>> \restore`.
>>>
>>> FWIW, it seems that you could just as well include func/endfunc inside
>>> the
>>> macros.
>>>
>>> --
>>> レミ・デニ-クールモン
>>> http://www.remlab.net/
>>>
>>>
>>>
>>> _______________________________________________
>>> ffmpeg-devel mailing list
>>> ffmpeg-devel@ffmpeg.org
>>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>>
>>> To unsubscribe, visit link above, or email
>>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>>
>>
[-- Attachment #2: 0001-lavc-vp9dsp-R-V-V-ipred-dc.patch --]
[-- Type: text/x-patch, Size: 20164 bytes --]
From a0cf73bae4033c62279104c039dbd87049998051 Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Mon, 26 Feb 2024 14:42:17 +0800
Subject: [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
C908:
vp9_dc_8x8_8bpp_c: 46.0
vp9_dc_8x8_8bpp_rvv_i64: 41.0
vp9_dc_16x16_8bpp_c: 109.2
vp9_dc_16x16_8bpp_rvv_i32: 72.7
vp9_dc_32x32_8bpp_c: 365.2
vp9_dc_32x32_8bpp_rvv_i32: 165.5
vp9_dc_127_8x8_8bpp_c: 23.0
vp9_dc_127_8x8_8bpp_rvv_i64: 22.0
vp9_dc_127_16x16_8bpp_c: 70.2
vp9_dc_127_16x16_8bpp_rvv_i32: 50.2
vp9_dc_127_32x32_8bpp_c: 295.2
vp9_dc_127_32x32_8bpp_rvv_i32: 136.7
vp9_dc_128_8x8_8bpp_c: 23.0
vp9_dc_128_8x8_8bpp_rvv_i64: 22.0
vp9_dc_128_16x16_8bpp_c: 70.2
vp9_dc_128_16x16_8bpp_rvv_i32: 50.2
vp9_dc_128_32x32_8bpp_c: 295.2
vp9_dc_128_32x32_8bpp_rvv_i32: 136.7
vp9_dc_129_8x8_8bpp_c: 23.0
vp9_dc_129_8x8_8bpp_rvv_i64: 22.0
vp9_dc_129_16x16_8bpp_c: 70.2
vp9_dc_129_16x16_8bpp_rvv_i32: 50.2
vp9_dc_129_32x32_8bpp_c: 295.2
vp9_dc_129_32x32_8bpp_rvv_i32: 136.7
vp9_dc_left_8x8_8bpp_c: 38.0
vp9_dc_left_8x8_8bpp_rvv_i64: 36.0
vp9_dc_left_16x16_8bpp_c: 93.2
vp9_dc_left_16x16_8bpp_rvv_i32: 67.7
vp9_dc_left_32x32_8bpp_c: 333.2
vp9_dc_left_32x32_8bpp_rvv_i32: 158.5
vp9_dc_top_8x8_8bpp_c: 38.7
vp9_dc_top_8x8_8bpp_rvv_i64: 36.0
vp9_dc_top_16x16_8bpp_c: 93.2
vp9_dc_top_16x16_8bpp_rvv_i32: 67.7
vp9_dc_top_32x32_8bpp_c: 333.2
vp9_dc_top_32x32_8bpp_rvv_i32: 156.2
---
libavcodec/riscv/Makefile | 2 +
libavcodec/riscv/vp9_intra_rvv.S | 124 ++++++++++++++++++++++
libavcodec/riscv/vp9dsp.h | 171 +++++++++++++++++++++++++++++++
libavcodec/riscv/vp9dsp_init.c | 61 +++++++++++
libavcodec/vp9dsp.c | 2 +
libavcodec/vp9dsp.h | 1 +
6 files changed, 361 insertions(+)
create mode 100644 libavcodec/riscv/vp9_intra_rvv.S
create mode 100644 libavcodec/riscv/vp9dsp.h
create mode 100644 libavcodec/riscv/vp9dsp_init.c
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index dff8784102..c237e60800 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -60,5 +60,7 @@ OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
+OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
+RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
new file mode 100644
index 0000000000..181f186d7c
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro avgdc size /* t1 = (v16[0] + sum of v8 + rounding bias) >> \size; leaves vtype at e16 with vl = 1 */
+ vwredsumu.vs v16, v8, v16 /* v16[0] = v16[0] + widening unsigned sum of the active v8 elements */
+ vsetivli zero, 1, e16, m1, ta, ma /* one 16-bit element: the widened sum */
+ vmv.x.s t1, v16 /* t1 = v16[0] */
+ addi t1, t1, 1 << (\size - 1) /* add half the divisor so the shift below rounds half up */
+ srai t1, t1, \size /* divide by 2^\size */
+.endm
+
+.macro getdc type size /* t1 = fill value: rounded average of the bytes at a2 and/or a3 for top/left/dc, else the literal \type */
+.ifc \type,top
+ vmv.v.x v16, zero /* clear the accumulator read by avgdc */
+ vle8.v v8, (a3) /* load from a3 (4th argument, `a` in the C prototypes) */
+ avgdc \size
+.elseif \type == left /* NOTE(review): this is an expression compare, not a string compare like .ifc above - confirm every supported assembler accepts it */
+ vmv.v.x v16, zero
+ vle8.v v8, (a2) /* load from a2 (3rd argument, `l` in the C prototypes) */
+ avgdc \size
+.elseif \type == dc
+ vmv.v.x v16, zero
+ vle8.v v8, (a2)
+ vwredsumu.vs v16, v8, v16 /* accumulate the bytes from a2 first */
+ vle8.v v8, (a3)
+ avgdc \size /* avgdc adds the bytes from a3 on top of the running sum */
+.else
+ li t1, \type /* constant fill: \type is used as the immediate (127/128/129 in the instantiations) */
+.endif
+.endm
+
+.macro dc32x32 type size restore /* 32x32 fill body: get the value in t1 via getdc, then store it to 32 rows of 32 bytes */
+ li t0, 32
+ vsetvli zero, t0, e8, m2, ta, ma /* request vl = 32 bytes; 32 does not fit the 5-bit vsetivli immediate */
+ getdc \type \size
+
+ .if \restore
+ vsetvli zero, t0, e8, m2, ta, ma /* avgdc switched to e16 with vl = 1; go back to 32 x e8 */
+ .endif
+ vmv.v.x v0, t1 /* broadcast the fill byte */
+
+ .rept 31
+ vse8.v v0, (a0) /* write one row at a0 */
+ add a0, a0, a1 /* advance a0 by a1 (the stride argument) */
+ .endr
+ vse8.v v0, (a0) /* last row, no pointer update needed */
+
+ ret
+.endm
+
+.macro dc16x16 type size restore /* 16x16 fill body: get the value in t1 via getdc, then store it to 16 rows of 16 bytes */
+ vsetivli zero, 16, e8, m1, ta, ma /* request vl = 16 bytes */
+ getdc \type \size
+
+ .if \restore
+ vsetivli zero, 16, e8, m1, ta, ma /* avgdc switched to e16 with vl = 1; go back to 16 x e8 */
+ .endif
+ vmv.v.x v0, t1 /* broadcast the fill byte */
+
+ .rept 15
+ vse8.v v0, (a0) /* write one row at a0 */
+ add a0, a0, a1 /* advance a0 by a1 (the stride argument) */
+ .endr
+ vse8.v v0, (a0) /* last row, no pointer update needed */
+
+ ret
+.endm
+
+.macro dc8x8 type size restore /* 8x8 fill body; \restore is not referenced because vtype is always re-set below */
+ vsetivli zero, 8, e8, mf2, ta, ma /* 8 bytes for the loads done in getdc */
+ getdc \type \size
+
+ li t0, 64
+ vsetvli zero, t0, e8, m4, ta, ma /* request 64 bytes across the register group starting at v0 */
+ vmv.v.x v0, t1 /* fill those bytes with the value */
+ vsetivli zero, 8, e8, mf2, ta, ma /* vl = 8 for the store below */
+ vsse64.v v0, (a0), a1 /* 8 strided 64-bit stores: one 8-byte row every a1 bytes */
+
+ ret
+.endm
+
+.macro func_dc name size type n restore ext /* emit function ff_<name>_<size>_rvv whose body is the dc<size> macro */
+func ff_\()\name\()_\()\size\()_rvv, \ext
+ dc\size \type \n \restore
+endfunc
+.endm
+
+func_dc dc_127 32x32 127 0 0 zve32x /* columns: name, size, type, n (shift for avgdc), restore, ext */
+func_dc dc_127 16x16 127 0 0 zve32x
+func_dc dc_127 8x8 127 0 0 zve64x
+func_dc dc_128 32x32 128 0 0 zve32x
+func_dc dc_128 16x16 128 0 0 zve32x
+func_dc dc_128 8x8 128 0 0 zve64x
+func_dc dc_129 32x32 129 0 0 zve32x
+func_dc dc_129 16x16 129 0 0 zve32x
+func_dc dc_129 8x8 129 0 0 zve64x
+func_dc dc 32x32 dc 6 1 zve32x /* n = 6: 64 bytes are summed (32 from a2 plus 32 from a3) */
+func_dc dc 16x16 dc 5 1 zve32x
+func_dc dc 8x8 dc 4 0 zve64x
+func_dc dc_left 32x32 left 5 1 zve32x /* n = 5: 32 bytes are summed */
+func_dc dc_left 16x16 left 4 1 zve32x
+func_dc dc_left 8x8 left 3 0 zve64x
+func_dc dc_top 32x32 top 5 1 zve32x
+func_dc dc_top 16x16 top 4 1 zve32x
+func_dc dc_top 8x8 top 3 0 zve64x
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
new file mode 100644
index 0000000000..0540c7303d
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
+#define AVCODEC_RISCV_VP9DSP_RISCV_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+
+#define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx) \
+void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+#define VP9_BILINEAR_RISCV_RVV_FUNC(SIZE) \
+void ff_put_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+#define VP9_COPY_AVG_RISCV_RVV_FUNC(SIZE) \
+void ff_copy##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+VP9_8TAP_RISCV_RVV_FUNC(64, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(32, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(16, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(8, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(4, regular, FILTER_8TAP_REGULAR);
+
+VP9_8TAP_RISCV_RVV_FUNC(64, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(32, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(16, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(8, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(4, sharp, FILTER_8TAP_SHARP);
+
+VP9_8TAP_RISCV_RVV_FUNC(64, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(4, smooth, FILTER_8TAP_SMOOTH);
+
+VP9_BILINEAR_RISCV_RVV_FUNC(64);
+VP9_BILINEAR_RISCV_RVV_FUNC(32);
+VP9_BILINEAR_RISCV_RVV_FUNC(16);
+VP9_BILINEAR_RISCV_RVV_FUNC(8);
+VP9_BILINEAR_RISCV_RVV_FUNC(4);
+
+VP9_COPY_AVG_RISCV_RVV_FUNC(64);
+VP9_COPY_AVG_RISCV_RVV_FUNC(32);
+VP9_COPY_AVG_RISCV_RVV_FUNC(16);
+VP9_COPY_AVG_RISCV_RVV_FUNC(8);
+VP9_COPY_AVG_RISCV_RVV_FUNC(4);
+
+#define VP9_COPY_RISCV_RVI_FUNC(SIZE) \
+void ff_copy##SIZE##_rvi(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+VP9_COPY_RISCV_RVI_FUNC(8);
+VP9_COPY_RISCV_RVI_FUNC(4);
+
+#undef VP9_8TAP_RISCV_RVV_FUNC
+#undef VP9_BILINEAR_RISCV_RVV_FUNC
+#undef VP9_COPY_AVG_RISCV_RVV_FUNC
+
+#endif // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
new file mode 100644
index 0000000000..69ab39004c
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp.h"
+
+static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp) /* install the RVV DC intra predictors into dsp->intra_pred when the run-time checks below pass */
+{
+ #if HAVE_RVV /* the whole body is compiled out when RVV support is not built */
+ int flags = av_get_cpu_flags();
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_get_rv_vlenb() >= 16) { /* 8x8 set: 8-bit depth, RVV_I64 flag, and ff_get_rv_vlenb() >= 16 (presumably the vector length in bytes - confirm) */
+ dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
+ dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_127_PRED] = ff_dc_127_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_128_PRED] = ff_dc_128_8x8_rvv;
+ dsp->intra_pred[TX_8X8][DC_129_PRED] = ff_dc_129_8x8_rvv;
+ dsp->intra_pred[TX_8X8][TOP_DC_PRED] = ff_dc_top_8x8_rvv;
+ }
+
+ if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_get_rv_vlenb() >= 16) { /* 16x16 and 32x32 sets: same checks but only the RVV_I32 flag is required */
+ dsp->intra_pred[TX_32X32][DC_PRED] = ff_dc_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_PRED] = ff_dc_16x16_rvv;
+ dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
+ dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_127_PRED] = ff_dc_127_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_127_PRED] = ff_dc_127_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_128_PRED] = ff_dc_128_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_128_PRED] = ff_dc_128_16x16_rvv;
+ dsp->intra_pred[TX_32X32][DC_129_PRED] = ff_dc_129_32x32_rvv;
+ dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
+ dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
+ dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
+ }
+ #endif
+}
+
+av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact) /* RISC-V hook called from ff_vp9dsp_init(); bitexact is accepted but not used here */
+{
+ vp9dsp_intrapred_init_rvv(dsp, bpp); /* only the intra prediction functions are set up in this patch */
+}
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index d8ddf74d4f..967e6e1e1a 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -100,6 +100,8 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
ff_vp9dsp_init_aarch64(dsp, bpp);
#elif ARCH_ARM
ff_vp9dsp_init_arm(dsp, bpp);
+#elif ARCH_RISCV
+ ff_vp9dsp_init_riscv(dsp, bpp, bitexact);
#elif ARCH_X86
ff_vp9dsp_init_x86(dsp, bpp, bitexact);
#elif ARCH_MIPS
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
index be0ac0b181..772848e349 100644
--- a/libavcodec/vp9dsp.h
+++ b/libavcodec/vp9dsp.h
@@ -131,6 +131,7 @@ void ff_vp9dsp_init_12(VP9DSPContext *dsp);
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);
--
2.44.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-22 6:02 ` flow gg
@ 2024-03-27 15:41 ` Rémi Denis-Courmont
2024-03-28 2:44 ` flow gg
0 siblings, 1 reply; 12+ messages in thread
From: Rémi Denis-Courmont @ 2024-03-27 15:41 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le perjantaina 22. maaliskuuta 2024, 8.02.08 EET flow gg a écrit :
> Using macros to shorten function definitions, updated in this response
Did you try to share the common code after getdc and see how much slower it is? If
an extra static branch has negligible overhead, it would reduce binary size
quite a bit here, AFAICT.
--
レミ・デニ-クールモン
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-27 15:41 ` Rémi Denis-Courmont
@ 2024-03-28 2:44 ` flow gg
2024-04-03 20:21 ` Rémi Denis-Courmont
0 siblings, 1 reply; 12+ messages in thread
From: flow gg @ 2024-03-28 2:44 UTC (permalink / raw)
To: FFmpeg development discussions and patches
I don't quite understand, I think here 8x8 because zve64x is not suitable
for sharing, it shares between dc16x16 and dc32x32, there isn't much common
code, it would require adding 3 if-else statements and function parameters,
it feels okay not to extract too.
Rémi Denis-Courmont <remi@remlab.net> 于2024年3月27日周三 23:41写道:
> Le perjantaina 22. maaliskuuta 2024, 8.02.08 EET flow gg a écrit :
> > Using macros to shorten function definitions, updated in this response
>
> Did you try to share the common code after getdc and see how slower it is?
> If
> an extra static branch has negligible overhead, it would reduce binary
> size
> quite a bit here, AFAICT.
>
> --
> レミ・デニ-クールモン
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-03-28 2:44 ` flow gg
@ 2024-04-03 20:21 ` Rémi Denis-Courmont
2024-04-07 5:38 ` flow gg
0 siblings, 1 reply; 12+ messages in thread
From: Rémi Denis-Courmont @ 2024-04-03 20:21 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le torstaina 28. maaliskuuta 2024, 4.44.33 EEST flow gg a écrit :
> I don't quite understand, I think here 8x8 because zve64x is not suitable
> for sharing, it shares between dc16x16 and dc32x32, there isn't much common
> code, it would require adding 3 if-else statements and function parameters,
> it feels okay not to extract too.
I agree that we can't realistically share code between the different block
sizes. My point was that the code after getdc is lengthy (after expansion) and
fixed for a given block size, so *that* code could be shared and jumped as
common function tail.
--
Rémi Denis-Courmont
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc
2024-04-03 20:21 ` Rémi Denis-Courmont
@ 2024-04-07 5:38 ` flow gg
0 siblings, 0 replies; 12+ messages in thread
From: flow gg @ 2024-04-07 5:38 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1140 bytes --]
Okay, updated it in the reply and github(
https://github.com/hleft/FFmpeg/tree/vp8vp9)
Rémi Denis-Courmont <remi@remlab.net> 于2024年4月4日周四 04:22写道:
> Le torstaina 28. maaliskuuta 2024, 4.44.33 EEST flow gg a écrit :
> > I don't quite understand, I think here 8x8 because zve64x is not suitable
> > for sharing, it shares between dc16x16 and dc32x32, there isn't much
> common
> > code, it would require adding 3 if-else statements and function
> parameters,
> > it feels okay not to extract too.
>
> I agree that we can't realistically share code between the different block
> sizes. My point was that the code after getdc is lengthy (after expansion)
> and
> fixed for a given block size, so *that* code could be shared and jumped as
> common function tail.
>
> --
> Rémi Denis-Courmont
> http://www.remlab.net/
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
[-- Attachment #2: 0008-lavc-vp9dsp-R-V-V-ipred-dc.patch --]
[-- Type: text/x-patch, Size: 19927 bytes --]
From f4e49d6f26c1ed85907a4ef7596dcc7b77cd9b8c Mon Sep 17 00:00:00 2001
From: sunyuechi <sunyuechi@iscas.ac.cn>
Date: Mon, 26 Feb 2024 14:42:17 +0800
Subject: [PATCH 08/18] lavc/vp9dsp: R-V V ipred dc
C908:
vp9_dc_8x8_8bpp_c: 46.0
vp9_dc_8x8_8bpp_rvv_i64: 41.0
vp9_dc_16x16_8bpp_c: 109.2
vp9_dc_16x16_8bpp_rvv_i32: 72.7
vp9_dc_32x32_8bpp_c: 365.2
vp9_dc_32x32_8bpp_rvv_i32: 165.5
vp9_dc_127_8x8_8bpp_c: 23.0
vp9_dc_127_8x8_8bpp_rvv_i64: 22.0
vp9_dc_127_16x16_8bpp_c: 70.2
vp9_dc_127_16x16_8bpp_rvv_i32: 50.2
vp9_dc_127_32x32_8bpp_c: 295.2
vp9_dc_127_32x32_8bpp_rvv_i32: 136.7
vp9_dc_128_8x8_8bpp_c: 23.0
vp9_dc_128_8x8_8bpp_rvv_i64: 22.0
vp9_dc_128_16x16_8bpp_c: 70.2
vp9_dc_128_16x16_8bpp_rvv_i32: 50.2
vp9_dc_128_32x32_8bpp_c: 295.2
vp9_dc_128_32x32_8bpp_rvv_i32: 136.7
vp9_dc_129_8x8_8bpp_c: 23.0
vp9_dc_129_8x8_8bpp_rvv_i64: 22.0
vp9_dc_129_16x16_8bpp_c: 70.2
vp9_dc_129_16x16_8bpp_rvv_i32: 50.2
vp9_dc_129_32x32_8bpp_c: 295.2
vp9_dc_129_32x32_8bpp_rvv_i32: 136.7
vp9_dc_left_8x8_8bpp_c: 38.0
vp9_dc_left_8x8_8bpp_rvv_i64: 36.0
vp9_dc_left_16x16_8bpp_c: 93.2
vp9_dc_left_16x16_8bpp_rvv_i32: 67.7
vp9_dc_left_32x32_8bpp_c: 333.2
vp9_dc_left_32x32_8bpp_rvv_i32: 158.5
vp9_dc_top_8x8_8bpp_c: 38.7
vp9_dc_top_8x8_8bpp_rvv_i64: 36.0
vp9_dc_top_16x16_8bpp_c: 93.2
vp9_dc_top_16x16_8bpp_rvv_i32: 67.7
vp9_dc_top_32x32_8bpp_c: 333.2
vp9_dc_top_32x32_8bpp_rvv_i32: 156.2
---
libavcodec/riscv/Makefile | 2 +
libavcodec/riscv/vp9_intra_rvv.S | 115 +++++++++++++++++++++
libavcodec/riscv/vp9dsp.h | 171 +++++++++++++++++++++++++++++++
libavcodec/riscv/vp9dsp_init.c | 61 +++++++++++
libavcodec/vp9dsp.c | 2 +
libavcodec/vp9dsp.h | 1 +
6 files changed, 352 insertions(+)
create mode 100644 libavcodec/riscv/vp9_intra_rvv.S
create mode 100644 libavcodec/riscv/vp9dsp.h
create mode 100644 libavcodec/riscv/vp9dsp_init.c
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 6c2ce3001a..69ccd0896d 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -58,5 +58,7 @@ OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
+OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
+RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
new file mode 100644
index 0000000000..db9774c263
--- /dev/null
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+.macro avgdc size /* t1 = (v16[0] + sum of v8 + rounding bias) >> \size; leaves vtype at e16 with vl = 1 */
+ vwredsumu.vs v16, v8, v16 /* v16[0] = v16[0] + widening unsigned sum of the active v8 elements */
+ vsetivli zero, 1, e16, m1, ta, ma /* one 16-bit element: the widened sum */
+ vmv.x.s t1, v16 /* t1 = v16[0] */
+ addi t1, t1, 1 << (\size - 1) /* add half the divisor so the shift below rounds half up */
+ srai t1, t1, \size /* divide by 2^\size */
+.endm
+
+.macro getdc type size /* t1 = fill value: rounded average of the bytes at a2 and/or a3 for top/left/dc, else the literal \type */
+.ifc \type,top
+ vmv.v.x v16, zero /* clear the accumulator read by avgdc */
+ vle8.v v8, (a3) /* load from a3 (4th argument, `a` in the C prototypes) */
+ avgdc \size
+.elseif \type == left /* NOTE(review): this is an expression compare, not a string compare like .ifc above - confirm every supported assembler accepts it */
+ vmv.v.x v16, zero
+ vle8.v v8, (a2) /* load from a2 (3rd argument, `l` in the C prototypes) */
+ avgdc \size
+.elseif \type == dc
+ vmv.v.x v16, zero
+ vle8.v v8, (a2)
+ vwredsumu.vs v16, v8, v16 /* accumulate the bytes from a2 first */
+ vle8.v v8, (a3)
+ avgdc \size /* avgdc adds the bytes from a3 on top of the running sum */
+.else
+ li t1, \type /* constant fill: \type is used as the immediate (127/128/129 in the instantiations) */
+.endif
+.endm
+
+.macro dc_e32 type size n restore /* shared 16x16 and 32x32 fill body: \size selects the vector setup and row count, \n is the avgdc shift */
+.ifc \size,32
+ li t0, 32
+ vsetvli zero, t0, e8, m2, ta, ma /* request vl = 32 bytes; 32 does not fit the 5-bit vsetivli immediate */
+.else
+ vsetivli zero, 16, e8, m1, ta, ma /* request vl = 16 bytes */
+.endif
+ getdc \type \n
+
+.if \restore == 1 && \size == 32 /* avgdc switched to e16 with vl = 1, so re-issue the matching setup */
+ vsetvli zero, t0, e8, m2, ta, ma
+.elseif \restore == 1 && \size == 16
+ vsetivli zero, 16, e8, m1, ta, ma
+.endif
+ vmv.v.x v0, t1 /* broadcast the fill byte */
+
+ .rept \size
+ vse8.v v0, (a0) /* write one row at a0 */
+ add a0, a0, a1 /* advance a0 by a1 (the stride argument); also done once after the final row */
+ .endr
+
+ ret
+.endm
+
+.macro dc_e64 type size n restore /* 8x8 fill body; \size and \restore are not referenced in this macro */
+ vsetivli zero, 8, e8, mf2, ta, ma /* 8 bytes for the loads done in getdc */
+ getdc \type \n
+
+ li t0, 64
+ vsetvli zero, t0, e8, m4, ta, ma /* request 64 bytes across the register group starting at v0 */
+ vmv.v.x v0, t1 /* fill those bytes with the value */
+ vsetivli zero, 8, e8, mf2, ta, ma /* vl = 8 for the store below */
+ vsse64.v v0, (a0), a1 /* 8 strided 64-bit stores: one 8-byte row every a1 bytes */
+
+ ret
+.endm
+
+.macro func_dc name size type n restore ext /* emit function ff_<name>_<size>x<size>_rvv from the matching body macro */
+func ff_\()\name\()_\()\size\()x\size\()_rvv, \ext
+.ifc \size,8
+ dc_e64 \type \size \n \restore /* 8x8 uses the strided 64-bit store variant */
+.else
+ dc_e32 \type \size \n \restore /* 16x16 and 32x32 share the row-by-row variant */
+.endif
+endfunc
+.endm
+
+func_dc dc_127 32 127 0 0 zve32x /* columns: name, size, type, n (shift for avgdc), restore, ext */
+func_dc dc_127 16 127 0 0 zve32x
+func_dc dc_127 8 127 0 0 zve64x
+func_dc dc_128 32 128 0 0 zve32x
+func_dc dc_128 16 128 0 0 zve32x
+func_dc dc_128 8 128 0 0 zve64x
+func_dc dc_129 32 129 0 0 zve32x
+func_dc dc_129 16 129 0 0 zve32x
+func_dc dc_129 8 129 0 0 zve64x
+func_dc dc 32 dc 6 1 zve32x /* n = 6: 64 bytes are summed (32 from a2 plus 32 from a3) */
+func_dc dc 16 dc 5 1 zve32x
+func_dc dc 8 dc 4 0 zve64x
+func_dc dc_left 32 left 5 1 zve32x /* n = 5: 32 bytes are summed */
+func_dc dc_left 16 left 4 1 zve32x
+func_dc dc_left 8 left 3 0 zve64x
+func_dc dc_top 32 top 5 1 zve32x
+func_dc dc_top 16 top 4 1 zve32x
+func_dc dc_top 8 top 3 0 zve64x
diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
new file mode 100644
index 0000000000..0540c7303d
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
+#define AVCODEC_RISCV_VP9DSP_RISCV_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
+ const uint8_t *a);
+
+#define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx) \
+void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+#define VP9_BILINEAR_RISCV_RVV_FUNC(SIZE) \
+void ff_put_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_put_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+#define VP9_COPY_AVG_RISCV_RVV_FUNC(SIZE) \
+void ff_copy##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my); \
+ \
+void ff_avg##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+VP9_8TAP_RISCV_RVV_FUNC(64, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(32, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(16, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(8, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_RISCV_RVV_FUNC(4, regular, FILTER_8TAP_REGULAR);
+
+VP9_8TAP_RISCV_RVV_FUNC(64, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(32, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(16, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(8, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_RISCV_RVV_FUNC(4, sharp, FILTER_8TAP_SHARP);
+
+VP9_8TAP_RISCV_RVV_FUNC(64, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_RISCV_RVV_FUNC(4, smooth, FILTER_8TAP_SMOOTH);
+
+VP9_BILINEAR_RISCV_RVV_FUNC(64);
+VP9_BILINEAR_RISCV_RVV_FUNC(32);
+VP9_BILINEAR_RISCV_RVV_FUNC(16);
+VP9_BILINEAR_RISCV_RVV_FUNC(8);
+VP9_BILINEAR_RISCV_RVV_FUNC(4);
+
+VP9_COPY_AVG_RISCV_RVV_FUNC(64);
+VP9_COPY_AVG_RISCV_RVV_FUNC(32);
+VP9_COPY_AVG_RISCV_RVV_FUNC(16);
+VP9_COPY_AVG_RISCV_RVV_FUNC(8);
+VP9_COPY_AVG_RISCV_RVV_FUNC(4);
+
+#define VP9_COPY_RISCV_RVI_FUNC(SIZE) \
+void ff_copy##SIZE##_rvi(uint8_t *dst, ptrdiff_t dststride, \
+ const uint8_t *src, ptrdiff_t srcstride, \
+ int h, int mx, int my);
+
+VP9_COPY_RISCV_RVI_FUNC(8);
+VP9_COPY_RISCV_RVI_FUNC(4);
+
+#undef VP9_8TAP_RISCV_RVV_FUNC
+#undef VP9_BILINEAR_RISCV_RVV_FUNC
+#undef VP9_COPY_AVG_RISCV_RVV_FUNC
+
+#endif // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
new file mode 100644
index 0000000000..69ab39004c
--- /dev/null
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/riscv/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp.h"
+
+/**
+ * Hook up the RVV implementations of the DC-family intra predictors
+ * (DC, LEFT_DC, TOP_DC, DC_127, DC_128, DC_129).
+ *
+ * Nothing is installed unless bpp equals 8. The 8x8 handlers additionally
+ * require AV_CPU_FLAG_RVV_I64, the 16x16 and 32x32 handlers require
+ * AV_CPU_FLAG_RVV_I32; both groups also require ff_get_rv_vlenb() to return
+ * at least 16. The whole body is compiled out when HAVE_RVV is 0.
+ *
+ * @param dsp  context whose intra_pred table receives the function pointers
+ * @param bpp  only the value 8 is handled here
+ */
+static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
+{
+#if HAVE_RVV
+    const int cpu_flags = av_get_cpu_flags();
+
+    if (bpp != 8)
+        return;
+
+    /* NOTE(review): ff_get_rv_vlenb() stays behind the CPU-flag test so it is
+     * evaluated only when the flag is set; presumably it is not safe to call
+     * without vector support - confirm before hoisting it. */
+    if ((cpu_flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) {
+        dsp->intra_pred[TX_8X8][DC_PRED]      = ff_dc_8x8_rvv;
+        dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
+        dsp->intra_pred[TX_8X8][TOP_DC_PRED]  = ff_dc_top_8x8_rvv;
+        dsp->intra_pred[TX_8X8][DC_127_PRED]  = ff_dc_127_8x8_rvv;
+        dsp->intra_pred[TX_8X8][DC_128_PRED]  = ff_dc_128_8x8_rvv;
+        dsp->intra_pred[TX_8X8][DC_129_PRED]  = ff_dc_129_8x8_rvv;
+    }
+
+    if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16) {
+        /* 16x16 blocks */
+        dsp->intra_pred[TX_16X16][DC_PRED]      = ff_dc_16x16_rvv;
+        dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
+        dsp->intra_pred[TX_16X16][TOP_DC_PRED]  = ff_dc_top_16x16_rvv;
+        dsp->intra_pred[TX_16X16][DC_127_PRED]  = ff_dc_127_16x16_rvv;
+        dsp->intra_pred[TX_16X16][DC_128_PRED]  = ff_dc_128_16x16_rvv;
+        dsp->intra_pred[TX_16X16][DC_129_PRED]  = ff_dc_129_16x16_rvv;
+
+        /* 32x32 blocks */
+        dsp->intra_pred[TX_32X32][DC_PRED]      = ff_dc_32x32_rvv;
+        dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
+        dsp->intra_pred[TX_32X32][TOP_DC_PRED]  = ff_dc_top_32x32_rvv;
+        dsp->intra_pred[TX_32X32][DC_127_PRED]  = ff_dc_127_32x32_rvv;
+        dsp->intra_pred[TX_32X32][DC_128_PRED]  = ff_dc_128_32x32_rvv;
+        dsp->intra_pred[TX_32X32][DC_129_PRED]  = ff_dc_129_32x32_rvv;
+    }
+#endif
+}
+
+av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact) // RISC-V entry point called from ff_vp9dsp_init() under ARCH_RISCV
+{
+ vp9dsp_intrapred_init_rvv(dsp, bpp); // only the intra predictors are set up; bitexact is accepted but not used here
+}
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index d8ddf74d4f..967e6e1e1a 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -100,6 +100,8 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
ff_vp9dsp_init_aarch64(dsp, bpp);
#elif ARCH_ARM
ff_vp9dsp_init_arm(dsp, bpp);
+#elif ARCH_RISCV
+ ff_vp9dsp_init_riscv(dsp, bpp, bitexact);
#elif ARCH_X86
ff_vp9dsp_init_x86(dsp, bpp, bitexact);
#elif ARCH_MIPS
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
index be0ac0b181..772848e349 100644
--- a/libavcodec/vp9dsp.h
+++ b/libavcodec/vp9dsp.h
@@ -131,6 +131,7 @@ void ff_vp9dsp_init_12(VP9DSPContext *dsp);
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);
--
2.44.0
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2024-04-07 5:38 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-02 7:42 [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: R-V V ipred dc flow gg
2024-03-02 9:03 ` Rémi Denis-Courmont
2024-03-02 9:48 ` flow gg
2024-03-03 1:59 ` flow gg
2024-03-03 14:46 ` Rémi Denis-Courmont
2024-03-03 15:31 ` flow gg
2024-03-07 11:20 ` flow gg
2024-03-22 6:02 ` flow gg
2024-03-27 15:41 ` Rémi Denis-Courmont
2024-03-28 2:44 ` flow gg
2024-04-03 20:21 ` Rémi Denis-Courmont
2024-04-07 5:38 ` flow gg
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git