* [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8
@ 2024-07-13 13:01 Rémi Denis-Courmont
2024-07-13 13:01 ` [FFmpeg-devel] [PATCH 2/2] lavc/h264dsp: R-V V high-depth add_pixels8 Rémi Denis-Courmont
2024-07-13 17:49 ` [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8 Rémi Denis-Courmont
0 siblings, 2 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-13 13:01 UTC (permalink / raw)
To: ffmpeg-devel
---
libavcodec/riscv/Makefile | 3 +-
libavcodec/riscv/h264addpx_rvv.S | 89 ++++++++++++++++++++++++++++++++
libavcodec/riscv/h264dsp_init.c | 11 ++++
3 files changed, 102 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/riscv/h264addpx_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 92e1544e76..0bbdd38116 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -31,7 +31,8 @@ RVV-OBJS-$(CONFIG_H263DSP) += riscv/h263dsp_rvv.o
OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o
RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o
OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o
-RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o riscv/h264idct_rvv.o
+RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264addpx_rvv.o riscv/h264dsp_rvv.o \
+ riscv/h264idct_rvv.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
new file mode 100644
index 0000000000..fd36bd4896
--- /dev/null
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2024 Rémi Denis-Courmont.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libavutil/riscv/asm.S"
+
+ .macro sx rd, addr /* store one integer register of XLEN bits to addr */
+#if (__riscv_xlen == 32)
+ sw \rd, \addr
+#elif (__riscv_xlen == 64)
+ sd \rd, \addr
+#else /* any other XLEN uses the quad-word store */
+ sq \rd, \addr
+#endif
+ .endm
+
+func ff_h264_add_pixels4_8_rvv, zve32x /* a0 = dst, a1 = block, a2 = stride */
+ vsetivli zero, 4, e8, mf4, ta, ma
+ vlse32.v v8, (a0), a2 /* gather 4 rows of 4 bytes, a2 bytes apart */
+ vsetivli zero, 4 * 4, e8, m1, ta, ma /* now view the rows as 16 byte elements */
+ vle16.v v16, (a1) /* load 16 contiguous 16-bit coefficients */
+ .equ offset, 0
+ .rept 256 / __riscv_xlen /* zero the 32 bytes of coefficients just read */
+ sx zero, offset(a1)
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ vncvt.x.x.w v24, v16 /* narrow coefficients to 8 bits */
+ vadd.vv v8, v8, v24 /* wrapping add, no saturation */
+ vsetivli zero, 4, e8, mf4, ta, ma /* back to the 4-row layout for the store */
+ vsse32.v v8, (a0), a2 /* scatter the 4 rows back to dst */
+ ret
+endfunc
+
+func ff_h264_add_pixels4_16_rvv, zve64x /* a0 = dst, a1 = block, a2 = stride */
+ vsetivli zero, 4, e16, mf2, ta, ma
+ vlse64.v v8, (a0), a2 /* gather 4 rows of 8 bytes, a2 bytes apart */
+ vsetivli zero, 4 * 4, e16, m2, ta, ma /* now view the rows as 16 16-bit elements */
+ vle32.v v16, (a1) /* load 16 contiguous 32-bit coefficients */
+ .equ offset, 0
+ .rept 512 / __riscv_xlen /* zero the 64 bytes of coefficients just read */
+ sx zero, offset(a1)
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ vncvt.x.x.w v24, v16 /* narrow coefficients to 16 bits */
+ vadd.vv v8, v8, v24 /* wrapping add, no saturation */
+ vsetivli zero, 4, e16, mf2, ta, ma /* back to the 4-row layout for the store */
+ vsse64.v v8, (a0), a2 /* scatter the 4 rows back to dst */
+ ret
+endfunc
+
+func ff_h264_add_pixels8_8_rvv, zve64x /* a0 = dst, a1 = block, a2 = stride */
+ li t0, 8 * 8 /* 64 exceeds the 5-bit immediate of vsetivli, so keep it in a register */
+ vsetivli zero, 8, e8, mf2, ta, ma
+ vlse64.v v8, (a0), a2 /* gather 8 rows of 8 bytes, a2 bytes apart */
+ vsetvli zero, t0, e8, m4, ta, ma /* now view the rows as 64 byte elements */
+ vle16.v v16, (a1) /* load 64 contiguous 16-bit coefficients */
+ .equ offset, 0
+ .rept 1024 / __riscv_xlen /* zero the 128 bytes of coefficients just read */
+ sx zero, offset(a1)
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ vncvt.x.x.w v24, v16 /* narrow coefficients to 8 bits */
+ vadd.vv v8, v8, v24 /* wrapping add, no saturation */
+ vsetivli zero, 8, e8, mf2, ta, ma /* back to the 8-row layout for the store */
+ vsse64.v v8, (a0), a2 /* scatter the 8 rows back to dst */
+ ret
+endfunc
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 825f34443b..a6d06b3ac4 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -61,6 +61,10 @@ void ff_h264_idct8_add_12_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels8_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels4_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels4_16_rvv(uint8_t *dst, int16_t *block, int stride);
+
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@@ -96,6 +100,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
# if __riscv_xlen == 64
dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
# endif
+ if (flags & AV_CPU_FLAG_RVV_I64)
+ dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
+ dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
}
if (bit_depth == 9) {
@@ -118,6 +125,10 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
dsp->h264_idct_add = ff_h264_idct_add_14_rvv;
dsp->h264_idct8_add = ff_h264_idct8_add_14_rvv;
}
+ if (bit_depth > 8 && zvl128b) {
+ if (flags & AV_CPU_FLAG_RVV_I64)
+ dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
+ }
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
}
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* [FFmpeg-devel] [PATCH 2/2] lavc/h264dsp: R-V V high-depth add_pixels8
2024-07-13 13:01 [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8 Rémi Denis-Courmont
@ 2024-07-13 13:01 ` Rémi Denis-Courmont
2024-07-13 17:50 ` Rémi Denis-Courmont
2024-07-13 17:49 ` [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8 Rémi Denis-Courmont
1 sibling, 1 reply; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-13 13:01 UTC (permalink / raw)
To: ffmpeg-devel
---
libavcodec/riscv/h264addpx_rvv.S | 22 ++++++++++++++++++++++
libavcodec/riscv/h264dsp_init.c | 2 ++
2 files changed, 24 insertions(+)
diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
index fd36bd4896..3d307b2cb8 100644
--- a/libavcodec/riscv/h264addpx_rvv.S
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -87,3 +87,25 @@ func ff_h264_add_pixels8_8_rvv, zve64x
vsse64.v v8, (a0), a2
ret
endfunc
+
+func ff_h264_add_pixels8_16_rvv, zve32x /* a0 = dst, a1 = block, a2 = stride */
+ li t0, 8 /* row counter: one loop iteration per row */
+ vsetivli zero, 8, e16, m1, ta, ma /* 8 elements of 16 bits per row */
+1:
+ vle32.v v16, (a1) /* load the 8 32-bit coefficients of this row */
+ addi t0, t0, -1
+ vle16.v v8, (a0) /* load the 8 16-bit pixels of this row */
+ vncvt.x.x.w v24, v16 /* narrow coefficients to 16 bits */
+ .equ offset, 0
+ .rept 256 / __riscv_xlen /* zero the 32 bytes just loaded, while a1 still points at them */
+ sx zero, offset(a1)
+ .equ offset, offset + (__riscv_xlen / 8)
+ .endr
+ addi a1, a1, 8 * 4 /* advance only after clearing, else the next row is wiped before it is read */
+ vadd.vv v8, v8, v24 /* wrapping add, no saturation */
+ vse16.v v8, (a0)
+ add a0, a0, a2 /* next destination row */
+ bnez t0, 1b
+
+ ret
+endfunc
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index a6d06b3ac4..ea89314952 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -63,6 +63,7 @@ void ff_h264_idct8_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_add_pixels8_8_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_add_pixels4_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels8_16_rvv(uint8_t *dst, int16_t *block, int stride);
void ff_h264_add_pixels4_16_rvv(uint8_t *dst, int16_t *block, int stride);
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
@@ -126,6 +127,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
dsp->h264_idct8_add = ff_h264_idct8_add_14_rvv;
}
if (bit_depth > 8 && zvl128b) {
+ dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_16_rvv;
if (flags & AV_CPU_FLAG_RVV_I64)
dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
}
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8
2024-07-13 13:01 [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8 Rémi Denis-Courmont
2024-07-13 13:01 ` [FFmpeg-devel] [PATCH 2/2] lavc/h264dsp: R-V V high-depth add_pixels8 Rémi Denis-Courmont
@ 2024-07-13 17:49 ` Rémi Denis-Courmont
1 sibling, 0 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-13 17:49 UTC (permalink / raw)
To: ffmpeg-devel
T-Head C908 (cycles):
h264_add_pixels4_8bpp_c: 93.5
h264_add_pixels4_8bpp_rvv_i32: 39.5
h264_add_pixels4_9bpp_c: 87.5
h264_add_pixels4_9bpp_rvv_i64: 50.5
h264_add_pixels4_10bpp_c: 87.5
h264_add_pixels4_10bpp_rvv_i64: 50.5
h264_add_pixels4_12bpp_c: 87.5
h264_add_pixels4_12bpp_rvv_i64: 50.5
h264_add_pixels4_14bpp_c: 87.5
h264_add_pixels4_14bpp_rvv_i64: 50.5
h264_add_pixels8_8bpp_c: 265.2
h264_add_pixels8_8bpp_rvv_i64: 84.5
--
雷米‧德尼-库尔蒙
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-07-13 17:50 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-13 13:01 [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8 Rémi Denis-Courmont
2024-07-13 13:01 ` [FFmpeg-devel] [PATCH 2/2] lavc/h264dsp: R-V V high-depth add_pixels8 Rémi Denis-Courmont
2024-07-13 17:50 ` Rémi Denis-Courmont
2024-07-13 17:49 ` [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8 Rémi Denis-Courmont
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git