Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8
@ 2024-07-13 13:01 Rémi Denis-Courmont
  2024-07-13 13:01 ` [FFmpeg-devel] [PATCH 2/2] lavc/h264dsp: R-V V high-depth add_pixels8 Rémi Denis-Courmont
  2024-07-13 17:49 ` [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8 Rémi Denis-Courmont
  0 siblings, 2 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-13 13:01 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavcodec/riscv/Makefile        |  3 +-
 libavcodec/riscv/h264addpx_rvv.S | 89 ++++++++++++++++++++++++++++++++
 libavcodec/riscv/h264dsp_init.c  | 11 ++++
 3 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/h264addpx_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 92e1544e76..0bbdd38116 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -31,7 +31,8 @@ RVV-OBJS-$(CONFIG_H263DSP) += riscv/h263dsp_rvv.o
 OBJS-$(CONFIG_H264CHROMA) += riscv/h264_chroma_init_riscv.o
 RVV-OBJS-$(CONFIG_H264CHROMA) += riscv/h264_mc_chroma.o
 OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_init.o
-RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264dsp_rvv.o riscv/h264idct_rvv.o
+RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264addpx_rvv.o riscv/h264dsp_rvv.o \
+                              riscv/h264idct_rvv.o
 OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
 RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
 OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
new file mode 100644
index 0000000000..fd36bd4896
--- /dev/null
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2024 Rémi Denis-Courmont.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "libavutil/riscv/asm.S"
+
+        .macro  sx rd, addr /* store integer register rd to addr using an XLEN-bit store */
+#if (__riscv_xlen == 32)
+        sw      \rd, \addr /* RV32: 32-bit word store */
+#elif (__riscv_xlen == 64)
+        sd      \rd, \addr /* RV64: 64-bit doubleword store */
+#else
+        sq      \rd, \addr /* any other XLEN: quadword store (RV128 instruction) */
+#endif
+        .endm
+
+func ff_h264_add_pixels4_8_rvv, zve32x /* a0 = dst, a1 = block, a2 = stride: dst[4x4] += block, then block is zeroed */
+        vsetivli        zero, 4, e8, mf4, ta, ma /* VL = 4, one element per row; NOTE(review): VL = 4 here needs VLEN >= 128, confirm the caller checks it */
+        vlse32.v        v8, (a0), a2 /* v8 = 4 rows of 4 bytes from dst, rows a2 bytes apart */
+        vsetivli        zero, 4 * 4, e8, m1, ta, ma /* VL = 16: handle the whole 4x4 block as 16 byte elements */
+        vle16.v         v16, (a1) /* v16 = 16 contiguous 16-bit values from block */
+        .equ    offset, 0
+        .rept   256 / __riscv_xlen /* 256 bits = 32 bytes = 16 x 16-bit, cleared in XLEN-bit steps */
+        sx      zero, offset(a1) /* zero the block; its values were already loaded into v16 */
+        .equ    offset, offset + (__riscv_xlen / 8)
+        .endr
+        vncvt.x.x.w     v24, v16 /* narrow 16-bit values to 8 bits, keeping the low byte */
+        vadd.vv         v8, v8, v24 /* add to the dst bytes; wraps modulo 256, no saturation */
+        vsetivli        zero, 4, e8, mf4, ta, ma /* back to VL = 4 rows for the strided store */
+        vsse32.v        v8, (a0), a2 /* write the 4 updated rows back to dst */
+        ret
+endfunc
+
+func ff_h264_add_pixels4_16_rvv, zve64x /* a0 = dst, a1 = block, a2 = stride: 4x4 add with 16-bit dst elements and 32-bit block values */
+        vsetivli        zero, 4, e16, mf2, ta, ma /* VL = 4, one 64-bit element per row */
+        vlse64.v        v8, (a0), a2 /* v8 = 4 rows of 8 bytes (4 x 16-bit) from dst, rows a2 bytes apart */
+        vsetivli        zero, 4 * 4, e16, m2, ta, ma /* VL = 16: handle the whole 4x4 block as 16 halfword elements */
+        vle32.v         v16, (a1) /* v16 = 16 contiguous 32-bit values from block */
+        .equ    offset, 0
+        .rept   512 / __riscv_xlen /* 512 bits = 64 bytes = 16 x 32-bit, cleared in XLEN-bit steps */
+        sx      zero, offset(a1) /* zero the block; its values were already loaded into v16 */
+        .equ    offset, offset + (__riscv_xlen / 8)
+        .endr
+        vncvt.x.x.w     v24, v16 /* narrow 32-bit values to 16 bits, keeping the low half */
+        vadd.vv         v8, v8, v24 /* add to the dst halfwords; wraps modulo 65536, no clipping */
+        vsetivli        zero, 4, e16, mf2, ta, ma /* back to VL = 4 rows for the strided store */
+        vsse64.v        v8, (a0), a2 /* write the 4 updated rows back to dst */
+        ret
+endfunc
+
+func ff_h264_add_pixels8_8_rvv, zve64x /* a0 = dst, a1 = block, a2 = stride: dst[8x8] += block, then block is zeroed */
+        li      t0, 8 * 8 /* 64 elements: too large for the 5-bit vsetivli immediate, so keep it in a register */
+        vsetivli        zero, 8, e8, mf2, ta, ma /* VL = 8, one 64-bit element per row */
+        vlse64.v        v8, (a0), a2 /* v8 = 8 rows of 8 bytes from dst, rows a2 bytes apart */
+        vsetvli         zero, t0, e8, m4, ta, ma /* VL = 64: handle the whole 8x8 block as 64 byte elements */
+        vle16.v         v16, (a1) /* v16 = 64 contiguous 16-bit values from block */
+        .equ    offset, 0
+        .rept   1024 / __riscv_xlen /* 1024 bits = 128 bytes = 64 x 16-bit, cleared in XLEN-bit steps */
+        sx      zero, offset(a1) /* zero the block; its values were already loaded into v16 */
+        .equ    offset, offset + (__riscv_xlen / 8)
+        .endr
+        vncvt.x.x.w     v24, v16 /* narrow 16-bit values to 8 bits, keeping the low byte */
+        vadd.vv         v8, v8, v24 /* add to the dst bytes; wraps modulo 256, no saturation */
+        vsetivli        zero, 8, e8, mf2, ta, ma /* back to VL = 8 rows for the strided store */
+        vsse64.v        v8, (a0), a2 /* write the 8 updated rows back to dst */
+        ret
+endfunc
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 825f34443b..a6d06b3ac4 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -61,6 +61,10 @@ void ff_h264_idct8_add_12_rvv(uint8_t *dst, int16_t *block, int stride);
 void ff_h264_idct_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
 void ff_h264_idct8_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
 
+void ff_h264_add_pixels8_8_rvv(uint8_t *dst, int16_t *block, int stride); /* 8x8 block, 8-bit dst samples; adds block to dst and zeroes block */
+void ff_h264_add_pixels4_8_rvv(uint8_t *dst, int16_t *block, int stride); /* 4x4 block, 8-bit dst samples; adds block to dst and zeroes block */
+void ff_h264_add_pixels4_16_rvv(uint8_t *dst, int16_t *block, int stride); /* 4x4 block; the assembly reads 16-bit dst samples and 32-bit block values */
+
 extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
 extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
 
@@ -96,6 +100,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
 #  if __riscv_xlen == 64
             dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
 #  endif
+            if (flags & AV_CPU_FLAG_RVV_I64)
+                dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_8_rvv;
+            dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_8_rvv;
         }
 
         if (bit_depth == 9) {
@@ -118,6 +125,10 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
                 dsp->h264_idct_add = ff_h264_idct_add_14_rvv;
             dsp->h264_idct8_add = ff_h264_idct8_add_14_rvv;
         }
+        if (bit_depth > 8 && zvl128b) {
+            if (flags & AV_CPU_FLAG_RVV_I64)
+                dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
+        }
 
         dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
     }
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-07-13 17:50 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-13 13:01 [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8 Rémi Denis-Courmont
2024-07-13 13:01 ` [FFmpeg-devel] [PATCH 2/2] lavc/h264dsp: R-V V high-depth add_pixels8 Rémi Denis-Courmont
2024-07-13 17:50   ` Rémi Denis-Courmont
2024-07-13 17:49 ` [FFmpeg-devel] [PATCH 1/2] lavc/h264dsp: R-V V add_pixels4 and 8-bit add_pixels8 Rémi Denis-Courmont

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git