* [FFmpeg-devel] [PATCH] libavcodec/riscv: add RVV optimized sao_edge_filter for HEVC (PR #20593)
@ 2025-09-24 9:12 CheryDan via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: CheryDan via ffmpeg-devel @ 2025-09-24 9:12 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: CheryDan
PR #20593 opened by CheryDan
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20593
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20593.patch
This patch adds RVV support for hevc_sao_edge_filter.
On Banana Pi F3 (256-bit vectors):
hevc_sao_edge_8_8_c: 988.9 ( 1.00x)
hevc_sao_edge_8_8_rvv_i32: 465.6 ( 2.12x)
hevc_sao_edge_16_8_c: 3906.6 ( 1.00x)
hevc_sao_edge_16_8_rvv_i32: 922.4 ( 4.24x)
hevc_sao_edge_32_8_c: 15666.9 ( 1.00x)
hevc_sao_edge_32_8_rvv_i32: 1819.8 ( 8.61x)
hevc_sao_edge_48_8_c: 35322.3 ( 1.00x)
hevc_sao_edge_48_8_rvv_i32: 5378.8 ( 6.57x)
hevc_sao_edge_64_8_c: 63081.8 ( 1.00x)
hevc_sao_edge_64_8_rvv_i32: 7154.4 ( 8.82x)
Signed-off-by: daichengrong <daichengrong@iscas.ac.cn>
>From 42470b5e2918cbc113522e8365fb9809389532a3 Mon Sep 17 00:00:00 2001
From: daichengrong <daichengrong@iscas.ac.cn>
Date: Wed, 24 Sep 2025 16:51:49 +0800
Subject: [PATCH] libavcodec/riscv: add RVV optimized sao_edge_filter for HEVC
This patch adds rvv support for hevc_sao_edge_filter
Signed-off-by: daichengrong <daichengrong@iscas.ac.cn>
---
libavcodec/riscv/Makefile | 3 +-
libavcodec/riscv/h26x/h2656dsp.h | 3 +
libavcodec/riscv/h26x/sao_rvv.S | 110 +++++++++++++++++++++++++++++++
libavcodec/riscv/hevcdsp_init.c | 14 ++++
4 files changed, 129 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/riscv/h26x/sao_rvv.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 736f873fe8..b5faa49c0f 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -36,7 +36,8 @@ RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264addpx_rvv.o riscv/h264dsp_rvv.o \
OBJS-$(CONFIG_H264QPEL) += riscv/h264qpel_init.o
RVV-OBJS-$(CONFIG_H264QPEL) += riscv/h264qpel_rvv.o
OBJS-$(CONFIG_HEVC_DECODER) += riscv/hevcdsp_init.o
-RVV-OBJS-$(CONFIG_HEVC_DECODER) += riscv/h26x/h2656_inter_rvv.o
+RVV-OBJS-$(CONFIG_HEVC_DECODER) += riscv/h26x/h2656_inter_rvv.o \
+ riscv/h26x/sao_rvv.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
diff --git a/libavcodec/riscv/h26x/h2656dsp.h b/libavcodec/riscv/h26x/h2656dsp.h
index 6d2ac55556..a3c11efe1c 100644
--- a/libavcodec/riscv/h26x/h2656dsp.h
+++ b/libavcodec/riscv/h26x/h2656dsp.h
@@ -23,5 +23,8 @@
void ff_h2656_put_pixels_8_rvv_256(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width);
void ff_h2656_put_pixels_8_rvv_128(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width);
+void ff_hevc_sao_edge_filter_8_rvv(uint8_t *_dst, const uint8_t *_src, /* RVV SAO edge-offset filter for 8-bit samples */
+ ptrdiff_t stride_dst, const int16_t *sao_offset_val, /* only the dst pitch is passed; the routine uses a fixed src pitch */
+ int sao_eo_class, int width, int height); /* sao_eo_class indexes the routine's 4-entry edge-direction table */
#endif
diff --git a/libavcodec/riscv/h26x/sao_rvv.S b/libavcodec/riscv/h26x/sao_rvv.S
new file mode 100644
index 0000000000..7a928a8217
--- /dev/null
+++ b/libavcodec/riscv/h26x/sao_rvv.S
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2025 Institute of Software, Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+#define HEVC_MAX_PB_SIZE 64 // NOTE(review): presumably mirrors the decoder's C constant of the same name - confirm and keep in sync
+#define AV_INPUT_BUFFER_PADDING_SIZE 64 // NOTE(review): presumably mirrors the public C constant of the same name - confirm and keep in sync
+#define HEVC_SAO_STRIDE (2 * HEVC_MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) // fixed source row pitch in bytes (2 * 64 + 64 = 192)
+
+const .Lhevc_sao_edge_pos, align=4
+.4byte 1 // horizontal: neighbours at src -/+ 1
+.4byte HEVC_SAO_STRIDE // vertical: neighbours at src -/+ one row
+.4byte HEVC_SAO_STRIDE + 1 // 45 degree: neighbours at src -/+ (one row + 1)
+.4byte HEVC_SAO_STRIDE - 1 // 135 degree: neighbours at src -/+ (one row - 1)
+endconst
+
+const .Lhevc_edge_idx, align=1
+ .2byte 1,2,0,3,4 // index permutation applied to sao_offset_val; kept 2-byte aligned (align=1) because it is read with vle16.v
+endconst
+
+func ff_hevc_sao_edge_filter_8_rvv, zve32x // a0=dst, a1=src, a2=stride_dst, a3=sao_offset_val, a4=sao_eo_class, a5=width, a6=height
+ lla a7, .Lhevc_sao_edge_pos // table of neighbour offsets, one 32-bit entry per edge class
+ li t5, HEVC_SAO_STRIDE // fixed source row pitch in bytes
+
+ slli a4, a4, 2 // class index -> byte offset into the 4-byte-entry table
+ add a4, a7, a4
+ lw a4, (a4) // a4 = sao_edge_pos: distance from a sample to its "next" neighbour (not a row stride)
+
+ lla t0, .Lhevc_edge_idx
+ vsetivli zero, 5, e16, m1, ta, ma // five 16-bit elements; NOTE(review): assumes one vector register holds at least 5 x 16 bits - confirm the init code guarantees this
+ vle16.v v2, (a3) // load sao_offset_val
+ vle16.v v4, (t0) // load the index permutation
+ vrgather.vv v6, v2, v4 // reorder to [1,2,0,3,4]: v6[i] = sao_offset_val[idx[i]]
+
+ sub t5, t5, a5 // stride_src - width
+ sub t6, a2, a5 // stride_dst - width
+1: // row loop
+ mv t4, a5 // t4 = samples still to process in this row
+ sub t2, a1, a4 // src_a (prev) = src - sao_edge_pos
+ add t3, a1, a4 // src_b (next) = src + sao_edge_pos
+2: // chunk loop: t1 (= vl) samples per pass
+ vsetvli t1, t4, e8, m1, ta, ma
+ vle8.v v3, (a1) // load src
+ vle8.v v4, (t2) // load src_a (prev)
+ vle8.v v5, (t3) // load src_b (next)
+ add a1, a1, t1 // advance the three source pointers by vl
+ add t2, t2, t1
+ add t3, t3, t1
+
+ vsetvli zero, zero, e8, m1, ta, mu // vl unchanged; only the mask policy differs from the setting above
+ vmv.v.i v1, 0 // all-zero vector, the "false" operand of the merges below
+ vmsgtu.vv v0, v3, v4 // (cur > prev)
+ vmerge.vim v17, v1, 1, v0 // v17 = (cur > prev) ? 1 : 0
+ vmsgtu.vv v0, v4, v3 // (prev > cur)
+ vmerge.vim v20, v17, -1, v0 // diff0 = CMP(cur, prev) = (cur > prev) - (cur < prev)
+
+ vmsgtu.vv v0, v3, v5 // (cur > next)
+ vmerge.vim v19, v1, 1, v0 // v19 = (cur > next) ? 1 : 0
+ vmsgtu.vv v0, v5, v3 // (next > cur)
+ vmerge.vim v21, v19, -1, v0 // diff1 = CMP(cur, next) = (cur > next) - (cur < next)
+
+ vwadd.vv v22, v20, v21 // diff = diff0 + diff1, widened to 16 bits (values -2..2)
+
+ vsetvli zero, zero, e16, m2, ta, ma // same vl, now 16-bit elements in two-register groups
+ vadd.vi v20, v22, 2 // offset_val = diff + 2
+ vrgather.vv v16, v6, v20 // sao_offset_val
+
+ vzext.vf2 v18, v3 // zero-extend the src samples to 16 bits
+ vsadd.vv v20, v16, v18 // + sao_offset_val
+
+ vsetvli zero, zero, e16, m2, ta, mu
+ li t0, 255
+ vmsgt.vx v0, v20, t0
+ vmerge.vxm v16, v20, t0, v0 // > 255
+ vmslt.vi v0, v20, 0
+ vmerge.vim v16, v16, 0, v0 // < 0
+
+ vsetvli zero, zero, e8, m1, ta, ma
+ vnsrl.wi v20, v16, 0 // narrow the clamped 16-bit results back to bytes
+
+ vse8.v v20, (a0) // store vl output samples
+ add a0, a0, t1
+ sub t4, t4, t1
+ bnez t4, 2b // samples remain in this row
+
+ add a1, a1, t5 // src -> start of the next row
+ add a0, a0, t6 // dst -> start of the next row
+ // row complete; count down the remaining rows
+ addi a6, a6, -1
+ bnez a6, 1b
+
+ ret
+endfunc
\ No newline at end of file
diff --git a/libavcodec/riscv/hevcdsp_init.c b/libavcodec/riscv/hevcdsp_init.c
index 70bc8ebea7..802fcb02f0 100644
--- a/libavcodec/riscv/hevcdsp_init.c
+++ b/libavcodec/riscv/hevcdsp_init.c
@@ -40,6 +40,20 @@ void ff_hevc_dsp_init_riscv(HEVCDSPContext *c, const int bit_depth)
const int flags = av_get_cpu_flags();
int vlenb;
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ switch (bit_depth) {
+ case 8:
+ c->sao_edge_filter[0] =
+ c->sao_edge_filter[1] =
+ c->sao_edge_filter[2] =
+ c->sao_edge_filter[3] =
+ c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_8_rvv;
+ break;
+ default:
+ break;
+ }
+ }
+
if (!(flags & AV_CPU_FLAG_RVV_I32) || !(flags & AV_CPU_FLAG_RVB))
return;
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-09-24 9:13 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-24 9:12 [FFmpeg-devel] [PATCH] libavcodec/riscv: add RVV optimized sao_edge_filter for HEVC (PR #20593) CheryDan via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror http://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ http://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git