Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: CheryDan via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: CheryDan <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] libavcodec/riscv: add RVV optimized sao_edge_filter for HEVC (PR #20593)
Date: Wed, 24 Sep 2025 09:12:54 -0000
Message-ID: <175870517514.25.6988579964894695642@bf249f23a2c8> (raw)

PR #20593 opened by CheryDan
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20593
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20593.patch

This patch adds an RVV (RISC-V Vector) implementation of the 8-bit HEVC sao_edge_filter.

Benchmark results on a Banana Pi F3 (256-bit vectors):
    hevc_sao_edge_8_8_c:                                   988.9 ( 1.00x)
    hevc_sao_edge_8_8_rvv_i32:                             465.6 ( 2.12x)
    hevc_sao_edge_16_8_c:                                 3906.6 ( 1.00x)
    hevc_sao_edge_16_8_rvv_i32:                            922.4 ( 4.24x)
    hevc_sao_edge_32_8_c:                                15666.9 ( 1.00x)
    hevc_sao_edge_32_8_rvv_i32:                           1819.8 ( 8.61x)
    hevc_sao_edge_48_8_c:                                35322.3 ( 1.00x)
    hevc_sao_edge_48_8_rvv_i32:                           5378.8 ( 6.57x)
    hevc_sao_edge_64_8_c:                                63081.8 ( 1.00x)
    hevc_sao_edge_64_8_rvv_i32:                           7154.4 ( 8.82x)

Signed-off-by: daichengrong <daichengrong@iscas.ac.cn>


>From 42470b5e2918cbc113522e8365fb9809389532a3 Mon Sep 17 00:00:00 2001
From: daichengrong <daichengrong@iscas.ac.cn>
Date: Wed, 24 Sep 2025 16:51:49 +0800
Subject: [PATCH] libavcodec/riscv: add RVV optimized sao_edge_filter for HEVC

This patch adds an RVV (RISC-V Vector) implementation of the 8-bit HEVC sao_edge_filter.

Signed-off-by: daichengrong <daichengrong@iscas.ac.cn>
---
 libavcodec/riscv/Makefile        |   3 +-
 libavcodec/riscv/h26x/h2656dsp.h |   3 +
 libavcodec/riscv/h26x/sao_rvv.S  | 110 +++++++++++++++++++++++++++++++
 libavcodec/riscv/hevcdsp_init.c  |  14 ++++
 4 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/riscv/h26x/sao_rvv.S

diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 736f873fe8..b5faa49c0f 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -36,7 +36,8 @@ RVV-OBJS-$(CONFIG_H264DSP) += riscv/h264addpx_rvv.o riscv/h264dsp_rvv.o \
 OBJS-$(CONFIG_H264QPEL) += riscv/h264qpel_init.o
 RVV-OBJS-$(CONFIG_H264QPEL) += riscv/h264qpel_rvv.o
 OBJS-$(CONFIG_HEVC_DECODER) += riscv/hevcdsp_init.o
-RVV-OBJS-$(CONFIG_HEVC_DECODER)  += riscv/h26x/h2656_inter_rvv.o
+RVV-OBJS-$(CONFIG_HEVC_DECODER)  += riscv/h26x/h2656_inter_rvv.o \
+                                    riscv/h26x/sao_rvv.o
 OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_init.o
 RVV-OBJS-$(CONFIG_HUFFYUV_DECODER) += riscv/huffyuvdsp_rvv.o
 OBJS-$(CONFIG_IDCTDSP) += riscv/idctdsp_init.o
diff --git a/libavcodec/riscv/h26x/h2656dsp.h b/libavcodec/riscv/h26x/h2656dsp.h
index 6d2ac55556..a3c11efe1c 100644
--- a/libavcodec/riscv/h26x/h2656dsp.h
+++ b/libavcodec/riscv/h26x/h2656dsp.h
@@ -23,5 +23,8 @@
 
 void ff_h2656_put_pixels_8_rvv_256(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width);
 void ff_h2656_put_pixels_8_rvv_128(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width);
+void ff_hevc_sao_edge_filter_8_rvv(uint8_t *_dst, const uint8_t *_src,
+                                   ptrdiff_t stride_dst, const int16_t *sao_offset_val,
+                                   int sao_eo_class, int width, int height);
 
 #endif
diff --git a/libavcodec/riscv/h26x/sao_rvv.S b/libavcodec/riscv/h26x/sao_rvv.S
new file mode 100644
index 0000000000..7a928a8217
--- /dev/null
+++ b/libavcodec/riscv/h26x/sao_rvv.S
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2025 Institute of Software, Chinese Academy of Sciences (ISCAS).
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/riscv/asm.S"
+
+#define HEVC_MAX_PB_SIZE 64 /* NOTE(review): local copy; presumably mirrors the decoder's value, keep in sync */
+#define AV_INPUT_BUFFER_PADDING_SIZE 64 /* NOTE(review): local copy of the libavcodec constant, confirm value */
+#define HEVC_SAO_STRIDE (2 * HEVC_MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) /* src row pitch in bytes */
+
+const .Lhevc_sao_edge_pos, align=4
+        .4byte  1                               // horizontal
+        .4byte  HEVC_SAO_STRIDE                 // vertical
+        .4byte  HEVC_SAO_STRIDE + 1             // 45 degree
+        .4byte  HEVC_SAO_STRIDE - 1             // 135 degree
+endconst
+
+const .Lhevc_edge_idx, align=1 /* NOTE(review): align presumably log2; 16-bit entries are read by vle16.v */
+        .2byte  1,2,0,3,4                       // sao_offset_val index for (diff + 2) = 0..4
+endconst
+
+func ff_hevc_sao_edge_filter_8_rvv, zve32x
+        // HEVC SAO edge filter, 8-bit: dst = clip(src + offset[class(src, neighbours)])
+        // a0 = dst, a1 = src, a2 = stride_dst, a3 = sao_offset_val,
+        // a4 = sao_eo_class, a5 = width, a6 = height
+        lla             a7, .Lhevc_sao_edge_pos
+        li              t5, HEVC_SAO_STRIDE
+        slli            a4, a4, 2
+        add             a4, a7, a4
+        lw              a4, (a4)                // byte distance to the two neighbours
+
+        lla             t0, .Lhevc_edge_idx
+        vsetivli        zero, 5, e16, m1, ta, ma // NOTE(review): 5 x e16 in m1 assumes VLEN >= 128 - confirm
+        vle16.v         v2, (a3)                // load sao_offset_val
+        vle16.v         v4, (t0)
+        vrgather.vv     v6, v2, v4              // v6[i] = sao_offset_val[edge_idx[i]]
+        li              t0, 255                 // upper clamp bound, constant for all rows
+
+        sub             t5, t5, a5              // src row pitch - width
+        sub             t6, a2, a5              // stride_dst - width
+1:
+        mv              t4, a5                  // pixels left in this row
+        sub             t2, a1, a4              // src_a (prev) = src - sao_edge_pos
+        add             t3, a1, a4              // src_b (next) = src + sao_edge_pos
+2:
+        vsetvli         t1, t4, e8, m1, ta, ma
+        vle8.v          v3, (a1)                // load src
+        vle8.v          v4, (t2)                // load src_a (prev)
+        vle8.v          v5, (t3)                // load src_b (next)
+        add             a1, a1, t1
+        add             t2, t2, t1
+        add             t3, t3, t1
+
+        // CMP(a, b) = (a > b) - (a < b), built from two unsigned compares and merges
+        vmv.v.i         v1, 0
+        vmsgtu.vv       v0, v3, v4              // (cur > prev)
+        vmerge.vim      v17, v1, 1, v0
+        vmsgtu.vv       v0, v4, v3              // (prev > cur)
+        vmerge.vim      v20, v17, -1, v0        // diff0 = CMP(cur, prev)
+
+        vmsgtu.vv       v0, v3, v5              // (cur > next)
+        vmerge.vim      v19, v1, 1, v0
+        vmsgtu.vv       v0, v5, v3              // (next > cur)
+        vmerge.vim      v21, v19, -1, v0        // diff1 = CMP(cur, next)
+
+        vwadd.vv        v22, v20, v21           // diff = diff0 + diff1, widened to 16 bit
+
+        vsetvli         zero, zero, e16, m2, ta, ma
+        vadd.vi         v20, v22, 2             // table index = diff + 2, in 0..4
+        vrgather.vv     v16, v6, v20            // per-pixel sao_offset_val
+        vzext.vf2       v18, v3
+        vsadd.vv        v20, v16, v18           // src + sao_offset_val
+
+        // clamp the 16-bit sums to [0, 255] before narrowing back to bytes
+        vmsgt.vx        v0, v20, t0
+        vmerge.vxm      v16, v20, t0, v0        // > 255
+        vmslt.vi        v0, v20, 0
+        vmerge.vim      v16, v16, 0, v0         // < 0
+
+        vsetvli         zero, zero, e8, m1, ta, ma
+        vnsrl.wi        v20, v16, 0
+        vse8.v          v20, (a0)
+        add             a0, a0, t1
+        sub             t4, t4, t1
+        bnez            t4, 2b
+
+        // row finished: step src and dst to the start of the next row
+        add             a1, a1, t5
+        add             a0, a0, t6
+        addi            a6, a6, -1
+        bnez            a6, 1b
+
+        ret
+endfunc
diff --git a/libavcodec/riscv/hevcdsp_init.c b/libavcodec/riscv/hevcdsp_init.c
index 70bc8ebea7..802fcb02f0 100644
--- a/libavcodec/riscv/hevcdsp_init.c
+++ b/libavcodec/riscv/hevcdsp_init.c
@@ -40,6 +40,20 @@ void ff_hevc_dsp_init_riscv(HEVCDSPContext *c, const int bit_depth)
     const int flags = av_get_cpu_flags();
     int vlenb;
 
+    if (flags & AV_CPU_FLAG_RVV_I32) {
+        switch (bit_depth) {
+            case 8:
+                c->sao_edge_filter[0]          =
+                c->sao_edge_filter[1]          =
+                c->sao_edge_filter[2]          =
+                c->sao_edge_filter[3]          =
+                c->sao_edge_filter[4]          = ff_hevc_sao_edge_filter_8_rvv;
+                break;
+            default:
+                break;
+        }
+    }
+
     if (!(flags & AV_CPU_FLAG_RVV_I32) || !(flags & AV_CPU_FLAG_RVB))
         return;
 
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2025-09-24  9:13 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=175870517514.25.6988579964894695642@bf249f23a2c8 \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ http://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git