From: Zhao Zhili <quinkblack-at-foxmail.com@ffmpeg.org> To: ffmpeg-devel@ffmpeg.org Cc: Zhao Zhili <zhilizhao@tencent.com> Subject: [FFmpeg-devel] [PATCH 1/2] wasm/hevc: Add sao_band_filter Date: Sat, 7 Jun 2025 18:18:11 +0800 Message-ID: <tencent_44DE2A891328DC528B8E2621508DA6F0A706@qq.com> (raw) From: Zhao Zhili <zhilizhao@tencent.com> hevc_sao_band_8_8_c: 63.0 ( 1.00x) hevc_sao_band_8_8_simd128: 10.4 ( 6.06x) hevc_sao_band_16_8_c: 230.4 ( 1.00x) hevc_sao_band_16_8_simd128: 22.9 (10.07x) hevc_sao_band_32_8_c: 900.4 ( 1.00x) hevc_sao_band_32_8_simd128: 81.5 (11.05x) hevc_sao_band_48_8_c: 2009.1 ( 1.00x) hevc_sao_band_48_8_simd128: 170.2 (11.80x) hevc_sao_band_64_8_c: 3535.0 ( 1.00x) hevc_sao_band_64_8_simd128: 297.5 (11.88x) Signed-off-by: Zhao Zhili <zhilizhao@tencent.com> --- libavcodec/wasm/hevc/Makefile | 3 +- libavcodec/wasm/hevc/dsp_init.c | 7 ++ libavcodec/wasm/hevc/sao.c | 113 ++++++++++++++++++++++++++++++++ libavcodec/wasm/hevc/sao.h | 41 ++++++++++++ 4 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 libavcodec/wasm/hevc/sao.c create mode 100644 libavcodec/wasm/hevc/sao.h diff --git a/libavcodec/wasm/hevc/Makefile b/libavcodec/wasm/hevc/Makefile index 132daa3106..7e8ab3776e 100644 --- a/libavcodec/wasm/hevc/Makefile +++ b/libavcodec/wasm/hevc/Makefile @@ -1,3 +1,4 @@ OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/dsp_init.o -SIMD128-OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/idct.o +SIMD128-OBJS-$(CONFIG_HEVC_DECODER) += wasm/hevc/idct.o \ + wasm/hevc/sao.o diff --git a/libavcodec/wasm/hevc/dsp_init.c b/libavcodec/wasm/hevc/dsp_init.c index e5c8a2ebb6..76a1031ff4 100644 --- a/libavcodec/wasm/hevc/dsp_init.c +++ b/libavcodec/wasm/hevc/dsp_init.c @@ -21,6 +21,7 @@ #include "libavutil/cpu_internal.h" #include "libavcodec/hevc/dsp.h" #include "libavcodec/wasm/hevc/idct.h" +#include "libavcodec/wasm/hevc/sao.h" av_cold void ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth) { @@ -35,6 +36,12 @@ av_cold void 
ff_hevc_dsp_init_wasm(HEVCDSPContext *c, const int bit_depth) c->idct[1] = ff_hevc_idct_8x8_8_simd128; c->idct[2] = ff_hevc_idct_16x16_8_simd128; c->idct[3] = ff_hevc_idct_32x32_8_simd128; + + c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_simd128; + c->sao_band_filter[1] = + c->sao_band_filter[2] = + c->sao_band_filter[3] = + c->sao_band_filter[4] = ff_hevc_sao_band_filter_16x16_8_simd128; } else if (bit_depth == 10) { c->idct[0] = ff_hevc_idct_4x4_10_simd128; c->idct[1] = ff_hevc_idct_8x8_10_simd128; diff --git a/libavcodec/wasm/hevc/sao.c b/libavcodec/wasm/hevc/sao.c new file mode 100644 index 0000000000..82134af7f3 --- /dev/null +++ b/libavcodec/wasm/hevc/sao.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2025 Zhao Zhili + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "sao.h" + +#include <wasm_simd128.h> + +void ff_hevc_sao_band_filter_8x8_8_simd128(uint8_t *dst, const uint8_t *src, + ptrdiff_t stride_dst, + ptrdiff_t stride_src, + const int16_t *sao_offset_val, + int sao_left_class, int width, + int height) +{ + int8_t offset_table[32] = {0}; + v128_t offset_low, offset_high; + + for (int k = 0; k < 4; k++) + offset_table[(k + sao_left_class) & 31] = (int8_t)sao_offset_val[k + 1]; + + offset_low = wasm_v128_load(offset_table); + offset_high = wasm_v128_load(&offset_table[16]); + + for (int y = height; y > 0; y -= 2) { + v128_t src_v, src_high; + v128_t v0, v1; + + src_v = wasm_v128_load64_zero(src); + src += stride_src; + src_v = wasm_v128_load64_lane(src, src_v, 1); + src += stride_src; + + v0 = wasm_u8x16_shr(src_v, 3); + v1 = wasm_i8x16_sub(v0, wasm_i8x16_const_splat(16)); + v0 = wasm_i8x16_swizzle(offset_low, v0); + v1 = wasm_i8x16_swizzle(offset_high, v1); + v0 = wasm_v128_or(v0, v1); + src_high = wasm_u16x8_extend_high_u8x16(src_v); + v1 = wasm_i16x8_extend_high_i8x16(v0); + src_v = wasm_u16x8_extend_low_u8x16(src_v); + v0 = wasm_i16x8_extend_low_i8x16(v0); + + v0 = wasm_i16x8_add_sat(src_v, v0); + v1 = wasm_i16x8_add_sat(src_high, v1); + v0 = wasm_u8x16_narrow_i16x8(v0, v1); + + wasm_v128_store64_lane(dst, v0, 0); + dst += stride_dst; + wasm_v128_store64_lane(dst, v0, 1); + dst += stride_dst; + } +} + +void ff_hevc_sao_band_filter_16x16_8_simd128(uint8_t *dst, const uint8_t *src, + ptrdiff_t stride_dst, + ptrdiff_t stride_src, + const int16_t *sao_offset_val, + int sao_left_class, int width, + int height) +{ + int8_t offset_table[32] = {0}; + v128_t offset_low, offset_high; + + for (int k = 0; k < 4; k++) + offset_table[(k + sao_left_class) & 31] = (int8_t)sao_offset_val[k + 1]; + + 
offset_low = wasm_v128_load(offset_table); + offset_high = wasm_v128_load(&offset_table[16]); + + for (int y = height; y > 0; y--) { + for (int x = 0; x < width; x += 16) { + v128_t src_v, src_high; + v128_t v0, v1; + + src_v = wasm_v128_load(&src[x]); + + v0 = wasm_u8x16_shr(src_v, 3); + v1 = wasm_i8x16_sub(v0, wasm_i8x16_const_splat(16)); + v0 = wasm_i8x16_swizzle(offset_low, v0); + v1 = wasm_i8x16_swizzle(offset_high, v1); + v0 = wasm_v128_or(v0, v1); + src_high = wasm_u16x8_extend_high_u8x16(src_v); + v1 = wasm_i16x8_extend_high_i8x16(v0); + src_v = wasm_u16x8_extend_low_u8x16(src_v); + v0 = wasm_i16x8_extend_low_i8x16(v0); + + v0 = wasm_i16x8_add_sat(src_v, v0); + v1 = wasm_i16x8_add_sat(src_high, v1); + v0 = wasm_u8x16_narrow_i16x8(v0, v1); + wasm_v128_store(&dst[x], v0); + } + + dst += stride_dst; + src += stride_src; + } +} diff --git a/libavcodec/wasm/hevc/sao.h b/libavcodec/wasm/hevc/sao.h new file mode 100644 index 0000000000..6119ec90f1 --- /dev/null +++ b/libavcodec/wasm/hevc/sao.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2025 Zhao Zhili + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_WASM_HEVC_SAO_H +#define AVCODEC_WASM_HEVC_SAO_H + +#include <stddef.h> +#include <stdint.h> + +void ff_hevc_sao_band_filter_8x8_8_simd128(uint8_t *_dst, const uint8_t *_src, + ptrdiff_t _stride_dst, + ptrdiff_t _stride_src, + const int16_t *sao_offset_val, + int sao_left_class, int width, + int height); + +void ff_hevc_sao_band_filter_16x16_8_simd128(uint8_t *_dst, const uint8_t *_src, + ptrdiff_t _stride_dst, + ptrdiff_t _stride_src, + const int16_t *sao_offset_val, + int sao_left_class, int width, + int height); + +#endif \ No newline at end of file -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
reply other threads:[~2025-06-07 10:18 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=tencent_44DE2A891328DC528B8E2621508DA6F0A706@qq.com \
    --to=quinkblack-at-foxmail.com@ffmpeg.org \
    --cc=ffmpeg-devel@ffmpeg.org \
    --cc=zhilizhao@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git