From: pengxu <pengxu@loongson.cn> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH v2 1/2] avutil/loongarch: add LSX optimization for aac audio decode Date: Thu, 18 Apr 2024 15:36:08 +0800 Message-ID: <20240418073609.19365-2-pengxu@loongson.cn> (raw) In-Reply-To: <20240418073609.19365-1-pengxu@loongson.cn> Add functions: vector_fmul_window_lsx butterflies_float_lsx vector_fmul_scalar_lsx ./ffmpeg -i ../../1.aac -f null - before:482x after:523x --- libavutil/float_dsp.c | 2 + libavutil/float_dsp.h | 1 + libavutil/loongarch/Makefile | 5 +- libavutil/loongarch/float_dsp.S | 287 ++++++++++++++++++ libavutil/loongarch/float_dsp.h | 32 ++ .../loongarch/float_dsp_init_loongarch.c | 35 +++ 6 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 libavutil/loongarch/float_dsp.S create mode 100644 libavutil/loongarch/float_dsp.h create mode 100644 libavutil/loongarch/float_dsp_init_loongarch.c diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index e9fb023466..7128ff3f96 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -162,6 +162,8 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact) ff_float_dsp_init_x86(fdsp); #elif ARCH_MIPS ff_float_dsp_init_mips(fdsp); +#elif ARCH_LOONGARCH64 + ff_float_dsp_init_loongarch(fdsp); #endif return fdsp; } diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index 342a8715c5..679a930eab 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -206,6 +206,7 @@ void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict); void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp); void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp); void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp); +void ff_float_dsp_init_loongarch(AVFloatDSPContext *fdsp); /** * Allocate a float DSP context. 
diff --git a/libavutil/loongarch/Makefile b/libavutil/loongarch/Makefile index 2addd9351c..ae710f0515 100644 --- a/libavutil/loongarch/Makefile +++ b/libavutil/loongarch/Makefile @@ -1 +1,4 @@ -OBJS += loongarch/cpu.o +OBJS += loongarch/cpu.o \ + loongarch/float_dsp_init_loongarch.o + +LSX-OBJS += loongarch/float_dsp.o diff --git a/libavutil/loongarch/float_dsp.S b/libavutil/loongarch/float_dsp.S new file mode 100644 index 0000000000..5073c8424f --- /dev/null +++ b/libavutil/loongarch/float_dsp.S @@ -0,0 +1,287 @@ +/* + * LoongArch LSX optimized float DSP functions + * + * Copyright (c) 2024 Loongson Technology Corporation Limited + * Contributed by PengXu <pengxu@loongson.cn> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/loongarch/loongson_asm.S"
+
+
+/* void vector_fmul_window(float *dst, const float *src0,
+ *                         const float *src1, const float *win, int len)
+ *
+ * t4/t5/t7 = dst/src0/win advanced by len floats, t6 = src1. a6 is the byte
+ * distance of the ascending index i below the advanced pointers, a7 the byte
+ * offset of the descending index j. Every step stores
+ *     dst[i] = src0[i] * win[j] - src1[j] * win[i]
+ *     dst[j] = src0[i] * win[i] + src1[j] * win[j]
+ * Only argument/temporary registers are used, so no stack frame is needed. */
+function vector_fmul_window_lsx
+    move          t4,     a0                  //dst
+    move          t5,     a1                  //src0
+    move          t6,     a2                  //src1
+    move          t7,     a3                  //win
+    slli.d        t8,     a4,     2           //len in bytes
+
+    add.d         t4,     t4,     t8          //dst  + len
+    add.d         t7,     t7,     t8          //win  + len
+    add.d         t5,     t5,     t8          //src0 + len
+
+    add.d         a6,     $r0,    t8          //a6 = 4 * len
+    addi.d        a7,     t8,     -4          //a7 = 4 * (len - 1)
+
+    move          a5,     $r0
+    srai.d        t0,     a4,     2           //number of 4-float iterations
+    beq           a5,     t0,     .VFW02
+
+// Main loop: four floats from the low end (i) and four from the high end (j).
+.VFW01:
+    sub.d         t1,     t5,     a6          //&src0[i]
+    addi.d        t2,     a7,     -12         //byte offset of element j - 3
+    vld           vr1,    t1,     0x00        //s0
+    vldx          vr2,    a2,     t2          //s1
+
+    sub.d         t1,     t7,     a6          //&win[i]
+    vld           vr3,    t1,     0x00        //wi
+    vldx          vr4,    t7,     t2          //wj
+
+    vpermi.w      vr2,    vr2,    0x1b        //reverse lanes: j runs downwards
+    vpermi.w      vr4,    vr4,    0x1b
+
+    vfmul.s       vr5,    vr2,    vr3
+    vfmsub.s      vr5,    vr1,    vr4,    vr5 //dsti
+
+    vfmul.s       vr6,    vr2,    vr4
+    vfmadd.s      vr6,    vr1,    vr3,    vr6 //dstj
+
+    vpermi.w      vr6,    vr6,    0x1b        //back to memory order
+
+    sub.d         t1,     t4,     a6          //&dst[i]
+    vst           vr5,    t1,     0x00
+    vstx          vr6,    t4,     t2
+
+    addi.d        a6,     a6,     -16
+    addi.d        a7,     a7,     -16
+
+    addi.d        a5,     a5,     1
+    blt           a5,     t0,     .VFW01
+
+// Tail for (len & 2): two scalar steps. a6/a7 are byte offsets (8 per pair).
+.VFW02:
+    andi          t0,     a4,     2
+    beq           $r0,    t0,     .VFW03
+
+    sub.d         t0,     t5,     a6          //&src0[i]
+    addi.d        t1,     a7,     -4
+    add.d         t1,     a2,     t1          //&src1[j - 1] (src1 is not advanced)
+
+    sub.d         t2,     t7,     a6          //&win[i]
+    addi.d        t3,     a7,     -4
+    add.d         t3,     t7,     t3          //&win[j - 1]
+
+    fld.s         f0,     t0,     0x00        //s0
+    fld.s         f1,     t0,     0x04
+
+    fld.s         f2,     t1,     0x04        //s1
+    fld.s         f3,     t1,     0x00
+
+    fld.s         f4,     t2,     0x00        //wi
+    fld.s         f5,     t2,     0x04
+
+    fld.s         f6,     t3,     0x04        //wj
+    fld.s         f7,     t3,     0x00
+
+    fmul.s        f8,     f2,     f4
+    fmsub.s       f8,     f0,     f6,     f8  //dsti
+    fmul.s        f9,     f3,     f5
+    fmsub.s       f9,     f1,     f7,     f9
+
+    fmul.s        f10,    f2,     f6
+    fmadd.s       f10,    f0,     f4,     f10 //dstj
+    fmul.s        f11,    f3,     f7
+    fmadd.s       f11,    f1,     f5,     f11
+
+    sub.d         t2,     t4,     a6          //&dst[i]
+    add.d         t3,     t4,     a7
+    addi.d        t3,     t3,     -4          //&dst[j - 1]
+
+    fst.s         f8,     t2,     0x00
+    fst.s         f9,     t2,     0x04
+    fst.s         f10,    t3,     0x04
+    fst.s         f11,    t3,     0x00
+
+    addi.d        a6,     a6,     -8          //two floats consumed at each end
+    addi.d        a7,     a7,     -8
+
+// Tail for (len & 1): the single remaining step.
+.VFW03:
+    andi          t0,     a4,     1
+    beq           $r0,    t0,     .VFW04
+
+    sub.d         t0,     $r0,    a6          //index for fldx.s: byte offset of i
+
+    fldx.s        f0,     t5,     t0          //s0
+    fldx.s        f2,     t6,     a7          //s1
+    fldx.s        f4,     t7,     t0          //wi
+    fldx.s        f6,     t7,     a7          //wj
+
+    fmul.s        f8,     f2,     f4
+    fmsub.s       f8,     f0,     f6,     f8  //dsti
+
+    fmul.s        f10,    f2,     f6
+    fmadd.s       f10,    f0,     f4,     f10 //dstj
+
+    sub.d         t0,     t4,     a6          //&dst[i]
+
+    fst.s         f8,     t0,     0x00
+    fstx.s        f10,    t4,     a7
+
+.VFW04:
+endfunc
+
+
+/* void butterflies_float(float *restrict v1, float *restrict v2, int len)
+ * In place: v1[i] = v1[i] + v2[i], v2[i] = old v1[i] - v2[i]. */
+function butterflies_float_lsx
+    move          a6,     $r0                 //4-float iteration counter
+    move          a7,     $r0                 //byte offset into v1/v2
+
+    move          t4,     a0                  //v1
+    move          t5,     a1                  //v2
+    move          t6,     a2                  //len
+
+    srai.d        t0,     t6,     2
+    beq           a6,     t0,     .BFL02
+
+.BFL01:
+    vldx          vr0,    t4,     a7
+    vldx          vr1,    t5,     a7
+
+    vfsub.s       vr3,    vr0,    vr1         //v1 - v2
+    vfadd.s       vr4,    vr0,    vr1         //v1 + v2
+
+    vstx          vr4,    t4,     a7          //sum goes back to v1
+    vstx          vr3,    t5,     a7          //difference goes back to v2
+
+    addi.d        a7,     a7,     16
+    addi.d        a6,     a6,     1
+    blt           a6,     t0,     .BFL01
+
+.BFL02:
+    andi          t0,     t6,     2           //two leftover floats?
+    beq           $r0,    t0,     .BFL03
+
+    add.d         t1,     t4,     a7
+    add.d         t2,     t5,     a7
+
+    fld.s         f0,     t1,     0x00
+    fld.s         f1,     t1,     0x04
+    fld.s         f2,     t2,     0x00
+    fld.s         f3,     t2,     0x04
+
+    fsub.s        f4,     f0,     f2
+    fsub.s        f5,     f1,     f3
+    fadd.s        f6,     f0,     f2
+    fadd.s        f7,     f1,     f3
+
+    fst.s         f6,     t1,     0x00
+    fst.s         f7,     t1,     0x04
+    fst.s         f4,     t2,     0x00
+    fst.s         f5,     t2,     0x04
+
+    addi.d        a7,     a7,     8
+
+.BFL03:
+    andi          t0,     t6,     1           //one leftover float?
+    beq           $r0,    t0,     .BFL04
+
+    fldx.s        f0,     t4,     a7
+    fldx.s        f2,     t5,     a7
+
+    fsub.s        f4,     f0,     f2
+    fadd.s        f6,     f0,     f2
+
+    fstx.s        f6,     t4,     a7
+    fstx.s        f4,     t5,     a7
+
+    addi.d        a7,     a7,     4
+
+.BFL04:
+endfunc
+
+
+/* void vector_fmul_scalar_lsx(float *dst, const float *src, float mul, int len)
+ * dst[i] = src[i] * mul; mul is read from f0 (lane 0 of vr0), len from a2. */
+function vector_fmul_scalar_lsx
+    move          a6,     $r0                 //4-float iteration counter
+    move          a7,     $r0                 //byte offset into dst/src
+
+    move          t4,     a0                  //dst
+    move          t5,     a1                  //src
+    move          t6,     a2                  //len
+
+    vpermi.w      vr0,    vr0,    0x00        //copy lane 0 (mul) to all lanes
+
+    srai.d        t0,     t6,     2
+    beq           a6,     t0,     .BFS02
+
+.BFS01:
+    vldx          vr1,    t5,     a7
+
+    vfmul.s       vr2,    vr1,    vr0
+
+    vstx          vr2,    t4,     a7
+
+    addi.d        a7,     a7,     16
+    addi.d        a6,     a6,     1
+    blt           a6,     t0,     .BFS01
+
+.BFS02:
+    andi          t0,     t6,     2           //two leftover floats?
+    beq           $r0,    t0,     .BFS03
+
+    add.d         t1,     t5,     a7
+    add.d         t2,     t4,     a7
+
+    fld.s         f1,     t1,     0x00
+    fld.s         f2,     t1,     0x04
+
+    fmul.s        f3,     f1,     f0
+    fmul.s        f4,     f2,     f0
+
+    fst.s         f3,     t2,     0x00
+    fst.s         f4,     t2,     0x04
+
+    addi.d        a7,     a7,     8
+
+.BFS03:
+    andi          t0,     t6,     1           //one leftover float?
+    beq           $r0,    t0,     .BFS04
+
+    fldx.s        f1,     t5,     a7
+
+    fmul.s        f3,     f1,     f0
+
+    fstx.s        f3,     t4,     a7
+
+    addi.d        a7,     a7,     4
+
+.BFS04:
+endfunc
diff --git a/libavutil/loongarch/float_dsp.h b/libavutil/loongarch/float_dsp.h
new file mode 100644
index 0000000000..644c1f3713
--- /dev/null
+++ b/libavutil/loongarch/float_dsp.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2024 Loongson Technology Corporation Limited
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_LOONGARCH_FLOAT_DSP_H +#define AVUTIL_LOONGARCH_FLOAT_DSP_H + +#include "libavutil/float_dsp.h" + +void vector_fmul_window_lsx(float *dst, const float *src0, + const float *src1, const float *win, int len); + +void butterflies_float_lsx(float *restrict v1, float *restrict v2, int len); + +void vector_fmul_scalar_lsx(float *dst, const float *src, float mul, int len); + +#endif /* AVUTIL_LOONGARCH_FLOAT_DSP_H */ \ No newline at end of file diff --git a/libavutil/loongarch/float_dsp_init_loongarch.c b/libavutil/loongarch/float_dsp_init_loongarch.c new file mode 100644 index 0000000000..592ba78058 --- /dev/null +++ b/libavutil/loongarch/float_dsp_init_loongarch.c @@ -0,0 +1,35 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "float_dsp.h" +#include "libavutil/loongarch/cpu.h" + +av_cold void ff_float_dsp_init_loongarch(AVFloatDSPContext *fdsp) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_lsx(cpu_flags)) { + fdsp->vector_fmul_window = vector_fmul_window_lsx; + fdsp->butterflies_float = butterflies_float_lsx; + fdsp->vector_fmul_scalar = vector_fmul_scalar_lsx; + } +} -- 2.20.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-04-18 7:36 UTC|newest] Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-04-18 7:36 [FFmpeg-devel] (no subject) pengxu 2024-04-18 7:36 ` pengxu [this message] 2024-04-18 7:36 ` [FFmpeg-devel] [PATCH v2 2/2] avcodec/loongarch: add LSX optimization for aac audio encode pengxu
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240418073609.19365-2-pengxu@loongson.cn \ --to=pengxu@loongson.cn \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git