* [FFmpeg-devel] [PATCH v1 1/2] avutil/loongarch:add LSX optimization for aac audio decode
@ 2024-04-09 12:37 pengxu
2024-04-09 12:37 ` [FFmpeg-devel] [PATCH v1 2/2] avcodec/loongarch:add LSX optimization for aac audio encode pengxu
0 siblings, 1 reply; 2+ messages in thread
From: pengxu @ 2024-04-09 12:37 UTC (permalink / raw)
To: ffmpeg-devel
Add functions:
vector_fmul_window_lsx
butterflies_float_lsx
vector_fmul_scalar_lsx
./ffmpeg -i ../../1.aac -f null -
before:482x
after:523x
---
libavutil/float_dsp.c | 2 +
libavutil/float_dsp.h | 1 +
libavutil/loongarch/Makefile | 5 +-
libavutil/loongarch/float_dsp.S | 287 ++++++++++++++++++
libavutil/loongarch/float_dsp.h | 32 ++
.../loongarch/float_dsp_init_loongarch.c | 35 +++
6 files changed, 361 insertions(+), 1 deletion(-)
create mode 100644 libavutil/loongarch/float_dsp.S
create mode 100644 libavutil/loongarch/float_dsp.h
create mode 100644 libavutil/loongarch/float_dsp_init_loongarch.c
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index e9fb023466..7128ff3f96 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -162,6 +162,8 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
ff_float_dsp_init_x86(fdsp);
#elif ARCH_MIPS
ff_float_dsp_init_mips(fdsp);
+#elif ARCH_LOONGARCH64
+ ff_float_dsp_init_loongarch(fdsp);
#endif
return fdsp;
}
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 342a8715c5..679a930eab 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -206,6 +206,7 @@ void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp);
void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp);
+void ff_float_dsp_init_loongarch(AVFloatDSPContext *fdsp);
/**
* Allocate a float DSP context.
diff --git a/libavutil/loongarch/Makefile b/libavutil/loongarch/Makefile
index 2addd9351c..ae710f0515 100644
--- a/libavutil/loongarch/Makefile
+++ b/libavutil/loongarch/Makefile
@@ -1 +1,4 @@
-OBJS += loongarch/cpu.o
+OBJS += loongarch/cpu.o \
+ loongarch/float_dsp_init_loongarch.o
+
+LSX-OBJS += loongarch/float_dsp.o
diff --git a/libavutil/loongarch/float_dsp.S b/libavutil/loongarch/float_dsp.S
new file mode 100644
index 0000000000..5073c8424f
--- /dev/null
+++ b/libavutil/loongarch/float_dsp.S
@@ -0,0 +1,287 @@
+/*
+ * LoongArch LASX/LSX optimized DSP functions
+ *
+ * Copyright (c) 2024 Loongson Technology Corporation Limited
+ * Contributed by PengXu <pengxu@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/loongarch/loongson_asm.S"
+
+
+/* void vector_fmul_window(float *dst, const float *src0,
+ *                         const float *src1, const float *win, int len)
+ *
+ * With dst, src0 and win advanced by len elements, i counting up from -len
+ * and j counting down from len - 1 (src1 is indexed by j from its start):
+ *   dst[i] = src0[i] * win[j] - src1[j] * win[i]
+ *   dst[j] = src0[i] * win[i] + src1[j] * win[j]
+ * Only temporaries and argument registers are used, so no stack frame. */
+function vector_fmul_window_lsx
+ move t4, a0
+ move t5, a1
+ move t6, a2
+ move t7, a3
+ move t8, a4
+ slli.d t8, t8, 2 //len in bytes
+
+ add.d t4, t4, t8 //dst + len
+ add.d t7, t7, t8 //win + len
+ add.d t5, t5, t8 //src0 + len
+
+ add.d a6, $r0, t8 //a6 = -i in bytes
+ addi.d a7, t8, -4 //a7 = j in bytes
+
+ move a5, $r0
+ srai.d t0, a4, 2 //number of 4-float iterations
+ beq a5, t0, .VFW02
+
+.VFW01: /* 4 floats from each end per iteration */
+ sub.d t1, t5, a6
+ addi.d t2, a7, -12 //byte offset of element j - 3
+ vld vr1, t1, 0x00 //s0
+ vldx vr2, a2, t2 //s1
+
+ sub.d t1, t7, a6
+ vld vr3, t1, 0x00 //wi
+ vldx vr4, t7, t2 //wj
+
+ vpermi.w vr2, vr2, 0x1b //reverse lanes: lane k now holds element j - k
+ vpermi.w vr4, vr4, 0x1b
+
+ vfmul.s vr5, vr2, vr3
+ vfmsub.s vr5, vr1, vr4, vr5 //dsti
+
+ vfmul.s vr6, vr2, vr4
+ vfmadd.s vr6, vr1, vr3, vr6 //dstj
+
+ vpermi.w vr6, vr6, 0x1b //back to memory order before the store
+
+ sub.d t1, t4, a6
+ vst vr5, t1, 0x00
+ vstx vr6, t4, t2
+
+ addi.d a6, a6, -16
+ addi.d a7, a7, -16
+
+ addi.d a5, a5, 1
+ blt a5, t0, .VFW01
+
+.VFW02: /* &2 */
+ andi t0, a4, 2
+ beq $r0, t0, .VFW03
+
+ sub.d t0, t5, a6 //&src0[i]
+ addi.d t1, a7, -4
+ add.d t1, t6, t1 //&src1[j - 1]: src1 is based at t6, not at src0 + len
+
+ sub.d t2, t7, a6 //&win[i]
+ addi.d t3, a7, -4
+ add.d t3, t7, t3 //&win[j - 1]
+
+ fld.s f0, t0, 0x00 //s0
+ fld.s f1, t0, 0x04
+
+ fld.s f2, t1, 0x04 //s1
+ fld.s f3, t1, 0x00
+
+ fld.s f4, t2, 0x00 //wi
+ fld.s f5, t2, 0x04
+
+ fld.s f6, t3, 0x04 //wj
+ fld.s f7, t3, 0x00
+
+ fmul.s f8, f2, f4
+ fmsub.s f8, f0, f6, f8 //dsti
+ fmul.s f9, f3, f5
+ fmsub.s f9, f1, f7, f9
+
+ fmul.s f10, f2, f6
+ fmadd.s f10, f0, f4, f10 //dstj
+ fmul.s f11, f3, f7
+ fmadd.s f11, f1, f5, f11
+
+ sub.d t2, t4, a6
+ add.d t3, t4, a7
+ addi.d t3, t3, -4
+
+ fst.s f8, t2, 0x00
+ fst.s f9, t2, 0x04
+ fst.s f10, t3, 0x04
+ fst.s f11, t3, 0x00
+
+ addi.d a6, a6, -8 //a6/a7 are byte offsets: two floats consumed
+ addi.d a7, a7, -8
+
+.VFW03: /* &1 */
+ andi t0, a4, 1
+ beq $r0, t0, .VFW04
+
+ sub.d t0, $r0, a6 //i in bytes (negative), used as index below
+
+ fldx.s f0, t5, t0 //s0
+ fldx.s f2, t6, a7 //s1
+ fldx.s f4, t7, t0 //wi
+ fldx.s f6, t7, a7 //wj
+
+ fmul.s f8, f2, f4
+ fmsub.s f8, f0, f6, f8 //dsti
+
+ fmul.s f10, f2, f6
+ fmadd.s f10, f0, f4, f10 //dstj
+
+ sub.d t0, t4, a6
+
+ fst.s f8, t0, 0x00
+ fstx.s f10, t4, a7
+
+ addi.d a6, a6, -4
+ addi.d a7, a7, -4
+
+.VFW04:
+endfunc
+
+
+/* void butterflies_float(float *restrict v1, float *restrict v2,
+ int len): v1[i] = v1[i] + v2[i], v2[i] = old v1[i] - v2[i], i in [0, len) */
+function butterflies_float_lsx
+ move a6, $r0 //vector loop counter
+ move a7, $r0 //byte offset into v1 and v2
+
+ move t4, a0 //v1
+ move t5, a1 //v2
+ move t6, a2 //len
+
+ srai.d t0, t6, 2 //number of 4-float iterations
+ beq a6, t0, .BFL02
+
+.BFL01: /* 4 floats per iteration */
+ vldx vr0, t4, a7
+ vldx vr1, t5, a7
+
+ vfsub.s vr3, vr0, vr1 //v1 - v2
+ vfadd.s vr4, vr0, vr1 //v1 + v2
+
+ vstx vr4, t4, a7 //sum goes back to v1
+ vstx vr3, t5, a7 //difference goes back to v2
+
+ addi.d a7, a7, 16
+ addi.d a6, a6, 1
+ blt a6, t0, .BFL01
+
+.BFL02: /* &2 */
+ andi t0, t6, 2
+ beq $r0, t0, .BFL03
+
+ add.d t1, t4, a7
+ add.d t2, t5, a7
+
+ fld.s f0, t1, 0x00
+ fld.s f1, t1, 0x04
+ fld.s f2, t2, 0x00
+ fld.s f3, t2, 0x04
+
+ fsub.s f4, f0, f2
+ fsub.s f5, f1, f3
+ fadd.s f6, f0, f2
+ fadd.s f7, f1, f3
+
+ fst.s f6, t1, 0x00
+ fst.s f7, t1, 0x04
+ fst.s f4, t2, 0x00
+ fst.s f5, t2, 0x04
+
+ addi.d a7, a7, 8
+
+.BFL03: /* &1 */
+ andi t0, t6, 1
+ beq $r0, t0, .BFL04
+
+ fldx.s f0, t4, a7
+ fldx.s f2, t5, a7
+
+ fsub.s f4, f0, f2
+ fadd.s f6, f0, f2
+
+ fstx.s f6, t4, a7
+ fstx.s f4, t5, a7
+
+ addi.d a7, a7, 4 //offset is not read again after this point
+
+.BFL04:
+endfunc
+
+
+/* void vector_fmul_scalar_lsx(float *dst, const float *src, float mul,
+ int len): dst[i] = src[i] * mul for i in [0, len); mul is read from f0 */
+function vector_fmul_scalar_lsx
+ move a6, $r0 //vector loop counter
+ move a7, $r0 //byte offset into src and dst
+
+ move t4, a0 //dst
+ move t5, a1 //src
+ move t6, a2 //len
+
+ vpermi.w vr0, vr0, 0x00 //copy lane 0 (mul) into all four lanes
+
+ srai.d t0, t6, 2 //number of 4-float iterations
+ beq a6, t0, .BFS02
+
+.BFS01: /* 4 floats per iteration */
+ vldx vr1, t5, a7
+
+ vfmul.s vr2, vr1, vr0
+
+ vstx vr2, t4, a7
+
+ addi.d a7, a7, 16
+ addi.d a6, a6, 1
+ blt a6, t0, .BFS01
+
+.BFS02: /* &2 */
+ andi t0, t6, 2
+ beq $r0, t0, .BFS03
+
+ add.d t1, t5, a7 //&src[i]
+ add.d t2, t4, a7 //&dst[i]
+
+ fld.s f1, t1, 0x00
+ fld.s f2, t1, 0x04
+
+ fmul.s f3, f1, f0
+ fmul.s f4, f2, f0
+
+ fst.s f3, t2, 0x00
+ fst.s f4, t2, 0x04
+
+ addi.d a7, a7, 8
+
+.BFS03: /* &1 */
+ andi t0, t6, 1
+ beq $r0, t0, .BFS04
+
+ fldx.s f1, t5, a7
+
+ fmul.s f3, f1, f0
+
+ fstx.s f3, t4, a7
+
+ addi.d a7, a7, 4 //offset is not read again after this point
+
+.BFS04:
+endfunc
\ No newline at end of file
diff --git a/libavutil/loongarch/float_dsp.h b/libavutil/loongarch/float_dsp.h
new file mode 100644
index 0000000000..644c1f3713
--- /dev/null
+++ b/libavutil/loongarch/float_dsp.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2024 Loongson Technology Corporation Limited
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_LOONGARCH_FLOAT_DSP_H
+#define AVUTIL_LOONGARCH_FLOAT_DSP_H
+
+#include "libavutil/float_dsp.h"
+
+void vector_fmul_window_lsx(float *dst, const float *src0,
+ const float *src1, const float *win, int len);
+
+void butterflies_float_lsx(float *restrict v1, float *restrict v2, int len);
+
+void vector_fmul_scalar_lsx(float *dst, const float *src, float mul, int len);
+
+#endif /* AVUTIL_LOONGARCH_FLOAT_DSP_H */
\ No newline at end of file
diff --git a/libavutil/loongarch/float_dsp_init_loongarch.c b/libavutil/loongarch/float_dsp_init_loongarch.c
new file mode 100644
index 0000000000..592ba78058
--- /dev/null
+++ b/libavutil/loongarch/float_dsp_init_loongarch.c
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "float_dsp.h"
+#include "libavutil/loongarch/cpu.h"
+
+/* Install the LSX float DSP routines when the CPU flags report LSX support. */
+av_cold void ff_float_dsp_init_loongarch(AVFloatDSPContext *fdsp)
+{
+    const int flags = av_get_cpu_flags();
+    if (!have_lsx(flags))
+        return;
+    fdsp->vector_fmul_scalar = vector_fmul_scalar_lsx;
+    fdsp->butterflies_float  = butterflies_float_lsx;
+    fdsp->vector_fmul_window = vector_fmul_window_lsx;
+}
--
2.20.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
* [FFmpeg-devel] [PATCH v1 2/2] avcodec/loongarch:add LSX optimization for aac audio encode
2024-04-09 12:37 [FFmpeg-devel] [PATCH v1 1/2] avutil/loongarch:add LSX optimization for aac audio decode pengxu
@ 2024-04-09 12:37 ` pengxu
0 siblings, 0 replies; 2+ messages in thread
From: pengxu @ 2024-04-09 12:37 UTC (permalink / raw)
To: ffmpeg-devel
Add functions:
ff_abs_pow34_lsx
ff_aac_quantize_bands_lsx
./ffmpeg -f s16le -ac 2 -i ../../1.pcm -c:a aac -f null -
before:37.5x
after:48.1x
---
libavcodec/aacencdsp.h | 3 +
libavcodec/loongarch/Makefile | 2 +
libavcodec/loongarch/aacencdsp.S | 255 ++++++++++++++++++
libavcodec/loongarch/aacencdsp.h | 35 +++
.../loongarch/aacencdsp_init_loongarch.c | 33 +++
5 files changed, 328 insertions(+)
create mode 100644 libavcodec/loongarch/aacencdsp.S
create mode 100644 libavcodec/loongarch/aacencdsp.h
create mode 100644 libavcodec/loongarch/aacencdsp_init_loongarch.c
diff --git a/libavcodec/aacencdsp.h b/libavcodec/aacencdsp.h
index 67836d8cf7..5db27a95a9 100644
--- a/libavcodec/aacencdsp.h
+++ b/libavcodec/aacencdsp.h
@@ -34,6 +34,7 @@ typedef struct AACEncDSPContext {
void ff_aacenc_dsp_init_riscv(AACEncDSPContext *s);
void ff_aacenc_dsp_init_x86(AACEncDSPContext *s);
+void ff_aacenc_dsp_init_loongarch(AACEncDSPContext *s);
static inline void abs_pow34_v(float *out, const float *in, const int size)
{
@@ -66,6 +67,8 @@ static inline void ff_aacenc_dsp_init(AACEncDSPContext *s)
ff_aacenc_dsp_init_riscv(s);
#elif ARCH_X86
ff_aacenc_dsp_init_x86(s);
+#elif ARCH_LOONGARCH64
+ ff_aacenc_dsp_init_loongarch(s);
#endif
}
diff --git a/libavcodec/loongarch/Makefile b/libavcodec/loongarch/Makefile
index 07da2964e4..483917d336 100644
--- a/libavcodec/loongarch/Makefile
+++ b/libavcodec/loongarch/Makefile
@@ -9,6 +9,7 @@ OBJS-$(CONFIG_HPELDSP) += loongarch/hpeldsp_init_loongarch.o
OBJS-$(CONFIG_IDCTDSP) += loongarch/idctdsp_init_loongarch.o
OBJS-$(CONFIG_VIDEODSP) += loongarch/videodsp_init.o
OBJS-$(CONFIG_HEVC_DECODER) += loongarch/hevcdsp_init_loongarch.o
+OBJS-$(CONFIG_AAC_ENCODER) += loongarch/aacencdsp_init_loongarch.o
LASX-OBJS-$(CONFIG_H264QPEL) += loongarch/h264qpel_lasx.o
LASX-OBJS-$(CONFIG_H264DSP) += loongarch/h264dsp_lasx.o \
loongarch/h264_deblock_lasx.o
@@ -38,3 +39,4 @@ LSX-OBJS-$(CONFIG_H264QPEL) += loongarch/h264qpel.o \
loongarch/h264qpel_lsx.o
LSX-OBJS-$(CONFIG_H264CHROMA) += loongarch/h264chroma.o
LSX-OBJS-$(CONFIG_H264PRED) += loongarch/h264intrapred.o
+LSX-OBJS-$(CONFIG_AAC_ENCODER) += loongarch/aacencdsp.o
diff --git a/libavcodec/loongarch/aacencdsp.S b/libavcodec/loongarch/aacencdsp.S
new file mode 100644
index 0000000000..a7cfd3bb1c
--- /dev/null
+++ b/libavcodec/loongarch/aacencdsp.S
@@ -0,0 +1,255 @@
+/*
+ * LoongArch LASX/LSX optimized AAC encoder DSP functions
+ *
+ * Copyright (c) 2024 Loongson Technology Corporation Limited
+ * Contributed by PengXu <pengxu@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "loongson_asm.S"
+
+
+/* void ff_abs_pow34_lsx(float *out, const float *in, const int size); */
+// Param, out:a0, in:a1, size:a2; out[i] = sqrt(|in[i]| * sqrt(|in[i]|))
+function ff_abs_pow34_lsx
+ move t0, zero //loop param
+ move t1, zero //data index
+
+ srai.d t2, a2, 2 //number of 4-float iterations
+ beq zero, t2, .FAPL02
+
+.FAPL01: /* /4 */
+ add.d t3, a1, t1
+ fld.s f0, t3, 0x00
+ fld.s f1, t3, 0x04
+ fld.s f2, t3, 0x08
+ fld.s f3, t3, 0x0c
+
+ fabs.s f0, f0
+ fabs.s f1, f1
+ fabs.s f2, f2
+ fabs.s f3, f3
+
+ vextrins.w vr0, vr1, 0x10 //gather the four |in| values into vr0
+ vextrins.w vr0, vr2, 0x20
+ vextrins.w vr0, vr3, 0x30
+
+ vfsqrt.s vr4, vr0
+ vfmul.s vr5, vr0, vr4
+ vfsqrt.s vr6, vr5
+
+ vstx vr6, a0, t1
+
+ addi.d t1, t1, 16
+ addi.d t0, t0, 1
+ blt t0, t2, .FAPL01
+
+.FAPL02: /* &2 */
+ andi t0, a2, 2
+ beq zero, t0, .FAPL03
+
+ add.d t3, a1, t1 //&in[i]
+ add.d t4, a0, t1 //&out[i]
+
+ fld.s f0, t3, 0x00
+ fld.s f1, t3, 0x04
+
+ fabs.s f0, f0
+ fabs.s f1, f1
+
+ fsqrt.s f2, f0
+ fsqrt.s f3, f1
+
+ fmul.s f4, f0, f2
+ fmul.s f5, f1, f3
+
+ fsqrt.s f6, f4
+ fsqrt.s f7, f5
+
+ fst.s f6, t4, 0x00 //store the results (a load here would discard them)
+ fst.s f7, t4, 0x04
+
+ addi.d t1, t1, 8
+
+.FAPL03: /* &1 */
+ andi t0, a2, 1
+ beq zero, t0, .FAPL04
+
+ fldx.s f0, a1, t1
+
+ fabs.s f0, f0
+ fsqrt.s f2, f0
+ fmul.s f4, f0, f2
+ fsqrt.s f6, f4
+
+ fstx.s f6, a0, t1 //store the result to out[i]
+
+ addi.d t1, t1, 4
+
+.FAPL04:
+endfunc
+
+
+
+/* void ff_aac_quantize_bands_lsx(int *out, const float *in, const float *scaled,
+ int size, int is_signed, int maxval, const float Q34,
+ const float rounding)
+ out[i] = (int)min(scaled[i] * Q34 + rounding, (float)maxval), negated when
+ is_signed is non-zero and in[i] < 0.0f. */
+// param:
+// out: a0, in: a1, scaled: a2
+// size: a3, is_signed: a4, maxval: a5
+// Q34: f0, rounding: f1
+// In the vector loop every lane is negated through f13: a single-precision
+// write to an FPR leaves bits 63:32 undefined, so negating f7 in place could
+// corrupt lane 1 of vr7.
+function ff_aac_quantize_bands_lsx
+ move t0, zero //loop param
+ move t1, zero //data index
+
+ vpermi.w vr0, vr0, 0x00 //Q34
+ vpermi.w vr1, vr1, 0x00 //rounding
+
+ srai.d t2, a3, 2 ////loop max
+ beq zero, t2, .FAQBL02
+
+.FAQBL01: /* /4 */
+ vldx vr2, a2, t1
+ vfmul.s vr3, vr2, vr0 //qc
+ vfadd.s vr4, vr3, vr1
+
+ movgr2fr.w f5, a5
+ ffint.s.w f5, f5
+ vpermi.w vr5, vr5, 0x00 //maxval
+ vfmin.s vr6, vr4, vr5
+ vfrintrz.s vr7, vr6 //(float .0)tmp
+
+ beq a4, zero, .S4ISEND
+ fsub.s f8, f0, f0
+ vshuf4i.w vr8, vr8, 0x00 //0.0f
+ vldx vr9, a1, t1 //in
+ vextrins.w vr10, vr9, 0x01
+ vextrins.w vr11, vr9, 0x02
+ vextrins.w vr12, vr9, 0x03
+ fcmp.clt.s $fcc0, f9, f8 //in[0] < 0.0f ?
+ bceqz $fcc0, .S4IS01
+ vextrins.w vr13, vr7, 0x00 //negate lane 0 via f13, like the other lanes
+ fneg.s f13, f13
+ vextrins.w vr7, vr13, 0x00
+.S4IS01:
+ fcmp.clt.s $fcc1, f10, f8
+ bceqz $fcc1, .S4IS02
+ vextrins.w vr13, vr7, 0x01
+ fneg.s f13, f13
+ vextrins.w vr7, vr13, 0x10
+.S4IS02:
+ fcmp.clt.s $fcc2, f11, f8
+ bceqz $fcc2, .S4IS03
+ vextrins.w vr13, vr7, 0x02
+ fneg.s f13, f13
+ vextrins.w vr7, vr13, 0x20
+.S4IS03:
+ fcmp.clt.s $fcc3, f12, f8
+ bceqz $fcc3, .S4ISEND
+ vextrins.w vr13, vr7, 0x03
+ fneg.s f13, f13
+ vextrins.w vr7, vr13, 0x30
+.S4ISEND:
+ vftintrz.w.s vr14, vr7
+ vstx vr14, a0, t1
+ addi.d t1, t1, 16
+ addi.d t0, t0, 1
+ blt t0, t2, .FAQBL01
+
+.FAQBL02: /* &2 */
+ andi t2, a3, 2
+ beq $r0, t2, .FAQBL03
+
+ add.d t2, a2, t1
+ fld.s f2, t2, 0x00
+ fld.s f3, t2, 0x04
+
+ fmul.s f2, f2, f0
+ fmul.s f3, f3, f0 //qc
+
+ fadd.s f2, f2, f1
+ fadd.s f3, f3, f1
+
+ movgr2fr.w f5, a5
+ ffint.s.w f5, f5 //maxval
+
+ fmin.s f2, f2, f5
+ fmin.s f3, f3, f5 //tmp
+
+ vextrins.w vr2, vr3, 0x10
+ vfrintrz.s vr2, vr2 //(float .0)tmp
+ vextrins.w vr3, vr2, 0x01
+
+ beq a4, zero, .S2ISEND
+
+ fsub.s f4, f0, f0
+ add.d t3, a1, t1
+ fld.s f6, t3, 0x00
+ fld.s f7, t3, 0x04 //in
+
+.S2IS00:
+ fcmp.clt.s $fcc0, f6, f4
+ bceqz $fcc0, .S2IS01
+ fneg.s f2, f2 //negate in place so f2 is valid on every path
+.S2IS01:
+ fcmp.clt.s $fcc1, f7, f4
+ bceqz $fcc1, .S2ISEND
+ fneg.s f3, f3
+.S2ISEND:
+ ftintrz.w.s f8, f2 //convert tmp itself; f8/f9 hold no value before this
+ ftintrz.w.s f9, f3
+ add.d t2, a0, t1
+ fst.s f8, t2, 0x00
+ fst.s f9, t2, 0x04
+ addi.d t1, t1, 8
+
+.FAQBL03: /* &1 */
+ andi t2, a3, 1
+ beq $r0, t2, .FAQBL04
+
+ fldx.s f2, a2, t1
+ fmul.s f2, f2, f0 //qc
+ fadd.s f2, f2, f1
+
+ movgr2fr.w f5, a5
+ ffint.s.w f5, f5 //maxval
+
+ fmin.s f2, f2, f5
+ vfrintrz.s vr2, vr2 //(float .0)tmp
+
+ beq a4, zero, .S1ISEND
+
+ fsub.s f4, f0, f0
+ fldx.s f6, a1, t1 //in
+
+.S1IS00:
+ fcmp.clt.s $fcc0, f6, f4
+ bceqz $fcc0, .S1ISEND
+ fneg.s f2, f2 //negate in place so f2 is valid on every path
+.S1ISEND:
+ ftintrz.w.s f8, f2
+ fstx.s f8, a0, t1
+ addi.d t1, t1, 4
+
+.FAQBL04:
+endfunc
\ No newline at end of file
diff --git a/libavcodec/loongarch/aacencdsp.h b/libavcodec/loongarch/aacencdsp.h
new file mode 100644
index 0000000000..076cd4d247
--- /dev/null
+++ b/libavcodec/loongarch/aacencdsp.h
@@ -0,0 +1,35 @@
+/*
+ * AAC encoder assembly optimizations
+ * Copyright (c) 2024 Loongson Technology Corporation Limited
+ * Contributed by PengXu <pengxu@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_LOONGARCH_AACENC_H
+#define AVCODEC_LOONGARCH_AACENC_H
+
+#include "libavutil/float_dsp.h"
+#include "libavutil/loongarch/cpu.h"
+#include "libavcodec/aacenc.h"
+
+void ff_abs_pow34_lsx(float *out, const float *in, const int size);
+void ff_aac_quantize_bands_lsx(int *out, const float *in, const float *scaled,
+ int size, int is_signed, int maxval, const float Q34,
+ const float rounding);
+
+#endif /* AVCODEC_LOONGARCH_AACENC_H */
\ No newline at end of file
diff --git a/libavcodec/loongarch/aacencdsp_init_loongarch.c b/libavcodec/loongarch/aacencdsp_init_loongarch.c
new file mode 100644
index 0000000000..3b33d50567
--- /dev/null
+++ b/libavcodec/loongarch/aacencdsp_init_loongarch.c
@@ -0,0 +1,33 @@
+/*
+ * AAC encoder assembly optimizations
+ * Copyright (c) 2024 Loongson Technology Corporation Limited
+ * Contributed by PengXu <pengxu@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacencdsp.h"
+
+/* Select the LSX AAC encoder DSP routines when the CPU flags report LSX. */
+av_cold void ff_aacenc_dsp_init_loongarch(AACEncDSPContext *s)
+{
+    const int flags = av_get_cpu_flags();
+    if (!have_lsx(flags))
+        return;
+    s->quant_bands = ff_aac_quantize_bands_lsx;
+    s->abs_pow34   = ff_abs_pow34_lsx;
+}
\ No newline at end of file
--
2.20.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-04-09 12:38 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-09 12:37 [FFmpeg-devel] [PATCH v1 1/2] avutil/loongarch:add LSX optimization for aac audio decode pengxu
2024-04-09 12:37 ` [FFmpeg-devel] [PATCH v1 2/2] avcodec/loongarch:add LSX optimization for aac audio encode pengxu
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git