* [FFmpeg-devel] [PATCH] x86/float_dsp: add SSE2 and AVX versions of scalarproduct_double
@ 2024-05-31 19:47 James Almer
2024-06-03 2:39 ` James Almer
0 siblings, 1 reply; 2+ messages in thread
From: James Almer @ 2024-05-31 19:47 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavutil/x86/float_dsp.asm | 52 ++++++++++++++++++++++++++++++++++
libavutil/x86/float_dsp_init.c | 5 ++++
2 files changed, 57 insertions(+)
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index e84ba52566..e9816cdf02 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -567,6 +567,58 @@ cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
%endif
RET
+;---------------------------------------------------------------------------------
+; double scalarproduct_double(const double *v1, const double *v2, size_t len)
+;---------------------------------------------------------------------------------
+%macro SCALARPRODUCT_DOUBLE 0
+cglobal scalarproduct_double, 3,3,8, v1, v2, offset
+ shl offsetq, 3
+ add v1q, offsetq
+ add v2q, offsetq
+ neg offsetq
+ xorpd m0, m0
+ xorpd m1, m1
+ xorpd m2, m2
+ xorpd m3, m3
+align 16
+.loop:
+ movapd m4, [v1q+offsetq+mmsize*0]
+ movapd m5, [v1q+offsetq+mmsize*1]
+ movapd m6, [v1q+offsetq+mmsize*2]
+ movapd m7, [v1q+offsetq+mmsize*3]
+ mulpd m4, [v2q+offsetq+mmsize*0]
+ mulpd m5, [v2q+offsetq+mmsize*1]
+ mulpd m6, [v2q+offsetq+mmsize*2]
+ mulpd m7, [v2q+offsetq+mmsize*3]
+ addpd m0, m4
+ addpd m1, m5
+ addpd m2, m6
+ addpd m3, m7
+ add offsetq, mmsize*4
+ jl .loop
+ addpd m0, m1
+ addpd m2, m3
+ addpd m0, m2
+%if mmsize == 32
+ vextractf128 xm1, m0, 1
+ addpd xm0, xm1
+%endif
+ movhlps xm1, xm0
+ addpd xm0, xm1
+%if ARCH_X86_64 == 0
+ movsd r0m, xm0
+ fld qword r0m
+%endif
+ RET
+%endmacro
+
+INIT_XMM sse2
+SCALARPRODUCT_DOUBLE
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+SCALARPRODUCT_DOUBLE
+%endif
+
;-----------------------------------------------------------------------------
; void ff_butterflies_float(float *src0, float *src1, int len);
;-----------------------------------------------------------------------------
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 093bce9b94..6cf0b4a277 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -73,6 +73,9 @@ void ff_vector_fmul_reverse_avx2(float *dst, const float *src0,
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
float ff_scalarproduct_float_fma3(const float *v1, const float *v2, int order);
+double ff_scalarproduct_double_sse2(const double *v1, const double *v2, size_t order);
+double ff_scalarproduct_double_avx(const double *v1, const double *v2, size_t order);
+
void ff_butterflies_float_sse(float *restrict src0, float *restrict src1, int len);
av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
@@ -93,6 +96,7 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_dmul = ff_vector_dmul_sse2;
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_sse2;
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
+ fdsp->scalarproduct_double = ff_scalarproduct_double_sse2;
}
if (EXTERNAL_AVX_FAST(cpu_flags)) {
fdsp->vector_fmul = ff_vector_fmul_avx;
@@ -102,6 +106,7 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_avx;
fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
+ fdsp->scalarproduct_double = ff_scalarproduct_double_avx;
}
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx2;
--
2.45.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [FFmpeg-devel] [PATCH] x86/float_dsp: add SSE2 and AVX versions of scalarproduct_double
2024-05-31 19:47 [FFmpeg-devel] [PATCH] x86/float_dsp: add SSE2 and AVX versions of scalarproduct_double James Almer
@ 2024-06-03 2:39 ` James Almer
0 siblings, 0 replies; 2+ messages in thread
From: James Almer @ 2024-06-03 2:39 UTC (permalink / raw)
To: ffmpeg-devel
On 5/31/2024 4:47 PM, James Almer wrote:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> libavutil/x86/float_dsp.asm | 52 ++++++++++++++++++++++++++++++++++
> libavutil/x86/float_dsp_init.c | 5 ++++
> 2 files changed, 57 insertions(+)
>
> diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
> index e84ba52566..e9816cdf02 100644
> --- a/libavutil/x86/float_dsp.asm
> +++ b/libavutil/x86/float_dsp.asm
> @@ -567,6 +567,58 @@ cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset
> %endif
> RET
>
> +;---------------------------------------------------------------------------------
> +; double scalarproduct_double(const double *v1, const double *v2, size_t len)
> +;---------------------------------------------------------------------------------
> +%macro SCALARPRODUCT_DOUBLE 0
> +cglobal scalarproduct_double, 3,3,8, v1, v2, offset
> + shl offsetq, 3
> + add v1q, offsetq
> + add v2q, offsetq
> + neg offsetq
> + xorpd m0, m0
> + xorpd m1, m1
> + xorpd m2, m2
> + xorpd m3, m3
> +align 16
> +.loop:
> + movapd m4, [v1q+offsetq+mmsize*0]
> + movapd m5, [v1q+offsetq+mmsize*1]
> + movapd m6, [v1q+offsetq+mmsize*2]
> + movapd m7, [v1q+offsetq+mmsize*3]
> + mulpd m4, [v2q+offsetq+mmsize*0]
> + mulpd m5, [v2q+offsetq+mmsize*1]
> + mulpd m6, [v2q+offsetq+mmsize*2]
> + mulpd m7, [v2q+offsetq+mmsize*3]
> + addpd m0, m4
> + addpd m1, m5
> + addpd m2, m6
> + addpd m3, m7
> + add offsetq, mmsize*4
> + jl .loop
> + addpd m0, m1
> + addpd m2, m3
> + addpd m0, m2
> +%if mmsize == 32
> + vextractf128 xm1, m0, 1
> + addpd xm0, xm1
> +%endif
> + movhlps xm1, xm0
> + addpd xm0, xm1
> +%if ARCH_X86_64 == 0
> + movsd r0m, xm0
> + fld qword r0m
> +%endif
> + RET
> +%endmacro
> +
> +INIT_XMM sse2
> +SCALARPRODUCT_DOUBLE
> +%if HAVE_AVX_EXTERNAL
> +INIT_YMM avx
> +SCALARPRODUCT_DOUBLE
> +%endif
> +
> ;-----------------------------------------------------------------------------
> ; void ff_butterflies_float(float *src0, float *src1, int len);
> ;-----------------------------------------------------------------------------
> diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
> index 093bce9b94..6cf0b4a277 100644
> --- a/libavutil/x86/float_dsp_init.c
> +++ b/libavutil/x86/float_dsp_init.c
> @@ -73,6 +73,9 @@ void ff_vector_fmul_reverse_avx2(float *dst, const float *src0,
> float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
> float ff_scalarproduct_float_fma3(const float *v1, const float *v2, int order);
>
> +double ff_scalarproduct_double_sse2(const double *v1, const double *v2, size_t order);
> +double ff_scalarproduct_double_avx(const double *v1, const double *v2, size_t order);
> +
> void ff_butterflies_float_sse(float *restrict src0, float *restrict src1, int len);
>
> av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
> @@ -93,6 +96,7 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
> fdsp->vector_dmul = ff_vector_dmul_sse2;
> fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_sse2;
> fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
> + fdsp->scalarproduct_double = ff_scalarproduct_double_sse2;
> }
> if (EXTERNAL_AVX_FAST(cpu_flags)) {
> fdsp->vector_fmul = ff_vector_fmul_avx;
> @@ -102,6 +106,7 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
> fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_avx;
> fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
> fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
> + fdsp->scalarproduct_double = ff_scalarproduct_double_avx;
> }
> if (EXTERNAL_AVX2_FAST(cpu_flags)) {
> fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx2;
Will apply.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-06-03 2:39 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-31 19:47 [FFmpeg-devel] [PATCH] x86/float_dsp: add SSE2 and AVX versions of scalarproduct_double James Almer
2024-06-03 2:39 ` James Almer
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git