From: James Almer <jamrial@gmail.com> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH] x86/float_dsp: add SSE2 and AVX versions of scalarproduct_double Date: Fri, 31 May 2024 16:47:08 -0300 Message-ID: <20240531194708.6146-1-jamrial@gmail.com> (raw) Signed-off-by: James Almer <jamrial@gmail.com> --- libavutil/x86/float_dsp.asm | 52 ++++++++++++++++++++++++++++++++++ libavutil/x86/float_dsp_init.c | 5 ++++ 2 files changed, 57 insertions(+) diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index e84ba52566..e9816cdf02 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -567,6 +567,58 @@ cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset %endif RET +;--------------------------------------------------------------------------------- +; double scalarproduct_double(const double *v1, const double *v2, size_t len) +;--------------------------------------------------------------------------------- +%macro SCALARPRODUCT_DOUBLE 0 +cglobal scalarproduct_double, 3,3,8, v1, v2, offset + shl offsetq, 3 + add v1q, offsetq + add v2q, offsetq + neg offsetq + xorpd m0, m0 + xorpd m1, m1 + xorpd m2, m2 + xorpd m3, m3 +align 16 +.loop: + movapd m4, [v1q+offsetq+mmsize*0] + movapd m5, [v1q+offsetq+mmsize*1] + movapd m6, [v1q+offsetq+mmsize*2] + movapd m7, [v1q+offsetq+mmsize*3] + mulpd m4, [v2q+offsetq+mmsize*0] + mulpd m5, [v2q+offsetq+mmsize*1] + mulpd m6, [v2q+offsetq+mmsize*2] + mulpd m7, [v2q+offsetq+mmsize*3] + addpd m0, m4 + addpd m1, m5 + addpd m2, m6 + addpd m3, m7 + add offsetq, mmsize*4 + jl .loop + addpd m0, m1 + addpd m2, m3 + addpd m0, m2 +%if mmsize == 32 + vextractf128 xm1, m0, 1 + addpd xm0, xm1 +%endif + movhlps xm1, xm0 + addpd xm0, xm1 +%if ARCH_X86_64 == 0 + movsd r0m, xm0 + fld qword r0m +%endif + RET +%endmacro + +INIT_XMM sse2 +SCALARPRODUCT_DOUBLE +%if HAVE_AVX_EXTERNAL +INIT_YMM avx +SCALARPRODUCT_DOUBLE +%endif + ;----------------------------------------------------------------------------- ; 
void ff_butterflies_float(float *src0, float *src1, int len); ;----------------------------------------------------------------------------- diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c index 093bce9b94..6cf0b4a277 100644 --- a/libavutil/x86/float_dsp_init.c +++ b/libavutil/x86/float_dsp_init.c @@ -73,6 +73,9 @@ void ff_vector_fmul_reverse_avx2(float *dst, const float *src0, float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); float ff_scalarproduct_float_fma3(const float *v1, const float *v2, int order); +double ff_scalarproduct_double_sse2(const double *v1, const double *v2, size_t order); +double ff_scalarproduct_double_avx(const double *v1, const double *v2, size_t order); + void ff_butterflies_float_sse(float *restrict src0, float *restrict src1, int len); av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) @@ -93,6 +96,7 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) fdsp->vector_dmul = ff_vector_dmul_sse2; fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_sse2; fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; + fdsp->scalarproduct_double = ff_scalarproduct_double_sse2; } if (EXTERNAL_AVX_FAST(cpu_flags)) { fdsp->vector_fmul = ff_vector_fmul_avx; @@ -102,6 +106,7 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_avx; fdsp->vector_fmul_add = ff_vector_fmul_add_avx; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx; + fdsp->scalarproduct_double = ff_scalarproduct_double_avx; } if (EXTERNAL_AVX2_FAST(cpu_flags)) { fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx2; -- 2.45.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply | other threads: [~2024-05-31 19:47 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-31 19:47 James Almer [this message]
2024-06-03  2:39 ` James Almer
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240531194708.6146-1-jamrial@gmail.com \
    --to=jamrial@gmail.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git