From: Paul B Mahol <onemda@gmail.com> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] avutil/x86/float_dsp: add fma3 for scalarproduct Date: Mon, 12 Sep 2022 17:22:58 +0200 Message-ID: <CAPYw7P4ivOABgsb6YzPZnqkEf0FFpma7WvWx0jhAcTeP6=1Ujg@mail.gmail.com> (raw) [-- Attachment #1: Type: text/plain, Size: 16 bytes --] Patch attached. [-- Attachment #2: 0001-avutil-x86-float_dsp-add-fma3-for-scalarproduct.patch --] [-- Type: text/x-patch, Size: 4782 bytes --] From f7c47b8eefa1c06a74d17f13b4e9010785dc6430 Mon Sep 17 00:00:00 2001 From: Paul B Mahol <onemda@gmail.com> Date: Wed, 20 Jan 2021 16:58:31 +0100 Subject: [PATCH] avutil/x86/float_dsp: add fma3 for scalarproduct Signed-off-by: Paul B Mahol <onemda@gmail.com> --- libavutil/x86/float_dsp.asm | 127 +++++++++++++++++++++++++++++++++ libavutil/x86/float_dsp_init.c | 2 + 2 files changed, 129 insertions(+) diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index cca4d019c7..8f8e6dddf5 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -440,6 +440,133 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset %endif RET +INIT_YMM fma3 +cglobal scalarproduct_float, 3,5,8, v1, v2, size, len, offset + xor offsetq, offsetq + xorps m0, m0 + shl sized, 2 + mov lenq, sizeq + cmp lenq, 32 + jl .l16 + cmp lenq, 64 + jl .l32 + xorps m1, m1 + cmp lenq, 128 + jl .l64 + and lenq, ~127 + xorps m2, m2 + xorps m3, m3 +.loop128: + movups m4, [v1q+offsetq] + movups m5, [v1q+offsetq + 32] + movups m6, [v1q+offsetq + 64] + movups m7, [v1q+offsetq + 96] + fmaddps m0, m4, [v2q+offsetq ], m0 + fmaddps m1, m5, [v2q+offsetq + 32], m1 + fmaddps m2, m6, [v2q+offsetq + 64], m2 + fmaddps m3, m7, [v2q+offsetq + 96], m3 + add offsetq, 128 + cmp offsetq, lenq + jl .loop128 + addps m0, m2 + addps m1, m3 + mov lenq, sizeq + and lenq, 127 + cmp lenq, 64 + jge .l64 + addps m0, m1 + cmp lenq, 32 + jge .l32 + vextractf128 xmm2, m0, 1 + addps xmm0, xmm2 + cmp lenq, 16 + jge .l16 + movhlps xmm1, xmm0 + addps xmm0, xmm1 + movss xmm1, xmm0 + shufps xmm0, xmm0, 1 + addss xmm0, xmm1 +%if ARCH_X86_64 == 0 + movss r0m, xm0 + fld dword r0m +%endif + RET +.l64: + and lenq, ~63 + add lenq, offsetq +.loop64: + movups m4, [v1q+offsetq] + movups m5, [v1q+offsetq + 32] + fmaddps m0, m4, [v2q+offsetq], m0 + fmaddps m1, m5, [v2q+offsetq + 32], m1 + add offsetq, 64 + cmp offsetq, lenq + jl .loop64 + addps m0, m1 + mov lenq, sizeq + and lenq, 63 + cmp lenq, 32 + jge .l32 + vextractf128 xmm2, m0, 1 + addps xmm0, xmm2 + cmp lenq, 16 + jge .l16 + movhlps xmm1, xmm0 + addps xmm0, xmm1 + movss xmm1, xmm0 + shufps xmm0, xmm0, 1 + addss xmm0, xmm1 +%if ARCH_X86_64 == 0 + movss r0m, xm0 + fld dword r0m +%endif + RET +.l32: + and lenq, ~31 + add lenq, offsetq +.loop32: + movups m4, [v1q+offsetq] + fmaddps m0, m4, [v2q+offsetq], m0 + add offsetq, 32 + cmp offsetq, lenq + jl .loop32 + vextractf128 xmm2, m0, 1 + addps xmm0, xmm2 + mov lenq, sizeq + and lenq, 31 + cmp lenq, 16 + jge .l16 + movhlps xmm1, xmm0 + addps xmm0, xmm1 + movss xmm1, xmm0 + shufps xmm0, xmm0, 1 + addss xmm0, xmm1 +%if ARCH_X86_64 == 0 + movss r0m, xm0 + fld dword r0m +%endif + RET +.l16: + and lenq, ~15 + add lenq, offsetq +.loop16: + movaps xmm1, [v1q+offsetq] + mulps xmm1, [v2q+offsetq] + addps xmm0, xmm1 + add offsetq, 16 + cmp offsetq, lenq + jl .loop16 + movhlps xmm1, xmm0 + addps xmm0, xmm1 + movss xmm1, xmm0 + shufps xmm0, xmm0, 1 + addss xmm0, xmm1 +%if ARCH_X86_64 == 0 + movss r0m, xm0 + fld dword r0m +%endif + RET + ;----------------------------------------------------------------------------- ; void ff_butterflies_float(float *src0, float *src1, int len); ;----------------------------------------------------------------------------- diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c index ad17bc2044..ad6b506259 100644 --- a/libavutil/x86/float_dsp_init.c +++ b/libavutil/x86/float_dsp_init.c @@ -74,6 +74,7 @@ void ff_vector_fmul_reverse_avx2(float *dst, const float *src0, const float *src1, int len); float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); +float ff_scalarproduct_float_fma3(const float *v1, const float *v2, int order); void ff_butterflies_float_sse(float *av_restrict src0, float *av_restrict src1, int len); @@ -112,5 +113,6 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3; fdsp->vector_fmul_add = ff_vector_fmul_add_fma3; fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_fma3; + fdsp->scalarproduct_float = ff_scalarproduct_float_fma3; } } -- 2.37.2 [-- Attachment #3: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
reply other threads:[~2022-09-12 15:23 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to='CAPYw7P4ivOABgsb6YzPZnqkEf0FFpma7WvWx0jhAcTeP6=1Ujg@mail.gmail.com' \ --to=onemda@gmail.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git