From: James Almer <jamrial@gmail.com> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH 3/3] x86/blockdsp: add sse2 and avx2 versions of fill_block_tab Date: Tue, 7 May 2024 12:02:05 -0300 Message-ID: <20240507150205.2039-2-jamrial@gmail.com> (raw) In-Reply-To: <20240507150205.2039-1-jamrial@gmail.com> Signed-off-by: James Almer <jamrial@gmail.com> --- libavcodec/x86/blockdsp.asm | 33 +++++++++++++++++++++++++++++++++ libavcodec/x86/blockdsp_init.c | 13 +++++++++++++ 2 files changed, 46 insertions(+) diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm index e380308d4a..cccc9a801a 100644 --- a/libavcodec/x86/blockdsp.asm +++ b/libavcodec/x86/blockdsp.asm @@ -80,3 +80,36 @@ INIT_XMM sse CLEAR_BLOCKS 1 INIT_YMM avx CLEAR_BLOCKS 1 + +;----------------------------------------- +; void ff_fill_block_tab_%1(uint8_t *block, uint8_t value, +; ptrdiff_t line_size, int h); +;----------------------------------------- +%macro FILL_BLOCK_TAB 2 +cglobal fill_block_tab_%1, 4, 5, 1, block, value, stride, h, stride3 + lea stride3q, [strideq + strideq * 2] +%if cpuflag(avx2) + movd m0, valued + vpbroadcastb m0, m0 +%else + SPLATB_REG m0, value, x +%endif +.loop: + mov%2 [blockq], m0 + mov%2 [blockq + strideq], m0 + mov%2 [blockq + strideq * 2], m0 + mov%2 [blockq + stride3q], m0 + lea blockq, [blockq + strideq * 4] + sub hd, 4 + jg .loop + RET +%endmacro + +INIT_XMM sse2 +FILL_BLOCK_TAB 8, q +FILL_BLOCK_TAB 16, a +%if HAVE_AVX2_EXTERNAL +INIT_XMM avx2 +FILL_BLOCK_TAB 8, q +FILL_BLOCK_TAB 16, a +%endif diff --git a/libavcodec/x86/blockdsp_init.c b/libavcodec/x86/blockdsp_init.c index 996124114f..37f3bb6a84 100644 --- a/libavcodec/x86/blockdsp_init.c +++ b/libavcodec/x86/blockdsp_init.c @@ -29,6 +29,11 @@ void ff_clear_block_avx(int16_t *block); void ff_clear_blocks_sse(int16_t *blocks); void ff_clear_blocks_avx(int16_t *blocks); +void ff_fill_block_tab_16_sse2(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h); +void ff_fill_block_tab_8_sse2(uint8_t 
*block, uint8_t value, ptrdiff_t line_size, int h); +void ff_fill_block_tab_16_avx2(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h); +void ff_fill_block_tab_8_avx2(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h); + av_cold void ff_blockdsp_init_x86(BlockDSPContext *c) { #if HAVE_X86ASM @@ -38,9 +43,17 @@ av_cold void ff_blockdsp_init_x86(BlockDSPContext *c) c->clear_block = ff_clear_block_sse; c->clear_blocks = ff_clear_blocks_sse; } + if (EXTERNAL_SSE2(cpu_flags)) { + c->fill_block_tab[0] = ff_fill_block_tab_16_sse2; + c->fill_block_tab[1] = ff_fill_block_tab_8_sse2; + } if (EXTERNAL_AVX_FAST(cpu_flags)) { c->clear_block = ff_clear_block_avx; c->clear_blocks = ff_clear_blocks_avx; } + if (EXTERNAL_AVX2(cpu_flags)) { + c->fill_block_tab[0] = ff_fill_block_tab_16_avx2; + c->fill_block_tab[1] = ff_fill_block_tab_8_avx2; + } #endif /* HAVE_X86ASM */ } -- 2.45.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-05-07 15:02 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-07 0:27 [FFmpeg-devel] [PATCH] checkasm/blockdsp: don't randomize the buffers for fill_block_tab James Almer
2024-05-07 5:44 ` Martin Storsjö
2024-05-07 10:49 ` Andreas Rheinhardt
2024-05-07 10:52 ` Martin Storsjö
2024-05-07 11:11 ` James Almer
2024-05-07 15:02 ` [FFmpeg-devel] [PATCH 2/3] checkasm/blockdsp: use smallest allowed aligned buffers for fill_block_tab tests James Almer
2024-05-07 15:02 ` James Almer [this message]
2024-05-07 15:10 ` [FFmpeg-devel] [PATCH 3/3] x86/blockdsp: add sse2 and avx2 versions of fill_block_tab Andreas Rheinhardt
2024-05-07 15:15 ` James Almer
2024-05-07 15:14 ` [FFmpeg-devel] [PATCH 2/3] checkasm/blockdsp: use smallest allowed aligned buffers for fill_block_tab tests Andreas Rheinhardt
2024-05-07 15:26 ` James Almer
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240507150205.2039-2-jamrial@gmail.com \
    --to=jamrial@gmail.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git