From: "J. Dekker" <jdek@itanimul.li> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH] avcodec/x86/hevc: fix luma 12b overflow Date: Sun, 25 Feb 2024 09:27:55 +0100 Message-ID: <20240225082755.355295-1-jdek@itanimul.li> (raw) Weak filter can overflow in delta0 calculation before >> 4 in int16. Signed-off-by: J. Dekker <jdek@itanimul.li> --- I do not know x86 simd at all, so this is just an attempt to fix the implementation rather than write extremely performant code. Suggestions welcome. libavcodec/x86/hevc_deblock.asm | 47 +++++++++++++++++++++++++++++++++ libavcodec/x86/hevcdsp_init.c | 8 ------ 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm index 85ee4800bb..ce9221ebc7 100644 --- a/libavcodec/x86/hevc_deblock.asm +++ b/libavcodec/x86/hevc_deblock.asm @@ -541,6 +541,7 @@ ALIGN 16 add betaq, r13 shr betaq, 3; ((beta + (beta >> 1)) >> 3)) +%if %1 < 12 mova m13, [pw_8] psubw m12, m4, m3 ; q0 - p0 psllw m10, m12, 3; 8 * (q0 - p0) @@ -553,7 +554,49 @@ ALIGN 16 paddw m12, m13; + 8 psraw m12, 4; >> 4 , delta0 PABSW m13, m12; abs(delta0) +%else + psubw m12, m4, m3 ; q0 - p0 + pmovsxwd m13, m12 ; m13 low + movhlps m12, m12 + pmovsxwd m12, m12 ; m12 high + + ; m8 low, m10 high + pslld m8, m13, 3; 8 * (q0 - p0) + pslld m10, m12, 3 + + paddd m8, m13 ; 9 * (q0 - p0) + paddd m10, m12 + + psubw m12, m5, m2 ; q1 - p1 + pmovsxwd m13, m12 ; m13 low + movhlps m12, m12 + pmovsxwd m12, m12 ; m12 high + psubd m8, m13 ; 9 * (q0 - p0) - ( q1 - p1 ) + psubd m10, m12 + + pslld m13, m13, 1; 2 * ( q1 - p1 ) + pslld m12, m12, 1 + + psubd m8, m13; 9 * (q0 - p0) - 3 * ( q1 - p1 ) + psubd m10, m12 + + mova m13, [pw_8] + pmovsxwd m13, m13 + + paddd m8, m13 ; + 8 + paddd m10, m13 + + psrad m8, 4; >> 4 , delta0 + psrad m10, 4 + + packssdw m12, m8 + packssdw m10, m10 + + psrldq m12, 8 + punpcklqdq m12, m10 + PABSW m13, m12; abs(delta0) +%endif psllw m10, m9, 2; 8 * tc paddw m10, m9; 10 * tc @@ -746,6 +789,7 
@@ cglobal hevc_v_loop_filter_luma_10, 4, 14, 16, pix, stride, beta, tc, pix0, src3 .bypassluma: RET +%if cpuflag(avx) cglobal hevc_v_loop_filter_luma_12, 4, 14, 16, pix, stride, beta, tc, pix0, src3stride sub pixq, 8 lea pix0q, [3 * strideq] @@ -757,6 +801,7 @@ cglobal hevc_v_loop_filter_luma_12, 4, 14, 16, pix, stride, beta, tc, pix0, src3 TRANSPOSE8x8W_STORE PASS8ROWS(src3strideq, pixq, r1, pix0q), [pw_pixel_max_12] .bypassluma: RET +%endif ;----------------------------------------------------------------------------- ; void ff_hevc_h_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int beta, @@ -829,6 +874,7 @@ cglobal hevc_h_loop_filter_luma_10, 4, 14, 16, pix, stride, beta, tc, pix0, src3 .bypassluma: RET +%if cpuflag(avx) cglobal hevc_h_loop_filter_luma_12, 4, 14, 16, pix, stride, beta, tc, pix0, src3stride lea src3strideq, [3 * strideq] mov pix0q, pixq @@ -859,6 +905,7 @@ cglobal hevc_h_loop_filter_luma_12, 4, 14, 16, pix, stride, beta, tc, pix0, src3 movdqu [pixq + 2 * strideq], m6; q2 .bypassluma: RET +%endif %endmacro diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index f5bc342cd5..e3fcb7b591 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -1205,10 +1205,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_SSE2(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2; - if (ARCH_X86_64) { - c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; - } SAO_BAND_INIT(12, sse2); SAO_EDGE_INIT(12, sse2); @@ -1216,10 +1212,6 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_sse2; c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_sse2; } - if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { - c->hevc_v_loop_filter_luma = 
ff_hevc_v_loop_filter_luma_12_ssse3; - c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3; - } if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) { EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4); EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h, 12, sse4); -- 2.43.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2024-02-25 8:28 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-25 8:27 J. Dekker [this message]
2024-02-25 15:56 ` Ronald S. Bultje
2024-02-25 16:22 ` Ronald S. Bultje
2024-02-25 16:24 ` Ronald S. Bultje
2024-02-25 16:28 ` James Almer
2024-02-25 16:41 ` Ronald S. Bultje
2024-02-25 22:30 ` Henrik Gramner via ffmpeg-devel
2024-02-25 23:00 ` Ronald S. Bultje
2024-02-26 14:26 ` J. Dekker
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240225082755.355295-1-jdek@itanimul.li \
    --to=jdek@itanimul.li \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git