From: Stone Chen <chen.stonechen@gmail.com> To: ffmpeg-devel@ffmpeg.org Cc: Stone Chen <chen.stonechen@gmail.com> Subject: [FFmpeg-devel] [PATCH v4 1/2][GSoC 2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC Date: Sun, 19 May 2024 20:42:01 -0400 Message-ID: <20240520004210.11489-2-chen.stonechen@gmail.com> (raw) Implements AVX2 DMVR (decoder-side motion vector refinement) SAD functions. DMVR SAD is only calculated if w >= 8, h >= 8, and w * h > 128. To reduce complexity, SAD is only calculated on even rows. SAD is calculated for all video bit depths, but the values passed to the function are always 16-bit (even if the original video bit depth is 8). The AVX2 implementation computes each absolute difference as max(a, b) - min(a, b) using unsigned min/max/sub (pminuw/pmaxuw/psubusw). Benchmarks (AMD 7940HS) Before: BQTerrace_1920x1080_60_10_420_22_RA.vvc | 106.0 | Chimera_8bit_1080P_1000_frames.vvc | 204.3 | NovosobornayaSquare_1920x1080.bin | 197.3 | RitualDance_1920x1080_60_10_420_37_RA.266 | 174.0 | After: BQTerrace_1920x1080_60_10_420_22_RA.vvc | 109.3 | Chimera_8bit_1080P_1000_frames.vvc | 216.0 | NovosobornayaSquare_1920x1080.bin | 204.0 | RitualDance_1920x1080_60_10_420_37_RA.266 | 181.7 | --- libavcodec/x86/vvc/Makefile | 3 +- libavcodec/x86/vvc/vvc_sad.asm | 138 +++++++++++++++++++++++++++++++ libavcodec/x86/vvc/vvcdsp_init.c | 6 ++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 libavcodec/x86/vvc/vvc_sad.asm diff --git a/libavcodec/x86/vvc/Makefile b/libavcodec/x86/vvc/Makefile index d6a66f860a..7b2438ce17 100644 --- a/libavcodec/x86/vvc/Makefile +++ b/libavcodec/x86/vvc/Makefile @@ -5,4 +5,5 @@ OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/vvcdsp_init.o \ x86/h26x/h2656dsp.o X86ASM-OBJS-$(CONFIG_VVC_DECODER) += x86/vvc/vvc_alf.o \ x86/vvc/vvc_mc.o \ - x86/h26x/h2656_inter.o + x86/vvc/vvc_sad.o \ + x86/h26x/h2656_inter.o diff --git a/libavcodec/x86/vvc/vvc_sad.asm b/libavcodec/x86/vvc/vvc_sad.asm new file mode 100644 index 0000000000..58a24635d2 --- /dev/null +++ b/libavcodec/x86/vvc/vvc_sad.asm @@ -0,0 +1,138 @@ +; /* +; * 
Provide SIMD DMVR SAD functions for VVC decoding +; * +; * Copyright (c) 2024 Stone Chen +; * +; * This file is part of FFmpeg. +; * +; * FFmpeg is free software; you can redistribute it and/or +; * modify it under the terms of the GNU Lesser General Public +; * License as published by the Free Software Foundation; either +; * version 2.1 of the License, or (at your option) any later version. +; * +; * FFmpeg is distributed in the hope that it will be useful, +; * but WITHOUT ANY WARRANTY; without even the implied warranty of +; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +; * Lesser General Public License for more details. +; * +; * You should have received a copy of the GNU Lesser General Public +; * License along with FFmpeg; if not, write to the Free Software +; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +; */ + +%include "libavutil/x86/x86util.asm" +%define MAX_PB_SIZE 128 +%define ROWS 2 + +SECTION_RODATA + +pw_1: dw 1 + +; DMVR SAD is only calculated on even rows to reduce complexity +SECTION .text + +%macro MIN_MAX_SAD 3 ; + pminuw %3, %2, %1 + pmaxuw %1, %2, %1 + psubusw %1, %1, %3 +%endmacro + +%macro HORIZ_ADD 3 ; xm0, xm1, m1 + vextracti128 %1, %3, q0001 ; 3 2 1 0 + paddd %1, %2 ; xm0 (7 + 3) (6 + 2) (5 + 1) (4 + 0) + pshufd %2, %1, q0032 ; xm1 - - (7 + 3) (6 + 2) + paddd %1, %1, %2 ; xm0 _ _ (5 1 7 3) (4 0 6 2) + pshufd %2, %1, q0001 ; xm1 _ _ (5 1 7 3) (5 1 7 3) + paddd %1, %1, %2 ; (01234567) +%endmacro + +%macro INIT_OFFSET 6 ; src1, src2, dxq, dyq, off1, off2 + sub %3, 2 + sub %4, 2 + + mov %5, 2 + mov %6, 2 + + add %5, %4 + sub %6, %4 + + imul %5, 128 + imul %6, 128 + + add %5, 2 + add %6, 2 + + add %5, %3 + sub %6, %3 + + lea %1, [%1 + %5 * 2] + lea %2, [%2 + %6 * 2] +%endmacro + +%if ARCH_X86_64 +%if HAVE_AVX2_EXTERNAL + +INIT_YMM avx2 + +cglobal vvc_sad, 6, 11, 5, src1, src2, dx, dy, block_w, block_h, off1, off2, row_idx, dx2, dy2 + movsxd dx2q, dxd + movsxd dy2q, dyd + INIT_OFFSET 
src1q, src2q, dx2q, dy2q, off1q, off2q + pxor m3, m3 + vpbroadcastw m4, [pw_1] + + cmp block_wd, 16 + jge vvc_sad_16_128 + + vvc_sad_8: + .loop_height: + movu xm0, [src1q] + vinserti128 m0, [src1q + MAX_PB_SIZE * ROWS * 2], 1 + movu xm1, [src2q] + vinserti128 m1, [src2q + MAX_PB_SIZE * ROWS * 2], 1 + + MIN_MAX_SAD m1, m0, m2 + pmaddwd m1, m4 + paddd m3, m1 + + add src1q, 2 * MAX_PB_SIZE * ROWS * 2 + add src2q, 2 * MAX_PB_SIZE * ROWS * 2 + + sub block_hd, 4 + jg .loop_height + + HORIZ_ADD xm0, xm3, m3 + movd eax, xm0 + RET + + vvc_sad_16_128: + .loop_height: + mov off1q, src1q + mov off2q, src2q + mov row_idxd, block_wd + sar row_idxd, 4 + + .loop_width: + movu m0, [src1q] + movu m1, [src2q] + MIN_MAX_SAD m1, m0, m2 + pmaddwd m1, m4 + paddd m3, m1 + + add src1q, 32 + add src2q, 32 + dec row_idxd + jg .loop_width + + lea src1q, [off1q + ROWS * MAX_PB_SIZE * 2] + lea src2q, [off2q + ROWS * MAX_PB_SIZE * 2] + + sub block_hd, 2 + jg .loop_height + + HORIZ_ADD xm0, xm3, m3 + movd eax, xm0 + RET + +%endif +%endif diff --git a/libavcodec/x86/vvc/vvcdsp_init.c b/libavcodec/x86/vvc/vvcdsp_init.c index 0e68971b2c..4b4a2aa937 100644 --- a/libavcodec/x86/vvc/vvcdsp_init.c +++ b/libavcodec/x86/vvc/vvcdsp_init.c @@ -311,6 +311,9 @@ ALF_FUNCS(16, 12, avx2) c->alf.filter[CHROMA] = ff_vvc_alf_filter_chroma_##bd##_avx2; \ c->alf.classify = ff_vvc_alf_classify_##bd##_avx2; \ } while (0) + +int ff_vvc_sad_avx2(const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h); +#define SAD_INIT() c->inter.sad = ff_vvc_sad_avx2 #endif void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd) @@ -327,6 +330,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd) ALF_INIT(8); AVG_INIT(8, avx2); MC_LINKS_AVX2(8); + SAD_INIT(); } break; case 10: @@ -338,6 +342,7 @@ void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd) AVG_INIT(10, avx2); MC_LINKS_AVX2(10); MC_LINKS_16BPC_AVX2(10); + SAD_INIT(); } break; case 12: @@ -349,6 +354,7 @@ void 
ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd) AVG_INIT(12, avx2); MC_LINKS_AVX2(12); MC_LINKS_16BPC_AVX2(12); + SAD_INIT(); } break; default: -- 2.45.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2024-05-20 0:46 UTC|newest] Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-05-20 0:42 Stone Chen [this message] 2024-05-20 0:42 ` [FFmpeg-devel] [PATCH v4 2/2][GSoC 2024] tests/checkasm: Add check_vvc_sad to vvc_mc.c Stone Chen 2024-05-21 5:12 ` Rémi Denis-Courmont 2024-05-21 6:37 ` Martin Storsjö 2024-05-21 8:47 ` Rémi Denis-Courmont 2024-05-21 10:12 ` Martin Storsjö 2024-05-21 14:35 ` Ronald S. Bultje 2024-05-20 11:23 ` [FFmpeg-devel] [PATCH v4 1/2][GSoC 2024] libavcodec/x86/vvc: Add AVX2 DMVR SAD functions for VVC Ronald S. Bultje 2024-05-20 15:52 ` Ronald S. Bultje 2024-05-22 0:05 ` Stone Chen -- strict thread matches above, loose matches on Subject: below -- 2024-05-20 0:37 Stone Chen
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20240520004210.11489-2-chen.stonechen@gmail.com \ --to=chen.stonechen@gmail.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git