From: Nuo Mi <nuomi2021@gmail.com> To: ffmpeg-devel@ffmpeg.org Cc: benjamin.bross@hhi.fraunhofer.de, Nuo Mi <nuomi2021@gmail.com> Subject: [FFmpeg-devel] [PATCH 3/4] x86/vvc_alf: avoid overwriting for non-16 aligned widths Date: Sat, 22 Jun 2024 12:21:13 +0800 Message-ID: <TYSPR06MB64330DE80FBFDCE3D34F93EBAACA2@TYSPR06MB6433.apcprd06.prod.outlook.com> (raw) In-Reply-To: <20240622042114.92873-1-nuomi2021@gmail.com> Previously, the code was allowed to overwrite up to the end of each 16-aligned block. This was acceptable when there were no picture virtual boundaries, because both CTU sizes and strides are 16-aligned. However, with picture virtual boundaries, each CTU is divided into four ALF blocks, so such an overwrite can corrupt later CTUs. In cases involving picture virtual boundaries, each ALF block is 8-pixel aligned. For luma, the width is therefore always 8-aligned. For chroma in 4:2:0 format, we also need to handle a width that is only 4-aligned. --- libavcodec/x86/vvc/vvc_alf.asm | 85 ++++++++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/libavcodec/x86/vvc/vvc_alf.asm b/libavcodec/x86/vvc/vvc_alf.asm index b35dd9b0e9..f69a69f05f 100644 --- a/libavcodec/x86/vvc/vvc_alf.asm +++ b/libavcodec/x86/vvc/vvc_alf.asm @@ -324,18 +324,69 @@ SECTION .text %endif %endmacro -; STORE_PIXELS(dst, src) -%macro STORE_PIXELS 2 +; STORE_PIXELS_W16(dst, src) +%macro STORE_PIXELS_W16 2 %if ps == 2 - movu %1, m%2 + movu [%1], m%2 %else + movu [%1], xm%2 + %endif +%endmacro + +%macro STORE_PIXELS_W8 2 + %if ps == 2 + movu [%1], xm%2 + %else + movq [%1], xm%2 + %endif +%endmacro + +; STORE_PIXELS_W4(dst, src, offset) +%macro STORE_PIXELS_W4 3 + %if ps == 2 + movq [%1 + %3 * ps], xm%2 + %else + movd [%1 + %3], xm%2 + %endif +%endmacro + +%macro STORE_PIXELS_W8LE 3 + cmp %3, 8 + jl .w4 + STORE_PIXELS_W8 %1, %2 + cmp %3, 12 + %if ps == 2 + vpermq m%2, m%2, q0302 + %else + vpermq m%2, m%2, q0101 + %endif + jl .end + STORE_PIXELS_W4 %1, %2, 8 + jmp .end +.w4: + 
STORE_PIXELS_W4 %1, %2, 0 +.end: +%endmacro + +; STORE_PIXELS(dst, src, width) +%macro STORE_PIXELS 3 + %if ps == 1 packuswb m%2, m%2 vpermq m%2, m%2, 0x8 - movu %1, xm%2 + %endif + + %ifidn %3, 16 + STORE_PIXELS_W16 %1, %2 + %else + %if LUMA + STORE_PIXELS_W8 %1, %2 + %else + STORE_PIXELS_W8LE %1, %2, %3 + %endif %endif %endmacro -%macro FILTER_16x4 0 +%macro FILTER_16x4 1 %if LUMA push clipq push strideq @@ -362,7 +413,7 @@ SECTION .text ; clip to pixel CLIPW m0, m14, m15 - STORE_PIXELS [dstq], 0 + STORE_PIXELS dstq, 0, %1 lea srcq, [srcq + src_strideq] lea dstq, [dstq + dst_strideq] @@ -399,7 +450,7 @@ SECTION .text ; const uint8_t *src, ptrdiff_t src_stride, const ptrdiff_t width, cosnt ptr_diff_t height, ; const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max); ; ****************************** -cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \ +cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x30, dst, dst_stride, src, src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \ offset, x, s5, s6 %define ps (%1 / 8) ; pixel size movd xm15, pixel_maxd @@ -409,18 +460,32 @@ cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, src_s .loop: push srcq push dstq + push widthq xor xq, xq .loop_w: + cmp widthq, 16 + jl .loop_w_end + LOAD_PARAMS - FILTER_16x4 + FILTER_16x4 16 add srcq, 16 * ps add dstq, 16 * ps add xq, 16 - cmp xq, widthq - jl .loop_w + sub widthq, 16 + jmp .loop_w + +.loop_w_end: + cmp widthq, 0 + je .w_end + + LOAD_PARAMS + FILTER_16x4 widthq + +.w_end: + pop widthq pop dstq pop srcq lea srcq, [srcq + 4 * src_strideq] -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-06-22 4:21 UTC|newest] Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top [not found] <20240622042114.92873-1-nuomi2021@gmail.com> 2024-06-22 4:21 ` [FFmpeg-devel] [PATCH 2/4] checkasm/vvc_alf: random select alf virtual boundaries position Nuo Mi 2024-06-22 4:21 ` Nuo Mi [this message] 2024-06-22 4:21 ` [FFmpeg-devel] [PATCH 4/4] checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm Nuo Mi 2024-06-25 11:55 ` Nuo Mi
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=TYSPR06MB64330DE80FBFDCE3D34F93EBAACA2@TYSPR06MB6433.apcprd06.prod.outlook.com \ --to=nuomi2021@gmail.com \ --cc=benjamin.bross@hhi.fraunhofer.de \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git