From: Nuo Mi <nuomi2021@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Cc: benjamin.bross@hhi.fraunhofer.de, Nuo Mi <nuomi2021@gmail.com>
Subject: [FFmpeg-devel] [PATCH 3/4] x86/vvc_alf: avoid overwriting for non-16 aligned widths
Date: Sat, 22 Jun 2024 12:21:13 +0800
Message-ID: <TYSPR06MB64330DE80FBFDCE3D34F93EBAACA2@TYSPR06MB6433.apcprd06.prod.outlook.com> (raw)
In-Reply-To: <20240622042114.92873-1-nuomi2021@gmail.com>
Previously, the code was allowed to write past the block width, up to the next 16-pixel boundary.
This was safe when there were no picture virtual boundaries, because both CTU sizes and strides
are 16-aligned. However, with picture virtual boundaries, each CTU is divided into four ALF blocks,
so such an overwrite can clobber pixels that belong to later CTUs.
In cases involving picture virtual boundaries, each ALF block is 8-pixel aligned.
For luma, we consistently ensure an 8-aligned width. For chroma in 4:2:0 format,
we need to account for a 4-aligned width.
---
libavcodec/x86/vvc/vvc_alf.asm | 85 ++++++++++++++++++++++++++++++----
1 file changed, 75 insertions(+), 10 deletions(-)
diff --git a/libavcodec/x86/vvc/vvc_alf.asm b/libavcodec/x86/vvc/vvc_alf.asm
index b35dd9b0e9..f69a69f05f 100644
--- a/libavcodec/x86/vvc/vvc_alf.asm
+++ b/libavcodec/x86/vvc/vvc_alf.asm
@@ -324,18 +324,69 @@ SECTION .text
%endif
%endmacro
-; STORE_PIXELS(dst, src)
-%macro STORE_PIXELS 2
+; STORE_PIXELS_W16(dst, src)
+%macro STORE_PIXELS_W16 2
%if ps == 2
- movu %1, m%2
+ movu [%1], m%2
%else
+ movu [%1], xm%2
+ %endif
+%endmacro
+
+%macro STORE_PIXELS_W8 2
+ %if ps == 2
+ movu [%1], xm%2
+ %else
+ movq [%1], xm%2
+ %endif
+%endmacro
+
+; STORE_PIXELS_W4(dst, src, offset)
+%macro STORE_PIXELS_W4 3
+ %if ps == 2
+ movq [%1 + %3 * ps], xm%2
+ %else
+ movd [%1 + %3], xm%2
+ %endif
+%endmacro
+
+%macro STORE_PIXELS_W8LE 3
+ cmp %3, 8
+ jl .w4
+ STORE_PIXELS_W8 %1, %2
+ cmp %3, 12
+ %if ps == 2
+ vpermq m%2, m%2, q0302
+ %else
+ vpermq m%2, m%2, q0101
+ %endif
+ jl .end
+ STORE_PIXELS_W4 %1, %2, 8
+ jmp .end
+.w4:
+ STORE_PIXELS_W4 %1, %2, 0
+.end:
+%endmacro
+
+; STORE_PIXELS(dst, src, width)
+%macro STORE_PIXELS 3
+ %if ps == 1
packuswb m%2, m%2
vpermq m%2, m%2, 0x8
- movu %1, xm%2
+ %endif
+
+ %ifidn %3, 16
+ STORE_PIXELS_W16 %1, %2
+ %else
+ %if LUMA
+ STORE_PIXELS_W8 %1, %2
+ %else
+ STORE_PIXELS_W8LE %1, %2, %3
+ %endif
%endif
%endmacro
-%macro FILTER_16x4 0
+%macro FILTER_16x4 1
%if LUMA
push clipq
push strideq
@@ -362,7 +413,7 @@ SECTION .text
; clip to pixel
CLIPW m0, m14, m15
- STORE_PIXELS [dstq], 0
+ STORE_PIXELS dstq, 0, %1
lea srcq, [srcq + src_strideq]
lea dstq, [dstq + dst_strideq]
@@ -399,7 +450,7 @@ SECTION .text
; const uint8_t *src, ptrdiff_t src_stride, const ptrdiff_t width, cosnt ptr_diff_t height,
; const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max);
; ******************************
-cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \
+cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x30, dst, dst_stride, src, src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \
offset, x, s5, s6
%define ps (%1 / 8) ; pixel size
movd xm15, pixel_maxd
@@ -409,18 +460,32 @@ cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, src_s
.loop:
push srcq
push dstq
+ push widthq
xor xq, xq
.loop_w:
+ cmp widthq, 16
+ jl .loop_w_end
+
LOAD_PARAMS
- FILTER_16x4
+ FILTER_16x4 16
add srcq, 16 * ps
add dstq, 16 * ps
add xq, 16
- cmp xq, widthq
- jl .loop_w
+ sub widthq, 16
+ jmp .loop_w
+
+.loop_w_end:
+ cmp widthq, 0
+ je .w_end
+
+ LOAD_PARAMS
+ FILTER_16x4 widthq
+
+.w_end:
+ pop widthq
pop dstq
pop srcq
lea srcq, [srcq + 4 * src_strideq]
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-06-22 4:21 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20240622042114.92873-1-nuomi2021@gmail.com>
2024-06-22 4:21 ` [FFmpeg-devel] [PATCH 2/4] checkasm/vvc_alf: random select alf virtual boundaries position Nuo Mi
2024-06-22 4:21 ` Nuo Mi [this message]
2024-06-22 4:21 ` [FFmpeg-devel] [PATCH 4/4] checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm Nuo Mi
2024-06-25 11:55 ` Nuo Mi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=TYSPR06MB64330DE80FBFDCE3D34F93EBAACA2@TYSPR06MB6433.apcprd06.prod.outlook.com \
--to=nuomi2021@gmail.com \
--cc=benjamin.bross@hhi.fraunhofer.de \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git