Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 2/4] checkasm/vvc_alf: random select alf virtual boundaries position
       [not found] <20240622042114.92873-1-nuomi2021@gmail.com>
@ 2024-06-22  4:21 ` Nuo Mi
  2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 3/4] x86/vvc_alf: avoid overwriting for non-16 aligned widths Nuo Mi
  2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 4/4] checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm Nuo Mi
  2 siblings, 0 replies; 4+ messages in thread
From: Nuo Mi @ 2024-06-22  4:21 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: benjamin.bross, Nuo Mi

A picture's virtual boundaries will split a CTU into 4 ALF blocks.
The ALF virtual boundary may or may not cross an ALF block.
---
 tests/checkasm/vvc_alf.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
index 902757aff1..be8b930810 100644
--- a/tests/checkasm/vvc_alf.c
+++ b/tests/checkasm/vvc_alf.c
@@ -66,6 +66,14 @@ static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
         }                                                   \
     } while (0)
 
+static int get_alf_vb_pos(const int h, const int vb_pos_above)
+{
+    if (h == MAX_CTU_SIZE)
+        return MAX_CTU_SIZE - vb_pos_above;
+    // If h < MAX_CTU_SIZE and picture virtual boundaries are involved, ALF virtual boundaries can either be within or outside this ALF block.
+    return ((rnd() & 1) ? h : MAX_CTU_SIZE) - vb_pos_above;
+}
+
 static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
 {
     LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
@@ -92,11 +100,10 @@ static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
 
     for (int h = 4; h <= MAX_CTU_SIZE; h += 4) {
         for (int w = 4; w <= MAX_CTU_SIZE; w += 4) {
-            const int ctu_size = MAX_CTU_SIZE;
             //Both picture size and virtual boundaries are 8-aligned. For luma, we only need to check 8-aligned sizes.
             if (!(w % 8) && !(h % 8)) {
                 if (check_func(c->alf.filter[LUMA], "vvc_alf_filter_luma_%dx%d_%d", w, h, bit_depth)) {
-                    const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_LUMA;
+                    const int vb_pos = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_LUMA);
                     memset(dst0, 0, DST_BUF_SIZE);
                     memset(dst1, 0, DST_BUF_SIZE);
                     call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
@@ -113,7 +120,7 @@ static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
             //For chroma, once it exceeds 64, it's not a 4:2:0 format, so we only need to check 8-aligned sizes as well.
             if ((w <= 64 || !(w % 8)) && (h <= 64 || !(h % 8))) {
                 if (check_func(c->alf.filter[CHROMA], "vvc_alf_filter_chroma_%dx%d_%d", w, h, bit_depth)) {
-                    const int vb_pos = ctu_size - ALF_VB_POS_ABOVE_CHROMA;
+                    const int vb_pos = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_CHROMA);
                     memset(dst0, 0, DST_BUF_SIZE);
                     memset(dst1, 0, DST_BUF_SIZE);
                     call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
@@ -152,7 +159,7 @@ static void check_alf_classify(VVCDSPContext *c, const int bit_depth)
     for (int h = 8; h <= MAX_CTU_SIZE; h += 8) {
         for (int w = 8; w <= MAX_CTU_SIZE; w += 8) {
             const int id_size = w * h / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE * sizeof(int);
-            const int vb_pos  = MAX_CTU_SIZE - ALF_BLOCK_SIZE;
+            const int vb_pos  = get_alf_vb_pos(h, ALF_VB_POS_ABOVE_LUMA);
             if (check_func(c->alf.classify, "vvc_alf_classify_%dx%d_%d", w, h, bit_depth)) {
                 memset(class_idx0, 0, id_size);
                 memset(class_idx1, 0, id_size);
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] x86/vvc_alf: avoid overwriting for non-16 aligned widths
       [not found] <20240622042114.92873-1-nuomi2021@gmail.com>
  2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 2/4] checkasm/vvc_alf: random select alf virtual boundaries position Nuo Mi
@ 2024-06-22  4:21 ` Nuo Mi
  2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 4/4] checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm Nuo Mi
  2 siblings, 0 replies; 4+ messages in thread
From: Nuo Mi @ 2024-06-22  4:21 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: benjamin.bross, Nuo Mi

Previously, the code allowed overwriting on 16-aligned blocks, which was suitable when there were
no picture virtual boundaries because both CTU sizes and strides were 16-aligned. However, with
picture virtual boundaries, each CTU is divided into four ALF blocks, leading to potential issues
with overwriting later CTUs.

In cases involving picture virtual boundaries, each ALF block is 8-pixel aligned.
For luma, we consistently ensure an 8-aligned width. For chroma in 4:2:0 format,
we need to account for a 4-aligned width.
---
 libavcodec/x86/vvc/vvc_alf.asm | 85 ++++++++++++++++++++++++++++++----
 1 file changed, 75 insertions(+), 10 deletions(-)

diff --git a/libavcodec/x86/vvc/vvc_alf.asm b/libavcodec/x86/vvc/vvc_alf.asm
index b35dd9b0e9..f69a69f05f 100644
--- a/libavcodec/x86/vvc/vvc_alf.asm
+++ b/libavcodec/x86/vvc/vvc_alf.asm
@@ -324,18 +324,69 @@ SECTION .text
 %endif
 %endmacro
 
-; STORE_PIXELS(dst, src)
-%macro STORE_PIXELS 2
+; STORE_PIXELS_W16(dst, src)
+%macro STORE_PIXELS_W16 2
     %if ps == 2
-        movu         %1, m%2
+        movu       [%1],  m%2
     %else
+        movu       [%1], xm%2
+    %endif
+%endmacro
+
+%macro STORE_PIXELS_W8 2
+    %if ps == 2
+        movu       [%1], xm%2
+    %else
+        movq       [%1], xm%2
+    %endif
+%endmacro
+
+; STORE_PIXELS_W4(dst, src, offset)
+%macro STORE_PIXELS_W4 3
+    %if ps == 2
+        movq   [%1 + %3 * ps], xm%2
+    %else
+        movd        [%1 + %3], xm%2
+    %endif
+%endmacro
+
+%macro STORE_PIXELS_W8LE 3
+    cmp %3, 8
+    jl .w4
+    STORE_PIXELS_W8 %1, %2
+    cmp %3, 12
+    %if ps == 2
+        vpermq      m%2,  m%2, q0302
+    %else
+        vpermq      m%2,  m%2, q0101
+    %endif
+    jl .end
+    STORE_PIXELS_W4 %1, %2, 8
+    jmp .end
+.w4:
+    STORE_PIXELS_W4 %1, %2, 0
+.end:
+%endmacro
+
+; STORE_PIXELS(dst, src, width)
+%macro STORE_PIXELS 3
+    %if ps == 1
         packuswb    m%2, m%2
         vpermq      m%2, m%2, 0x8
-        movu         %1, xm%2
+    %endif
+
+    %ifidn %3, 16
+        STORE_PIXELS_W16  %1, %2
+    %else
+        %if LUMA
+            STORE_PIXELS_W8   %1, %2
+        %else
+            STORE_PIXELS_W8LE %1, %2, %3
+        %endif
     %endif
 %endmacro
 
-%macro FILTER_16x4 0
+%macro FILTER_16x4 1
 %if LUMA
     push clipq
     push strideq
@@ -362,7 +413,7 @@ SECTION .text
     ; clip to pixel
     CLIPW             m0, m14, m15
 
-    STORE_PIXELS  [dstq], 0
+    STORE_PIXELS    dstq, 0, %1
 
     lea             srcq, [srcq + src_strideq]
     lea             dstq, [dstq + dst_strideq]
@@ -399,7 +450,7 @@ SECTION .text
 ;      const uint8_t *src, ptrdiff_t src_stride, const ptrdiff_t width, cosnt ptr_diff_t height,
 ;      const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max);
 ; ******************************
-cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \
+cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x30, dst, dst_stride, src, src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \
     offset, x, s5, s6
 %define ps (%1 / 8) ; pixel size
     movd            xm15, pixel_maxd
@@ -409,18 +460,32 @@ cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, src_s
 .loop:
     push            srcq
     push            dstq
+    push          widthq
     xor               xq, xq
 
     .loop_w:
+        cmp       widthq, 16
+        jl   .loop_w_end
+
         LOAD_PARAMS
-        FILTER_16x4
+        FILTER_16x4   16
 
         add         srcq, 16 * ps
         add         dstq, 16 * ps
         add           xq, 16
-        cmp           xq, widthq
-        jl       .loop_w
+        sub       widthq, 16
+        jmp      .loop_w
+
+.loop_w_end:
+    cmp           widthq, 0
+    je            .w_end
+
+    LOAD_PARAMS
+    FILTER_16x4  widthq
+
+.w_end:
 
+    pop           widthq
     pop             dstq
     pop             srcq
     lea             srcq, [srcq + 4 * src_strideq]
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [FFmpeg-devel] [PATCH 4/4] checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm
       [not found] <20240622042114.92873-1-nuomi2021@gmail.com>
  2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 2/4] checkasm/vvc_alf: random select alf virtual boundaries position Nuo Mi
  2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 3/4] x86/vvc_alf: avoid overwriting for non-16 aligned widths Nuo Mi
@ 2024-06-22  4:21 ` Nuo Mi
  2024-06-25 11:55   ` Nuo Mi
  2 siblings, 1 reply; 4+ messages in thread
From: Nuo Mi @ 2024-06-22  4:21 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: benjamin.bross, Nuo Mi

---
 tests/checkasm/vvc_alf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
index be8b930810..15e79289cd 100644
--- a/tests/checkasm/vvc_alf.c
+++ b/tests/checkasm/vvc_alf.c
@@ -108,8 +108,8 @@ static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
                     memset(dst1, 0, DST_BUF_SIZE);
                     call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
                     call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
-                    for (int i = 0; i < h; i++) {
-                        if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
+                    for (int i = 0; i < (h + 1); i++) {
+                        if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, (w + 1) * SIZEOF_PIXEL))
                             fail();
                     }
                     // Bench only square sizes, and ones with dimensions being a power of two.
@@ -125,8 +125,8 @@ static void check_alf_filter(VVCDSPContext *c, const int bit_depth)
                     memset(dst1, 0, DST_BUF_SIZE);
                     call_ref(dst0, dst_stride, src0 + offset, src_stride, w, h, filter, clip, vb_pos);
                     call_new(dst1, dst_stride, src1 + offset, src_stride, w, h, filter, clip, vb_pos);
-                    for (int i = 0; i < h; i++) {
-                        if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, w * SIZEOF_PIXEL))
+                    for (int i = 0; i < (h + 1); i++) {
+                        if (memcmp(dst0 + i * dst_stride, dst1 + i * dst_stride, (w + 1) * SIZEOF_PIXEL))
                             fail();
                     }
                     if (w == h && (w & (w - 1)) == 0)
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [FFmpeg-devel] [PATCH 4/4] checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm
  2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 4/4] checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm Nuo Mi
@ 2024-06-25 11:55   ` Nuo Mi
  0 siblings, 0 replies; 4+ messages in thread
From: Nuo Mi @ 2024-06-25 11:55 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: benjamin.bross

Applied.
Thank you Ben for reviewing.

On Sat, Jun 22, 2024 at 12:21 PM Nuo Mi <nuomi2021@gmail.com> wrote:

> ---
>  tests/checkasm/vvc_alf.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/tests/checkasm/vvc_alf.c b/tests/checkasm/vvc_alf.c
> index be8b930810..15e79289cd 100644
> --- a/tests/checkasm/vvc_alf.c
> +++ b/tests/checkasm/vvc_alf.c
> @@ -108,8 +108,8 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
>                      memset(dst1, 0, DST_BUF_SIZE);
>                      call_ref(dst0, dst_stride, src0 + offset, src_stride,
> w, h, filter, clip, vb_pos);
>                      call_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> -                    for (int i = 0; i < h; i++) {
> -                        if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> +                    for (int i = 0; i < (h + 1); i++) {
> +                        if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, (w + 1) * SIZEOF_PIXEL))
>                              fail();
>                      }
>                      // Bench only square sizes, and ones with dimensions
> being a power of two.
> @@ -125,8 +125,8 @@ static void check_alf_filter(VVCDSPContext *c, const
> int bit_depth)
>                      memset(dst1, 0, DST_BUF_SIZE);
>                      call_ref(dst0, dst_stride, src0 + offset, src_stride,
> w, h, filter, clip, vb_pos);
>                      call_new(dst1, dst_stride, src1 + offset, src_stride,
> w, h, filter, clip, vb_pos);
> -                    for (int i = 0; i < h; i++) {
> -                        if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, w * SIZEOF_PIXEL))
> +                    for (int i = 0; i < (h + 1); i++) {
> +                        if (memcmp(dst0 + i * dst_stride, dst1 + i *
> dst_stride, (w + 1) * SIZEOF_PIXEL))
>                              fail();
>                      }
>                      if (w == h && (w & (w - 1)) == 0)
> --
> 2.34.1
>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-06-25 11:55 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20240622042114.92873-1-nuomi2021@gmail.com>
2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 2/4] checkasm/vvc_alf: random select alf virtual boundaries position Nuo Mi
2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 3/4] x86/vvc_alf: avoid overwriting for non-16 aligned widths Nuo Mi
2024-06-22  4:21 ` [FFmpeg-devel] [PATCH 4/4] checkasm/vvc_alf: ensure right and bottom boundaries are not overwritten by asm Nuo Mi
2024-06-25 11:55   ` Nuo Mi

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git