Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/2] checkasm: updated tests for sw_scale
@ 2022-06-13 16:36 Swinney, Jonathan
  2022-06-21 20:16 ` Martin Storsjö
  0 siblings, 1 reply; 3+ messages in thread
From: Swinney, Jonathan @ 2022-06-13 16:36 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Martin Storsjö, J. Dekker, Pop, Sebastian

- added a test for yuv2plane1 (currently disabled for x86_64)
- fixed test for yuv2planeX for aarch64 which was previously not working at all

Signed-off-by: Jonathan Swinney <jswinney@amazon.com>
---
 tests/checkasm/sw_scale.c | 176 +++++++++++++++++++++++++++++++++-----
 1 file changed, 156 insertions(+), 20 deletions(-)

diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c
index 31d9a525e9..537cbd3265 100644
--- a/tests/checkasm/sw_scale.c
+++ b/tests/checkasm/sw_scale.c
@@ -35,12 +35,13 @@
             AV_WN32(buf + j, rnd());      \
     } while (0)
 
-// This reference function is the same approximate algorithm employed by the
-// SIMD functions
-static void ref_function(const int16_t *filter, int filterSize,
-                                                 const int16_t **src, uint8_t *dest, int dstW,
-                                                 const uint8_t *dither, int offset)
+static void yuv2planeX_8_ref(const int16_t *filter, int filterSize,
+                             const int16_t **src, uint8_t *dest, int dstW,
+                             const uint8_t *dither, int offset)
 {
+#if ARCH_X86_64
+    // This reference function is the same approximate algorithm employed by the
+    // SIMD functions on x86.
     int i, d;
     d = ((filterSize - 1) * 8 + dither[0]) >> 4;
     for ( i = 0; i < dstW; i++) {
@@ -56,6 +57,120 @@ static void ref_function(const int16_t *filter, int filterSize,
         }
         dest[i]= av_clip_uint8(val>>3);
     }
+#else
+    // Other architectures use the default implementation as the reference.
+    int i;
+    for (i=0; i<dstW; i++) {
+        int val = dither[(i + offset) & 7] << 12;
+        int j;
+        for (j=0; j<filterSize; j++)
+            val += src[j][i] * filter[j];
+
+        dest[i]= av_clip_uint8(val>>19);
+    }
+#endif
+}
+static void yuv2plane1_8_ref(const int16_t *src, uint8_t *dest, int dstW,
+                             const uint8_t *dither, int offset)
+{
+    int i;
+    for (i=0; i<dstW; i++) {
+        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
+        dest[i]= av_clip_uint8(val);
+    }
+}
+
+static void print_data(uint8_t *p, size_t len, size_t offset)
+{
+    size_t i = 0;
+    for (; i < len; i++) {
+        if (i % 8 == 0) {
+            printf("0x%04lx: ", i+offset);
+        }
+        printf("0x%02x ", (uint32_t) p[i]);
+        if (i % 8 == 7) {
+            printf("\n");
+        }
+    }
+    if (i % 8 != 0) {
+        printf("\n");
+    }
+}
+
+static size_t show_differences(uint8_t *a, uint8_t *b, size_t len)
+{
+    for (size_t i = 0; i < len; i++) {
+        if (a[i] != b[i]) {
+            size_t offset_of_mismatch = i;
+            size_t offset;
+            if (i >= 8) i-=8;
+            offset = i & (~7);
+            printf("test a:\n");
+            print_data(&a[offset], 32, offset);
+            printf("\ntest b:\n");
+            print_data(&b[offset], 32, offset);
+            printf("\n");
+            return offset_of_mismatch;
+        }
+    }
+    return len;
+}
+
+static void check_yuv2yuv1(void)
+{
+    struct SwsContext *ctx;
+    int osi, isi;
+    int dstW, offset;
+    size_t fail_offset;
+    const int input_sizes[] = {8, 24, 128, 144, 256, 512};
+    const int INPUT_SIZES = sizeof(input_sizes)/sizeof(input_sizes[0]);
+    #define LARGEST_INPUT_SIZE 512
+
+    const int offsets[] = {0, 3, 8, 11, 16, 19};
+    const int OFFSET_SIZES = sizeof(offsets)/sizeof(offsets[0]);
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void,
+                      const int16_t *src, uint8_t *dest,
+                      int dstW, const uint8_t *dither, int offset);
+
+    LOCAL_ALIGNED_8(int16_t, src_pixels, [LARGEST_INPUT_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, dst0, [LARGEST_INPUT_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, dst1, [LARGEST_INPUT_SIZE]);
+    LOCAL_ALIGNED_8(uint8_t, dither, [8]);
+
+    randomize_buffers((uint8_t*)dither, 8);
+    randomize_buffers((uint8_t*)src_pixels, LARGEST_INPUT_SIZE * sizeof(int16_t));
+    ctx = sws_alloc_context();
+    if (sws_init_context(ctx, NULL, NULL) < 0)
+        fail();
+
+    ff_sws_init_scale(ctx);
+    for(isi = 0; isi < INPUT_SIZES; ++isi){
+        dstW = input_sizes[isi];
+        for(osi = 0; osi < OFFSET_SIZES; osi++){
+            offset = offsets[osi];
+            if (check_func(ctx->yuv2plane1, "yuv2yuv1_%d_%d", offset, dstW)){
+                memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
+                memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
+
+                yuv2plane1_8_ref(src_pixels, dst0, dstW, dither, offset);
+                call_new(src_pixels, dst1, dstW, dither, offset);
+                if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]))) {
+                    fail();
+                    printf("failed: yuv2yuv1_%d_%d\n", offset, dstW);
+                    fail_offset = show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
+                    printf("failing values: src: 0x%04x dither: 0x%02x dst-c: %02x dst-asm: %02x\n",
+                            (int) src_pixels[fail_offset],
+                            (int) dither[(fail_offset + fail_offset) & 7],
+                            (int) dst0[fail_offset],
+                            (int) dst1[fail_offset]);
+                }
+                if(dstW == LARGEST_INPUT_SIZE)
+                    bench_new(src_pixels, dst1, dstW, dither, offset);
+            }
+        }
+    }
+    sws_freeContext(ctx);
 }
 
 static void check_yuv2yuvX(void)
@@ -64,11 +179,11 @@ static void check_yuv2yuvX(void)
     int fsi, osi, isi, i, j;
     int dstW;
 #define LARGEST_FILTER 16
-#define FILTER_SIZES 4
-    static const int filter_sizes[FILTER_SIZES] = {1, 4, 8, 16};
+    const int filter_sizes[] = {1, 2, 3, 4, 8, 16};
+    const int FILTER_SIZES = sizeof(filter_sizes)/sizeof(filter_sizes[0]);
 #define LARGEST_INPUT_SIZE 512
-#define INPUT_SIZES 6
-    static const int input_sizes[INPUT_SIZES] = {8, 24, 128, 144, 256, 512};
+    static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
+    const int INPUT_SIZES = sizeof(input_sizes)/sizeof(input_sizes[0]);
 
     declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *filter,
                       int filterSize, const int16_t **src, uint8_t *dest,
@@ -95,7 +210,7 @@ static void check_yuv2yuvX(void)
     ff_sws_init_scale(ctx);
     for(isi = 0; isi < INPUT_SIZES; ++isi){
         dstW = input_sizes[isi];
-        for(osi = 0; osi < 64; osi += 16){
+        for(osi = 0; osi < 1; osi += 16){
             for(fsi = 0; fsi < FILTER_SIZES; ++fsi){
                 src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
                 vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
@@ -110,18 +225,35 @@ static void check_yuv2yuvX(void)
                     memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
                     memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
 
-                    // The reference function is not the scalar function selected when mmx
-                    // is deactivated as the SIMD functions do not give the same result as
-                    // the scalar ones due to rounding. The SIMD functions are activated by
-                    // the flag SWS_ACCURATE_RND
-                    ref_function(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
-                    // There's no point in calling new for the reference function
-                    if(ctx->use_mmx_vfilter){
-                        call_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
-                        if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0])))
+                    if (ARCH_X86_64) {
+                        // The reference function is not the scalar function selected when mmx
+                        // is deactivated as the SIMD functions do not give the same result as
+                        // the scalar ones due to rounding. The SIMD functions are activated by
+                        // the flag SWS_ACCURATE_RND
+                        yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
+                        // There's no point in calling new for the reference function
+                        if(ctx->use_mmx_vfilter) {
+                            call_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
+                            if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]))) {
+                                fail();
+                                printf("failed: yuv2yuvX_%d_%d_%d\n", filter_sizes[fsi], osi, dstW);
+                                show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
+                            }
+                            if(dstW == LARGEST_INPUT_SIZE)
+                                bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
+                        }
+                    }
+
+                    if (ARCH_AARCH64) {
+                        yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
+                        call_new(&filter_coeff[0], filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
+                        if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]))) {
                             fail();
+                            printf("failed: yuv2yuvX_%d_%d_%d\n", filter_sizes[fsi], osi, dstW);
+                            show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
+                        }
                         if(dstW == LARGEST_INPUT_SIZE)
-                            bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
+                            bench_new(&filter_coeff[0], filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
                     }
                 }
                 av_freep(&src);
@@ -245,6 +377,10 @@ void checkasm_check_sw_scale(void)
 {
     check_hscale();
     report("hscale");
+    if (!ARCH_X86_64) {
+        check_yuv2yuv1();
+        report("yuv2yuv1");
+    }
     check_yuv2yuvX();
     report("yuv2yuvX");
 }
-- 
2.32.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/2] checkasm: updated tests for sw_scale
  2022-06-13 16:36 [FFmpeg-devel] [PATCH 1/2] checkasm: updated tests for sw_scale Swinney, Jonathan
@ 2022-06-21 20:16 ` Martin Storsjö
  2022-06-22  9:22   ` Martin Storsjö
  0 siblings, 1 reply; 3+ messages in thread
From: Martin Storsjö @ 2022-06-21 20:16 UTC (permalink / raw)
  To: Swinney, Jonathan; +Cc: J. Dekker, Pop, Sebastian, ffmpeg-devel

On Mon, 13 Jun 2022, Swinney, Jonathan wrote:

> - added a test for yuv2plane1 (currently disabled for x86_64)

What's the reason for having it disabled for x86 - is it another case 
where the current implementations there aren't bitexact? Could we avoid 
that by setting the bitexact flag for the new yuv2yuv1 test?

> - fixed test for yuv2planeX for aarch64 which was previously not working 
> at all

Could we make the test fuzzy and allow minor differences from the 
reference, when the bitexact flag isn't set, and separately test with the 
bitexact flag and require exact matches?

> @@ -95,7 +210,7 @@ static void check_yuv2yuvX(void)
>     ff_sws_init_scale(ctx);
>     for(isi = 0; isi < INPUT_SIZES; ++isi){
>         dstW = input_sizes[isi];
> -        for(osi = 0; osi < 64; osi += 16){
> +        for(osi = 0; osi < 1; osi += 16){

This looks like a stray leftover change?

// Martin

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/2] checkasm: updated tests for sw_scale
  2022-06-21 20:16 ` Martin Storsjö
@ 2022-06-22  9:22   ` Martin Storsjö
  0 siblings, 0 replies; 3+ messages in thread
From: Martin Storsjö @ 2022-06-22  9:22 UTC (permalink / raw)
  To: Swinney, Jonathan; +Cc: J. Dekker, Pop, Sebastian, ffmpeg-devel

[-- Attachment #1: Type: text/plain, Size: 1508 bytes --]

On Tue, 21 Jun 2022, Martin Storsjö wrote:

> On Mon, 13 Jun 2022, Swinney, Jonathan wrote:
>
>> - added a test for yuv2plane1 (currently disabled for x86_64)
>
> What's the reason for having it disabled for x86 - is it another case where 
> the current implementations there aren't bitexact? Could we avoid that by 
> setting the bitexact flag for the new yuv2yuv1 test?
>
>> - fixed test for yuv2planeX for aarch64 which was previously not working at 
>> all
>
> Could we make the test fuzzy and allow minor differences from the reference, 
> when the bitexact flag isn't set, and separately test with the bitexact flag 
> and require exact matches?
>
>> @@ -95,7 +210,7 @@ static void check_yuv2yuvX(void)
>>     ff_sws_init_scale(ctx);
>>     for(isi = 0; isi < INPUT_SIZES; ++isi){
>>         dstW = input_sizes[isi];
>> -        for(osi = 0; osi < 64; osi += 16){
>> +        for(osi = 0; osi < 1; osi += 16){
>
> This looks like a stray leftover change?

I had a look at this, trying to fix things up. This now passes tests on 
x86_32, x86_64 and aarch64. See the attached patch, which goes on top of 
yours.

It's not intended as a final version of how things should be necessarily, 
but as a more concrete pointer about how it could be done - it needs at 
least reindenting after adding the outer for loop.

I also had to skip the filter sizes 1 and 3 in check_yuv2yuvX, because 
ff_yuv2planeX_8_sse2 couldn't handle those. I presume that means that in 
practice, those aren't ever used?

// Martin

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: Type: text/x-diff; name=0001-checkasm-sw_scale-Fixups.patch, Size: 13386 bytes --]

From e6d33cc199a879060e8e954e6eaea8aebe96b433 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Wed, 22 Jun 2022 12:16:15 +0300
Subject: [PATCH] checkasm: sw_scale: Fixups

Changes:
- Omit x86 yuv2plane1 if SWS_ACCURATE_RND is set
- Remove yuv2plane1_8_ref from the testcase; this function can
  be tested with the usual call_ref pattern
- Add testing with and without SWS_ACCURATE_RND in both check_yuv2yuvX
  and check_yuv2yuv1
- Remove the x86 specific reference function in checkasm; use the
  generic reference function which matches swscale's C code, and
  compare with a tolerance of 2 unless SWS_ACCURATE_RND is set
- Reinstate testing of offsets in check_yuv2yuvX
- Make all tests arch independent
- Clarify why we can't use call_ref in check_yuv2yuvX

The code isn't properly indented (I added an outer for loop, without
reindenting the loop body, for patch clarity).
---
 libswscale/x86/swscale.c  |  11 ++--
 tests/checkasm/sw_scale.c | 113 +++++++++++++++-----------------------
 2 files changed, 50 insertions(+), 74 deletions(-)

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 73869355b8..1c57b3a700 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -550,7 +550,8 @@ switch(c->dstBpc){ \
     if (EXTERNAL_MMX(cpu_flags)) {
         ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
         ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
+        if (!(c->flags & SWS_ACCURATE_RND))
+            ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
 
         switch (c->srcFormat) {
         case AV_PIX_FMT_YA8:
@@ -599,7 +600,8 @@ switch(c->dstBpc){ \
         ASSIGN_SSE_SCALE_FUNC(c->hcScale, c->hChrFilterSize, sse2, sse2);
         ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse2, ,
                             HAVE_ALIGNED_STACK || ARCH_X86_64);
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
+        if (!(c->flags & SWS_ACCURATE_RND))
+            ASSIGN_VSCALE_FUNC(c->yuv2plane1, sse2, sse2, 1);
 
         switch (c->srcFormat) {
         case AV_PIX_FMT_YA8:
@@ -648,14 +650,15 @@ switch(c->dstBpc){ \
         ASSIGN_VSCALEX_FUNC(c->yuv2planeX, sse4,
                             if (!isBE(c->dstFormat)) c->yuv2planeX = ff_yuv2planeX_16_sse4,
                             HAVE_ALIGNED_STACK || ARCH_X86_64);
-        if (c->dstBpc == 16 && !isBE(c->dstFormat))
+        if (c->dstBpc == 16 && !isBE(c->dstFormat) && !(c->flags & SWS_ACCURATE_RND))
             c->yuv2plane1 = ff_yuv2plane1_16_sse4;
     }
 
     if (EXTERNAL_AVX(cpu_flags)) {
         ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
                             HAVE_ALIGNED_STACK || ARCH_X86_64);
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
+        if (!(c->flags & SWS_ACCURATE_RND))
+            ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
 
         switch (c->srcFormat) {
         case AV_PIX_FMT_YUYV422:
diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c
index edce3cbe4e..0c19974034 100644
--- a/tests/checkasm/sw_scale.c
+++ b/tests/checkasm/sw_scale.c
@@ -39,45 +39,25 @@ static void yuv2planeX_8_ref(const int16_t *filter, int filterSize,
                              const int16_t **src, uint8_t *dest, int dstW,
                              const uint8_t *dither, int offset)
 {
-#if ARCH_X86_64
-    // This reference function is the same approximate algorithm employed by the
-    // SIMD functions on x86.
-    int i, d;
-    d = ((filterSize - 1) * 8 + dither[0]) >> 4;
-    for ( i = 0; i < dstW; i++) {
-        int16_t val = d;
-        int j;
-        union {
-            int val;
-            int16_t v[2];
-        } t;
-        for (j = 0; j < filterSize; j++){
-            t.val = (int)src[j][i + offset] * (int)filter[j];
-            val += t.v[1];
-        }
-        dest[i]= av_clip_uint8(val>>3);
-    }
-#else
-    // Other architectures use the default implementation as the reference.
+    // This corresponds to the yuv2planeX_8_c function
     int i;
-    for (i=0; i<dstW; i++) {
+    for (i = 0; i < dstW; i++) {
         int val = dither[(i + offset) & 7] << 12;
         int j;
-        for (j=0; j<filterSize; j++)
+        for (j = 0; j < filterSize; j++)
             val += src[j][i] * filter[j];
 
-        dest[i]= av_clip_uint8(val>>19);
+        dest[i]= av_clip_uint8(val >> 19);
     }
-#endif
 }
-static void yuv2plane1_8_ref(const int16_t *src, uint8_t *dest, int dstW,
-                             const uint8_t *dither, int offset)
+
+static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
 {
-    int i;
-    for (i=0; i<dstW; i++) {
-        int val = (src[i] + dither[(i + offset) & 7]) >> 7;
-        dest[i]= av_clip_uint8(val);
+    for (size_t i = 0; i < n; i++) {
+        if (abs(ref[i] - test[i]) > accuracy)
+            return 1;
     }
+    return 0;
 }
 
 static void print_data(uint8_t *p, size_t len, size_t offset)
@@ -85,7 +65,7 @@ static void print_data(uint8_t *p, size_t len, size_t offset)
     size_t i = 0;
     for (; i < len; i++) {
         if (i % 8 == 0) {
-            printf("0x%04lx: ", i+offset);
+            printf("0x%04zx: ", i+offset);
         }
         printf("0x%02x ", (uint32_t) p[i]);
         if (i % 8 == 7) {
@@ -140,7 +120,10 @@ static void check_yuv2yuv1(void)
 
     randomize_buffers((uint8_t*)dither, 8);
     randomize_buffers((uint8_t*)src_pixels, LARGEST_INPUT_SIZE * sizeof(int16_t));
+  for (int accurate = 0; accurate <= 1; accurate++) {
     ctx = sws_alloc_context();
+    if (accurate)
+        ctx->flags |= SWS_ACCURATE_RND;
     if (sws_init_context(ctx, NULL, NULL) < 0)
         fail();
 
@@ -149,13 +132,13 @@ static void check_yuv2yuv1(void)
         dstW = input_sizes[isi];
         for(osi = 0; osi < OFFSET_SIZES; osi++){
             offset = offsets[osi];
-            if (check_func(ctx->yuv2plane1, "yuv2yuv1_%d_%d", offset, dstW)){
+            if (check_func(ctx->yuv2plane1, "yuv2yuv1_%d_%d%s", offset, dstW, accurate ? "_accurate" : "")){
                 memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
                 memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
 
-                yuv2plane1_8_ref(src_pixels, dst0, dstW, dither, offset);
+                call_ref(src_pixels, dst0, dstW, dither, offset);
                 call_new(src_pixels, dst1, dstW, dither, offset);
-                if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]))) {
+                if (cmp_off_by_n(dst0, dst1, dstW * sizeof(dst0[0]), accurate ? 0 : 2)) {
                     fail();
                     printf("failed: yuv2yuv1_%d_%d\n", offset, dstW);
                     fail_offset = show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
@@ -171,6 +154,7 @@ static void check_yuv2yuv1(void)
         }
     }
     sws_freeContext(ctx);
+  }
 }
 
 static void check_yuv2yuvX(void)
@@ -179,7 +163,8 @@ static void check_yuv2yuvX(void)
     int fsi, osi, isi, i, j;
     int dstW;
 #define LARGEST_FILTER 16
-    const int filter_sizes[] = {1, 2, 3, 4, 8, 16};
+    // ff_yuv2planeX_8_sse2 can't handle odd filter sizes
+    const int filter_sizes[] = {2, 4, 8, 16};
     const int FILTER_SIZES = sizeof(filter_sizes)/sizeof(filter_sizes[0]);
 #define LARGEST_INPUT_SIZE 512
     static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
@@ -203,58 +188,47 @@ static void check_yuv2yuvX(void)
     memset(dither, d_val, LARGEST_INPUT_SIZE);
     randomize_buffers((uint8_t*)src_pixels, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int16_t));
     randomize_buffers((uint8_t*)filter_coeff, LARGEST_FILTER * sizeof(int16_t));
+  for (int accurate = 0; accurate <= 1; accurate++) {
     ctx = sws_alloc_context();
+    if (accurate)
+        ctx->flags |= SWS_ACCURATE_RND;
     if (sws_init_context(ctx, NULL, NULL) < 0)
         fail();
 
     ff_sws_init_scale(ctx);
     for(isi = 0; isi < INPUT_SIZES; ++isi){
         dstW = input_sizes[isi];
-        for(osi = 0; osi < 1; osi += 16){
+        for(osi = 0; osi < 64; osi += 16){
+            if (dstW <= osi)
+                continue;
             for(fsi = 0; fsi < FILTER_SIZES; ++fsi){
                 src = av_malloc(sizeof(int16_t*) * filter_sizes[fsi]);
                 vFilterData = av_malloc((filter_sizes[fsi] + 2) * sizeof(union VFilterData));
                 memset(vFilterData, 0, (filter_sizes[fsi] + 2) * sizeof(union VFilterData));
                 for(i = 0; i < filter_sizes[fsi]; ++i){
                     src[i] = &src_pixels[i * LARGEST_INPUT_SIZE];
-                    vFilterData[i].src = src[i];
+                    vFilterData[i].src = src[i] - osi;
                     for(j = 0; j < 4; ++j)
                         vFilterData[i].coeff[j + 4] = filter_coeff[i];
                 }
-                if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d", filter_sizes[fsi], osi, dstW)){
+                if (check_func(ctx->yuv2planeX, "yuv2yuvX_%d_%d_%d%s", filter_sizes[fsi], osi, dstW, accurate ? "_accurate" : "")){
+                    const int16_t *filter = ctx->use_mmx_vfilter ? (const int16_t*)vFilterData : &filter_coeff[0];
                     memset(dst0, 0, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
                     memset(dst1, 0, LARGEST_INPUT_SIZE * sizeof(dst1[0]));
 
-                    if (ARCH_X86_64) {
-                        // The reference function is not the scalar function selected when mmx
-                        // is deactivated as the SIMD functions do not give the same result as
-                        // the scalar ones due to rounding. The SIMD functions are activated by
-                        // the flag SWS_ACCURATE_RND
-                        yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
-                        // There's no point in calling new for the reference function
-                        if(ctx->use_mmx_vfilter) {
-                            call_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
-                            if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]))) {
-                                fail();
-                                printf("failed: yuv2yuvX_%d_%d_%d\n", filter_sizes[fsi], osi, dstW);
-                                show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
-                            }
-                            if(dstW == LARGEST_INPUT_SIZE)
-                                bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
-                        }
-                    }
+                    // We can't use call_ref here, because we don't know if use_mmx_vfilter was set for that
+                    // function or not, so we can't pass it the parameters correctly.
+                    yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
 
-                    if (ARCH_AARCH64) {
-                        yuv2planeX_8_ref(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
-                        call_new(&filter_coeff[0], filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
-                        if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]))) {
-                            fail();
-                            printf("failed: yuv2yuvX_%d_%d_%d\n", filter_sizes[fsi], osi, dstW);
-                            show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
-                        }
-                        if(dstW == LARGEST_INPUT_SIZE)
-                            bench_new(&filter_coeff[0], filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
+                    call_new(filter, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
+                    if (cmp_off_by_n(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]), accurate ? 0 : 2)) {
+                        fail();
+                        printf("failed: yuv2yuvX_%d_%d_%d%s\n", filter_sizes[fsi], osi, dstW, accurate ? "_accurate" : "");
+                        show_differences(dst0, dst1, LARGEST_INPUT_SIZE * sizeof(dst0[0]));
                     }
+                    if(dstW == LARGEST_INPUT_SIZE)
+                        bench_new((const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
+
                 }
                 av_freep(&src);
                 av_freep(&vFilterData);
@@ -262,6 +236,7 @@ static void check_yuv2yuvX(void)
         }
     }
     sws_freeContext(ctx);
+  }
 #undef FILTER_SIZES
 }
 
@@ -377,10 +352,8 @@ void checkasm_check_sw_scale(void)
 {
     check_hscale();
     report("hscale");
-    if (!ARCH_X86_64) {
-        check_yuv2yuv1();
-        report("yuv2yuv1");
-    }
+    check_yuv2yuv1();
+    report("yuv2yuv1");
     check_yuv2yuvX();
     report("yuv2yuvX");
 }
-- 
2.32.0 (Apple Git-132)


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-06-22  9:23 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-13 16:36 [FFmpeg-devel] [PATCH 1/2] checkasm: updated tests for sw_scale Swinney, Jonathan
2022-06-21 20:16 ` Martin Storsjö
2022-06-22  9:22   ` Martin Storsjö

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git