* [FFmpeg-devel] [WIP PATCH 1/2] checkasm/sw_rgb: test rgb24 to yuv
@ 2024-06-03 13:02 Zhao Zhili
2024-06-03 18:47 ` James Almer
0 siblings, 1 reply; 2+ messages in thread
From: Zhao Zhili @ 2024-06-03 13:02 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Zhao Zhili
From: Zhao Zhili <zhilizhao@tencent.com>
---
The test still fails on x86, but succeeds on arm64 and loongarch.
I have tried calling rgb24ToY_c and ff_rgb24ToY_avx directly and
comparing the results; they don't match.
https://github.com/quink-black/FFmpeg/actions/runs/9347753270
https://patchwork.ffmpeg.org/project/ffmpeg/patch/tencent_90E6136AF5D6E919AEA9254393048855B305@qq.com/
tests/checkasm/sw_rgb.c | 123 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 123 insertions(+)
diff --git a/tests/checkasm/sw_rgb.c b/tests/checkasm/sw_rgb.c
index 7cd815e5be..18fd4255a6 100644
--- a/tests/checkasm/sw_rgb.c
+++ b/tests/checkasm/sw_rgb.c
@@ -24,6 +24,8 @@
#include "libavutil/mem_internal.h"
#include "libswscale/rgb2rgb.h"
+#include "libswscale/swscale.h"
+#include "libswscale/swscale_internal.h"
#include "checkasm.h"
@@ -41,6 +43,7 @@ static const struct {uint8_t w, h, s;} planes[] = {
#define MAX_STRIDE 128
#define MAX_HEIGHT 128
+#define LARGEST_INPUT_SIZE 4096
static void check_shuffle_bytes(void * func, const char * report)
{
@@ -111,6 +114,120 @@ static void check_uyvy_to_422p(void)
}
}
+/* Check each rgb24 -> Y input converter (ctx->lumToYV12) against the C
+ * reference for several line widths, then benchmark it. */
+static void check_rgb_to_y(void)
+{
+    static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
+    struct SwsContext *ctx;
+    uint32_t *rgb2yuv;
+
+    declare_func(void, uint8_t *dst, const uint8_t *src,
+                 const uint8_t *unused1, const uint8_t *unused2, int width,
+                 uint32_t *rgb2yuv, void *opq);
+
+    LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_y, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_y, [LARGEST_INPUT_SIZE * 2]);
+
+    randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
+    ctx = sws_alloc_context();
+    if (!ctx) {
+        fail();
+        return;
+    }
+    if (sws_init_context(ctx, NULL, NULL) < 0)
+        fail();
+
+    /* Use the table swscale filled in itself: a hand-made 9-entry table is
+     * smaller than what the SIMD versions need, which made them disagree
+     * with the C reference on x86. */
+    rgb2yuv = (uint32_t *)ctx->input_rgb2yuv_table;
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(input_sizes); i++) {
+        int w = input_sizes[i];
+
+        ctx->srcFormat = AV_PIX_FMT_RGB24;
+        ctx->dstFormat = AV_PIX_FMT_YUV420P;
+        ff_sws_init_scale(ctx);
+        if (check_func(ctx->lumToYV12, "rgb24_to_y_%d", w)) {
+            memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * 2);
+            memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * 2);
+
+            call_ref(dst0_y, src, NULL, NULL, w, rgb2yuv, NULL);
+            call_new(dst1_y, src, NULL, NULL, w, rgb2yuv, NULL);
+
+            if (memcmp(dst0_y, dst1_y, w * 2))
+                fail();
+
+            bench_new(dst1_y, src, NULL, NULL, w, rgb2yuv, NULL);
+        }
+    }
+
+    sws_freeContext(ctx);
+}
+
+/* Check each rgb24 -> U/V input converter (ctx->chrToYV12) against the C
+ * reference, for yuv444p and for horizontally subsampled yuv420p output. */
+static void check_rgb_to_uv(void)
+{
+    static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
+    struct SwsContext *ctx;
+    uint32_t *rgb2yuv;
+
+    declare_func(void, uint8_t *dstU, uint8_t *dstV,
+                 const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
+                 int width, uint32_t *pal, void *opq);
+
+    LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_u, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0_v, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_u, [LARGEST_INPUT_SIZE * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1_v, [LARGEST_INPUT_SIZE * 2]);
+
+    randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
+    ctx = sws_alloc_context();
+    if (!ctx) {
+        fail();
+        return;
+    }
+    if (sws_init_context(ctx, NULL, NULL) < 0)
+        fail();
+
+    /* Use the table swscale filled in itself: a hand-made 9-entry table is
+     * smaller than what the SIMD versions need, which made them disagree
+     * with the C reference on x86. */
+    rgb2yuv = (uint32_t *)ctx->input_rgb2yuv_table;
+
+    for (int i = 0; i < 2; i++) {
+        for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) {
+            int w = input_sizes[j] >> i;
+
+            ctx->chrSrcHSubSample = i ? 1 : 0;
+            ctx->srcFormat = AV_PIX_FMT_RGB24;
+            ctx->dstFormat = i ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUV444P;
+            ff_sws_init_scale(ctx);
+
+            if (check_func(ctx->chrToYV12, "rgb24_to_uv%s_%d", i ? "_half" : "", w)) {
+                memset(dst0_u, 0xFF, LARGEST_INPUT_SIZE * 2);
+                memset(dst0_v, 0xFF, LARGEST_INPUT_SIZE * 2);
+                memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * 2);
+                memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * 2);
+
+                call_ref(dst0_u, dst0_v, NULL, src, src, w, rgb2yuv, NULL);
+                call_new(dst1_u, dst1_v, NULL, src, src, w, rgb2yuv, NULL);
+
+                if (memcmp(dst0_u, dst1_u, w * 2) || memcmp(dst0_v, dst1_v, w * 2))
+                    fail();
+
+                bench_new(dst1_u, dst1_v, NULL, src, src, w, rgb2yuv, NULL);
+            }
+        }
+    }
+
+    sws_freeContext(ctx);
+}
+
static void check_interleave_bytes(void)
{
LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
@@ -201,6 +318,12 @@ void checkasm_check_sw_rgb(void)
check_uyvy_to_422p();
report("uyvytoyuv422");
+ check_rgb_to_y();
+ report("rgb_to_y");
+
+ check_rgb_to_uv();
+ report("rgb_to_uv");
+
check_interleave_bytes();
report("interleave_bytes");
}
--
2.42.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [FFmpeg-devel] [WIP PATCH 1/2] checkasm/sw_rgb: test rgb24 to yuv
2024-06-03 13:02 [FFmpeg-devel] [WIP PATCH 1/2] checkasm/sw_rgb: test rgb24 to yuv Zhao Zhili
@ 2024-06-03 18:47 ` James Almer
0 siblings, 0 replies; 2+ messages in thread
From: James Almer @ 2024-06-03 18:47 UTC (permalink / raw)
To: ffmpeg-devel
On 6/3/2024 10:02 AM, Zhao Zhili wrote:
> From: Zhao Zhili <zhilizhao@tencent.com>
>
> ---
> The test still failed on x86, but success on arm64 and longarch.
>
> I have tried to call rgb24ToY_c and ff_rgb24ToY_avx directly and
> compare the results, they don't match.
You're using an incomplete table. See below.
>
> https://github.com/quink-black/FFmpeg/actions/runs/9347753270
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/tencent_90E6136AF5D6E919AEA9254393048855B305@qq.com/
>
> tests/checkasm/sw_rgb.c | 123 ++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 123 insertions(+)
>
> diff --git a/tests/checkasm/sw_rgb.c b/tests/checkasm/sw_rgb.c
> index 7cd815e5be..18fd4255a6 100644
> --- a/tests/checkasm/sw_rgb.c
> +++ b/tests/checkasm/sw_rgb.c
> @@ -24,6 +24,8 @@
> #include "libavutil/mem_internal.h"
>
> #include "libswscale/rgb2rgb.h"
> +#include "libswscale/swscale.h"
> +#include "libswscale/swscale_internal.h"
>
> #include "checkasm.h"
>
> @@ -41,6 +43,7 @@ static const struct {uint8_t w, h, s;} planes[] = {
>
> #define MAX_STRIDE 128
> #define MAX_HEIGHT 128
> +#define LARGEST_INPUT_SIZE 4096
>
> static void check_shuffle_bytes(void * func, const char * report)
> {
> @@ -111,6 +114,120 @@ static void check_uyvy_to_422p(void)
> }
> }
>
> +static void check_rgb_to_y(void)
> +{
> + struct SwsContext *ctx;
> + static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
> + int32_t rgb2yuv[9] = {0};
> +
> + declare_func(void, uint8_t *dst, const uint8_t *src,
> + const uint8_t *unused1, const uint8_t *unused2, int width,
> + uint32_t *rgb2yuv, void *opq);
> +
> + LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
> + LOCAL_ALIGNED_32(uint8_t, dst0_y, [LARGEST_INPUT_SIZE * 2]);
> + LOCAL_ALIGNED_32(uint8_t, dst1_y, [LARGEST_INPUT_SIZE * 2]);
> +
> + randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
> + rgb2yuv[BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +
> + ctx = sws_alloc_context();
> + if (sws_init_context(ctx, NULL, NULL) < 0)
> + fail();
Allocate and initialize this once in checkasm_check_sw_rgb() and reuse it.
> +
> + for (int i = 0; i < FF_ARRAY_ELEMS(input_sizes); i++) {
> + int w = input_sizes[i];
> +
> + ctx->srcFormat = AV_PIX_FMT_RGB24;
> + ctx->dstFormat = AV_PIX_FMT_YUV420P;
> +
> + ff_sws_init_scale(ctx);
> + if (check_func(ctx->lumToYV12, "rgb24_to_y_%d", w)) {
> + memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * 2);
> + memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * 2);
> +
> + call_ref(dst0_y, src, NULL, NULL, w, rgb2yuv, NULL);
Don't use a custom-filled table, especially when it's smaller than needed.
Use ctx->input_rgb2yuv_table directly here and everywhere else. It's
filled with the values the C and any SIMD version may need.
With that, the tests pass on x86.
> + call_new(dst1_y, src, NULL, NULL, w, rgb2yuv, NULL);
> +
> + if (memcmp(dst0_y, dst1_y, w * 2))
> + fail();
> +
> + bench_new(dst1_y, src, NULL, NULL, w, rgb2yuv, NULL);
> + }
> + }
> +
> + sws_freeContext(ctx);
> +}
> +
> +static void check_rgb_to_uv(void)
> +{
> + struct SwsContext *ctx;
> + static const int input_sizes[] = {8, 128, 1280, 1080, LARGEST_INPUT_SIZE};
> + int32_t rgb2yuv[9] = {0};
> +
> + declare_func(void, uint8_t *dstU, uint8_t *dstV,
> + const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
> + int width, uint32_t *pal, void *opq);
> +
> + LOCAL_ALIGNED_32(uint8_t, src, [LARGEST_INPUT_SIZE * 3]);
> + LOCAL_ALIGNED_32(uint8_t, dst0_u, [LARGEST_INPUT_SIZE * 2]);
> + LOCAL_ALIGNED_32(uint8_t, dst0_v, [LARGEST_INPUT_SIZE * 2]);
> + LOCAL_ALIGNED_32(uint8_t, dst1_u, [LARGEST_INPUT_SIZE * 2]);
> + LOCAL_ALIGNED_32(uint8_t, dst1_v, [LARGEST_INPUT_SIZE * 2]);
> +
> + randomize_buffers(src, LARGEST_INPUT_SIZE * 3);
> + rgb2yuv[BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> + rgb2yuv[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
> +
> + ctx = sws_alloc_context();
> + if (sws_init_context(ctx, NULL, NULL) < 0)
> + fail();
> +
> + for (int i = 0; i < 2; i++) {
> + for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) {
> + int w = input_sizes[j] >> i;
> +
> + ctx->chrSrcHSubSample = i ? 1 : 0;
> + ctx->srcFormat = AV_PIX_FMT_RGB24;
> + ctx->dstFormat = i ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUV444P;
> +
> + ff_sws_init_scale(ctx);
> +
> + if (check_func(ctx->chrToYV12, "rgb24_to_uv%s_%d", i ? "_half" : "", w)) {
> + memset(dst0_u, 0xFF, LARGEST_INPUT_SIZE * 2);
> + memset(dst0_v, 0xFF, LARGEST_INPUT_SIZE * 2);
> + memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * 2);
> + memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * 2);
> +
> + call_ref(dst0_u, dst0_v, NULL, src, src, w, rgb2yuv, NULL);
> + call_new(dst1_u, dst1_v, NULL, src, src, w, rgb2yuv, NULL);
> +
> + if (memcmp(dst0_u, dst1_u, w * 2) || memcmp(dst0_v, dst1_v, w * 2))
> + fail();
> +
> + bench_new(dst1_u, dst1_v, NULL, src, src, w, rgb2yuv, NULL);
> + }
> + }
> + }
> +
> + sws_freeContext(ctx);
> +}
> +
> static void check_interleave_bytes(void)
> {
> LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
> @@ -201,6 +318,12 @@ void checkasm_check_sw_rgb(void)
> check_uyvy_to_422p();
> report("uyvytoyuv422");
>
> + check_rgb_to_y();
> + report("rgb_to_y");
> +
> + check_rgb_to_uv();
> + report("rgb_to_uv");
> +
> check_interleave_bytes();
> report("interleave_bytes");
> }
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-06-03 18:47 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-03 13:02 [FFmpeg-devel] [WIP PATCH 1/2] checkasm/sw_rgb: test rgb24 to yuv Zhao Zhili
2024-06-03 18:47 ` James Almer
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git