From: Niklas Haas <ffmpeg@haasn.xyz> To: ffmpeg-devel@ffmpeg.org Cc: Niklas Haas <git@haasn.dev> Subject: [FFmpeg-devel] [PATCH 09/11] tests/swscale: calculate theoretical expected SSIM Date: Mon, 17 Mar 2025 11:43:55 +0100 Message-ID: <20250317104357.307832-9-ffmpeg@haasn.xyz> (raw) In-Reply-To: <20250317104357.307832-1-ffmpeg@haasn.xyz> From: Niklas Haas <git@haasn.dev> We can calculate with some confidence the theoretical expected SSIM from an "ideal" conversion, by computing the reference SSIM level for an image dithered with uniformly distributed quantization noise. This gives us an additional safety net to check for regressions even in the absence of a reference to compare against. --- libswscale/tests/swscale.c | 74 +++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 12 deletions(-) diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c index 47c58524f6..bce495db90 100644 --- a/libswscale/tests/swscale.c +++ b/libswscale/tests/swscale.c @@ -99,6 +99,29 @@ static void exit_handler(int sig) exit(sig); } +/* Estimate luma variance assuming uniform dither noise distribution */ +static float estimate_quantization_noise(enum AVPixelFormat fmt) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); + float variance = 1.0 / 12; + if (desc->comp[0].depth < 8) { + /* Extra headroom for very low bit depth output */ + variance *= (8 - desc->comp[0].depth); + } + + if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { + return 0.0; + } else if (desc->flags & AV_PIX_FMT_FLAG_RGB) { + const float r = 0.299 / (1 << desc->comp[0].depth); + const float g = 0.587 / (1 << desc->comp[1].depth); + const float b = 0.114 / (1 << desc->comp[2].depth); + return (r * r + g * g + b * b) * variance; + } else { + const float y = 1.0 / (1 << desc->comp[0].depth); + return y * y * variance; + } +} + static int fmt_comps(enum AVPixelFormat fmt) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); @@ -156,6 +179,18 @@ static void get_ssim(float 
ssim[4], const AVFrame *out, const AVFrame *ref, int } } +static float get_loss(const float ssim[4]) +{ + const float weights[3] = { 0.8, 0.1, 0.1 }; /* tuned for Y'CrCr */ + + float sum = 0; + for (int i = 0; i < 3; i++) + sum += weights[i] * ssim[i]; + sum *= ssim[3]; /* ensure alpha errors get caught */ + + return 1.0 - sum; +} + static int scale_legacy(AVFrame *dst, const AVFrame *src, struct mode mode, struct options opts) { @@ -198,6 +233,18 @@ static int run_test(enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, int64_t time, time_ref = 0; int ret = -1; + /* Estimate the expected amount of loss from bit depth reduction */ + const float c1 = 0.01 * 0.01; /* stabilization constant */ + const float ref_var = 1.0 / 12.0; /* uniformly distributed signal */ + const float src_var = estimate_quantization_noise(src_fmt); + const float dst_var = estimate_quantization_noise(dst_fmt); + const float out_var = estimate_quantization_noise(ref->format); + const float total_var = src_var + dst_var + out_var; + const float ssim_luma = (2 * ref_var + c1) / (2 * ref_var + total_var + c1); + const float ssim_expected[4] = { ssim_luma, 1, 1, 1 }; /* for simplicity */ + const float expected_loss = get_loss(ssim_expected); + float loss; + src = av_frame_alloc(); dst = av_frame_alloc(); out = av_frame_alloc(); @@ -251,6 +298,15 @@ static int run_test(enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, mode.flags, mode.dither, ssim[0], ssim[1], ssim[2], ssim[3]); + loss = get_loss(ssim); + if (loss - expected_loss > 1e-4 && dst_w >= ref->width && dst_h >= ref->height) { + int bad = loss - expected_loss > 1e-2; + printf("\033[1;31m loss %g is %s by %g, expected loss %g\033[0m\n", + loss, bad ? 
"WORSE" : "worse", loss - expected_loss, expected_loss); + if (bad) + goto error; + } + if (!ssim_ref && sws_isSupportedInput(src->format) && sws_isSupportedOutput(dst->format)) { /* Compare against the legacy swscale API as a reference */ time_ref = av_gettime_relative(); @@ -269,18 +325,12 @@ static int run_test(enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, } if (ssim_ref) { - const float weights[4] = { 0.8, 0.1, 0.1, 1.0 }; /* tuned for Y'CrCr */ - float err, sum = 0, sum_ref = 0; - for (int i = 0; i < 4; i++) { - sum += weights[i] * ssim[i]; - sum_ref += weights[i] * ssim_ref[i]; - } - - err = sum_ref / sum - 1.0; /* relative error */ - if (err > 1e-4 /* 0.01% headroom for dither noise etc */) { - int bad = err > 1e-2; /* 1% */ - printf("\033[1;31m %s by %f%%, ref SSIM {Y=%f U=%f V=%f A=%f}\033[0m\n", - bad ? "WORSE" : "worse", 100.0 * err, + const float loss_ref = get_loss(ssim_ref); + if (loss - loss_ref > 1e-4) { + int bad = loss - loss_ref > 1e-2; + printf("\033[1;31m loss %g is %s by %g, ref loss %g, " + "SSIM {Y=%f U=%f V=%f A=%f}\033[0m\n", + loss, bad ? "WORSE" : "worse", loss - loss_ref, loss_ref, ssim_ref[0], ssim_ref[1], ssim_ref[2], ssim_ref[3]); if (bad) goto error; -- 2.48.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2025-03-17 10:46 UTC|newest] Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top 2025-03-17 10:43 [FFmpeg-devel] [PATCH 01/11] tests/swscale: allow choosing specific flags and dither mode Niklas Haas 2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 02/11] tests/swscale: allow testing only unscaled convertors Niklas Haas 2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 03/11] tests/swscale: print speedup numbers in color Niklas Haas 2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 04/11] tests/swscale: use yuva444p as reference Niklas Haas 2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 05/11] tests/swscale: switch from MSE to SSIM Niklas Haas 2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 06/11] tests/swscale: print performance stats on exit Niklas Haas 2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 07/11] tests/swscale: check supported inputs for legacy swscale separately Niklas Haas 2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 08/11] tests/swscale: remove stray whitespace in scanf format Niklas Haas 2025-03-17 10:43 ` Niklas Haas [this message] 2025-03-17 10:53 ` [FFmpeg-devel] [PATCH 09/11] tests/swscale: calculate theoretical expected SSIM Niklas Haas 2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 10/11] tests/swscale: constrain reference SSIM for low bit depth formats Niklas Haas 2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 11/11] tests/swscale: allow setting log verbosity Niklas Haas
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20250317104357.307832-9-ffmpeg@haasn.xyz \ --to=ffmpeg@haasn.xyz \ --cc=ffmpeg-devel@ffmpeg.org \ --cc=git@haasn.dev \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git