Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Niklas Haas <ffmpeg@haasn.xyz>
To: ffmpeg-devel@ffmpeg.org
Cc: Niklas Haas <git@haasn.dev>
Subject: Re: [FFmpeg-devel] [PATCH 09/11] tests/swscale: calculate theoretical expected SSIM
Date: Mon, 17 Mar 2025 11:53:36 +0100
Message-ID: <20250317115336.GB310232@haasn.xyz> (raw)
In-Reply-To: <20250317104357.307832-9-ffmpeg@haasn.xyz>

On Mon, 17 Mar 2025 11:43:55 +0100 Niklas Haas <ffmpeg@haasn.xyz> wrote:
> From: Niklas Haas <git@haasn.dev>
>
> We can calculate with some confidence the theoretical expected SSIM
> from an "ideal" conversion, by computing the reference SSIM level
> for an image dithered with uniformly distributed quantization noise.
>
> This gives us an additional safety net to check for regressions even in
> the absence of a reference to compare against.

It's worth pointing out that this does reveal some bugs in the current
implementation that were not covered by any pre-existing tests.

> ---
>  libswscale/tests/swscale.c | 74 +++++++++++++++++++++++++++++++-------
>  1 file changed, 62 insertions(+), 12 deletions(-)
>
> diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c
> index 47c58524f6..bce495db90 100644
> --- a/libswscale/tests/swscale.c
> +++ b/libswscale/tests/swscale.c
> @@ -99,6 +99,29 @@ static void exit_handler(int sig)
>      exit(sig);
>  }
>
> +/* Estimate luma variance assuming uniform dither noise distribution */
> +static float estimate_quantization_noise(enum AVPixelFormat fmt)
> +{
> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
> +    float variance = 1.0 / 12;
> +    if (desc->comp[0].depth < 8) {
> +        /* Extra headroom for very low bit depth output */
> +        variance *= (8 - desc->comp[0].depth);
> +    }
> +
> +    if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) {
> +        return 0.0;
> +    } else if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
> +        const float r = 0.299 / (1 << desc->comp[0].depth);
> +        const float g = 0.587 / (1 << desc->comp[1].depth);
> +        const float b = 0.114 / (1 << desc->comp[2].depth);
> +        return (r * r + g * g + b * b) * variance;
> +    } else {
> +        const float y = 1.0 / (1 << desc->comp[0].depth);
> +        return y * y * variance;
> +    }
> +}
> +
>  static int fmt_comps(enum AVPixelFormat fmt)
>  {
>      const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
> @@ -156,6 +179,18 @@ static void get_ssim(float ssim[4], const AVFrame *out, const AVFrame *ref, int
>      }
>  }
>
> +static float get_loss(const float ssim[4])
> +{
> +    const float weights[3] = { 0.8, 0.1, 0.1 }; /* tuned for Y'CrCr */
> +
> +    float sum = 0;
> +    for (int i = 0; i < 3; i++)
> +        sum += weights[i] * ssim[i];
> +    sum *= ssim[3]; /* ensure alpha errors get caught */
> +
> +    return 1.0 - sum;
> +}
> +
>  static int scale_legacy(AVFrame *dst, const AVFrame *src, struct mode mode,
>                          struct options opts)
>  {
> @@ -198,6 +233,18 @@ static int run_test(enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt,
>      int64_t time, time_ref = 0;
>      int ret = -1;
>
> +    /* Estimate the expected amount of loss from bit depth reduction */
> +    const float c1 = 0.01 * 0.01; /* stabilization constant */
> +    const float ref_var = 1.0 / 12.0; /* uniformly distributed signal */
> +    const float src_var = estimate_quantization_noise(src_fmt);
> +    const float dst_var = estimate_quantization_noise(dst_fmt);
> +    const float out_var = estimate_quantization_noise(ref->format);
> +    const float total_var = src_var + dst_var + out_var;
> +    const float ssim_luma = (2 * ref_var + c1) / (2 * ref_var + total_var + c1);
> +    const float ssim_expected[4] = { ssim_luma, 1, 1, 1 }; /* for simplicity */
> +    const float expected_loss = get_loss(ssim_expected);
> +    float loss;
> +
>      src = av_frame_alloc();
>      dst = av_frame_alloc();
>      out = av_frame_alloc();
> @@ -251,6 +298,15 @@ static int run_test(enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt,
>             mode.flags, mode.dither,
>             ssim[0], ssim[1], ssim[2], ssim[3]);
>
> +    loss = get_loss(ssim);
> +    if (loss - expected_loss > 1e-4 && dst_w >= ref->width && dst_h >= ref->height) {
> +        int bad = loss - expected_loss > 1e-2;
> +        printf("\033[1;31m  loss %g is %s by %g, expected loss %g\033[0m\n",
> +               loss, bad ? "WORSE" : "worse", loss - expected_loss, expected_loss);
> +        if (bad)
> +            goto error;
> +    }
> +
>      if (!ssim_ref && sws_isSupportedInput(src->format) && sws_isSupportedOutput(dst->format)) {
>          /* Compare against the legacy swscale API as a reference */
>          time_ref = av_gettime_relative();
> @@ -269,18 +325,12 @@ static int run_test(enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt,
>      }
>
>      if (ssim_ref) {
> -        const float weights[4] = { 0.8, 0.1, 0.1, 1.0 }; /* tuned for Y'CrCr */
> -        float err, sum = 0, sum_ref = 0;
> -        for (int i = 0; i < 4; i++) {
> -            sum     += weights[i] * ssim[i];
> -            sum_ref += weights[i] * ssim_ref[i];
> -        }
> -
> -        err = sum_ref / sum - 1.0; /* relative error */
> -        if (err > 1e-4 /* 0.01% headroom for dither noise etc */) {
> -            int bad = err > 1e-2; /* 1% */
> -            printf("\033[1;31m  %s by %f%%, ref SSIM {Y=%f U=%f V=%f A=%f}\033[0m\n",
> -                   bad ? "WORSE" : "worse", 100.0 * err,
> +        const float loss_ref = get_loss(ssim_ref);
> +        if (loss - loss_ref > 1e-4) {
> +            int bad = loss - loss_ref > 1e-2;
> +            printf("\033[1;31m  loss %g is %s by %g, ref loss %g, "
> +                   "SSIM {Y=%f U=%f V=%f A=%f}\033[0m\n",
> +                   loss, bad ? "WORSE" : "worse", loss - loss_ref, loss_ref,
>                     ssim_ref[0], ssim_ref[1], ssim_ref[2], ssim_ref[3]);
>              if (bad)
>                  goto error;
> --
> 2.48.1
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  reply	other threads:[~2025-03-17 10:53 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-17 10:43 [FFmpeg-devel] [PATCH 01/11] tests/swscale: allow choosing specific flags and dither mode Niklas Haas
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 02/11] tests/swscale: allow testing only unscaled convertors Niklas Haas
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 03/11] tests/swscale: print speedup numbers in color Niklas Haas
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 04/11] tests/swscale: use yuva444p as reference Niklas Haas
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 05/11] tests/swscale: switch from MSE to SSIM Niklas Haas
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 06/11] tests/swscale: print performance stats on exit Niklas Haas
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 07/11] tests/swscale: check supported inputs for legacy swscale separately Niklas Haas
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 08/11] tests/swscale: remove stray whitespace in scanf format Niklas Haas
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 09/11] tests/swscale: calculate theoretical expected SSIM Niklas Haas
2025-03-17 10:53   ` Niklas Haas [this message]
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 10/11] tests/swscale: constrain reference SSIM for low bit depth formats Niklas Haas
2025-03-17 10:43 ` [FFmpeg-devel] [PATCH 11/11] tests/swscale: allow setting log verbosity Niklas Haas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250317115336.GB310232@haasn.xyz \
    --to=ffmpeg@haasn.xyz \
    --cc=ffmpeg-devel@ffmpeg.org \
    --cc=git@haasn.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git