From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: <ffmpeg-devel-bounces@ffmpeg.org> Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id 105A54E85F for <ffmpegdev@gitmailbox.com>; Mon, 17 Mar 2025 10:53:47 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 5832C68AB35; Mon, 17 Mar 2025 12:53:43 +0200 (EET) Received: from haasn.dev (haasn.dev [78.46.187.166]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 76ECD687C46 for <ffmpeg-devel@ffmpeg.org>; Mon, 17 Mar 2025 12:53:37 +0200 (EET) Received: from haasn.dev (unknown [10.30.1.1]) by haasn.dev (Postfix) with UTF8SMTP id 32FCC406F0; Mon, 17 Mar 2025 11:53:37 +0100 (CET) Date: Mon, 17 Mar 2025 11:53:36 +0100 Message-ID: <20250317115336.GB310232@haasn.xyz> From: Niklas Haas <ffmpeg@haasn.xyz> To: ffmpeg-devel@ffmpeg.org In-Reply-To: <20250317104357.307832-9-ffmpeg@haasn.xyz> References: <20250317104357.307832-1-ffmpeg@haasn.xyz> <20250317104357.307832-9-ffmpeg@haasn.xyz> MIME-Version: 1.0 Content-Disposition: inline Subject: Re: [FFmpeg-devel] [PATCH 09/11] tests/swscale: calculate theoretical expected SSIM X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org> List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>, <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe> List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel> List-Post: <mailto:ffmpeg-devel@ffmpeg.org> List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help> List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>, <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe> Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Niklas Haas <git@haasn.dev> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: 
ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org> Archived-At: <https://master.gitmailbox.com/ffmpegdev/20250317115336.GB310232@haasn.xyz/> List-Archive: <https://master.gitmailbox.com/ffmpegdev/> List-Post: <mailto:ffmpegdev@gitmailbox.com> On Mon, 17 Mar 2025 11:43:55 +0100 Niklas Haas <ffmpeg@haasn.xyz> wrote: > From: Niklas Haas <git@haasn.dev> > > We can calculate with some confidence the theoretical expected SSIM > from an "ideal" conversion, by computing the reference SSIM level > for an image dithered with uniformly distributed quantization noise. > > This gives us an additional safety net to check for regressions even in > the absence of a reference to compare against. It's worth pointing out that this does reveal some bugs in the current implementation that were not covered by any pre-existing tests. > --- > libswscale/tests/swscale.c | 74 +++++++++++++++++++++++++++++++------- > 1 file changed, 62 insertions(+), 12 deletions(-) > > diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c > index 47c58524f6..bce495db90 100644 > --- a/libswscale/tests/swscale.c > +++ b/libswscale/tests/swscale.c > @@ -99,6 +99,29 @@ static void exit_handler(int sig) > exit(sig); > } > > +/* Estimate luma variance assuming uniform dither noise distribution */ > +static float estimate_quantization_noise(enum AVPixelFormat fmt) > +{ > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); > + float variance = 1.0 / 12; > + if (desc->comp[0].depth < 8) { > + /* Extra headroom for very low bit depth output */ > + variance *= (8 - desc->comp[0].depth); > + } > + > + if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { > + return 0.0; > + } else if (desc->flags & AV_PIX_FMT_FLAG_RGB) { > + const float r = 0.299 / (1 << desc->comp[0].depth); > + const float g = 0.587 / (1 << desc->comp[1].depth); > + const float b = 0.114 / (1 << desc->comp[2].depth); > + return (r * r + g * g + b * b) * variance; > + } else { > + const float y = 1.0 / 
(1 << desc->comp[0].depth); > + return y * y * variance; > + } > +} > + > static int fmt_comps(enum AVPixelFormat fmt) > { > const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); > @@ -156,6 +179,18 @@ static void get_ssim(float ssim[4], const AVFrame *out, const AVFrame *ref, int > } > } > > +static float get_loss(const float ssim[4]) > +{ > + const float weights[3] = { 0.8, 0.1, 0.1 }; /* tuned for Y'CrCr */ > + > + float sum = 0; > + for (int i = 0; i < 3; i++) > + sum += weights[i] * ssim[i]; > + sum *= ssim[3]; /* ensure alpha errors get caught */ > + > + return 1.0 - sum; > +} > + > static int scale_legacy(AVFrame *dst, const AVFrame *src, struct mode mode, > struct options opts) > { > @@ -198,6 +233,18 @@ static int run_test(enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, > int64_t time, time_ref = 0; > int ret = -1; > > + /* Estimate the expected amount of loss from bit depth reduction */ > + const float c1 = 0.01 * 0.01; /* stabilization constant */ > + const float ref_var = 1.0 / 12.0; /* uniformly distributed signal */ > + const float src_var = estimate_quantization_noise(src_fmt); > + const float dst_var = estimate_quantization_noise(dst_fmt); > + const float out_var = estimate_quantization_noise(ref->format); > + const float total_var = src_var + dst_var + out_var; > + const float ssim_luma = (2 * ref_var + c1) / (2 * ref_var + total_var + c1); > + const float ssim_expected[4] = { ssim_luma, 1, 1, 1 }; /* for simplicity */ > + const float expected_loss = get_loss(ssim_expected); > + float loss; > + > src = av_frame_alloc(); > dst = av_frame_alloc(); > out = av_frame_alloc(); > @@ -251,6 +298,15 @@ static int run_test(enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, > mode.flags, mode.dither, > ssim[0], ssim[1], ssim[2], ssim[3]); > > + loss = get_loss(ssim); > + if (loss - expected_loss > 1e-4 && dst_w >= ref->width && dst_h >= ref->height) { > + int bad = loss - expected_loss > 1e-2; > + printf("\033[1;31m loss %g is %s by %g, 
expected loss %g\033[0m\n", > + loss, bad ? "WORSE" : "worse", loss - expected_loss, expected_loss); > + if (bad) > + goto error; > + } > + > if (!ssim_ref && sws_isSupportedInput(src->format) && sws_isSupportedOutput(dst->format)) { > /* Compare against the legacy swscale API as a reference */ > time_ref = av_gettime_relative(); > @@ -269,18 +325,12 @@ static int run_test(enum AVPixelFormat src_fmt, enum AVPixelFormat dst_fmt, > } > > if (ssim_ref) { > - const float weights[4] = { 0.8, 0.1, 0.1, 1.0 }; /* tuned for Y'CrCr */ > - float err, sum = 0, sum_ref = 0; > - for (int i = 0; i < 4; i++) { > - sum += weights[i] * ssim[i]; > - sum_ref += weights[i] * ssim_ref[i]; > - } > - > - err = sum_ref / sum - 1.0; /* relative error */ > - if (err > 1e-4 /* 0.01% headroom for dither noise etc */) { > - int bad = err > 1e-2; /* 1% */ > - printf("\033[1;31m %s by %f%%, ref SSIM {Y=%f U=%f V=%f A=%f}\033[0m\n", > - bad ? "WORSE" : "worse", 100.0 * err, > + const float loss_ref = get_loss(ssim_ref); > + if (loss - loss_ref > 1e-4) { > + int bad = loss - loss_ref > 1e-2; > + printf("\033[1;31m loss %g is %s by %g, ref loss %g, " > + "SSIM {Y=%f U=%f V=%f A=%f}\033[0m\n", > + loss, bad ? "WORSE" : "worse", loss - loss_ref, loss_ref, > ssim_ref[0], ssim_ref[1], ssim_ref[2], ssim_ref[3]); > if (bad) > goto error; > -- > 2.48.1 > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".