On Sun, Oct 30, 2022 at 05:32:35PM -0700, mindmark@gmail.com wrote: > From: Mark Reid > > --- > libswscale/output.c | 92 ++++++++++++++++++++++++ > libswscale/swscale_unscaled.c | 4 +- > libswscale/tests/floatimg_cmp.c | 4 +- > libswscale/utils.c | 16 +++-- > libswscale/yuv2rgb.c | 2 + > tests/ref/fate/filter-pixdesc-rgbaf32be | 1 + > tests/ref/fate/filter-pixdesc-rgbaf32le | 1 + > tests/ref/fate/filter-pixdesc-rgbf32be | 1 + > tests/ref/fate/filter-pixdesc-rgbf32le | 1 + > tests/ref/fate/filter-pixfmts-copy | 4 ++ > tests/ref/fate/filter-pixfmts-crop | 4 ++ > tests/ref/fate/filter-pixfmts-field | 4 ++ > tests/ref/fate/filter-pixfmts-fieldorder | 4 ++ > tests/ref/fate/filter-pixfmts-hflip | 4 ++ > tests/ref/fate/filter-pixfmts-il | 4 ++ > tests/ref/fate/filter-pixfmts-null | 4 ++ > tests/ref/fate/filter-pixfmts-scale | 4 ++ > tests/ref/fate/filter-pixfmts-transpose | 4 ++ > tests/ref/fate/filter-pixfmts-vflip | 4 ++ > tests/ref/fate/sws-floatimg-cmp | 16 +++++ > 20 files changed, 170 insertions(+), 8 deletions(-) > create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32be > create mode 100644 tests/ref/fate/filter-pixdesc-rgbaf32le > create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32be > create mode 100644 tests/ref/fate/filter-pixdesc-rgbf32le > > diff --git a/libswscale/output.c b/libswscale/output.c > index 0e1c1225a0..e2ec9cbdf5 100644 > --- a/libswscale/output.c > +++ b/libswscale/output.c > @@ -2474,6 +2474,92 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, > } > } > > +static void > +yuv2rgbaf32_full_X_c(SwsContext *c, const int16_t *lumFilter, > + const int16_t **lumSrcx, int lumFilterSize, > + const int16_t *chrFilter, const int16_t **chrUSrcx, > + const int16_t **chrVSrcx, int chrFilterSize, > + const int16_t **alpSrcx, uint8_t *dest, > + int dstW, int y) > +{ > + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); > + int i; > + int alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA; > + int hasAlpha = alpha && alpSrcx; > 
+ int pixelStep = alpha ? 4 : 3; > + uint32_t *dest32 = (uint32_t*)dest; > + const int32_t **lumSrc = (const int32_t**)lumSrcx; > + const int32_t **chrUSrc = (const int32_t**)chrUSrcx; > + const int32_t **chrVSrc = (const int32_t**)chrVSrcx; > + const int32_t **alpSrc = (const int32_t**)alpSrcx; > + static const float float_mult = 1.0f / 65535.0f; > + uint32_t a = av_float2int(1.0f); > + > + for (i = 0; i < dstW; i++) { > + int j; > + int Y = -0x40000000; > + int U = -(128 << 23); > + int V = -(128 << 23); > + int R, G, B, A; > + > + for (j = 0; j < lumFilterSize; j++) > + Y += lumSrc[j][i] * (unsigned)lumFilter[j]; > + > + for (j = 0; j < chrFilterSize; j++) { > + U += chrUSrc[j][i] * (unsigned)chrFilter[j]; > + V += chrVSrc[j][i] * (unsigned)chrFilter[j]; > + } > + > + Y >>= 14; > + Y += 0x10000; > + U >>= 14; > + V >>= 14; > + > + if (hasAlpha) { > + A = -0x40000000; > + > + for (j = 0; j < lumFilterSize; j++) > + A += alpSrc[j][i] * (unsigned)lumFilter[j]; > + > + A >>= 1; > + A += 0x20002000; > + a = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); > + } > + > + Y -= c->yuv2rgb_y_offset; > + Y *= c->yuv2rgb_y_coeff; > + Y += 1 << 13; > + R = V * c->yuv2rgb_v2r_coeff; > + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; > + B = U * c->yuv2rgb_u2b_coeff; > + > + R = av_clip_uintp2(Y + R, 30); > + G = av_clip_uintp2(Y + G, 30); > + B = av_clip_uintp2(Y + B, 30); These additions can overflow, I think, given sufficiently "bad" input, especially with the bt2020 matrix. I've posted a proposed solution for the rgba64 / gbrp16/32f cases; something similar can be done here. Thanks. [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Opposition brings concord. Out of discord comes the fairest harmony. -- Heraclitus