On 18/01/2025 14:00, Michael Niedermayer wrote: > This performs a tiny bit better than not using it but it is incompatible > with the RCT which gives a bigger gain > > Sponsored-by: Sovereign Tech Fund > Signed-off-by: Michael Niedermayer > --- > libavcodec/ffv1.h | 20 ++++++++++++++++++++ > libavcodec/ffv1enc_template.c | 33 ++++++++++++++++++++++++--------- > 2 files changed, 44 insertions(+), 9 deletions(-) > > diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h > index 2c2df154037..84a443613df 100644 > --- a/libavcodec/ffv1.h > +++ b/libavcodec/ffv1.h > @@ -196,6 +196,26 @@ static av_always_inline int fold(int diff, int bits) > return diff; > } > > +// We treat infinite as a big number, 0 as a small non 0 number, this is close enough, we ignore denormal numbers > +static av_always_inline int64_t f2i(uint16_t f) > +{ > + int s = -(f>>15); > + int64_t v = (1024L + (f & 1023)) << ((f & 0x7C00) >> 10); > + return (v + s) ^ s; > +} > + > +//undo above > +static av_always_inline uint16_t i2f(int64_t v) > +{ > + int s = v>>63; > + int e; > + v = (v + s) ^ s; > + > + e = av_log2(v>>10); > + > + return (s&32768) + (e<<10) + ((v + (1U<<e>>1)) >> e) - 1024; > +} > + > static inline void update_vlc_state(VlcState *const state, const int v) > { > int drift = state->drift; > diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c > index bc14926ab95..12d519320c0 100644 > --- a/libavcodec/ffv1enc_template.c > +++ b/libavcodec/ffv1enc_template.c > @@ -64,7 +64,13 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc, > > context = RENAME(get_context)(f->quant_tables[p->quant_table_index], > sample[0] + x, sample[1] + x, sample[2] + x); > - diff = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x); > + > + int64_t L = f2i(sample[0][x-1]); > + int64_t T = f2i(sample[1][x]); > + int64_t LT = f2i(sample[1][x-1]); > + > + diff = sample[0][x] - mid_pred(i2f(L), i2f(L + T - LT), i2f(T)); > +// diff = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + 
x); > > if (context < 0) { > context = -context; > @@ -147,7 +153,16 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc, > > memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES * > (w + 6) * sizeof(*RENAME(sc->sample_buffer))); > - > + if(0){ > + int v; > + for (v = 0; v<65536; v++) { > + int64_t i = f2i(v); > + int v2 = i2f(i); > + if (v != v2) > + av_log(0,0, "D %X %X %LX\n", v, v2, i); > + av_assert0(v2 == v); > + } > + } > for (y = 0; y < h; y++) { > for (i = 0; i < ring_size; i++) > for (p = 0; p < MAX_PLANES; p++) > @@ -180,13 +195,13 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc, > r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y)); > } > > - if (sc->slice_coding_mode != 1) { > - b -= g; > - r -= g; > - g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2; > - b += offset; > - r += offset; > - } > +// if (sc->slice_coding_mode != 1) { > +// b -= g; > +// r -= g; > +// g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2; > +// b += offset; > +// r += offset; > +// } > > sample[0][0][x] = g; > sample[1][0][x] = b; I think using prediction independently on both exponent and mantissa, and coding the two via different contexts would be better, not to mention it would avoid the f2i hacks.