On Sat, Jan 18, 2025 at 05:33:19PM +0900, Lynne wrote: > On 18/01/2025 14:00, Michael Niedermayer wrote: > > This performs a tiny bit better than not using it but it is incompatible > > with the RCT which gives a bigger gain > > > > Sponsored-by: Sovereign Tech Fund > > Signed-off-by: Michael Niedermayer > > --- > > libavcodec/ffv1.h | 20 ++++++++++++++++++++ > > libavcodec/ffv1enc_template.c | 33 ++++++++++++++++++++++++--------- > > 2 files changed, 44 insertions(+), 9 deletions(-) > > > > diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h > > index 2c2df154037..84a443613df 100644 > > --- a/libavcodec/ffv1.h > > +++ b/libavcodec/ffv1.h > > @@ -196,6 +196,26 @@ static av_always_inline int fold(int diff, int bits) > > return diff; > > } > > +// We treat infinite as a big number, 0 as a small non 0 number, this is close enough, we ignore denormal numbers > > +static av_always_inline int64_t f2i(uint16_t f) > > +{ > > + int s = -(f>>15); > > + int64_t v = (1024L + (f & 1023)) << ((f & 0x7C00) >> 10); > > + return (v + s) ^ s; > > +} > > + > > +//undo above > > +static av_always_inline uint16_t i2f(int64_t v) > > +{ > > + int s = v>>63; > > + int e; > > + v = (v + s) ^ s; > > + > > + e = av_log2(v>>10); > > + > > + return (s&32768) + (e<<10) + ((v + (1U << e >> 1)) >> e) - 1024; > > +} > > + > > static inline void update_vlc_state(VlcState *const state, const int v) > > { > > int drift = state->drift; > > diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c > > index bc14926ab95..12d519320c0 100644 > > --- a/libavcodec/ffv1enc_template.c > > +++ b/libavcodec/ffv1enc_template.c > > @@ -64,7 +64,13 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc, > > context = RENAME(get_context)(f->quant_tables[p->quant_table_index], > > sample[0] + x, sample[1] + x, sample[2] + x); > > - diff = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x); > > + > > + int64_t L = f2i(sample[0][x-1]); > > + int64_t T = f2i(sample[1][x]); > > + int64_t LT = 
f2i(sample[1][x-1]); > > + > > + diff = sample[0][x] - mid_pred(i2f(L), i2f(L + T - LT), i2f(T)); > > +// diff = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x); > > if (context < 0) { > > context = -context; > > @@ -147,7 +153,16 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc, > > memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES * > > (w + 6) * sizeof(*RENAME(sc->sample_buffer))); > > - > > + if(0){ > > + int v; > > + for (v = 0; v<65536; v++) { > > + int64_t i = f2i(v); > > + int v2 = i2f(i); > > + if (v != v2) > > + av_log(0,0, "D %X %X %LX\n", v, v2, i); > > + av_assert0(v2 == v); > > + } > > + } > > for (y = 0; y < h; y++) { > > for (i = 0; i < ring_size; i++) > > for (p = 0; p < MAX_PLANES; p++) > > @@ -180,13 +195,13 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc, > > r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y)); > > } > > - if (sc->slice_coding_mode != 1) { > > - b -= g; > > - r -= g; > > - g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2; > > - b += offset; > > - r += offset; > > - } > > +// if (sc->slice_coding_mode != 1) { > > +// b -= g; > > +// r -= g; > > +// g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2; > > +// b += offset; > > +// r += offset; > > +// } > > sample[0][0][x] = g; > > sample[1][0][x] = b; > > I think using prediction independently on both exponent and mantissa, and > coding the two via different contexts would be better, not to mention it > would avoid the f2i hacks. I thought so too, but so far I don't have a piece of code that stores them separately and does perform better thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB There will always be a question for which you do not know the correct answer.