On Sat, Jan 18, 2025 at 05:33:19PM +0900, Lynne wrote:
> On 18/01/2025 14:00, Michael Niedermayer wrote:
> > This performs a tiny bit better than not using it but it is incompatible
> > with the RCT which gives a bigger gain
> > 
> > Sponsored-by: Sovereign Tech Fund
> > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > ---
> >   libavcodec/ffv1.h             | 20 ++++++++++++++++++++
> >   libavcodec/ffv1enc_template.c | 33 ++++++++++++++++++++++++---------
> >   2 files changed, 44 insertions(+), 9 deletions(-)
> > 
> > diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
> > index 2c2df154037..84a443613df 100644
> > --- a/libavcodec/ffv1.h
> > +++ b/libavcodec/ffv1.h
> > @@ -196,6 +196,26 @@ static av_always_inline int fold(int diff, int bits)
> >       return diff;
> >   }
> > +// We treat infinite as a big number, 0 as a small non 0 number, this is close enough, we ignore denormal numbers
> > +static av_always_inline int64_t f2i(uint16_t f)
> > +{
> > +    int s = -(f>>15);
> > +    int64_t v = (1024L + (f & 1023)) << ((f & 0x7C00) >> 10);
> > +    return (v + s) ^ s;
> > +}
> > +
> > +//undo above
> > +static av_always_inline uint16_t i2f(int64_t v)
> > +{
> > +    int s = v>>63;
> > +    int e;
> > +    v = (v + s) ^ s;
> > +
> > +    e = av_log2(v>>10);
> > +
> > +    return (s&32768) + (e<<10) + ((v + (1U<<e)/2) >> e) - 1024;
> > +}
> > +
> >   static inline void update_vlc_state(VlcState *const state, const int v)
> >   {
> >       int drift = state->drift;
> > diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c
> > index bc14926ab95..12d519320c0 100644
> > --- a/libavcodec/ffv1enc_template.c
> > +++ b/libavcodec/ffv1enc_template.c
> > @@ -64,7 +64,13 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
> >           context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
> >                                         sample[0] + x, sample[1] + x, sample[2] + x);
> > -        diff    = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x);
> > +
> > +        int64_t  L = f2i(sample[0][x-1]);
> > +        int64_t  T = f2i(sample[1][x]);
> > +        int64_t LT = f2i(sample[1][x-1]);
> > +
> > +        diff    = sample[0][x] - mid_pred(i2f(L), i2f(L + T - LT), i2f(T));
> > +//         diff    = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x);
> >           if (context < 0) {
> >               context = -context;
> > @@ -147,7 +153,16 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc,
> >       memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES *
> >              (w + 6) * sizeof(*RENAME(sc->sample_buffer)));
> > -
> > +    if(0){
> > +        int v;
> > +        for (v = 0; v<65536; v++) {
> > +            int64_t i = f2i(v);
> > +            int v2 = i2f(i);
> > +            if (v != v2)
> > +                av_log(0,0, "D %X %X %LX\n", v, v2, i);
> > +            av_assert0(v2 == v);
> > +        }
> > +    }
> >       for (y = 0; y < h; y++) {
> >           for (i = 0; i < ring_size; i++)
> >               for (p = 0; p < MAX_PLANES; p++)
> > @@ -180,13 +195,13 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc,
> >                   r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y));
> >               }
> > -            if (sc->slice_coding_mode != 1) {
> > -                b -= g;
> > -                r -= g;
> > -                g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2;
> > -                b += offset;
> > -                r += offset;
> > -            }
> > +//             if (sc->slice_coding_mode != 1) {
> > +//                 b -= g;
> > +//                 r -= g;
> > +//                 g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2;
> > +//                 b += offset;
> > +//                 r += offset;
> > +//             }
> >               sample[0][0][x] = g;
> >               sample[1][0][x] = b;
> 
> I think using prediction independently on both exponent and mantissa, and
> coding the two via different contexts would be better, not to mention it
> would avoid the f2i hacks.

I thought so too, but so far I don't have a piece of code that stores
them separately and performs better.

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

There will always be a question for which you do not know the correct answer.