On 18/01/2025 14:00, Michael Niedermayer wrote:
> This performs a tiny bit better than not using it, but it is incompatible
> with the RCT, which gives a bigger gain.
> 
> Sponsored-by: Sovereign Tech Fund
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
>   libavcodec/ffv1.h             | 20 ++++++++++++++++++++
>   libavcodec/ffv1enc_template.c | 33 ++++++++++++++++++++++++---------
>   2 files changed, 44 insertions(+), 9 deletions(-)
> 
> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
> index 2c2df154037..84a443613df 100644
> --- a/libavcodec/ffv1.h
> +++ b/libavcodec/ffv1.h
> @@ -196,6 +196,26 @@ static av_always_inline int fold(int diff, int bits)
>       return diff;
>   }
>   
> +// We treat infinity as a big number and 0 as a small nonzero number; this is close enough. Denormal numbers are ignored.
> +static av_always_inline int64_t f2i(uint16_t f)
> +{
> +    int s = -(f>>15);
> +    int64_t v = (1024L + (f & 1023)) << ((f & 0x7C00) >> 10);
> +    return (v + s) ^ s;
> +}
> +
> +// Inverse of f2i() above.
> +static av_always_inline uint16_t i2f(int64_t v)
> +{
> +    int s = v>>63;
> +    int e;
> +    v = (v + s) ^ s;
> +
> +    e = av_log2(v>>10);
> +
> +    return (s&32768) + (e<<10) + ((v + (1U<<e)/2) >> e) - 1024;
> +}
> +
>   static inline void update_vlc_state(VlcState *const state, const int v)
>   {
>       int drift = state->drift;
> diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c
> index bc14926ab95..12d519320c0 100644
> --- a/libavcodec/ffv1enc_template.c
> +++ b/libavcodec/ffv1enc_template.c
> @@ -64,7 +64,13 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
>   
>           context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
>                                         sample[0] + x, sample[1] + x, sample[2] + x);
> -        diff    = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x);
> +
> +        int64_t  L = f2i(sample[0][x-1]);
> +        int64_t  T = f2i(sample[1][x]);
> +        int64_t LT = f2i(sample[1][x-1]);
> +
> +        diff    = sample[0][x] - mid_pred(i2f(L), i2f(L + T - LT), i2f(T));
> +//         diff    = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x);
>   
>           if (context < 0) {
>               context = -context;
> @@ -147,7 +153,16 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc,
>   
>       memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES *
>              (w + 6) * sizeof(*RENAME(sc->sample_buffer)));
> -
> +    if(0){
> +        int v;
> +        for (v = 0; v<65536; v++) {
> +            int64_t i = f2i(v);
> +            int v2 = i2f(i);
> +            if (v != v2)
> +                av_log(0,0, "D %X %X %LX\n", v, v2, i);
> +            av_assert0(v2 == v);
> +        }
> +    }
>       for (y = 0; y < h; y++) {
>           for (i = 0; i < ring_size; i++)
>               for (p = 0; p < MAX_PLANES; p++)
> @@ -180,13 +195,13 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc,
>                   r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y));
>               }
>   
> -            if (sc->slice_coding_mode != 1) {
> -                b -= g;
> -                r -= g;
> -                g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2;
> -                b += offset;
> -                r += offset;
> -            }
> +//             if (sc->slice_coding_mode != 1) {
> +//                 b -= g;
> +//                 r -= g;
> +//                 g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2;
> +//                 b += offset;
> +//                 r += offset;
> +//             }
>   
>               sample[0][0][x] = g;
>               sample[1][0][x] = b;

I think predicting the exponent and the mantissa independently, and
coding the two via different contexts, would be better; it would also
avoid the f2i hacks.