[FFmpeg-devel] [PATCH] avcodec/ffv1: NOT FOR GIT experiment to store exponent and mantissa separately

From: Michael Niedermayer <michael@niedermayer.cc>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] avcodec/ffv1: NOT FOR GIT experiment to store exponent and mantissa separately
Date: Sun, 19 Jan 2025 21:44:19 +0100
Message-ID: <20250119204419.4078171-1-michael@niedermayer.cc> (raw)

Storing them separately performs slightly worse (compression-wise)
in every variant I tried.

It would also likely be slower.

If someone has ideas, they are welcome.

This is the result of several hours of tuning: first the
sign bit storage (there's little to gain here, as it's 1% of the bitstream),
then the exponent. After I was done tuning the predictor and context for
the exponent, I tried combining sign and exponent and it performed
better, so this implementation has them combined.
With the mantissa we then achieve 534 MB, while the combined
integer coder achieves 531 MB without RCT and 503 MB with RCT.
The RCT works with integers, not sign/exponent/mantissa vectors.
But if we apply the RCT as if these were integers, the split coder
gives us 527 MB, so the integer coder + integer RCT with some tweaks
still performs best and is also likely faster.

But I'll probably try again after sleeping on this.
Also, using the quantization tables with the split coder
still needs to be tried.

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavcodec/ffv1.h             |   2 +
 libavcodec/ffv1enc_template.c | 115 +++++++++++++++++++++++++++-------
 2 files changed, 95 insertions(+), 22 deletions(-)

diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 3254ae1b56b..934c271954b 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -106,6 +106,8 @@ typedef struct FFV1SliceContext {
         };
     };
     uint16_t   fltmap[4][65536];
+
+    uint8_t newstate[256*256*256*16];
 } FFV1SliceContext;
 
 typedef struct FFV1Context {
diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c
index 4c7cd2e647c..23d6dbe2d05 100644
--- a/libavcodec/ffv1enc_template.c
+++ b/libavcodec/ffv1enc_template.c
@@ -148,6 +148,9 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc,
     memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES *
            (w + 6) * sizeof(*RENAME(sc->sample_buffer)));
 
+    if (!sc->newstate[sizeof(sc->newstate)-1])
+        memset(sc->newstate, 128, sizeof(sc->newstate));
+
     if (sc->remap) {
         memset(sc->fltmap, 0, sizeof(sc->fltmap));
 
@@ -221,39 +224,107 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc,
                 r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y));
             }
 
-            if (sc->remap) {
-                r = sc->fltmap[0][r];
-                g = sc->fltmap[1][g];
-                b = sc->fltmap[2][b];
-                if (transparency)
-                    a = sc->fltmap[3][a];
-            }
+            if (!(0x8000&r))
+                r ^= 0x7FFF;
+            if (!(0x8000&g))
+                g ^= 0x7FFF;
+            if (!(0x8000&b))
+                b ^= 0x7FFF;
 
-            if (sc->slice_coding_mode != 1) {
-                b -= g;
-                r -= g;
-                g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2;
-                b += offset;
-                r += offset;
-            }
+//             if (sc->remap) {
+//                 r = sc->fltmap[0][r];
+//                 g = sc->fltmap[1][g];
+//                 b = sc->fltmap[2][b];
+//                 if (transparency)
+//                     a = sc->fltmap[3][a];
+//             }
+//
+//             if (sc->slice_coding_mode != 1) {
+//                 b -= g;
+//                 r -= g;
+//                 g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2;
+//                 b += offset;
+//                 r += offset;
+//             }
 
             sample[0][0][x] = g;
             sample[1][0][x] = b;
             sample[2][0][x] = r;
             sample[3][0][x] = a;
         }
+
+// Using the exponent context for the sign helps a lot for the sign compression.
+// Combining sign and exponent simply performs better than separate sign and exponent.
+
         for (p = 0; p < 3 + transparency; p++) {
-            int ret;
             sample[p][0][-1] = sample[p][1][0  ];
             sample[p][1][ w] = sample[p][1][w-1];
-            if (lbd && sc->slice_coding_mode == 0)
-                ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p], (p + 1) / 2, 9, ac, pass1);
-            else
-                ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p], (p + 1) / 2,
-                                          bits + (sc->slice_coding_mode != 1), ac, pass1);
-            if (ret < 0)
-                return ret;
+            for (x = 0; x < w; x++) {
+                uint8_t * state = sc->newstate;
+                int X = sample[p][0][x];
+                int L = sample[p][0][x-1];
+                int LL= sample[p][0][x-2];
+                int TT= sample[p][2][x];
+                int T = sample[p][1][x];
+                int TL= sample[p][1][x-1];
+                int TR= sample[p][1][x+1];
+//                 int sign_ctx = (sample[p][0][x-1]>>15) + 2*(sample[p][1][x]>>15) + 4*p;
+//                 put_rac(&sc->c, sc->newstate + sign_ctx, sample[p][0][x] >> 15);
+
+                int sign_ctx = (L>>15) + (T>>15) + 3*p;
+//                 put_rac(&sc->c, state + sign_ctx, X >> 15);
+//                 state += 4*4;
+
+                int LE = (L >>10)&63;
+                int LLE= (LL>>10)&31;
+                int TE = (T >>10)&63;
+                int TTE= (TT>>10)&31;
+                int TLE= (TL>>10)&31;
+                int TRE= (TR>>10)&31;
+                int pred = (LE+TE)/2;
+//                 int pred = mid_pred(LE, TE, LE + TE - TLE);
+                int diff = ((X>>10)&63) - pred;
+                int exp_ctx = (TLE&1) + 8*(LE) + 8*(TE) + 2*(TRE&1) + 4*(LE&1) + 8*128*p;
+//                 put_rac(&sc->c, state + exp_ctx + 32*8*64*4*4, X >> 15);
+                put_symbol_inline(&sc->c, state + 32*exp_ctx, diff, 1, NULL, NULL);
+
+                state += 32*8*128*4;
+
+                int LM = (L  &1023);
+                int LLM= (LL &1023);
+                int TM = (TL &1023);
+                int TTM= (TT &1023);
+                int TLM= (TL &1023);
+                int TRM= (TR &1023);
+                pred = (TM+LM)/2; //bad
+                pred = ((L+T)/2)&1023;
+//                 pred = ((L+T-TL))&1023;
+                pred = mid_pred(L,T,L+T-TL)&1023;
+
+                int mant_ctx = 2*(LE) + 2*(TE) + 2*128*p; //we loose 0.1% by not including LE&1 but it halfs context space
+//                 mant_ctx += 2*128*4*((LM>>9) + 2*(TM>>9)) ;
+                mant_ctx = (TLE&1) + 8*(LE) + 8*(TE) + 2*(TRE&1) + 4*(LE&1) + 8*128*p;
+                mant_ctx += 8*128*4*((TM>>8) + 4*(LM>>8));
+
+                diff = fold((X&1023) - pred, 10);
+                put_symbol_inline(&sc->c, state + 32*mant_ctx, diff, 1, NULL, NULL);
+            }
         }
+
+
+
+//         for (p = 0; p < 3 + transparency; p++) {
+//             int ret;
+//             sample[p][0][-1] = sample[p][1][0  ];
+//             sample[p][1][ w] = sample[p][1][w-1];
+//             if (lbd && sc->slice_coding_mode == 0)
+//                 ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p], (p + 1) / 2, 9, ac, pass1);
+//             else
+//                 ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p], (p + 1) / 2,
+//                                           bits + (sc->slice_coding_mode != 1), ac, pass1);
+//             if (ret < 0)
+//                 return ret;
+//         }
     }
     return 0;
 }
-- 
2.48.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".