[FFmpeg-devel] [PATCH 1/2] libavcodec/ffv1: Support storing decorrelated LSB raw without rangecoder

From: Michael Niedermayer <michael@niedermayer.cc>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH 1/2] libavcodec/ffv1: Support storing decorrelated LSB raw without rangecoder
Date: Fri,  7 Mar 2025 01:36:02 +0100
Message-ID: <20250307003603.962753-1-michael@niedermayer.cc> (raw)

With 16-bit float, rawlsb 2 gives 0.66% better compression. This may be
due to the quantization tables being tuned for a smaller number of bits.

rawlsb 4 is about 30% faster than rawlsb 0, with about 1% worse compression.

The above was tested using ACES_OT_VWG_SampleFrames.

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavcodec/ffv1.h             |  4 ++++
 libavcodec/ffv1_template.c    | 19 ++++++++++---------
 libavcodec/ffv1dec.c          | 16 ++++++++++++++--
 libavcodec/ffv1dec_template.c | 20 ++++++++++++++++----
 libavcodec/ffv1enc.c          | 30 ++++++++++++++++++++++++++++--
 libavcodec/ffv1enc_template.c | 22 +++++++++++++++++++---
 6 files changed, 91 insertions(+), 20 deletions(-)

diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index c23d64d54a4..189004f7981 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -85,10 +85,13 @@ typedef struct FFV1SliceContext {
     int slice_rct_by_coef;
     int slice_rct_ry_coef;
     int remap;
+    int rawlsb;
 
     // RefStruct reference, array of MAX_PLANES elements
     PlaneContext *plane;
     PutBitContext pb;
+    PutBitContext rawlsb_pb;
+    GetBitContext rawlsb_gb;
     RangeCoder c;
 
     int ac_byte_count;                   ///< number of bytes used for AC coding
@@ -146,6 +149,7 @@ typedef struct FFV1Context {
     int key_frame_ok;
     int context_model;
     int qtable;
+    int rawlsb;
 
     int bits_per_raw_sample;
     int packed_at_lsb;
diff --git a/libavcodec/ffv1_template.c b/libavcodec/ffv1_template.c
index abb90a12e49..10206702ee8 100644
--- a/libavcodec/ffv1_template.c
+++ b/libavcodec/ffv1_template.c
@@ -30,24 +30,25 @@ static inline int RENAME(predict)(TYPE *src, TYPE *last)
 }
 
 static inline int RENAME(get_context)(const int16_t quant_table[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE],
-                                      TYPE *src, TYPE *last, TYPE *last2)
+                                      TYPE *src, TYPE *last, TYPE *last2, int rawlsb)
 {
     const int LT = last[-1];
     const int T  = last[0];
     const int RT = last[1];
     const int L  = src[-1];
+    const int rawoff = (1<<rawlsb) >> 1;
 
     if (quant_table[3][127] || quant_table[4][127]) {
         const int TT = last2[0];
         const int LL = src[-2];
-        return quant_table[0][(L - LT) & MAX_QUANT_TABLE_MASK] +
-               quant_table[1][(LT - T) & MAX_QUANT_TABLE_MASK] +
-               quant_table[2][(T - RT) & MAX_QUANT_TABLE_MASK] +
-               quant_table[3][(LL - L) & MAX_QUANT_TABLE_MASK] +
-               quant_table[4][(TT - T) & MAX_QUANT_TABLE_MASK];
+        return quant_table[0][(L - LT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+               quant_table[1][(LT - T + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+               quant_table[2][(T - RT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+               quant_table[3][(LL - L + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+               quant_table[4][(TT - T + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK];
     } else
-        return quant_table[0][(L - LT) & MAX_QUANT_TABLE_MASK] +
-               quant_table[1][(LT - T) & MAX_QUANT_TABLE_MASK] +
-               quant_table[2][(T - RT) & MAX_QUANT_TABLE_MASK];
+        return quant_table[0][(L - LT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+               quant_table[1][(LT - T + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK] +
+               quant_table[2][(T - RT + rawoff >> rawlsb) & MAX_QUANT_TABLE_MASK];
 }
 
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 3926659ebc9..0f2956eabf7 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -227,6 +227,7 @@ static int decode_slice_header(const FFV1Context *f,
                 av_log(f->avctx, AV_LOG_ERROR, "unsupported remap %d\n", sc->remap);
                 return AVERROR_INVALIDDATA;
             }
+            sc->rawlsb = ff_ffv1_get_symbol(c, state, 0);
         }
     }
 
@@ -248,6 +249,7 @@ static int decode_slice(AVCodecContext *c, void *arg)
     FFV1Context *f    = c->priv_data;
     FFV1SliceContext *sc = arg;
     int width, height, x, y, ret;
+    int chroma_width, chroma_height;
     const int ps      = av_pix_fmt_desc_get(f->pix_fmt)->comp[0].step;
     AVFrame * const p = f->picture.f;
     const int      si = sc - f->slices;
@@ -284,6 +286,8 @@ static int decode_slice(AVCodecContext *c, void *arg)
     height = sc->slice_height;
     x      = sc->slice_x;
     y      = sc->slice_y;
+    chroma_width  = AV_CEIL_RSHIFT(width,  f->chroma_h_shift);
+    chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
 
     if (ac == AC_GOLOMB_RICE) {
         if (f->combined_version >= 0x30002)
@@ -293,11 +297,17 @@ static int decode_slice(AVCodecContext *c, void *arg)
                       sc->c.bytestream_start + sc->ac_byte_count,
                       (sc->c.bytestream_end - sc->c.bytestream_start - sc->ac_byte_count) * 8);
     }
+    if (sc->rawlsb) {
+        int lsb_size = sc->rawlsb * (width * height * (1 + f->transparency) + chroma_width * chroma_height * 2 * f->chroma_planes);
+        int lsb_size_bytes = (lsb_size + 7) >> 3;
+
+        init_get_bits(&sc->rawlsb_gb,
+                      sc->c.bytestream_end - lsb_size_bytes - 3 - 5*!!f->ec,
+                      lsb_size);
+    }
 
     av_assert1(width && height);
     if (f->colorspace == 0 && (f->chroma_planes || !f->transparency)) {
-        const int chroma_width  = AV_CEIL_RSHIFT(width,  f->chroma_h_shift);
-        const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
         const int cx            = x >> f->chroma_h_shift;
         const int cy            = y >> f->chroma_v_shift;
         decode_plane(f, sc, &gb, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 1, ac);
@@ -328,6 +338,8 @@ static int decode_slice(AVCodecContext *c, void *arg)
         int v;
         get_rac(&sc->c, (uint8_t[]) { 129 });
         v = sc->c.bytestream_end - sc->c.bytestream - 2 - 5*!!f->ec;
+        if (sc->rawlsb)
+            v -= get_bits_count(&sc->rawlsb_gb) + 7 >> 3;
         if (v) {
             av_log(f->avctx, AV_LOG_ERROR, "bytestream end mismatching by %d\n", v);
             slice_set_damaged(f, sc);
diff --git a/libavcodec/ffv1dec_template.c b/libavcodec/ffv1dec_template.c
index f2c88734fe1..37caacb758d 100644
--- a/libavcodec/ffv1dec_template.c
+++ b/libavcodec/ffv1dec_template.c
@@ -43,10 +43,12 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
         int i;
         for (x = 0; x < w; x++) {
             int v = 0;
-            for (i=0; i<bits; i++) {
+            for (i=0; i<bits - sc->rawlsb; i++) {
                 uint8_t state = 128;
                 v += v + get_rac(c, &state);
             }
+            if (sc->rawlsb)
+                v = (v << sc->rawlsb) + get_bits(&sc->rawlsb_gb, sc->rawlsb);
             sample[1][x] = v;
         }
         return 0;
@@ -60,8 +62,13 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
                 return AVERROR_INVALIDDATA;
         }
 
-        context = RENAME(get_context)(quant_table,
-                                      sample[1] + x, sample[0] + x, sample[1] + x);
+        if (sc->rawlsb) {
+            context = RENAME(get_context)(quant_table,
+                                          sample[1] + x, sample[0] + x, sample[1] + x, sc->rawlsb);
+        } else {
+            context = RENAME(get_context)(quant_table,
+                                          sample[1] + x, sample[0] + x, sample[1] + x, 0);
+        }
         if (context < 0) {
             context = -context;
             sign    = 1;
@@ -71,7 +78,12 @@ RENAME(decode_line)(FFV1Context *f, FFV1SliceContext *sc,
         av_assert2(context < p->context_count);
 
         if (ac != AC_GOLOMB_RICE) {
-            diff = get_symbol_inline(c, p->state[context], 1);
+            if (sc->rawlsb) {
+                const int rawoff = (1<<sc->rawlsb) >> 1;
+                diff = get_bits(&sc->rawlsb_gb, sc->rawlsb);
+                diff += (get_symbol_inline(c, p->state[context], 1) << sc->rawlsb) - rawoff;
+            } else
+                diff = get_symbol_inline(c, p->state[context], 1);
         } else {
             if (context == 0 && run_mode == 0)
                 run_mode = 1;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 1292b4227d3..b4080f29002 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -594,6 +594,9 @@ av_cold int ff_ffv1_encode_init(AVCodecContext *avctx)
     if (s->ec == 2)
         s->version = FFMAX(s->version, 4);
 
+    if (s->rawlsb)
+        s->version = FFMAX(s->version, 4);
+
     if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
         av_log(avctx, AV_LOG_ERROR, "Version 2 or 4 needed for requested features but version 2 or 4 is experimental and not enabled\n");
         return AVERROR_INVALIDDATA;
@@ -920,6 +923,11 @@ static int encode_init_internal(AVCodecContext *avctx)
         }
     }
 
+    if (s->rawlsb > s->bits_per_raw_sample) {
+        av_log(avctx, AV_LOG_ERROR, "too many raw lsb\n");
+        return AVERROR(EINVAL);
+    }
+
 
     ret = ff_ffv1_encode_init(avctx);
     if (ret < 0)
@@ -1003,6 +1011,7 @@ static void encode_slice_header(FFV1Context *f, FFV1SliceContext *sc)
             put_symbol(c, state, sc->slice_rct_ry_coef, 0);
         }
         put_symbol(c, state, sc->remap, 0);
+        put_symbol(c, state, sc->rawlsb, 0);
     }
 }
 
@@ -1113,6 +1122,8 @@ static int encode_slice(AVCodecContext *c, void *arg)
     const int ps     = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step;
     int ret;
     RangeCoder c_bak = sc->c;
+    const int chroma_width  = AV_CEIL_RSHIFT(width,  f->chroma_h_shift);
+    const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
     const uint8_t *planes[4] = {p->data[0] + ps*x + y*p->linesize[0],
                                 p->data[1] ? p->data[1] + ps*x + y*p->linesize[1] : NULL,
                                 p->data[2] ? p->data[2] + ps*x + y*p->linesize[2] : NULL,
@@ -1127,6 +1138,8 @@ static int encode_slice(AVCodecContext *c, void *arg)
         sc->slice_rct_ry_coef = 1;
     }
 
+    sc->rawlsb = f->rawlsb; // we do not optimize this per slice, but other encoders could
+
 retry:
     if (f->key_frame)
         ff_ffv1_clear_slice_state(f, sc);
@@ -1139,10 +1152,15 @@ retry:
                       sc->c.bytestream_start + sc->ac_byte_count,
                       sc->c.bytestream_end - sc->c.bytestream_start - sc->ac_byte_count);
     }
+    if (sc->rawlsb) {
+        int lsb_size = sc->rawlsb * (width * height * (1 + !!f->transparency) + chroma_width * chroma_height * 2 * f->chroma_planes);
+        int lsb_size_bytes = (lsb_size + 7) >> 3;
+        init_put_bits(&sc->rawlsb_pb,
+                      sc->c.bytestream_end - lsb_size_bytes,
+                      lsb_size_bytes);
+    }
 
     if (f->colorspace == 0 && c->pix_fmt != AV_PIX_FMT_YA8) {
-        const int chroma_width  = AV_CEIL_RSHIFT(width,  f->chroma_h_shift);
-        const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
         const int cx            = x >> f->chroma_h_shift;
         const int cy            = y >> f->chroma_v_shift;
 
@@ -1318,6 +1336,12 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         if (i > 0 || f->version > 2) {
             av_assert0(bytes < pkt->size / f->slice_count);
             memmove(buf_p, sc->c.bytestream_start, bytes);
+            if (sc->rawlsb) {
+                flush_put_bits(&sc->rawlsb_pb);
+                av_assert0(bytes + put_bytes_count(&sc->rawlsb_pb, 1) < pkt->size / f->slice_count);
+                memmove(buf_p + bytes, sc->rawlsb_pb.buf, put_bytes_count(&sc->rawlsb_pb, 1));
+                bytes += put_bytes_count(&sc->rawlsb_pb, 1);
+            }
             av_assert0(bytes < (1 << 24));
             AV_WB24(buf_p + bytes, bytes);
             bytes += 3;
@@ -1377,6 +1401,8 @@ static const AVOption options[] = {
             { .i64 = QTABLE_8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" },
         { "greater8bit", NULL, 0, AV_OPT_TYPE_CONST,
             { .i64 = QTABLE_GT8BIT }, INT_MIN, INT_MAX, VE, .unit = "qtable" },
+    { "rawlsb", "number of LSBs stored RAW", OFFSET(rawlsb), AV_OPT_TYPE_INT,
+            { .i64 = 0 }, 0, 8, VE },
 
     { NULL }
 };
diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c
index af3354497ae..9e1252010dd 100644
--- a/libavcodec/ffv1enc_template.c
+++ b/libavcodec/ffv1enc_template.c
@@ -34,6 +34,8 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
     int run_index = sc->run_index;
     int run_count = 0;
     int run_mode  = 0;
+    const int rawoff = (1<<sc->rawlsb) >> 1;
+    const unsigned mask = (1<<sc->rawlsb) - 1;
 
     if (ac != AC_GOLOMB_RICE) {
         if (c->bytestream_end - c->bytestream < w * 35) {
@@ -51,10 +53,13 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
         for (x = 0; x < w; x++) {
             int i;
             int v = sample[0][x];
-            for (i = bits-1; i>=0; i--) {
+
+            for (i = bits-1; i>=sc->rawlsb; i--) {
                 uint8_t state = 128;
                 put_rac(c, &state, (v>>i) & 1);
             }
+            if (sc->rawlsb)
+                put_bits(&sc->rawlsb_pb, sc->rawlsb, v & mask);
         }
         return 0;
     }
@@ -62,8 +67,14 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
     for (x = 0; x < w; x++) {
         int diff, context;
 
-        context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
-                                      sample[0] + x, sample[1] + x, sample[2] + x);
+        if (sc->rawlsb) {
+            context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
+                                        sample[0] + x, sample[1] + x, sample[2] + x, sc->rawlsb);
+        } else {
+            //try to force a version with rawlsb optimized out
+            context = RENAME(get_context)(f->quant_tables[p->quant_table_index],
+                                        sample[0] + x, sample[1] + x, sample[2] + x, 0);
+        }
         diff    = sample[0][x] - RENAME(predict)(sample[0] + x, sample[1] + x);
 
         if (context < 0) {
@@ -74,6 +85,11 @@ RENAME(encode_line)(FFV1Context *f, FFV1SliceContext *sc,
         diff = fold(diff, bits);
 
         if (ac != AC_GOLOMB_RICE) {
+            if (sc->rawlsb) {
+                diff += rawoff;
+                put_bits(&sc->rawlsb_pb, sc->rawlsb, diff & mask);
+                diff = diff >> sc->rawlsb; // Note, this will be biased on small rawlsb
+            }
             if (pass1) {
                 put_symbol_inline(c, p->state[context], diff, 1, sc->rc_stat,
                                   sc->rc_stat2[p->quant_table_index][context]);
-- 
2.48.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".