Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
@ 2025-03-20 22:30 Michael Niedermayer
  2025-03-20 23:07 ` Lynne
  0 siblings, 1 reply; 11+ messages in thread
From: Michael Niedermayer @ 2025-03-20 22:30 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

This performs about as well as the non-LRU system for 16-bit and
better than the LRU system for 16-bit converted to 32-bit. So
it is basically performing best in all cases we have at the moment, making
the LRU system unneeded.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavcodec/ffv1dec.c | 47 ++++++++++++++++++++++++++++--
 libavcodec/ffv1enc.c | 69 +++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 106 insertions(+), 10 deletions(-)

diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index d45aabbbde8..a28aeacfcbc 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -283,25 +283,66 @@ static int decode_remap(FFV1Context *f, FFV1SliceContext *sc)
     for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) {
         int j = 0;
         int lu = 0;
-        uint8_t state[2][32];
+        uint8_t state[2][2][32];
         int64_t i;
+        int mul[4096+1];
+        int mul_count;
+
+        memset(state, 128, sizeof(state));
+        mul_count = ff_ffv1_get_symbol(&sc->c, state[0][0], 0);
+
+        if (mul_count > 4096U)
+            return AVERROR_INVALIDDATA;
+        for (int i = 0; i<mul_count; i++) {
+            mul[i] = ff_ffv1_get_symbol(&sc->c, state[0][0], 0);
+
+            if (mul[i] > (1U<<30))
+                return AVERROR_PATCHWELCOME;
+        }
+        mul[mul_count] = 1;
+
         memset(state, 128, sizeof(state));
         for (i=0; i <= end ; i++) {
-            unsigned run = get_symbol_inline(&sc->c, state[lu], 0);
+            unsigned run = get_symbol_inline(&sc->c, state[lu][0], 0);
+
             if (run > end - i + 1)
                 return AVERROR_INVALIDDATA;
             if (lu) {
+                if (run > 65536 - j)
+                    return AVERROR_INVALIDDATA;
                 lu ^= !run;
                 while (run--) {
+                    int current_mul = mul[(i * mul_count) >> 32];
+
+                    if (current_mul > 1) {
+                        int delta = get_symbol_inline(&sc->c, state[lu][1], 1);
+
+                        if (delta <= -current_mul || delta > current_mul/2)
+                            return AVERROR_INVALIDDATA; //not sure we should check this
+                        i += current_mul - 1 + delta;
+                    }
                     if (end == 0xFFFF) {
                         sc->fltmap  [p][j++] = i ^ ((i&    0x8000) ? 0 : flip);
                     } else
                         sc->fltmap32[p][j++] = i ^ ((i&0x80000000) ? 0 : flip);
                     i++;
                 }
+                int current_mul = mul[(i * mul_count) >> 32];
+
+                i += current_mul - 1;
             } else {
-                i += run;
+                int current_mul = mul[(i * mul_count) >> 32];
+
+                if (current_mul > 1) {
+                    int delta = get_symbol_inline(&sc->c, state[lu][1], 1);
+                    if (delta <= -current_mul || delta > current_mul/2)
+                        return AVERROR_INVALIDDATA;  //not sure we should check this
+                    i += (run + 1) * current_mul - 1 + delta;
+                } else
+                    i += run;
                 if (i <= end) {
+                    if (j > 65535)
+                        return AVERROR_INVALIDDATA;
                     if (end == 0xFFFF) {
                         sc->fltmap  [p][j++] = i ^ ((i&    0x8000) ? 0 : flip);
                     } else {
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index e557e7fcdfe..cce091ad3c3 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -433,7 +433,7 @@ static void set_micro_version(FFV1Context *f)
         if (f->version == 3) {
             f->micro_version = 4;
         } else if (f->version == 4) {
-            f->micro_version = 6;
+            f->micro_version = 7;
         } else
             av_assert0(0);
 
@@ -1179,6 +1179,9 @@ static void encode_histogram_remap(FFV1Context *f, FFV1SliceContext *sc)
         int lu = 0;
         uint8_t state[2][32];
         int run = 0;
+
+        memset(state, 128, sizeof(state));
+        put_symbol(&sc->c, state[0], 0, 0);
         memset(state, 128, sizeof(state));
         for (int i= 0; i<65536; i++) {
             int ri = i ^ ((i&0x8000) ? 0 : flip);
@@ -1267,11 +1270,44 @@ static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
 
     for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) {
         int lu = 0;
-        uint8_t state[2][32];
+        uint8_t state[2][2][32];
         int run = 0;
         int64_t last_val = -1;
         int compact_index = -1;
+        int mul[4096+1];
+        int mul_count;
+        int delta_stack[65536];
+
+        float score_tab[16] = {0};
+        int best_index = 0;
+        for (int i= 0; i<pixel_num; i++) {
+            int64_t val = unit[p][i].val;
+            if (val != last_val) {
+                av_assert2(last_val < val);
+                for(int si= 0; si < FF_ARRAY_ELEMS(score_tab); si++) {
+                    int64_t delta = val - last_val;
+                    int mul = 1<<si;
+                    int64_t cost = FFMAX((delta + mul/2)  / mul, 1);
+                    score_tab[si] += log2(cost) + fabs(delta - cost*mul);
+                }
+            }
+            last_val = val;
+        }
+        for(int si= 1; si < FF_ARRAY_ELEMS(score_tab); si++) {
+            if (score_tab[si] < score_tab[ best_index ])
+                best_index = si;
+        }
+        memset(state, 128, sizeof(state));
+        mul_count = 1;
+        put_symbol(&sc->c, state[0][0], mul_count, 0);
+        for (int i = 0; i<mul_count; i++) {
+            mul[i] = 1<<best_index;
+            put_symbol(&sc->c, state[0][0], mul[i], 0);
+        }
+        mul[mul_count] = 1;
 
+
+        last_val = -1;
         memset(state, 128, sizeof(state));
         for (int i= 0; i<pixel_num+1; i++) {
             int64_t val;
@@ -1285,26 +1321,45 @@ static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
                 val = unit[p][i].val;
 
             if (last_val != val) {
+                int current_mul = mul[((last_val + 1) * mul_count) >> 32];
+                int64_t delta = 0;
+                av_assert2(last_val < val);
+                if (current_mul > 1) {
+                    delta = val - last_val;
+                    val = FFMAX(1, (delta + current_mul/2) / current_mul);
+
+                    delta -= val*current_mul;
+                    av_assert2(delta <= current_mul/2);
+                    av_assert2(delta > -current_mul);
+                    val += last_val;
+                }
                 av_assert2(last_val < val);
                 if (lu) {
                     if (val - last_val == 1) {
+                        av_assert2(run < FF_ARRAY_ELEMS(delta_stack));
+                        delta_stack[run] = delta;
                         run ++;
-                        last_val = val;
+                        last_val += current_mul + delta;
                     } else {
-                        put_symbol_inline(&sc->c, state[lu], run, 0, NULL, NULL);
+                        put_symbol_inline(&sc->c, state[lu][0], run, 0, NULL, NULL);
+                        if (current_mul>1)
+                            for(int k=0; k<run; k++)
+                                put_symbol_inline(&sc->c, state[lu][1], delta_stack[k], 1, NULL, NULL);
                         if (run == 0)
                             lu ^= 1;
                         run = 0;
                         i--; // we did not encode val so we need to backstep
-                        last_val ++;
+                        last_val += current_mul;
                         continue;
                     }
                 } else {
                     av_assert2(run == 0);
-                    put_symbol_inline(&sc->c, state[lu], val - last_val - 1, 0, NULL, NULL);
+                    put_symbol_inline(&sc->c, state[lu][0], val - last_val - 1, 0, NULL, NULL);
+                    if (current_mul > 1)
+                        put_symbol_inline(&sc->c, state[lu][1], delta, 1, NULL, NULL);
                     if (val - last_val == 1)
                         lu ^= 1;
-                    last_val = val;
+                    last_val += (val - last_val) * current_mul + delta;
                 }
                 compact_index ++;
             }
-- 
2.48.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
  2025-03-20 22:30 [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap Michael Niedermayer
@ 2025-03-20 23:07 ` Lynne
  2025-03-21 20:13   ` Michael Niedermayer
  0 siblings, 1 reply; 11+ messages in thread
From: Lynne @ 2025-03-20 23:07 UTC (permalink / raw)
  To: ffmpeg-devel

On 20/03/2025 23:30, Michael Niedermayer wrote:
> This performs about as good as the non LRU system for 16bit and
> better than then the LRU system for 16 converted to 32. So
> its basically performing best in all cases we have atm making
> the LRU system unneeded.

Test on *real* 32-bit content, please. You can generate some by using 
the tonemap filter, or any of the others that support it.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
  2025-03-20 23:07 ` Lynne
@ 2025-03-21 20:13   ` Michael Niedermayer
  2025-03-21 21:12     ` Michael Niedermayer
  0 siblings, 1 reply; 11+ messages in thread
From: Michael Niedermayer @ 2025-03-21 20:13 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 733 bytes --]

On Fri, Mar 21, 2025 at 12:07:30AM +0100, Lynne wrote:
> On 20/03/2025 23:30, Michael Niedermayer wrote:
> > This performs about as good as the non LRU system for 16bit and
> > better than then the LRU system for 16 converted to 32. So
> > its basically performing best in all cases we have atm making
> > the LRU system unneeded.
> 
> Test on *real* 32-bit content, please. You can generate some by using the
> tonemap filter, or any of the others that support it.

I am happy to test tonemap output, but
tonemap output is not "real content" either.

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Never trust a computer, one day, it may think you are the virus. -- Compn

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
  2025-03-21 20:13   ` Michael Niedermayer
@ 2025-03-21 21:12     ` Michael Niedermayer
  2025-03-21 21:36       ` Michael Niedermayer
  0 siblings, 1 reply; 11+ messages in thread
From: Michael Niedermayer @ 2025-03-21 21:12 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1024 bytes --]

Hi

On Fri, Mar 21, 2025 at 09:13:49PM +0100, Michael Niedermayer wrote:
> On Fri, Mar 21, 2025 at 12:07:30AM +0100, Lynne wrote:
> > On 20/03/2025 23:30, Michael Niedermayer wrote:
> > > This performs about as good as the non LRU system for 16bit and
> > > better than then the LRU system for 16 converted to 32. So
> > > its basically performing best in all cases we have atm making
> > > the LRU system unneeded.
> > 
> > Test on *real* 32-bit content, please. You can generate some by using the
> > tonemap filter, or any of the others that support it.
> 
> iam happy to test tonemap output but
> tonemap output is not "real content" either

I tested the previous LRU code and this patch with ACES_OT_VWG run through tonemap;
this still performs better than the previous LRU code.

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

"Nothing to hide" only works if the folks in power share the values of
you and everyone you know entirely and always will -- Tom Scott


[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
  2025-03-21 21:12     ` Michael Niedermayer
@ 2025-03-21 21:36       ` Michael Niedermayer
  2025-03-21 22:22         ` Michael Niedermayer
  0 siblings, 1 reply; 11+ messages in thread
From: Michael Niedermayer @ 2025-03-21 21:36 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2901 bytes --]

On Fri, Mar 21, 2025 at 10:12:50PM +0100, Michael Niedermayer wrote:
> Hi
> 
> On Fri, Mar 21, 2025 at 09:13:49PM +0100, Michael Niedermayer wrote:
> > On Fri, Mar 21, 2025 at 12:07:30AM +0100, Lynne wrote:
> > > On 20/03/2025 23:30, Michael Niedermayer wrote:
> > > > This performs about as good as the non LRU system for 16bit and
> > > > better than then the LRU system for 16 converted to 32. So
> > > > its basically performing best in all cases we have atm making
> > > > the LRU system unneeded.
> > > 
> > > Test on *real* 32-bit content, please. You can generate some by using the
> > > tonemap filter, or any of the others that support it.
> > 
> > iam happy to test tonemap output but
> > tonemap output is not "real content" either
> 
> tested the previous LRU code and this with ACES_OT_VWG run through tonemap
> this still performs better than the previous LRU code.

Here are the test results.
The try1 and try256 cases try hardcoded mul values of 1 and 256; they
perform worse than the automatically selected ones.
noremapstor simply does not store the remap table and thus shows how big that
table is (it is quite huge with the tonemap output).
The rest shows that the LRU code performs worse in every tested case.
The gz file is just a sanity check to ensure that we aren't writing tons
of low-entropy data.

-rw-r----- 1 michael michael  694591360 Mar 21 21:57 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-noremapstor.nut
-rw-r----- 1 michael michael  916492722 Mar 21 21:54 float-303503-c1-m2-s40-tmf32-nolsb-retrrr.nut.gz
-rw-r----- 1 michael michael  917135003 Mar 21 21:54 float-303503-c1-m2-s40-tmf32-nolsb-retrrr.nut
-rw-r----- 1 michael michael  921698263 Mar 21 22:03 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try256.nut
-rw-r----- 1 michael michael  921725671 Mar 21 22:04 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU.nut
-rw-r----- 1 michael michael  921729598 Mar 21 22:01 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try1.nut
-rw-r----- 1 michael michael  928459175 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-linear.nut
-rw-r----- 1 michael michael  932903780 Mar 21 22:22 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU-linear.nut
-rw-r----- 1 michael michael 1100100630 Mar 21 22:24 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-gamma.nut
-rw-r----- 1 michael michael 1101005617 Mar 21 22:22 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU-gamma.nut
-rw-r----- 1 michael michael 1150326564 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-hable.nut
-rw-r----- 1 michael michael 1153310394 Mar 21 22:22 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU-hable.nut

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The difference between a dictatorship and a democracy is that every 4 years
the population together is allowed to provide 1 bit of input to the government.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
  2025-03-21 21:36       ` Michael Niedermayer
@ 2025-03-21 22:22         ` Michael Niedermayer
  2025-03-22 17:45           ` Michael Niedermayer
  0 siblings, 1 reply; 11+ messages in thread
From: Michael Niedermayer @ 2025-03-21 22:22 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 5629 bytes --]

On Fri, Mar 21, 2025 at 10:36:58PM +0100, Michael Niedermayer wrote:
> On Fri, Mar 21, 2025 at 10:12:50PM +0100, Michael Niedermayer wrote:
> > Hi
> > 
> > On Fri, Mar 21, 2025 at 09:13:49PM +0100, Michael Niedermayer wrote:
> > > On Fri, Mar 21, 2025 at 12:07:30AM +0100, Lynne wrote:
> > > > On 20/03/2025 23:30, Michael Niedermayer wrote:
> > > > > This performs about as good as the non LRU system for 16bit and
> > > > > better than then the LRU system for 16 converted to 32. So
> > > > > its basically performing best in all cases we have atm making
> > > > > the LRU system unneeded.
> > > > 
> > > > Test on *real* 32-bit content, please. You can generate some by using the
> > > > tonemap filter, or any of the others that support it.
> > > 
> > > iam happy to test tonemap output but
> > > tonemap output is not "real content" either
> > 
> > tested the previous LRU code and this with ACES_OT_VWG run through tonemap
> > this still performs better than the previous LRU code.
> 
> heres the test results,
> the try1 and try256 case try hardcoded mul values of 1 and 256, they
> perform worse than the automatically selected ones
> noremapstor simply does not store the remap table and thus shows how big that
> table is (its quite huge with the tonemap output)
> the rest shows that the LRU code performs worse in every tested case
> that gz file is just a sanity check to ensure that we arent writing tons
> of low entropy data.
> 
> -rw-r----- 1 michael michael  694591360 Mar 21 21:57 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-noremapstor.nut
> -rw-r----- 1 michael michael  916492722 Mar 21 21:54 float-303503-c1-m2-s40-tmf32-nolsb-retrrr.nut.gz
> -rw-r----- 1 michael michael  917135003 Mar 21 21:54 float-303503-c1-m2-s40-tmf32-nolsb-retrrr.nut
> -rw-r----- 1 michael michael  921698263 Mar 21 22:03 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try256.nut
> -rw-r----- 1 michael michael  921725671 Mar 21 22:04 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU.nut
> -rw-r----- 1 michael michael  921729598 Mar 21 22:01 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try1.nut
> -rw-r----- 1 michael michael  928459175 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-linear.nut
> -rw-r----- 1 michael michael  932903780 Mar 21 22:22 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU-linear.nut
> -rw-r----- 1 michael michael 1100100630 Mar 21 22:24 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-gamma.nut
> -rw-r----- 1 michael michael 1101005617 Mar 21 22:22 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU-gamma.nut
> -rw-r----- 1 michael michael 1150326564 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-hable.nut
> -rw-r----- 1 michael michael 1153310394 Mar 21 22:22 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU-hable.nut

And of course my testing had a bug: I set the wrong remap mode.

-rw-r----- 1 michael michael  694591360 Mar 21 21:57 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-noremapstor.nut
-rw-r----- 1 michael michael  915326963 Mar 21 22:55 float-303503-c1-m3-s40-tmf32-nolsb-retrrr-tryLRU.nut
-rw-r----- 1 michael michael  917135003 Mar 21 21:54 float-303503-c1-m2-s40-tmf32-nolsb-retrrr.nut
-rw-r----- 1 michael michael  921698263 Mar 21 22:03 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try256.nut
-rw-r----- 1 michael michael  921729598 Mar 21 22:01 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try1.nut
-rw-r----- 1 michael michael  922576142 Mar 21 22:54 float-303503-c1-m3-s40-tmf32-nolsb-retrrr-tryLRU-linear.nut
-rw-r----- 1 michael michael  928459175 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-linear.nut
-rw-r----- 1 michael michael 1100100630 Mar 21 22:24 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-gamma.nut
-rw-r----- 1 michael michael 1114541572 Mar 21 22:52 float-303503-c1-m3-s40-tmf32-nolsb-retrrr-tryLRU-gamma.nut
-rw-r----- 1 michael michael 1150326564 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-hable.nut
-rw-r----- 1 michael michael 1157209215 Mar 21 22:52 float-303503-c1-m3-s40-tmf32-nolsb-retrrr-tryLRU-hable.nut

So for the none and linear tonemaps, LRU is 0.2% and 0.6% better respectively; for the others it is worse.
That said, for the 2D RLE only 1 mul value is searched, and only over powers of 2.

That said, we can try other algorithms. My main goal at the moment, though, is to keep
things simple, ideally with 1 simple algorithm.
Both the LRU/RLE and 2D RLE algorithms are quite simple.
The 2D RLE is very simple on the decoder side, and on the encoder side it is the
encoder's choice whether it wants to optimize the parameters or just treat it like
a normal RLE.
Also, the 2D RLE allows us to tune the step (=mul) for the whole range, which
allows storing various lower-than-32-bit floating point formats efficiently
(that is tested and confirmed with that float16 input),
and we can also store a step per exponent, which should allow storing fixed
point formats efficiently (that is not tested).
For true float32 that value is just 1 and it is just RLE (which we confirmed
to work too now).

The same remap system can also be used with non-float data like 16-bit integers,
where the mul value would allow very compact removal of any fixed pattern
in the LSB while allowing efficient coding of exceptions.

How much better than this simple way of storing the remap table can we do?
I don't know; maybe I will have a new idea when I wake up tomorrow :)
But at the moment I like it, as it is simple and seems to work well for its simplicity.

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

What does censorship reveal? It reveals fear. -- Julian Assange

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
  2025-03-21 22:22         ` Michael Niedermayer
@ 2025-03-22 17:45           ` Michael Niedermayer
  2025-03-22 17:54             ` Jerome Martinez
  0 siblings, 1 reply; 11+ messages in thread
From: Michael Niedermayer @ 2025-03-22 17:45 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 6864 bytes --]

On Fri, Mar 21, 2025 at 11:22:45PM +0100, Michael Niedermayer wrote:
> On Fri, Mar 21, 2025 at 10:36:58PM +0100, Michael Niedermayer wrote:
> > On Fri, Mar 21, 2025 at 10:12:50PM +0100, Michael Niedermayer wrote:
> > > Hi
> > > 
> > > On Fri, Mar 21, 2025 at 09:13:49PM +0100, Michael Niedermayer wrote:
> > > > On Fri, Mar 21, 2025 at 12:07:30AM +0100, Lynne wrote:
> > > > > On 20/03/2025 23:30, Michael Niedermayer wrote:
> > > > > > This performs about as good as the non LRU system for 16bit and
> > > > > > better than then the LRU system for 16 converted to 32. So
> > > > > > its basically performing best in all cases we have atm making
> > > > > > the LRU system unneeded.
> > > > > 
> > > > > Test on *real* 32-bit content, please. You can generate some by using the
> > > > > tonemap filter, or any of the others that support it.
> > > > 
> > > > iam happy to test tonemap output but
> > > > tonemap output is not "real content" either
> > > 
> > > tested the previous LRU code and this with ACES_OT_VWG run through tonemap
> > > this still performs better than the previous LRU code.
> > 
> > heres the test results,
> > the try1 and try256 case try hardcoded mul values of 1 and 256, they
> > perform worse than the automatically selected ones
> > noremapstor simply does not store the remap table and thus shows how big that
> > table is (its quite huge with the tonemap output)
> > the rest shows that the LRU code performs worse in every tested case
> > that gz file is just a sanity check to ensure that we arent writing tons
> > of low entropy data.
> > 
> > -rw-r----- 1 michael michael  694591360 Mar 21 21:57 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-noremapstor.nut
> > -rw-r----- 1 michael michael  916492722 Mar 21 21:54 float-303503-c1-m2-s40-tmf32-nolsb-retrrr.nut.gz
> > -rw-r----- 1 michael michael  917135003 Mar 21 21:54 float-303503-c1-m2-s40-tmf32-nolsb-retrrr.nut
> > -rw-r----- 1 michael michael  921698263 Mar 21 22:03 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try256.nut
> > -rw-r----- 1 michael michael  921725671 Mar 21 22:04 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU.nut
> > -rw-r----- 1 michael michael  921729598 Mar 21 22:01 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try1.nut
> > -rw-r----- 1 michael michael  928459175 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-linear.nut
> > -rw-r----- 1 michael michael  932903780 Mar 21 22:22 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU-linear.nut
> > -rw-r----- 1 michael michael 1100100630 Mar 21 22:24 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-gamma.nut
> > -rw-r----- 1 michael michael 1101005617 Mar 21 22:22 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU-gamma.nut
> > -rw-r----- 1 michael michael 1150326564 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-hable.nut
> > -rw-r----- 1 michael michael 1153310394 Mar 21 22:22 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-tryLRU-hable.nut
> 
> and of course my testing had a bug, i set the wrong remap mode
> 
> -rw-r----- 1 michael michael  694591360 Mar 21 21:57 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-noremapstor.nut
> -rw-r----- 1 michael michael  915326963 Mar 21 22:55 float-303503-c1-m3-s40-tmf32-nolsb-retrrr-tryLRU.nut
> -rw-r----- 1 michael michael  917135003 Mar 21 21:54 float-303503-c1-m2-s40-tmf32-nolsb-retrrr.nut
> -rw-r----- 1 michael michael  921698263 Mar 21 22:03 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try256.nut
> -rw-r----- 1 michael michael  921729598 Mar 21 22:01 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try1.nut
> -rw-r----- 1 michael michael  922576142 Mar 21 22:54 float-303503-c1-m3-s40-tmf32-nolsb-retrrr-tryLRU-linear.nut
> -rw-r----- 1 michael michael  928459175 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-linear.nut
> -rw-r----- 1 michael michael 1100100630 Mar 21 22:24 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-gamma.nut
> -rw-r----- 1 michael michael 1114541572 Mar 21 22:52 float-303503-c1-m3-s40-tmf32-nolsb-retrrr-tryLRU-gamma.nut
> -rw-r----- 1 michael michael 1150326564 Mar 21 22:23 float-303503-c1-m2-s40-tmf32-nolsb-retrrr-try-hable.nut
> -rw-r----- 1 michael michael 1157209215 Mar 21 22:52 float-303503-c1-m3-s40-tmf32-nolsb-retrrr-tryLRU-hable.nut
> 
> so for the linear and none tonemaps, LRU is 0.2% and 0.6% better, for the others its worse.
> That said, for the 2D RLE only 1 mul value and only powers of 2 of that is searched.
> 
> That said, we can try other algorithms. My main goal ATM is though to keep
> things simple, ideally 1 simple algorithm.
> Both the LRU/RLE and 2d RLE algorithms are quite simple
> the 2d RLE is very simple on the decoder side and on the encoder side its the
> encoders choice is it wants to optimize the parameters or just treat it like
> a normal RLE
> also the 2d RLE allows us to tune the step (=mul) for the whole range which
> allows storing various lower than 32 bit floating point formats efficiently
> (thats tested and confirmed with that float16 input)
> and we can also store a step per exponent which should allow storing fixed
> point formats efficiently. (thats not tested)
> For true float32 that value is just 1 and its just RLE. (which we confirmed
> to work too now)
> 
> The same remap system can also be used with non float data like 16bit integers
> where the mul value would allow very compact removial of any fixed pattern
> in the LSB while allowing efficient coding of exceptions.
> 
> How much better than this simple way of storing the remap table can we do?
> I dont know, maybe i have a new idea when i wake up tomorrow :)
> but ATM i like it as its simple and seems to work well for its simplicity

I also spent most of the day today working on this more. I've simplified the
remap decoder code, fixed a bunch of issues in the encoder, and tested
my heuristic that chooses the mul value vs. brute force of all power-of-2
values. Basically, brute force is not worth the extra time.
Also, I failed to find any worthwhile gain from adjusting mul_count, so
while the code in the encoder looks complex at the moment, a lot of that can be
dropped later if, for example, we choose to never put mul_count > 1 into
the specification; at the moment it makes no sense to put that in, as there is no
significant gain with the material I tested.

The remap encoder code is also more flexible now, as it can easily and cleanly
be run several times (to compare the effects of parameters).

I do intend to apply my current code for float32 support and the remap
code in the next 24h (with whatever bugfixes it has at the time)

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

What's the most stupid thing your enemy could do? Blow himself up.
What's the most stupid thing you could do? Give up your rights and
freedom because your enemy blew himself up.


[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
  2025-03-22 17:45           ` Michael Niedermayer
@ 2025-03-22 17:54             ` Jerome Martinez
  2025-03-24  1:53               ` Michael Niedermayer
  0 siblings, 1 reply; 11+ messages in thread
From: Jerome Martinez @ 2025-03-22 17:54 UTC (permalink / raw)
  To: ffmpeg-devel

Le 22/03/2025 à 18:45, Michael Niedermayer a écrit :
> [...]
> Also I failed to find any worthy gain from adjusting mul_count so
> while the code in the encoder looks complex ATM alot of that can be
> dropped later if for example we choose to never put mul_count > 1 into
> the specification and ATM it makes no sense to put that in as theres no
> significant gain with the material i tested

I would prefer we don't put mul_count > 1 related code in FFmpeg, even 
if it is still experimental, without a use case demonstrating that this 
is useful.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
  2025-03-22 17:54             ` Jerome Martinez
@ 2025-03-24  1:53               ` Michael Niedermayer
  2025-03-24 12:31                 ` Michael Niedermayer
  0 siblings, 1 reply; 11+ messages in thread
From: Michael Niedermayer @ 2025-03-24  1:53 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2145 bytes --]

Hi Jerome

On Sat, Mar 22, 2025 at 06:54:25PM +0100, Jerome Martinez wrote:
> Le 22/03/2025 à 18:45, Michael Niedermayer a écrit :
> > [...]
> > Also I failed to find any worthy gain from adjusting mul_count so
> > while the code in the encoder looks complex ATM alot of that can be
> > dropped later if for example we choose to never put mul_count > 1 into
> > the specification and ATM it makes no sense to put that in as theres no
> > significant gain with the material i tested
> 
> I would prefer we don't put mul_count > 1 related code in FFmpeg, even if it
> is still experimental, without a use case demonstrating that this is useful.

Ok, ive done some more tests

mul_count > 1 is for fixed-point-in-float and decimal mantissa*exponent
formats. This is also what the paper I've looked at concentrated on.

Real world sensors return integers (to the best of my knowledge), so these
fixed point in float cases are expected to be significant.

Sadly, I have very little float32 material, so to test this I
had to generate my own. Using the same tonemap and source as last time,
I put a gbrp16 in the middle to make it 16-bit fixed point in float.

I then encoded this with a single, static, repeated, dumb handwritten
mul_count=512 table and compared it against the computer-optimized
mul_count=1 table optimized for each slice. Even so, the mul_count=512
tables are 6% smaller overall.

I see the same effect with a random gbrp10 image in float32, just that
the tables are all smaller.

I will thus move forward with committing the code I have.
I have also simplified the code more today.

Dropping all mul_count > 1 code would require an alternative solution.

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Modern terrorism, a quick summary: Need oil, start war with country that
has oil, kill hundred thousand in war. Let country fall into chaos,
be surprised about rise of fundamentalists. Drop more bombs, kill more
people, be surprised about them taking revenge and drop even more bombs
and strip your own citizens of their rights and freedoms. To be continued.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
  2025-03-24  1:53               ` Michael Niedermayer
@ 2025-03-24 12:31                 ` Michael Niedermayer
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Niedermayer @ 2025-03-24 12:31 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 3490 bytes --]

Hi

On Mon, Mar 24, 2025 at 02:53:27AM +0100, Michael Niedermayer wrote:
> Hi Jerome
> 
> On Sat, Mar 22, 2025 at 06:54:25PM +0100, Jerome Martinez wrote:
> > Le 22/03/2025 à 18:45, Michael Niedermayer a écrit :
> > > [...]
> > > Also I failed to find any worthy gain from adjusting mul_count so
> > > while the code in the encoder looks complex ATM alot of that can be
> > > dropped later if for example we choose to never put mul_count > 1 into
> > > the specification and ATM it makes no sense to put that in as theres no
> > > significant gain with the material i tested
> > 
> > I would prefer we don't put mul_count > 1 related code in FFmpeg, even if it
> > is still experimental, without a use case demonstrating that this is useful.
> 
> Ok, ive done some more tests
> 
> mul_count>1 is for fixed point in float and decimal mantisse*exponent
> formats. This is also what the paper concentrated on that ive looked at.

And before people ask about the paper:
it's "ALP: Adaptive Lossless floating-Point Compression"

And before people ask why we are not using it:
1. It's about compressing 1D academic data, not images: things like
    Barometric Pressure (kPa), Temperature (°C), Monetary (Stocks), ...
2. It's primarily about 64-bit double-precision floats.
3. With 64-bit doubles, they tested their method against zstd, and zstd beats
   them 3.1 to 3.0 compression-rate-wise.
4. Now to 32-bit floats, which they tested too: there they beat zstd. They
   achieve 28.1 bits per float and zstd 29.7 bits per float; all other
   variants they compare against are above 33.4 bits per float.

So how do we compare to zstd?
Using ACES_OT_VWG run through -vf tonemap=hable,format=gbrp16 -pix_fmt gbrpf32
(to have some sort of simulation of 16bit fixed point sensor data in 32 bit float)
and using 256 slices to maximize the effect of the remap tables (the file with 40
in it uses 40 slices to compare)
rawdump is simply storing the remap tables as raw 32bit floats
rawvideo is rawvideo instead of ffv1
zst is zstd -k -19
noremapstored is simply the ffv1 data without the remap table stored

-rw-r----- 1 michael michael  483868924 Mär 24 13:24 float-303503-fixed-40-noremapstored.nut
-rw-r----- 1 michael michael  491567660 Mär 24 01:14 float-303503-fixed-256-noremapstored.nut.zst
-rw-r----- 1 michael michael  491835695 Mär 24 01:14 float-303503-fixed-256-noremapstored.nut
-rw-r----- 1 michael michael  499059484 Mär 24 12:52 float-303503-fixed-40.nut
-rw-r----- 1 michael michael  545695302 Mär 24 01:13 float-303503-fixed-256.nut
-rw-r----- 1 michael michael  600666368 Mär 24 12:26 float-303503-fixed-256-rawdump2.nut.zst
-rw-r----- 1 michael michael  807719501 Mär 24 12:54 float-303503-fixed-rawvideo.nut.zst
-rw-r----- 1 michael michael 1085998190 Mär 24 12:26 float-303503-fixed-256-rawdump2.nut
-rw-r----- 1 michael michael 1990659135 Mär 24 12:54 float-303503-fixed-rawvideo.nut

From this you can see that, for this example (assuming I made no mistake):
1. our code to store remap tables needs about half the space that zstd needs
2. ffv1 needs only 60% of the space that zstd-compressed rawvideo needs
3. the remap tables are 3% of the file with big slices (40 slices) but over
   10% with small slices (256 slices)

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In fact, the RIAA has been known to suggest that students drop out
of college or go to community college in order to be able to afford
settlements. -- The RIAA

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap
@ 2025-03-22 18:01 Michael Niedermayer
  0 siblings, 0 replies; 11+ messages in thread
From: Michael Niedermayer @ 2025-03-22 18:01 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

ATM this performs as well as or better than any other algorithm tried.
It's simple for the decoder.
On the encoder side, complexity depends on how parameters are
chosen. But with a fixed mul_count of 1 and a basic heuristic,
it performs as well as any more complex choice I tried so far.

The encoder code here is flexible and allows mul_count > 1,
and it can also easily be used to exactly test various parameters.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavcodec/ffv1dec.c |  77 +++++++++++-----
 libavcodec/ffv1enc.c | 205 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 218 insertions(+), 64 deletions(-)

diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index f254c875a3f..19eb546d3f1 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -274,6 +274,17 @@ static void slice_set_damaged(FFV1Context *f, FFV1SliceContext *sc)
         f->frame_damaged = 1;
 }
 
+static int decode_current_mul(RangeCoder *rc, uint8_t state[32], int *mul, int mul_count, int64_t i)
+{
+    int ndx = (i * mul_count) >> 32;
+    av_assert2(ndx <= 4096U);
+
+    if (mul[ndx] < 0)
+        mul[ndx] = ff_ffv1_get_symbol(rc, state, 0) & 0x3FFFFFFF;
+
+    return mul[ndx];
+}
+
 static int decode_remap(FFV1Context *f, FFV1SliceContext *sc)
 {
     unsigned int end = f->avctx->bits_per_raw_sample == 32 ? 0xFFFFFFFF : 0xFFFF;
@@ -282,33 +293,53 @@ static int decode_remap(FFV1Context *f, FFV1SliceContext *sc)
     for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) {
         int j = 0;
         int lu = 0;
-        uint8_t state[2][32];
+        uint8_t state[2][3][32];
         int64_t i;
+        int mul[4096+1];
+        int mul_count;
+
         memset(state, 128, sizeof(state));
-        for (i=0; i <= end ; i++) {
-            unsigned run = get_symbol_inline(&sc->c, state[lu], 0);
-            if (run > end - i + 1)
-                return AVERROR_INVALIDDATA;
-            if (lu) {
-                lu ^= !run;
-                while (run--) {
-                    if (end == 0xFFFF) {
-                        sc->fltmap  [p][j++] = i ^ ((i&    0x8000) ? 0 : flip);
-                    } else
-                        sc->fltmap32[p][j++] = i ^ ((i&0x80000000) ? 0 : flip);
-                    i++;
-                }
-            } else {
-                i += run;
-                if (i <= end) {
-                    if (end == 0xFFFF) {
-                        sc->fltmap  [p][j++] = i ^ ((i&    0x8000) ? 0 : flip);
-                    } else {
-                        sc->fltmap32[p][j++] = i ^ ((i&0x80000000) ? 0 : flip);
-                    }
+        mul_count = ff_ffv1_get_symbol(&sc->c, state[0][0], 0);
+
+        if (mul_count > 4096U)
+            return AVERROR_INVALIDDATA;
+        for (int i = 0; i<mul_count; i++) {
+            mul[i] = -1;
+
+        }
+        mul[mul_count] = 1;
+
+        memset(state, 128, sizeof(state));
+        int current_mul = 1;
+        for (i=0; i <= end ;) {
+            unsigned run = get_symbol_inline(&sc->c, state[lu][0], 0);
+            unsigned run0 = lu ? 0   : run;
+            unsigned run1 = lu ? run : 1;
+
+            i += run0 * current_mul;
+
+            while (run1--) {
+                if (current_mul > 1) {
+                    int delta = get_symbol_inline(&sc->c, state[lu][1], 1);
+                    if (delta <= -current_mul || delta > current_mul/2)
+                        return AVERROR_INVALIDDATA; //not sure we should check this
+                    i += current_mul - 1 + delta;
                 }
-                lu ^= !run;
+                if (i == end)
+                    break;
+                if (i - 1 > end || j > 65535)
+                    return AVERROR_INVALIDDATA;
+                if (end == 0xFFFF) {
+                    sc->fltmap  [p][j++] = i ^ ((i&    0x8000) ? 0 : flip);
+                } else
+                    sc->fltmap32[p][j++] = i ^ ((i&0x80000000) ? 0 : flip);
+                i++;
+                current_mul = decode_current_mul(&sc->c, state[0][2], mul, mul_count, i);
+            }
+            if (lu) {
+                i += current_mul;
             }
+            lu ^= !run;
         }
     }
     return 0;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index e557e7fcdfe..5b251ac2e80 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -433,7 +433,7 @@ static void set_micro_version(FFV1Context *f)
         if (f->version == 3) {
             f->micro_version = 4;
         } else if (f->version == 4) {
-            f->micro_version = 6;
+            f->micro_version = 7;
         } else
             av_assert0(0);
 
@@ -1179,6 +1179,9 @@ static void encode_histogram_remap(FFV1Context *f, FFV1SliceContext *sc)
         int lu = 0;
         uint8_t state[2][32];
         int run = 0;
+
+        memset(state, 128, sizeof(state));
+        put_symbol(&sc->c, state[0], 0, 0);
         memset(state, 128, sizeof(state));
         for (int i= 0; i<65536; i++) {
             int ri = i ^ ((i&0x8000) ? 0 : flip);
@@ -1258,59 +1261,179 @@ static void load_rgb_float32_frame(FFV1Context *f, FFV1SliceContext *sc,
         AV_QSORT(unit[3], i, Unit, CMP);
 }
 
+typedef struct RemapEncoderState {
+    int delta_stack[65536];     //We need to encode the run value before the adjustments, this stores the adjustments until we know the length of the run
+    int16_t index_stack[65537]; //only needed with multiple segments
+    uint8_t state[2][3][32];
+    int mul[4096+1];
+    RangeCoder rc;
+    int lu;
+    int run;
+    int64_t last_val;
+    int compact_index;
+    int mul_count;
+    int i;
+    int pixel_num;
+    int p;
+} RemapEncoderState;
+
+static inline void copy_state(RemapEncoderState *dst, const RemapEncoderState *src)
+{
+    dst->rc = src->rc;
+    memcpy(dst->mul, src->mul, (src->mul_count + 1) * sizeof(src->mul[0]));
+    memcpy(dst->delta_stack, src->delta_stack, src->run * sizeof(src->delta_stack[0]));
+    memcpy(dst->index_stack, src->index_stack, (src->run + 1) * sizeof(src->index_stack[0]));
+    memcpy(dst->state, src->state, sizeof(dst->state));
+    dst->lu             = src->lu;
+    dst->run            = src->run;
+    dst->last_val       = src->last_val;
+    dst->compact_index  = src->compact_index;
+    dst->mul_count      = src->mul_count;
+    dst->i              = src->i;
+    dst->pixel_num      = src->pixel_num;
+    dst->p              = src->p;
+}
+
+static inline void encode_mul(RemapEncoderState *s, int mul_index)
+{
+    av_assert2(s->mul[ mul_index ]);
+    if (s->mul[ mul_index ] < 0) {
+        s->mul[ mul_index ] *= -1;
+        put_symbol_inline(&s->rc, s->state[0][2], s->mul[ mul_index ], 0, NULL, NULL);
+    }
+}
+
+static int encode_float32_remap_segment(FFV1SliceContext *sc, Unit unit[4][65536],
+                                        RemapEncoderState *state_arg, int update, int final)
+{
+    RemapEncoderState s;
+
+    copy_state(&s, state_arg);
+
+    if (s.i == 0) {
+        memset(s.state, 128, sizeof(s.state));
+        put_symbol(&s.rc, s.state[0][0], s.mul_count, 0);
+        memset(s.state, 128, sizeof(s.state));
+        s.last_val = -1;
+        s.compact_index = -1;
+        s.lu = 0;
+        s.run = 0;
+    }
+
+    for (; s.i < s.pixel_num+1; s.i++) {
+        int64_t val;
+        if (s.i == s.pixel_num) {
+            if (s.last_val == 0xFFFFFFFF) {
+                break;
+            } else {
+                val = 1LL<<32;
+            }
+        } else
+            val = unit[s.p][s.i].val;
+
+        if (s.last_val != val) {
+            int current_mul_index = ((s.last_val + 1) * s.mul_count) >> 32;
+            int current_mul = s.i ? FFABS(s.mul[current_mul_index]) : 1;
+            int64_t delta = 0;
+            av_assert2(s.last_val < val);
+            av_assert2(current_mul > 0);
+
+            if (current_mul > 1) {
+                delta = val - s.last_val;
+                val = FFMAX(1, (delta + current_mul/2) / current_mul);
+
+                delta -= val*current_mul;
+                av_assert2(delta <= current_mul/2);
+                av_assert2(delta > -current_mul);
+                val += s.last_val;
+            }
+            av_assert2(s.last_val < val);
+            if (s.lu) {
+                s.index_stack[s.run] = current_mul_index;
+                av_assert2(s.run < FF_ARRAY_ELEMS(s.delta_stack));
+                if (val - s.last_val == 1) {
+                    s.delta_stack[s.run] = delta;
+                    s.run ++;
+                    s.last_val += current_mul + delta;
+                } else {
+                    put_symbol_inline(&s.rc, s.state[s.lu][0], s.run, 0, NULL, NULL);
+                    for(int k=0; k<s.run; k++) {
+                        int stack_mul = s.mul[ s.index_stack[k] ];
+                        if (stack_mul>1)
+                            put_symbol_inline(&s.rc, s.state[s.lu][1], s.delta_stack[k], 1, NULL, NULL);
+                        encode_mul(&s, s.index_stack[k+1]);
+                    }
+                    if (s.run == 0)
+                        s.lu ^= 1;
+                    s.run = 0;
+                    s.i--; // we did not encode val so we need to backstep
+                    s.last_val += current_mul;
+                    continue;
+                }
+            } else {
+                av_assert2(s.run == 0);
+                put_symbol_inline(&s.rc, s.state[s.lu][0], val - s.last_val - 1, 0, NULL, NULL);
+                if (current_mul > 1)
+                    put_symbol_inline(&s.rc, s.state[s.lu][1], delta, 1, NULL, NULL);
+                if (val - s.last_val == 1)
+                    s.lu ^= 1;
+                s.last_val += (val - s.last_val) * current_mul + delta;
+
+                encode_mul(&s, ((s.last_val + 1) * s.mul_count) >> 32);
+            }
+            s.compact_index ++;
+        }
+        if (final && s.i < s.pixel_num)
+            sc->bitmap[s.p][unit[s.p][s.i].ndx] = s.compact_index;
+    }
+
+    if (update) {
+        copy_state(state_arg, &s);
+    }
+    return get_rac_count(&s.rc);
+}
+
 static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
                                  const uint8_t *src[4], Unit unit[4][65536])
 {
-    int pixel_num = sc->slice_width * sc->slice_height;
+    RemapEncoderState s;
+    s.pixel_num = sc->slice_width * sc->slice_height;
 
-    av_assert0 (pixel_num <= 65536);
+    av_assert0 (s.pixel_num <= 65536);
 
     for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) {
-        int lu = 0;
-        uint8_t state[2][32];
-        int run = 0;
+        float score_tab[16] = {0};
         int64_t last_val = -1;
-        int compact_index = -1;
+        s.rc = sc->c;
+        s.i = 0;
+        s.p = p;
 
-        memset(state, 128, sizeof(state));
-        for (int i= 0; i<pixel_num+1; i++) {
-            int64_t val;
-            if (i == pixel_num) {
-                if (last_val == 0xFFFFFFFF) {
-                    break;
-                } else {
-                    val = 1LL<<32;
-                }
-            } else
-                val = unit[p][i].val;
+        s.mul_count = 1;
 
-            if (last_val != val) {
+        for (int i= 0; i<s.pixel_num; i++) {
+            int64_t val = unit[p][i].val;
+            if (val != last_val) {
                 av_assert2(last_val < val);
-                if (lu) {
-                    if (val - last_val == 1) {
-                        run ++;
-                        last_val = val;
-                    } else {
-                        put_symbol_inline(&sc->c, state[lu], run, 0, NULL, NULL);
-                        if (run == 0)
-                            lu ^= 1;
-                        run = 0;
-                        i--; // we did not encode val so we need to backstep
-                        last_val ++;
-                        continue;
-                    }
-                } else {
-                    av_assert2(run == 0);
-                    put_symbol_inline(&sc->c, state[lu], val - last_val - 1, 0, NULL, NULL);
-                    if (val - last_val == 1)
-                        lu ^= 1;
-                    last_val = val;
+                for(int si= 0; si < FF_ARRAY_ELEMS(score_tab); si++) {
+                    int64_t delta = val - last_val;
+                    int mul = last_val < 0 ? 1 : (1<<si);
+                    int64_t cost = FFMAX((delta + mul/2)  / mul, 1);
+                    score_tab[si] += log2(cost) + fabs(delta - cost*mul);
                 }
-                compact_index ++;
+                last_val = val;
             }
-            if (i < pixel_num)
-                sc->bitmap[p][unit[p][i].ndx] = compact_index;
         }
+        int best_index = 0;
+        for(int si= 1; si < FF_ARRAY_ELEMS(score_tab); si++) {
+            if (score_tab[si] < score_tab[ best_index ])
+                best_index = si;
+        }
+        s.mul[0] = -1 << best_index;
+        s.mul[s.mul_count] = 1;
+
+        encode_float32_remap_segment(sc, unit, &s, 1, 1);
+
+        sc->c = s.rc;
     }
 }
 
-- 
2.48.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-03-24 12:31 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-20 22:30 [FFmpeg-devel] [PATCH] avcodec/ffv1: Implement 2D RLE for remap Michael Niedermayer
2025-03-20 23:07 ` Lynne
2025-03-21 20:13   ` Michael Niedermayer
2025-03-21 21:12     ` Michael Niedermayer
2025-03-21 21:36       ` Michael Niedermayer
2025-03-21 22:22         ` Michael Niedermayer
2025-03-22 17:45           ` Michael Niedermayer
2025-03-22 17:54             ` Jerome Martinez
2025-03-24  1:53               ` Michael Niedermayer
2025-03-24 12:31                 ` Michael Niedermayer
2025-03-22 18:01 Michael Niedermayer

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git