Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] avcodec/ffv1enc: further reduce stack usage
@ 2025-03-24 22:20 James Almer
  2025-03-25  1:50 ` Michael Niedermayer
  0 siblings, 1 reply; 6+ messages in thread
From: James Almer @ 2025-03-24 22:20 UTC (permalink / raw)
  To: ffmpeg-devel

Continues from commit 702239bc500b, fixing FATE failures on MacOS.

Signed-off-by: James Almer <jamrial@gmail.com>
---
Confirmed by Martin Storsjö. Float encoding untested.

 libavcodec/ffv1.h    |  16 ++++
 libavcodec/ffv1enc.c | 177 +++++++++++++++++--------------------------
 2 files changed, 84 insertions(+), 109 deletions(-)

diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 09118e0b7d..d1c239f138 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -115,6 +115,22 @@ typedef struct FFV1SliceContext {
         uint32_t val; //this is unneeded if you accept a dereference on each access
         uint16_t ndx;
     } unit[4][65536];
+    struct RemapEncoderState {
+        int delta_stack[65536];     //We need to encode the run value before the adjustments, this stores the adjustments until we know the length of the run
+        int16_t index_stack[65537]; //only needed with multiple segments
+        uint8_t state[2][3][32];
+        int mul[4096+1];
+        RangeCoder rc;
+        int lu;
+        int run;
+        int64_t last_val;
+        int compact_index;
+        int mul_count;
+        int i;
+        int pixel_num;
+        int p;
+        int current_mul_index;
+    } remap_state;
 } FFV1SliceContext;
 
 typedef struct FFV1Context {
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 4340a301e1..8dcca7e1d6 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -1246,42 +1246,7 @@ static void load_rgb_float32_frame(FFV1Context *f, FFV1SliceContext *sc,
         AV_QSORT(sc->unit[3], i, struct Unit, CMP);
 }
 
-typedef struct RemapEncoderState {
-    int delta_stack[65536];     //We need to encode the run value before the adjustments, this stores the adjustments until we know the length of the run
-    int16_t index_stack[65537]; //only needed with multiple segments
-    uint8_t state[2][3][32];
-    int mul[4096+1];
-    RangeCoder rc;
-    int lu;
-    int run;
-    int64_t last_val;
-    int compact_index;
-    int mul_count;
-    int i;
-    int pixel_num;
-    int p;
-    int current_mul_index;
-} RemapEncoderState;
-
-static inline void copy_state(RemapEncoderState *dst, const RemapEncoderState *src)
-{
-    dst->rc = src->rc;
-    memcpy(dst->mul, src->mul, (src->mul_count + 1) * sizeof(src->mul[0]));
-    memcpy(dst->delta_stack, src->delta_stack, src->run * sizeof(src->delta_stack[0]));
-    memcpy(dst->index_stack, src->index_stack, (src->run + 1) * sizeof(src->index_stack[0]));
-    memcpy(dst->state, src->state, sizeof(dst->state));
-    dst->lu             = src->lu;
-    dst->run            = src->run;
-    dst->last_val       = src->last_val;
-    dst->compact_index  = src->compact_index;
-    dst->mul_count      = src->mul_count;
-    dst->i              = src->i;
-    dst->pixel_num      = src->pixel_num;
-    dst->p              = src->p;
-    dst->current_mul_index = src->current_mul_index;
-}
-
-static inline void encode_mul(RemapEncoderState *s, int mul_index)
+static inline void encode_mul(struct RemapEncoderState *s, int mul_index)
 {
     av_assert2(s->mul[ mul_index ]);
     if (s->mul[ mul_index ] < 0) {
@@ -1290,122 +1255,116 @@ static inline void encode_mul(RemapEncoderState *s, int mul_index)
     }
 }
 
-static int encode_float32_remap_segment(FFV1SliceContext *sc,
-                                        RemapEncoderState *state_arg, int update, int final)
+static int encode_float32_remap_segment(FFV1SliceContext *sc, int final)
 {
-    RemapEncoderState s;
-
-    copy_state(&s, state_arg);
-
-    if (s.i == 0) {
-        memset(s.state, 128, sizeof(s.state));
-        put_symbol(&s.rc, s.state[0][0], s.mul_count, 0);
-        memset(s.state, 128, sizeof(s.state));
-        s.last_val = -1;
-        s.compact_index = -1;
-        s.lu = 0;
-        s.run = 0;
-        s.current_mul_index = -1;
+    struct RemapEncoderState *s = &sc->remap_state;
+
+    if (s->i == 0) {
+        memset(s->state, 128, sizeof(s->state));
+        put_symbol(&s->rc, s->state[0][0], s->mul_count, 0);
+        memset(s->state, 128, sizeof(s->state));
+        s->last_val = -1;
+        s->compact_index = -1;
+        s->lu = 0;
+        s->run = 0;
+        s->current_mul_index = -1;
     }
 
-    for (; s.i < s.pixel_num+1; s.i++) {
-        int current_mul = s.current_mul_index < 0 ? 1 : FFABS(s.mul[s.current_mul_index]);
+    for (; s->i < s->pixel_num+1; s->i++) {
+        int current_mul = s->current_mul_index < 0 ? 1 : FFABS(s->mul[s->current_mul_index]);
         int64_t val;
-        if (s.i == s.pixel_num) {
-            if (s.last_val == 0xFFFFFFFF) {
+        if (s->i == s->pixel_num) {
+            if (s->last_val == 0xFFFFFFFF) {
                 break;
             } else {
                 val = 1LL<<32;
             }
         } else
-            val = sc->unit[s.p][s.i].val;
+            val = sc->unit[s->p][s->i].val;
 
-        if (s.last_val != val) {
+        if (s->last_val != val) {
             int64_t delta = 0;
-            av_assert2(s.last_val < val);
+            av_assert2(s->last_val < val);
             av_assert2(current_mul > 0);
 
             if (current_mul > 1) {
-                delta = val - s.last_val;
+                delta = val - s->last_val;
                 val = FFMAX(1, (delta + current_mul/2) / current_mul);
 
                 delta -= val*current_mul;
                 av_assert2(delta <= current_mul/2);
                 av_assert2(delta > -current_mul);
-                val += s.last_val;
+                val += s->last_val;
             }
-            av_assert2(s.last_val < val);
-            if (s.lu) {
-                s.index_stack[s.run] = s.current_mul_index;
-                av_assert2(s.run < FF_ARRAY_ELEMS(s.delta_stack));
-                if (val - s.last_val == 1) {
-                    s.delta_stack[s.run] = delta;
-                    s.run ++;
-                    av_assert2(s.i == s.pixel_num || s.last_val + current_mul + delta == sc->unit[s.p][s.i].val);
-                    s.last_val += current_mul + delta;
+            av_assert2(s->last_val < val);
+            if (s->lu) {
+                s->index_stack[s->run] = s->current_mul_index;
+                av_assert2(s->run < FF_ARRAY_ELEMS(s->delta_stack));
+                if (val - s->last_val == 1) {
+                    s->delta_stack[s->run] = delta;
+                    s->run ++;
+                    av_assert2(s->i == s->pixel_num || s->last_val + current_mul + delta == sc->unit[s->p][s->i].val);
+                    s->last_val += current_mul + delta;
                 } else {
-                    put_symbol_inline(&s.rc, s.state[s.lu][0], s.run, 0, NULL, NULL);
+                    put_symbol_inline(&s->rc, s->state[s->lu][0], s->run, 0, NULL, NULL);
 
-                    for(int k=0; k<s.run; k++) {
-                        int stack_mul = s.mul[ s.index_stack[k] ];
+                    for(int k=0; k<s->run; k++) {
+                        int stack_mul = s->mul[ s->index_stack[k] ];
                         if (stack_mul>1)
-                            put_symbol_inline(&s.rc, s.state[s.lu][1], s.delta_stack[k], 1, NULL, NULL);
-                        encode_mul(&s, s.index_stack[k+1]);
+                            put_symbol_inline(&s->rc, s->state[s->lu][1], s->delta_stack[k], 1, NULL, NULL);
+                        encode_mul(s, s->index_stack[k+1]);
                     }
-                    if (s.run == 0)
-                        s.lu ^= 1;
-                    s.run = 0;
-                    s.i--; // we did not encode val so we need to backstep
-                    s.last_val += current_mul;
+                    if (s->run == 0)
+                        s->lu ^= 1;
+                    s->run = 0;
+                    s->i--; // we did not encode val so we need to backstep
+                    s->last_val += current_mul;
                     continue;
                 }
             } else {
-                av_assert2(s.run == 0);
-                put_symbol_inline(&s.rc, s.state[s.lu][0], val - s.last_val - 1, 0, NULL, NULL);
+                av_assert2(s->run == 0);
+                put_symbol_inline(&s->rc, s->state[s->lu][0], val - s->last_val - 1, 0, NULL, NULL);
 
                 if (current_mul > 1)
-                    put_symbol_inline(&s.rc, s.state[s.lu][1], delta, 1, NULL, NULL);
-                if (val - s.last_val == 1)
-                    s.lu ^= 1;
+                    put_symbol_inline(&s->rc, s->state[s->lu][1], delta, 1, NULL, NULL);
+                if (val - s->last_val == 1)
+                    s->lu ^= 1;
 
-                av_assert2(s.i == s.pixel_num || s.last_val + (val - s.last_val) * current_mul + delta == sc->unit[s.p][s.i].val);
-                if (s.i < s.pixel_num)
-                    s.last_val = sc->unit[s.p][s.i].val;
+                av_assert2(s->i == s->pixel_num || s->last_val + (val - s->last_val) * current_mul + delta == sc->unit[s->p][s->i].val);
+                if (s->i < s->pixel_num)
+                    s->last_val = sc->unit[s->p][s->i].val;
             }
-            s.current_mul_index = ((s.last_val + 1) * s.mul_count) >> 32;
-            if (!s.run)
-                encode_mul(&s, s.current_mul_index);
-            s.compact_index ++;
+            s->current_mul_index = ((s->last_val + 1) * s->mul_count) >> 32;
+            if (!s->run)
+                encode_mul(s, s->current_mul_index);
+            s->compact_index ++;
         }
-        if (final && s.i < s.pixel_num)
-            sc->bitmap[s.p][sc->unit[s.p][s.i].ndx] = s.compact_index;
+        if (final && s->i < s->pixel_num)
+            sc->bitmap[s->p][sc->unit[s->p][s->i].ndx] = s->compact_index;
     }
 
-    if (update) {
-        copy_state(state_arg, &s);
-    }
-    return get_rac_count(&s.rc);
+    return get_rac_count(&s->rc);
 }
 
 static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
                                  const uint8_t *src[4])
 {
-    RemapEncoderState s;
-    s.pixel_num = sc->slice_width * sc->slice_height;
+    struct RemapEncoderState *s = &sc->remap_state;
+    s->pixel_num = sc->slice_width * sc->slice_height;
 
-    av_assert0 (s.pixel_num <= 65536);
+    av_assert0 (s->pixel_num <= 65536);
 
     for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) {
         float score_tab[16] = {0};
         int64_t last_val = -1;
         int best_index = 0;
-        s.rc = sc->c;
-        s.i = 0;
-        s.p = p;
+        s->rc = sc->c;
+        s->i = 0;
+        s->p = p;
 
-        s.mul_count = 1;
+        s->mul_count = 1;
 
-        for (int i= 0; i<s.pixel_num; i++) {
+        for (int i= 0; i<s->pixel_num; i++) {
             int64_t val = sc->unit[p][i].val;
             if (val != last_val) {
                 av_assert2(last_val < val);
@@ -1422,12 +1381,12 @@ static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
             if (score_tab[si] < score_tab[ best_index ])
                 best_index = si;
         }
-        s.mul[0] = -1 << best_index;
-        s.mul[s.mul_count] = 1;
+        s->mul[0] = -1 << best_index;
+        s->mul[s->mul_count] = 1;
 
-        encode_float32_remap_segment(sc, &s, 1, 1);
+        encode_float32_remap_segment(sc, 1);
 
-        sc->c = s.rc;
+        sc->c = s->rc;
     }
 }
 
-- 
2.48.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1enc: further reduce stack usage
  2025-03-24 22:20 [FFmpeg-devel] [PATCH] avcodec/ffv1enc: further reduce stack usage James Almer
@ 2025-03-25  1:50 ` Michael Niedermayer
  2025-03-25  1:57   ` James Almer
  0 siblings, 1 reply; 6+ messages in thread
From: Michael Niedermayer @ 2025-03-25  1:50 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2832 bytes --]

Hi

On Mon, Mar 24, 2025 at 07:20:50PM -0300, James Almer wrote:
> Continues from commit 702239bc500b, fixing FATE failures on MacOS.
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> Confirmed by Martin Storsjö. Float encoding untested.
> 
>  libavcodec/ffv1.h    |  16 ++++
>  libavcodec/ffv1enc.c | 177 +++++++++++++++++--------------------------
>  2 files changed, 84 insertions(+), 109 deletions(-)
> 
> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
> index 09118e0b7d..d1c239f138 100644
> --- a/libavcodec/ffv1.h
> +++ b/libavcodec/ffv1.h
> @@ -115,6 +115,22 @@ typedef struct FFV1SliceContext {
>          uint32_t val; //this is unneeded if you accept a dereference on each access
>          uint16_t ndx;
>      } unit[4][65536];
> +    struct RemapEncoderState {
> +        int delta_stack[65536];     //We need to encode the run value before the adjustments, this stores the adjustments until we know the length of the run
> +        int16_t index_stack[65537]; //only needed with multiple segments
> +        uint8_t state[2][3][32];
> +        int mul[4096+1];
> +        RangeCoder rc;
> +        int lu;
> +        int run;
> +        int64_t last_val;
> +        int compact_index;
> +        int mul_count;
> +        int i;
> +        int pixel_num;
> +        int p;
> +        int current_mul_index;
> +    } remap_state;
>  } FFV1SliceContext;

please provide a link to the failure

This makes the code increasingly ugly.

i dont understand why this breaks fate, fate should not use
any of the float code as none should be run in fate ATM.
its also all under -strict -2 checks

this is temporary data not needed outside float32
and not needed outside the remap table writing.

we may need more than one such state.
(if we dont use a heuristic but actually
 encode bruteforce / trial and error)

t conflicts with all work i did today

theres tons of unused memory.

We ATM do 2 things in encode_float32_remap_segment()
one is encoding the table
the other is writing the remaped pixels into sc->bitmap
by using unit[s.p][s.i].ndx

sc->bitmap is unused before, unit[s.p][s.i].ndx unused afterwards
the input image itself is also not used again
half of fltmap32 is unused (thats 512kb alone here)

the code can be writen so it doesnt need the stack
but just runs twice over the stuff (not sure how clean this
would be but if you try _please_ do it on top of the patches
i posted today, the code is simpler and less buggy after
these patches

But i really dont understand why fate fails in relation to code
it never executes.

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Asymptotically faster algorithms should always be preferred if you have
asymptotical amounts of data

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1enc: further reduce stack usage
  2025-03-25  1:50 ` Michael Niedermayer
@ 2025-03-25  1:57   ` James Almer
  2025-03-25  2:26     ` Michael Niedermayer
  2025-03-25  9:20     ` Martin Storsjö
  0 siblings, 2 replies; 6+ messages in thread
From: James Almer @ 2025-03-25  1:57 UTC (permalink / raw)
  To: ffmpeg-devel


[-- Attachment #1.1.1: Type: text/plain, Size: 3726 bytes --]

On 3/24/2025 10:50 PM, Michael Niedermayer wrote:
> Hi
> 
> On Mon, Mar 24, 2025 at 07:20:50PM -0300, James Almer wrote:
>> Continues from commit 702239bc500b, fixing FATE failures on MacOS.
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>> Confirmed by Martin Storsjö. Float encoding untested.
>>
>>   libavcodec/ffv1.h    |  16 ++++
>>   libavcodec/ffv1enc.c | 177 +++++++++++++++++--------------------------
>>   2 files changed, 84 insertions(+), 109 deletions(-)
>>
>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>> index 09118e0b7d..d1c239f138 100644
>> --- a/libavcodec/ffv1.h
>> +++ b/libavcodec/ffv1.h
>> @@ -115,6 +115,22 @@ typedef struct FFV1SliceContext {
>>           uint32_t val; //this is unneeded if you accept a dereference on each access
>>           uint16_t ndx;
>>       } unit[4][65536];
>> +    struct RemapEncoderState {
>> +        int delta_stack[65536];     //We need to encode the run value before the adjustments, this stores the adjustments until we know the length of the run
>> +        int16_t index_stack[65537]; //only needed with multiple segments
>> +        uint8_t state[2][3][32];
>> +        int mul[4096+1];
>> +        RangeCoder rc;
>> +        int lu;
>> +        int run;
>> +        int64_t last_val;
>> +        int compact_index;
>> +        int mul_count;
>> +        int i;
>> +        int pixel_num;
>> +        int p;
>> +        int current_mul_index;
>> +    } remap_state;
>>   } FFV1SliceContext;
> 
> please provide a link to the failure

Martin will have to do that. I can't seem to find any FATE instance 
failing, but he said it affected his OSX machines.

> 
> This makes the code increasingly ugly.
> 
> i dont understand why this breaks fate, fate should not use
> any of the float code as none should be run in fate ATM.
> its also all under -strict -2 checks

It also surprised me, since these are functions that need to be called, 
unlike the fix in 702239bc500b which was in a function actually called 
by existing tests.

> 
> this is temporary data not needed outside float32
> and not needed outside the remap table writing.
> 
> we may need more than one such state.
> (if we dont use a heuristic but actually
>   encode bruteforce / trial and error)
> 
> t conflicts with all work i did today
> 
> theres tons of unused memory.
> 
> We ATM do 2 things in encode_float32_remap_segment()
> one is encoding the table
> the other is writing the remaped pixels into sc->bitmap
> by using unit[s.p][s.i].ndx
> 
> sc->bitmap is unused before, unit[s.p][s.i].ndx unused afterwards
> the input image itself is also not used again
> half of fltmap32 is unused (thats 512kb alone here)

Yeah, ideally all this is allocated only when needed rather than 
unconditionally in the slice context. But i didn't go that far since i 
can't even reproduce this issue.

> 
> the code can be writen so it doesnt need the stack
> but just runs twice over the stuff (not sure how clean this
> would be but if you try _please_ do it on top of the patches
> i posted today, the code is simpler and less buggy after
> these patches
> 
> But i really dont understand why fate fails in relation to code
> it never executes.

For the issue fixed in 702239bc500b, i guess it did attempt to reserve 
stack space even if it never used it. For this one? Beats me.

> 
> thx
> 
> [...]
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1enc: further reduce stack usage
  2025-03-25  1:57   ` James Almer
@ 2025-03-25  2:26     ` Michael Niedermayer
  2025-03-25  9:20     ` Martin Storsjö
  1 sibling, 0 replies; 6+ messages in thread
From: Michael Niedermayer @ 2025-03-25  2:26 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 4860 bytes --]

On Mon, Mar 24, 2025 at 10:57:28PM -0300, James Almer wrote:
> On 3/24/2025 10:50 PM, Michael Niedermayer wrote:
> > Hi
> > 
> > On Mon, Mar 24, 2025 at 07:20:50PM -0300, James Almer wrote:
> > > Continues from commit 702239bc500b, fixing FATE failures on MacOS.
> > > 
> > > Signed-off-by: James Almer <jamrial@gmail.com>
> > > ---
> > > Confirmed by Martin Storsjö. Float encoding untested.
> > > 
> > >   libavcodec/ffv1.h    |  16 ++++
> > >   libavcodec/ffv1enc.c | 177 +++++++++++++++++--------------------------
> > >   2 files changed, 84 insertions(+), 109 deletions(-)
> > > 
> > > diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
> > > index 09118e0b7d..d1c239f138 100644
> > > --- a/libavcodec/ffv1.h
> > > +++ b/libavcodec/ffv1.h
> > > @@ -115,6 +115,22 @@ typedef struct FFV1SliceContext {
> > >           uint32_t val; //this is unneeded if you accept a dereference on each access
> > >           uint16_t ndx;
> > >       } unit[4][65536];
> > > +    struct RemapEncoderState {
> > > +        int delta_stack[65536];     //We need to encode the run value before the adjustments, this stores the adjustments until we know the length of the run
> > > +        int16_t index_stack[65537]; //only needed with multiple segments
> > > +        uint8_t state[2][3][32];
> > > +        int mul[4096+1];
> > > +        RangeCoder rc;
> > > +        int lu;
> > > +        int run;
> > > +        int64_t last_val;
> > > +        int compact_index;
> > > +        int mul_count;
> > > +        int i;
> > > +        int pixel_num;
> > > +        int p;
> > > +        int current_mul_index;
> > > +    } remap_state;
> > >   } FFV1SliceContext;
> > 
> > please provide a link to the failure
> 
> Martin will have to do that. I can't seem to find any FATE instance failing,
> but he said it affected his OSX machines.

yeah, i also looked and couldnt find it


> 
> > 
> > This makes the code increasingly ugly.
> > 
> > i dont understand why this breaks fate, fate should not use
> > any of the float code as none should be run in fate ATM.
> > its also all under -strict -2 checks
> 
> It also surprised me, since these are functions that need to be called,
> unlike the fix in 702239bc500b which was in a function actually called by
> existing tests.

for the record macosx should habe 512k stack per thread


> 
> > 
> > this is temporary data not needed outside float32
> > and not needed outside the remap table writing.
> > 
> > we may need more than one such state.
> > (if we dont use a heuristic but actually
> >   encode bruteforce / trial and error)
> > 
> > t conflicts with all work i did today
> > 
> > theres tons of unused memory.
> > 
> > We ATM do 2 things in encode_float32_remap_segment()
> > one is encoding the table
> > the other is writing the remaped pixels into sc->bitmap
> > by using unit[s.p][s.i].ndx
> > 
> > sc->bitmap is unused before, unit[s.p][s.i].ndx unused afterwards
> > the input image itself is also not used again
> > half of fltmap32 is unused (thats 512kb alone here)
> 
> Yeah, ideally all this is allocated only when needed rather than
> unconditionally in the slice context. But i didn't go that far since i can't
> even reproduce this issue.
> 
> > 
> > the code can be writen so it doesnt need the stack
> > but just runs twice over the stuff (not sure how clean this
> > would be but if you try _please_ do it on top of the patches
> > i posted today, the code is simpler and less buggy after
> > these patches
> > 
> > But i really dont understand why fate fails in relation to code
> > it never executes.
> 
> For the issue fixed in 702239bc500b, i guess it did attempt to reserve stack
> space even if it never used it. For this one? Beats me.

maybe some smart compiler inlining this

ATM before sleeping over it i would suggest as solution
1. apply my patches from today (code will be cleaner)
2. eliminate the RemapEncoderState struct put things on the stack
3. use the space after fltmap (in fltmap32) for the 2 stack arrays
   with some struct and union to keep it clear

moving RemapEncoderState to the context makes no sense, theres tons
of small bits in it that dont exist outside their local use in the
code using them.
I guess the struct causes more problems than it solves

ill look into this tomorrow, need to sleep now as theres some
noisy workers across the street building a house so i need to
sleep at night, but dont hesitate to work on it if you want
or just disable the code

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Let us carefully observe those good qualities wherein our enemies excel us
and endeavor to excel them, by avoiding what is faulty, and imitating what
is excellent in them. -- Plutarch

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1enc: further reduce stack usage
  2025-03-25  1:57   ` James Almer
  2025-03-25  2:26     ` Michael Niedermayer
@ 2025-03-25  9:20     ` Martin Storsjö
  2025-03-25  9:50       ` Michael Niedermayer
  1 sibling, 1 reply; 6+ messages in thread
From: Martin Storsjö @ 2025-03-25  9:20 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Mon, 24 Mar 2025, James Almer wrote:

> On 3/24/2025 10:50 PM, Michael Niedermayer wrote:
>> Hi
>> 
>> On Mon, Mar 24, 2025 at 07:20:50PM -0300, James Almer wrote:
>>> Continues from commit 702239bc500b, fixing FATE failures on MacOS.
>>> 
>>> Signed-off-by: James Almer <jamrial@gmail.com>
>>> ---
>>> Confirmed by Martin Storsjö. Float encoding untested.
>>>
>>>   libavcodec/ffv1.h    |  16 ++++
>>>   libavcodec/ffv1enc.c | 177 +++++++++++++++++--------------------------
>>>   2 files changed, 84 insertions(+), 109 deletions(-)
>>> 
>>> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
>>> index 09118e0b7d..d1c239f138 100644
>>> --- a/libavcodec/ffv1.h
>>> +++ b/libavcodec/ffv1.h
>>> @@ -115,6 +115,22 @@ typedef struct FFV1SliceContext {
>>>           uint32_t val; //this is unneeded if you accept a dereference on 
>>> each access
>>>           uint16_t ndx;
>>>       } unit[4][65536];
>>> +    struct RemapEncoderState {
>>> +        int delta_stack[65536];     //We need to encode the run value 
>>> before the adjustments, this stores the adjustments until we know the 
>>> length of the run
>>> +        int16_t index_stack[65537]; //only needed with multiple segments
>>> +        uint8_t state[2][3][32];
>>> +        int mul[4096+1];
>>> +        RangeCoder rc;
>>> +        int lu;
>>> +        int run;
>>> +        int64_t last_val;
>>> +        int compact_index;
>>> +        int mul_count;
>>> +        int i;
>>> +        int pixel_num;
>>> +        int p;
>>> +        int current_mul_index;
>>> +    } remap_state;
>>>   } FFV1SliceContext;
>> 
>> please provide a link to the failure
>
> Martin will have to do that. I can't seem to find any FATE instance failing, 
> but he said it affected his OSX machines.

I set up a couple of macOS FATE instances now, both which show the 
failure: 
https://fate.ffmpeg.org/history.cgi?slot=aarch64-apple-darwin-macos11
https://fate.ffmpeg.org/history.cgi?slot=x86_64-apple-darwin-macos11

// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/ffv1enc: further reduce stack usage
  2025-03-25  9:20     ` Martin Storsjö
@ 2025-03-25  9:50       ` Michael Niedermayer
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Niedermayer @ 2025-03-25  9:50 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2674 bytes --]

Hi Martin

On Tue, Mar 25, 2025 at 11:20:52AM +0200, Martin Storsjö wrote:
> On Mon, 24 Mar 2025, James Almer wrote:
> 
> > On 3/24/2025 10:50 PM, Michael Niedermayer wrote:
> > > Hi
> > > 
> > > On Mon, Mar 24, 2025 at 07:20:50PM -0300, James Almer wrote:
> > > > Continues from commit 702239bc500b, fixing FATE failures on MacOS.
> > > > 
> > > > Signed-off-by: James Almer <jamrial@gmail.com>
> > > > ---
> > > > Confirmed by Martin Storsjö. Float encoding untested.
> > > > 
> > > >   libavcodec/ffv1.h    |  16 ++++
> > > >   libavcodec/ffv1enc.c | 177 +++++++++++++++++--------------------------
> > > >   2 files changed, 84 insertions(+), 109 deletions(-)
> > > > 
> > > > diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
> > > > index 09118e0b7d..d1c239f138 100644
> > > > --- a/libavcodec/ffv1.h
> > > > +++ b/libavcodec/ffv1.h
> > > > @@ -115,6 +115,22 @@ typedef struct FFV1SliceContext {
> > > >           uint32_t val; //this is unneeded if you accept a
> > > > dereference on each access
> > > >           uint16_t ndx;
> > > >       } unit[4][65536];
> > > > +    struct RemapEncoderState {
> > > > +        int delta_stack[65536];     //We need to encode the run
> > > > value before the adjustments, this stores the adjustments until
> > > > we know the length of the run
> > > > +        int16_t index_stack[65537]; //only needed with multiple segments
> > > > +        uint8_t state[2][3][32];
> > > > +        int mul[4096+1];
> > > > +        RangeCoder rc;
> > > > +        int lu;
> > > > +        int run;
> > > > +        int64_t last_val;
> > > > +        int compact_index;
> > > > +        int mul_count;
> > > > +        int i;
> > > > +        int pixel_num;
> > > > +        int p;
> > > > +        int current_mul_index;
> > > > +    } remap_state;
> > > >   } FFV1SliceContext;
> > > 
> > > please provide a link to the failure
> > 
> > Martin will have to do that. I can't seem to find any FATE instance
> > failing, but he said it affected his OSX machines.
> 
> I set up a couple of macOS FATE instances now, both which show the failure:
> https://fate.ffmpeg.org/history.cgi?slot=aarch64-apple-darwin-macos11
> https://fate.ffmpeg.org/history.cgi?slot=x86_64-apple-darwin-macos11

I wonder if strictly taken, this is a compiler bug, given the code is not
executed or reachable

Ill apply a fix that eliminates these 2 arrays entirely

thx


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If a bugfix only changes things apparently unrelated to the bug with no
further explanation, that is a good sign that the bugfix is wrong.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-03-25  9:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-24 22:20 [FFmpeg-devel] [PATCH] avcodec/ffv1enc: further reduce stack usage James Almer
2025-03-25  1:50 ` Michael Niedermayer
2025-03-25  1:57   ` James Almer
2025-03-25  2:26     ` Michael Niedermayer
2025-03-25  9:20     ` Martin Storsjö
2025-03-25  9:50       ` Michael Niedermayer

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git