Re: [FFmpeg-devel] [PATCH] avcodec/jpegxl_parser: fix various memory issues

From: Leo Izen <leo.izen@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH] avcodec/jpegxl_parser: fix various memory issues
Date: Mon, 2 Oct 2023 19:41:19 -0400
Message-ID: <a83317e6-9f72-4dc4-bdc6-d87ab9d2bae6@gmail.com> (raw)
In-Reply-To: <AS8P250MB0744069D58B4F78F598043848FC5A@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM>

On 10/2/23 16:40, Andreas Rheinhardt wrote:
> Leo Izen:
>> The spec caps the prefix alphabet size to 32768 (i.e. 1 << 15) so we
>> need to check for that and reject alphabets that are too large.
> 
> No, we don't "need to", we can. FFmpeg is not a validator tool.

We need to because we risk over-allocating otherwise. If the signalled 
value is far too large, we consume a pointlessly large amount of memory.

> 
>>
>> Additionally, there's no need to allocate buffers that are as large as
>> the maximum alphabet size as these aren't stack-allocated, they're heap
>> allocated and thus can be variable size.
>>
>> Added an overflow check as well, which fixes leaking the buffer, and
>> capping the alphabet size fixes two potential overruns as well.
>>
>> Fixes: out of array access
>> Fixes: 62089/clusterfuzz-testcase-minimized-ffmpeg_DEMUXER_fuzzer-
>>      5437089094959104.fuzz
>>
>> Found-by: continuous fuzzing process
>>      https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
>> Found-by: Hardik Shah of Vehere (Dawn Treaders team)
>> Co-authored-by: Michael Niedermayer <michael@niedermayer.cc>
>> Signed-off-by: Leo Izen <leo.izen@gmail.com>
>> ---
>>   libavcodec/jpegxl_parser.c | 23 +++++++++++++++++------
>>   1 file changed, 17 insertions(+), 6 deletions(-)
>>
>> diff --git a/libavcodec/jpegxl_parser.c b/libavcodec/jpegxl_parser.c
>> index d25a1b6e1d..51af0f4ed1 100644
>> --- a/libavcodec/jpegxl_parser.c
>> +++ b/libavcodec/jpegxl_parser.c
>> @@ -46,6 +46,8 @@
>>   #define JXL_FLAG_USE_LF_FRAME 32
>>   #define JXL_FLAG_SKIP_ADAPTIVE_LF_SMOOTH 128
>>   
>> +#define MAX_PREFIX_ALPHABET_SIZE (1u << 15)
>> +
>>   #define clog1p(x) (ff_log2(x) + !!(x))
>>   #define unpack_signed(x) (((x) & 1 ? -(x)-1 : (x))/2)
>>   #define div_ceil(x, y) (((x) - 1) / (y) + 1)
>> @@ -724,16 +726,17 @@ static int read_vlc_prefix(GetBitContext *gb, JXLEntropyDecoder *dec, JXLSymbolD
>>       if (ret < 0)
>>           goto end;
>>   
>> -    buf = av_calloc(1, 262148); // 32768 * 8 + 4
>> +    buf = av_calloc(1, dist->alphabet_size * (2 * sizeof(int8_t) + sizeof(int16_t) + sizeof(uint32_t))
>> +                       + sizeof(uint32_t));
> 
> You can avoid the multiplication by using av_calloc((2 * sizeof(int8_t)
> + sizeof(int16_t) + sizeof(uint32_t)) + sizeof(uint32_t),
> dist->alphabet_size).

That's not the same thing. This will cause us to overallocate by 
4 * dist->alphabet_size - 4 bytes. Is that okay?

> 
>>       if (!buf) {
>>           ret = AVERROR(ENOMEM);
>>           goto end;
>>       }
>>   
>>       level2_lens = (int8_t *)buf;
>> -    level2_lens_s = (int8_t *)(buf + 32768);
>> -    level2_syms = (int16_t *)(buf + 65536);
>> -    level2_codecounts = (uint32_t *)(buf + 131072);
>> +    level2_lens_s = (int8_t *)(buf + dist->alphabet_size * sizeof(int8_t));
>> +    level2_syms = (int16_t *)(buf + dist->alphabet_size * (2 * sizeof(int8_t)));
>> +    level2_codecounts = (uint32_t *)(buf + dist->alphabet_size * (2 * sizeof(int8_t) + sizeof(int16_t)));
>>   
>>       total_code = 0;
>>       for (int i = 0; i < dist->alphabet_size; i++) {
>> @@ -742,6 +745,10 @@ static int read_vlc_prefix(GetBitContext *gb, JXLEntropyDecoder *dec, JXLSymbolD
>>               int extra = 3 + get_bits(gb, 2);
>>               if (repeat_count_prev)
>>                   extra = 4 * (repeat_count_prev - 2) - repeat_count_prev + extra;
>> +            if (i + extra > dist->alphabet_size) {
>> +                ret = AVERROR_INVALIDDATA;
>> +                goto end;
>> +            }
>>               for (int j = 0; j < extra; j++)
>>                   level2_lens[i + j] = prev;
>>               total_code += (32768 >> prev) * extra;
>> @@ -772,8 +779,10 @@ static int read_vlc_prefix(GetBitContext *gb, JXLEntropyDecoder *dec, JXLSymbolD
>>           }
>>       }
>>   
>> -    if (total_code != 32768 && level2_codecounts[0] < dist->alphabet_size - 1)
>> -        return AVERROR_INVALIDDATA;
>> +    if (total_code != 32768 && level2_codecounts[0] < dist->alphabet_size - 1) {
>> +        ret = AVERROR_INVALIDDATA;
>> +        goto end;
>> +    }
>>   
>>       for (int i = 1; i < dist->alphabet_size + 1; i++)
>>           level2_codecounts[i] += level2_codecounts[i - 1];
>> @@ -848,6 +857,8 @@ static int read_distribution_bundle(GetBitContext *gb, JXLEntropyDecoder *dec,
>>               if (get_bits1(gb)) {
>>                   int n = get_bits(gb, 4);
>>                   dist->alphabet_size = 1 + (1 << n) + get_bitsz(gb, n);
>> +                if (dist->alphabet_size > MAX_PREFIX_ALPHABET_SIZE)
>> +                    return AVERROR_INVALIDDATA;
>>               } else {
>>                   dist->alphabet_size = 1;
>>               }
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".