Re: [FFmpeg-devel] [PATCH v2] avcodec/ffv1dec: Don't set ThreadFrame properties, fix race

From: Michael Niedermayer <michael@niedermayer.cc>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: Re: [FFmpeg-devel] [PATCH v2] avcodec/ffv1dec: Don't set ThreadFrame properties, fix race
Date: Fri, 4 Mar 2022 01:03:07 +0100
Message-ID: <20220304000307.GC2829255@pb2> (raw)
In-Reply-To: <AM7PR03MB666089E7D33FF266CFBE659E8F049@AM7PR03MB6660.eurprd03.prod.outlook.com>

[-- Attachment #1.1: Type: text/plain, Size: 9155 bytes --]

On Thu, Mar 03, 2022 at 08:55:08AM +0100, Andreas Rheinhardt wrote:
> Each FFV1 slice has its own SAR and picture structure encoded;
> when a slice header was parsed, the relevant fields of a ThreadFrame's
> AVFrame were directly set based upon the parsed values. This is
> a data race in case slice threading is in use because of the concurrent
> writes. In case of frame threading, it is also a data race because
> the writes happen after ff_thread_finish_setup(), so that the reads
> performed by ff_thread_ref_frame() are unsynchronized with the writes
> performed when parsing the header.
> 
> This commit fixes these issues by not writing to the ThreadFrame at all;
> instead the raw data is read into the each SliceContext first; after
> decoding the current frame and creating the actual output frame these
> values are compared to each other. If they are valid and coincide, the
> derived value is written directly to the output frame, not to the
> ThreadFrame, thereby avoiding data races; in case they are not valid
> or inconsistent the most commonly used valid value is used instead.
> 
> This fixes most FFV1 FATE-tests completely when using slice threading;
> the exceptions are fate-vsynth3-ffv1, vsynth3-ffv1-v3-yuv420p and
> vsynth3-ffv1-v3-yuv422p10. (In these tests the partitioning into slices
> does not honour chroma subsampling; as a result, chroma pixels at slice
> borders get set by more than one thread without any synchronization.)
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
>  libavcodec/ffv1.h    |   4 ++
>  libavcodec/ffv1dec.c | 119 +++++++++++++++++++++++++++++++++++--------
>  2 files changed, 103 insertions(+), 20 deletions(-)
> 
> diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
> index ac80fa85ce..f640d5a710 100644
> --- a/libavcodec/ffv1.h
> +++ b/libavcodec/ffv1.h
> @@ -91,6 +91,8 @@ typedef struct FFV1Context {
>      struct FFV1Context *fsrc;
>  
>      AVFrame *cur;
> +    int picture_structure;
> +    AVRational sample_aspect_ratio;
>      int plane_count;
>      int ac;                              ///< 1=range coder <-> 0=golomb rice
>      int ac_byte_count;                   ///< number of bytes used for AC coding
> @@ -132,6 +134,8 @@ typedef struct FFV1Context {
>      int slice_coding_mode;
>      int slice_rct_by_coef;
>      int slice_rct_ry_coef;
> +
> +    AVRational slice_sample_aspect_ratios[MAX_SLICES];
>  } FFV1Context;
>  
>  int ff_ffv1_common_init(AVCodecContext *avctx);
> diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
> index 201630167d..bda8a5a2f1 100644
> --- a/libavcodec/ffv1dec.c
> +++ b/libavcodec/ffv1dec.c
> @@ -167,7 +167,7 @@ static int decode_slice_header(const FFV1Context *f, FFV1Context *fs)
>  {
>      RangeCoder *c = &fs->c;
>      uint8_t state[CONTEXT_SIZE];
> -    unsigned ps, i, context_count;
> +    unsigned i, context_count;
>      memset(state, 128, sizeof(state));
>  
>      av_assert0(f->version > 2);
> @@ -205,25 +205,17 @@ static int decode_slice_header(const FFV1Context *f, FFV1Context *fs)
>          p->context_count = context_count;
>      }
>  
> -    ps = get_symbol(c, state, 0);
> -    if (ps == 1) {
> -        f->cur->interlaced_frame = 1;
> -        f->cur->top_field_first  = 1;
> -    } else if (ps == 2) {
> -        f->cur->interlaced_frame = 1;
> -        f->cur->top_field_first  = 0;
> -    } else if (ps == 3) {
> -        f->cur->interlaced_frame = 0;
> -    }
> -    f->cur->sample_aspect_ratio.num = get_symbol(c, state, 0);
> -    f->cur->sample_aspect_ratio.den = get_symbol(c, state, 0);
> -
> -    if (av_image_check_sar(f->width, f->height,
> -                           f->cur->sample_aspect_ratio) < 0) {
> +    fs->picture_structure       = get_symbol(c, state, 0);
> +    fs->sample_aspect_ratio.num = get_symbol(c, state, 0);
> +    fs->sample_aspect_ratio.den = get_symbol(c, state, 0);
> +    /* Num or den being zero means unknown for FFV1; our unknown is 0 / 1. */

0/0 is unknown in FFV1, 0/1 and 1/0 are treated as unknown because thats
the only reasonable thing one really could do if they are encountered

> +    if (fs->sample_aspect_ratio.num == 0 || fs->sample_aspect_ratio.den == 0) {
> +        fs->sample_aspect_ratio = (AVRational) { 0, 1 };
> +    } else if (av_image_check_sar(f->width, f->height,
> +                                  fs->sample_aspect_ratio) < 0) {
>          av_log(f->avctx, AV_LOG_WARNING, "ignoring invalid SAR: %u/%u\n",
> -               f->cur->sample_aspect_ratio.num,
> -               f->cur->sample_aspect_ratio.den);
> -        f->cur->sample_aspect_ratio = (AVRational){ 0, 1 };
> +               fs->sample_aspect_ratio.num, fs->sample_aspect_ratio.den);
> +        fs->sample_aspect_ratio = (AVRational) { 0, 0 };
>      }
>  
>      if (fs->version > 3) {
> @@ -251,6 +243,9 @@ static int decode_slice(AVCodecContext *c, void *arg)
>      AVFrame * const p = f->cur;
>      int i, si;
>  
> +    fs->picture_structure   = 0;
> +    fs->sample_aspect_ratio = (AVRational){ 0, 0 };
> +
>      for( si=0; fs != f->slice_context[si]; si ++)
>          ;
>  

> @@ -831,6 +826,21 @@ static av_cold int decode_init(AVCodecContext *avctx)
>      return 0;
>  }
>  
> +static int compare_sar(const void *a, const void *b)
> +{
> +    const AVRational x = *(const AVRational*)a;
> +    const AVRational y = *(const AVRational*)b;
> +    int cmp;
> +
> +    /* 0/0 means invalid and 0/1 means unknown.
> +     * Order the ratios so that these values are at the end. */
> +    if (x.num == 0 || y.num == 0)
> +        return y.num - x.num;
> +    cmp = av_cmp_q(x, y);
> +    /* For definiteness, order equivalent fractions by "lower terms last". */
> +    return cmp ? cmp : y.num - x.num;
> +}

you can probably simplify this if you map the 32/32 fraction into a 64bit int
also should be faster

i also wonder a bit if it would make sense to split this "find the most common
element" code out or if that makes no sense.

> +
>  static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
>  {
>      uint8_t *buf        = avpkt->data;
> @@ -969,9 +979,78 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
>  
>      if (f->last_picture.f)
>          ff_thread_release_ext_buffer(avctx, &f->last_picture);
> -    if ((ret = av_frame_ref(data, f->picture.f)) < 0)
> +    p = data;
> +    if ((ret = av_frame_ref(p, f->picture.f)) < 0)
>          return ret;
>  
> +    if (f->version > 2) {
> +        AVRational sar = f->slice_context[0]->sample_aspect_ratio;
> +        int pic_structures[4] = { 0 }, picture_structure;
> +        int inconsistent_sar = 0, has_sar = 0;
> +
> +        for (int i = 0; i < f->slice_count; i++) {
> +            const FFV1Context *const fs = f->slice_context[i];
> +            int slice_picture_structure = (unsigned)fs->picture_structure > 3 ?
> +                                                    0 : fs->picture_structure;
> +
> +            pic_structures[slice_picture_structure]++;
> +
> +            if (fs->sample_aspect_ratio.num != sar.num ||
> +                fs->sample_aspect_ratio.den != sar.den)
> +                inconsistent_sar = 1;
> +            sar = fs->sample_aspect_ratio;
> +            if (sar.num == 0)
> +                inconsistent_sar = 1;
> +            else
> +                has_sar = 1;
> +            f->slice_sample_aspect_ratios[i] = sar;
> +        }
> +        if (has_sar) {
> +            if (inconsistent_sar) {
> +                AVRational last_sar = (AVRational){ 0, 0 };
> +                qsort(f->slice_sample_aspect_ratios, f->slice_count,
> +                      sizeof(f->slice_sample_aspect_ratios[0]), compare_sar);
> +
> +                for (int i = 0, current_run = 0, longest_run = 0;
> +                     i < f->slice_count; i++) {
> +                    AVRational cur_sar = f->slice_sample_aspect_ratios[i];
> +
> +                    if (cur_sar.num == 0)
> +                        break;
> +                    if (av_cmp_q(cur_sar, last_sar))
> +                        current_run = 1;
> +                    else
> +                        current_run++;
> +                    if (current_run > longest_run) {
> +                        longest_run = current_run;
> +                        sar         = cur_sar;
> +                    }
> +                    last_sar = cur_sar;
> +                }
> +                av_log(avctx, AV_LOG_WARNING, "SAR inconsistent, using %u/%u\n",
> +                       sar.num, sar.den);
> +            }
> +            p->sample_aspect_ratio = sar;
> +        }

if there are 10 slices and 9 say aspect unknown and one says 5000:3
i would belive the 9 more, this code looks a bit like it would always favor
known over unknown

[...]

thx

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

What does censorship reveal? It reveals fear. -- Julian Assange

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".