Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/2] lavc: convert frame threading to the receive_frame() pattern
@ 2022-12-07 11:43 Timo Rothenpieler
  2022-12-07 11:43 ` [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
                   ` (2 more replies)
  0 siblings, 3 replies; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-07 11:43 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Anton Khirnov

From: Anton Khirnov <anton@khirnov.net>

Reorganize the code such that the frame threading code does not call the
decoders directly, but instead calls back into the generic decoding
code. This avoids duplicating the logic that wraps the decoder
invocation and will be useful in the following commits.
---
 libavcodec/decode.c        |  57 +++++---
 libavcodec/decode.h        |   7 +
 libavcodec/internal.h      |   7 +
 libavcodec/pthread_frame.c | 276 ++++++++++++++++++++++++-------------
 libavcodec/thread.h        |  18 +--
 5 files changed, 241 insertions(+), 124 deletions(-)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 6be2d3d6ed..bf3c0cbe0a 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -202,6 +202,10 @@ fail:
     return ret;
 }
 
+#if !HAVE_THREADS
+#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
+#endif
+
 int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
     AVCodecInternal *avci = avctx->internal;
@@ -210,7 +214,14 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
     if (avci->draining)
         return AVERROR_EOF;
 
-    ret = av_bsf_receive_packet(avci->bsf, pkt);
+    /* If we are a worker thread, get the next packet from the threading
+     * context. Otherwise we are the main (user-facing) context, so we get the
+     * next packet from the input filterchain.
+     */
+    if (avctx->internal->is_frame_mt)
+        ret = ff_thread_get_packet(avctx, pkt);
+    else
+        ret = av_bsf_receive_packet(avci->bsf, pkt);
     if (ret == AVERROR_EOF)
         avci->draining = 1;
     if (ret < 0)
@@ -295,30 +306,25 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
         return AVERROR_EOF;
 
     if (!pkt->data &&
-        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
-          avctx->active_thread_type & FF_THREAD_FRAME))
+        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
         return AVERROR_EOF;
 
     got_frame = 0;
 
-    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
-        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
-    } else {
-        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
-
-        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
-            frame->pkt_dts = pkt->dts;
-        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
-            if(!avctx->has_b_frames)
-                frame->pkt_pos = pkt->pos;
-            //FIXME these should be under if(!avctx->has_b_frames)
-            /* get_buffer is supposed to set frame parameters */
-            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
-                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
-                if (!frame->width)                    frame->width               = avctx->width;
-                if (!frame->height)                   frame->height              = avctx->height;
-                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
-            }
+    ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
+
+    if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+        frame->pkt_dts = pkt->dts;
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if(!avctx->has_b_frames)
+            frame->pkt_pos = pkt->pos;
+        //FIXME these should be under if(!avctx->has_b_frames)
+        /* get_buffer is supposed to set frame parameters */
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
+            if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
+            if (!frame->width)                    frame->width               = avctx->width;
+            if (!frame->height)                   frame->height              = avctx->height;
+            if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
         }
     }
     emms_c();
@@ -568,7 +574,7 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
     return 0;
 }
 
-static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
 {
     AVCodecInternal *avci = avctx->internal;
     const FFCodec *const codec = ffcodec(avctx->codec);
@@ -634,6 +640,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return ret;
 }
 
+static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+{
+    if (avctx->active_thread_type & FF_THREAD_FRAME)
+        return ff_thread_receive_frame(avctx, frame);
+    return ff_decode_receive_frame_internal(avctx, frame);
+}
+
 int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 {
     AVCodecInternal *avci = avctx->internal;
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 5d95369b5e..34beb70f97 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -58,6 +58,13 @@ typedef struct FrameDecodeData {
  */
 int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
+/**
+ * Do the actual decoding and obtain a decoded frame from the decoder, if
+ * available. When frame threading is used, this is invoked by the worker
+ * threads, otherwise by the top layer directly.
+ */
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
+
 /**
  * Called by decoders to get the next packet for decoding.
  *
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index 76a6ea6bc6..99e4bb3095 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -56,6 +56,13 @@ typedef struct AVCodecInternal {
      */
     int is_copy;
 
+    /**
+     * This field is set to 1 when frame threading is being used and the parent
+     * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
+     * one of those actually doing the decoding), 0 otherwise.
+     */
+    int is_frame_mt;
+
     /**
      * An audio frame with less than required samples has been submitted (and
      * potentially padded with silence). Reject all subsequent frames.
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index df82a4125f..08550fc728 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -46,6 +46,7 @@
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/fifo.h"
 #include "libavutil/thread.h"
 
 enum {
@@ -73,6 +74,12 @@ enum {
     INITIALIZED,    ///< Thread has been properly set up
 };
 
+typedef struct DecodedFrames {
+    AVFrame  **f;
+    size_t  nb_f;
+    size_t  nb_f_allocated;
+} DecodedFrames;
+
 /**
  * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
  */
@@ -93,8 +100,10 @@ typedef struct PerThreadContext {
 
     AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
 
-    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
-    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
+    /**
+     * Decoded frames from a single decode iteration.
+     */
+    DecodedFrames df;
     int     result;                 ///< The result of the last codec decode/encode() call.
 
     atomic_int state;
@@ -141,6 +150,14 @@ typedef struct FrameThreadContext {
     pthread_cond_t async_cond;
     int async_lock;
 
+    DecodedFrames df;
+    int result;
+
+    /**
+     * Packet to be submitted to the next thread for decoding.
+     */
+    AVPacket *next_pkt;
+
     int next_decoding;             ///< The next context to submit a packet to.
     int next_finished;             ///< The next context to return output from.
 
@@ -190,6 +207,51 @@ static void thread_set_name(PerThreadContext *p)
     ff_thread_setname(name);
 }
 
+// get a free frame to decode into
+static AVFrame *decoded_frames_get_free(DecodedFrames *df)
+{
+    if (df->nb_f == df->nb_f_allocated) {
+        AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
+                                         sizeof(*df->f));
+        if (!tmp)
+            return NULL;
+        df->f = tmp;
+
+        df->f[df->nb_f] = av_frame_alloc();
+        if (!df->f[df->nb_f])
+            return NULL;
+
+        df->nb_f_allocated++;
+    }
+
+    av_frame_unref(df->f[df->nb_f]);
+    return df->f[df->nb_f];
+}
+
+static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
+{
+    AVFrame *tmp_frame = df->f[0];
+    av_frame_move_ref(dst, tmp_frame);
+    memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
+    df->f[--df->nb_f] = tmp_frame;
+}
+
+static void decoded_frames_flush(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f; i++)
+        av_frame_unref(df->f[i]);
+    df->nb_f = 0;
+}
+
+static void decoded_frames_free(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f_allocated; i++)
+        av_frame_free(&df->f[i]);
+    av_freep(&df->f);
+    df->nb_f           = 0;
+    df->nb_f_allocated = 0;
+}
+
 /**
  * Codec worker thread.
  *
@@ -202,6 +264,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
     PerThreadContext *p = arg;
     AVCodecContext *avctx = p->avctx;
     const FFCodec *codec = ffcodec(avctx->codec);
+    int ret;
 
     thread_set_name(p);
 
@@ -236,16 +299,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
             p->hwaccel_serializing = 1;
         }
 
-        av_frame_unref(p->frame);
-        p->got_frame = 0;
-        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
+        ret = 0;
+        while (ret >= 0) {
+            AVFrame *frame;
 
-        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
-            ff_thread_release_buffer(avctx, p->frame);
+            /* get the frame which will store the output */
+            frame = decoded_frames_get_free(&p->df);
+            if (!frame) {
+                p->result = AVERROR(ENOMEM);
+                goto alloc_fail;
+            }
+
+            /* do the actual decoding */
+            ret = ff_decode_receive_frame_internal(avctx, frame);
+            if (ret == 0)
+                p->df.nb_f++;
+            else if (ret < 0 && frame->buf[0])
+                ff_thread_release_buffer(avctx, frame);
+
+            p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
+        }
 
         if (atomic_load(&p->state) == STATE_SETTING_UP)
             ff_thread_finish_setup(avctx);
 
+alloc_fail:
         if (p->hwaccel_serializing) {
             /* wipe hwaccel state to avoid stale pointers lying around;
              * the state was transferred to FrameThreadContext in
@@ -433,23 +511,26 @@ static void release_delayed_buffers(PerThreadContext *p)
 #endif
 
 static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
-                         AVPacket *avpkt)
+                         AVPacket *in_pkt)
 {
     FrameThreadContext *fctx = p->parent;
     PerThreadContext *prev_thread = fctx->prev_thread;
-    const AVCodec *codec = p->avctx->codec;
-    int ret;
-
-    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
-        return 0;
+    int err;
 
     pthread_mutex_lock(&p->mutex);
 
-    ret = update_context_from_user(p->avctx, user_avctx);
-    if (ret) {
+    av_packet_unref(p->avpkt);
+    av_packet_move_ref(p->avpkt, in_pkt);
+
+    p->avctx->internal->draining      = user_avctx->internal->draining;
+    p->avctx->internal->draining_done = user_avctx->internal->draining_done;
+
+    err = update_context_from_user(p->avctx, user_avctx);
+    if (err < 0) {
         pthread_mutex_unlock(&p->mutex);
-        return ret;
+        return err;
     }
+
     atomic_store_explicit(&p->debug_threads,
                           (p->avctx->debug & FF_DEBUG_THREADS) != 0,
                           memory_order_relaxed);
@@ -459,7 +540,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
 #endif
 
     if (prev_thread) {
-        int err;
         if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
             pthread_mutex_lock(&prev_thread->progress_mutex);
             while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
@@ -480,14 +560,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
     FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
     FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
 
-    av_packet_unref(p->avpkt);
-    ret = av_packet_ref(p->avpkt, avpkt);
-    if (ret < 0) {
-        pthread_mutex_unlock(&p->mutex);
-        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
-        return ret;
-    }
-
     atomic_store(&p->state, STATE_SETTING_UP);
     pthread_cond_signal(&p->input_cond);
     pthread_mutex_unlock(&p->mutex);
@@ -531,57 +603,42 @@ FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
     fctx->prev_thread = p;
-    fctx->next_decoding++;
+    fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
 
     return 0;
 }
 
-int ff_thread_decode_frame(AVCodecContext *avctx,
-                           AVFrame *picture, int *got_picture_ptr,
-                           AVPacket *avpkt)
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     FrameThreadContext *fctx = avctx->internal->thread_ctx;
-    int finished = fctx->next_finished;
-    PerThreadContext *p;
-    int err;
+    int ret = 0;
 
     /* release the async lock, permitting blocked hwaccel threads to
      * go forward while we are in this function */
     async_unlock(fctx);
 
-    /*
-     * Submit a packet to the next decoding thread.
-     */
-
-    p = &fctx->threads[fctx->next_decoding];
-    err = submit_packet(p, avctx, avpkt);
-    if (err)
-        goto finish;
-
-    /*
-     * If we're still receiving the initial packets, don't return a frame.
-     */
+    /* submit packets to threads while there are no buffered results to return */
+    while (!fctx->df.nb_f && !fctx->result) {
+        PerThreadContext *p;
 
-    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
-        fctx->delaying = 0;
+        /* get a packet to be submitted to the next thread */
+        av_packet_unref(fctx->next_pkt);
+        ret = ff_decode_get_packet(avctx, fctx->next_pkt);
+        if (ret < 0 && ret != AVERROR_EOF)
+            goto finish;
 
-    if (fctx->delaying) {
-        *got_picture_ptr=0;
-        if (avpkt->size) {
-            err = avpkt->size;
+        ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
+                            fctx->next_pkt);
+        if (ret < 0)
             goto finish;
-        }
-    }
 
-    /*
-     * Return the next available frame from the oldest thread.
-     * If we're at the end of the stream, then we have to skip threads that
-     * didn't output a frame/error, because we don't want to accidentally signal
-     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
-     */
+        /* do not return any frames until all threads have something to do */
+        if (fctx->next_decoding != fctx->next_finished &&
+            !avctx->internal->draining)
+            continue;
 
-    do {
-        p = &fctx->threads[finished++];
+        p                   = &fctx->threads[fctx->next_finished];
+        fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
 
         if (atomic_load(&p->state) != STATE_INPUT_READY) {
             pthread_mutex_lock(&p->progress_mutex);
@@ -590,35 +647,26 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
             pthread_mutex_unlock(&p->progress_mutex);
         }
 
-        av_frame_move_ref(picture, p->frame);
-        *got_picture_ptr = p->got_frame;
-        picture->pkt_dts = p->avpkt->dts;
-        err = p->result;
-
-        /*
-         * A later call with avkpt->size == 0 may loop over all threads,
-         * including this one, searching for a frame/error to return before being
-         * stopped by the "finished != fctx->next_finished" condition.
-         * Make sure we don't mistakenly return the same frame/error again.
-         */
-        p->got_frame = 0;
-        p->result = 0;
-
-        if (finished >= avctx->thread_count) finished = 0;
-    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
+        fctx->result = p->result;
+        p->result    = 0;
 
-    update_context_from_thread(avctx, p->avctx, 1);
-
-    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+        if (p->df.nb_f)
+            FFSWAP(DecodedFrames, fctx->df, p->df);
+    }
 
-    fctx->next_finished = finished;
+    /* a thread may return multiple frames AND an error
+     * we first return all the frames, then the error */
+    if (fctx->df.nb_f) {
+        decoded_frames_pop(&fctx->df, frame);
+        ret = 0;
+    } else {
+        ret = fctx->result;
+        fctx->result = 0;
+    }
 
-    /* return the size of the consumed packet if no error occurred */
-    if (err >= 0)
-        err = avpkt->size;
 finish:
     async_lock(fctx);
-    return err;
+    return ret;
 }
 
 void ff_thread_report_progress(ThreadFrame *f, int n, int field)
@@ -718,7 +766,6 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
                 pthread_cond_wait(&p->output_cond, &p->progress_mutex);
             pthread_mutex_unlock(&p->progress_mutex);
         }
-        p->got_frame = 0;
     }
 
     async_lock(fctx);
@@ -772,6 +819,17 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
                 av_freep(&ctx->priv_data);
             }
 
+            if (ctx->internal->pkt_props) {
+                while (av_fifo_can_read(ctx->internal->pkt_props)) {
+                    av_packet_unref(ctx->internal->last_pkt_props);
+                    av_fifo_read(ctx->internal->pkt_props, ctx->internal->last_pkt_props, 1);
+                }
+                av_fifo_freep2(&ctx->internal->pkt_props);
+            }
+
+            av_packet_free(&ctx->internal->last_pkt_props);
+            av_packet_free(&ctx->internal->in_pkt);
+
             av_freep(&ctx->slice_offset);
 
             av_buffer_unref(&ctx->internal->pool);
@@ -779,7 +837,7 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
             av_buffer_unref(&ctx->hw_frames_ctx);
         }
 
-        av_frame_free(&p->frame);
+        decoded_frames_free(&p->df);
 
         ff_pthread_free(p, per_thread_offsets);
         av_packet_free(&p->avpkt);
@@ -787,6 +845,9 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
         av_freep(&p->avctx);
     }
 
+    decoded_frames_free(&fctx->df);
+    av_packet_free(&fctx->next_pkt);
+
     av_freep(&fctx->threads);
     ff_pthread_free(fctx, thread_ctx_offsets);
 
@@ -845,14 +906,26 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
     if (err < 0)
         return err;
 
-    if (!(p->frame = av_frame_alloc()) ||
-        !(p->avpkt = av_packet_alloc()))
+    if (!(p->avpkt = av_packet_alloc()))
         return AVERROR(ENOMEM);
-    copy->internal->last_pkt_props = p->avpkt;
 
+    copy->internal->is_frame_mt = 1;
     if (!first)
         copy->internal->is_copy = 1;
 
+    copy->internal->in_pkt = av_packet_alloc();
+    if (!copy->internal->in_pkt)
+        return AVERROR(ENOMEM);
+
+    copy->internal->last_pkt_props = av_packet_alloc();
+    if (!copy->internal->last_pkt_props)
+        return AVERROR(ENOMEM);
+
+    copy->internal->pkt_props = av_fifo_alloc2(1, sizeof(*copy->internal->last_pkt_props),
+                                               AV_FIFO_FLAG_AUTO_GROW);
+    if (!copy->internal->pkt_props)
+        return AVERROR(ENOMEM);
+
     if (codec->init) {
         err = codec->init(copy);
         if (err < 0) {
@@ -908,6 +981,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
         return err;
     }
 
+    fctx->next_pkt = av_packet_alloc();
+    if (!fctx->next_pkt)
+        return AVERROR(ENOMEM);
+
     fctx->async_lock = 1;
     fctx->delaying = 1;
 
@@ -952,12 +1029,13 @@ void ff_thread_flush(AVCodecContext *avctx)
     fctx->next_decoding = fctx->next_finished = 0;
     fctx->delaying = 1;
     fctx->prev_thread = NULL;
+
+    decoded_frames_flush(&fctx->df);
+
     for (i = 0; i < avctx->thread_count; i++) {
         PerThreadContext *p = &fctx->threads[i];
-        // Make sure decode flush calls with size=0 won't return old frames
-        p->got_frame = 0;
-        av_frame_unref(p->frame);
-        p->result = 0;
+
+        decoded_frames_flush(&p->df);
 
 #if FF_API_THREAD_SAFE_CALLBACKS
         release_delayed_buffers(p);
@@ -1181,3 +1259,15 @@ void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
     f->owner[0] = f->owner[1] = NULL;
     ff_thread_release_buffer(avctx, f->f);
 }
+
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    PerThreadContext *p = avctx->internal->thread_ctx;
+
+    if (p->avpkt->buf) {
+        av_packet_move_ref(pkt, p->avpkt);
+        return 0;
+    }
+
+    return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
+}
diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index d5673f25ea..7ae69990fb 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h
@@ -40,17 +40,12 @@
 void ff_thread_flush(AVCodecContext *avctx);
 
 /**
- * Submit a new frame to a decoding thread.
- * Returns the next available frame in picture. *got_picture_ptr
- * will be 0 if none is available.
- * The return value on success is the size of the consumed packet for
- * compatibility with FFCodec.decode. This means the decoder
- * has to consume the full packet.
+ * Submit available packets for decoding to worker threads, return a
+ * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
  *
- * Parameters are the same as FFCodec.decode.
+ * Parameters are the same as FFCodec.receive_frame.
  */
-int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
-                           int *got_picture_ptr, AVPacket *avpkt);
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
 /**
  * If the codec defines update_thread_context(), call this
@@ -99,6 +94,11 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
  */
 void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
 
+/**
+ * Get a packet for decoding. This gets invoked by the worker threads.
+ */
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
+
 int ff_thread_init(AVCodecContext *s);
 int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
         int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading
  2022-12-07 11:43 [FFmpeg-devel] [PATCH 1/2] lavc: convert frame threading to the receive_frame() pattern Timo Rothenpieler
@ 2022-12-07 11:43 ` Timo Rothenpieler
  2022-12-07 13:14 ` [FFmpeg-devel] [PATCH v2] lavc: convert frame threading to the receive_frame() pattern Timo Rothenpieler
  2022-12-07 13:20 ` [FFmpeg-devel] [PATCH v3] " Timo Rothenpieler
  2 siblings, 0 replies; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-07 11:43 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Timo Rothenpieler

In my tests, this led to a notable speed increase that scales with the
number of threads used. Decoding a 720p sample gave the following results:

1 Thread: 1428 FPS
2 Threads: 2501 FPS
8 Threads: 7575 FPS
Automatic: 11326 FPS (On a 16 Core/32 Threads system)
---
 libavcodec/jpeglsdec.c |  2 +-
 libavcodec/mjpegdec.c  | 11 ++++++-----
 libavcodec/sp5xdec.c   |  4 ++--
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
index 2e6d018ea6..c0642e8e30 100644
--- a/libavcodec/jpeglsdec.c
+++ b/libavcodec/jpeglsdec.c
@@ -559,7 +559,7 @@ const FFCodec ff_jpegls_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_SETS_PKT_DTS,
 };
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index 9b7465abe7..54605e04cb 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -54,6 +54,7 @@
 #include "exif.h"
 #include "bytestream.h"
 #include "tiff_common.h"
+#include "thread.h"
 
 
 static int init_default_huffman_tables(MJpegDecodeContext *s)
@@ -713,7 +714,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
                 s->avctx->pix_fmt,
                 AV_PIX_FMT_NONE,
             };
-            s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
+            s->hwaccel_pix_fmt = ff_thread_get_format(s->avctx, pix_fmts);
             if (s->hwaccel_pix_fmt < 0)
                 return AVERROR(EINVAL);
 
@@ -729,7 +730,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         }
 
         av_frame_unref(s->picture_ptr);
-        if (ff_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
+        if (ff_thread_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
             return -1;
         s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
         s->picture_ptr->key_frame = 1;
@@ -3020,7 +3021,7 @@ const FFCodec ff_mjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .p.priv_class   = &mjpegdec_class,
     .p.profiles     = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
@@ -3050,7 +3051,7 @@ const FFCodec ff_thp_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_SETS_PKT_DTS,
@@ -3068,7 +3069,7 @@ const FFCodec ff_smvjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_EXPORTS_CROPPING |
                       FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index 394448c5a9..8b08dc672a 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -101,7 +101,7 @@ const FFCodec ff_sp5x_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_SETS_PKT_DTS,
@@ -118,7 +118,7 @@ const FFCodec ff_amv_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
     .p.max_lowres   = 3,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_SETS_PKT_DTS,
 };
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH v2] lavc: convert frame threading to the receive_frame() pattern
  2022-12-07 11:43 [FFmpeg-devel] [PATCH 1/2] lavc: convert frame threading to the receive_frame() pattern Timo Rothenpieler
  2022-12-07 11:43 ` [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
@ 2022-12-07 13:14 ` Timo Rothenpieler
  2022-12-07 13:20 ` [FFmpeg-devel] [PATCH v3] " Timo Rothenpieler
  2 siblings, 0 replies; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-07 13:14 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Anton Khirnov

From: Anton Khirnov <anton@khirnov.net>

Reorganize the code such that the frame threading code does not call the
decoders directly, but instead calls back into the generic decoding
code. This avoids duplicating the logic that wraps the decoder
invocation and will be useful in the following commits.
---
 libavcodec/decode.c        |  57 +++++---
 libavcodec/decode.h        |   7 +
 libavcodec/internal.h      |   7 +
 libavcodec/pthread_frame.c | 269 ++++++++++++++++++++++++-------------
 libavcodec/thread.h        |  18 +--
 5 files changed, 235 insertions(+), 123 deletions(-)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index b184c3f55b..ce0b2830bd 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -180,6 +180,10 @@ fail:
     return ret;
 }
 
+#if !HAVE_THREADS
+#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
+#endif
+
 int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
     AVCodecInternal *avci = avctx->internal;
@@ -188,7 +192,14 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
     if (avci->draining)
         return AVERROR_EOF;
 
-    ret = av_bsf_receive_packet(avci->bsf, pkt);
+    /* If we are a worker thread, get the next packet from the threading
+     * context. Otherwise we are the main (user-facing) context, so we get the
+     * next packet from the input filterchain.
+     */
+    if (avctx->internal->is_frame_mt)
+        ret = ff_thread_get_packet(avctx, pkt);
+    else
+        ret = av_bsf_receive_packet(avci->bsf, pkt);
     if (ret == AVERROR_EOF)
         avci->draining = 1;
     if (ret < 0)
@@ -273,30 +284,25 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
         return AVERROR_EOF;
 
     if (!pkt->data &&
-        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
-          avctx->active_thread_type & FF_THREAD_FRAME))
+        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
         return AVERROR_EOF;
 
     got_frame = 0;
 
-    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
-        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
-    } else {
-        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
-
-        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
-            frame->pkt_dts = pkt->dts;
-        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
-            if(!avctx->has_b_frames)
-                frame->pkt_pos = pkt->pos;
-            //FIXME these should be under if(!avctx->has_b_frames)
-            /* get_buffer is supposed to set frame parameters */
-            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
-                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
-                if (!frame->width)                    frame->width               = avctx->width;
-                if (!frame->height)                   frame->height              = avctx->height;
-                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
-            }
+    ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
+
+    if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+        frame->pkt_dts = pkt->dts;
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if(!avctx->has_b_frames)
+            frame->pkt_pos = pkt->pos;
+        //FIXME these should be under if(!avctx->has_b_frames)
+        /* get_buffer is supposed to set frame parameters */
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
+            if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
+            if (!frame->width)                    frame->width               = avctx->width;
+            if (!frame->height)                   frame->height              = avctx->height;
+            if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
         }
     }
     emms_c();
@@ -546,7 +552,7 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
     return 0;
 }
 
-static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
 {
     AVCodecInternal *avci = avctx->internal;
     const FFCodec *const codec = ffcodec(avctx->codec);
@@ -604,6 +610,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return ret;
 }
 
+static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+{
+    if (avctx->active_thread_type & FF_THREAD_FRAME)
+        return ff_thread_receive_frame(avctx, frame);
+    return ff_decode_receive_frame_internal(avctx, frame);
+}
+
 int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 {
     AVCodecInternal *avci = avctx->internal;
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 5d95369b5e..34beb70f97 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -58,6 +58,13 @@ typedef struct FrameDecodeData {
  */
 int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
+/**
+ * Do the actual decoding and obtain a decoded frame from the decoder, if
+ * available. When frame threading is used, this is invoked by the worker
+ * threads, otherwise by the top layer directly.
+ */
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
+
 /**
  * Called by decoders to get the next packet for decoding.
  *
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index a283c52e01..c87036efc4 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -56,6 +56,13 @@ typedef struct AVCodecInternal {
      */
     int is_copy;
 
+    /**
+     * This field is set to 1 when frame threading is being used and the parent
+     * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
+     * one of those actually doing the decoding), 0 otherwise.
+     */
+    int is_frame_mt;
+
     /**
      * An audio frame with less than required samples has been submitted (and
      * potentially padded with silence). Reject all subsequent frames.
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 62a0b18a8a..dd395a42d2 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -46,6 +46,7 @@
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/fifo.h"
 #include "libavutil/thread.h"
 
 enum {
@@ -73,6 +74,12 @@ enum {
     INITIALIZED,    ///< Thread has been properly set up
 };
 
+typedef struct DecodedFrames {
+    AVFrame  **f;
+    size_t  nb_f;
+    size_t  nb_f_allocated;
+} DecodedFrames;
+
 /**
  * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
  */
@@ -93,8 +100,10 @@ typedef struct PerThreadContext {
 
     AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
 
-    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
-    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
+    /**
+     * Decoded frames from a single decode iteration.
+     */
+    DecodedFrames df;
     int     result;                 ///< The result of the last codec decode/encode() call.
 
     atomic_int state;
@@ -141,6 +150,14 @@ typedef struct FrameThreadContext {
     pthread_cond_t async_cond;
     int async_lock;
 
+    DecodedFrames df;
+    int result;
+
+    /**
+     * Packet to be submitted to the next thread for decoding.
+     */
+    AVPacket *next_pkt;
+
     int next_decoding;             ///< The next context to submit a packet to.
     int next_finished;             ///< The next context to return output from.
 
@@ -190,6 +207,51 @@ static void thread_set_name(PerThreadContext *p)
     ff_thread_setname(name);
 }
 
+// get a free frame to decode into
+static AVFrame *decoded_frames_get_free(DecodedFrames *df)
+{
+    if (df->nb_f == df->nb_f_allocated) {
+        AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
+                                         sizeof(*df->f));
+        if (!tmp)
+            return NULL;
+        df->f = tmp;
+
+        df->f[df->nb_f] = av_frame_alloc();
+        if (!df->f[df->nb_f])
+            return NULL;
+
+        df->nb_f_allocated++;
+    }
+
+    av_frame_unref(df->f[df->nb_f]);
+    return df->f[df->nb_f];
+}
+
+static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
+{
+    AVFrame *tmp_frame = df->f[0];
+    av_frame_move_ref(dst, tmp_frame);
+    memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
+    df->f[--df->nb_f] = tmp_frame;
+}
+
+static void decoded_frames_flush(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f; i++)
+        av_frame_unref(df->f[i]);
+    df->nb_f = 0;
+}
+
+static void decoded_frames_free(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f_allocated; i++)
+        av_frame_free(&df->f[i]);
+    av_freep(&df->f);
+    df->nb_f           = 0;
+    df->nb_f_allocated = 0;
+}
+
 /**
  * Codec worker thread.
  *
@@ -202,6 +264,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
     PerThreadContext *p = arg;
     AVCodecContext *avctx = p->avctx;
     const FFCodec *codec = ffcodec(avctx->codec);
+    int ret;
 
     thread_set_name(p);
 
@@ -236,16 +299,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
             p->hwaccel_serializing = 1;
         }
 
-        av_frame_unref(p->frame);
-        p->got_frame = 0;
-        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
+        ret = 0;
+        while (ret >= 0) {
+            AVFrame *frame;
+
+            /* get the frame which will store the output */
+            frame = decoded_frames_get_free(&p->df);
+            if (!frame) {
+                p->result = AVERROR(ENOMEM);
+                goto alloc_fail;
+            }
+
+            /* do the actual decoding */
+            ret = ff_decode_receive_frame_internal(avctx, frame);
+            if (ret == 0)
+                p->df.nb_f++;
+            else if (ret < 0 && frame->buf[0])
+                ff_thread_release_buffer(avctx, frame);
 
-        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
-            ff_thread_release_buffer(avctx, p->frame);
+            p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
+        }
 
         if (atomic_load(&p->state) == STATE_SETTING_UP)
             ff_thread_finish_setup(avctx);
 
+alloc_fail:
         if (p->hwaccel_serializing) {
             /* wipe hwaccel state to avoid stale pointers lying around;
              * the state was transferred to FrameThreadContext in
@@ -414,6 +492,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
     if (err < 0)
         return err;
 
+    dst->internal->draining      = src->internal->draining;
+    dst->internal->draining_done = src->internal->draining_done;
+
     return 0;
 }
 
@@ -441,23 +522,23 @@ static void release_delayed_buffers(PerThreadContext *p)
 #endif
 
 static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
-                         AVPacket *avpkt)
+                         AVPacket *in_pkt)
 {
     FrameThreadContext *fctx = p->parent;
     PerThreadContext *prev_thread = fctx->prev_thread;
-    const AVCodec *codec = p->avctx->codec;
-    int ret;
-
-    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
-        return 0;
+    int err;
 
     pthread_mutex_lock(&p->mutex);
 
-    ret = update_context_from_user(p->avctx, user_avctx);
-    if (ret) {
+    av_packet_unref(p->avpkt);
+    av_packet_move_ref(p->avpkt, in_pkt);
+
+    err = update_context_from_user(p->avctx, user_avctx);
+    if (err < 0) {
         pthread_mutex_unlock(&p->mutex);
-        return ret;
+        return err;
     }
+
     atomic_store_explicit(&p->debug_threads,
                           (p->avctx->debug & FF_DEBUG_THREADS) != 0,
                           memory_order_relaxed);
@@ -467,7 +548,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
 #endif
 
     if (prev_thread) {
-        int err;
         if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
             pthread_mutex_lock(&prev_thread->progress_mutex);
             while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
@@ -488,14 +568,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
     FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
     FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
 
-    av_packet_unref(p->avpkt);
-    ret = av_packet_ref(p->avpkt, avpkt);
-    if (ret < 0) {
-        pthread_mutex_unlock(&p->mutex);
-        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
-        return ret;
-    }
-
     atomic_store(&p->state, STATE_SETTING_UP);
     pthread_cond_signal(&p->input_cond);
     pthread_mutex_unlock(&p->mutex);
@@ -539,57 +611,42 @@ FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
     fctx->prev_thread = p;
-    fctx->next_decoding++;
+    fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
 
     return 0;
 }
 
-int ff_thread_decode_frame(AVCodecContext *avctx,
-                           AVFrame *picture, int *got_picture_ptr,
-                           AVPacket *avpkt)
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     FrameThreadContext *fctx = avctx->internal->thread_ctx;
-    int finished = fctx->next_finished;
-    PerThreadContext *p;
-    int err;
+    int ret = 0;
 
     /* release the async lock, permitting blocked hwaccel threads to
      * go forward while we are in this function */
     async_unlock(fctx);
 
-    /*
-     * Submit a packet to the next decoding thread.
-     */
-
-    p = &fctx->threads[fctx->next_decoding];
-    err = submit_packet(p, avctx, avpkt);
-    if (err)
-        goto finish;
+    /* submit packets to threads while there are no buffered results to return */
+    while (!fctx->df.nb_f && !fctx->result) {
+        PerThreadContext *p;
 
-    /*
-     * If we're still receiving the initial packets, don't return a frame.
-     */
-
-    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
-        fctx->delaying = 0;
+        /* get a packet to be submitted to the next thread */
+        av_packet_unref(fctx->next_pkt);
+        ret = ff_decode_get_packet(avctx, fctx->next_pkt);
+        if (ret < 0 && ret != AVERROR_EOF)
+            goto finish;
 
-    if (fctx->delaying) {
-        *got_picture_ptr=0;
-        if (avpkt->size) {
-            err = avpkt->size;
+        ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
+                            fctx->next_pkt);
+        if (ret < 0)
             goto finish;
-        }
-    }
 
-    /*
-     * Return the next available frame from the oldest thread.
-     * If we're at the end of the stream, then we have to skip threads that
-     * didn't output a frame/error, because we don't want to accidentally signal
-     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
-     */
+        /* do not return any frames until all threads have something to do */
+        if (fctx->next_decoding != fctx->next_finished &&
+            !avctx->internal->draining)
+            continue;
 
-    do {
-        p = &fctx->threads[finished++];
+        p                   = &fctx->threads[fctx->next_finished];
+        fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
 
         if (atomic_load(&p->state) != STATE_INPUT_READY) {
             pthread_mutex_lock(&p->progress_mutex);
@@ -598,35 +655,26 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
             pthread_mutex_unlock(&p->progress_mutex);
         }
 
-        av_frame_move_ref(picture, p->frame);
-        *got_picture_ptr = p->got_frame;
-        picture->pkt_dts = p->avpkt->dts;
-        err = p->result;
-
-        /*
-         * A later call with avkpt->size == 0 may loop over all threads,
-         * including this one, searching for a frame/error to return before being
-         * stopped by the "finished != fctx->next_finished" condition.
-         * Make sure we don't mistakenly return the same frame/error again.
-         */
-        p->got_frame = 0;
-        p->result = 0;
-
-        if (finished >= avctx->thread_count) finished = 0;
-    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
+        fctx->result = p->result;
+        p->result    = 0;
 
-    update_context_from_thread(avctx, p->avctx, 1);
-
-    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+        if (p->df.nb_f)
+            FFSWAP(DecodedFrames, fctx->df, p->df);
+    }
 
-    fctx->next_finished = finished;
+    /* a thread may return multiple frames AND an error
+     * we first return all the frames, then the error */
+    if (fctx->df.nb_f) {
+        decoded_frames_pop(&fctx->df, frame);
+        ret = 0;
+    } else {
+        ret = fctx->result;
+        fctx->result = 0;
+    }
 
-    /* return the size of the consumed packet if no error occurred */
-    if (err >= 0)
-        err = avpkt->size;
 finish:
     async_lock(fctx);
-    return err;
+    return ret;
 }
 
 void ff_thread_report_progress(ThreadFrame *f, int n, int field)
@@ -726,7 +774,6 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
                 pthread_cond_wait(&p->output_cond, &p->progress_mutex);
             pthread_mutex_unlock(&p->progress_mutex);
         }
-        p->got_frame = 0;
     }
 
     async_lock(fctx);
@@ -780,15 +827,24 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
                 av_freep(&ctx->priv_data);
             }
 
+            if (ctx->internal->pkt_props) {
+                while (av_fifo_can_read(ctx->internal->pkt_props)) {
+                    av_packet_unref(ctx->internal->last_pkt_props);
+                    av_fifo_read(ctx->internal->pkt_props, ctx->internal->last_pkt_props, 1);
+                }
+                av_fifo_freep2(&ctx->internal->pkt_props);
+            }
+
             av_freep(&ctx->slice_offset);
 
             av_buffer_unref(&ctx->internal->pool);
             av_packet_free(&ctx->internal->last_pkt_props);
+            av_packet_free(&ctx->internal->in_pkt);
             av_freep(&ctx->internal);
             av_buffer_unref(&ctx->hw_frames_ctx);
         }
 
-        av_frame_free(&p->frame);
+        decoded_frames_free(&p->df);
 
         ff_pthread_free(p, per_thread_offsets);
         av_packet_free(&p->avpkt);
@@ -796,6 +852,9 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
         av_freep(&p->avctx);
     }
 
+    decoded_frames_free(&fctx->df);
+    av_packet_free(&fctx->next_pkt);
+
     av_freep(&fctx->threads);
     ff_pthread_free(fctx, thread_ctx_offsets);
 
@@ -854,17 +913,26 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
     if (err < 0)
         return err;
 
-    if (!(p->frame = av_frame_alloc()) ||
-        !(p->avpkt = av_packet_alloc()))
+    if (!(p->avpkt = av_packet_alloc()))
         return AVERROR(ENOMEM);
 
+    copy->internal->is_frame_mt = 1;
     if (!first)
         copy->internal->is_copy = 1;
 
+    copy->internal->in_pkt = av_packet_alloc();
+    if (!copy->internal->in_pkt)
+        return AVERROR(ENOMEM);
+
     copy->internal->last_pkt_props = av_packet_alloc();
     if (!copy->internal->last_pkt_props)
         return AVERROR(ENOMEM);
 
+    copy->internal->pkt_props = av_fifo_alloc2(1, sizeof(*copy->internal->last_pkt_props),
+                                               AV_FIFO_FLAG_AUTO_GROW);
+    if (!copy->internal->pkt_props)
+        return AVERROR(ENOMEM);
+
     if (codec->init) {
         err = codec->init(copy);
         if (err < 0) {
@@ -920,6 +988,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
         return err;
     }
 
+    fctx->next_pkt = av_packet_alloc();
+    if (!fctx->next_pkt)
+        return AVERROR(ENOMEM);
+
     fctx->async_lock = 1;
     fctx->delaying = 1;
 
@@ -964,12 +1036,13 @@ void ff_thread_flush(AVCodecContext *avctx)
     fctx->next_decoding = fctx->next_finished = 0;
     fctx->delaying = 1;
     fctx->prev_thread = NULL;
+
+    decoded_frames_flush(&fctx->df);
+
     for (i = 0; i < avctx->thread_count; i++) {
         PerThreadContext *p = &fctx->threads[i];
-        // Make sure decode flush calls with size=0 won't return old frames
-        p->got_frame = 0;
-        av_frame_unref(p->frame);
-        p->result = 0;
+
+        decoded_frames_flush(&p->df);
 
 #if FF_API_THREAD_SAFE_CALLBACKS
         release_delayed_buffers(p);
@@ -1193,3 +1266,15 @@ void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
     f->owner[0] = f->owner[1] = NULL;
     ff_thread_release_buffer(avctx, f->f);
 }
+
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    PerThreadContext *p = avctx->internal->thread_ctx;
+
+    if (p->avpkt->buf) {
+        av_packet_move_ref(pkt, p->avpkt);
+        return 0;
+    }
+
+    return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
+}
diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index d5673f25ea..7ae69990fb 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h
@@ -40,17 +40,12 @@
 void ff_thread_flush(AVCodecContext *avctx);
 
 /**
- * Submit a new frame to a decoding thread.
- * Returns the next available frame in picture. *got_picture_ptr
- * will be 0 if none is available.
- * The return value on success is the size of the consumed packet for
- * compatibility with FFCodec.decode. This means the decoder
- * has to consume the full packet.
+ * Submit available packets for decoding to worker threads, return a
+ * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
  *
- * Parameters are the same as FFCodec.decode.
+ * Parameters are the same as FFCodec.receive_frame.
  */
-int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
-                           int *got_picture_ptr, AVPacket *avpkt);
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
 /**
  * If the codec defines update_thread_context(), call this
@@ -99,6 +94,11 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
  */
 void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
 
+/**
+ * Get a packet for decoding. This gets invoked by the worker threads.
+ */
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
+
 int ff_thread_init(AVCodecContext *s);
 int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
         int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH v3] lavc: convert frame threading to the receive_frame() pattern
  2022-12-07 11:43 [FFmpeg-devel] [PATCH 1/2] lavc: convert frame threading to the receive_frame() pattern Timo Rothenpieler
  2022-12-07 11:43 ` [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
  2022-12-07 13:14 ` [FFmpeg-devel] [PATCH v2] lavc: convert frame threading to the receive_frame() pattern Timo Rothenpieler
@ 2022-12-07 13:20 ` Timo Rothenpieler
  2022-12-07 22:22   ` Michael Niedermayer
  2 siblings, 1 reply; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-07 13:20 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Anton Khirnov

From: Anton Khirnov <anton@khirnov.net>

Reorganize the code such that the frame threading code does not call the
decoders directly, but instead calls back into the generic decoding
code. This avoids duplicating the logic that wraps the decoder
invocation and will be useful in the following commits.
---
 libavcodec/decode.c        |  57 +++++----
 libavcodec/decode.h        |   7 +
 libavcodec/internal.h      |   7 +
 libavcodec/pthread_frame.c | 256 ++++++++++++++++++++++++-------------
 libavcodec/thread.h        |  18 +--
 5 files changed, 222 insertions(+), 123 deletions(-)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index b184c3f55b..ce0b2830bd 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -180,6 +180,10 @@ fail:
     return ret;
 }
 
+#if !HAVE_THREADS
+#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
+#endif
+
 int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
     AVCodecInternal *avci = avctx->internal;
@@ -188,7 +192,14 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
     if (avci->draining)
         return AVERROR_EOF;
 
-    ret = av_bsf_receive_packet(avci->bsf, pkt);
+    /* If we are a worker thread, get the next packet from the threading
+     * context. Otherwise we are the main (user-facing) context, so we get the
+     * next packet from the input filterchain.
+     */
+    if (avctx->internal->is_frame_mt)
+        ret = ff_thread_get_packet(avctx, pkt);
+    else
+        ret = av_bsf_receive_packet(avci->bsf, pkt);
     if (ret == AVERROR_EOF)
         avci->draining = 1;
     if (ret < 0)
@@ -273,30 +284,25 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
         return AVERROR_EOF;
 
     if (!pkt->data &&
-        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
-          avctx->active_thread_type & FF_THREAD_FRAME))
+        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
         return AVERROR_EOF;
 
     got_frame = 0;
 
-    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
-        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
-    } else {
-        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
-
-        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
-            frame->pkt_dts = pkt->dts;
-        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
-            if(!avctx->has_b_frames)
-                frame->pkt_pos = pkt->pos;
-            //FIXME these should be under if(!avctx->has_b_frames)
-            /* get_buffer is supposed to set frame parameters */
-            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
-                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
-                if (!frame->width)                    frame->width               = avctx->width;
-                if (!frame->height)                   frame->height              = avctx->height;
-                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
-            }
+    ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
+
+    if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+        frame->pkt_dts = pkt->dts;
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if(!avctx->has_b_frames)
+            frame->pkt_pos = pkt->pos;
+        //FIXME these should be under if(!avctx->has_b_frames)
+        /* get_buffer is supposed to set frame parameters */
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
+            if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
+            if (!frame->width)                    frame->width               = avctx->width;
+            if (!frame->height)                   frame->height              = avctx->height;
+            if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
         }
     }
     emms_c();
@@ -546,7 +552,7 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
     return 0;
 }
 
-static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
 {
     AVCodecInternal *avci = avctx->internal;
     const FFCodec *const codec = ffcodec(avctx->codec);
@@ -604,6 +610,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return ret;
 }
 
+static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+{
+    if (avctx->active_thread_type & FF_THREAD_FRAME)
+        return ff_thread_receive_frame(avctx, frame);
+    return ff_decode_receive_frame_internal(avctx, frame);
+}
+
 int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 {
     AVCodecInternal *avci = avctx->internal;
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 5d95369b5e..34beb70f97 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -58,6 +58,13 @@ typedef struct FrameDecodeData {
  */
 int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
+/**
+ * Do the actual decoding and obtain a decoded frame from the decoder, if
+ * available. When frame threading is used, this is invoked by the worker
+ * threads, otherwise by the top layer directly.
+ */
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
+
 /**
  * Called by decoders to get the next packet for decoding.
  *
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index a283c52e01..c87036efc4 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -56,6 +56,13 @@ typedef struct AVCodecInternal {
      */
     int is_copy;
 
+    /**
+     * This field is set to 1 when frame threading is being used and the parent
+     * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
+     * one of those actually doing the decoding), 0 otherwise.
+     */
+    int is_frame_mt;
+
     /**
      * An audio frame with less than required samples has been submitted (and
      * potentially padded with silence). Reject all subsequent frames.
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 62a0b18a8a..7c7998b675 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -46,6 +46,7 @@
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/fifo.h"
 #include "libavutil/thread.h"
 
 enum {
@@ -73,6 +74,12 @@ enum {
     INITIALIZED,    ///< Thread has been properly set up
 };
 
+typedef struct DecodedFrames {
+    AVFrame  **f;
+    size_t  nb_f;
+    size_t  nb_f_allocated;
+} DecodedFrames;
+
 /**
  * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
  */
@@ -93,8 +100,10 @@ typedef struct PerThreadContext {
 
     AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
 
-    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
-    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
+    /**
+     * Decoded frames from a single decode iteration.
+     */
+    DecodedFrames df;
     int     result;                 ///< The result of the last codec decode/encode() call.
 
     atomic_int state;
@@ -141,6 +150,14 @@ typedef struct FrameThreadContext {
     pthread_cond_t async_cond;
     int async_lock;
 
+    DecodedFrames df;
+    int result;
+
+    /**
+     * Packet to be submitted to the next thread for decoding.
+     */
+    AVPacket *next_pkt;
+
     int next_decoding;             ///< The next context to submit a packet to.
     int next_finished;             ///< The next context to return output from.
 
@@ -190,6 +207,51 @@ static void thread_set_name(PerThreadContext *p)
     ff_thread_setname(name);
 }
 
+// get a free frame to decode into
+static AVFrame *decoded_frames_get_free(DecodedFrames *df)
+{
+    if (df->nb_f == df->nb_f_allocated) {
+        AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
+                                         sizeof(*df->f));
+        if (!tmp)
+            return NULL;
+        df->f = tmp;
+
+        df->f[df->nb_f] = av_frame_alloc();
+        if (!df->f[df->nb_f])
+            return NULL;
+
+        df->nb_f_allocated++;
+    }
+
+    av_frame_unref(df->f[df->nb_f]);
+    return df->f[df->nb_f];
+}
+
+static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
+{
+    AVFrame *tmp_frame = df->f[0];
+    av_frame_move_ref(dst, tmp_frame);
+    memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
+    df->f[--df->nb_f] = tmp_frame;
+}
+
+static void decoded_frames_flush(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f; i++)
+        av_frame_unref(df->f[i]);
+    df->nb_f = 0;
+}
+
+static void decoded_frames_free(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f_allocated; i++)
+        av_frame_free(&df->f[i]);
+    av_freep(&df->f);
+    df->nb_f           = 0;
+    df->nb_f_allocated = 0;
+}
+
 /**
  * Codec worker thread.
  *
@@ -202,6 +264,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
     PerThreadContext *p = arg;
     AVCodecContext *avctx = p->avctx;
     const FFCodec *codec = ffcodec(avctx->codec);
+    int ret;
 
     thread_set_name(p);
 
@@ -236,16 +299,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
             p->hwaccel_serializing = 1;
         }
 
-        av_frame_unref(p->frame);
-        p->got_frame = 0;
-        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
+        ret = 0;
+        while (ret >= 0) {
+            AVFrame *frame;
+
+            /* get the frame which will store the output */
+            frame = decoded_frames_get_free(&p->df);
+            if (!frame) {
+                p->result = AVERROR(ENOMEM);
+                goto alloc_fail;
+            }
+
+            /* do the actual decoding */
+            ret = ff_decode_receive_frame_internal(avctx, frame);
+            if (ret == 0)
+                p->df.nb_f++;
+            else if (ret < 0 && frame->buf[0])
+                ff_thread_release_buffer(avctx, frame);
 
-        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
-            ff_thread_release_buffer(avctx, p->frame);
+            p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
+        }
 
         if (atomic_load(&p->state) == STATE_SETTING_UP)
             ff_thread_finish_setup(avctx);
 
+alloc_fail:
         if (p->hwaccel_serializing) {
             /* wipe hwaccel state to avoid stale pointers lying around;
              * the state was transferred to FrameThreadContext in
@@ -414,6 +492,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
     if (err < 0)
         return err;
 
+    dst->internal->draining      = src->internal->draining;
+    dst->internal->draining_done = src->internal->draining_done;
+
     return 0;
 }
 
@@ -441,23 +522,23 @@ static void release_delayed_buffers(PerThreadContext *p)
 #endif
 
 static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
-                         AVPacket *avpkt)
+                         AVPacket *in_pkt)
 {
     FrameThreadContext *fctx = p->parent;
     PerThreadContext *prev_thread = fctx->prev_thread;
-    const AVCodec *codec = p->avctx->codec;
-    int ret;
-
-    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
-        return 0;
+    int err;
 
     pthread_mutex_lock(&p->mutex);
 
-    ret = update_context_from_user(p->avctx, user_avctx);
-    if (ret) {
+    av_packet_unref(p->avpkt);
+    av_packet_move_ref(p->avpkt, in_pkt);
+
+    err = update_context_from_user(p->avctx, user_avctx);
+    if (err < 0) {
         pthread_mutex_unlock(&p->mutex);
-        return ret;
+        return err;
     }
+
     atomic_store_explicit(&p->debug_threads,
                           (p->avctx->debug & FF_DEBUG_THREADS) != 0,
                           memory_order_relaxed);
@@ -467,7 +548,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
 #endif
 
     if (prev_thread) {
-        int err;
         if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
             pthread_mutex_lock(&prev_thread->progress_mutex);
             while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
@@ -488,14 +568,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
     FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
     FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
 
-    av_packet_unref(p->avpkt);
-    ret = av_packet_ref(p->avpkt, avpkt);
-    if (ret < 0) {
-        pthread_mutex_unlock(&p->mutex);
-        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
-        return ret;
-    }
-
     atomic_store(&p->state, STATE_SETTING_UP);
     pthread_cond_signal(&p->input_cond);
     pthread_mutex_unlock(&p->mutex);
@@ -539,57 +611,42 @@ FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
     fctx->prev_thread = p;
-    fctx->next_decoding++;
+    fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
 
     return 0;
 }
 
-int ff_thread_decode_frame(AVCodecContext *avctx,
-                           AVFrame *picture, int *got_picture_ptr,
-                           AVPacket *avpkt)
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     FrameThreadContext *fctx = avctx->internal->thread_ctx;
-    int finished = fctx->next_finished;
-    PerThreadContext *p;
-    int err;
+    int ret = 0;
 
     /* release the async lock, permitting blocked hwaccel threads to
      * go forward while we are in this function */
     async_unlock(fctx);
 
-    /*
-     * Submit a packet to the next decoding thread.
-     */
-
-    p = &fctx->threads[fctx->next_decoding];
-    err = submit_packet(p, avctx, avpkt);
-    if (err)
-        goto finish;
-
-    /*
-     * If we're still receiving the initial packets, don't return a frame.
-     */
+    /* submit packets to threads while there are no buffered results to return */
+    while (!fctx->df.nb_f && !fctx->result) {
+        PerThreadContext *p;
 
-    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
-        fctx->delaying = 0;
+        /* get a packet to be submitted to the next thread */
+        av_packet_unref(fctx->next_pkt);
+        ret = ff_decode_get_packet(avctx, fctx->next_pkt);
+        if (ret < 0 && ret != AVERROR_EOF)
+            goto finish;
 
-    if (fctx->delaying) {
-        *got_picture_ptr=0;
-        if (avpkt->size) {
-            err = avpkt->size;
+        ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
+                            fctx->next_pkt);
+        if (ret < 0)
             goto finish;
-        }
-    }
 
-    /*
-     * Return the next available frame from the oldest thread.
-     * If we're at the end of the stream, then we have to skip threads that
-     * didn't output a frame/error, because we don't want to accidentally signal
-     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
-     */
+        /* do not return any frames until all threads have something to do */
+        if (fctx->next_decoding != fctx->next_finished &&
+            !avctx->internal->draining)
+            continue;
 
-    do {
-        p = &fctx->threads[finished++];
+        p                   = &fctx->threads[fctx->next_finished];
+        fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
 
         if (atomic_load(&p->state) != STATE_INPUT_READY) {
             pthread_mutex_lock(&p->progress_mutex);
@@ -598,35 +655,26 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
             pthread_mutex_unlock(&p->progress_mutex);
         }
 
-        av_frame_move_ref(picture, p->frame);
-        *got_picture_ptr = p->got_frame;
-        picture->pkt_dts = p->avpkt->dts;
-        err = p->result;
-
-        /*
-         * A later call with avkpt->size == 0 may loop over all threads,
-         * including this one, searching for a frame/error to return before being
-         * stopped by the "finished != fctx->next_finished" condition.
-         * Make sure we don't mistakenly return the same frame/error again.
-         */
-        p->got_frame = 0;
-        p->result = 0;
-
-        if (finished >= avctx->thread_count) finished = 0;
-    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
+        fctx->result = p->result;
+        p->result    = 0;
 
-    update_context_from_thread(avctx, p->avctx, 1);
-
-    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+        if (p->df.nb_f)
+            FFSWAP(DecodedFrames, fctx->df, p->df);
+    }
 
-    fctx->next_finished = finished;
+    /* a thread may return multiple frames AND an error
+     * we first return all the frames, then the error */
+    if (fctx->df.nb_f) {
+        decoded_frames_pop(&fctx->df, frame);
+        ret = 0;
+    } else {
+        ret = fctx->result;
+        fctx->result = 0;
+    }
 
-    /* return the size of the consumed packet if no error occurred */
-    if (err >= 0)
-        err = avpkt->size;
 finish:
     async_lock(fctx);
-    return err;
+    return ret;
 }
 
 void ff_thread_report_progress(ThreadFrame *f, int n, int field)
@@ -726,7 +774,6 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
                 pthread_cond_wait(&p->output_cond, &p->progress_mutex);
             pthread_mutex_unlock(&p->progress_mutex);
         }
-        p->got_frame = 0;
     }
 
     async_lock(fctx);
@@ -784,11 +831,12 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
 
             av_buffer_unref(&ctx->internal->pool);
             av_packet_free(&ctx->internal->last_pkt_props);
+            av_packet_free(&ctx->internal->in_pkt);
             av_freep(&ctx->internal);
             av_buffer_unref(&ctx->hw_frames_ctx);
         }
 
-        av_frame_free(&p->frame);
+        decoded_frames_free(&p->df);
 
         ff_pthread_free(p, per_thread_offsets);
         av_packet_free(&p->avpkt);
@@ -796,6 +844,9 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
         av_freep(&p->avctx);
     }
 
+    decoded_frames_free(&fctx->df);
+    av_packet_free(&fctx->next_pkt);
+
     av_freep(&fctx->threads);
     ff_pthread_free(fctx, thread_ctx_offsets);
 
@@ -854,13 +905,17 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
     if (err < 0)
         return err;
 
-    if (!(p->frame = av_frame_alloc()) ||
-        !(p->avpkt = av_packet_alloc()))
+    if (!(p->avpkt = av_packet_alloc()))
         return AVERROR(ENOMEM);
 
+    copy->internal->is_frame_mt = 1;
     if (!first)
         copy->internal->is_copy = 1;
 
+    copy->internal->in_pkt = av_packet_alloc();
+    if (!copy->internal->in_pkt)
+        return AVERROR(ENOMEM);
+
     copy->internal->last_pkt_props = av_packet_alloc();
     if (!copy->internal->last_pkt_props)
         return AVERROR(ENOMEM);
@@ -920,6 +975,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
         return err;
     }
 
+    fctx->next_pkt = av_packet_alloc();
+    if (!fctx->next_pkt)
+        return AVERROR(ENOMEM);
+
     fctx->async_lock = 1;
     fctx->delaying = 1;
 
@@ -964,12 +1023,13 @@ void ff_thread_flush(AVCodecContext *avctx)
     fctx->next_decoding = fctx->next_finished = 0;
     fctx->delaying = 1;
     fctx->prev_thread = NULL;
+
+    decoded_frames_flush(&fctx->df);
+
     for (i = 0; i < avctx->thread_count; i++) {
         PerThreadContext *p = &fctx->threads[i];
-        // Make sure decode flush calls with size=0 won't return old frames
-        p->got_frame = 0;
-        av_frame_unref(p->frame);
-        p->result = 0;
+
+        decoded_frames_flush(&p->df);
 
 #if FF_API_THREAD_SAFE_CALLBACKS
         release_delayed_buffers(p);
@@ -1193,3 +1253,15 @@ void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
     f->owner[0] = f->owner[1] = NULL;
     ff_thread_release_buffer(avctx, f->f);
 }
+
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    PerThreadContext *p = avctx->internal->thread_ctx;
+
+    if (p->avpkt->buf) {
+        av_packet_move_ref(pkt, p->avpkt);
+        return 0;
+    }
+
+    return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
+}
diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index d5673f25ea..7ae69990fb 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h
@@ -40,17 +40,12 @@
 void ff_thread_flush(AVCodecContext *avctx);
 
 /**
- * Submit a new frame to a decoding thread.
- * Returns the next available frame in picture. *got_picture_ptr
- * will be 0 if none is available.
- * The return value on success is the size of the consumed packet for
- * compatibility with FFCodec.decode. This means the decoder
- * has to consume the full packet.
+ * Submit available packets for decoding to worker threads, return a
+ * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
  *
- * Parameters are the same as FFCodec.decode.
+ * Parameters are the same as FFCodec.receive_frame.
  */
-int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
-                           int *got_picture_ptr, AVPacket *avpkt);
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
 /**
  * If the codec defines update_thread_context(), call this
@@ -99,6 +94,11 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
  */
 void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
 
+/**
+ * Get a packet for decoding. This gets invoked by the worker threads.
+ */
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
+
 int ff_thread_init(AVCodecContext *s);
 int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
         int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3] lavc: convert frame threading to the receive_frame() pattern
  2022-12-07 13:20 ` [FFmpeg-devel] [PATCH v3] " Timo Rothenpieler
@ 2022-12-07 22:22   ` Michael Niedermayer
  2022-12-09 13:09     ` Timo Rothenpieler
  2022-12-09 13:37     ` [FFmpeg-devel] [PATCH v4] " Timo Rothenpieler
  0 siblings, 2 replies; 30+ messages in thread
From: Michael Niedermayer @ 2022-12-07 22:22 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1435 bytes --]

On Wed, Dec 07, 2022 at 02:20:23PM +0100, Timo Rothenpieler wrote:
> From: Anton Khirnov <anton@khirnov.net>
> 
> Reorganize the code such that the frame threading code does not call the
> decoders directly, but instead calls back into the generic decoding
> code. This avoids duplicating the logic that wraps the decoder
> invocation and will be useful in the following commits.
> ---
>  libavcodec/decode.c        |  57 +++++----
>  libavcodec/decode.h        |   7 +
>  libavcodec/internal.h      |   7 +
>  libavcodec/pthread_frame.c | 256 ++++++++++++++++++++++++-------------
>  libavcodec/thread.h        |  18 +--
>  5 files changed, 222 insertions(+), 123 deletions(-)

This breaks the build on ARM (probably due to a lack of pthread support) in this environment:

libavcodec/libavcodec.a(decode.o): In function `decode_receive_frame_internal':
arm/src/libavcodec/decode.c:616: undefined reference to `ff_thread_receive_frame'
arm/src/libavcodec/decode.c:616: undefined reference to `ff_thread_receive_frame'
collect2: error: ld returned 1 exit status
Makefile:131: recipe for target 'ffprobe_g' failed
make: *** [ffprobe_g] Error 1

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many that live deserve death. And some that die deserve life. Can you give
it to them? Then do not be too eager to deal out death in judgement. For
even the very wise cannot see all ends. -- Gandalf

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3] lavc: convert frame threading to the receive_frame() pattern
  2022-12-07 22:22   ` Michael Niedermayer
@ 2022-12-09 13:09     ` Timo Rothenpieler
  2022-12-09 13:11       ` James Almer
  2022-12-09 16:17       ` Michael Niedermayer
  2022-12-09 13:37     ` [FFmpeg-devel] [PATCH v4] " Timo Rothenpieler
  1 sibling, 2 replies; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-09 13:09 UTC (permalink / raw)
  To: ffmpeg-devel

On 07/12/2022 23:22, Michael Niedermayer wrote:
> On Wed, Dec 07, 2022 at 02:20:23PM +0100, Timo Rothenpieler wrote:
>> From: Anton Khirnov <anton@khirnov.net>
>>
>> Reorganize the code such that the frame threading code does not call the
>> decoders directly, but instead calls back into the generic decoding
>> code. This avoids duplicating the logic that wraps the decoder
>> invocation and will be useful in the following commits.
>> ---
>>   libavcodec/decode.c        |  57 +++++----
>>   libavcodec/decode.h        |   7 +
>>   libavcodec/internal.h      |   7 +
>>   libavcodec/pthread_frame.c | 256 ++++++++++++++++++++++++-------------
>>   libavcodec/thread.h        |  18 +--
>>   5 files changed, 222 insertions(+), 123 deletions(-)
> 
> This breaks on arm (probably lack of pthread support) in this env
> 
> libavcodec/libavcodec.a(decode.o): In function `decode_receive_frame_internal':
> arm/src/libavcodec/decode.c:616: undefined reference to `ff_thread_receive_frame'
> arm/src/libavcodec/decode.c:616: undefined reference to `ff_thread_receive_frame'
> collect2: error: ld returned 1 exit status
> Makefile:131: recipe for target 'ffprobe_g' failed
> make: *** [ffprobe_g] Error 1

Probably just missing an #if somewhere.
Why does ARM not support pthreads, though?
Or is that just this specific configuration?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3] lavc: convert frame threading to the receive_frame() pattern
  2022-12-09 13:09     ` Timo Rothenpieler
@ 2022-12-09 13:11       ` James Almer
  2022-12-09 16:17       ` Michael Niedermayer
  1 sibling, 0 replies; 30+ messages in thread
From: James Almer @ 2022-12-09 13:11 UTC (permalink / raw)
  To: ffmpeg-devel

On 12/9/2022 10:09 AM, Timo Rothenpieler wrote:
> On 07/12/2022 23:22, Michael Niedermayer wrote:
>> On Wed, Dec 07, 2022 at 02:20:23PM +0100, Timo Rothenpieler wrote:
>>> From: Anton Khirnov <anton@khirnov.net>
>>>
>>> Reorganize the code such that the frame threading code does not call the
>>> decoders directly, but instead calls back into the generic decoding
>>> code. This avoids duplicating the logic that wraps the decoder
>>> invocation and will be useful in the following commits.
>>> ---
>>>   libavcodec/decode.c        |  57 +++++----
>>>   libavcodec/decode.h        |   7 +
>>>   libavcodec/internal.h      |   7 +
>>>   libavcodec/pthread_frame.c | 256 ++++++++++++++++++++++++-------------
>>>   libavcodec/thread.h        |  18 +--
>>>   5 files changed, 222 insertions(+), 123 deletions(-)
>>
>> This breaks on arm (probably lack of pthread support) in this env
>>
>> libavcodec/libavcodec.a(decode.o): In function 
>> `decode_receive_frame_internal':
>> arm/src/libavcodec/decode.c:616: undefined reference to 
>> `ff_thread_receive_frame'
>> arm/src/libavcodec/decode.c:616: undefined reference to 
>> `ff_thread_receive_frame'
>> collect2: error: ld returned 1 exit status
>> Makefile:131: recipe for target 'ffprobe_g' failed
>> make: *** [ffprobe_g] Error 1
> 
> Probably just missing an #if somewhere.

Yes.

> +static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
> +{
> +    if (avctx->active_thread_type & FF_THREAD_FRAME)

This condition should be: if (HAVE_THREADS && ...

> +        return ff_thread_receive_frame(avctx, frame);
> +    return ff_decode_receive_frame_internal(avctx, frame);
> +}

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH v4] lavc: convert frame threading to the receive_frame() pattern
  2022-12-07 22:22   ` Michael Niedermayer
  2022-12-09 13:09     ` Timo Rothenpieler
@ 2022-12-09 13:37     ` Timo Rothenpieler
  2022-12-09 23:46       ` Michael Niedermayer
  1 sibling, 1 reply; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-09 13:37 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Anton Khirnov

From: Anton Khirnov <anton@khirnov.net>

Reorganize the code such that the frame threading code does not call the
decoders directly, but instead calls back into the generic decoding
code. This avoids duplicating the logic that wraps the decoder
invocation and will be useful in the following commits.
---
 libavcodec/decode.c        |  58 +++++----
 libavcodec/decode.h        |   7 +
 libavcodec/internal.h      |   7 +
 libavcodec/pthread_frame.c | 256 ++++++++++++++++++++++++-------------
 libavcodec/thread.h        |  18 +--
 5 files changed, 223 insertions(+), 123 deletions(-)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index b184c3f55b..f1be0d7876 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -180,6 +180,11 @@ fail:
     return ret;
 }
 
+#if !HAVE_THREADS
+#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
+#define ff_thread_receive_frame(avctx, frame) (AVERROR_BUG)
+#endif
+
 int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
     AVCodecInternal *avci = avctx->internal;
@@ -188,7 +193,14 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
     if (avci->draining)
         return AVERROR_EOF;
 
-    ret = av_bsf_receive_packet(avci->bsf, pkt);
+    /* If we are a worker thread, get the next packet from the threading
+     * context. Otherwise we are the main (user-facing) context, so we get the
+     * next packet from the input filterchain.
+     */
+    if (avctx->internal->is_frame_mt)
+        ret = ff_thread_get_packet(avctx, pkt);
+    else
+        ret = av_bsf_receive_packet(avci->bsf, pkt);
     if (ret == AVERROR_EOF)
         avci->draining = 1;
     if (ret < 0)
@@ -273,30 +285,25 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
         return AVERROR_EOF;
 
     if (!pkt->data &&
-        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
-          avctx->active_thread_type & FF_THREAD_FRAME))
+        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
         return AVERROR_EOF;
 
     got_frame = 0;
 
-    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
-        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
-    } else {
-        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
-
-        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
-            frame->pkt_dts = pkt->dts;
-        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
-            if(!avctx->has_b_frames)
-                frame->pkt_pos = pkt->pos;
-            //FIXME these should be under if(!avctx->has_b_frames)
-            /* get_buffer is supposed to set frame parameters */
-            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
-                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
-                if (!frame->width)                    frame->width               = avctx->width;
-                if (!frame->height)                   frame->height              = avctx->height;
-                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
-            }
+    ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
+
+    if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+        frame->pkt_dts = pkt->dts;
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if(!avctx->has_b_frames)
+            frame->pkt_pos = pkt->pos;
+        //FIXME these should be under if(!avctx->has_b_frames)
+        /* get_buffer is supposed to set frame parameters */
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
+            if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
+            if (!frame->width)                    frame->width               = avctx->width;
+            if (!frame->height)                   frame->height              = avctx->height;
+            if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
         }
     }
     emms_c();
@@ -546,7 +553,7 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
     return 0;
 }
 
-static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
 {
     AVCodecInternal *avci = avctx->internal;
     const FFCodec *const codec = ffcodec(avctx->codec);
@@ -604,6 +611,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return ret;
 }
 
+static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+{
+    if (avctx->active_thread_type & FF_THREAD_FRAME)
+        return ff_thread_receive_frame(avctx, frame);
+    return ff_decode_receive_frame_internal(avctx, frame);
+}
+
 int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
 {
     AVCodecInternal *avci = avctx->internal;
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 5d95369b5e..34beb70f97 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -58,6 +58,13 @@ typedef struct FrameDecodeData {
  */
 int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
+/**
+ * Do the actual decoding and obtain a decoded frame from the decoder, if
+ * available. When frame threading is used, this is invoked by the worker
+ * threads, otherwise by the top layer directly.
+ */
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
+
 /**
  * Called by decoders to get the next packet for decoding.
  *
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index a283c52e01..c87036efc4 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -56,6 +56,13 @@ typedef struct AVCodecInternal {
      */
     int is_copy;
 
+    /**
+     * This field is set to 1 when frame threading is being used and the parent
+     * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
+     * one of those actually doing the decoding), 0 otherwise.
+     */
+    int is_frame_mt;
+
     /**
      * An audio frame with less than required samples has been submitted (and
      * potentially padded with silence). Reject all subsequent frames.
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 62a0b18a8a..7c7998b675 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -46,6 +46,7 @@
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/fifo.h"
 #include "libavutil/thread.h"
 
 enum {
@@ -73,6 +74,12 @@ enum {
     INITIALIZED,    ///< Thread has been properly set up
 };
 
+typedef struct DecodedFrames {
+    AVFrame  **f;
+    size_t  nb_f;
+    size_t  nb_f_allocated;
+} DecodedFrames;
+
 /**
  * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
  */
@@ -93,8 +100,10 @@ typedef struct PerThreadContext {
 
     AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
 
-    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
-    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
+    /**
+     * Decoded frames from a single decode iteration.
+     */
+    DecodedFrames df;
     int     result;                 ///< The result of the last codec decode/encode() call.
 
     atomic_int state;
@@ -141,6 +150,14 @@ typedef struct FrameThreadContext {
     pthread_cond_t async_cond;
     int async_lock;
 
+    DecodedFrames df;
+    int result;
+
+    /**
+     * Packet to be submitted to the next thread for decoding.
+     */
+    AVPacket *next_pkt;
+
     int next_decoding;             ///< The next context to submit a packet to.
     int next_finished;             ///< The next context to return output from.
 
@@ -190,6 +207,51 @@ static void thread_set_name(PerThreadContext *p)
     ff_thread_setname(name);
 }
 
+// get a free frame to decode into
+static AVFrame *decoded_frames_get_free(DecodedFrames *df)
+{
+    if (df->nb_f == df->nb_f_allocated) {
+        AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
+                                         sizeof(*df->f));
+        if (!tmp)
+            return NULL;
+        df->f = tmp;
+
+        df->f[df->nb_f] = av_frame_alloc();
+        if (!df->f[df->nb_f])
+            return NULL;
+
+        df->nb_f_allocated++;
+    }
+
+    av_frame_unref(df->f[df->nb_f]);
+    return df->f[df->nb_f];
+}
+
+static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
+{
+    AVFrame *tmp_frame = df->f[0];
+    av_frame_move_ref(dst, tmp_frame);
+    memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
+    df->f[--df->nb_f] = tmp_frame;
+}
+
+static void decoded_frames_flush(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f; i++)
+        av_frame_unref(df->f[i]);
+    df->nb_f = 0;
+}
+
+static void decoded_frames_free(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f_allocated; i++)
+        av_frame_free(&df->f[i]);
+    av_freep(&df->f);
+    df->nb_f           = 0;
+    df->nb_f_allocated = 0;
+}
+
 /**
  * Codec worker thread.
  *
@@ -202,6 +264,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
     PerThreadContext *p = arg;
     AVCodecContext *avctx = p->avctx;
     const FFCodec *codec = ffcodec(avctx->codec);
+    int ret;
 
     thread_set_name(p);
 
@@ -236,16 +299,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
             p->hwaccel_serializing = 1;
         }
 
-        av_frame_unref(p->frame);
-        p->got_frame = 0;
-        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
+        ret = 0;
+        while (ret >= 0) {
+            AVFrame *frame;
+
+            /* get the frame which will store the output */
+            frame = decoded_frames_get_free(&p->df);
+            if (!frame) {
+                p->result = AVERROR(ENOMEM);
+                goto alloc_fail;
+            }
+
+            /* do the actual decoding */
+            ret = ff_decode_receive_frame_internal(avctx, frame);
+            if (ret == 0)
+                p->df.nb_f++;
+            else if (ret < 0 && frame->buf[0])
+                ff_thread_release_buffer(avctx, frame);
 
-        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
-            ff_thread_release_buffer(avctx, p->frame);
+            p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
+        }
 
         if (atomic_load(&p->state) == STATE_SETTING_UP)
             ff_thread_finish_setup(avctx);
 
+alloc_fail:
         if (p->hwaccel_serializing) {
             /* wipe hwaccel state to avoid stale pointers lying around;
              * the state was transferred to FrameThreadContext in
@@ -414,6 +492,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
     if (err < 0)
         return err;
 
+    dst->internal->draining      = src->internal->draining;
+    dst->internal->draining_done = src->internal->draining_done;
+
     return 0;
 }
 
@@ -441,23 +522,23 @@ static void release_delayed_buffers(PerThreadContext *p)
 #endif
 
 static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
-                         AVPacket *avpkt)
+                         AVPacket *in_pkt)
 {
     FrameThreadContext *fctx = p->parent;
     PerThreadContext *prev_thread = fctx->prev_thread;
-    const AVCodec *codec = p->avctx->codec;
-    int ret;
-
-    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
-        return 0;
+    int err;
 
     pthread_mutex_lock(&p->mutex);
 
-    ret = update_context_from_user(p->avctx, user_avctx);
-    if (ret) {
+    av_packet_unref(p->avpkt);
+    av_packet_move_ref(p->avpkt, in_pkt);
+
+    err = update_context_from_user(p->avctx, user_avctx);
+    if (err < 0) {
         pthread_mutex_unlock(&p->mutex);
-        return ret;
+        return err;
     }
+
     atomic_store_explicit(&p->debug_threads,
                           (p->avctx->debug & FF_DEBUG_THREADS) != 0,
                           memory_order_relaxed);
@@ -467,7 +548,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
 #endif
 
     if (prev_thread) {
-        int err;
         if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
             pthread_mutex_lock(&prev_thread->progress_mutex);
             while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
@@ -488,14 +568,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
     FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
     FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
 
-    av_packet_unref(p->avpkt);
-    ret = av_packet_ref(p->avpkt, avpkt);
-    if (ret < 0) {
-        pthread_mutex_unlock(&p->mutex);
-        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
-        return ret;
-    }
-
     atomic_store(&p->state, STATE_SETTING_UP);
     pthread_cond_signal(&p->input_cond);
     pthread_mutex_unlock(&p->mutex);
@@ -539,57 +611,42 @@ FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
     fctx->prev_thread = p;
-    fctx->next_decoding++;
+    fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
 
     return 0;
 }
 
-int ff_thread_decode_frame(AVCodecContext *avctx,
-                           AVFrame *picture, int *got_picture_ptr,
-                           AVPacket *avpkt)
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     FrameThreadContext *fctx = avctx->internal->thread_ctx;
-    int finished = fctx->next_finished;
-    PerThreadContext *p;
-    int err;
+    int ret = 0;
 
     /* release the async lock, permitting blocked hwaccel threads to
      * go forward while we are in this function */
     async_unlock(fctx);
 
-    /*
-     * Submit a packet to the next decoding thread.
-     */
-
-    p = &fctx->threads[fctx->next_decoding];
-    err = submit_packet(p, avctx, avpkt);
-    if (err)
-        goto finish;
-
-    /*
-     * If we're still receiving the initial packets, don't return a frame.
-     */
+    /* submit packets to threads while there are no buffered results to return */
+    while (!fctx->df.nb_f && !fctx->result) {
+        PerThreadContext *p;
 
-    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
-        fctx->delaying = 0;
+        /* get a packet to be submitted to the next thread */
+        av_packet_unref(fctx->next_pkt);
+        ret = ff_decode_get_packet(avctx, fctx->next_pkt);
+        if (ret < 0 && ret != AVERROR_EOF)
+            goto finish;
 
-    if (fctx->delaying) {
-        *got_picture_ptr=0;
-        if (avpkt->size) {
-            err = avpkt->size;
+        ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
+                            fctx->next_pkt);
+        if (ret < 0)
             goto finish;
-        }
-    }
 
-    /*
-     * Return the next available frame from the oldest thread.
-     * If we're at the end of the stream, then we have to skip threads that
-     * didn't output a frame/error, because we don't want to accidentally signal
-     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
-     */
+        /* do not return any frames until all threads have something to do */
+        if (fctx->next_decoding != fctx->next_finished &&
+            !avctx->internal->draining)
+            continue;
 
-    do {
-        p = &fctx->threads[finished++];
+        p                   = &fctx->threads[fctx->next_finished];
+        fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
 
         if (atomic_load(&p->state) != STATE_INPUT_READY) {
             pthread_mutex_lock(&p->progress_mutex);
@@ -598,35 +655,26 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
             pthread_mutex_unlock(&p->progress_mutex);
         }
 
-        av_frame_move_ref(picture, p->frame);
-        *got_picture_ptr = p->got_frame;
-        picture->pkt_dts = p->avpkt->dts;
-        err = p->result;
-
-        /*
-         * A later call with avkpt->size == 0 may loop over all threads,
-         * including this one, searching for a frame/error to return before being
-         * stopped by the "finished != fctx->next_finished" condition.
-         * Make sure we don't mistakenly return the same frame/error again.
-         */
-        p->got_frame = 0;
-        p->result = 0;
-
-        if (finished >= avctx->thread_count) finished = 0;
-    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
+        fctx->result = p->result;
+        p->result    = 0;
 
-    update_context_from_thread(avctx, p->avctx, 1);
-
-    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+        if (p->df.nb_f)
+            FFSWAP(DecodedFrames, fctx->df, p->df);
+    }
 
-    fctx->next_finished = finished;
+    /* a thread may return multiple frames AND an error
+     * we first return all the frames, then the error */
+    if (fctx->df.nb_f) {
+        decoded_frames_pop(&fctx->df, frame);
+        ret = 0;
+    } else {
+        ret = fctx->result;
+        fctx->result = 0;
+    }
 
-    /* return the size of the consumed packet if no error occurred */
-    if (err >= 0)
-        err = avpkt->size;
 finish:
     async_lock(fctx);
-    return err;
+    return ret;
 }
 
 void ff_thread_report_progress(ThreadFrame *f, int n, int field)
@@ -726,7 +774,6 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
                 pthread_cond_wait(&p->output_cond, &p->progress_mutex);
             pthread_mutex_unlock(&p->progress_mutex);
         }
-        p->got_frame = 0;
     }
 
     async_lock(fctx);
@@ -784,11 +831,12 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
 
             av_buffer_unref(&ctx->internal->pool);
             av_packet_free(&ctx->internal->last_pkt_props);
+            av_packet_free(&ctx->internal->in_pkt);
             av_freep(&ctx->internal);
             av_buffer_unref(&ctx->hw_frames_ctx);
         }
 
-        av_frame_free(&p->frame);
+        decoded_frames_free(&p->df);
 
         ff_pthread_free(p, per_thread_offsets);
         av_packet_free(&p->avpkt);
@@ -796,6 +844,9 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
         av_freep(&p->avctx);
     }
 
+    decoded_frames_free(&fctx->df);
+    av_packet_free(&fctx->next_pkt);
+
     av_freep(&fctx->threads);
     ff_pthread_free(fctx, thread_ctx_offsets);
 
@@ -854,13 +905,17 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
     if (err < 0)
         return err;
 
-    if (!(p->frame = av_frame_alloc()) ||
-        !(p->avpkt = av_packet_alloc()))
+    if (!(p->avpkt = av_packet_alloc()))
         return AVERROR(ENOMEM);
 
+    copy->internal->is_frame_mt = 1;
     if (!first)
         copy->internal->is_copy = 1;
 
+    copy->internal->in_pkt = av_packet_alloc();
+    if (!copy->internal->in_pkt)
+        return AVERROR(ENOMEM);
+
     copy->internal->last_pkt_props = av_packet_alloc();
     if (!copy->internal->last_pkt_props)
         return AVERROR(ENOMEM);
@@ -920,6 +975,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
         return err;
     }
 
+    fctx->next_pkt = av_packet_alloc();
+    if (!fctx->next_pkt)
+        return AVERROR(ENOMEM);
+
     fctx->async_lock = 1;
     fctx->delaying = 1;
 
@@ -964,12 +1023,13 @@ void ff_thread_flush(AVCodecContext *avctx)
     fctx->next_decoding = fctx->next_finished = 0;
     fctx->delaying = 1;
     fctx->prev_thread = NULL;
+
+    decoded_frames_flush(&fctx->df);
+
     for (i = 0; i < avctx->thread_count; i++) {
         PerThreadContext *p = &fctx->threads[i];
-        // Make sure decode flush calls with size=0 won't return old frames
-        p->got_frame = 0;
-        av_frame_unref(p->frame);
-        p->result = 0;
+
+        decoded_frames_flush(&p->df);
 
 #if FF_API_THREAD_SAFE_CALLBACKS
         release_delayed_buffers(p);
@@ -1193,3 +1253,15 @@ void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
     f->owner[0] = f->owner[1] = NULL;
     ff_thread_release_buffer(avctx, f->f);
 }
+
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    PerThreadContext *p = avctx->internal->thread_ctx;
+
+    if (p->avpkt->buf) {
+        av_packet_move_ref(pkt, p->avpkt);
+        return 0;
+    }
+
+    return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
+}
diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index d5673f25ea..7ae69990fb 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h
@@ -40,17 +40,12 @@
 void ff_thread_flush(AVCodecContext *avctx);
 
 /**
- * Submit a new frame to a decoding thread.
- * Returns the next available frame in picture. *got_picture_ptr
- * will be 0 if none is available.
- * The return value on success is the size of the consumed packet for
- * compatibility with FFCodec.decode. This means the decoder
- * has to consume the full packet.
+ * Submit available packets for decoding to worker threads, return a
+ * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
  *
- * Parameters are the same as FFCodec.decode.
+ * Parameters are the same as FFCodec.receive_frame.
  */
-int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
-                           int *got_picture_ptr, AVPacket *avpkt);
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
 /**
  * If the codec defines update_thread_context(), call this
@@ -99,6 +94,11 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
  */
 void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
 
+/**
+ * Get a packet for decoding. This gets invoked by the worker threads.
+ */
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
+
 int ff_thread_init(AVCodecContext *s);
 int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
         int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v3] lavc: convert frame threading to the receive_frame() pattern
  2022-12-09 13:09     ` Timo Rothenpieler
  2022-12-09 13:11       ` James Almer
@ 2022-12-09 16:17       ` Michael Niedermayer
  1 sibling, 0 replies; 30+ messages in thread
From: Michael Niedermayer @ 2022-12-09 16:17 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1957 bytes --]

On Fri, Dec 09, 2022 at 02:09:45PM +0100, Timo Rothenpieler wrote:
> On 07/12/2022 23:22, Michael Niedermayer wrote:
> > On Wed, Dec 07, 2022 at 02:20:23PM +0100, Timo Rothenpieler wrote:
> > > From: Anton Khirnov <anton@khirnov.net>
> > > 
> > > Reorganize the code such that the frame threading code does not call the
> > > decoders directly, but instead calls back into the generic decoding
> > > code. This avoids duplicating the logic that wraps the decoder
> > > invocation and will be useful in the following commits.
> > > ---
> > >   libavcodec/decode.c        |  57 +++++----
> > >   libavcodec/decode.h        |   7 +
> > >   libavcodec/internal.h      |   7 +
> > >   libavcodec/pthread_frame.c | 256 ++++++++++++++++++++++++-------------
> > >   libavcodec/thread.h        |  18 +--
> > >   5 files changed, 222 insertions(+), 123 deletions(-)
> > 
> > This breaks on arm (probably lack of pthread support) in this env
> > 
> > libavcodec/libavcodec.a(decode.o): In function `decode_receive_frame_internal':
> > arm/src/libavcodec/decode.c:616: undefined reference to `ff_thread_receive_frame'
> > arm/src/libavcodec/decode.c:616: undefined reference to `ff_thread_receive_frame'
> > collect2: error: ld returned 1 exit status
> > Makefile:131: recipe for target 'ffprobe_g' failed
> > make: *** [ffprobe_g] Error 1
> 
> Probably just missing an #if somewhere.

> Why does arm not support pthreads though?
> Or is that just this specific configuration?

Just this specific environment.
I could fix that, but then no one would test the lack of pthreads, so I think
it's better if I leave it :)

thx

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

When the tyrant has disposed of foreign enemies by conquest or treaty, and
there is nothing more to fear from them, then he is always stirring up
some war or other, in order that the people may require a leader. -- Plato

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v4] lavc: convert frame threading to the receive_frame() pattern
  2022-12-09 13:37     ` [FFmpeg-devel] [PATCH v4] " Timo Rothenpieler
@ 2022-12-09 23:46       ` Michael Niedermayer
  2022-12-10 20:10         ` Timo Rothenpieler
  2022-12-12 21:42         ` [FFmpeg-devel] [PATCH v5 1/2] " Timo Rothenpieler
  0 siblings, 2 replies; 30+ messages in thread
From: Michael Niedermayer @ 2022-12-09 23:46 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 5049 bytes --]

On Fri, Dec 09, 2022 at 02:37:04PM +0100, Timo Rothenpieler wrote:
> From: Anton Khirnov <anton@khirnov.net>
> 
> Reorganize the code such that the frame threading code does not call the
> decoders directly, but instead calls back into the generic decoding
> code. This avoids duplicating the logic that wraps the decoder
> invocation and will be useful in the following commits.
> ---
>  libavcodec/decode.c        |  58 +++++----
>  libavcodec/decode.h        |   7 +
>  libavcodec/internal.h      |   7 +
>  libavcodec/pthread_frame.c | 256 ++++++++++++++++++++++++-------------
>  libavcodec/thread.h        |  18 +--
>  5 files changed, 223 insertions(+), 123 deletions(-)

this patch changes the output with this:

./ffmpeg -ss 1 -i Enigma_Principles_of_Lust.flv -t 1 -bitexact -f framecrc

If someone wants to look into this, I'll send them the file;
it should be online, but I failed to find it.

--- /tmp/test	2022-12-10 00:29:18.585089416 +0100
+++ /tmp/ref	2022-12-10 00:21:14.177412007 +0100
@@ -11,46 +11,53 @@
 0,          0,          0,        1,   153360, 0x887a0c84
 1,          0,          0,      482,     1928, 0x9228f7f2
 1,        485,        485,     1024,     4096, 0x60c21370
+0,          1,          1,        1,   153360, 0x49c60bc4
 0,          2,          2,        1,   153360, 0x22740bd4
 1,       1509,       1509,     1024,     4096, 0x77933b11
 0,          3,          3,        1,   153360, 0x244d0bb4
 1,       2536,       2536,     1024,     4096, 0xe15e8d59
+0,          4,          4,        1,   153360, 0x5f660b94
 1,       3560,       3560,     1024,     4096, 0x545cdd61
 0,          5,          5,        1,   153360, 0xb628fd45
 0,          6,          6,        1,   153360, 0x3839e5cd
 1,       4576,       4576,     1024,     4096, 0x47154132
+0,          7,          7,        1,   153360, 0xf015da05
 1,       5601,       5601,     1024,     4096, 0x7822f57e
 0,          8,          8,        1,   153360, 0x70f1d8db
 0,          9,          9,        1,   153360, 0x8968d203
 1,       6625,       6625,     1024,     4096, 0xb786e7ff
-0,         10,         10,        1,   153360, 0x5902c6cb
+0,         10,         10,        1,   153360, 0x9e73caed
 1,       7651,       7651,     1024,     4096, 0x0b467ce8
-0,         11,         11,        1,   153360, 0x68e43893
+0,         11,         11,        1,   153360, 0x5902c6cb
 1,       8675,       8675,     1024,     4096, 0x79229a46
-0,         12,         12,        1,   153360, 0x065c5e22
-0,         13,         13,        1,   153360, 0x6c2962a9
+0,         12,         12,        1,   153360, 0x68e43893
+0,         13,         13,        1,   153360, 0x065c5e22
 1,       9702,       9702,     1024,     4096, 0x63b1e107
+0,         14,         14,        1,   153360, 0x6c2962a9
 1,      10726,      10726,     1024,     4096, 0x9f4355eb
 0,         15,         15,        1,   153360, 0xff0a88e3
 1,      11753,      11753,     1024,     4096, 0xcdbae3fe
-0,         16,         16,        1,   153360, 0xd1395d5c
-0,         17,         17,        1,   153360, 0x0e1f6bc5
+0,         16,         16,        1,   153360, 0x07025790
+0,         17,         17,        1,   153360, 0xd1395d5c
 1,      12777,      12777,     1024,     4096, 0x48a38fc7
-0,         18,         18,        1,   153360, 0x1972db1e
+0,         18,         18,        1,   153360, 0x0e1f6bc5
 1,      13793,      13793,     1024,     4096, 0x3baef67f
+0,         19,         19,        1,   153360, 0x1972db1e
 0,         20,         20,        1,   153360, 0x1d6eef56
 1,      14818,      14818,     1024,     4096, 0x1009f25c
 0,         21,         21,        1,   153360, 0x7581f07c
 1,      15842,      15842,     1024,     4096, 0x01bedb12
-0,         22,         22,        1,   153360, 0xae79cdac
+0,         22,         22,        1,   153360, 0xe1a9d022
 1,      16868,      16868,     1024,     4096, 0xa00c62b0
+0,         23,         23,        1,   153360, 0xae79cdac
 0,         24,         24,        1,   153360, 0x9d05ebf3
 1,      17892,      17892,     1024,     4096, 0x9e2f639e
 0,         25,         25,        1,   153360, 0x48e4e890
 1,      18919,      18919,     1024,     4096, 0x0a627322
-0,         26,         26,        1,   153360, 0x37e0e5d7
-0,         27,         27,        1,   153360, 0x6c20f174
+0,         26,         26,        1,   153360, 0x0b35e41a
+0,         27,         27,        1,   153360, 0x37e0e5d7
 1,      19943,      19943,     1024,     4096, 0x5f670b1d
-0,         28,         28,        1,   153360, 0x727bf68a
+0,         28,         28,        1,   153360, 0x6c20f174
 1,      20959,      20959,     1024,     4096, 0xb6486ba8
+0,         29,         29,        1,   153360, 0x727bf68a
 1,      21984,      21984,       66,      264, 0x00000000

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are best at talking, realize last or never when they are wrong.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v4] lavc: convert frame threading to the receive_frame() pattern
  2022-12-09 23:46       ` Michael Niedermayer
@ 2022-12-10 20:10         ` Timo Rothenpieler
  2022-12-11 16:29           ` Michael Niedermayer
  2022-12-12 21:42         ` [FFmpeg-devel] [PATCH v5 1/2] " Timo Rothenpieler
  1 sibling, 1 reply; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-10 20:10 UTC (permalink / raw)
  To: FFmpeg development discussions and patches, Michael Niedermayer

On 10.12.2022 00:46, Michael Niedermayer wrote:
> On Fri, Dec 09, 2022 at 02:37:04PM +0100, Timo Rothenpieler wrote:
>> From: Anton Khirnov <anton@khirnov.net>
>>
>> Reorganize the code such that the frame threading code does not call the
>> decoders directly, but instead calls back into the generic decoding
>> code. This avoids duplicating the logic that wraps the decoder
>> invocation and will be useful in the following commits.
>> ---
>>   libavcodec/decode.c        |  58 +++++----
>>   libavcodec/decode.h        |   7 +
>>   libavcodec/internal.h      |   7 +
>>   libavcodec/pthread_frame.c | 256 ++++++++++++++++++++++++-------------
>>   libavcodec/thread.h        |  18 +--
>>   5 files changed, 223 insertions(+), 123 deletions(-)
> 
> this patch changes the output with this:
> 
> ./ffmpeg -ss 1 -i Enigma_Principles_of_Lust.flv -t 1 -bitexact -f framecrc

Yeah, I'd like to have that file.

The change is interesting, and it looks like it might actually be legit 
frames?

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v4] lavc: convert frame threading to the receive_frame() pattern
  2022-12-10 20:10         ` Timo Rothenpieler
@ 2022-12-11 16:29           ` Michael Niedermayer
  0 siblings, 0 replies; 30+ messages in thread
From: Michael Niedermayer @ 2022-12-11 16:29 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1334 bytes --]

On Sat, Dec 10, 2022 at 09:10:18PM +0100, Timo Rothenpieler wrote:
> On 10.12.2022 00:46, Michael Niedermayer wrote:
> > On Fri, Dec 09, 2022 at 02:37:04PM +0100, Timo Rothenpieler wrote:
> > > From: Anton Khirnov <anton@khirnov.net>
> > > 
> > > Reorganize the code such that the frame threading code does not call the
> > > decoders directly, but instead calls back into the generic decoding
> > > code. This avoids duplicating the logic that wraps the decoder
> > > invocation and will be useful in the following commits.
> > > ---
> > >   libavcodec/decode.c        |  58 +++++----
> > >   libavcodec/decode.h        |   7 +
> > >   libavcodec/internal.h      |   7 +
> > >   libavcodec/pthread_frame.c | 256 ++++++++++++++++++++++++-------------
> > >   libavcodec/thread.h        |  18 +--
> > >   5 files changed, 223 insertions(+), 123 deletions(-)
> > 
> > this patch changes the output with this:
> > 
> > ./ffmpeg -ss 1 -i Enigma_Principles_of_Lust.flv -t 1 -bitexact -f framecrc
> 
> Yeah, I'd like to have that file.

<timo@rothenpieler.org>: message size 16466626 exceeds size limit 10240000 of
    server mail.btbn.de[136.243.74.85]

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

He who knows, does not speak. He who speaks, does not know. -- Lao Tsu

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH v5 1/2] lavc: convert frame threading to the receive_frame() pattern
  2022-12-09 23:46       ` Michael Niedermayer
  2022-12-10 20:10         ` Timo Rothenpieler
@ 2022-12-12 21:42         ` Timo Rothenpieler
  2022-12-12 21:42           ` [FFmpeg-devel] [PATCH v5 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
  2022-12-13 18:17           ` [FFmpeg-devel] [PATCH v5 1/2] lavc: convert frame threading to the receive_frame() pattern Michael Niedermayer
  1 sibling, 2 replies; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-12 21:42 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Anton Khirnov

From: Anton Khirnov <anton@khirnov.net>

Reorganize the code such that the frame threading code does not call the
decoders directly, but instead calls back into the generic decoding
code. This avoids duplicating the logic that wraps the decoder
invocation and will be useful in the following commits.
---
 libavcodec/decode.c        |  62 ++++++---
 libavcodec/decode.h        |   7 +
 libavcodec/internal.h      |   7 +
 libavcodec/pthread_frame.c | 265 ++++++++++++++++++++++++-------------
 libavcodec/thread.h        |  18 +--
 5 files changed, 236 insertions(+), 123 deletions(-)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index b184c3f55b..672bf472dd 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -180,6 +180,11 @@ fail:
     return ret;
 }
 
+#if !HAVE_THREADS
+#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
+#define ff_thread_receive_frame(avctx, frame) (AVERROR_BUG)
+#endif
+
 int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
     AVCodecInternal *avci = avctx->internal;
@@ -188,7 +193,14 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
     if (avci->draining)
         return AVERROR_EOF;
 
-    ret = av_bsf_receive_packet(avci->bsf, pkt);
+    /* If we are a worker thread, get the next packet from the threading
+     * context. Otherwise we are the main (user-facing) context, so we get the
+     * next packet from the input filterchain.
+     */
+    if (avctx->internal->is_frame_mt)
+        ret = ff_thread_get_packet(avctx, pkt);
+    else
+        ret = av_bsf_receive_packet(avci->bsf, pkt);
     if (ret == AVERROR_EOF)
         avci->draining = 1;
     if (ret < 0)
@@ -273,30 +285,25 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
         return AVERROR_EOF;
 
     if (!pkt->data &&
-        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
-          avctx->active_thread_type & FF_THREAD_FRAME))
+        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
         return AVERROR_EOF;
 
     got_frame = 0;
 
-    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
-        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
-    } else {
-        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
-
-        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
-            frame->pkt_dts = pkt->dts;
-        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
-            if(!avctx->has_b_frames)
-                frame->pkt_pos = pkt->pos;
-            //FIXME these should be under if(!avctx->has_b_frames)
-            /* get_buffer is supposed to set frame parameters */
-            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
-                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
-                if (!frame->width)                    frame->width               = avctx->width;
-                if (!frame->height)                   frame->height              = avctx->height;
-                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
-            }
+    ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
+
+    if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+        frame->pkt_dts = pkt->dts;
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if(!avctx->has_b_frames)
+            frame->pkt_pos = pkt->pos;
+        //FIXME these should be under if(!avctx->has_b_frames)
+        /* get_buffer is supposed to set frame parameters */
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
+            if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
+            if (!frame->width)                    frame->width               = avctx->width;
+            if (!frame->height)                   frame->height              = avctx->height;
+            if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
         }
     }
     emms_c();
@@ -546,7 +553,7 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
     return 0;
 }
 
-static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
 {
     AVCodecInternal *avci = avctx->internal;
     const FFCodec *const codec = ffcodec(avctx->codec);
@@ -569,6 +576,17 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
         return ok;
     }
 
+    return ret;
+}
+
+static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+{
+    int ret;
+    if (avctx->active_thread_type & FF_THREAD_FRAME)
+        ret = ff_thread_receive_frame(avctx, frame);
+    else
+        ret = ff_decode_receive_frame_internal(avctx, frame);
+
     if (!ret) {
         frame->best_effort_timestamp = guess_correct_pts(avctx,
                                                          frame->pts,
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 5d95369b5e..34beb70f97 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -58,6 +58,13 @@ typedef struct FrameDecodeData {
  */
 int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
+/**
+ * Do the actual decoding and obtain a decoded frame from the decoder, if
+ * available. When frame threading is used, this is invoked by the worker
+ * threads, otherwise by the top layer directly.
+ */
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
+
 /**
  * Called by decoders to get the next packet for decoding.
  *
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index a283c52e01..c87036efc4 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -56,6 +56,13 @@ typedef struct AVCodecInternal {
      */
     int is_copy;
 
+    /**
+     * This field is set to 1 when frame threading is being used and the parent
+     * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
+     * one of those actually doing the decoding), 0 otherwise.
+     */
+    int is_frame_mt;
+
     /**
      * An audio frame with less than required samples has been submitted (and
      * potentially padded with silence). Reject all subsequent frames.
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 62a0b18a8a..30f6cb95b0 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -46,6 +46,7 @@
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/fifo.h"
 #include "libavutil/thread.h"
 
 enum {
@@ -73,6 +74,12 @@ enum {
     INITIALIZED,    ///< Thread has been properly set up
 };
 
+typedef struct DecodedFrames {
+    AVFrame  **f;
+    size_t  nb_f;
+    size_t  nb_f_allocated;
+} DecodedFrames;
+
 /**
  * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
  */
@@ -93,8 +100,10 @@ typedef struct PerThreadContext {
 
     AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
 
-    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
-    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
+    /**
+     * Decoded frames from a single decode iteration.
+     */
+    DecodedFrames df;
     int     result;                 ///< The result of the last codec decode/encode() call.
 
     atomic_int state;
@@ -141,6 +150,14 @@ typedef struct FrameThreadContext {
     pthread_cond_t async_cond;
     int async_lock;
 
+    DecodedFrames df;
+    int result;
+
+    /**
+     * Packet to be submitted to the next thread for decoding.
+     */
+    AVPacket *next_pkt;
+
     int next_decoding;             ///< The next context to submit a packet to.
     int next_finished;             ///< The next context to return output from.
 
@@ -190,6 +207,51 @@ static void thread_set_name(PerThreadContext *p)
     ff_thread_setname(name);
 }
 
+// get a free frame to decode into
+static AVFrame *decoded_frames_get_free(DecodedFrames *df)
+{
+    if (df->nb_f == df->nb_f_allocated) {
+        AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
+                                         sizeof(*df->f));
+        if (!tmp)
+            return NULL;
+        df->f = tmp;
+
+        df->f[df->nb_f] = av_frame_alloc();
+        if (!df->f[df->nb_f])
+            return NULL;
+
+        df->nb_f_allocated++;
+    }
+
+    av_frame_unref(df->f[df->nb_f]);
+    return df->f[df->nb_f];
+}
+
+static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
+{
+    AVFrame *tmp_frame = df->f[0];
+    av_frame_move_ref(dst, tmp_frame);
+    memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
+    df->f[--df->nb_f] = tmp_frame;
+}
+
+static void decoded_frames_flush(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f; i++)
+        av_frame_unref(df->f[i]);
+    df->nb_f = 0;
+}
+
+static void decoded_frames_free(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f_allocated; i++)
+        av_frame_free(&df->f[i]);
+    av_freep(&df->f);
+    df->nb_f           = 0;
+    df->nb_f_allocated = 0;
+}
+
 /**
  * Codec worker thread.
  *
@@ -202,6 +264,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
     PerThreadContext *p = arg;
     AVCodecContext *avctx = p->avctx;
     const FFCodec *codec = ffcodec(avctx->codec);
+    int ret;
 
     thread_set_name(p);
 
@@ -236,16 +299,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
             p->hwaccel_serializing = 1;
         }
 
-        av_frame_unref(p->frame);
-        p->got_frame = 0;
-        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
+        ret = 0;
+        while (ret >= 0) {
+            AVFrame *frame;
+
+            /* get the frame which will store the output */
+            frame = decoded_frames_get_free(&p->df);
+            if (!frame) {
+                p->result = AVERROR(ENOMEM);
+                goto alloc_fail;
+            }
 
-        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
-            ff_thread_release_buffer(avctx, p->frame);
+            /* do the actual decoding */
+            ret = ff_decode_receive_frame_internal(avctx, frame);
+            if (ret == 0)
+                p->df.nb_f++;
+            else if (ret < 0 && frame->buf[0])
+                ff_thread_release_buffer(avctx, frame);
+
+            p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
+        }
 
         if (atomic_load(&p->state) == STATE_SETTING_UP)
             ff_thread_finish_setup(avctx);
 
+alloc_fail:
         if (p->hwaccel_serializing) {
             /* wipe hwaccel state to avoid stale pointers lying around;
              * the state was transferred to FrameThreadContext in
@@ -441,23 +519,25 @@ static void release_delayed_buffers(PerThreadContext *p)
 #endif
 
 static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
-                         AVPacket *avpkt)
+                         AVPacket *in_pkt)
 {
     FrameThreadContext *fctx = p->parent;
     PerThreadContext *prev_thread = fctx->prev_thread;
-    const AVCodec *codec = p->avctx->codec;
-    int ret;
-
-    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
-        return 0;
+    int err;
 
     pthread_mutex_lock(&p->mutex);
 
-    ret = update_context_from_user(p->avctx, user_avctx);
-    if (ret) {
+    av_packet_unref(p->avpkt);
+    av_packet_move_ref(p->avpkt, in_pkt);
+    if (!p->avpkt->size)
+        p->avctx->internal->draining = 1;
+
+    err = update_context_from_user(p->avctx, user_avctx);
+    if (err < 0) {
         pthread_mutex_unlock(&p->mutex);
-        return ret;
+        return err;
     }
+
     atomic_store_explicit(&p->debug_threads,
                           (p->avctx->debug & FF_DEBUG_THREADS) != 0,
                           memory_order_relaxed);
@@ -467,7 +547,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
 #endif
 
     if (prev_thread) {
-        int err;
         if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
             pthread_mutex_lock(&prev_thread->progress_mutex);
             while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
@@ -488,14 +567,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
     FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
     FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
 
-    av_packet_unref(p->avpkt);
-    ret = av_packet_ref(p->avpkt, avpkt);
-    if (ret < 0) {
-        pthread_mutex_unlock(&p->mutex);
-        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
-        return ret;
-    }
-
     atomic_store(&p->state, STATE_SETTING_UP);
     pthread_cond_signal(&p->input_cond);
     pthread_mutex_unlock(&p->mutex);
@@ -539,57 +610,42 @@ FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
     fctx->prev_thread = p;
-    fctx->next_decoding++;
+    fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
 
     return 0;
 }
 
-int ff_thread_decode_frame(AVCodecContext *avctx,
-                           AVFrame *picture, int *got_picture_ptr,
-                           AVPacket *avpkt)
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     FrameThreadContext *fctx = avctx->internal->thread_ctx;
-    int finished = fctx->next_finished;
-    PerThreadContext *p;
-    int err;
+    int ret = 0;
 
     /* release the async lock, permitting blocked hwaccel threads to
      * go forward while we are in this function */
     async_unlock(fctx);
 
-    /*
-     * Submit a packet to the next decoding thread.
-     */
-
-    p = &fctx->threads[fctx->next_decoding];
-    err = submit_packet(p, avctx, avpkt);
-    if (err)
-        goto finish;
+    /* submit packets to threads while there are no buffered results to return */
+    while (!fctx->df.nb_f && !fctx->result) {
+        PerThreadContext *p;
 
-    /*
-     * If we're still receiving the initial packets, don't return a frame.
-     */
-
-    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
-        fctx->delaying = 0;
+        /* get a packet to be submitted to the next thread */
+        av_packet_unref(fctx->next_pkt);
+        ret = ff_decode_get_packet(avctx, fctx->next_pkt);
+        if (ret < 0 && ret != AVERROR_EOF)
+            goto finish;
 
-    if (fctx->delaying) {
-        *got_picture_ptr=0;
-        if (avpkt->size) {
-            err = avpkt->size;
+        ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
+                            fctx->next_pkt);
+        if (ret < 0)
             goto finish;
-        }
-    }
 
-    /*
-     * Return the next available frame from the oldest thread.
-     * If we're at the end of the stream, then we have to skip threads that
-     * didn't output a frame/error, because we don't want to accidentally signal
-     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
-     */
+        /* do not return any frames until all threads have something to do */
+        if (fctx->next_decoding != fctx->next_finished &&
+            !avctx->internal->draining)
+            continue;
 
-    do {
-        p = &fctx->threads[finished++];
+        p                   = &fctx->threads[fctx->next_finished];
+        fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
 
         if (atomic_load(&p->state) != STATE_INPUT_READY) {
             pthread_mutex_lock(&p->progress_mutex);
@@ -598,35 +654,26 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
             pthread_mutex_unlock(&p->progress_mutex);
         }
 
-        av_frame_move_ref(picture, p->frame);
-        *got_picture_ptr = p->got_frame;
-        picture->pkt_dts = p->avpkt->dts;
-        err = p->result;
-
-        /*
-         * A later call with avkpt->size == 0 may loop over all threads,
-         * including this one, searching for a frame/error to return before being
-         * stopped by the "finished != fctx->next_finished" condition.
-         * Make sure we don't mistakenly return the same frame/error again.
-         */
-        p->got_frame = 0;
-        p->result = 0;
-
-        if (finished >= avctx->thread_count) finished = 0;
-    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
+        fctx->result = p->result;
+        p->result    = 0;
 
-    update_context_from_thread(avctx, p->avctx, 1);
-
-    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+        if (p->df.nb_f)
+            FFSWAP(DecodedFrames, fctx->df, p->df);
+    }
 
-    fctx->next_finished = finished;
+    /* a thread may return multiple frames AND an error
+     * we first return all the frames, then the error */
+    if (fctx->df.nb_f) {
+        decoded_frames_pop(&fctx->df, frame);
+        ret = 0;
+    } else {
+        ret = fctx->result;
+        fctx->result = 0;
+    }
 
-    /* return the size of the consumed packet if no error occurred */
-    if (err >= 0)
-        err = avpkt->size;
 finish:
     async_lock(fctx);
-    return err;
+    return ret;
 }
 
 void ff_thread_report_progress(ThreadFrame *f, int n, int field)
@@ -726,7 +773,6 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
                 pthread_cond_wait(&p->output_cond, &p->progress_mutex);
             pthread_mutex_unlock(&p->progress_mutex);
         }
-        p->got_frame = 0;
     }
 
     async_lock(fctx);
@@ -784,11 +830,12 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
 
             av_buffer_unref(&ctx->internal->pool);
             av_packet_free(&ctx->internal->last_pkt_props);
+            av_packet_free(&ctx->internal->in_pkt);
             av_freep(&ctx->internal);
             av_buffer_unref(&ctx->hw_frames_ctx);
         }
 
-        av_frame_free(&p->frame);
+        decoded_frames_free(&p->df);
 
         ff_pthread_free(p, per_thread_offsets);
         av_packet_free(&p->avpkt);
@@ -796,6 +843,9 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
         av_freep(&p->avctx);
     }
 
+    decoded_frames_free(&fctx->df);
+    av_packet_free(&fctx->next_pkt);
+
     av_freep(&fctx->threads);
     ff_pthread_free(fctx, thread_ctx_offsets);
 
@@ -854,13 +904,17 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
     if (err < 0)
         return err;
 
-    if (!(p->frame = av_frame_alloc()) ||
-        !(p->avpkt = av_packet_alloc()))
+    if (!(p->avpkt = av_packet_alloc()))
         return AVERROR(ENOMEM);
 
+    copy->internal->is_frame_mt = 1;
     if (!first)
         copy->internal->is_copy = 1;
 
+    copy->internal->in_pkt = av_packet_alloc();
+    if (!copy->internal->in_pkt)
+        return AVERROR(ENOMEM);
+
     copy->internal->last_pkt_props = av_packet_alloc();
     if (!copy->internal->last_pkt_props)
         return AVERROR(ENOMEM);
@@ -920,6 +974,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
         return err;
     }
 
+    fctx->next_pkt = av_packet_alloc();
+    if (!fctx->next_pkt)
+        return AVERROR(ENOMEM);
+
     fctx->async_lock = 1;
     fctx->delaying = 1;
 
@@ -964,17 +1022,28 @@ void ff_thread_flush(AVCodecContext *avctx)
     fctx->next_decoding = fctx->next_finished = 0;
     fctx->delaying = 1;
     fctx->prev_thread = NULL;
+
+    decoded_frames_flush(&fctx->df);
+
     for (i = 0; i < avctx->thread_count; i++) {
         PerThreadContext *p = &fctx->threads[i];
-        // Make sure decode flush calls with size=0 won't return old frames
-        p->got_frame = 0;
-        av_frame_unref(p->frame);
-        p->result = 0;
+
+        decoded_frames_flush(&p->df);
 
 #if FF_API_THREAD_SAFE_CALLBACKS
         release_delayed_buffers(p);
 #endif
 
+        av_packet_unref(p->avctx->internal->last_pkt_props);
+        av_packet_unref(p->avctx->internal->in_pkt);
+
+        p->avctx->pts_correction_last_pts =
+        p->avctx->pts_correction_last_dts = INT64_MIN;
+
+        p->avctx->internal->draining = 0;
+        p->avctx->internal->draining_done = 0;
+        p->avctx->internal->nb_draining_errors = 0;
+
         if (ffcodec(avctx->codec)->flush)
             ffcodec(avctx->codec)->flush(p->avctx);
     }
@@ -1193,3 +1262,15 @@ void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
     f->owner[0] = f->owner[1] = NULL;
     ff_thread_release_buffer(avctx, f->f);
 }
+
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    PerThreadContext *p = avctx->internal->thread_ctx;
+
+    if (p->avpkt->buf) {
+        av_packet_move_ref(pkt, p->avpkt);
+        return 0;
+    }
+
+    return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
+}
diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index d5673f25ea..7ae69990fb 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h
@@ -40,17 +40,12 @@
 void ff_thread_flush(AVCodecContext *avctx);
 
 /**
- * Submit a new frame to a decoding thread.
- * Returns the next available frame in picture. *got_picture_ptr
- * will be 0 if none is available.
- * The return value on success is the size of the consumed packet for
- * compatibility with FFCodec.decode. This means the decoder
- * has to consume the full packet.
+ * Submit available packets for decoding to worker threads, return a
+ * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
  *
- * Parameters are the same as FFCodec.decode.
+ * Parameters are the same as FFCodec.receive_frame.
  */
-int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
-                           int *got_picture_ptr, AVPacket *avpkt);
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
 /**
  * If the codec defines update_thread_context(), call this
@@ -99,6 +94,11 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
  */
 void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
 
+/**
+ * Get a packet for decoding. This gets invoked by the worker threads.
+ */
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
+
 int ff_thread_init(AVCodecContext *s);
 int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
         int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH v5 2/2] avcodec/mjpegdec: add support for frame threading
  2022-12-12 21:42         ` [FFmpeg-devel] [PATCH v5 1/2] " Timo Rothenpieler
@ 2022-12-12 21:42           ` Timo Rothenpieler
  2022-12-13 18:17           ` [FFmpeg-devel] [PATCH v5 1/2] lavc: convert frame threading to the receive_frame() pattern Michael Niedermayer
  1 sibling, 0 replies; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-12 21:42 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Timo Rothenpieler

In my tests, this led to a notable speed increase with the number
of threads used. Decoding a 720p sample gave the following results:

1 Thread: 1428 FPS
2 Threads: 2501 FPS
8 Threads: 7575 FPS
Automatic: 11326 FPS (On a 16 Core/32 Threads system)
---
 libavcodec/jpeglsdec.c |  2 +-
 libavcodec/mjpegdec.c  | 11 ++++++-----
 libavcodec/sp5xdec.c   |  4 ++--
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
index ec163b8964..6e75c9b406 100644
--- a/libavcodec/jpeglsdec.c
+++ b/libavcodec/jpeglsdec.c
@@ -559,6 +559,6 @@ const FFCodec ff_jpegls_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(ff_mjpeg_decode_frame),
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index f33911e1a8..41d3f36940 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -54,6 +54,7 @@
 #include "exif.h"
 #include "bytestream.h"
 #include "tiff_common.h"
+#include "thread.h"
 
 
 static int init_default_huffman_tables(MJpegDecodeContext *s)
@@ -712,7 +713,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
                 s->avctx->pix_fmt,
                 AV_PIX_FMT_NONE,
             };
-            s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
+            s->hwaccel_pix_fmt = ff_thread_get_format(s->avctx, pix_fmts);
             if (s->hwaccel_pix_fmt < 0)
                 return AVERROR(EINVAL);
 
@@ -728,7 +729,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         }
 
         av_frame_unref(s->picture_ptr);
-        if (ff_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
+        if (ff_thread_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
             return -1;
         s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
         s->picture_ptr->key_frame = 1;
@@ -2954,7 +2955,7 @@ const FFCodec ff_mjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(ff_mjpeg_decode_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .p.priv_class   = &mjpegdec_class,
     .p.profiles     = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
@@ -2983,7 +2984,7 @@ const FFCodec ff_thp_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(ff_mjpeg_decode_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
@@ -3062,7 +3063,7 @@ const FFCodec ff_smvjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(smvjpeg_receive_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_EXPORTS_CROPPING |
                       FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index dfed725500..af1b6400e1 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -103,7 +103,7 @@ const FFCodec ff_sp5x_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(sp5x_decode_frame),
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
@@ -119,7 +119,7 @@ const FFCodec ff_amv_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(sp5x_decode_frame),
     .p.max_lowres   = 3,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
 #endif
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v5 1/2] lavc: convert frame threading to the receive_frame() pattern
  2022-12-12 21:42         ` [FFmpeg-devel] [PATCH v5 1/2] " Timo Rothenpieler
  2022-12-12 21:42           ` [FFmpeg-devel] [PATCH v5 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
@ 2022-12-13 18:17           ` Michael Niedermayer
  2022-12-14  1:50             ` [FFmpeg-devel] [PATCH v6] " Timo Rothenpieler
  1 sibling, 1 reply; 30+ messages in thread
From: Michael Niedermayer @ 2022-12-13 18:17 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1948 bytes --]

On Mon, Dec 12, 2022 at 10:42:09PM +0100, Timo Rothenpieler wrote:
> From: Anton Khirnov <anton@khirnov.net>
> 
> Reorganize the code such that the frame threading code does not call the
> decoders directly, but instead calls back into the generic decoding
> code. This avoids duplicating the logic that wraps the decoder
> invocation and will be useful in the following commits.
> ---
>  libavcodec/decode.c        |  62 ++++++---
>  libavcodec/decode.h        |   7 +
>  libavcodec/internal.h      |   7 +
>  libavcodec/pthread_frame.c | 265 ++++++++++++++++++++++++-------------
>  libavcodec/thread.h        |  18 +--
>  5 files changed, 236 insertions(+), 123 deletions(-)

this breaks some files
but seems not 100% reproducible

this is what i see after the patch when it fails:
[ogg @ 0x56170f3d67c0] Broken file, keyframe not correctly marked.peed= 242x    
Error while decoding stream #0:1: Operation not permitted
    Last message repeated 128784 times
Error while decoding stream #0:1: Operation not permittedate=N/A speed=9.94x    
    Last message repeated 118013 times
Error while decoding stream #0:1: Operation not permittedate=N/A speed=4.98x    
    Last message repeated 131881 times
Error while decoding stream #0:1: Operation not permittedate=N/A speed=3.32x    
    Last message repeated 131490 times
Error while decoding stream #0:1: Operation not permittedate=N/A speed=2.49x    
    Last message repeated 26968 times
frame=  106 fps= 50 q=-0.0 Lsize=N/A time=00:00:04.98 bitrate=N/A speed=2.37x    
video:50kB audio:215kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
Exiting normally, received signal 2.

I will mail you the file privately

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

it is not once nor twice but times without number that the same ideas make
their appearance in the world. -- Aristotle

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH v6] lavc: convert frame threading to the receive_frame() pattern
  2022-12-13 18:17           ` [FFmpeg-devel] [PATCH v5 1/2] lavc: convert frame threading to the receive_frame() pattern Michael Niedermayer
@ 2022-12-14  1:50             ` Timo Rothenpieler
  2022-12-15 22:10               ` Michael Niedermayer
                                 ` (2 more replies)
  0 siblings, 3 replies; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-14  1:50 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Anton Khirnov

From: Anton Khirnov <anton@khirnov.net>

Reorganize the code such that the frame threading code does not call the
decoders directly, but instead calls back into the generic decoding
code. This avoids duplicating the logic that wraps the decoder
invocation and will be useful in the following commits.
---
 libavcodec/decode.c        |  62 ++++++---
 libavcodec/decode.h        |   7 +
 libavcodec/internal.h      |   7 +
 libavcodec/pthread_frame.c | 279 ++++++++++++++++++++++++-------------
 libavcodec/thread.h        |  18 +--
 5 files changed, 247 insertions(+), 126 deletions(-)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 3e5be501b9..5d2b484b66 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -180,6 +180,11 @@ fail:
     return ret;
 }
 
+#if !HAVE_THREADS
+#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
+#define ff_thread_receive_frame(avctx, frame) (AVERROR_BUG)
+#endif
+
 int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
     AVCodecInternal *avci = avctx->internal;
@@ -188,7 +193,14 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
     if (avci->draining)
         return AVERROR_EOF;
 
-    ret = av_bsf_receive_packet(avci->bsf, pkt);
+    /* If we are a worker thread, get the next packet from the threading
+     * context. Otherwise we are the main (user-facing) context, so we get the
+     * next packet from the input filterchain.
+     */
+    if (avctx->internal->is_frame_mt)
+        ret = ff_thread_get_packet(avctx, pkt);
+    else
+        ret = av_bsf_receive_packet(avci->bsf, pkt);
     if (ret == AVERROR_EOF)
         avci->draining = 1;
     if (ret < 0)
@@ -273,30 +285,25 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
         return AVERROR_EOF;
 
     if (!pkt->data &&
-        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
-          avctx->active_thread_type & FF_THREAD_FRAME))
+        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
         return AVERROR_EOF;
 
     got_frame = 0;
 
-    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
-        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
-    } else {
-        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
-
-        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
-            frame->pkt_dts = pkt->dts;
-        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
-            if(!avctx->has_b_frames)
-                frame->pkt_pos = pkt->pos;
-            //FIXME these should be under if(!avctx->has_b_frames)
-            /* get_buffer is supposed to set frame parameters */
-            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
-                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
-                if (!frame->width)                    frame->width               = avctx->width;
-                if (!frame->height)                   frame->height              = avctx->height;
-                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
-            }
+    ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
+
+    if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+        frame->pkt_dts = pkt->dts;
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if(!avctx->has_b_frames)
+            frame->pkt_pos = pkt->pos;
+        //FIXME these should be under if(!avctx->has_b_frames)
+        /* get_buffer is supposed to set frame parameters */
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
+            if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
+            if (!frame->width)                    frame->width               = avctx->width;
+            if (!frame->height)                   frame->height              = avctx->height;
+            if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
         }
     }
     emms_c();
@@ -546,7 +553,7 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
     return 0;
 }
 
-static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
 {
     AVCodecInternal *avci = avctx->internal;
     const FFCodec *const codec = ffcodec(avctx->codec);
@@ -569,6 +576,17 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
         return ok;
     }
 
+    return ret;
+}
+
+static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+{
+    int ret;
+    if (avctx->active_thread_type & FF_THREAD_FRAME)
+        ret = ff_thread_receive_frame(avctx, frame);
+    else
+        ret = ff_decode_receive_frame_internal(avctx, frame);
+
     if (!ret) {
         frame->best_effort_timestamp = guess_correct_pts(avctx,
                                                          frame->pts,
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 906122b4a7..7ba8e3a332 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -58,6 +58,13 @@ typedef struct FrameDecodeData {
  */
 int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
+/**
+ * Do the actual decoding and obtain a decoded frame from the decoder, if
+ * available. When frame threading is used, this is invoked by the worker
+ * threads, otherwise by the top layer directly.
+ */
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
+
 /**
  * Called by decoders to get the next packet for decoding.
  *
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index a283c52e01..c87036efc4 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -56,6 +56,13 @@ typedef struct AVCodecInternal {
      */
     int is_copy;
 
+    /**
+     * This field is set to 1 when frame threading is being used and the parent
+     * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
+     * one of those actually doing the decoding), 0 otherwise.
+     */
+    int is_frame_mt;
+
     /**
      * An audio frame with less than required samples has been submitted (and
      * potentially padded with silence). Reject all subsequent frames.
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 62a0b18a8a..d8182cb4b8 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -46,6 +46,7 @@
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/fifo.h"
 #include "libavutil/thread.h"
 
 enum {
@@ -73,6 +74,12 @@ enum {
     INITIALIZED,    ///< Thread has been properly set up
 };
 
+typedef struct DecodedFrames {
+    AVFrame  **f;
+    size_t  nb_f;
+    size_t  nb_f_allocated;
+} DecodedFrames;
+
 /**
  * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
  */
@@ -93,8 +100,10 @@ typedef struct PerThreadContext {
 
     AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
 
-    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
-    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
+    /**
+     * Decoded frames from a single decode iteration.
+     */
+    DecodedFrames df;
     int     result;                 ///< The result of the last codec decode/encode() call.
 
     atomic_int state;
@@ -141,6 +150,14 @@ typedef struct FrameThreadContext {
     pthread_cond_t async_cond;
     int async_lock;
 
+    DecodedFrames df;
+    int result;
+
+    /**
+     * Packet to be submitted to the next thread for decoding.
+     */
+    AVPacket *next_pkt;
+
     int next_decoding;             ///< The next context to submit a packet to.
     int next_finished;             ///< The next context to return output from.
 
@@ -190,6 +207,51 @@ static void thread_set_name(PerThreadContext *p)
     ff_thread_setname(name);
 }
 
+// get a free frame to decode into
+static AVFrame *decoded_frames_get_free(DecodedFrames *df)
+{
+    if (df->nb_f == df->nb_f_allocated) {
+        AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
+                                         sizeof(*df->f));
+        if (!tmp)
+            return NULL;
+        df->f = tmp;
+
+        df->f[df->nb_f] = av_frame_alloc();
+        if (!df->f[df->nb_f])
+            return NULL;
+
+        df->nb_f_allocated++;
+    }
+
+    av_frame_unref(df->f[df->nb_f]);
+    return df->f[df->nb_f];
+}
+
+static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
+{
+    AVFrame *tmp_frame = df->f[0];
+    av_frame_move_ref(dst, tmp_frame);
+    memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
+    df->f[--df->nb_f] = tmp_frame;
+}
+
+static void decoded_frames_flush(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f; i++)
+        av_frame_unref(df->f[i]);
+    df->nb_f = 0;
+}
+
+static void decoded_frames_free(DecodedFrames *df)
+{
+    for (int i = 0; i < df->nb_f_allocated; i++)
+        av_frame_free(&df->f[i]);
+    av_freep(&df->f);
+    df->nb_f           = 0;
+    df->nb_f_allocated = 0;
+}
+
 /**
  * Codec worker thread.
  *
@@ -202,6 +264,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
     PerThreadContext *p = arg;
     AVCodecContext *avctx = p->avctx;
     const FFCodec *codec = ffcodec(avctx->codec);
+    int ret;
 
     thread_set_name(p);
 
@@ -236,16 +299,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
             p->hwaccel_serializing = 1;
         }
 
-        av_frame_unref(p->frame);
-        p->got_frame = 0;
-        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
+        ret = 0;
+        while (ret >= 0) {
+            AVFrame *frame;
 
-        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
-            ff_thread_release_buffer(avctx, p->frame);
+            /* get the frame which will store the output */
+            frame = decoded_frames_get_free(&p->df);
+            if (!frame) {
+                p->result = AVERROR(ENOMEM);
+                goto alloc_fail;
+            }
+
+            /* do the actual decoding */
+            ret = ff_decode_receive_frame_internal(avctx, frame);
+            if (ret == 0)
+                p->df.nb_f++;
+            else if (ret < 0 && frame->buf[0])
+                ff_thread_release_buffer(avctx, frame);
+
+            p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
+        }
 
         if (atomic_load(&p->state) == STATE_SETTING_UP)
             ff_thread_finish_setup(avctx);
 
+alloc_fail:
         if (p->hwaccel_serializing) {
             /* wipe hwaccel state to avoid stale pointers lying around;
              * the state was transferred to FrameThreadContext in
@@ -441,23 +519,25 @@ static void release_delayed_buffers(PerThreadContext *p)
 #endif
 
 static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
-                         AVPacket *avpkt)
+                         AVPacket *in_pkt)
 {
     FrameThreadContext *fctx = p->parent;
     PerThreadContext *prev_thread = fctx->prev_thread;
-    const AVCodec *codec = p->avctx->codec;
-    int ret;
-
-    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
-        return 0;
+    int err;
 
     pthread_mutex_lock(&p->mutex);
 
-    ret = update_context_from_user(p->avctx, user_avctx);
-    if (ret) {
+    av_packet_unref(p->avpkt);
+    av_packet_move_ref(p->avpkt, in_pkt);
+    if (!p->avpkt->size)
+        p->avctx->internal->draining = 1;
+
+    err = update_context_from_user(p->avctx, user_avctx);
+    if (err < 0) {
         pthread_mutex_unlock(&p->mutex);
-        return ret;
+        return err;
     }
+
     atomic_store_explicit(&p->debug_threads,
                           (p->avctx->debug & FF_DEBUG_THREADS) != 0,
                           memory_order_relaxed);
@@ -467,7 +547,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
 #endif
 
     if (prev_thread) {
-        int err;
         if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
             pthread_mutex_lock(&prev_thread->progress_mutex);
             while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
@@ -475,10 +554,16 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
             pthread_mutex_unlock(&prev_thread->progress_mutex);
         }
 
-        err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
-        if (err) {
-            pthread_mutex_unlock(&p->mutex);
-            return err;
+        /* codecs without delay might not be prepared to be called repeatedly here during
+         * flushing (vp3/theora), and also don't need to be, since from this point on, they
+         * will always return EOF anyway */
+        if (!p->avctx->internal->draining || (p->avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
+        {
+            err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
+            if (err) {
+                pthread_mutex_unlock(&p->mutex);
+                return err;
+            }
         }
     }
 
@@ -488,14 +573,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
     FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
     FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
 
-    av_packet_unref(p->avpkt);
-    ret = av_packet_ref(p->avpkt, avpkt);
-    if (ret < 0) {
-        pthread_mutex_unlock(&p->mutex);
-        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
-        return ret;
-    }
-
     atomic_store(&p->state, STATE_SETTING_UP);
     pthread_cond_signal(&p->input_cond);
     pthread_mutex_unlock(&p->mutex);
@@ -539,57 +616,42 @@ FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
     fctx->prev_thread = p;
-    fctx->next_decoding++;
+    fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
 
     return 0;
 }
 
-int ff_thread_decode_frame(AVCodecContext *avctx,
-                           AVFrame *picture, int *got_picture_ptr,
-                           AVPacket *avpkt)
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     FrameThreadContext *fctx = avctx->internal->thread_ctx;
-    int finished = fctx->next_finished;
-    PerThreadContext *p;
-    int err;
+    int ret = 0;
 
     /* release the async lock, permitting blocked hwaccel threads to
      * go forward while we are in this function */
     async_unlock(fctx);
 
-    /*
-     * Submit a packet to the next decoding thread.
-     */
+    /* submit packets to threads while there are no buffered results to return */
+    while (!fctx->df.nb_f && !fctx->result) {
+        PerThreadContext *p;
 
-    p = &fctx->threads[fctx->next_decoding];
-    err = submit_packet(p, avctx, avpkt);
-    if (err)
-        goto finish;
-
-    /*
-     * If we're still receiving the initial packets, don't return a frame.
-     */
-
-    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
-        fctx->delaying = 0;
+        /* get a packet to be submitted to the next thread */
+        av_packet_unref(fctx->next_pkt);
+        ret = ff_decode_get_packet(avctx, fctx->next_pkt);
+        if (ret < 0 && ret != AVERROR_EOF)
+            goto finish;
 
-    if (fctx->delaying) {
-        *got_picture_ptr=0;
-        if (avpkt->size) {
-            err = avpkt->size;
+        ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
+                            fctx->next_pkt);
+        if (ret < 0)
             goto finish;
-        }
-    }
 
-    /*
-     * Return the next available frame from the oldest thread.
-     * If we're at the end of the stream, then we have to skip threads that
-     * didn't output a frame/error, because we don't want to accidentally signal
-     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
-     */
+        /* do not return any frames until all threads have something to do */
+        if (fctx->next_decoding != fctx->next_finished &&
+            !avctx->internal->draining)
+            continue;
 
-    do {
-        p = &fctx->threads[finished++];
+        p                   = &fctx->threads[fctx->next_finished];
+        fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
 
         if (atomic_load(&p->state) != STATE_INPUT_READY) {
             pthread_mutex_lock(&p->progress_mutex);
@@ -598,35 +660,28 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
             pthread_mutex_unlock(&p->progress_mutex);
         }
 
-        av_frame_move_ref(picture, p->frame);
-        *got_picture_ptr = p->got_frame;
-        picture->pkt_dts = p->avpkt->dts;
-        err = p->result;
-
-        /*
-         * A later call with avkpt->size == 0 may loop over all threads,
-         * including this one, searching for a frame/error to return before being
-         * stopped by the "finished != fctx->next_finished" condition.
-         * Make sure we don't mistakenly return the same frame/error again.
-         */
-        p->got_frame = 0;
-        p->result = 0;
-
-        if (finished >= avctx->thread_count) finished = 0;
-    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
+        update_context_from_thread(avctx, p->avctx, 1);
 
-    update_context_from_thread(avctx, p->avctx, 1);
+        fctx->result = p->result;
+        p->result    = 0;
 
-    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+        if (p->df.nb_f)
+            FFSWAP(DecodedFrames, fctx->df, p->df);
+    }
 
-    fctx->next_finished = finished;
+    /* a thread may return multiple frames AND an error
+     * we first return all the frames, then the error */
+    if (fctx->df.nb_f) {
+        decoded_frames_pop(&fctx->df, frame);
+        ret = 0;
+    } else {
+        ret = fctx->result;
+        fctx->result = 0;
+    }
 
-    /* return the size of the consumed packet if no error occurred */
-    if (err >= 0)
-        err = avpkt->size;
 finish:
     async_lock(fctx);
-    return err;
+    return ret;
 }
 
 void ff_thread_report_progress(ThreadFrame *f, int n, int field)
@@ -726,7 +781,6 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
                 pthread_cond_wait(&p->output_cond, &p->progress_mutex);
             pthread_mutex_unlock(&p->progress_mutex);
         }
-        p->got_frame = 0;
     }
 
     async_lock(fctx);
@@ -784,11 +838,12 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
 
             av_buffer_unref(&ctx->internal->pool);
             av_packet_free(&ctx->internal->last_pkt_props);
+            av_packet_free(&ctx->internal->in_pkt);
             av_freep(&ctx->internal);
             av_buffer_unref(&ctx->hw_frames_ctx);
         }
 
-        av_frame_free(&p->frame);
+        decoded_frames_free(&p->df);
 
         ff_pthread_free(p, per_thread_offsets);
         av_packet_free(&p->avpkt);
@@ -796,6 +851,9 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
         av_freep(&p->avctx);
     }
 
+    decoded_frames_free(&fctx->df);
+    av_packet_free(&fctx->next_pkt);
+
     av_freep(&fctx->threads);
     ff_pthread_free(fctx, thread_ctx_offsets);
 
@@ -854,13 +912,17 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
     if (err < 0)
         return err;
 
-    if (!(p->frame = av_frame_alloc()) ||
-        !(p->avpkt = av_packet_alloc()))
+    if (!(p->avpkt = av_packet_alloc()))
         return AVERROR(ENOMEM);
 
+    copy->internal->is_frame_mt = 1;
     if (!first)
         copy->internal->is_copy = 1;
 
+    copy->internal->in_pkt = av_packet_alloc();
+    if (!copy->internal->in_pkt)
+        return AVERROR(ENOMEM);
+
     copy->internal->last_pkt_props = av_packet_alloc();
     if (!copy->internal->last_pkt_props)
         return AVERROR(ENOMEM);
@@ -920,6 +982,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
         return err;
     }
 
+    fctx->next_pkt = av_packet_alloc();
+    if (!fctx->next_pkt)
+        return AVERROR(ENOMEM);
+
     fctx->async_lock = 1;
     fctx->delaying = 1;
 
@@ -964,17 +1030,28 @@ void ff_thread_flush(AVCodecContext *avctx)
     fctx->next_decoding = fctx->next_finished = 0;
     fctx->delaying = 1;
     fctx->prev_thread = NULL;
+
+    decoded_frames_flush(&fctx->df);
+
     for (i = 0; i < avctx->thread_count; i++) {
         PerThreadContext *p = &fctx->threads[i];
-        // Make sure decode flush calls with size=0 won't return old frames
-        p->got_frame = 0;
-        av_frame_unref(p->frame);
-        p->result = 0;
+
+        decoded_frames_flush(&p->df);
 
 #if FF_API_THREAD_SAFE_CALLBACKS
         release_delayed_buffers(p);
 #endif
 
+        av_packet_unref(p->avctx->internal->last_pkt_props);
+        av_packet_unref(p->avctx->internal->in_pkt);
+
+        p->avctx->pts_correction_last_pts =
+        p->avctx->pts_correction_last_dts = INT64_MIN;
+
+        p->avctx->internal->draining = 0;
+        p->avctx->internal->draining_done = 0;
+        p->avctx->internal->nb_draining_errors = 0;
+
         if (ffcodec(avctx->codec)->flush)
             ffcodec(avctx->codec)->flush(p->avctx);
     }
@@ -1193,3 +1270,15 @@ void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
     f->owner[0] = f->owner[1] = NULL;
     ff_thread_release_buffer(avctx, f->f);
 }
+
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    PerThreadContext *p = avctx->internal->thread_ctx;
+
+    if (p->avpkt->buf) {
+        av_packet_move_ref(pkt, p->avpkt);
+        return 0;
+    }
+
+    return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
+}
diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index d5673f25ea..7ae69990fb 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h
@@ -40,17 +40,12 @@
 void ff_thread_flush(AVCodecContext *avctx);
 
 /**
- * Submit a new frame to a decoding thread.
- * Returns the next available frame in picture. *got_picture_ptr
- * will be 0 if none is available.
- * The return value on success is the size of the consumed packet for
- * compatibility with FFCodec.decode. This means the decoder
- * has to consume the full packet.
+ * Submit available packets for decoding to worker threads, return a
+ * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
  *
- * Parameters are the same as FFCodec.decode.
+ * Parameters are the same as FFCodec.receive_frame.
  */
-int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
-                           int *got_picture_ptr, AVPacket *avpkt);
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
 /**
  * If the codec defines update_thread_context(), call this
@@ -99,6 +94,11 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
  */
 void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
 
+/**
+ * Get a packet for decoding. This gets invoked by the worker threads.
+ */
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
+
 int ff_thread_init(AVCodecContext *s);
 int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
         int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v6] lavc: convert frame threading to the receive_frame() pattern
  2022-12-14  1:50             ` [FFmpeg-devel] [PATCH v6] " Timo Rothenpieler
@ 2022-12-15 22:10               ` Michael Niedermayer
  2022-12-19 11:37               ` Timo Rothenpieler
  2022-12-20 11:45               ` Andreas Rheinhardt
  2 siblings, 0 replies; 30+ messages in thread
From: Michael Niedermayer @ 2022-12-15 22:10 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 924 bytes --]

On Wed, Dec 14, 2022 at 02:50:59AM +0100, Timo Rothenpieler wrote:
> From: Anton Khirnov <anton@khirnov.net>
> 
> Reorganize the code such that the frame threading code does not call the
> decoders directly, but instead calls back into the generic decoding
> code. This avoids duplicating the logic that wraps the decoder
> invocation and will be useful in the following commits.
> ---
>  libavcodec/decode.c        |  62 ++++++---
>  libavcodec/decode.h        |   7 +
>  libavcodec/internal.h      |   7 +
>  libavcodec/pthread_frame.c | 279 ++++++++++++++++++++++++-------------
>  libavcodec/thread.h        |  18 +--
>  5 files changed, 247 insertions(+), 126 deletions(-)

I didn't review the code changes, but
it seems to be working now

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No great genius has ever existed without some touch of madness. -- Aristotle

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v6] lavc: convert frame threading to the receive_frame() pattern
  2022-12-14  1:50             ` [FFmpeg-devel] [PATCH v6] " Timo Rothenpieler
  2022-12-15 22:10               ` Michael Niedermayer
@ 2022-12-19 11:37               ` Timo Rothenpieler
  2022-12-20 11:45               ` Andreas Rheinhardt
  2 siblings, 0 replies; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-19 11:37 UTC (permalink / raw)
  To: ffmpeg-devel

On 14/12/2022 02:50, Timo Rothenpieler wrote:
> From: Anton Khirnov <anton@khirnov.net>
> 
> Reorganize the code such that the frame threading code does not call the
> decoders directly, but instead calls back into the generic decoding
> code. This avoids duplicating the logic that wraps the decoder
> invocation and will be useful in the following commits.

Will apply series in 24h if nobody objects.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v6] lavc: convert frame threading to the receive_frame() pattern
  2022-12-14  1:50             ` [FFmpeg-devel] [PATCH v6] " Timo Rothenpieler
  2022-12-15 22:10               ` Michael Niedermayer
  2022-12-19 11:37               ` Timo Rothenpieler
@ 2022-12-20 11:45               ` Andreas Rheinhardt
  2022-12-20 16:06                 ` Timo Rothenpieler
  2022-12-21 14:22                 ` [FFmpeg-devel] [PATCH v7 1/2] " Timo Rothenpieler
  2 siblings, 2 replies; 30+ messages in thread
From: Andreas Rheinhardt @ 2022-12-20 11:45 UTC (permalink / raw)
  To: ffmpeg-devel

Timo Rothenpieler:
> From: Anton Khirnov <anton@khirnov.net>
> 
> Reorganize the code such that the frame threading code does not call the
> decoders directly, but instead calls back into the generic decoding
> code. This avoids duplicating the logic that wraps the decoder
> invocation and will be useful in the following commits.
> ---
>  libavcodec/decode.c        |  62 ++++++---
>  libavcodec/decode.h        |   7 +
>  libavcodec/internal.h      |   7 +
>  libavcodec/pthread_frame.c | 279 ++++++++++++++++++++++++-------------
>  libavcodec/thread.h        |  18 +--
>  5 files changed, 247 insertions(+), 126 deletions(-)
> 
> diff --git a/libavcodec/decode.c b/libavcodec/decode.c
> index 3e5be501b9..5d2b484b66 100644
> --- a/libavcodec/decode.c
> +++ b/libavcodec/decode.c
> @@ -180,6 +180,11 @@ fail:
>      return ret;
>  }
>  
> +#if !HAVE_THREADS
> +#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
> +#define ff_thread_receive_frame(avctx, frame) (AVERROR_BUG)
> +#endif
> +
>  int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
>  {
>      AVCodecInternal *avci = avctx->internal;
> @@ -188,7 +193,14 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
>      if (avci->draining)
>          return AVERROR_EOF;
>  
> -    ret = av_bsf_receive_packet(avci->bsf, pkt);
> +    /* If we are a worker thread, get the next packet from the threading
> +     * context. Otherwise we are the main (user-facing) context, so we get the
> +     * next packet from the input filterchain.
> +     */
> +    if (avctx->internal->is_frame_mt)
> +        ret = ff_thread_get_packet(avctx, pkt);
> +    else
> +        ret = av_bsf_receive_packet(avci->bsf, pkt);
>      if (ret == AVERROR_EOF)
>          avci->draining = 1;
>      if (ret < 0)
> @@ -273,30 +285,25 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
>          return AVERROR_EOF;
>  
>      if (!pkt->data &&
> -        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
> -          avctx->active_thread_type & FF_THREAD_FRAME))
> +        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
>          return AVERROR_EOF;
>  
>      got_frame = 0;
>  
> -    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
> -        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
> -    } else {
> -        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
> -
> -        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
> -            frame->pkt_dts = pkt->dts;
> -        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
> -            if(!avctx->has_b_frames)
> -                frame->pkt_pos = pkt->pos;
> -            //FIXME these should be under if(!avctx->has_b_frames)
> -            /* get_buffer is supposed to set frame parameters */
> -            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
> -                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
> -                if (!frame->width)                    frame->width               = avctx->width;
> -                if (!frame->height)                   frame->height              = avctx->height;
> -                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
> -            }
> +    ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
> +
> +    if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
> +        frame->pkt_dts = pkt->dts;
> +    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
> +        if(!avctx->has_b_frames)
> +            frame->pkt_pos = pkt->pos;
> +        //FIXME these should be under if(!avctx->has_b_frames)
> +        /* get_buffer is supposed to set frame parameters */
> +        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
> +            if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
> +            if (!frame->width)                    frame->width               = avctx->width;
> +            if (!frame->height)                   frame->height              = avctx->height;
> +            if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
>          }
>      }
>      emms_c();
> @@ -546,7 +553,7 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
>      return 0;
>  }
>  
> -static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
> +int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
>  {
>      AVCodecInternal *avci = avctx->internal;
>      const FFCodec *const codec = ffcodec(avctx->codec);
> @@ -569,6 +576,17 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
>          return ok;
>      }

You keep detect_colorspace() (the ICC stuff) in the worker threads; this
means that more cmsContexts need to be allocated. Given that the
cmsContexts are not synced between threads, this also might possibly
mean that the output depends upon the number of threads (depending upon
how state-based fflcms2 is). Is there a downside to putting this alongside
the FrameDecodeData post-processing in the main thread?

>  
> +    return ret;
> +}
> +
> +static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
> +{
> +    int ret;
> +    if (avctx->active_thread_type & FF_THREAD_FRAME)
> +        ret = ff_thread_receive_frame(avctx, frame);
> +    else
> +        ret = ff_decode_receive_frame_internal(avctx, frame);
> +
>      if (!ret) {
>          frame->best_effort_timestamp = guess_correct_pts(avctx,
>                                                           frame->pts,
> diff --git a/libavcodec/decode.h b/libavcodec/decode.h
> index 906122b4a7..7ba8e3a332 100644
> --- a/libavcodec/decode.h
> +++ b/libavcodec/decode.h
> @@ -58,6 +58,13 @@ typedef struct FrameDecodeData {
>   */
>  int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
>  
> +/**
> + * Do the actual decoding and obtain a decoded frame from the decoder, if
> + * available. When frame threading is used, this is invoked by the worker
> + * threads, otherwise by the top layer directly.
> + */
> +int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
> +
>  /**
>   * Called by decoders to get the next packet for decoding.
>   *
> diff --git a/libavcodec/internal.h b/libavcodec/internal.h
> index a283c52e01..c87036efc4 100644
> --- a/libavcodec/internal.h
> +++ b/libavcodec/internal.h
> @@ -56,6 +56,13 @@ typedef struct AVCodecInternal {
>       */
>      int is_copy;
>  
> +    /**
> +     * This field is set to 1 when frame threading is being used and the parent
> +     * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
> +     * one of those actually doing the decoding), 0 otherwise.
> +     */
> +    int is_frame_mt;
> +
>      /**
>       * An audio frame with less than required samples has been submitted (and
>       * potentially padded with silence). Reject all subsequent frames.
> diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
> index 62a0b18a8a..d8182cb4b8 100644
> --- a/libavcodec/pthread_frame.c
> +++ b/libavcodec/pthread_frame.c
> @@ -46,6 +46,7 @@
>  #include "libavutil/log.h"
>  #include "libavutil/mem.h"
>  #include "libavutil/opt.h"
> +#include "libavutil/fifo.h"
>  #include "libavutil/thread.h"
>  
>  enum {
> @@ -73,6 +74,12 @@ enum {
>      INITIALIZED,    ///< Thread has been properly set up
>  };
>  
> +typedef struct DecodedFrames {
> +    AVFrame  **f;
> +    size_t  nb_f;
> +    size_t  nb_f_allocated;
> +} DecodedFrames;
> +
>  /**
>   * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
>   */
> @@ -93,8 +100,10 @@ typedef struct PerThreadContext {
>  
>      AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
>  
> -    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
> -    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
> +    /**
> +     * Decoded frames from a single decode iteration.
> +     */
> +    DecodedFrames df;
>      int     result;                 ///< The result of the last codec decode/encode() call.
>  
>      atomic_int state;
> @@ -141,6 +150,14 @@ typedef struct FrameThreadContext {
>      pthread_cond_t async_cond;
>      int async_lock;
>  
> +    DecodedFrames df;
> +    int result;
> +
> +    /**
> +     * Packet to be submitted to the next thread for decoding.
> +     */
> +    AVPacket *next_pkt;
> +
>      int next_decoding;             ///< The next context to submit a packet to.
>      int next_finished;             ///< The next context to return output from.
>  
> @@ -190,6 +207,51 @@ static void thread_set_name(PerThreadContext *p)
>      ff_thread_setname(name);
>  }
>  
> +// get a free frame to decode into
> +static AVFrame *decoded_frames_get_free(DecodedFrames *df)
> +{
> +    if (df->nb_f == df->nb_f_allocated) {
> +        AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
> +                                         sizeof(*df->f));
> +        if (!tmp)
> +            return NULL;
> +        df->f = tmp;
> +
> +        df->f[df->nb_f] = av_frame_alloc();
> +        if (!df->f[df->nb_f])
> +            return NULL;
> +
> +        df->nb_f_allocated++;
> +    }
> +
> +    av_frame_unref(df->f[df->nb_f]);

Instead of unrefing here you should ensure that the frames corresponding
to indices nb_f..nb_f_allocated-1 are always blank.

> +    return df->f[df->nb_f];
> +}
> +
> +static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
> +{
> +    AVFrame *tmp_frame = df->f[0];
> +    av_frame_move_ref(dst, tmp_frame);
> +    memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
> +    df->f[--df->nb_f] = tmp_frame;
> +}
> +
> +static void decoded_frames_flush(DecodedFrames *df)
> +{
> +    for (int i = 0; i < df->nb_f; i++)

nb_f is a size_t; the iterator or nb_f should be made to match, one way
or another.
Same below.

> +        av_frame_unref(df->f[i]);
> +    df->nb_f = 0;
> +}
> +
> +static void decoded_frames_free(DecodedFrames *df)
> +{
> +    for (int i = 0; i < df->nb_f_allocated; i++)
> +        av_frame_free(&df->f[i]);
> +    av_freep(&df->f);
> +    df->nb_f           = 0;
> +    df->nb_f_allocated = 0;
> +}
> +
>  /**
>   * Codec worker thread.
>   *
> @@ -202,6 +264,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
>      PerThreadContext *p = arg;
>      AVCodecContext *avctx = p->avctx;
>      const FFCodec *codec = ffcodec(avctx->codec);
> +    int ret;
>  
>      thread_set_name(p);
>  
> @@ -236,16 +299,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
>              p->hwaccel_serializing = 1;
>          }
>  
> -        av_frame_unref(p->frame);
> -        p->got_frame = 0;
> -        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
> +        ret = 0;
> +        while (ret >= 0) {
> +            AVFrame *frame;
>  
> -        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
> -            ff_thread_release_buffer(avctx, p->frame);
> +            /* get the frame which will store the output */
> +            frame = decoded_frames_get_free(&p->df);
> +            if (!frame) {
> +                p->result = AVERROR(ENOMEM);
> +                goto alloc_fail;
> +            }
> +
> +            /* do the actual decoding */
> +            ret = ff_decode_receive_frame_internal(avctx, frame);
> +            if (ret == 0)
> +                p->df.nb_f++;
> +            else if (ret < 0 && frame->buf[0])
> +                ff_thread_release_buffer(avctx, frame);
> +
> +            p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
> +        }
>  
>          if (atomic_load(&p->state) == STATE_SETTING_UP)
>              ff_thread_finish_setup(avctx);
>  
> +alloc_fail:
>          if (p->hwaccel_serializing) {
>              /* wipe hwaccel state to avoid stale pointers lying around;
>               * the state was transferred to FrameThreadContext in
> @@ -441,23 +519,25 @@ static void release_delayed_buffers(PerThreadContext *p)
>  #endif
>  
>  static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
> -                         AVPacket *avpkt)
> +                         AVPacket *in_pkt)
>  {
>      FrameThreadContext *fctx = p->parent;
>      PerThreadContext *prev_thread = fctx->prev_thread;
> -    const AVCodec *codec = p->avctx->codec;
> -    int ret;
> -
> -    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
> -        return 0;
> +    int err;
>  
>      pthread_mutex_lock(&p->mutex);
>  
> -    ret = update_context_from_user(p->avctx, user_avctx);
> -    if (ret) {
> +    av_packet_unref(p->avpkt);
> +    av_packet_move_ref(p->avpkt, in_pkt);
> +    if (!p->avpkt->size)
> +        p->avctx->internal->draining = 1;
> +
> +    err = update_context_from_user(p->avctx, user_avctx);
> +    if (err < 0) {
>          pthread_mutex_unlock(&p->mutex);
> -        return ret;
> +        return err;
>      }
> +
>      atomic_store_explicit(&p->debug_threads,
>                            (p->avctx->debug & FF_DEBUG_THREADS) != 0,
>                            memory_order_relaxed);
> @@ -467,7 +547,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
>  #endif
>  
>      if (prev_thread) {
> -        int err;
>          if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
>              pthread_mutex_lock(&prev_thread->progress_mutex);
>              while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
> @@ -475,10 +554,16 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
>              pthread_mutex_unlock(&prev_thread->progress_mutex);
>          }
>  
> -        err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
> -        if (err) {
> -            pthread_mutex_unlock(&p->mutex);
> -            return err;
> +        /* codecs without delay might not be prepared to be called repeatedly here during
> +         * flushing (vp3/theora), and also don't need to be, since from this point on, they
> +         * will always return EOF anyway */
> +        if (!p->avctx->internal->draining || (p->avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
> +        {
> +            err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
> +            if (err) {
> +                pthread_mutex_unlock(&p->mutex);
> +                return err;
> +            }
>          }
>      }
>  
> @@ -488,14 +573,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
>      FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
>      FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
>  
> -    av_packet_unref(p->avpkt);
> -    ret = av_packet_ref(p->avpkt, avpkt);
> -    if (ret < 0) {
> -        pthread_mutex_unlock(&p->mutex);
> -        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
> -        return ret;
> -    }
> -
>      atomic_store(&p->state, STATE_SETTING_UP);
>      pthread_cond_signal(&p->input_cond);
>      pthread_mutex_unlock(&p->mutex);
> @@ -539,57 +616,42 @@ FF_ENABLE_DEPRECATION_WARNINGS
>  #endif
>  
>      fctx->prev_thread = p;
> -    fctx->next_decoding++;
> +    fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
>  
>      return 0;
>  }
>  
> -int ff_thread_decode_frame(AVCodecContext *avctx,
> -                           AVFrame *picture, int *got_picture_ptr,
> -                           AVPacket *avpkt)
> +int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
>  {
>      FrameThreadContext *fctx = avctx->internal->thread_ctx;
> -    int finished = fctx->next_finished;
> -    PerThreadContext *p;
> -    int err;
> +    int ret = 0;
>  
>      /* release the async lock, permitting blocked hwaccel threads to
>       * go forward while we are in this function */
>      async_unlock(fctx);
>  
> -    /*
> -     * Submit a packet to the next decoding thread.
> -     */
> +    /* submit packets to threads while there are no buffered results to return */
> +    while (!fctx->df.nb_f && !fctx->result) {
> +        PerThreadContext *p;
>  
> -    p = &fctx->threads[fctx->next_decoding];
> -    err = submit_packet(p, avctx, avpkt);
> -    if (err)
> -        goto finish;
> -
> -    /*
> -     * If we're still receiving the initial packets, don't return a frame.
> -     */
> -
> -    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
> -        fctx->delaying = 0;
> +        /* get a packet to be submitted to the next thread */
> +        av_packet_unref(fctx->next_pkt);
> +        ret = ff_decode_get_packet(avctx, fctx->next_pkt);
> +        if (ret < 0 && ret != AVERROR_EOF)
> +            goto finish;
>  
> -    if (fctx->delaying) {
> -        *got_picture_ptr=0;
> -        if (avpkt->size) {
> -            err = avpkt->size;
> +        ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
> +                            fctx->next_pkt);
> +        if (ret < 0)
>              goto finish;
> -        }
> -    }
>  
> -    /*
> -     * Return the next available frame from the oldest thread.
> -     * If we're at the end of the stream, then we have to skip threads that
> -     * didn't output a frame/error, because we don't want to accidentally signal
> -     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
> -     */
> +        /* do not return any frames until all threads have something to do */
> +        if (fctx->next_decoding != fctx->next_finished &&
> +            !avctx->internal->draining)
> +            continue;
>  
> -    do {
> -        p = &fctx->threads[finished++];
> +        p                   = &fctx->threads[fctx->next_finished];
> +        fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
>  
>          if (atomic_load(&p->state) != STATE_INPUT_READY) {
>              pthread_mutex_lock(&p->progress_mutex);
> @@ -598,35 +660,28 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
>              pthread_mutex_unlock(&p->progress_mutex);
>          }
>  
> -        av_frame_move_ref(picture, p->frame);
> -        *got_picture_ptr = p->got_frame;
> -        picture->pkt_dts = p->avpkt->dts;
> -        err = p->result;
> -
> -        /*
> -         * A later call with avkpt->size == 0 may loop over all threads,
> -         * including this one, searching for a frame/error to return before being
> -         * stopped by the "finished != fctx->next_finished" condition.
> -         * Make sure we don't mistakenly return the same frame/error again.
> -         */
> -        p->got_frame = 0;
> -        p->result = 0;
> -
> -        if (finished >= avctx->thread_count) finished = 0;
> -    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
> +        update_context_from_thread(avctx, p->avctx, 1);
>  
> -    update_context_from_thread(avctx, p->avctx, 1);
> +        fctx->result = p->result;
> +        p->result    = 0;
>  
> -    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
> +        if (p->df.nb_f)
> +            FFSWAP(DecodedFrames, fctx->df, p->df);
> +    }
>  
> -    fctx->next_finished = finished;
> +    /* a thread may return multiple frames AND an error
> +     * we first return all the frames, then the error */
> +    if (fctx->df.nb_f) {
> +        decoded_frames_pop(&fctx->df, frame);
> +        ret = 0;
> +    } else {
> +        ret = fctx->result;
> +        fctx->result = 0;
> +    }
>  
> -    /* return the size of the consumed packet if no error occurred */
> -    if (err >= 0)
> -        err = avpkt->size;
>  finish:
>      async_lock(fctx);
> -    return err;
> +    return ret;
>  }
>  
>  void ff_thread_report_progress(ThreadFrame *f, int n, int field)
> @@ -726,7 +781,6 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
>                  pthread_cond_wait(&p->output_cond, &p->progress_mutex);
>              pthread_mutex_unlock(&p->progress_mutex);
>          }
> -        p->got_frame = 0;
>      }
>  
>      async_lock(fctx);
> @@ -784,11 +838,12 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
>  
>              av_buffer_unref(&ctx->internal->pool);
>              av_packet_free(&ctx->internal->last_pkt_props);
> +            av_packet_free(&ctx->internal->in_pkt);
>              av_freep(&ctx->internal);
>              av_buffer_unref(&ctx->hw_frames_ctx);
>          }
>  
> -        av_frame_free(&p->frame);
> +        decoded_frames_free(&p->df);
>  
>          ff_pthread_free(p, per_thread_offsets);
>          av_packet_free(&p->avpkt);
> @@ -796,6 +851,9 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
>          av_freep(&p->avctx);
>      }
>  
> +    decoded_frames_free(&fctx->df);
> +    av_packet_free(&fctx->next_pkt);
> +
>      av_freep(&fctx->threads);
>      ff_pthread_free(fctx, thread_ctx_offsets);
>  
> @@ -854,13 +912,17 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
>      if (err < 0)
>          return err;
>  
> -    if (!(p->frame = av_frame_alloc()) ||
> -        !(p->avpkt = av_packet_alloc()))
> +    if (!(p->avpkt = av_packet_alloc()))
>          return AVERROR(ENOMEM);
>  
> +    copy->internal->is_frame_mt = 1;
>      if (!first)
>          copy->internal->is_copy = 1;
>  
> +    copy->internal->in_pkt = av_packet_alloc();
> +    if (!copy->internal->in_pkt)
> +        return AVERROR(ENOMEM);
> +
>      copy->internal->last_pkt_props = av_packet_alloc();
>      if (!copy->internal->last_pkt_props)
>          return AVERROR(ENOMEM);
> @@ -920,6 +982,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
>          return err;
>      }
>  
> +    fctx->next_pkt = av_packet_alloc();
> +    if (!fctx->next_pkt)
> +        return AVERROR(ENOMEM);

Did you test whether cleanup works when this allocation fails? I doubt
it does. You are entering ff_frame_thread_free() here with
thread_count > 0, yet you have not initialized a single worker thread.

> +
>      fctx->async_lock = 1;
>      fctx->delaying = 1;
>  
> @@ -964,17 +1030,28 @@ void ff_thread_flush(AVCodecContext *avctx)
>      fctx->next_decoding = fctx->next_finished = 0;
>      fctx->delaying = 1;
>      fctx->prev_thread = NULL;
> +
> +    decoded_frames_flush(&fctx->df);
> +
>      for (i = 0; i < avctx->thread_count; i++) {
>          PerThreadContext *p = &fctx->threads[i];
> -        // Make sure decode flush calls with size=0 won't return old frames
> -        p->got_frame = 0;
> -        av_frame_unref(p->frame);
> -        p->result = 0;
> +
> +        decoded_frames_flush(&p->df);
>  
>  #if FF_API_THREAD_SAFE_CALLBACKS
>          release_delayed_buffers(p);
>  #endif
>  
> +        av_packet_unref(p->avctx->internal->last_pkt_props);
> +        av_packet_unref(p->avctx->internal->in_pkt);
> +
> +        p->avctx->pts_correction_last_pts =
> +        p->avctx->pts_correction_last_dts = INT64_MIN;
> +
> +        p->avctx->internal->draining = 0;
> +        p->avctx->internal->draining_done = 0;
> +        p->avctx->internal->nb_draining_errors = 0;
> +
>          if (ffcodec(avctx->codec)->flush)
>              ffcodec(avctx->codec)->flush(p->avctx);
>      }
> @@ -1193,3 +1270,15 @@ void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
>      f->owner[0] = f->owner[1] = NULL;
>      ff_thread_release_buffer(avctx, f->f);
>  }
> +
> +int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
> +{
> +    PerThreadContext *p = avctx->internal->thread_ctx;
> +
> +    if (p->avpkt->buf) {
> +        av_packet_move_ref(pkt, p->avpkt);
> +        return 0;
> +    }
> +
> +    return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
> +}
> diff --git a/libavcodec/thread.h b/libavcodec/thread.h
> index d5673f25ea..7ae69990fb 100644
> --- a/libavcodec/thread.h
> +++ b/libavcodec/thread.h
> @@ -40,17 +40,12 @@
>  void ff_thread_flush(AVCodecContext *avctx);
>  
>  /**
> - * Submit a new frame to a decoding thread.
> - * Returns the next available frame in picture. *got_picture_ptr
> - * will be 0 if none is available.
> - * The return value on success is the size of the consumed packet for
> - * compatibility with FFCodec.decode. This means the decoder
> - * has to consume the full packet.
> + * Submit available packets for decoding to worker threads, return a
> + * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
>   *
> - * Parameters are the same as FFCodec.decode.
> + * Parameters are the same as FFCodec.receive_frame.
>   */
> -int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
> -                           int *got_picture_ptr, AVPacket *avpkt);
> +int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
>  
>  /**
>   * If the codec defines update_thread_context(), call this
> @@ -99,6 +94,11 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
>   */
>  void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
>  
> +/**
> + * Get a packet for decoding. This gets invoked by the worker threads.
> + */
> +int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
> +
>  int ff_thread_init(AVCodecContext *s);
>  int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
>          int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),

Generally, I don't see the advantage of this. The only decoder that
would benefit from it is SMVJPEG, yet for all other codecs it will just
involve more allocs+frees.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v6] lavc: convert frame threading to the receive_frame() pattern
  2022-12-20 11:45               ` Andreas Rheinhardt
@ 2022-12-20 16:06                 ` Timo Rothenpieler
  2022-12-20 16:15                   ` James Almer
  2022-12-21 14:22                 ` [FFmpeg-devel] [PATCH v7 1/2] " Timo Rothenpieler
  1 sibling, 1 reply; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-20 16:06 UTC (permalink / raw)
  To: ffmpeg-devel

On 20.12.2022 12:45, Andreas Rheinhardt wrote:
> Timo Rothenpieler:
>> From: Anton Khirnov <anton@khirnov.net>
>>
>> Reorganize the code such that the frame threading code does not call the
>> decoders directly, but instead calls back into the generic decoding
>> code. This avoids duplicating the logic that wraps the decoder
>> invocation and will be useful in the following commits.
>> ---
>>   libavcodec/decode.c        |  62 ++++++---
>>   libavcodec/decode.h        |   7 +
>>   libavcodec/internal.h      |   7 +
>>   libavcodec/pthread_frame.c | 279 ++++++++++++++++++++++++-------------
>>   libavcodec/thread.h        |  18 +--
>>   5 files changed, 247 insertions(+), 126 deletions(-)
>>
>> diff --git a/libavcodec/decode.c b/libavcodec/decode.c
>> index 3e5be501b9..5d2b484b66 100644
>> --- a/libavcodec/decode.c
>> +++ b/libavcodec/decode.c
>> @@ -180,6 +180,11 @@ fail:
>>       return ret;
>>   }
>>   
>> +#if !HAVE_THREADS
>> +#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
>> +#define ff_thread_receive_frame(avctx, frame) (AVERROR_BUG)
>> +#endif
>> +
>>   int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
>>   {
>>       AVCodecInternal *avci = avctx->internal;
>> @@ -188,7 +193,14 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
>>       if (avci->draining)
>>           return AVERROR_EOF;
>>   
>> -    ret = av_bsf_receive_packet(avci->bsf, pkt);
>> +    /* If we are a worker thread, get the next packet from the threading
>> +     * context. Otherwise we are the main (user-facing) context, so we get the
>> +     * next packet from the input filterchain.
>> +     */
>> +    if (avctx->internal->is_frame_mt)
>> +        ret = ff_thread_get_packet(avctx, pkt);
>> +    else
>> +        ret = av_bsf_receive_packet(avci->bsf, pkt);
>>       if (ret == AVERROR_EOF)
>>           avci->draining = 1;
>>       if (ret < 0)
>> @@ -273,30 +285,25 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
>>           return AVERROR_EOF;
>>   
>>       if (!pkt->data &&
>> -        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
>> -          avctx->active_thread_type & FF_THREAD_FRAME))
>> +        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
>>           return AVERROR_EOF;
>>   
>>       got_frame = 0;
>>   
>> -    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
>> -        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
>> -    } else {
>> -        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
>> -
>> -        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
>> -            frame->pkt_dts = pkt->dts;
>> -        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
>> -            if(!avctx->has_b_frames)
>> -                frame->pkt_pos = pkt->pos;
>> -            //FIXME these should be under if(!avctx->has_b_frames)
>> -            /* get_buffer is supposed to set frame parameters */
>> -            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
>> -                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
>> -                if (!frame->width)                    frame->width               = avctx->width;
>> -                if (!frame->height)                   frame->height              = avctx->height;
>> -                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
>> -            }
>> +    ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
>> +
>> +    if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
>> +        frame->pkt_dts = pkt->dts;
>> +    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
>> +        if(!avctx->has_b_frames)
>> +            frame->pkt_pos = pkt->pos;
>> +        //FIXME these should be under if(!avctx->has_b_frames)
>> +        /* get_buffer is supposed to set frame parameters */
>> +        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
>> +            if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
>> +            if (!frame->width)                    frame->width               = avctx->width;
>> +            if (!frame->height)                   frame->height              = avctx->height;
>> +            if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
>>           }
>>       }
>>       emms_c();
>> @@ -546,7 +553,7 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
>>       return 0;
>>   }
>>   
>> -static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
>> +int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
>>   {
>>       AVCodecInternal *avci = avctx->internal;
>>       const FFCodec *const codec = ffcodec(avctx->codec);
>> @@ -569,6 +576,17 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
>>           return ok;
>>       }
> 
> You keep detect_colorspace() (the ICC stuff) in the worker threads; this
> means that more cmsContexts need to be allocated. Given that the
> cmsContexts are not synced between threads, this also might possibly
> mean that the output depends upon the number of threads (depending upon
> how state-based fflcms2 is). Is there a downside to putting this beside
> the FrameDecodeData post-processing in the main thread?

Just to keep as much processing in the threads as possible.
The initial patch had even more stuff inside of the threads (everything, 
including the post processing), which actually broke some stuff.

If that's causing issues for even the colorspace detection, the worker 
thread can just call the callback directly as it used to, and all logic 
and functions here can be kept as they were.

>>   
>> +    return ret;
>> +}
>> +
>> +static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
>> +{
>> +    int ret;
>> +    if (avctx->active_thread_type & FF_THREAD_FRAME)
>> +        ret = ff_thread_receive_frame(avctx, frame);
>> +    else
>> +        ret = ff_decode_receive_frame_internal(avctx, frame);
>> +
>>       if (!ret) {
>>           frame->best_effort_timestamp = guess_correct_pts(avctx,
>>                                                            frame->pts,
>> diff --git a/libavcodec/decode.h b/libavcodec/decode.h
>> index 906122b4a7..7ba8e3a332 100644
>> --- a/libavcodec/decode.h
>> +++ b/libavcodec/decode.h
>> @@ -58,6 +58,13 @@ typedef struct FrameDecodeData {
>>    */
>>   int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
>>   
>> +/**
>> + * Do the actual decoding and obtain a decoded frame from the decoder, if
>> + * available. When frame threading is used, this is invoked by the worker
>> + * threads, otherwise by the top layer directly.
>> + */
>> +int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
>> +
>>   /**
>>    * Called by decoders to get the next packet for decoding.
>>    *
>> diff --git a/libavcodec/internal.h b/libavcodec/internal.h
>> index a283c52e01..c87036efc4 100644
>> --- a/libavcodec/internal.h
>> +++ b/libavcodec/internal.h
>> @@ -56,6 +56,13 @@ typedef struct AVCodecInternal {
>>        */
>>       int is_copy;
>>   
>> +    /**
>> +     * This field is set to 1 when frame threading is being used and the parent
>> +     * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
>> +     * one of those actually doing the decoding), 0 otherwise.
>> +     */
>> +    int is_frame_mt;
>> +
>>       /**
>>        * An audio frame with less than required samples has been submitted (and
>>        * potentially padded with silence). Reject all subsequent frames.
>> diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
>> index 62a0b18a8a..d8182cb4b8 100644
>> --- a/libavcodec/pthread_frame.c
>> +++ b/libavcodec/pthread_frame.c
>> @@ -46,6 +46,7 @@
>>   #include "libavutil/log.h"
>>   #include "libavutil/mem.h"
>>   #include "libavutil/opt.h"
>> +#include "libavutil/fifo.h"
>>   #include "libavutil/thread.h"
>>   
>>   enum {
>> @@ -73,6 +74,12 @@ enum {
>>       INITIALIZED,    ///< Thread has been properly set up
>>   };
>>   
>> +typedef struct DecodedFrames {
>> +    AVFrame  **f;
>> +    size_t  nb_f;
>> +    size_t  nb_f_allocated;
>> +} DecodedFrames;
>> +
>>   /**
>>    * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
>>    */
>> @@ -93,8 +100,10 @@ typedef struct PerThreadContext {
>>   
>>       AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
>>   
>> -    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
>> -    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
>> +    /**
>> +     * Decoded frames from a single decode iteration.
>> +     */
>> +    DecodedFrames df;
>>       int     result;                 ///< The result of the last codec decode/encode() call.
>>   
>>       atomic_int state;
>> @@ -141,6 +150,14 @@ typedef struct FrameThreadContext {
>>       pthread_cond_t async_cond;
>>       int async_lock;
>>   
>> +    DecodedFrames df;
>> +    int result;
>> +
>> +    /**
>> +     * Packet to be submitted to the next thread for decoding.
>> +     */
>> +    AVPacket *next_pkt;
>> +
>>       int next_decoding;             ///< The next context to submit a packet to.
>>       int next_finished;             ///< The next context to return output from.
>>   
>> @@ -190,6 +207,51 @@ static void thread_set_name(PerThreadContext *p)
>>       ff_thread_setname(name);
>>   }
>>   
>> +// get a free frame to decode into
>> +static AVFrame *decoded_frames_get_free(DecodedFrames *df)
>> +{
>> +    if (df->nb_f == df->nb_f_allocated) {
>> +        AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
>> +                                         sizeof(*df->f));
>> +        if (!tmp)
>> +            return NULL;
>> +        df->f = tmp;
>> +
>> +        df->f[df->nb_f] = av_frame_alloc();
>> +        if (!df->f[df->nb_f])
>> +            return NULL;
>> +
>> +        df->nb_f_allocated++;
>> +    }
>> +
>> +    av_frame_unref(df->f[df->nb_f]);
> 
> Instead of unrefing here you should ensure that the frames corresponding
> to indices nb_f..nb_f_allocated-1 are always blank.
> 
>> +    return df->f[df->nb_f];
>> +}
>> +
>> +static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
>> +{
>> +    AVFrame *tmp_frame = df->f[0];
>> +    av_frame_move_ref(dst, tmp_frame);
>> +    memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
>> +    df->f[--df->nb_f] = tmp_frame;
>> +}
>> +
>> +static void decoded_frames_flush(DecodedFrames *df)
>> +{
>> +    for (int i = 0; i < df->nb_f; i++)
> 
> nb_f is a size_t; the iterator or nb_f should be made to match, one way
> or another.
> Same below.
> 
>> +        av_frame_unref(df->f[i]);
>> +    df->nb_f = 0;
>> +}
>> +
>> +static void decoded_frames_free(DecodedFrames *df)
>> +{
>> +    for (int i = 0; i < df->nb_f_allocated; i++)
>> +        av_frame_free(&df->f[i]);
>> +    av_freep(&df->f);
>> +    df->nb_f           = 0;
>> +    df->nb_f_allocated = 0;
>> +}
>> +
>>   /**
>>    * Codec worker thread.
>>    *
>> @@ -202,6 +264,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
>>       PerThreadContext *p = arg;
>>       AVCodecContext *avctx = p->avctx;
>>       const FFCodec *codec = ffcodec(avctx->codec);
>> +    int ret;
>>   
>>       thread_set_name(p);
>>   
>> @@ -236,16 +299,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
>>               p->hwaccel_serializing = 1;
>>           }
>>   
>> -        av_frame_unref(p->frame);
>> -        p->got_frame = 0;
>> -        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
>> +        ret = 0;
>> +        while (ret >= 0) {
>> +            AVFrame *frame;
>>   
>> -        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
>> -            ff_thread_release_buffer(avctx, p->frame);
>> +            /* get the frame which will store the output */
>> +            frame = decoded_frames_get_free(&p->df);
>> +            if (!frame) {
>> +                p->result = AVERROR(ENOMEM);
>> +                goto alloc_fail;
>> +            }
>> +
>> +            /* do the actual decoding */
>> +            ret = ff_decode_receive_frame_internal(avctx, frame);
>> +            if (ret == 0)
>> +                p->df.nb_f++;
>> +            else if (ret < 0 && frame->buf[0])
>> +                ff_thread_release_buffer(avctx, frame);
>> +
>> +            p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
>> +        }
>>   
>>           if (atomic_load(&p->state) == STATE_SETTING_UP)
>>               ff_thread_finish_setup(avctx);
>>   
>> +alloc_fail:
>>           if (p->hwaccel_serializing) {
>>               /* wipe hwaccel state to avoid stale pointers lying around;
>>                * the state was transferred to FrameThreadContext in
>> @@ -441,23 +519,25 @@ static void release_delayed_buffers(PerThreadContext *p)
>>   #endif
>>   
>>   static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
>> -                         AVPacket *avpkt)
>> +                         AVPacket *in_pkt)
>>   {
>>       FrameThreadContext *fctx = p->parent;
>>       PerThreadContext *prev_thread = fctx->prev_thread;
>> -    const AVCodec *codec = p->avctx->codec;
>> -    int ret;
>> -
>> -    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
>> -        return 0;
>> +    int err;
>>   
>>       pthread_mutex_lock(&p->mutex);
>>   
>> -    ret = update_context_from_user(p->avctx, user_avctx);
>> -    if (ret) {
>> +    av_packet_unref(p->avpkt);
>> +    av_packet_move_ref(p->avpkt, in_pkt);
>> +    if (!p->avpkt->size)
>> +        p->avctx->internal->draining = 1;
>> +
>> +    err = update_context_from_user(p->avctx, user_avctx);
>> +    if (err < 0) {
>>           pthread_mutex_unlock(&p->mutex);
>> -        return ret;
>> +        return err;
>>       }
>> +
>>       atomic_store_explicit(&p->debug_threads,
>>                             (p->avctx->debug & FF_DEBUG_THREADS) != 0,
>>                             memory_order_relaxed);
>> @@ -467,7 +547,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
>>   #endif
>>   
>>       if (prev_thread) {
>> -        int err;
>>           if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
>>               pthread_mutex_lock(&prev_thread->progress_mutex);
>>               while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
>> @@ -475,10 +554,16 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
>>               pthread_mutex_unlock(&prev_thread->progress_mutex);
>>           }
>>   
>> -        err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
>> -        if (err) {
>> -            pthread_mutex_unlock(&p->mutex);
>> -            return err;
>> +        /* codecs without delay might not be prepared to be called repeatedly here during
>> +         * flushing (vp3/theora), and also don't need to be, since from this point on, they
>> +         * will always return EOF anyway */
>> +        if (!p->avctx->internal->draining || (p->avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
>> +        {
>> +            err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
>> +            if (err) {
>> +                pthread_mutex_unlock(&p->mutex);
>> +                return err;
>> +            }
>>           }
>>       }
>>   
>> @@ -488,14 +573,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
>>       FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
>>       FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
>>   
>> -    av_packet_unref(p->avpkt);
>> -    ret = av_packet_ref(p->avpkt, avpkt);
>> -    if (ret < 0) {
>> -        pthread_mutex_unlock(&p->mutex);
>> -        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
>> -        return ret;
>> -    }
>> -
>>       atomic_store(&p->state, STATE_SETTING_UP);
>>       pthread_cond_signal(&p->input_cond);
>>       pthread_mutex_unlock(&p->mutex);
>> @@ -539,57 +616,42 @@ FF_ENABLE_DEPRECATION_WARNINGS
>>   #endif
>>   
>>       fctx->prev_thread = p;
>> -    fctx->next_decoding++;
>> +    fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
>>   
>>       return 0;
>>   }
>>   
>> -int ff_thread_decode_frame(AVCodecContext *avctx,
>> -                           AVFrame *picture, int *got_picture_ptr,
>> -                           AVPacket *avpkt)
>> +int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
>>   {
>>       FrameThreadContext *fctx = avctx->internal->thread_ctx;
>> -    int finished = fctx->next_finished;
>> -    PerThreadContext *p;
>> -    int err;
>> +    int ret = 0;
>>   
>>       /* release the async lock, permitting blocked hwaccel threads to
>>        * go forward while we are in this function */
>>       async_unlock(fctx);
>>   
>> -    /*
>> -     * Submit a packet to the next decoding thread.
>> -     */
>> +    /* submit packets to threads while there are no buffered results to return */
>> +    while (!fctx->df.nb_f && !fctx->result) {
>> +        PerThreadContext *p;
>>   
>> -    p = &fctx->threads[fctx->next_decoding];
>> -    err = submit_packet(p, avctx, avpkt);
>> -    if (err)
>> -        goto finish;
>> -
>> -    /*
>> -     * If we're still receiving the initial packets, don't return a frame.
>> -     */
>> -
>> -    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
>> -        fctx->delaying = 0;
>> +        /* get a packet to be submitted to the next thread */
>> +        av_packet_unref(fctx->next_pkt);
>> +        ret = ff_decode_get_packet(avctx, fctx->next_pkt);
>> +        if (ret < 0 && ret != AVERROR_EOF)
>> +            goto finish;
>>   
>> -    if (fctx->delaying) {
>> -        *got_picture_ptr=0;
>> -        if (avpkt->size) {
>> -            err = avpkt->size;
>> +        ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
>> +                            fctx->next_pkt);
>> +        if (ret < 0)
>>               goto finish;
>> -        }
>> -    }
>>   
>> -    /*
>> -     * Return the next available frame from the oldest thread.
>> -     * If we're at the end of the stream, then we have to skip threads that
>> -     * didn't output a frame/error, because we don't want to accidentally signal
>> -     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
>> -     */
>> +        /* do not return any frames until all threads have something to do */
>> +        if (fctx->next_decoding != fctx->next_finished &&
>> +            !avctx->internal->draining)
>> +            continue;
>>   
>> -    do {
>> -        p = &fctx->threads[finished++];
>> +        p                   = &fctx->threads[fctx->next_finished];
>> +        fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
>>   
>>           if (atomic_load(&p->state) != STATE_INPUT_READY) {
>>               pthread_mutex_lock(&p->progress_mutex);
>> @@ -598,35 +660,28 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
>>               pthread_mutex_unlock(&p->progress_mutex);
>>           }
>>   
>> -        av_frame_move_ref(picture, p->frame);
>> -        *got_picture_ptr = p->got_frame;
>> -        picture->pkt_dts = p->avpkt->dts;
>> -        err = p->result;
>> -
>> -        /*
>> -         * A later call with avkpt->size == 0 may loop over all threads,
>> -         * including this one, searching for a frame/error to return before being
>> -         * stopped by the "finished != fctx->next_finished" condition.
>> -         * Make sure we don't mistakenly return the same frame/error again.
>> -         */
>> -        p->got_frame = 0;
>> -        p->result = 0;
>> -
>> -        if (finished >= avctx->thread_count) finished = 0;
>> -    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
>> +        update_context_from_thread(avctx, p->avctx, 1);
>>   
>> -    update_context_from_thread(avctx, p->avctx, 1);
>> +        fctx->result = p->result;
>> +        p->result    = 0;
>>   
>> -    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
>> +        if (p->df.nb_f)
>> +            FFSWAP(DecodedFrames, fctx->df, p->df);
>> +    }
>>   
>> -    fctx->next_finished = finished;
>> +    /* a thread may return multiple frames AND an error
>> +     * we first return all the frames, then the error */
>> +    if (fctx->df.nb_f) {
>> +        decoded_frames_pop(&fctx->df, frame);
>> +        ret = 0;
>> +    } else {
>> +        ret = fctx->result;
>> +        fctx->result = 0;
>> +    }
>>   
>> -    /* return the size of the consumed packet if no error occurred */
>> -    if (err >= 0)
>> -        err = avpkt->size;
>>   finish:
>>       async_lock(fctx);
>> -    return err;
>> +    return ret;
>>   }
>>   
>>   void ff_thread_report_progress(ThreadFrame *f, int n, int field)
>> @@ -726,7 +781,6 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
>>                   pthread_cond_wait(&p->output_cond, &p->progress_mutex);
>>               pthread_mutex_unlock(&p->progress_mutex);
>>           }
>> -        p->got_frame = 0;
>>       }
>>   
>>       async_lock(fctx);
>> @@ -784,11 +838,12 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
>>   
>>               av_buffer_unref(&ctx->internal->pool);
>>               av_packet_free(&ctx->internal->last_pkt_props);
>> +            av_packet_free(&ctx->internal->in_pkt);
>>               av_freep(&ctx->internal);
>>               av_buffer_unref(&ctx->hw_frames_ctx);
>>           }
>>   
>> -        av_frame_free(&p->frame);
>> +        decoded_frames_free(&p->df);
>>   
>>           ff_pthread_free(p, per_thread_offsets);
>>           av_packet_free(&p->avpkt);
>> @@ -796,6 +851,9 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
>>           av_freep(&p->avctx);
>>       }
>>   
>> +    decoded_frames_free(&fctx->df);
>> +    av_packet_free(&fctx->next_pkt);
>> +
>>       av_freep(&fctx->threads);
>>       ff_pthread_free(fctx, thread_ctx_offsets);
>>   
>> @@ -854,13 +912,17 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
>>       if (err < 0)
>>           return err;
>>   
>> -    if (!(p->frame = av_frame_alloc()) ||
>> -        !(p->avpkt = av_packet_alloc()))
>> +    if (!(p->avpkt = av_packet_alloc()))
>>           return AVERROR(ENOMEM);
>>   
>> +    copy->internal->is_frame_mt = 1;
>>       if (!first)
>>           copy->internal->is_copy = 1;
>>   
>> +    copy->internal->in_pkt = av_packet_alloc();
>> +    if (!copy->internal->in_pkt)
>> +        return AVERROR(ENOMEM);
>> +
>>       copy->internal->last_pkt_props = av_packet_alloc();
>>       if (!copy->internal->last_pkt_props)
>>           return AVERROR(ENOMEM);
>> @@ -920,6 +982,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
>>           return err;
>>       }
>>   
>> +    fctx->next_pkt = av_packet_alloc();
>> +    if (!fctx->next_pkt)
>> +        return AVERROR(ENOMEM);
> 
> Did you test whether cleanup works when this allocation fails? I doubt
> it does. You are entering ff_frame_thread_free() here with thread_count > 0,
> yet you have not initialized a single worker thread.
> 
>> +
>>       fctx->async_lock = 1;
>>       fctx->delaying = 1;
>>   
>> @@ -964,17 +1030,28 @@ void ff_thread_flush(AVCodecContext *avctx)
>>       fctx->next_decoding = fctx->next_finished = 0;
>>       fctx->delaying = 1;
>>       fctx->prev_thread = NULL;
>> +
>> +    decoded_frames_flush(&fctx->df);
>> +
>>       for (i = 0; i < avctx->thread_count; i++) {
>>           PerThreadContext *p = &fctx->threads[i];
>> -        // Make sure decode flush calls with size=0 won't return old frames
>> -        p->got_frame = 0;
>> -        av_frame_unref(p->frame);
>> -        p->result = 0;
>> +
>> +        decoded_frames_flush(&p->df);
>>   
>>   #if FF_API_THREAD_SAFE_CALLBACKS
>>           release_delayed_buffers(p);
>>   #endif
>>   
>> +        av_packet_unref(p->avctx->internal->last_pkt_props);
>> +        av_packet_unref(p->avctx->internal->in_pkt);
>> +
>> +        p->avctx->pts_correction_last_pts =
>> +        p->avctx->pts_correction_last_dts = INT64_MIN;
>> +
>> +        p->avctx->internal->draining = 0;
>> +        p->avctx->internal->draining_done = 0;
>> +        p->avctx->internal->nb_draining_errors = 0;
>> +
>>           if (ffcodec(avctx->codec)->flush)
>>               ffcodec(avctx->codec)->flush(p->avctx);
>>       }
>> @@ -1193,3 +1270,15 @@ void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
>>       f->owner[0] = f->owner[1] = NULL;
>>       ff_thread_release_buffer(avctx, f->f);
>>   }
>> +
>> +int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
>> +{
>> +    PerThreadContext *p = avctx->internal->thread_ctx;
>> +
>> +    if (p->avpkt->buf) {
>> +        av_packet_move_ref(pkt, p->avpkt);
>> +        return 0;
>> +    }
>> +
>> +    return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
>> +}
>> diff --git a/libavcodec/thread.h b/libavcodec/thread.h
>> index d5673f25ea..7ae69990fb 100644
>> --- a/libavcodec/thread.h
>> +++ b/libavcodec/thread.h
>> @@ -40,17 +40,12 @@
>>   void ff_thread_flush(AVCodecContext *avctx);
>>   
>>   /**
>> - * Submit a new frame to a decoding thread.
>> - * Returns the next available frame in picture. *got_picture_ptr
>> - * will be 0 if none is available.
>> - * The return value on success is the size of the consumed packet for
>> - * compatibility with FFCodec.decode. This means the decoder
>> - * has to consume the full packet.
>> + * Submit available packets for decoding to worker threads, return a
>> + * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
>>    *
>> - * Parameters are the same as FFCodec.decode.
>> + * Parameters are the same as FFCodec.receive_frame.
>>    */
>> -int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
>> -                           int *got_picture_ptr, AVPacket *avpkt);
>> +int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
>>   
>>   /**
>>    * If the codec defines update_thread_context(), call this
>> @@ -99,6 +94,11 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
>>    */
>>   void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
>>   
>> +/**
>> + * Get a packet for decoding. This gets invoked by the worker threads.
>> + */
>> +int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
>> +
>>   int ff_thread_init(AVCodecContext *s);
>>   int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
>>           int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
> 
> Generally, I don't see the advantage of this. The only decoder that
> would benefit from it is SMVJPEG, yet for all other codecs it will just
> involve more allocs+frees.

Being unable to support newer encoders because only the old (and I 
thought deprecated, but apparently not, given encoders are getting ported 
back to it??) is a pretty bad situation, and could hold back some progress.

So adding the ability for frame threading to work for everything seems 
natural and necessary to me.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v6] lavc: convert frame threading to the receive_frame() pattern
  2022-12-20 16:06                 ` Timo Rothenpieler
@ 2022-12-20 16:15                   ` James Almer
  0 siblings, 0 replies; 30+ messages in thread
From: James Almer @ 2022-12-20 16:15 UTC (permalink / raw)
  To: ffmpeg-devel

On 12/20/2022 1:06 PM, Timo Rothenpieler wrote:
>>
>> Generally, I don't see the advantage of this. The only decoder that
>> would benefit from it is SMVJPEG, yet for all other codecs it will just
>> involve more allocs+frees.
> 
> Being unable to support newer encoders because only the old (and I 
> thought deprecated, but apprently not, given encoders are getting ported 
> back to it??) is a pretty bad situation, and could hold back some progress.

Decoders. And what was deprecated and removed was the public API, not 
the internal callback.
For 1:1 decoders, or decoders with no delay or buffering, the decoupled 
input/output callback is overkill, so you'll keep seeing new decoders 
added using the old one in the future.
But similarly, new decoders can and will be added using the decoupled IO 
callback, so this change is necessary so any future decoder that intends 
to have frame threading isn't forced to use the old callback when it 
could benefit from the new. An obvious case would be the (hopefully) 
eventual native AV1 decoder.

> 
> So adding the ability for frame threading to work for everything seems 
> natural and neccesary to me.

Yes.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH v7 1/2] lavc: convert frame threading to the receive_frame() pattern
  2022-12-20 11:45               ` Andreas Rheinhardt
  2022-12-20 16:06                 ` Timo Rothenpieler
@ 2022-12-21 14:22                 ` Timo Rothenpieler
  2022-12-21 14:22                   ` [FFmpeg-devel] [PATCH v7 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
  1 sibling, 1 reply; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-21 14:22 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Anton Khirnov

From: Anton Khirnov <anton@khirnov.net>

Reorganize the code such that the frame threading code does not call the
decoders directly, but instead calls back into the generic decoding
code. This avoids duplicating the logic that wraps the decoder
invocation and will be useful in the following commits.
---
 libavcodec/decode.c        |  64 ++++++---
 libavcodec/decode.h        |   7 +
 libavcodec/internal.h      |   7 +
 libavcodec/pthread_frame.c | 280 ++++++++++++++++++++++++-------------
 libavcodec/thread.h        |  18 +--
 5 files changed, 249 insertions(+), 127 deletions(-)

diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index 3e5be501b9..d0b5293750 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -180,6 +180,11 @@ fail:
     return ret;
 }
 
+#if !HAVE_THREADS
+#define ff_thread_get_packet(avctx, pkt) (AVERROR_BUG)
+#define ff_thread_receive_frame(avctx, frame) (AVERROR_BUG)
+#endif
+
 int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
 {
     AVCodecInternal *avci = avctx->internal;
@@ -188,7 +193,14 @@ int ff_decode_get_packet(AVCodecContext *avctx, AVPacket *pkt)
     if (avci->draining)
         return AVERROR_EOF;
 
-    ret = av_bsf_receive_packet(avci->bsf, pkt);
+    /* If we are a worker thread, get the next packet from the threading
+     * context. Otherwise we are the main (user-facing) context, so we get the
+     * next packet from the input filterchain.
+     */
+    if (avctx->internal->is_frame_mt)
+        ret = ff_thread_get_packet(avctx, pkt);
+    else
+        ret = av_bsf_receive_packet(avci->bsf, pkt);
     if (ret == AVERROR_EOF)
         avci->draining = 1;
     if (ret < 0)
@@ -273,30 +285,25 @@ static inline int decode_simple_internal(AVCodecContext *avctx, AVFrame *frame,
         return AVERROR_EOF;
 
     if (!pkt->data &&
-        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY ||
-          avctx->active_thread_type & FF_THREAD_FRAME))
+        !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
         return AVERROR_EOF;
 
     got_frame = 0;
 
-    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME) {
-        ret = ff_thread_decode_frame(avctx, frame, &got_frame, pkt);
-    } else {
-        ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
-
-        if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
-            frame->pkt_dts = pkt->dts;
-        if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
-            if(!avctx->has_b_frames)
-                frame->pkt_pos = pkt->pos;
-            //FIXME these should be under if(!avctx->has_b_frames)
-            /* get_buffer is supposed to set frame parameters */
-            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
-                if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
-                if (!frame->width)                    frame->width               = avctx->width;
-                if (!frame->height)                   frame->height              = avctx->height;
-                if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
-            }
+    ret = codec->cb.decode(avctx, frame, &got_frame, pkt);
+
+    if (!(codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+        frame->pkt_dts = pkt->dts;
+    if (avctx->codec->type == AVMEDIA_TYPE_VIDEO) {
+        if(!avctx->has_b_frames)
+            frame->pkt_pos = pkt->pos;
+        //FIXME these should be under if(!avctx->has_b_frames)
+        /* get_buffer is supposed to set frame parameters */
+        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
+            if (!frame->sample_aspect_ratio.num)  frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
+            if (!frame->width)                    frame->width               = avctx->width;
+            if (!frame->height)                   frame->height              = avctx->height;
+            if (frame->format == AV_PIX_FMT_NONE) frame->format              = avctx->pix_fmt;
         }
     }
     emms_c();
@@ -546,11 +553,11 @@ static int decode_simple_receive_frame(AVCodecContext *avctx, AVFrame *frame)
     return 0;
 }
 
-static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
 {
     AVCodecInternal *avci = avctx->internal;
     const FFCodec *const codec = ffcodec(avctx->codec);
-    int ret, ok;
+    int ret;
 
     av_assert0(!frame->buf[0]);
 
@@ -562,6 +569,17 @@ static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
     if (ret == AVERROR_EOF)
         avci->draining_done = 1;
 
+    return ret;
+}
+
+static int decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame)
+{
+    int ret, ok;
+    if (avctx->active_thread_type & FF_THREAD_FRAME)
+        ret = ff_thread_receive_frame(avctx, frame);
+    else
+        ret = ff_decode_receive_frame_internal(avctx, frame);
+
     /* preserve ret */
     ok = detect_colorspace(avctx, frame);
     if (ok < 0) {
diff --git a/libavcodec/decode.h b/libavcodec/decode.h
index 906122b4a7..7ba8e3a332 100644
--- a/libavcodec/decode.h
+++ b/libavcodec/decode.h
@@ -58,6 +58,13 @@ typedef struct FrameDecodeData {
  */
 int ff_decode_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
+/**
+ * Do the actual decoding and obtain a decoded frame from the decoder, if
+ * available. When frame threading is used, this is invoked by the worker
+ * threads, otherwise by the top layer directly.
+ */
+int ff_decode_receive_frame_internal(AVCodecContext *avctx, AVFrame *frame);
+
 /**
  * Called by decoders to get the next packet for decoding.
  *
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index a283c52e01..c87036efc4 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -56,6 +56,13 @@ typedef struct AVCodecInternal {
      */
     int is_copy;
 
+    /**
+     * This field is set to 1 when frame threading is being used and the parent
+     * AVCodecContext of this AVCodecInternal is a worker-thread context (i.e.
+     * one of those actually doing the decoding), 0 otherwise.
+     */
+    int is_frame_mt;
+
     /**
      * An audio frame with less than required samples has been submitted (and
      * potentially padded with silence). Reject all subsequent frames.
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 62a0b18a8a..ecc19ea725 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -46,6 +46,7 @@
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/fifo.h"
 #include "libavutil/thread.h"
 
 enum {
@@ -73,6 +74,12 @@ enum {
     INITIALIZED,    ///< Thread has been properly set up
 };
 
+typedef struct DecodedFrames {
+    AVFrame  **f;
+    size_t  nb_f;
+    size_t  nb_f_allocated;
+} DecodedFrames;
+
 /**
  * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
  */
@@ -93,8 +100,10 @@ typedef struct PerThreadContext {
 
     AVPacket       *avpkt;          ///< Input packet (for decoding) or output (for encoding).
 
-    AVFrame *frame;                 ///< Output frame (for decoding) or input (for encoding).
-    int     got_frame;              ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
+    /**
+     * Decoded frames from a single decode iteration.
+     */
+    DecodedFrames df;
     int     result;                 ///< The result of the last codec decode/encode() call.
 
     atomic_int state;
@@ -141,6 +150,14 @@ typedef struct FrameThreadContext {
     pthread_cond_t async_cond;
     int async_lock;
 
+    DecodedFrames df;
+    int result;
+
+    /**
+     * Packet to be submitted to the next thread for decoding.
+     */
+    AVPacket *next_pkt;
+
     int next_decoding;             ///< The next context to submit a packet to.
     int next_finished;             ///< The next context to return output from.
 
@@ -190,6 +207,52 @@ static void thread_set_name(PerThreadContext *p)
     ff_thread_setname(name);
 }
 
+// get a free frame to decode into
+static AVFrame *decoded_frames_get_free(DecodedFrames *df)
+{
+    if (df->nb_f == df->nb_f_allocated) {
+        AVFrame **tmp = av_realloc_array(df->f, df->nb_f + 1,
+                                         sizeof(*df->f));
+        if (!tmp)
+            return NULL;
+        df->f = tmp;
+
+        df->f[df->nb_f] = av_frame_alloc();
+        if (!df->f[df->nb_f])
+            return NULL;
+
+        df->nb_f_allocated++;
+    }
+
+    av_assert0(!df->f[df->nb_f]->buf[0]);
+
+    return df->f[df->nb_f];
+}
+
+static void decoded_frames_pop(DecodedFrames *df, AVFrame *dst)
+{
+    AVFrame *tmp_frame = df->f[0];
+    av_frame_move_ref(dst, tmp_frame);
+    memmove(df->f, df->f + 1, (df->nb_f - 1) * sizeof(*df->f));
+    df->f[--df->nb_f] = tmp_frame;
+}
+
+static void decoded_frames_flush(DecodedFrames *df)
+{
+    for (size_t i = 0; i < df->nb_f; i++)
+        av_frame_unref(df->f[i]);
+    df->nb_f = 0;
+}
+
+static void decoded_frames_free(DecodedFrames *df)
+{
+    for (size_t i = 0; i < df->nb_f_allocated; i++)
+        av_frame_free(&df->f[i]);
+    av_freep(&df->f);
+    df->nb_f           = 0;
+    df->nb_f_allocated = 0;
+}
+
 /**
  * Codec worker thread.
  *
@@ -202,6 +265,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
     PerThreadContext *p = arg;
     AVCodecContext *avctx = p->avctx;
     const FFCodec *codec = ffcodec(avctx->codec);
+    int ret;
 
     thread_set_name(p);
 
@@ -236,16 +300,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
             p->hwaccel_serializing = 1;
         }
 
-        av_frame_unref(p->frame);
-        p->got_frame = 0;
-        p->result = codec->cb.decode(avctx, p->frame, &p->got_frame, p->avpkt);
+        ret = 0;
+        while (ret >= 0) {
+            AVFrame *frame;
+
+            /* get the frame which will store the output */
+            frame = decoded_frames_get_free(&p->df);
+            if (!frame) {
+                p->result = AVERROR(ENOMEM);
+                goto alloc_fail;
+            }
+
+            /* do the actual decoding */
+            ret = ff_decode_receive_frame_internal(avctx, frame);
+            if (ret == 0)
+                p->df.nb_f++;
+            else if (ret < 0 && frame->buf[0])
+                ff_thread_release_buffer(avctx, frame);
 
-        if ((p->result < 0 || !p->got_frame) && p->frame->buf[0])
-            ff_thread_release_buffer(avctx, p->frame);
+            p->result = (ret == AVERROR(EAGAIN)) ? 0 : ret;
+        }
 
         if (atomic_load(&p->state) == STATE_SETTING_UP)
             ff_thread_finish_setup(avctx);
 
+alloc_fail:
         if (p->hwaccel_serializing) {
             /* wipe hwaccel state to avoid stale pointers lying around;
              * the state was transferred to FrameThreadContext in
@@ -441,23 +520,25 @@ static void release_delayed_buffers(PerThreadContext *p)
 #endif
 
 static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
-                         AVPacket *avpkt)
+                         AVPacket *in_pkt)
 {
     FrameThreadContext *fctx = p->parent;
     PerThreadContext *prev_thread = fctx->prev_thread;
-    const AVCodec *codec = p->avctx->codec;
-    int ret;
-
-    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
-        return 0;
+    int err;
 
     pthread_mutex_lock(&p->mutex);
 
-    ret = update_context_from_user(p->avctx, user_avctx);
-    if (ret) {
+    av_packet_unref(p->avpkt);
+    av_packet_move_ref(p->avpkt, in_pkt);
+    if (!p->avpkt->size)
+        p->avctx->internal->draining = 1;
+
+    err = update_context_from_user(p->avctx, user_avctx);
+    if (err < 0) {
         pthread_mutex_unlock(&p->mutex);
-        return ret;
+        return err;
     }
+
     atomic_store_explicit(&p->debug_threads,
                           (p->avctx->debug & FF_DEBUG_THREADS) != 0,
                           memory_order_relaxed);
@@ -467,7 +548,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
 #endif
 
     if (prev_thread) {
-        int err;
         if (atomic_load(&prev_thread->state) == STATE_SETTING_UP) {
             pthread_mutex_lock(&prev_thread->progress_mutex);
             while (atomic_load(&prev_thread->state) == STATE_SETTING_UP)
@@ -475,10 +555,16 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
             pthread_mutex_unlock(&prev_thread->progress_mutex);
         }
 
-        err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
-        if (err) {
-            pthread_mutex_unlock(&p->mutex);
-            return err;
+        /* codecs without delay might not be prepared to be called repeatedly here during
+         * flushing (vp3/theora), and also don't need to be, since from this point on, they
+         * will always return EOF anyway */
+        if (!p->avctx->internal->draining || (p->avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
+        {
+            err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
+            if (err) {
+                pthread_mutex_unlock(&p->mutex);
+                return err;
+            }
         }
     }
 
@@ -488,14 +574,6 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
     FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
     FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
 
-    av_packet_unref(p->avpkt);
-    ret = av_packet_ref(p->avpkt, avpkt);
-    if (ret < 0) {
-        pthread_mutex_unlock(&p->mutex);
-        av_log(p->avctx, AV_LOG_ERROR, "av_packet_ref() failed in submit_packet()\n");
-        return ret;
-    }
-
     atomic_store(&p->state, STATE_SETTING_UP);
     pthread_cond_signal(&p->input_cond);
     pthread_mutex_unlock(&p->mutex);
@@ -539,57 +617,42 @@ FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
     fctx->prev_thread = p;
-    fctx->next_decoding++;
+    fctx->next_decoding = (fctx->next_decoding + 1) % p->avctx->thread_count;
 
     return 0;
 }
 
-int ff_thread_decode_frame(AVCodecContext *avctx,
-                           AVFrame *picture, int *got_picture_ptr,
-                           AVPacket *avpkt)
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame)
 {
     FrameThreadContext *fctx = avctx->internal->thread_ctx;
-    int finished = fctx->next_finished;
-    PerThreadContext *p;
-    int err;
+    int ret = 0;
 
     /* release the async lock, permitting blocked hwaccel threads to
      * go forward while we are in this function */
     async_unlock(fctx);
 
-    /*
-     * Submit a packet to the next decoding thread.
-     */
-
-    p = &fctx->threads[fctx->next_decoding];
-    err = submit_packet(p, avctx, avpkt);
-    if (err)
-        goto finish;
-
-    /*
-     * If we're still receiving the initial packets, don't return a frame.
-     */
+    /* submit packets to threads while there are no buffered results to return */
+    while (!fctx->df.nb_f && !fctx->result) {
+        PerThreadContext *p;
 
-    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
-        fctx->delaying = 0;
+        /* get a packet to be submitted to the next thread */
+        av_packet_unref(fctx->next_pkt);
+        ret = ff_decode_get_packet(avctx, fctx->next_pkt);
+        if (ret < 0 && ret != AVERROR_EOF)
+            goto finish;
 
-    if (fctx->delaying) {
-        *got_picture_ptr=0;
-        if (avpkt->size) {
-            err = avpkt->size;
+        ret = submit_packet(&fctx->threads[fctx->next_decoding], avctx,
+                            fctx->next_pkt);
+        if (ret < 0)
             goto finish;
-        }
-    }
 
-    /*
-     * Return the next available frame from the oldest thread.
-     * If we're at the end of the stream, then we have to skip threads that
-     * didn't output a frame/error, because we don't want to accidentally signal
-     * EOF (avpkt->size == 0 && *got_picture_ptr == 0 && err >= 0).
-     */
+        /* do not return any frames until all threads have something to do */
+        if (fctx->next_decoding != fctx->next_finished &&
+            !avctx->internal->draining)
+            continue;
 
-    do {
-        p = &fctx->threads[finished++];
+        p                   = &fctx->threads[fctx->next_finished];
+        fctx->next_finished = (fctx->next_finished + 1) % avctx->thread_count;
 
         if (atomic_load(&p->state) != STATE_INPUT_READY) {
             pthread_mutex_lock(&p->progress_mutex);
@@ -598,35 +661,28 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
             pthread_mutex_unlock(&p->progress_mutex);
         }
 
-        av_frame_move_ref(picture, p->frame);
-        *got_picture_ptr = p->got_frame;
-        picture->pkt_dts = p->avpkt->dts;
-        err = p->result;
+        update_context_from_thread(avctx, p->avctx, 1);
 
-        /*
-         * A later call with avkpt->size == 0 may loop over all threads,
-         * including this one, searching for a frame/error to return before being
-         * stopped by the "finished != fctx->next_finished" condition.
-         * Make sure we don't mistakenly return the same frame/error again.
-         */
-        p->got_frame = 0;
-        p->result = 0;
+        fctx->result = p->result;
+        p->result    = 0;
 
-        if (finished >= avctx->thread_count) finished = 0;
-    } while (!avpkt->size && !*got_picture_ptr && err >= 0 && finished != fctx->next_finished);
-
-    update_context_from_thread(avctx, p->avctx, 1);
-
-    if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+        if (p->df.nb_f)
+            FFSWAP(DecodedFrames, fctx->df, p->df);
+    }
 
-    fctx->next_finished = finished;
+    /* a thread may return multiple frames AND an error
+     * we first return all the frames, then the error */
+    if (fctx->df.nb_f) {
+        decoded_frames_pop(&fctx->df, frame);
+        ret = 0;
+    } else {
+        ret = fctx->result;
+        fctx->result = 0;
+    }
 
-    /* return the size of the consumed packet if no error occurred */
-    if (err >= 0)
-        err = avpkt->size;
 finish:
     async_lock(fctx);
-    return err;
+    return ret;
 }
 
 void ff_thread_report_progress(ThreadFrame *f, int n, int field)
@@ -726,7 +782,6 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
                 pthread_cond_wait(&p->output_cond, &p->progress_mutex);
             pthread_mutex_unlock(&p->progress_mutex);
         }
-        p->got_frame = 0;
     }
 
     async_lock(fctx);
@@ -784,11 +839,12 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
 
             av_buffer_unref(&ctx->internal->pool);
             av_packet_free(&ctx->internal->last_pkt_props);
+            av_packet_free(&ctx->internal->in_pkt);
             av_freep(&ctx->internal);
             av_buffer_unref(&ctx->hw_frames_ctx);
         }
 
-        av_frame_free(&p->frame);
+        decoded_frames_free(&p->df);
 
         ff_pthread_free(p, per_thread_offsets);
         av_packet_free(&p->avpkt);
@@ -796,6 +852,9 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
         av_freep(&p->avctx);
     }
 
+    decoded_frames_free(&fctx->df);
+    av_packet_free(&fctx->next_pkt);
+
     av_freep(&fctx->threads);
     ff_pthread_free(fctx, thread_ctx_offsets);
 
@@ -854,13 +913,17 @@ static av_cold int init_thread(PerThreadContext *p, int *threads_to_free,
     if (err < 0)
         return err;
 
-    if (!(p->frame = av_frame_alloc()) ||
-        !(p->avpkt = av_packet_alloc()))
+    if (!(p->avpkt = av_packet_alloc()))
         return AVERROR(ENOMEM);
 
+    copy->internal->is_frame_mt = 1;
     if (!first)
         copy->internal->is_copy = 1;
 
+    copy->internal->in_pkt = av_packet_alloc();
+    if (!copy->internal->in_pkt)
+        return AVERROR(ENOMEM);
+
     copy->internal->last_pkt_props = av_packet_alloc();
     if (!copy->internal->last_pkt_props)
         return AVERROR(ENOMEM);
@@ -920,6 +983,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
         return err;
     }
 
+    fctx->next_pkt = av_packet_alloc();
+    if (!fctx->next_pkt)
+        return AVERROR(ENOMEM);
+
     fctx->async_lock = 1;
     fctx->delaying = 1;
 
@@ -964,17 +1031,28 @@ void ff_thread_flush(AVCodecContext *avctx)
     fctx->next_decoding = fctx->next_finished = 0;
     fctx->delaying = 1;
     fctx->prev_thread = NULL;
+
+    decoded_frames_flush(&fctx->df);
+
     for (i = 0; i < avctx->thread_count; i++) {
         PerThreadContext *p = &fctx->threads[i];
-        // Make sure decode flush calls with size=0 won't return old frames
-        p->got_frame = 0;
-        av_frame_unref(p->frame);
-        p->result = 0;
+
+        decoded_frames_flush(&p->df);
 
 #if FF_API_THREAD_SAFE_CALLBACKS
         release_delayed_buffers(p);
 #endif
 
+        av_packet_unref(p->avctx->internal->last_pkt_props);
+        av_packet_unref(p->avctx->internal->in_pkt);
+
+        p->avctx->pts_correction_last_pts =
+        p->avctx->pts_correction_last_dts = INT64_MIN;
+
+        p->avctx->internal->draining = 0;
+        p->avctx->internal->draining_done = 0;
+        p->avctx->internal->nb_draining_errors = 0;
+
         if (ffcodec(avctx->codec)->flush)
             ffcodec(avctx->codec)->flush(p->avctx);
     }
@@ -1193,3 +1271,15 @@ void ff_thread_release_ext_buffer(AVCodecContext *avctx, ThreadFrame *f)
     f->owner[0] = f->owner[1] = NULL;
     ff_thread_release_buffer(avctx, f->f);
 }
+
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    PerThreadContext *p = avctx->internal->thread_ctx;
+
+    if (p->avpkt->buf) {
+        av_packet_move_ref(pkt, p->avpkt);
+        return 0;
+    }
+
+    return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);
+}
diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index d5673f25ea..7ae69990fb 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h
@@ -40,17 +40,12 @@
 void ff_thread_flush(AVCodecContext *avctx);
 
 /**
- * Submit a new frame to a decoding thread.
- * Returns the next available frame in picture. *got_picture_ptr
- * will be 0 if none is available.
- * The return value on success is the size of the consumed packet for
- * compatibility with FFCodec.decode. This means the decoder
- * has to consume the full packet.
+ * Submit available packets for decoding to worker threads, return a
+ * decoded frame if available. Returns AVERROR(EAGAIN) if none is available.
  *
- * Parameters are the same as FFCodec.decode.
+ * Parameters are the same as FFCodec.receive_frame.
  */
-int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
-                           int *got_picture_ptr, AVPacket *avpkt);
+int ff_thread_receive_frame(AVCodecContext *avctx, AVFrame *frame);
 
 /**
  * If the codec defines update_thread_context(), call this
@@ -99,6 +94,11 @@ int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f, int flags);
  */
 void ff_thread_release_buffer(AVCodecContext *avctx, AVFrame *f);
 
+/**
+ * Get a packet for decoding. This gets invoked by the worker threads.
+ */
+int ff_thread_get_packet(AVCodecContext *avctx, AVPacket *pkt);
+
 int ff_thread_init(AVCodecContext *s);
 int ff_slice_thread_execute_with_mainfunc(AVCodecContext *avctx,
         int (*action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr),
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH v7 2/2] avcodec/mjpegdec: add support for frame threading
  2022-12-21 14:22                 ` [FFmpeg-devel] [PATCH v7 1/2] " Timo Rothenpieler
@ 2022-12-21 14:22                   ` Timo Rothenpieler
  2022-12-21 21:35                     ` Michael Niedermayer
  0 siblings, 1 reply; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-21 14:22 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Timo Rothenpieler

In my tests, this led to a notable speed increase that scales with the
number of threads used. Decoding a 720p sample gave the following results:

1 Thread: 1428 FPS
2 Threads: 2501 FPS
8 Threads: 7575 FPS
Automatic: 11326 FPS (On a 16 Core/32 Threads system)
---
 libavcodec/jpeglsdec.c |  2 +-
 libavcodec/mjpegdec.c  | 11 ++++++-----
 libavcodec/sp5xdec.c   |  4 ++--
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
index ec163b8964..6e75c9b406 100644
--- a/libavcodec/jpeglsdec.c
+++ b/libavcodec/jpeglsdec.c
@@ -559,6 +559,6 @@ const FFCodec ff_jpegls_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(ff_mjpeg_decode_frame),
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index f33911e1a8..41d3f36940 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -54,6 +54,7 @@
 #include "exif.h"
 #include "bytestream.h"
 #include "tiff_common.h"
+#include "thread.h"
 
 
 static int init_default_huffman_tables(MJpegDecodeContext *s)
@@ -712,7 +713,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
                 s->avctx->pix_fmt,
                 AV_PIX_FMT_NONE,
             };
-            s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
+            s->hwaccel_pix_fmt = ff_thread_get_format(s->avctx, pix_fmts);
             if (s->hwaccel_pix_fmt < 0)
                 return AVERROR(EINVAL);
 
@@ -728,7 +729,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         }
 
         av_frame_unref(s->picture_ptr);
-        if (ff_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
+        if (ff_thread_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
             return -1;
         s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
         s->picture_ptr->key_frame = 1;
@@ -2954,7 +2955,7 @@ const FFCodec ff_mjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(ff_mjpeg_decode_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .p.priv_class   = &mjpegdec_class,
     .p.profiles     = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
@@ -2983,7 +2984,7 @@ const FFCodec ff_thp_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(ff_mjpeg_decode_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
@@ -3062,7 +3063,7 @@ const FFCodec ff_smvjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(smvjpeg_receive_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_EXPORTS_CROPPING |
                       FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index dfed725500..af1b6400e1 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -103,7 +103,7 @@ const FFCodec ff_sp5x_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(sp5x_decode_frame),
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
@@ -119,7 +119,7 @@ const FFCodec ff_amv_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_DECODE_CB(sp5x_decode_frame),
     .p.max_lowres   = 3,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
 #endif
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH v7 2/2] avcodec/mjpegdec: add support for frame threading
  2022-12-21 14:22                   ` [FFmpeg-devel] [PATCH v7 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
@ 2022-12-21 21:35                     ` Michael Niedermayer
  0 siblings, 0 replies; 30+ messages in thread
From: Michael Niedermayer @ 2022-12-21 21:35 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 902 bytes --]

On Wed, Dec 21, 2022 at 03:22:56PM +0100, Timo Rothenpieler wrote:
> In my tests, this lead to a notable speed increase with the amount
> of threads used. Decoding a 720p sample gave the following results:
> 
> 1 Thread: 1428 FPS
> 2 Threads: 2501 FPS
> 8 Threads: 7575 FPS
> Automatic: 11326 FPS (On a 16 Core/32 Threads system)
> ---
>  libavcodec/jpeglsdec.c |  2 +-
>  libavcodec/mjpegdec.c  | 11 ++++++-----
>  libavcodec/sp5xdec.c   |  4 ++--
>  3 files changed, 9 insertions(+), 8 deletions(-)

Changes output for:
./ffmpeg  -an -i ~/tickets/1915/m_noint.avi -an -bitexact -f framecrc  -

I will mail you the file, as it seems it's not on trac.

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The smallest minority on earth is the individual. Those who deny 
individual rights cannot claim to be defenders of minorities. - Ayn Rand

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading
  2023-09-07 17:17       ` Paul B Mahol
@ 2023-09-07 17:28         ` Paul B Mahol
  0 siblings, 0 replies; 30+ messages in thread
From: Paul B Mahol @ 2023-09-07 17:28 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Thu, Sep 7, 2023 at 7:17 PM Paul B Mahol <onemda@gmail.com> wrote:

>
>
> On Tue, Dec 6, 2022 at 12:02 AM Timo Rothenpieler <timo@rothenpieler.org>
> wrote:
>
>> On 05.12.2022 15:15, Andreas Rheinhardt wrote:
>> > Timo Rothenpieler:
>> >> In my tests, this lead to a notable speed increase with the amount
>> >> of threads used. Decoding a 720p sample gave the following results:
>> >>
>> >> 1 Thread: 1428 FPS
>> >> 2 Threads: 2501 FPS
>> >> 8 Threads: 7575 FPS
>> >> Automatic: 11326 FPS (On a 16 Core/32 Threads system)
>> >> ---
>> >>   libavcodec/jpeglsdec.c |  2 +-
>> >>   libavcodec/mjpegdec.c  | 13 +++++++------
>> >>   libavcodec/sp5xdec.c   |  4 ++--
>> >>   3 files changed, 10 insertions(+), 9 deletions(-)
>> >>
>>
>
> I made almost same patch, can you apply this one?
> Thanks.
>

Actually, on closer inspection, this one is more complicated and no longer
applies.
So please ignore my 'request'.


>
>
>> >> diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
>> >> index 2e6d018ea6..c0642e8e30 100644
>> >> --- a/libavcodec/jpeglsdec.c
>> >> +++ b/libavcodec/jpeglsdec.c
>> >> @@ -559,7 +559,7 @@ const FFCodec ff_jpegls_decoder = {
>> >>       .init           = ff_mjpeg_decode_init,
>> >>       .close          = ff_mjpeg_decode_end,
>> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>> >>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>> >>                         FF_CODEC_CAP_SETS_PKT_DTS,
>> >>   };
>> >> diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
>> >> index 9b7465abe7..d30d722398 100644
>> >> --- a/libavcodec/mjpegdec.c
>> >> +++ b/libavcodec/mjpegdec.c
>> >> @@ -54,6 +54,7 @@
>> >>   #include "exif.h"
>> >>   #include "bytestream.h"
>> >>   #include "tiff_common.h"
>> >> +#include "thread.h"
>> >>
>> >>
>> >>   static int init_default_huffman_tables(MJpegDecodeContext *s)
>> >> @@ -713,7 +714,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
>> >>                   s->avctx->pix_fmt,
>> >>                   AV_PIX_FMT_NONE,
>> >>               };
>> >> -            s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
>> >> +            s->hwaccel_pix_fmt = ff_thread_get_format(s->avctx,
>> pix_fmts);
>> >>               if (s->hwaccel_pix_fmt < 0)
>> >>                   return AVERROR(EINVAL);
>> >>
>> >> @@ -729,7 +730,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
>> >>           }
>> >>
>> >>           av_frame_unref(s->picture_ptr);
>> >> -        if (ff_get_buffer(s->avctx, s->picture_ptr,
>> AV_GET_BUFFER_FLAG_REF) < 0)
>> >> +        if (ff_thread_get_buffer(s->avctx, s->picture_ptr,
>> AV_GET_BUFFER_FLAG_REF) < 0)
>> >>               return -1;
>> >>           s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
>> >>           s->picture_ptr->key_frame = 1;
>> >> @@ -2388,7 +2389,7 @@ static int mjpeg_get_packet(AVCodecContext
>> *avctx)
>> >>       int ret;
>> >>
>> >>       av_packet_unref(s->pkt);
>> >> -    ret = ff_decode_get_packet(avctx, s->pkt);
>> >> +    ret = ff_thread_decode_get_packet(avctx, s->pkt);
>> >>       if (ret < 0)
>> >>           return ret;
>> >>
>> >> @@ -3020,7 +3021,7 @@ const FFCodec ff_mjpeg_decoder = {
>> >>       .close          = ff_mjpeg_decode_end,
>> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> >>       .flush          = decode_flush,
>> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>> >>       .p.max_lowres   = 3,
>> >>       .p.priv_class   = &mjpegdec_class,
>> >>       .p.profiles     = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
>> >> @@ -3050,7 +3051,7 @@ const FFCodec ff_thp_decoder = {
>> >>       .close          = ff_mjpeg_decode_end,
>> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> >>       .flush          = decode_flush,
>> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>> >>       .p.max_lowres   = 3,
>> >>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>> >>                         FF_CODEC_CAP_SETS_PKT_DTS,
>> >> @@ -3068,7 +3069,7 @@ const FFCodec ff_smvjpeg_decoder = {
>> >>       .close          = ff_mjpeg_decode_end,
>> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> >>       .flush          = decode_flush,
>> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>> >>       .caps_internal  = FF_CODEC_CAP_EXPORTS_CROPPING |
>> >>                         FF_CODEC_CAP_SETS_PKT_DTS |
>> FF_CODEC_CAP_INIT_CLEANUP,
>> >>   };
>> >> diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
>> >> index 394448c5a9..8b08dc672a 100644
>> >> --- a/libavcodec/sp5xdec.c
>> >> +++ b/libavcodec/sp5xdec.c
>> >> @@ -101,7 +101,7 @@ const FFCodec ff_sp5x_decoder = {
>> >>       .init           = ff_mjpeg_decode_init,
>> >>       .close          = ff_mjpeg_decode_end,
>> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>> >>       .p.max_lowres   = 3,
>> >>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>> >>                         FF_CODEC_CAP_SETS_PKT_DTS,
>> >> @@ -118,7 +118,7 @@ const FFCodec ff_amv_decoder = {
>> >>       .close          = ff_mjpeg_decode_end,
>> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> >>       .p.max_lowres   = 3,
>> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>> >>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>> >>                         FF_CODEC_CAP_SETS_PKT_DTS,
>> >>   };
>> >
>> > Can you test the sample that Michael mentioned here:
>> >
>> https://patchwork.ffmpeg.org/project/ffmpeg/patch/AS8PR01MB7944E105BE990A5D01EF89208FEF9@AS8PR01MB7944.eurprd01.prod.exchangelabs.com/
>> ?
>> > (I never got around to analyzing this, but if I am not mistaken, it
>> > shows that this decoder is not a simple one-in, one-out decoder, so that
>> > making it multithreaded will be more complicated than just adding the
>> > flag and using ff_thread_get_buffer/format.)
>>
>> Assuming this is the right file:
>> https://trac.ffmpeg.org/attachment/ticket/1915/not_interleaved.avi
>>
>> It does not decode for me at all, with or without this patch applied:
>>
>> > [mjpeg @ 0x562e81877c80] No JPEG data found in image
>> > Error while decoding stream #0:0: Invalid data found when processing
>> input
>>
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading
  2022-12-05 23:02     ` Timo Rothenpieler
@ 2023-09-07 17:17       ` Paul B Mahol
  2023-09-07 17:28         ` Paul B Mahol
  0 siblings, 1 reply; 30+ messages in thread
From: Paul B Mahol @ 2023-09-07 17:17 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Tue, Dec 6, 2022 at 12:02 AM Timo Rothenpieler <timo@rothenpieler.org>
wrote:

> On 05.12.2022 15:15, Andreas Rheinhardt wrote:
> > Timo Rothenpieler:
> >> In my tests, this lead to a notable speed increase with the amount
> >> of threads used. Decoding a 720p sample gave the following results:
> >>
> >> 1 Thread: 1428 FPS
> >> 2 Threads: 2501 FPS
> >> 8 Threads: 7575 FPS
> >> Automatic: 11326 FPS (On a 16 Core/32 Threads system)
> >> ---
> >>   libavcodec/jpeglsdec.c |  2 +-
> >>   libavcodec/mjpegdec.c  | 13 +++++++------
> >>   libavcodec/sp5xdec.c   |  4 ++--
> >>   3 files changed, 10 insertions(+), 9 deletions(-)
> >>
>

I made almost the same patch; can you apply this one?
Thanks.


> >> diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
> >> index 2e6d018ea6..c0642e8e30 100644
> >> --- a/libavcodec/jpeglsdec.c
> >> +++ b/libavcodec/jpeglsdec.c
> >> @@ -559,7 +559,7 @@ const FFCodec ff_jpegls_decoder = {
> >>       .init           = ff_mjpeg_decode_init,
> >>       .close          = ff_mjpeg_decode_end,
> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
> >>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
> >>                         FF_CODEC_CAP_SETS_PKT_DTS,
> >>   };
> >> diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
> >> index 9b7465abe7..d30d722398 100644
> >> --- a/libavcodec/mjpegdec.c
> >> +++ b/libavcodec/mjpegdec.c
> >> @@ -54,6 +54,7 @@
> >>   #include "exif.h"
> >>   #include "bytestream.h"
> >>   #include "tiff_common.h"
> >> +#include "thread.h"
> >>
> >>
> >>   static int init_default_huffman_tables(MJpegDecodeContext *s)
> >> @@ -713,7 +714,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
> >>                   s->avctx->pix_fmt,
> >>                   AV_PIX_FMT_NONE,
> >>               };
> >> -            s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
> >> +            s->hwaccel_pix_fmt = ff_thread_get_format(s->avctx,
> pix_fmts);
> >>               if (s->hwaccel_pix_fmt < 0)
> >>                   return AVERROR(EINVAL);
> >>
> >> @@ -729,7 +730,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
> >>           }
> >>
> >>           av_frame_unref(s->picture_ptr);
> >> -        if (ff_get_buffer(s->avctx, s->picture_ptr,
> AV_GET_BUFFER_FLAG_REF) < 0)
> >> +        if (ff_thread_get_buffer(s->avctx, s->picture_ptr,
> AV_GET_BUFFER_FLAG_REF) < 0)
> >>               return -1;
> >>           s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
> >>           s->picture_ptr->key_frame = 1;
> >> @@ -2388,7 +2389,7 @@ static int mjpeg_get_packet(AVCodecContext *avctx)
> >>       int ret;
> >>
> >>       av_packet_unref(s->pkt);
> >> -    ret = ff_decode_get_packet(avctx, s->pkt);
> >> +    ret = ff_thread_decode_get_packet(avctx, s->pkt);
> >>       if (ret < 0)
> >>           return ret;
> >>
> >> @@ -3020,7 +3021,7 @@ const FFCodec ff_mjpeg_decoder = {
> >>       .close          = ff_mjpeg_decode_end,
> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
> >>       .flush          = decode_flush,
> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
> >>       .p.max_lowres   = 3,
> >>       .p.priv_class   = &mjpegdec_class,
> >>       .p.profiles     = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
> >> @@ -3050,7 +3051,7 @@ const FFCodec ff_thp_decoder = {
> >>       .close          = ff_mjpeg_decode_end,
> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
> >>       .flush          = decode_flush,
> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
> >>       .p.max_lowres   = 3,
> >>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
> >>                         FF_CODEC_CAP_SETS_PKT_DTS,
> >> @@ -3068,7 +3069,7 @@ const FFCodec ff_smvjpeg_decoder = {
> >>       .close          = ff_mjpeg_decode_end,
> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
> >>       .flush          = decode_flush,
> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
> >>       .caps_internal  = FF_CODEC_CAP_EXPORTS_CROPPING |
> >>                         FF_CODEC_CAP_SETS_PKT_DTS |
> FF_CODEC_CAP_INIT_CLEANUP,
> >>   };
> >> diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
> >> index 394448c5a9..8b08dc672a 100644
> >> --- a/libavcodec/sp5xdec.c
> >> +++ b/libavcodec/sp5xdec.c
> >> @@ -101,7 +101,7 @@ const FFCodec ff_sp5x_decoder = {
> >>       .init           = ff_mjpeg_decode_init,
> >>       .close          = ff_mjpeg_decode_end,
> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
> >>       .p.max_lowres   = 3,
> >>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
> >>                         FF_CODEC_CAP_SETS_PKT_DTS,
> >> @@ -118,7 +118,7 @@ const FFCodec ff_amv_decoder = {
> >>       .close          = ff_mjpeg_decode_end,
> >>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
> >>       .p.max_lowres   = 3,
> >> -    .p.capabilities = AV_CODEC_CAP_DR1,
> >> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
> >>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
> >>                         FF_CODEC_CAP_SETS_PKT_DTS,
> >>   };
> >
> > Can you test the sample that Michael mentioned here:
> >
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/AS8PR01MB7944E105BE990A5D01EF89208FEF9@AS8PR01MB7944.eurprd01.prod.exchangelabs.com/
> ?
> > (I never got around to analyzing this, but if I am not mistaken, it
> > shows that this decoder is not a simple one-in, one-out decoder, so that
> > making it multithreaded will be more complicated than just adding the
> > flag and using ff_thread_get_buffer/format.)
>
> Assuming this is the right file:
> https://trac.ffmpeg.org/attachment/ticket/1915/not_interleaved.avi
>
> It does not decode for me at all, with or without this patch applied:
>
> > [mjpeg @ 0x562e81877c80] No JPEG data found in image
> > Error while decoding stream #0:0: Invalid data found when processing
> input
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading
  2022-12-05 14:15   ` Andreas Rheinhardt
  2022-12-05 14:28     ` Paul B Mahol
@ 2022-12-05 23:02     ` Timo Rothenpieler
  2023-09-07 17:17       ` Paul B Mahol
  1 sibling, 1 reply; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-05 23:02 UTC (permalink / raw)
  To: ffmpeg-devel

On 05.12.2022 15:15, Andreas Rheinhardt wrote:
> Timo Rothenpieler:
>> In my tests, this lead to a notable speed increase with the amount
>> of threads used. Decoding a 720p sample gave the following results:
>>
>> 1 Thread: 1428 FPS
>> 2 Threads: 2501 FPS
>> 8 Threads: 7575 FPS
>> Automatic: 11326 FPS (On a 16 Core/32 Threads system)
>> ---
>>   libavcodec/jpeglsdec.c |  2 +-
>>   libavcodec/mjpegdec.c  | 13 +++++++------
>>   libavcodec/sp5xdec.c   |  4 ++--
>>   3 files changed, 10 insertions(+), 9 deletions(-)
>>
>> diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
>> index 2e6d018ea6..c0642e8e30 100644
>> --- a/libavcodec/jpeglsdec.c
>> +++ b/libavcodec/jpeglsdec.c
>> @@ -559,7 +559,7 @@ const FFCodec ff_jpegls_decoder = {
>>       .init           = ff_mjpeg_decode_init,
>>       .close          = ff_mjpeg_decode_end,
>>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>>                         FF_CODEC_CAP_SETS_PKT_DTS,
>>   };
>> diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
>> index 9b7465abe7..d30d722398 100644
>> --- a/libavcodec/mjpegdec.c
>> +++ b/libavcodec/mjpegdec.c
>> @@ -54,6 +54,7 @@
>>   #include "exif.h"
>>   #include "bytestream.h"
>>   #include "tiff_common.h"
>> +#include "thread.h"
>>   
>>   
>>   static int init_default_huffman_tables(MJpegDecodeContext *s)
>> @@ -713,7 +714,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
>>                   s->avctx->pix_fmt,
>>                   AV_PIX_FMT_NONE,
>>               };
>> -            s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
>> +            s->hwaccel_pix_fmt = ff_thread_get_format(s->avctx, pix_fmts);
>>               if (s->hwaccel_pix_fmt < 0)
>>                   return AVERROR(EINVAL);
>>   
>> @@ -729,7 +730,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
>>           }
>>   
>>           av_frame_unref(s->picture_ptr);
>> -        if (ff_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
>> +        if (ff_thread_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
>>               return -1;
>>           s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
>>           s->picture_ptr->key_frame = 1;
>> @@ -2388,7 +2389,7 @@ static int mjpeg_get_packet(AVCodecContext *avctx)
>>       int ret;
>>   
>>       av_packet_unref(s->pkt);
>> -    ret = ff_decode_get_packet(avctx, s->pkt);
>> +    ret = ff_thread_decode_get_packet(avctx, s->pkt);
>>       if (ret < 0)
>>           return ret;
>>   
>> @@ -3020,7 +3021,7 @@ const FFCodec ff_mjpeg_decoder = {
>>       .close          = ff_mjpeg_decode_end,
>>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>>       .flush          = decode_flush,
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>       .p.max_lowres   = 3,
>>       .p.priv_class   = &mjpegdec_class,
>>       .p.profiles     = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
>> @@ -3050,7 +3051,7 @@ const FFCodec ff_thp_decoder = {
>>       .close          = ff_mjpeg_decode_end,
>>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>>       .flush          = decode_flush,
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>       .p.max_lowres   = 3,
>>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>>                         FF_CODEC_CAP_SETS_PKT_DTS,
>> @@ -3068,7 +3069,7 @@ const FFCodec ff_smvjpeg_decoder = {
>>       .close          = ff_mjpeg_decode_end,
>>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>>       .flush          = decode_flush,
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>       .caps_internal  = FF_CODEC_CAP_EXPORTS_CROPPING |
>>                         FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP,
>>   };
>> diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
>> index 394448c5a9..8b08dc672a 100644
>> --- a/libavcodec/sp5xdec.c
>> +++ b/libavcodec/sp5xdec.c
>> @@ -101,7 +101,7 @@ const FFCodec ff_sp5x_decoder = {
>>       .init           = ff_mjpeg_decode_init,
>>       .close          = ff_mjpeg_decode_end,
>>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>       .p.max_lowres   = 3,
>>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>>                         FF_CODEC_CAP_SETS_PKT_DTS,
>> @@ -118,7 +118,7 @@ const FFCodec ff_amv_decoder = {
>>       .close          = ff_mjpeg_decode_end,
>>       FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>>       .p.max_lowres   = 3,
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>       .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>>                         FF_CODEC_CAP_SETS_PKT_DTS,
>>   };
> 
> Can you test the sample that Michael mentioned here:
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/AS8PR01MB7944E105BE990A5D01EF89208FEF9@AS8PR01MB7944.eurprd01.prod.exchangelabs.com/?
> (I never got around to analyzing this, but if I am not mistaken, it
> shows that this decoder is not a simple one-in, one-out decoder, so that
> making it multithreaded will be more complicated than just adding the
> flag and using ff_thread_get_buffer/format.)

Assuming this is the right file:
https://trac.ffmpeg.org/attachment/ticket/1915/not_interleaved.avi

It does not decode for me at all, with or without this patch applied:

> [mjpeg @ 0x562e81877c80] No JPEG data found in image
> Error while decoding stream #0:0: Invalid data found when processing input


_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading
  2022-12-05 14:15   ` Andreas Rheinhardt
@ 2022-12-05 14:28     ` Paul B Mahol
  2022-12-05 23:02     ` Timo Rothenpieler
  1 sibling, 0 replies; 30+ messages in thread
From: Paul B Mahol @ 2022-12-05 14:28 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On 12/5/22, Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:
> Timo Rothenpieler:
>> In my tests, this led to a notable speed increase with the number
>> of threads used. Decoding a 720p sample gave the following results:
>>
>> 1 Thread: 1428 FPS
>> 2 Threads: 2501 FPS
>> 8 Threads: 7575 FPS
>> Automatic: 11326 FPS (On a 16 Core/32 Threads system)
>> ---
>>  libavcodec/jpeglsdec.c |  2 +-
>>  libavcodec/mjpegdec.c  | 13 +++++++------
>>  libavcodec/sp5xdec.c   |  4 ++--
>>  3 files changed, 10 insertions(+), 9 deletions(-)
>>
>> diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
>> index 2e6d018ea6..c0642e8e30 100644
>> --- a/libavcodec/jpeglsdec.c
>> +++ b/libavcodec/jpeglsdec.c
>> @@ -559,7 +559,7 @@ const FFCodec ff_jpegls_decoder = {
>>      .init           = ff_mjpeg_decode_init,
>>      .close          = ff_mjpeg_decode_end,
>>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>      .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>>                        FF_CODEC_CAP_SETS_PKT_DTS,
>>  };
>> diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
>> index 9b7465abe7..d30d722398 100644
>> --- a/libavcodec/mjpegdec.c
>> +++ b/libavcodec/mjpegdec.c
>> @@ -54,6 +54,7 @@
>>  #include "exif.h"
>>  #include "bytestream.h"
>>  #include "tiff_common.h"
>> +#include "thread.h"
>>
>>
>>  static int init_default_huffman_tables(MJpegDecodeContext *s)
>> @@ -713,7 +714,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
>>                  s->avctx->pix_fmt,
>>                  AV_PIX_FMT_NONE,
>>              };
>> -            s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
>> +            s->hwaccel_pix_fmt = ff_thread_get_format(s->avctx,
>> pix_fmts);
>>              if (s->hwaccel_pix_fmt < 0)
>>                  return AVERROR(EINVAL);
>>
>> @@ -729,7 +730,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
>>          }
>>
>>          av_frame_unref(s->picture_ptr);
>> -        if (ff_get_buffer(s->avctx, s->picture_ptr,
>> AV_GET_BUFFER_FLAG_REF) < 0)
>> +        if (ff_thread_get_buffer(s->avctx, s->picture_ptr,
>> AV_GET_BUFFER_FLAG_REF) < 0)
>>              return -1;
>>          s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
>>          s->picture_ptr->key_frame = 1;
>> @@ -2388,7 +2389,7 @@ static int mjpeg_get_packet(AVCodecContext *avctx)
>>      int ret;
>>
>>      av_packet_unref(s->pkt);
>> -    ret = ff_decode_get_packet(avctx, s->pkt);
>> +    ret = ff_thread_decode_get_packet(avctx, s->pkt);
>>      if (ret < 0)
>>          return ret;
>>
>> @@ -3020,7 +3021,7 @@ const FFCodec ff_mjpeg_decoder = {
>>      .close          = ff_mjpeg_decode_end,
>>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>>      .flush          = decode_flush,
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>      .p.max_lowres   = 3,
>>      .p.priv_class   = &mjpegdec_class,
>>      .p.profiles     = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
>> @@ -3050,7 +3051,7 @@ const FFCodec ff_thp_decoder = {
>>      .close          = ff_mjpeg_decode_end,
>>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>>      .flush          = decode_flush,
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>      .p.max_lowres   = 3,
>>      .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>>                        FF_CODEC_CAP_SETS_PKT_DTS,
>> @@ -3068,7 +3069,7 @@ const FFCodec ff_smvjpeg_decoder = {
>>      .close          = ff_mjpeg_decode_end,
>>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>>      .flush          = decode_flush,
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>      .caps_internal  = FF_CODEC_CAP_EXPORTS_CROPPING |
>>                        FF_CODEC_CAP_SETS_PKT_DTS |
>> FF_CODEC_CAP_INIT_CLEANUP,
>>  };
>> diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
>> index 394448c5a9..8b08dc672a 100644
>> --- a/libavcodec/sp5xdec.c
>> +++ b/libavcodec/sp5xdec.c
>> @@ -101,7 +101,7 @@ const FFCodec ff_sp5x_decoder = {
>>      .init           = ff_mjpeg_decode_init,
>>      .close          = ff_mjpeg_decode_end,
>>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>      .p.max_lowres   = 3,
>>      .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>>                        FF_CODEC_CAP_SETS_PKT_DTS,
>> @@ -118,7 +118,7 @@ const FFCodec ff_amv_decoder = {
>>      .close          = ff_mjpeg_decode_end,
>>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>>      .p.max_lowres   = 3,
>> -    .p.capabilities = AV_CODEC_CAP_DR1,
>> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>      .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>>                        FF_CODEC_CAP_SETS_PKT_DTS,
>>  };
>
> Can you test the sample that Michael mentioned here:
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/AS8PR01MB7944E105BE990A5D01EF89208FEF9@AS8PR01MB7944.eurprd01.prod.exchangelabs.com/?
> (I never got around to analyzing this, but if I am not mistaken, it
> shows that this decoder is not a simple one-in, one-out decoder, so that
> making it multithreaded will be more complicated than just adding the
> flag and using ff_thread_get_buffer/format.)

That is full FUD.

>
> - Andreas
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading
  2022-12-05 13:39 ` [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
@ 2022-12-05 14:15   ` Andreas Rheinhardt
  2022-12-05 14:28     ` Paul B Mahol
  2022-12-05 23:02     ` Timo Rothenpieler
  0 siblings, 2 replies; 30+ messages in thread
From: Andreas Rheinhardt @ 2022-12-05 14:15 UTC (permalink / raw)
  To: ffmpeg-devel

Timo Rothenpieler:
> In my tests, this led to a notable speed increase with the number
> of threads used. Decoding a 720p sample gave the following results:
> 
> 1 Thread: 1428 FPS
> 2 Threads: 2501 FPS
> 8 Threads: 7575 FPS
> Automatic: 11326 FPS (On a 16 Core/32 Threads system)
> ---
>  libavcodec/jpeglsdec.c |  2 +-
>  libavcodec/mjpegdec.c  | 13 +++++++------
>  libavcodec/sp5xdec.c   |  4 ++--
>  3 files changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
> index 2e6d018ea6..c0642e8e30 100644
> --- a/libavcodec/jpeglsdec.c
> +++ b/libavcodec/jpeglsdec.c
> @@ -559,7 +559,7 @@ const FFCodec ff_jpegls_decoder = {
>      .init           = ff_mjpeg_decode_init,
>      .close          = ff_mjpeg_decode_end,
>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
> -    .p.capabilities = AV_CODEC_CAP_DR1,
> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>      .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>                        FF_CODEC_CAP_SETS_PKT_DTS,
>  };
> diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
> index 9b7465abe7..d30d722398 100644
> --- a/libavcodec/mjpegdec.c
> +++ b/libavcodec/mjpegdec.c
> @@ -54,6 +54,7 @@
>  #include "exif.h"
>  #include "bytestream.h"
>  #include "tiff_common.h"
> +#include "thread.h"
>  
>  
>  static int init_default_huffman_tables(MJpegDecodeContext *s)
> @@ -713,7 +714,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
>                  s->avctx->pix_fmt,
>                  AV_PIX_FMT_NONE,
>              };
> -            s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
> +            s->hwaccel_pix_fmt = ff_thread_get_format(s->avctx, pix_fmts);
>              if (s->hwaccel_pix_fmt < 0)
>                  return AVERROR(EINVAL);
>  
> @@ -729,7 +730,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
>          }
>  
>          av_frame_unref(s->picture_ptr);
> -        if (ff_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
> +        if (ff_thread_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
>              return -1;
>          s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
>          s->picture_ptr->key_frame = 1;
> @@ -2388,7 +2389,7 @@ static int mjpeg_get_packet(AVCodecContext *avctx)
>      int ret;
>  
>      av_packet_unref(s->pkt);
> -    ret = ff_decode_get_packet(avctx, s->pkt);
> +    ret = ff_thread_decode_get_packet(avctx, s->pkt);
>      if (ret < 0)
>          return ret;
>  
> @@ -3020,7 +3021,7 @@ const FFCodec ff_mjpeg_decoder = {
>      .close          = ff_mjpeg_decode_end,
>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>      .flush          = decode_flush,
> -    .p.capabilities = AV_CODEC_CAP_DR1,
> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>      .p.max_lowres   = 3,
>      .p.priv_class   = &mjpegdec_class,
>      .p.profiles     = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
> @@ -3050,7 +3051,7 @@ const FFCodec ff_thp_decoder = {
>      .close          = ff_mjpeg_decode_end,
>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>      .flush          = decode_flush,
> -    .p.capabilities = AV_CODEC_CAP_DR1,
> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>      .p.max_lowres   = 3,
>      .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>                        FF_CODEC_CAP_SETS_PKT_DTS,
> @@ -3068,7 +3069,7 @@ const FFCodec ff_smvjpeg_decoder = {
>      .close          = ff_mjpeg_decode_end,
>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>      .flush          = decode_flush,
> -    .p.capabilities = AV_CODEC_CAP_DR1,
> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>      .caps_internal  = FF_CODEC_CAP_EXPORTS_CROPPING |
>                        FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP,
>  };
> diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
> index 394448c5a9..8b08dc672a 100644
> --- a/libavcodec/sp5xdec.c
> +++ b/libavcodec/sp5xdec.c
> @@ -101,7 +101,7 @@ const FFCodec ff_sp5x_decoder = {
>      .init           = ff_mjpeg_decode_init,
>      .close          = ff_mjpeg_decode_end,
>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
> -    .p.capabilities = AV_CODEC_CAP_DR1,
> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>      .p.max_lowres   = 3,
>      .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>                        FF_CODEC_CAP_SETS_PKT_DTS,
> @@ -118,7 +118,7 @@ const FFCodec ff_amv_decoder = {
>      .close          = ff_mjpeg_decode_end,
>      FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
>      .p.max_lowres   = 3,
> -    .p.capabilities = AV_CODEC_CAP_DR1,
> +    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>      .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
>                        FF_CODEC_CAP_SETS_PKT_DTS,
>  };

Can you test the sample that Michael mentioned here:
https://patchwork.ffmpeg.org/project/ffmpeg/patch/AS8PR01MB7944E105BE990A5D01EF89208FEF9@AS8PR01MB7944.eurprd01.prod.exchangelabs.com/?
(I never got around to analyzing this, but if I am not mistaken, it
shows that this decoder is not a simple one-in, one-out decoder, so that
making it multithreaded will be more complicated than just adding the
flag and using ff_thread_get_buffer/format.)

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading
  2022-12-05 13:39 [FFmpeg-devel] [PATCH 1/2] avcodec/thread: add support for frame threading receive_frame based decoders Timo Rothenpieler
@ 2022-12-05 13:39 ` Timo Rothenpieler
  2022-12-05 14:15   ` Andreas Rheinhardt
  0 siblings, 1 reply; 30+ messages in thread
From: Timo Rothenpieler @ 2022-12-05 13:39 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Timo Rothenpieler

In my tests, this led to a notable speed increase with the number
of threads used. Decoding a 720p sample gave the following results:

1 Thread: 1428 FPS
2 Threads: 2501 FPS
8 Threads: 7575 FPS
Automatic: 11326 FPS (On a 16 Core/32 Threads system)
---
 libavcodec/jpeglsdec.c |  2 +-
 libavcodec/mjpegdec.c  | 13 +++++++------
 libavcodec/sp5xdec.c   |  4 ++--
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
index 2e6d018ea6..c0642e8e30 100644
--- a/libavcodec/jpeglsdec.c
+++ b/libavcodec/jpeglsdec.c
@@ -559,7 +559,7 @@ const FFCodec ff_jpegls_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_SETS_PKT_DTS,
 };
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index 9b7465abe7..d30d722398 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -54,6 +54,7 @@
 #include "exif.h"
 #include "bytestream.h"
 #include "tiff_common.h"
+#include "thread.h"
 
 
 static int init_default_huffman_tables(MJpegDecodeContext *s)
@@ -713,7 +714,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
                 s->avctx->pix_fmt,
                 AV_PIX_FMT_NONE,
             };
-            s->hwaccel_pix_fmt = ff_get_format(s->avctx, pix_fmts);
+            s->hwaccel_pix_fmt = ff_thread_get_format(s->avctx, pix_fmts);
             if (s->hwaccel_pix_fmt < 0)
                 return AVERROR(EINVAL);
 
@@ -729,7 +730,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         }
 
         av_frame_unref(s->picture_ptr);
-        if (ff_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
+        if (ff_thread_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
             return -1;
         s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
         s->picture_ptr->key_frame = 1;
@@ -2388,7 +2389,7 @@ static int mjpeg_get_packet(AVCodecContext *avctx)
     int ret;
 
     av_packet_unref(s->pkt);
-    ret = ff_decode_get_packet(avctx, s->pkt);
+    ret = ff_thread_decode_get_packet(avctx, s->pkt);
     if (ret < 0)
         return ret;
 
@@ -3020,7 +3021,7 @@ const FFCodec ff_mjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .p.priv_class   = &mjpegdec_class,
     .p.profiles     = NULL_IF_CONFIG_SMALL(ff_mjpeg_profiles),
@@ -3050,7 +3051,7 @@ const FFCodec ff_thp_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_SETS_PKT_DTS,
@@ -3068,7 +3069,7 @@ const FFCodec ff_smvjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
     .flush          = decode_flush,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_EXPORTS_CROPPING |
                       FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index 394448c5a9..8b08dc672a 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -101,7 +101,7 @@ const FFCodec ff_sp5x_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .p.max_lowres   = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_SETS_PKT_DTS,
@@ -118,7 +118,7 @@ const FFCodec ff_amv_decoder = {
     .close          = ff_mjpeg_decode_end,
     FF_CODEC_RECEIVE_FRAME_CB(ff_mjpeg_receive_frame),
     .p.max_lowres   = 3,
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
                       FF_CODEC_CAP_SETS_PKT_DTS,
 };
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2023-09-07 17:20 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-07 11:43 [FFmpeg-devel] [PATCH 1/2] lavc: convert frame threading to the receive_frame() pattern Timo Rothenpieler
2022-12-07 11:43 ` [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
2022-12-07 13:14 ` [FFmpeg-devel] [PATCH v2] lavc: convert frame threading to the receive_frame() pattern Timo Rothenpieler
2022-12-07 13:20 ` [FFmpeg-devel] [PATCH v3] " Timo Rothenpieler
2022-12-07 22:22   ` Michael Niedermayer
2022-12-09 13:09     ` Timo Rothenpieler
2022-12-09 13:11       ` James Almer
2022-12-09 16:17       ` Michael Niedermayer
2022-12-09 13:37     ` [FFmpeg-devel] [PATCH v4] " Timo Rothenpieler
2022-12-09 23:46       ` Michael Niedermayer
2022-12-10 20:10         ` Timo Rothenpieler
2022-12-11 16:29           ` Michael Niedermayer
2022-12-12 21:42         ` [FFmpeg-devel] [PATCH v5 1/2] " Timo Rothenpieler
2022-12-12 21:42           ` [FFmpeg-devel] [PATCH v5 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
2022-12-13 18:17           ` [FFmpeg-devel] [PATCH v5 1/2] lavc: convert frame threading to the receive_frame() pattern Michael Niedermayer
2022-12-14  1:50             ` [FFmpeg-devel] [PATCH v6] " Timo Rothenpieler
2022-12-15 22:10               ` Michael Niedermayer
2022-12-19 11:37               ` Timo Rothenpieler
2022-12-20 11:45               ` Andreas Rheinhardt
2022-12-20 16:06                 ` Timo Rothenpieler
2022-12-20 16:15                   ` James Almer
2022-12-21 14:22                 ` [FFmpeg-devel] [PATCH v7 1/2] " Timo Rothenpieler
2022-12-21 14:22                   ` [FFmpeg-devel] [PATCH v7 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
2022-12-21 21:35                     ` Michael Niedermayer
  -- strict thread matches above, loose matches on Subject: below --
2022-12-05 13:39 [FFmpeg-devel] [PATCH 1/2] avcodec/thread: add support for frame threading receive_frame based decoders Timo Rothenpieler
2022-12-05 13:39 ` [FFmpeg-devel] [PATCH 2/2] avcodec/mjpegdec: add support for frame threading Timo Rothenpieler
2022-12-05 14:15   ` Andreas Rheinhardt
2022-12-05 14:28     ` Paul B Mahol
2022-12-05 23:02     ` Timo Rothenpieler
2023-09-07 17:17       ` Paul B Mahol
2023-09-07 17:28         ` Paul B Mahol

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git