Re: [FFmpeg-devel] [PATCH] avcodec/libaomenc: remove one memcpy when queueing packets

From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH] avcodec/libaomenc: remove one memcpy when queueing packets
Date: Thu, 25 Aug 2022 18:29:11 +0200
Message-ID: <DB6PR0101MB2214FAB7B46119A143EF04A58F729@DB6PR0101MB2214.eurprd01.prod.exchangelabs.com> (raw)
In-Reply-To: <20220824225209.4076-1-jamrial@gmail.com>

James Almer:
> Don't use an intermediary buffer. Achieve this by replacing FrameListData with
> a PacketList, and by allocating and populating every packet's payload before
> inserting them into the list.
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavcodec/libaomenc.c | 195 +++++++++++++++--------------------------
>  1 file changed, 70 insertions(+), 125 deletions(-)
> 
> diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
> index 485f554165..f9476b3ddf 100644
> --- a/libavcodec/libaomenc.c
> +++ b/libavcodec/libaomenc.c
> @@ -38,6 +38,7 @@
>  
>  #include "av1.h"
>  #include "avcodec.h"
> +#include "bytestream.h"
>  #include "bsf.h"
>  #include "codec_internal.h"
>  #include "encode.h"
> @@ -46,24 +47,6 @@
>  #include "packet_internal.h"
>  #include "profiles.h"
>  
> -/*
> - * Portion of struct aom_codec_cx_pkt from aom_encoder.h.
> - * One encoded frame returned from the library.
> - */
> -struct FrameListData {
> -    void *buf;                       /**< compressed data buffer */
> -    size_t sz;                       /**< length of compressed data */
> -    int64_t pts;                     /**< time stamp to show frame
> -                                          (in timebase units) */
> -    unsigned long duration;          /**< duration to show frame
> -                                          (in timebase units) */
> -    uint32_t flags;                  /**< flags for this frame */
> -    uint64_t sse[4];
> -    int have_sse;                    /**< true if we have pending sse[] */
> -    uint64_t frame_number;
> -    struct FrameListData *next;
> -};
> -
>  typedef struct AOMEncoderContext {
>      AVClass *class;
>      AVBSFContext *bsf;
> @@ -71,7 +54,8 @@ typedef struct AOMEncoderContext {
>      struct aom_image rawimg;
>      struct aom_fixed_buf twopass_stats;
>      unsigned twopass_stats_size;
> -    struct FrameListData *coded_frame_list;
> +    PacketList coded_frame_list;
> +    AVPacket *pkt;

Renaming this variable to avpkt would improve clarity by simplifying
distinguishing it from the aom_codec_cx_pkt packets.

>      int cpu_used;
>      int auto_alt_ref;
>      int arnr_max_frames;
> @@ -283,33 +267,6 @@ static av_cold void dump_enc_cfg(AVCodecContext *avctx,
>      av_log(avctx, level, "\n");
>  }
>  
> -static void coded_frame_add(void *list, struct FrameListData *cx_frame)
> -{
> -    struct FrameListData **p = list;
> -
> -    while (*p)
> -        p = &(*p)->next;
> -    *p = cx_frame;
> -    cx_frame->next = NULL;
> -}
> -
> -static av_cold void free_coded_frame(struct FrameListData *cx_frame)
> -{
> -    av_freep(&cx_frame->buf);
> -    av_freep(&cx_frame);
> -}
> -
> -static av_cold void free_frame_list(struct FrameListData *list)
> -{
> -    struct FrameListData *p = list;
> -
> -    while (p) {
> -        list = list->next;
> -        free_coded_frame(p);
> -        p = list;
> -    }
> -}
> -
>  static av_cold int codecctl_int(AVCodecContext *avctx,
>  #ifdef UENUM1BYTE
>                                  aome_enc_control_id id,
> @@ -432,7 +389,8 @@ static av_cold int aom_free(AVCodecContext *avctx)
>      aom_codec_destroy(&ctx->encoder);
>      av_freep(&ctx->twopass_stats.buf);
>      av_freep(&avctx->stats_out);
> -    free_frame_list(ctx->coded_frame_list);
> +    avpriv_packet_list_free(&ctx->coded_frame_list);
> +    av_packet_free(&ctx->pkt);
>      av_bsf_free(&ctx->bsf);
>      return 0;
>  }
> @@ -1042,6 +1000,10 @@ static av_cold int aom_init(AVCodecContext *avctx,
>             return ret;
>      }
>  
> +    ctx->pkt = av_packet_alloc();
> +    if (!ctx->pkt)
> +        return AVERROR(ENOMEM);
> +

This encoder does not have the INIT_CLEANUP flag set, so everything
leaks in case the above allocation fails. In fact, it seems like there
are already leaks in several errors paths in this function.

>      if (enccfg.rc_end_usage == AOM_CBR ||
>          enccfg.g_pass != AOM_RC_ONE_PASS) {
>          cpb_props->max_bitrate = avctx->rc_max_rate;
> @@ -1053,25 +1015,40 @@ static av_cold int aom_init(AVCodecContext *avctx,
>      return 0;
>  }
>  
> -static inline void cx_pktcpy(AOMContext *ctx,
> -                             struct FrameListData *dst,
> +static inline int cx_pktcpy(AVCodecContext *avctx,

We should not override the compiler's inlining behaviour unless we have
a good reason to do so, so you could remove it while at it.

> +                             AVPacket *dst,

Wrong indentation.

>                               const struct aom_codec_cx_pkt *src)
>  {
> -    dst->pts      = src->data.frame.pts;
> -    dst->duration = src->data.frame.duration;
> -    dst->flags    = src->data.frame.flags;
> -    dst->sz       = src->data.frame.sz;
> -    dst->buf      = src->data.frame.buf;
> +    AOMContext *ctx = avctx->priv_data;
> +    int av_unused pict_type;
> +    int ret;
> +
> +    av_packet_unref(dst);

Can dst ever be non-blank here (i.e. before the unref)?

> +    ret = ff_get_encode_buffer(avctx, dst, src->data.frame.sz, 0);
> +    if (ret < 0) {
> +        av_log(avctx, AV_LOG_ERROR,
> +               "Error getting output packet of size %"SIZE_SPECIFIER".\n", src->data.frame.sz);
> +        return ret;
> +    }
> +    memcpy(dst->data, src->data.frame.buf, src->data.frame.sz);
> +    dst->pts = dst->dts = src->data.frame.pts;
> +
> +    if (src->data.frame.flags & AOM_FRAME_IS_KEY) {
> +        dst->flags |= AV_PKT_FLAG_KEY;
>  #ifdef AOM_FRAME_IS_INTRAONLY
> -    dst->frame_number = ++ctx->frame_number;
> -    dst->have_sse = ctx->have_sse;
> +        pict_type = AV_PICTURE_TYPE_I;
> +    } else if (src->data.frame.flags & AOM_FRAME_IS_INTRAONLY) {
> +        pict_type = AV_PICTURE_TYPE_I;
> +    } else {
> +        pict_type = AV_PICTURE_TYPE_P;
> +    }
> +
>      if (ctx->have_sse) {
> -        /* associate last-seen SSE to the frame. */
> -        /* Transfers ownership from ctx to dst. */
> -        memcpy(dst->sse, ctx->sse, sizeof(dst->sse));
> +        ff_side_data_set_encoder_stats(dst, 0, ctx->sse + 1, 3, pict_type);

This function can fail.

>          ctx->have_sse = 0;
> -    }
>  #endif
> +    }
> +    return 0;
>  }
>  
>  /**
> @@ -1081,50 +1058,32 @@ static inline void cx_pktcpy(AOMContext *ctx,
>   * @return packet data size on success
>   * @return a negative AVERROR on error
>   */
> -static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
> -                      AVPacket *pkt)
> +static int storeframe(AVCodecContext *avctx, AVPacket *dst, AVPacket *src)
>  {
>      AOMContext *ctx = avctx->priv_data;
> -    int av_unused pict_type;
> -    int ret = ff_get_encode_buffer(avctx, pkt, cx_frame->sz, 0);
> -    if (ret < 0) {
> -        av_log(avctx, AV_LOG_ERROR,
> -               "Error getting output packet of size %"SIZE_SPECIFIER".\n", cx_frame->sz);
> -        return ret;
> -    }
> -    memcpy(pkt->data, cx_frame->buf, pkt->size);
> -    pkt->pts = pkt->dts = cx_frame->pts;
> +    const uint8_t *sd;
> +    size_t size;
> +    int ret;
>  
> -    if (!!(cx_frame->flags & AOM_FRAME_IS_KEY)) {
> -        pkt->flags |= AV_PKT_FLAG_KEY;
> -#ifdef AOM_FRAME_IS_INTRAONLY
> -        pict_type = AV_PICTURE_TYPE_I;
> -    } else if (cx_frame->flags & AOM_FRAME_IS_INTRAONLY) {
> -        pict_type = AV_PICTURE_TYPE_I;
> -    } else {
> -        pict_type = AV_PICTURE_TYPE_P;
> -    }
> -
> -    ff_side_data_set_encoder_stats(pkt, 0, cx_frame->sse + 1,
> -                                   cx_frame->have_sse ? 3 : 0, pict_type);
> +    av_packet_move_ref(dst, src);
>  
> -    if (cx_frame->have_sse) {
> +    sd = av_packet_get_side_data(dst, AV_PKT_DATA_QUALITY_STATS, &size);
> +    if (sd && size >= 4 + 4 + 8 * 3) {
>          int i;
> +        sd += 4 + 4;
>          for (i = 0; i < 3; ++i) {
> -            avctx->error[i] += cx_frame->sse[i + 1];
> +            avctx->error[i] += bytestream_get_le64(&sd);
>          }
> -        cx_frame->have_sse = 0;
> -#endif
>      }
>  
>      if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
> -        ret = av_bsf_send_packet(ctx->bsf, pkt);
> +        ret = av_bsf_send_packet(ctx->bsf, dst);
>          if (ret < 0) {
>              av_log(avctx, AV_LOG_ERROR, "extract_extradata filter "
>                     "failed to send input packet\n");
>              return ret;
>          }
> -        ret = av_bsf_receive_packet(ctx->bsf, pkt);
> +        ret = av_bsf_receive_packet(ctx->bsf, dst);
>  
>          if (ret < 0) {
>              av_log(avctx, AV_LOG_ERROR, "extract_extradata filter "
> @@ -1132,7 +1091,7 @@ static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
>              return ret;
>          }
>      }
> -    return pkt->size;
> +    return dst->size;
>  }
>  
>  /**
> @@ -1148,16 +1107,14 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out)
>      AOMContext *ctx = avctx->priv_data;
>      const struct aom_codec_cx_pkt *pkt;
>      const void *iter = NULL;
> -    int size = 0;
> +    int ret, size = 0;
>  
> -    if (ctx->coded_frame_list) {
> -        struct FrameListData *cx_frame = ctx->coded_frame_list;
> +    if (!avpriv_packet_list_get(&ctx->coded_frame_list, ctx->pkt)) {
>          /* return the leading frame if we've already begun queueing */
> -        size = storeframe(avctx, cx_frame, pkt_out);
> -        if (size < 0)
> -            return size;
> -        ctx->coded_frame_list = cx_frame->next;
> -        free_coded_frame(cx_frame);
> +        ret = storeframe(avctx, pkt_out, ctx->pkt);
> +        if (ret < 0)
> +            goto fail;
> +        size = ret;
>      }
>  
>      /* consume all available output from the encoder before returning. buffers
> @@ -1165,37 +1122,21 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out)
>      while ((pkt = aom_codec_get_cx_data(&ctx->encoder, &iter))) {
>          switch (pkt->kind) {
>          case AOM_CODEC_CX_FRAME_PKT:
> +            ret = cx_pktcpy(avctx, ctx->pkt, pkt);
> +            if (ret < 0)
> +                goto fail;
>              if (!size) {
> -                struct FrameListData cx_frame;
> -
>                  /* avoid storing the frame when the list is empty and we haven't yet
>                   * provided a frame for output */
> -                av_assert0(!ctx->coded_frame_list);
> -                cx_pktcpy(ctx, &cx_frame, pkt);
> -                size = storeframe(avctx, &cx_frame, pkt_out);
> -                if (size < 0)
> -                    return size;
> +                av_assert0(!ctx->coded_frame_list.head);
> +                ret = storeframe(avctx, pkt_out, ctx->pkt);
> +                if (ret < 0)
> +                    goto fail;
> +                size = ret;
>              } else {
> -                struct FrameListData *cx_frame =
> -                    av_malloc(sizeof(struct FrameListData));
> -
> -                if (!cx_frame) {
> -                    av_log(avctx, AV_LOG_ERROR,
> -                           "Frame queue element alloc failed\n");
> -                    return AVERROR(ENOMEM);
> -                }
> -                cx_pktcpy(ctx, cx_frame, pkt);
> -                cx_frame->buf = av_malloc(cx_frame->sz);
> -
> -                if (!cx_frame->buf) {
> -                    av_log(avctx, AV_LOG_ERROR,
> -                           "Data buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
> -                           cx_frame->sz);
> -                    av_freep(&cx_frame);
> -                    return AVERROR(ENOMEM);
> -                }
> -                memcpy(cx_frame->buf, pkt->data.frame.buf, pkt->data.frame.sz);

I am shocked to see that there were two memcpies.

> -                coded_frame_add(&ctx->coded_frame_list, cx_frame);
> +                ret = avpriv_packet_list_put(&ctx->coded_frame_list, ctx->pkt, NULL, 0);
> +                if (ret < 0)
> +                    goto fail;

wtf: Any error that queue_frames() returns will be translated to
"got_packet = 1" by the caller (with return code 0). Error handling in
this encoder seems to be a joke.

>              }
>              break;
>          case AOM_CODEC_STATS_PKT:
> @@ -1236,6 +1177,10 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out)
>      }
>  
>      return size;
> +fail:
> +    av_packet_unref(ctx->pkt);
> +    av_packet_unref(pkt_out);
> +    return ret;
>  }
>  
>  static enum AVPixelFormat aomfmt_to_pixfmt(struct aom_image *img)

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".