Re: [FFmpeg-devel] [PATCH v12 15/15] avcodec/hw_base_encode: add avctx pointer for FFHWBaseEncodeContext - Lynne via ffmpeg-devel

From: Lynne via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: "Wu, Tong1" <tong1.wu@intel.com>,
	FFmpeg development discussions and patches
	<ffmpeg-devel@ffmpeg.org>
Cc: Lynne <dev@lynne.ee>
Subject: Re: [FFmpeg-devel] [PATCH v12 15/15] avcodec/hw_base_encode: add avctx pointer for FFHWBaseEncodeContext
Date: Wed, 29 May 2024 02:02:19 +0200
Message-ID: <04370e78-0311-4c42-a065-890889b7d6c5@lynne.ee> (raw)
In-Reply-To: <CH3PR11MB865938EBB1E1E4A7F73C1B25C0F12@CH3PR11MB8659.namprd11.prod.outlook.com>

[-- Attachment #1.1.1.1: Type: text/plain, Size: 12378 bytes --]

On 29/05/2024 00:53, Wu, Tong1 wrote:
>> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Lynne
>> via ffmpeg-devel
>> Sent: Wednesday, May 29, 2024 1:08 AM
>> To: ffmpeg-devel@ffmpeg.org
>> Cc: Lynne <dev@lynne.ee>
>> Subject: Re: [FFmpeg-devel] [PATCH v12 15/15] avcodec/hw_base_encode: add
>> avctx pointer for FFHWBaseEncodeContext
>>
>> On 28/05/2024 17:48, tong1.wu-at-intel.com@ffmpeg.org wrote:
>>> From: Tong Wu <tong1.wu@intel.com>
>>>
>>> An avctx pointer is added to FFHWBaseEncodeContext. This is to make
>>> FFHWBaseEncodeContext a standalone component for ff_hw_base_*
>> functions.
>>> This patch also removes some unnecessary AVCodecContext arguments.
>>>
>>> Signed-off-by: Tong Wu <tong1.wu@intel.com>
>>> ---
>>>    libavcodec/d3d12va_encode.c |  6 +++---
>>>    libavcodec/hw_base_encode.c | 31 +++++++++++++------------------
>>>    libavcodec/hw_base_encode.h |  8 +++++---
>>>    libavcodec/vaapi_encode.c   |  6 +++---
>>>    4 files changed, 24 insertions(+), 27 deletions(-)
>>>
>>> diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
>>> index 0fbf8eb07c..6d3a53c6ca 100644
>>> --- a/libavcodec/d3d12va_encode.c
>>> +++ b/libavcodec/d3d12va_encode.c
>>> @@ -1351,7 +1351,7 @@ static int
>> d3d12va_encode_create_recon_frames(AVCodecContext *avctx)
>>>        enum AVPixelFormat recon_format;
>>>        int err;
>>>
>>> -    err = ff_hw_base_get_recon_format(avctx, NULL, &recon_format);
>>> +    err = ff_hw_base_get_recon_format(base_ctx, NULL, &recon_format);
>>>        if (err < 0)
>>>            return err;
>>>
>>> @@ -1398,7 +1398,7 @@ int ff_d3d12va_encode_init(AVCodecContext
>> *avctx)
>>>        int err;
>>>        HRESULT hr;
>>>
>>> -    err = ff_hw_base_encode_init(avctx);
>>> +    err = ff_hw_base_encode_init(avctx, base_ctx);
>>>        if (err < 0)
>>>            goto fail;
>>>
>>> @@ -1552,7 +1552,7 @@ int ff_d3d12va_encode_close(AVCodecContext
>> *avctx)
>>>        D3D12_OBJECT_RELEASE(ctx->video_device3);
>>>        D3D12_OBJECT_RELEASE(ctx->device3);
>>>
>>> -    ff_hw_base_encode_close(avctx);
>>> +    ff_hw_base_encode_close(base_ctx);
>>>
>>>        return 0;
>>>    }
>>> diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
>>> index 92f69bb78c..88efdf672c 100644
>>> --- a/libavcodec/hw_base_encode.c
>>> +++ b/libavcodec/hw_base_encode.c
>>> @@ -94,14 +94,13 @@ static void
>> hw_base_encode_remove_refs(FFHWBaseEncodePicture *pic, int level)
>>>        pic->ref_removed[level] = 1;
>>>    }
>>>
>>> -static void hw_base_encode_set_b_pictures(AVCodecContext *avctx,
>>> +static void hw_base_encode_set_b_pictures(FFHWBaseEncodeContext *ctx,
>>>                                              FFHWBaseEncodePicture *start,
>>>                                              FFHWBaseEncodePicture *end,
>>>                                              FFHWBaseEncodePicture *prev,
>>>                                              int current_depth,
>>>                                              FFHWBaseEncodePicture **last)
>>>    {
>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>>        FFHWBaseEncodePicture *pic, *next, *ref;
>>>        int i, len;
>>>
>>> @@ -148,20 +147,19 @@ static void
>> hw_base_encode_set_b_pictures(AVCodecContext *avctx,
>>>                hw_base_encode_add_ref(pic, ref, 0, 1, 0);
>>>
>>>            if (i > 1)
>>> -            hw_base_encode_set_b_pictures(avctx, start, pic, pic,
>>> +            hw_base_encode_set_b_pictures(ctx, start, pic, pic,
>>>                                              current_depth + 1, &next);
>>>            else
>>>                next = pic;
>>>
>>> -        hw_base_encode_set_b_pictures(avctx, pic, end, next,
>>> +        hw_base_encode_set_b_pictures(ctx, pic, end, next,
>>>                                          current_depth + 1, last);
>>>        }
>>>    }
>>>
>>> -static void hw_base_encode_add_next_prev(AVCodecContext *avctx,
>>> +static void hw_base_encode_add_next_prev(FFHWBaseEncodeContext
>> *ctx,
>>>                                             FFHWBaseEncodePicture *pic)
>>>    {
>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>>        int i;
>>>
>>>        if (!pic)
>>> @@ -333,12 +331,12 @@ static int
>> hw_base_encode_pick_next(AVCodecContext *avctx,
>>>        }
>>>
>>>        if (b_counter > 0) {
>>> -        hw_base_encode_set_b_pictures(avctx, start, pic, pic, 1,
>>> +        hw_base_encode_set_b_pictures(ctx, start, pic, pic, 1,
>>>                                          &prev);
>>>        } else {
>>>            prev = pic;
>>>        }
>>> -    hw_base_encode_add_next_prev(avctx, prev);
>>> +    hw_base_encode_add_next_prev(ctx, prev);
>>>
>>>        return 0;
>>>    }
>>> @@ -687,9 +685,9 @@ int ff_hw_base_init_gop_structure(AVCodecContext
>> *avctx, uint32_t ref_l0, uint32
>>>        return 0;
>>>    }
>>>
>>> -int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void
>> *hwconfig, enum AVPixelFormat *fmt)
>>> +int ff_hw_base_get_recon_format(FFHWBaseEncodeContext *ctx, const
>> void *hwconfig,
>>> +                                enum AVPixelFormat *fmt)
>>>    {
>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>>        AVHWFramesConstraints *constraints = NULL;
>>>        enum AVPixelFormat recon_format;
>>>        int err, i;
>>> @@ -722,14 +720,14 @@ int
>> ff_hw_base_get_recon_format(AVCodecContext *avctx, const void *hwconfig,
>> enu
>>>            // No idea what to use; copy input format.
>>>            recon_format = ctx->input_frames->sw_format;
>>>        }
>>> -    av_log(avctx, AV_LOG_DEBUG, "Using %s as format of "
>>> +    av_log(ctx->avctx, AV_LOG_DEBUG, "Using %s as format of "
>>>               "reconstructed frames.\n", av_get_pix_fmt_name(recon_format));
>>>
>>>        if (ctx->surface_width  < constraints->min_width  ||
>>>            ctx->surface_height < constraints->min_height ||
>>>            ctx->surface_width  > constraints->max_width ||
>>>            ctx->surface_height > constraints->max_height) {
>>> -        av_log(avctx, AV_LOG_ERROR, "Hardware does not support encoding at
>> "
>>> +        av_log(ctx->avctx, AV_LOG_ERROR, "Hardware does not support
>> encoding at "
>>>                   "size %dx%d (constraints: width %d-%d height %d-%d).\n",
>>>                   ctx->surface_width, ctx->surface_height,
>>>                   constraints->min_width,  constraints->max_width,
>>> @@ -756,10 +754,9 @@ int
>> ff_hw_base_encode_free(FFHWBaseEncodePicture *pic)
>>>        return 0;
>>>    }
>>>
>>> -int ff_hw_base_encode_init(AVCodecContext *avctx)
>>> +int ff_hw_base_encode_init(AVCodecContext *avctx,
>> FFHWBaseEncodeContext *ctx)
>>>    {
>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>> -
>>> +    ctx->avctx = avctx;
>>>        ctx->frame = av_frame_alloc();
>>>        if (!ctx->frame)
>>>            return AVERROR(ENOMEM);
>>> @@ -789,10 +786,8 @@ int ff_hw_base_encode_init(AVCodecContext
>> *avctx)
>>>        return 0;
>>>    }
>>>
>>> -int ff_hw_base_encode_close(AVCodecContext *avctx)
>>> +int ff_hw_base_encode_close(FFHWBaseEncodeContext *ctx)
>>>    {
>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>> -
>>>        av_fifo_freep2(&ctx->encode_fifo);
>>>
>>>        av_frame_free(&ctx->frame);
>>> diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
>>> index 15ef3d7ac6..13c1fc0f69 100644
>>> --- a/libavcodec/hw_base_encode.h
>>> +++ b/libavcodec/hw_base_encode.h
>>> @@ -116,6 +116,7 @@ typedef struct FFHWEncodePictureOperation {
>>>
>>>    typedef struct FFHWBaseEncodeContext {
>>>        const AVClass *class;
>>> +    AVCodecContext *avctx;
>>>
>>>        // Hardware-specific hooks.
>>>        const struct FFHWEncodePictureOperation *op;
>>> @@ -222,13 +223,14 @@ int
>> ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
>>>    int ff_hw_base_init_gop_structure(AVCodecContext *avctx, uint32_t ref_l0,
>> uint32_t ref_l1,
>>>                                      int flags, int prediction_pre_only);
>>>
>>> -int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void
>> *hwconfig, enum AVPixelFormat *fmt);
>>> +int ff_hw_base_get_recon_format(FFHWBaseEncodeContext *ctx, const
>> void *hwconfig,
>>> +                                enum AVPixelFormat *fmt);
>>>
>>>    int ff_hw_base_encode_free(FFHWBaseEncodePicture *pic);
>>>
>>> -int ff_hw_base_encode_init(AVCodecContext *avctx);
>>> +int ff_hw_base_encode_init(AVCodecContext *avctx,
>> FFHWBaseEncodeContext *ctx);
>>>
>>> -int ff_hw_base_encode_close(AVCodecContext *avctx);
>>> +int ff_hw_base_encode_close(FFHWBaseEncodeContext *ctx);
>>>
>>>    #define HW_BASE_ENCODE_COMMON_OPTIONS \
>>>        { "idr_interval", \
>>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>>> index b35a23e852..0693e77548 100644
>>> --- a/libavcodec/vaapi_encode.c
>>> +++ b/libavcodec/vaapi_encode.c
>>> @@ -2059,7 +2059,7 @@ static av_cold int
>> vaapi_encode_create_recon_frames(AVCodecContext *avctx)
>>>        }
>>>        hwconfig->config_id = ctx->va_config;
>>>
>>> -    err = ff_hw_base_get_recon_format(avctx, (const void*)hwconfig,
>> &recon_format);
>>> +    err = ff_hw_base_get_recon_format(base_ctx, (const void*)hwconfig,
>> &recon_format);
>>>        if (err < 0)
>>>            goto fail;
>>>
>>> @@ -2106,7 +2106,7 @@ av_cold int ff_vaapi_encode_init(AVCodecContext
>> *avctx)
>>>        VAStatus vas;
>>>        int err;
>>>
>>> -    err = ff_hw_base_encode_init(avctx);
>>> +    err = ff_hw_base_encode_init(avctx, base_ctx);
>>>        if (err < 0)
>>>            goto fail;
>>>
>>> @@ -2313,7 +2313,7 @@ av_cold int
>> ff_vaapi_encode_close(AVCodecContext *avctx)
>>>        av_freep(&ctx->codec_sequence_params);
>>>        av_freep(&ctx->codec_picture_params);
>>>
>>> -    ff_hw_base_encode_close(avctx);
>>> +    ff_hw_base_encode_close(base_ctx);
>>>
>>>        return 0;
>>>    }
>>
>> Err, you missed ff_hw_base_encode_set_output_property,
>> ff_hw_base_encode_receive_packet and ff_hw_base_init_gop_structure?
>>
>> Rest looks better.
> 
> ff_hw_base_encode_receive_packet is directly bind to int (*receive_packet)(struct AVCodecContext *avctx, struct AVPacket *avpkt); I thought maybe this should not be changed? For the other two functions, components in avctx are used. If they need to be changed, I could either take both FFHWBaseEncodeContext and AVCodecContext as arguments or use ctx->avctx->* to get the components which one do you think better. Thank you.

Take both FFHWBaseEncodeContext and AVCodecContext as arguments, and use 
it to get fields such as gop_size, max_b_frames and so on.

*Do not use* avctx->priv_data anywhere in hw_base_encode. Any usage of 
it has to be switched to a user-given FFHWBaseEncodeContext via a 
function argument.

ff_hw_base_encode_receive_packet needs to be switched to 
(FFHWBaseEncodeContext, AVCodecContext...) as well, as it uses 
avctx->priv_data.

Hardware encoders should have small wrappers for receive_packet which 
would call ff_hw_base_encode_receive_packet. It's not much, just 3 lines.

Hardware encoders could still use FFHWBaseEncodeContext as their own 
main encode context, but they would no longer be required to use it as 
their main context. It'll make sense with some code:

For example, d3d12va_encode's wrapper function would be:
static int receive_packet(avctx...)
{
     return ff_hw_base_encode_receive_packet(avctx->priv_data, avctx...)
}

Vulkan's wrapper would, on the other hand look like:
static int receive_packet(avctx...)
{
     VulkanEncodeContext *vkenc = avctx->priv_data;
     return ff_hw_base_encode_receive_packet(vkenc->hw_base, avctx...)
}

You should also rename FFHWBaseEncodeContext.avctx to 
FFHWBaseEncodeContext.log_ctx with a type of void *, and use it for 
av_log where needed. It would be set to avctx in 
ff_hw_base_encode_init(), but encoders could override it if they needed 
to by modifying the context.
If frame threading is ever implemented, this change would make it 
easier, as the context for each call would be exlicitly known.

Thanks

[-- Attachment #1.1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 637 bytes --]

[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 236 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".