From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.ffmpeg.org (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id 612644BE17 for ; Thu, 22 May 2025 15:25:21 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTP id 48A3268DCDF; Thu, 22 May 2025 18:25:16 +0300 (EEST) Received: from btbn.de (btbn.de [144.76.60.213]) by ffbox0-bg.ffmpeg.org (Postfix) with ESMTPS id 5B74868B7AD for ; Thu, 22 May 2025 18:25:08 +0300 (EEST) Received: from [authenticated] by btbn.de (Postfix) with ESMTPSA id CF7C728191074 for ; Thu, 22 May 2025 17:25:05 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=rothenpieler.org; s=mail; t=1747927505; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=pS8uPsBKnXjjoPjpK8xEG5pheKkjnd6LwTKy0Osp6mw=; b=ZGjuTVvxsE/MbuFs7CyBsvhSNK6cmt02VESVmsezN8gcyvKB/XnKyG75asgxFyuDsrCXf0 k8JhyE6mB65/+IZ7p88GUzYj6oVir2CYJp+rrbv5tR8YRf10t96KSgtYjJhuk3T1q+0f5v z+8YAXOMPraizar9ixYFwM7YSuhL1vF0nPEL5JnI9EpIE53EJbRkWuF39zTITIuTbryVhe 7SM0oOhbk0Njk73ciFvA7SGOEqTXvvU484qP8e3o+RFVmYqnOY5D/aL65fkMWQsJIXn8FR cHA3h0OIHyawbWR/DNb925XVrwhjoxfB7K5DdtBTb/13x/YnonZZ+aDVg6XDBw== Message-ID: <12837948-f6cc-4204-bf40-91cf7cafeea5@rothenpieler.org> Date: Thu, 22 May 2025 17:25:04 +0200 MIME-Version: 1.0 User-Agent: Mozilla Thunderbird To: ffmpeg-devel@ffmpeg.org References: Content-Language: en-US, de-DE From: Timo Rothenpieler In-Reply-To: Subject: Re: [FFmpeg-devel] [PATCH 1/2] avfilter: add scale_d3d11 filter X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches 
Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="us-ascii"; Format="flowed" Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: On 22/05/2025 15:20, Dash Santosh Sathyanarayanan wrote: > This commit introduces a new hardware-accelerated video filter, scale_d3d11, > which performs scaling and format conversion using Direct3D 11. The filter enables > efficient GPU-based scaling and pixel format conversion (p010 to nv12), reducing > CPU overhead and latency in video pipelines. > --- > Changelog | 1 + > libavcodec/decode.c | 2 +- > libavcodec/dxva2.c | 3 + > libavfilter/Makefile | 1 + > libavfilter/allfilters.c | 1 + > libavfilter/vf_scale_d3d11.c | 480 ++++++++++++++++++++++++++++++++++ > libavutil/hwcontext_d3d11va.c | 40 ++- > 7 files changed, 514 insertions(+), 14 deletions(-) > create mode 100644 libavfilter/vf_scale_d3d11.c > > diff --git a/Changelog b/Changelog > index 4217449438..68610a63d0 100644 > --- a/Changelog > +++ b/Changelog > @@ -18,6 +18,7 @@ version : > - APV encoding support through a libopenapv wrapper > - VVC decoder supports all content of SCC (Screen Content Coding): > IBC (Inter Block Copy), Palette Mode and ACT (Adaptive Color Transform > +- vf_scale_d3d11 filter Bit of a nit, this could at least say "Added". > > > version 7.1: > diff --git a/libavcodec/decode.c b/libavcodec/decode.c > index c2b2dd6e3b..a796ae7930 100644 > --- a/libavcodec/decode.c > +++ b/libavcodec/decode.c > @@ -1079,7 +1079,7 @@ int ff_decode_get_hw_frames_ctx(AVCodecContext *avctx, > if (frames_ctx->initial_pool_size) { > // We guarantee 4 base work surfaces. The function above guarantees 1 > // (the absolute minimum), so add the missing count. > - frames_ctx->initial_pool_size += 3; > + frames_ctx->initial_pool_size += 33; This seems a bit extreme, and can potentially drastically increase VRAM usage of anything using d3d11va. 
> } > > ret = av_hwframe_ctx_init(avctx->hw_frames_ctx); > diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c > index 22ecd5acaf..37dab6cd68 100644 > --- a/libavcodec/dxva2.c > +++ b/libavcodec/dxva2.c > @@ -647,6 +647,9 @@ int ff_dxva2_common_frame_params(AVCodecContext *avctx, > AVD3D11VAFramesContext *frames_hwctx = frames_ctx->hwctx; > > frames_hwctx->BindFlags |= D3D11_BIND_DECODER; > + if (frames_ctx->sw_format == AV_PIX_FMT_NV12) { > + frames_hwctx->BindFlags |= D3D11_BIND_VIDEO_ENCODER; > + } This change also seems a bit random here. Using NV12 does not automatically mean you'll encode with it. Did not look at the rest yet. > + return AVERROR_EXTERNAL; > + } > + > + ///< Set up output frame > + ret = av_frame_copy_props(out, in); > + if (ret < 0) { > + av_log(ctx, AV_LOG_ERROR, "Failed to copy frame properties\n"); > + videoContext->lpVtbl->Release(videoContext); > + inputView->lpVtbl->Release(inputView); > + av_frame_free(&in); > + av_frame_free(&out); > + return ret; > + } > + > + out->data[0] = (uint8_t *)output_texture; > + out->data[1] = (uint8_t *)(intptr_t)0; > + out->width = s->width; > + out->height = s->height; > + out->format = AV_PIX_FMT_D3D11; > + > + ///< Clean up resources > + inputView->lpVtbl->Release(inputView); > + videoContext->lpVtbl->Release(videoContext); > + if (s->outputView) { > + s->outputView->lpVtbl->Release(s->outputView); > + s->outputView = NULL; > + } > + av_frame_free(&in); > + > + ///< Forward the frame > + return ff_filter_frame(outlink, out); > +} > + > +static int scale_d3d11_config_props(AVFilterLink *outlink) > +{ > + AVFilterContext *ctx = outlink->src; > + ScaleD3D11Context *s = ctx->priv; > + AVFilterLink *inlink = ctx->inputs[0]; > + FilterLink *inl = ff_filter_link(inlink); > + FilterLink *outl = ff_filter_link(outlink); > + int ret; > + > + ///< Clean up any previous resources > + release_d3d11_resources(s); > + > + ///< Evaluate output dimensions > + ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, 
inlink, outlink, &s->width, &s->height); > + if (ret < 0) { > + av_log(ctx, AV_LOG_ERROR, "Failed to evaluate dimensions\n"); > + return ret; > + } > + > + outlink->w = s->width; > + outlink->h = s->height; > + > + ///< Validate input hw_frames_ctx > + if (!inl->hw_frames_ctx) { > + av_log(ctx, AV_LOG_ERROR, "No hw_frames_ctx available on input link\n"); > + return AVERROR(EINVAL); > + } > + > + ///< Propagate hw_frames_ctx to output > + outl->hw_frames_ctx = av_buffer_ref(inl->hw_frames_ctx); > + if (!outl->hw_frames_ctx) { > + av_log(ctx, AV_LOG_ERROR, "Failed to propagate hw_frames_ctx to output\n"); > + return AVERROR(ENOMEM); > + } > + > + ///< Initialize filter's hardware device context > + if (!s->hw_device_ctx) { > + AVHWFramesContext *in_frames_ctx = (AVHWFramesContext *)inl->hw_frames_ctx->data; > + s->hw_device_ctx = av_buffer_ref(in_frames_ctx->device_ref); > + if (!s->hw_device_ctx) { > + av_log(ctx, AV_LOG_ERROR, "Failed to initialize filter hardware device context\n"); > + return AVERROR(ENOMEM); > + } > + } > + > + ///< Get D3D11 device and context (but don't initialize processor yet - done in filter_frame) > + AVHWDeviceContext *hwctx = (AVHWDeviceContext *)s->hw_device_ctx->data; > + AVD3D11VADeviceContext *d3d11_hwctx = (AVD3D11VADeviceContext *)hwctx->hwctx; > + > + s->device = d3d11_hwctx->device; > + s->context = d3d11_hwctx->device_context; > + > + if (!s->device || !s->context) { > + av_log(ctx, AV_LOG_ERROR, "Failed to get valid D3D11 device or context\n"); > + return AVERROR(EINVAL); > + } > + > + ///< Create new hardware frames context for output > + AVHWFramesContext *in_frames_ctx = (AVHWFramesContext *)inl->hw_frames_ctx->data; > + s->hw_frames_ctx_out = av_hwframe_ctx_alloc(s->hw_device_ctx); > + if (!s->hw_frames_ctx_out) > + return AVERROR(ENOMEM); > + > + enum AVPixelFormat sw_format; > + switch (s->output_format_opt) { > + case OUTPUT_NV12: > + sw_format = AV_PIX_FMT_NV12; > + break; > + case OUTPUT_P010: > + sw_format = 
AV_PIX_FMT_P010; > + break; > + default: > + return AVERROR(EINVAL); > + } > + > + AVHWFramesContext *frames_ctx = (AVHWFramesContext *)s->hw_frames_ctx_out->data; > + frames_ctx->format = AV_PIX_FMT_D3D11; > + frames_ctx->sw_format = sw_format; > + frames_ctx->width = s->width; > + frames_ctx->height = s->height; > + frames_ctx->initial_pool_size = 30; ///< Adjust pool size as needed > + > + AVD3D11VAFramesContext *frames_hwctx = frames_ctx->hwctx; > + frames_hwctx->MiscFlags = 0; > + frames_hwctx->BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_VIDEO_ENCODER; > + > + ret = av_hwframe_ctx_init(s->hw_frames_ctx_out); > + if (ret < 0) { > + av_buffer_unref(&s->hw_frames_ctx_out); > + return ret; > + } > + > + outl->hw_frames_ctx = av_buffer_ref(s->hw_frames_ctx_out); > + if (!outl->hw_frames_ctx) > + return AVERROR(ENOMEM); > + > + av_log(ctx, AV_LOG_VERBOSE, "D3D11 scale config: %dx%d -> %dx%d\n", > + inlink->w, inlink->h, outlink->w, outlink->h); > + return 0; > +} > + > +static av_cold void scale_d3d11_uninit(AVFilterContext *ctx) { > + ScaleD3D11Context *s = ctx->priv; > + > + ///< Release D3D11 resources > + release_d3d11_resources(s); > + > + ///< Free the hardware device context reference > + av_buffer_unref(&s->hw_frames_ctx_out); > + av_buffer_unref(&s->hw_device_ctx); > + > + ///< Free option strings > + av_freep(&s->w_expr); > + av_freep(&s->h_expr); > +} > + > +static const AVFilterPad scale_d3d11_inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .filter_frame = scale_d3d11_filter_frame, > + }, > +}; > + > +static const AVFilterPad scale_d3d11_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = scale_d3d11_config_props, > + }, > +}; > + > +#define OFFSET(x) offsetof(ScaleD3D11Context, x) > +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) > + > +static const AVOption scale_d3d11_options[] = { > + { "width", "Output video width", OFFSET(w_expr), 
AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, > + { "height", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, > + { "output_fmt", "Output format", OFFSET(output_format_opt), AV_OPT_TYPE_INT, {.i64 = OUTPUT_NV12}, 0, OUTPUT_P010, FLAGS, "fmt" }, > + { "nv12", "NV12 format", 0, AV_OPT_TYPE_CONST, {.i64 = OUTPUT_NV12}, 0, 0, FLAGS, "fmt" }, > + { "p010", "P010 format", 0, AV_OPT_TYPE_CONST, {.i64 = OUTPUT_P010}, 0, 0, FLAGS, "fmt" }, > + { NULL } > +}; > + > +AVFILTER_DEFINE_CLASS(scale_d3d11); > + > +const FFFilter ff_vf_scale_d3d11 = { > + .p.name = "scale_d3d11", > + .p.description = NULL_IF_CONFIG_SMALL("Scale video using Direct3D11"), > + .priv_size = sizeof(ScaleD3D11Context), > + .p.priv_class = &scale_d3d11_class, > + .init = scale_d3d11_init, > + .uninit = scale_d3d11_uninit, > + FILTER_INPUTS(scale_d3d11_inputs), > + FILTER_OUTPUTS(scale_d3d11_outputs), > + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_D3D11), > + .p.flags = AVFILTER_FLAG_HWDEVICE, > + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, > +}; > \ No newline at end of file > diff --git a/libavutil/hwcontext_d3d11va.c b/libavutil/hwcontext_d3d11va.c > index 1a047ce57b..36694896e4 100644 > --- a/libavutil/hwcontext_d3d11va.c > +++ b/libavutil/hwcontext_d3d11va.c > @@ -82,6 +82,8 @@ typedef struct D3D11VAFramesContext { > > int nb_surfaces; > int nb_surfaces_used; > + int retries; > + int max_retries; > > DXGI_FORMAT format; > > @@ -258,7 +260,9 @@ static AVBufferRef *d3d11va_pool_alloc(void *opaque, size_t size) > ID3D11Texture2D_GetDesc(hwctx->texture, &texDesc); > > if (s->nb_surfaces_used >= texDesc.ArraySize) { > - av_log(ctx, AV_LOG_ERROR, "Static surface pool size exceeded.\n"); > + if (s->retries >= s->max_retries) { > + av_log(ctx, AV_LOG_ERROR, "Static surface pool size exceeded.\n"); > + } > return NULL; > } > > @@ -339,20 +343,30 @@ static int d3d11va_frames_init(AVHWFramesContext *ctx) > static int d3d11va_get_buffer(AVHWFramesContext *ctx, AVFrame 
*frame) > { > AVD3D11FrameDescriptor *desc; > + D3D11VAFramesContext *s = ctx->hwctx; > + s->retries = 0; > + s->max_retries = 50; > + > + while (s->retries < s->max_retries) { > + > + frame->buf[0] = av_buffer_pool_get(ctx->pool); > + if (frame->buf[0]) { > + desc = (AVD3D11FrameDescriptor *)frame->buf[0]->data; > + > + frame->data[0] = (uint8_t *)desc->texture; > + frame->data[1] = (uint8_t *)desc->index; > + frame->format = AV_PIX_FMT_D3D11; > + frame->width = ctx->width; > + frame->height = ctx->height; > + > + return 0; > + } > > - frame->buf[0] = av_buffer_pool_get(ctx->pool); > - if (!frame->buf[0]) > - return AVERROR(ENOMEM); > - > - desc = (AVD3D11FrameDescriptor *)frame->buf[0]->data; > - > - frame->data[0] = (uint8_t *)desc->texture; > - frame->data[1] = (uint8_t *)desc->index; > - frame->format = AV_PIX_FMT_D3D11; > - frame->width = ctx->width; > - frame->height = ctx->height; > + av_usleep(1000); > + s->retries++; > + } > > - return 0; > + return AVERROR(ENOMEM); > } > > static int d3d11va_transfer_get_formats(AVHWFramesContext *ctx, > > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".