From: Raja Rathour via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: imraja729@gmail.com
Subject: [FFmpeg-devel] [PATCH] avfilter/dnn_backend_torch: enable async execution, memory safety & dynamic shapes
Date: Wed, 14 Jan 2026 13:19:41 +0530
Message-ID: <20260114074941.283607-1-imraja729@gmail.com>
This patch overhauls the LibTorch backend to support modern FFmpeg DNN features:
1. Async execution: implements non-blocking inference via ff_dnn_start_inference_async() instead of reporting the feature as missing.
2. Memory safety: fixes a memory leak by replacing the per-call input allocation with a persistent input buffer owned by THInferRequest.
3. Dynamic shapes: supports changing input resolutions by growing that buffer on the fly (a sketch of the buffer ownership model follows this list).
4. Robustness: fixes a device-selection crash on parameter-less models by taking the device from the filter context instead of the first model parameter (see the note after the diffstat).
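For context, a minimal sketch of the ownership model points 2 and 3 rely on. The names buf, buf_size and wrap_frame are illustrative stand-ins for the new THInferRequest fields and the fill_model_input_th() logic, not the code in the diff:

    // Sketch only: grow-only staging buffer wrapped by a non-owning tensor.
    #include <torch/torch.h>
    extern "C" {
    #include "libavutil/mem.h"
    }

    static float  *buf      = NULL;
    static size_t  buf_size = 0;

    static torch::Tensor wrap_frame(int c, int h, int w)
    {
        size_t need = (size_t)c * h * w * sizeof(float);
        if (!buf || buf_size < need) {   // reuse across calls, grow when needed
            av_freep(&buf);
            buf = (float *)av_malloc(need);
            buf_size = buf ? need : 0;
        }
        if (!buf)
            return torch::Tensor();      // allocation failure: caller checks defined()
        // The three-argument from_blob() does not take ownership of buf, so the
        // persistent buffer must still be released with av_freep() at teardown.
        return torch::from_blob(buf, {1, c, h, w}, torch::kFloat32);
    }

The key point is that from_blob() without a deleter never frees the pointer; ownership stays with THInferRequest, which is why th_free_request() now calls av_freep() on it.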
Signed-off-by: Raja Rathour <imraja729@gmail.com>
---
libavfilter/dnn/dnn_backend_torch.cpp | 182 ++++++++++++--------------
1 file changed, 84 insertions(+), 98 deletions(-)
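Reviewer note on point 4: the change in th_start_inference() assumes the filter-level device string ("cpu", "cuda:0", ...) maps directly onto a c10::Device. A minimal sketch of that assumption; pick_device() and the CUDA availability fallback are illustrative and not part of this patch:

    // Sketch only: map the filter-level device option onto a c10::Device.
    #include <torch/torch.h>

    static c10::Device pick_device(const char *opt)
    {
        // c10::Device parses strings such as "cpu", "cuda", "cuda:1".
        c10::Device dev = opt ? c10::Device(opt) : c10::Device(torch::kCPU);
        if (dev.is_cuda() && !torch::cuda::is_available())
            dev = c10::Device(torch::kCPU);  // degrade instead of failing in forward()
        return dev;
    }

Unlike the previous (*parameters().begin()).device() query, this works for scripted models that contain no parameters at all, which is the crash point 4 refers to.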
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
index 2e4326d9d4..a320de1bf4 100644
--- a/libavfilter/dnn/dnn_backend_torch.cpp
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -47,6 +47,8 @@ typedef struct THModel {
typedef struct THInferRequest {
torch::Tensor *output;
torch::Tensor *input_tensor;
+ float *input_data; // Persistent buffer to prevent leaks
+ size_t input_data_size; // Track size for dynamic resizing
} THInferRequest;
typedef struct THRequestItem {
@@ -95,7 +97,10 @@ static void th_free_request(THInferRequest *request)
delete(request->input_tensor);
request->input_tensor = NULL;
}
- return;
+ if (request->input_data) {
+ av_freep(&request->input_data);
+ request->input_data_size = 0;
+ }
}
static inline void destroy_request_item(THRequestItem **arg)
@@ -138,7 +143,8 @@ static void dnn_free_model_th(DNNModel **model)
av_freep(&item);
}
ff_queue_destroy(th_model->task_queue);
- delete th_model->jit_model;
+ if (th_model->jit_model)
+ delete th_model->jit_model;
av_freep(&th_model);
*model = NULL;
}
@@ -155,10 +161,6 @@ static int get_input_th(DNNModel *model, DNNData *input, const char *input_name)
return 0;
}
-static void deleter(void *arg)
-{
- av_freep(&arg);
-}
static int fill_model_input_th(THModel *th_model, THRequestItem *request)
{
@@ -168,31 +170,43 @@ static int fill_model_input_th(THModel *th_model, THRequestItem *request)
DNNData input = { 0 };
DnnContext *ctx = th_model->ctx;
int ret, width_idx, height_idx, channel_idx;
+ size_t cur_size;
lltask = (LastLevelTaskItem *)ff_queue_pop_front(th_model->lltask_queue);
if (!lltask) {
- ret = AVERROR(EINVAL);
- goto err;
+ return AVERROR(EINVAL);
}
request->lltask = lltask;
task = lltask->task;
infer_request = request->infer_request;
ret = get_input_th(&th_model->model, &input, NULL);
- if ( ret != 0) {
- goto err;
+ if (ret != 0) {
+ return ret;
}
width_idx = dnn_get_width_idx_by_layout(input.layout);
height_idx = dnn_get_height_idx_by_layout(input.layout);
channel_idx = dnn_get_channel_idx_by_layout(input.layout);
input.dims[height_idx] = task->in_frame->height;
input.dims[width_idx] = task->in_frame->width;
- input.data = av_malloc(input.dims[height_idx] * input.dims[width_idx] *
- input.dims[channel_idx] * sizeof(float));
- if (!input.data)
- return AVERROR(ENOMEM);
- infer_request->input_tensor = new torch::Tensor();
- infer_request->output = new torch::Tensor();
+
+ cur_size = input.dims[height_idx] * input.dims[width_idx] *
+ input.dims[channel_idx] * sizeof(float);
+
+ if (!infer_request->input_data || infer_request->input_data_size < cur_size) {
+ av_freep(&infer_request->input_data);
+ infer_request->input_data = (float *)av_malloc(cur_size);
+ if (!infer_request->input_data)
+ return AVERROR(ENOMEM);
+ infer_request->input_data_size = cur_size;
+ }
+
+ input.data = infer_request->input_data;
+
+ if (!infer_request->input_tensor)
+ infer_request->input_tensor = new torch::Tensor();
+ if (!infer_request->output)
+ infer_request->output = new torch::Tensor();
switch (th_model->model.func_type) {
case DFT_PROCESS_FRAME:
@@ -206,17 +220,15 @@ static int fill_model_input_th(THModel *th_model, THRequestItem *request)
}
break;
default:
- avpriv_report_missing_feature(NULL, "model function type %d", th_model->model.func_type);
+ avpriv_report_missing_feature(th_model->ctx, "model function type %d", th_model->model.func_type);
break;
}
*infer_request->input_tensor = torch::from_blob(input.data,
{1, input.dims[channel_idx], input.dims[height_idx], input.dims[width_idx]},
- deleter, torch::kFloat32);
+ torch::kFloat32);
+
return 0;
-err:
- th_free_request(infer_request);
- return ret;
}
static int th_start_inference(void *args)
@@ -240,22 +252,28 @@ static int th_start_inference(void *args)
th_model = (THModel *)task->model;
ctx = th_model->ctx;
- if (ctx->torch_option.optimize)
- torch::jit::setGraphExecutorOptimize(true);
- else
- torch::jit::setGraphExecutorOptimize(false);
+ torch::jit::setGraphExecutorOptimize(!!ctx->torch_option.optimize);
if (!infer_request->input_tensor || !infer_request->output) {
av_log(ctx, AV_LOG_ERROR, "input or output tensor is NULL\n");
return DNN_GENERIC_ERROR;
}
- // Transfer tensor to the same device as model
- c10::Device device = (*th_model->jit_model->parameters().begin()).device();
+
+ /* Take the device from the context; parameter-less models have nothing to query */
+ const char *device_name = ctx->device ? ctx->device : "cpu";
+ c10::Device device = c10::Device(device_name);
+
if (infer_request->input_tensor->device() != device)
*infer_request->input_tensor = infer_request->input_tensor->to(device);
+
inputs.push_back(*infer_request->input_tensor);
-
- *infer_request->output = th_model->jit_model->forward(inputs).toTensor();
+
+ try {
+ *infer_request->output = th_model->jit_model->forward(inputs).toTensor();
+ } catch (const c10::Error& e) {
+ av_log(ctx, AV_LOG_ERROR, "Torch forward pass failed: %s\n", e.what());
+ return DNN_GENERIC_ERROR;
+ }
return 0;
}
@@ -273,13 +291,12 @@ static void infer_completion_callback(void *args) {
outputs.order = DCO_RGB;
outputs.layout = DL_NCHW;
outputs.dt = DNN_FLOAT;
+
if (sizes.size() == 4) {
- // 4 dimensions: [batch_size, channel, height, width]
- // this format of data is normally used for video frame SR
- outputs.dims[0] = sizes.at(0); // N
- outputs.dims[1] = sizes.at(1); // C
- outputs.dims[2] = sizes.at(2); // H
- outputs.dims[3] = sizes.at(3); // W
+ outputs.dims[0] = sizes.at(0);
+ outputs.dims[1] = sizes.at(1);
+ outputs.dims[2] = sizes.at(2);
+ outputs.dims[3] = sizes.at(3);
} else {
avpriv_report_missing_feature(th_model->ctx, "Support of this kind of model");
goto err;
@@ -288,7 +305,6 @@ static void infer_completion_callback(void *args) {
switch (th_model->model.func_type) {
case DFT_PROCESS_FRAME:
if (task->do_ioproc) {
- // Post process can only deal with CPU memory.
if (output->device() != torch::kCPU)
*output = output->to(torch::kCPU);
outputs.scale = 255;
@@ -307,14 +323,15 @@ static void infer_completion_callback(void *args) {
avpriv_report_missing_feature(th_model->ctx, "model function type %d", th_model->model.func_type);
goto err;
}
+
task->inference_done++;
av_freep(&request->lltask);
+
err:
th_free_request(infer_request);
if (ff_safe_queue_push_back(th_model->request_queue, request) < 0) {
destroy_request_item(&request);
- av_log(th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n");
}
}
@@ -332,7 +349,6 @@ static int execute_model_th(THRequestItem *request, Queue *lltask_queue)
lltask = (LastLevelTaskItem *)ff_queue_peek_front(lltask_queue);
if (lltask == NULL) {
- av_log(NULL, AV_LOG_ERROR, "Failed to get LastLevelTaskItem\n");
ret = AVERROR(EINVAL);
goto err;
}
@@ -340,16 +356,19 @@ static int execute_model_th(THRequestItem *request, Queue *lltask_queue)
th_model = (THModel *)task->model;
ret = fill_model_input_th(th_model, request);
- if ( ret != 0) {
+ if (ret != 0) {
goto err;
}
+
if (task->async) {
- avpriv_report_missing_feature(th_model->ctx, "LibTorch async");
+ ret = ff_dnn_start_inference_async(th_model->ctx, &request->exec_module);
+ if (ret < 0)
+ goto err;
+ return 0;
} else {
ret = th_start_inference((void *)(request));
- if (ret != 0) {
+ if (ret != 0)
goto err;
- }
infer_completion_callback(request);
return (task->inference_done == task->inference_todo) ? 0 : DNN_GENERIC_ERROR;
}
@@ -367,7 +386,6 @@ static int get_output_th(DNNModel *model, const char *input_name, int input_widt
{
int ret = 0;
THModel *th_model = (THModel*) model;
- DnnContext *ctx = th_model->ctx;
TaskItem task = { 0 };
THRequestItem *request = NULL;
DNNExecBaseParams exec_params = {
@@ -377,20 +395,17 @@ static int get_output_th(DNNModel *model, const char *input_name, int input_widt
.in_frame = NULL,
.out_frame = NULL,
};
- ret = ff_dnn_fill_gettingoutput_task(&task, &exec_params, th_model, input_height, input_width, ctx);
- if ( ret != 0) {
+
+ ret = ff_dnn_fill_gettingoutput_task(&task, &exec_params, th_model, input_height, input_width, th_model->ctx);
+ if (ret != 0)
goto err;
- }
ret = extract_lltask_from_task(&task, th_model->lltask_queue);
- if ( ret != 0) {
- av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from task.\n");
+ if (ret != 0)
goto err;
- }
request = (THRequestItem*) ff_safe_queue_pop_front(th_model->request_queue);
if (!request) {
- av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n");
ret = AVERROR(EINVAL);
goto err;
}
@@ -407,12 +422,9 @@ err:
static THInferRequest *th_create_inference_request(void)
{
- THInferRequest *request = (THInferRequest *)av_malloc(sizeof(THInferRequest));
- if (!request) {
+ THInferRequest *request = (THInferRequest *)av_mallocz(sizeof(THInferRequest));
+ if (!request)
return NULL;
- }
- request->input_tensor = NULL;
- request->output = NULL;
return request;
}
@@ -451,38 +463,29 @@ static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, A
}
th_model->request_queue = ff_safe_queue_create();
- if (!th_model->request_queue) {
+ if (!th_model->request_queue)
goto fail;
- }
item = (THRequestItem *)av_mallocz(sizeof(THRequestItem));
- if (!item) {
+ if (!item)
goto fail;
- }
- item->lltask = NULL;
+
item->infer_request = th_create_inference_request();
- if (!item->infer_request) {
- av_log(NULL, AV_LOG_ERROR, "Failed to allocate memory for Torch inference request\n");
+ if (!item->infer_request)
goto fail;
- }
+
item->exec_module.start_inference = &th_start_inference;
item->exec_module.callback = &infer_completion_callback;
item->exec_module.args = item;
- if (ff_safe_queue_push_back(th_model->request_queue, item) < 0) {
+ if (ff_safe_queue_push_back(th_model->request_queue, item) < 0)
goto fail;
- }
item = NULL;
th_model->task_queue = ff_queue_create();
- if (!th_model->task_queue) {
- goto fail;
- }
-
th_model->lltask_queue = ff_queue_create();
- if (!th_model->lltask_queue) {
+ if (!th_model->task_queue || !th_model->lltask_queue)
goto fail;
- }
model->get_input = &get_input_th;
model->get_output = &get_output_th;
@@ -491,10 +494,8 @@ static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, A
return model;
fail:
- if (item) {
+ if (item)
destroy_request_item(&item);
- av_freep(&item);
- }
dnn_free_model_th(&model);
return NULL;
}
@@ -502,48 +503,36 @@ fail:
static int dnn_execute_model_th(const DNNModel *model, DNNExecBaseParams *exec_params)
{
THModel *th_model = (THModel *)model;
- DnnContext *ctx = th_model->ctx;
TaskItem *task;
THRequestItem *request;
int ret = 0;
- ret = ff_check_exec_params(ctx, DNN_TH, model->func_type, exec_params);
- if (ret != 0) {
- av_log(ctx, AV_LOG_ERROR, "exec parameter checking fail.\n");
+ ret = ff_check_exec_params(th_model->ctx, DNN_TH, model->func_type, exec_params);
+ if (ret != 0)
return ret;
- }
task = (TaskItem *)av_malloc(sizeof(TaskItem));
- if (!task) {
- av_log(ctx, AV_LOG_ERROR, "unable to alloc memory for task item.\n");
+ if (!task)
return AVERROR(ENOMEM);
- }
ret = ff_dnn_fill_task(task, exec_params, th_model, 0, 1);
if (ret != 0) {
av_freep(&task);
- av_log(ctx, AV_LOG_ERROR, "unable to fill task.\n");
return ret;
}
- ret = ff_queue_push_back(th_model->task_queue, task);
- if (ret < 0) {
+ if (ff_queue_push_back(th_model->task_queue, task) < 0) {
av_freep(&task);
- av_log(ctx, AV_LOG_ERROR, "unable to push back task_queue.\n");
- return ret;
+ return AVERROR(ENOMEM);
}
ret = extract_lltask_from_task(task, th_model->lltask_queue);
- if (ret != 0) {
- av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from task.\n");
+ if (ret != 0)
return ret;
- }
request = (THRequestItem *)ff_safe_queue_pop_front(th_model->request_queue);
- if (!request) {
- av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n");
+ if (!request)
return AVERROR(EINVAL);
- }
return execute_model_th(request, th_model->lltask_queue);
}
@@ -560,14 +549,11 @@ static int dnn_flush_th(const DNNModel *model)
THRequestItem *request;
if (ff_queue_size(th_model->lltask_queue) == 0)
- // no pending task need to flush
return 0;
request = (THRequestItem *)ff_safe_queue_pop_front(th_model->request_queue);
- if (!request) {
- av_log(th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
+ if (!request)
return AVERROR(EINVAL);
- }
return execute_model_th(request, th_model->lltask_queue);
}
@@ -580,4 +566,4 @@ extern const DNNModule ff_dnn_backend_torch = {
.get_result = dnn_get_result_th,
.flush = dnn_flush_th,
.free_model = dnn_free_model_th,
-};
+};
\ No newline at end of file
--
2.51.0