* [FFmpeg-devel] [PR] avfilter/dnn: implement persistent buffers for LibTorch backend (PR #21748)
@ 2026-02-13 18:27 Raja-89 via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: Raja-89 via ffmpeg-devel @ 2026-02-13 18:27 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Raja-89
PR #21748 opened by Raja-89
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21748
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21748.patch
### Overview:
This patch (Part 2 of the LibTorch backend overhaul) optimizes memory management by implementing persistent input buffers. Previously, the backend allocated and freed the input buffer on every inference call, which could lead to heap fragmentation; this update reuses a single grow-only buffer across frames so steady-state processing performs no per-frame allocations.
### Key Changes:
- Persistent THInferRequest Buffers: Added input_data and input_data_size to the THInferRequest struct to maintain memory across inference cycles.
- Surgical Reallocation: Updated fill_model_input_th to use a "grow-only" allocation strategy. The buffer is only reallocated if the incoming frame resolution exceeds the currently allocated capacity.
- Memory Safety: Integrated strict cleanup in th_free_request and dnn_free_model_th using av_freep to ensure all persistent memory is released on filtergraph teardown.
- Code Cleanup: Improved dnn_free_model_th to safely drain all queues (task, lltask, and pending) and join the worker thread before destruction.
### Verification:
- Valgrind: Confirmed "definitely lost: 0 bytes" for allocations related to backend logic. Static registries in LibTorch/Protobuf are reported as "still reachable", which is standard behavior for those libraries.
- Stability: Tested with high-frame-rate streams (100fps) and dynamic resolution jumps (128x128 to 256x256 concat) to ensure the reallocation logic is robust.
- Linting: Verified with patcheck and manual whitespace stripping to ensure no formatting noise.
From 6b70e9b87e7cb512256ae6594fff2beab7c61b32 Mon Sep 17 00:00:00 2001
From: Raja Rathour <imraja729@gmail.com>
Date: Fri, 13 Feb 2026 23:53:50 +0530
Subject: [PATCH] avfilter/dnn: implement persistent buffers for LibTorch
backend
---
Changelog | 2 +
libavfilter/dnn/dnn_backend_torch.cpp | 152 +++++++++++++++-----------
2 files changed, 88 insertions(+), 66 deletions(-)
diff --git a/Changelog b/Changelog
index a9d68b369e..05f2bdb2fd 100644
--- a/Changelog
+++ b/Changelog
@@ -2256,3 +2256,5 @@ version 0.3.1: added avi/divx support
version 0.3: initial public release
+
+- libavfilter/dnn: persistent buffer management for LibTorch backend
\ No newline at end of file
diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
index d3c4966c09..6c16396468 100644
--- a/libavfilter/dnn/dnn_backend_torch.cpp
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -56,6 +56,8 @@ typedef struct THModel {
typedef struct THInferRequest {
torch::Tensor *output;
torch::Tensor *input_tensor;
+ uint8_t *input_data; // New: Persistent buffer for input pixels
+ size_t input_data_size; // New: Current size of the buffer
} THInferRequest;
typedef struct THRequestItem {
@@ -96,15 +98,22 @@ static void th_free_request(THInferRequest *request)
{
if (!request)
return;
- if (request->output) {
- delete(request->output);
- request->output = NULL;
- }
+
if (request->input_tensor) {
- delete(request->input_tensor);
+ delete request->input_tensor;
request->input_tensor = NULL;
}
- return;
+
+ if (request->output) {
+ delete request->output;
+ request->output = NULL;
+ }
+
+ /* Free the persistent buffer */
+ if (request->input_data) {
+ av_freep(&request->input_data);
+ }
+ request->input_data_size = 0;
}
static inline void destroy_request_item(THRequestItem **arg)
@@ -129,7 +138,7 @@ static void dnn_free_model_th(DNNModel **model)
th_model = (THModel *)(*model);
- /* 1. Stop and join the worker thread if it exists */
+ /* 1. Stop and join the worker thread */
if (th_model->worker_thread) {
{
std::lock_guard<std::mutex> lock(*th_model->mutex);
@@ -151,7 +160,7 @@ static void dnn_free_model_th(DNNModel **model)
th_model->cond = NULL;
}
- /* 3. Clean up the pending queue */
+ /* 3. Clean up the pending queue (Async tasks) */
if (th_model->pending_queue) {
while (ff_safe_queue_size(th_model->pending_queue) > 0) {
THRequestItem *item = (THRequestItem *)ff_safe_queue_pop_front(th_model->pending_queue);
@@ -160,7 +169,7 @@ static void dnn_free_model_th(DNNModel **model)
ff_safe_queue_destroy(th_model->pending_queue);
}
- /* 4. Clean up standard backend queues */
+ /* 4. Clean up standard backend queues and persistent request buffers */
if (th_model->request_queue) {
while (ff_safe_queue_size(th_model->request_queue) != 0) {
THRequestItem *item = (THRequestItem *)ff_safe_queue_pop_front(th_model->request_queue);
@@ -169,6 +178,7 @@ static void dnn_free_model_th(DNNModel **model)
ff_safe_queue_destroy(th_model->request_queue);
}
+ /* 5. Clean up task and lltask queues */
if (th_model->lltask_queue) {
while (ff_queue_size(th_model->lltask_queue) != 0) {
LastLevelTaskItem *item = (LastLevelTaskItem *)ff_queue_pop_front(th_model->lltask_queue);
@@ -180,14 +190,16 @@ static void dnn_free_model_th(DNNModel **model)
if (th_model->task_queue) {
while (ff_queue_size(th_model->task_queue) != 0) {
TaskItem *item = (TaskItem *)ff_queue_pop_front(th_model->task_queue);
- av_frame_free(&item->in_frame);
- av_frame_free(&item->out_frame);
- av_freep(&item);
+ if (item) {
+ av_frame_free(&item->in_frame);
+ av_frame_free(&item->out_frame);
+ av_freep(&item);
+ }
}
ff_queue_destroy(th_model->task_queue);
}
- /* 5. Final model cleanup */
+ /* 6. Final model cleanup */
if (th_model->jit_model)
delete th_model->jit_model;
@@ -195,18 +207,6 @@ static void dnn_free_model_th(DNNModel **model)
*model = NULL;
}
-static int get_input_th(DNNModel *model, DNNData *input, const char *input_name)
-{
- input->dt = DNN_FLOAT;
- input->order = DCO_RGB;
- input->layout = DL_NCHW;
- input->dims[0] = 1;
- input->dims[1] = 3;
- input->dims[2] = -1;
- input->dims[3] = -1;
- return 0;
-}
-
static void deleter(void *arg)
{
av_freep(&arg);
@@ -214,61 +214,68 @@ static void deleter(void *arg)
static int fill_model_input_th(THModel *th_model, THRequestItem *request)
{
- LastLevelTaskItem *lltask = NULL;
- TaskItem *task = NULL;
- THInferRequest *infer_request = NULL;
+ LastLevelTaskItem *lltask;
+ TaskItem *task;
+ THInferRequest *infer_request;
DNNData input = { 0 };
- DnnContext *ctx = th_model->ctx;
int ret, width_idx, height_idx, channel_idx;
+ size_t cur_size;
lltask = (LastLevelTaskItem *)ff_queue_pop_front(th_model->lltask_queue);
- if (!lltask) {
- ret = AVERROR(EINVAL);
- goto err;
- }
+ if (!lltask)
+ return AVERROR(EINVAL);
+
request->lltask = lltask;
task = lltask->task;
infer_request = request->infer_request;
ret = get_input_th(&th_model->model, &input, NULL);
- if ( ret != 0) {
- goto err;
- }
+ if (ret != 0)
+ return ret;
+
width_idx = dnn_get_width_idx_by_layout(input.layout);
height_idx = dnn_get_height_idx_by_layout(input.layout);
channel_idx = dnn_get_channel_idx_by_layout(input.layout);
+
input.dims[height_idx] = task->in_frame->height;
input.dims[width_idx] = task->in_frame->width;
- input.data = av_malloc(input.dims[height_idx] * input.dims[width_idx] *
- input.dims[channel_idx] * sizeof(float));
- if (!input.data)
- return AVERROR(ENOMEM);
- infer_request->input_tensor = new torch::Tensor();
- infer_request->output = new torch::Tensor();
- switch (th_model->model.func_type) {
- case DFT_PROCESS_FRAME:
- input.scale = 255;
- if (task->do_ioproc) {
- if (th_model->model.frame_pre_proc != NULL) {
- th_model->model.frame_pre_proc(task->in_frame, &input, th_model->model.filter_ctx);
- } else {
- ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx);
- }
- }
- break;
- default:
- avpriv_report_missing_feature(NULL, "model function type %d", th_model->model.func_type);
- break;
+ /* Calculate required size for current frame */
+ cur_size = (size_t)input.dims[height_idx] * input.dims[width_idx] *
+ input.dims[channel_idx] * sizeof(float);
+
+ /**
+ * Persistent Buffer Logic:
+ * Only reallocate if existing buffer is too small or doesn't exist.
+ */
+ if (!infer_request->input_data || infer_request->input_data_size < cur_size) {
+ av_freep(&infer_request->input_data);
+ infer_request->input_data = (uint8_t *)av_malloc(cur_size);
+ if (!infer_request->input_data)
+ return AVERROR(ENOMEM);
+ infer_request->input_data_size = cur_size;
}
+
+ /* Initialize tensors if they don't exist */
+ if (!infer_request->input_tensor)
+ infer_request->input_tensor = new torch::Tensor();
+ if (!infer_request->output)
+ infer_request->output = new torch::Tensor();
+
+ input.data = infer_request->input_data;
+
+ if (task->do_ioproc) {
+ if (th_model->model.frame_pre_proc)
+ th_model->model.frame_pre_proc(task->in_frame, &input, th_model->model.filter_ctx);
+ else
+ ff_proc_from_frame_to_dnn(task->in_frame, &input, th_model->ctx);
+ }
+
*infer_request->input_tensor = torch::from_blob(input.data,
{1, input.dims[channel_idx], input.dims[height_idx], input.dims[width_idx]},
- deleter, torch::kFloat32);
- return 0;
+ torch::kFloat32);
-err:
- th_free_request(infer_request);
- return ret;
+ return 0;
}
static int th_start_inference(void *args)
@@ -487,15 +494,28 @@ err:
static THInferRequest *th_create_inference_request(void)
{
- THInferRequest *request = (THInferRequest *)av_malloc(sizeof(THInferRequest));
- if (!request) {
+ // Use av_mallocz to zero-initialize everything (including input_data and input_data_size)
+ THInferRequest *request = (THInferRequest *)av_mallocz(sizeof(THInferRequest));
+ if (!request)
return NULL;
- }
- request->input_tensor = NULL;
- request->output = NULL;
+
return request;
}
+static int get_input_th(DNNModel *model, DNNData *input, const char *input_name)
+{
+ input->dt = DNN_FLOAT;
+ input->order = DCO_RGB;
+ input->layout = DL_NCHW;
+
+ input->dims[0] = 1;
+ input->dims[1] = 3;
+ input->dims[2] = -1;
+ input->dims[3] = -1;
+
+ return 0;
+}
+
static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNModel *model = NULL;
--
2.52.0
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2026-02-13 18:28 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-13 18:27 [FFmpeg-devel] [PR] avfilter/dnn: implement persistent buffers for LibTorch backend (PR #21748) Raja-89 via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git