* [FFmpeg-devel] [PATCH FFmpeg 4/15] libavfilter: dnn interface definitions for CLIP/CLAP Inference
@ 2025-03-08 14:59 m.kaindl0208
0 siblings, 0 replies; only message in thread
From: m.kaindl0208 @ 2025-03-08 14:59 UTC (permalink / raw)
To: ffmpeg-devel
Defines new DNNFunctionType enum values for CLIP and CLAP inference and adds new data structures, such as DNNExecZeroShotClassificationParams, to support zero-shot classification models.
Try the new filters using my GitHub repo: https://github.com/MaximilianKaindl/DeepFFMPEGVideoClassification
Any feedback is appreciated!
Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com>
---
libavfilter/dnn_interface.h | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index 66086409be..2125348c6b 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -58,6 +58,8 @@ typedef enum {
DFT_PROCESS_FRAME, // process the whole frame
DFT_ANALYTICS_DETECT, // detect from the whole frame
DFT_ANALYTICS_CLASSIFY, // classify for each bounding box
+ DFT_ANALYTICS_CLIP, // classify whole frame with zero-shot classification
+ DFT_ANALYTICS_CLAP // classify whole audio frame with zero-shot classification
}DNNFunctionType;
typedef enum {
@@ -90,6 +92,16 @@ typedef struct DNNExecClassificationParams {
const char *target;
} DNNExecClassificationParams;
+typedef struct DNNExecZeroShotClassificationParams { // execution parameters for zero-shot classification models
+ DNNExecBaseParams base; // embedded base execution parameters
+ const char **labels; // label strings; presumably label_count entries - TODO confirm
+ const int label_count; // NOTE(review): const member, so the struct can be initialized but not assigned to afterwards
+ const char *target; // presumably same meaning as DNNExecClassificationParams.target - TODO confirm
+ const char *tokenizer_path; // presumably the tokenizer file, cf. load_model_with_tokenizer - TODO confirm
+ const int *softmax_units; // NOTE(review): meaning is not described in this header - TODO document
+ const int softmax_units_count; // presumably the number of entries in softmax_units - TODO confirm
+} DNNExecZeroShotClassificationParams;
+
typedef int (*FramePrePostProc)(AVFrame *frame, DNNData *model, AVFilterContext *filter_ctx);
typedef int (*DetectPostProc)(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx);
typedef int (*ClassifyPostProc)(AVFrame *frame, DNNData *output, uint32_t bbox_index, AVFilterContext *filter_ctx);
@@ -136,6 +148,16 @@ typedef struct OVOptions {
typedef struct THOptions {
const AVClass *clazz;
int optimize;
+
+ // Contrastive Language-X Pre-training options
+ float logit_scale; // NOTE(review): default and valid range are not visible in this header - TODO confirm
+ float temperature; // NOTE(review): relation to logit_scale is not stated here - TODO confirm
+ int forward_order; // Order of forward output (0: media text, 1: text media)
+ int normalize; // Normalize the input tensor
+ int64_t token_dimension; // NOTE(review): presumably the tokenized text length - TODO confirm
+ int64_t input_resolution; // NOTE(review): presumably in pixels - TODO confirm
+ int64_t sample_rate; // NOTE(review): presumably in Hz - TODO confirm
+ int64_t sample_duration; // NOTE(review): units (seconds? samples?) are not stated - TODO confirm
} THOptions;
typedef struct DNNModule DNNModule;
@@ -177,6 +199,8 @@ struct DNNModule {
DNNBackendType type;
// Loads model and parameters from given file. Returns NULL if it is not possible.
DNNModel *(*load_model)(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
+ // Loads model, tokenizer and parameters from given file. Returns NULL if it is not possible.
+ DNNModel *(*load_model_with_tokenizer)(DnnContext *ctx, DNNFunctionType func_type, const char** labels, int label_count, int* softmax_units, int softmax_units_count, const char* tokenizer_path, AVFilterContext *filter_ctx);
// Executes model with specified input and output. Returns the error code otherwise.
int (*execute_model)(const DNNModel *model, DNNExecBaseParams *exec_params);
// Retrieve inference result.
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-03-08 14:59 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-08 14:59 [FFmpeg-devel] [PATCH FFmpeg 4/15] libavfilter: dnn interface definitions for CLIP/CLAP Inference m.kaindl0208
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git