From: <m.kaindl0208@gmail.com> To: <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH FFmpeg 8/15] libavfilter: add missing temperature application in apply_softmax function and set default temperature to 1. apply_softmax refactoring and improved error handling Date: Sat, 8 Mar 2025 16:00:59 +0100 Message-ID: <007a01db903a$e723fd40$b56bf7c0$@gmail.com> (raw) Try the new filters using my Github Repo https://github.com/MaximilianKaindl/DeepFFMPEGVideoClassification. Any Feedback is appreciated! Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com> --- libavfilter/avf_dnn_classify.c | 2 +- libavfilter/dnn/dnn_backend_torch.cpp | 66 ++++++++++++++++----------- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/libavfilter/avf_dnn_classify.c b/libavfilter/avf_dnn_classify.c index 5f294d1d9b..fa3a5ebf99 100644 --- a/libavfilter/avf_dnn_classify.c +++ b/libavfilter/avf_dnn_classify.c @@ -134,7 +134,7 @@ static const AVOption dnn_classify_options[] = { #if (CONFIG_LIBTORCH == 1) { "torch", "torch backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TH }, 0, 0, FLAGS, .unit = "backend" }, { "logit_scale", "logit scale for similarity calculation", OFFSET3(logit_scale), AV_OPT_TYPE_FLOAT, { .dbl = -1.0 }, -1.0, 100.0, FLAGS }, - { "temperature", "softmax temperature", OFFSET3(temperature), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 1, 100.0, FLAGS }, + { "temperature", "softmax temperature", OFFSET3(temperature), AV_OPT_TYPE_FLOAT, { .dbl = -1.0 }, -1.0, 100.0, FLAGS }, { "forward_order", "Order of forward output (0: media text, 1: text media) (CLIP/CLAP only)", OFFSET3(forward_order), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, FLAGS }, { "normalize", "Normalize the input tensor (CLIP/CLAP only)", OFFSET3(normalize), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, FLAGS }, { "input_res", "video processing model expected input size", OFFSET3(input_resolution), AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, 10000, FLAGS }, diff --git a/libavfilter/dnn/dnn_backend_torch.cpp 
b/libavfilter/dnn/dnn_backend_torch.cpp index dc68ad254f..c8804639d9 100644 --- a/libavfilter/dnn/dnn_backend_torch.cpp +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -473,15 +473,12 @@ static torch::Tensor calculate_similarity(torch::Tensor &tensor1, torch::Tensor torch::Tensor similarity = logit_scale * torch::matmul(tensor2, tensor1.transpose(0, 1)); return similarity.transpose(0, 1); } catch (const c10::Error &e) { - if (ctx) { - av_log(ctx, AV_LOG_ERROR, "Similarity computation failed: %s\n", e.what()); - } + av_log(ctx, AV_LOG_ERROR, "Similarity computation failed: %s\n", e.what()); return torch::Tensor(); // Return empty tensor properly } } -static torch::Tensor apply_softmax(torch::Tensor input_tensor, const int *softmax_units, int softmax_units_count, - DnnContext *ctx) +static torch::Tensor apply_softmax(torch::Tensor input_tensor, float temperature, const int *softmax_units, int softmax_units_count, DnnContext *ctx) { try { // Check for empty or invalid input tensor @@ -490,44 +487,53 @@ static torch::Tensor apply_softmax(torch::Tensor input_tensor, const int *softma return input_tensor; } + // Apply temperature if needed + torch::Tensor scaled_tensor; + if (temperature > 0.0f && temperature != 1.0f) { + scaled_tensor = input_tensor / temperature; + } else { + scaled_tensor = input_tensor; + } + // If no specific units are provided, apply softmax to the entire tensor if (!softmax_units || softmax_units_count <= 0) { - return torch::nn::functional::softmax(input_tensor, torch::nn::functional::SoftmaxFuncOptions(1)); + return torch::nn::functional::softmax(scaled_tensor, torch::nn::functional::SoftmaxFuncOptions(1)); } - torch::Tensor result = input_tensor.clone(); + // Create a new output tensor with the same shape as the input + torch::Tensor result = torch::empty_like(scaled_tensor); int offset = 0; // Apply softmax to each specified segment for (int i = 0; i < softmax_units_count; i++) { int length = softmax_units[i]; - if (length <= 0 || offset + 
length > input_tensor.size(1)) { - continue; + if (length <= 0 || offset + length > scaled_tensor.size(1)) { + av_log(ctx, AV_LOG_ERROR, "Invlid Softmax units were given to softmax. Index invalid or out of Bounds.\n"); + return input_tensor; } - // Select the segment to apply softmax - torch::Tensor segment = result.slice(1, offset, offset + length); - - // Apply softmax along dimension 1 (across labels in segment) - torch::Tensor softmax_segment = - torch::nn::functional::softmax(segment, torch::nn::functional::SoftmaxFuncOptions(1)); - - // Put softmaxed segment back into result tensor - result.slice(1, offset, offset + length) = softmax_segment; + // Apply softmax to the segment and directly place it in the result tensor + result.slice(1, offset, offset + length) = torch::nn::functional::softmax( + scaled_tensor.slice(1, offset, offset + length), torch::nn::functional::SoftmaxFuncOptions(1)); // Move offset forward offset += length; } + + // Copy any remaining unprocessed parts if there are any + if (offset < scaled_tensor.size(1)) { + result.slice(1, offset, scaled_tensor.size(1)) = scaled_tensor.slice(1, offset, scaled_tensor.size(1)); + // Copy remaining unprocessed elements without modification + av_log(ctx, AV_LOG_ERROR, "Some tensor elements (%d to %ld) were not processed by softmax\n", offset, + scaled_tensor.size(1) - 1); + } + return result; } catch (const c10::Error &e) { - if (ctx) { - av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what()); - } + av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what()); return input_tensor; // Return original tensor on error } catch (const std::exception &e) { - if (ctx) { - av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what()); - } + av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what()); return input_tensor; // Return original tensor on error } } @@ -833,8 +839,9 @@ static int th_start_inference(void *args) *infer_request->output = 
calculate_similarity(media_embeddings, text_embeddings, th_model->ctx->torch_option.normalize, logit_scale, ctx); } - *infer_request->output = apply_softmax(*infer_request->output, th_model->clxp_ctx->softmax_units, - th_model->clxp_ctx->softmax_units_count, ctx); + *infer_request->output = + apply_softmax(*infer_request->output, th_model->ctx->torch_option.temperature, + th_model->clxp_ctx->softmax_units, th_model->clxp_ctx->softmax_units_count, ctx); } } else { avpriv_report_missing_feature(ctx, "model function type %d", th_model->model.func_type); @@ -1071,6 +1078,13 @@ static THModel *init_model_th(DnnContext *ctx, DNNFunctionType func_type, AVFilt av_log(ctx, AV_LOG_INFO, "Using default logit_scale=%.4f for %s input\n", ctx->torch_option.logit_scale, func_type == DFT_ANALYTICS_CLAP ? "audio" : "video"); } + if (ctx->torch_option.temperature <= 0) { + // set default value for logit_scale + ctx->torch_option.temperature = 1; + // Log the default value for logit_scale + av_log(ctx, AV_LOG_INFO, "Using default temperature=%.4f for %s input\n", ctx->torch_option.temperature, + func_type == DFT_ANALYTICS_CLAP ? "audio" : "video"); + } if (ctx->torch_option.normalize < 0) { ctx->torch_option.normalize = func_type == DFT_ANALYTICS_CLAP ? 1 : 0; // Log the default value for logit_scale -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
reply other threads:[~2025-03-08 15:01 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:

You may reply publicly to this message via plain-text email using any one of the following methods:

* Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1):

    git send-email \
      --in-reply-to='007a01db903a$e723fd40$b56bf7c0$@gmail.com' \
      --to=m.kaindl0208@gmail.com \
      --cc=ffmpeg-devel@ffmpeg.org \
      /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

    git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

    # If you have public-inbox 1.1+ installed, you may
    # initialize and index your mirror using the following commands:
    public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
        ffmpegdev@gitmailbox.com
    public-inbox-index ffmpegdev

Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git