From: <m.kaindl0208@gmail.com> To: <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH v2 FFmpeg 14/20] libavfilter/dnn/dnn_backend_torch: Similarity and Softmax calculation functions for CLIP/CLAP Date: Mon, 10 Mar 2025 20:54:52 +0100 Message-ID: <004201db91f6$4a450060$decf0120$@gmail.com> (raw) Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com> --- libavfilter/dnn/dnn_backend_torch.cpp | 76 +++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp index 3a0ef931f9..12ba2674b3 100644 --- a/libavfilter/dnn/dnn_backend_torch.cpp +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -381,6 +381,82 @@ static int copy_softmax_units(THModel *th_model, const int *softmax_units, int s return 0; } +static torch::Tensor calculate_similarity(torch::Tensor &tensor1, torch::Tensor &tensor2, bool normalize, float logit_scale, DnnContext *ctx) +{ + try { + if (normalize) { + tensor1 = torch::nn::functional::normalize(tensor1, torch::nn::functional::NormalizeFuncOptions().p(2).dim(-1)); + tensor2 = torch::nn::functional::normalize(tensor2, torch::nn::functional::NormalizeFuncOptions().p(2).dim(-1)); + } + + // Compute similarity matrix + torch::Tensor similarity = logit_scale * torch::matmul(tensor2, tensor1.transpose(0, 1)); + return similarity.transpose(0, 1); + } catch (const c10::Error &e) { + av_log(ctx, AV_LOG_ERROR, "Similarity computation failed: %s\n", e.what()); + return torch::Tensor(); // Return empty tensor properly + } +} + +static torch::Tensor apply_softmax(torch::Tensor input_tensor, float temperature, const int *softmax_units, int softmax_units_count, DnnContext *ctx) +{ + try { + // Check for empty or invalid input tensor + if (input_tensor.numel() == 0 || input_tensor.dim() < 2) { + av_log(ctx, AV_LOG_ERROR, "Invalid input tensor for softmax\n"); + return input_tensor; + } + + // Apply temperature if needed + torch::Tensor scaled_tensor; + if (temperature > 0.0f && temperature != 1.0f) { + scaled_tensor = input_tensor / temperature; + } else { + scaled_tensor = input_tensor; + } + + // If no specific units are provided, apply softmax to the entire tensor + if (!softmax_units || softmax_units_count <= 0) { + return torch::nn::functional::softmax(scaled_tensor, torch::nn::functional::SoftmaxFuncOptions(1)); + } + + // Create a new output tensor with the same shape as the input + torch::Tensor result = torch::empty_like(scaled_tensor); + int offset = 0; + + // Apply softmax to each specified segment + for (int i = 0; i < softmax_units_count; i++) { + int length = softmax_units[i]; + if (length <= 0 || offset + length > scaled_tensor.size(1)) { + av_log(ctx, AV_LOG_ERROR, "Invlid Softmax units were given to softmax. Index invalid or out of Bounds.\n"); + return input_tensor; + } + + // Apply softmax to the segment and directly place it in the result tensor + result.slice(1, offset, offset + length) = torch::nn::functional::softmax( + scaled_tensor.slice(1, offset, offset + length), torch::nn::functional::SoftmaxFuncOptions(1)); + + // Move offset forward + offset += length; + } + + // Copy any remaining unprocessed parts if there are any + if (offset < scaled_tensor.size(1)) { + result.slice(1, offset, scaled_tensor.size(1)) = scaled_tensor.slice(1, offset, scaled_tensor.size(1)); + // Copy remaining unprocessed elements without modification + av_log(ctx, AV_LOG_ERROR, "Some tensor elements (%d to %ld) were not processed by softmax\n", offset, + scaled_tensor.size(1) - 1); + } + + return result; + } catch (const c10::Error &e) { + av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what()); + return input_tensor; // Return original tensor on error + } catch (const std::exception &e) { + av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what()); + return input_tensor; // Return original tensor on error + } +} static int fill_model_input_th(THModel *th_model, THRequestItem *request) { -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
reply other threads:[~2025-03-10 19:55 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to='004201db91f6$4a450060$decf0120$@gmail.com' \ --to=m.kaindl0208@gmail.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git