Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: <m.kaindl0208@gmail.com>
To: <ffmpeg-devel@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH v2 FFmpeg 14/20] libavfilter/dnn/dnn_backend_torch: Similarity and Softmax calculation functions for CLIP/CLAP
Date: Mon, 10 Mar 2025 20:54:52 +0100
Message-ID: <004201db91f6$4a450060$decf0120$@gmail.com> (raw)

Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com>
---
 libavfilter/dnn/dnn_backend_torch.cpp | 76 +++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
index 3a0ef931f9..12ba2674b3 100644
--- a/libavfilter/dnn/dnn_backend_torch.cpp
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -381,6 +381,82 @@ static int copy_softmax_units(THModel *th_model, const int *softmax_units, int s
     return 0;
 }

+static torch::Tensor calculate_similarity(torch::Tensor &tensor1, torch::Tensor &tensor2, bool normalize, float logit_scale, DnnContext *ctx)
+{
+    try {
+        if (normalize) {
+            tensor1 = torch::nn::functional::normalize(tensor1, torch::nn::functional::NormalizeFuncOptions().p(2).dim(-1));
+            tensor2 = torch::nn::functional::normalize(tensor2, torch::nn::functional::NormalizeFuncOptions().p(2).dim(-1));
+        }
+
+        // Compute similarity matrix
+        torch::Tensor similarity = logit_scale * torch::matmul(tensor2, tensor1.transpose(0, 1));
+        return similarity.transpose(0, 1);
+    } catch (const c10::Error &e) {
+        av_log(ctx, AV_LOG_ERROR, "Similarity computation failed: %s\n", e.what());
+        return torch::Tensor(); // Return empty tensor properly
+    }
+}
+
+static torch::Tensor apply_softmax(torch::Tensor input_tensor, float temperature, const int *softmax_units, int softmax_units_count, DnnContext *ctx)
+{
+    try {
+        // Check for empty or invalid input tensor
+        if (input_tensor.numel() == 0 || input_tensor.dim() < 2) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid input tensor for softmax\n");
+            return input_tensor;
+        }
+
+        // Apply temperature if needed
+        torch::Tensor scaled_tensor;
+        if (temperature > 0.0f && temperature != 1.0f) {
+            scaled_tensor = input_tensor / temperature;
+        } else {
+            scaled_tensor = input_tensor;
+        }
+
+        // If no specific units are provided, apply softmax to the entire tensor
+        if (!softmax_units || softmax_units_count <= 0) {
+            return torch::nn::functional::softmax(scaled_tensor, torch::nn::functional::SoftmaxFuncOptions(1));
+        }
+
+        // Create a new output tensor with the same shape as the input
+        torch::Tensor result = torch::empty_like(scaled_tensor);
+        int offset = 0;
+
+        // Apply softmax to each specified segment
+        for (int i = 0; i < softmax_units_count; i++) {
+            int length = softmax_units[i];
+            if (length <= 0 || offset + length > scaled_tensor.size(1)) {
+                av_log(ctx, AV_LOG_ERROR, "Invlid Softmax units were given to softmax. Index invalid or out of Bounds.\n");
+                return input_tensor;
+            }
+
+            // Apply softmax to the segment and directly place it in the result tensor
+            result.slice(1, offset, offset + length) = torch::nn::functional::softmax(
+                scaled_tensor.slice(1, offset, offset + length), torch::nn::functional::SoftmaxFuncOptions(1));
+
+            // Move offset forward
+            offset += length;
+        }
+
+        // Copy any remaining unprocessed parts if there are any
+        if (offset < scaled_tensor.size(1)) {
+            result.slice(1, offset, scaled_tensor.size(1)) = scaled_tensor.slice(1, offset, scaled_tensor.size(1));
+            // Copy remaining unprocessed elements without modification
+            av_log(ctx, AV_LOG_ERROR, "Some tensor elements (%d to %ld) were not processed by softmax\n", offset,
+                    scaled_tensor.size(1) - 1);
+        }
+
+        return result;
+    } catch (const c10::Error &e) {
+        av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what());
+        return input_tensor; // Return original tensor on error
+    } catch (const std::exception &e) {
+        av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what());
+        return input_tensor; // Return original tensor on error
+    }
+}

 static int fill_model_input_th(THModel *th_model, THRequestItem *request)
 {
--
2.34.1


_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

                 reply	other threads:[~2025-03-10 19:55 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='004201db91f6$4a450060$decf0120$@gmail.com' \
    --to=m.kaindl0208@gmail.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git