From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id 6D2014E302 for ; Mon, 10 Mar 2025 19:55:16 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 4A07D68E0D4; Mon, 10 Mar 2025 21:54:56 +0200 (EET) Received: from mail-wm1-f43.google.com (mail-wm1-f43.google.com [209.85.128.43]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id C195668E004 for ; Mon, 10 Mar 2025 21:54:54 +0200 (EET) Received: by mail-wm1-f43.google.com with SMTP id 5b1f17b1804b1-43cfecdd8b2so6929425e9.2 for ; Mon, 10 Mar 2025 12:54:54 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1741636493; x=1742241293; darn=ffmpeg.org; h=thread-index:content-language:content-transfer-encoding :mime-version:message-id:date:subject:to:from:from:to:cc:subject :date:message-id:reply-to; bh=WCVSB+XRsZ7SQNtY4yja+3s/+pc9NmYsZcZ0PdxMgUI=; b=CAkMEdJCJxEWxDDBOg70/LcdK35JVBRWGgqBAqwy9gUfjof1+MCOXZ8I2RjrB5+x88 7S+hCzDLjSzc7O6x4e+pgiuNdqzSHQKTsp+jMGd9bNL4BBFqCMyoIq2xH92Xz7gmqlOl XbJ76dq3PyN3aARxuaLYiSUCh1hqzIMr1rR9UDQqzRjcRL/Jdo9+w5rxkazqHXKI304/ Shuws801hQShovMNn2QFp6HY00q5UciFvWsZvZrUdKQAAj4TKTdKTkY/6BQQyT3Qnx81 ZBKrmn5NYb+bXEMhso5dw4mJ1cSUN1CLjRC6rc1UQ/QKUnWvUFhmWIzBbBHv8JfOj7Pv NbBg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1741636493; x=1742241293; h=thread-index:content-language:content-transfer-encoding :mime-version:message-id:date:subject:to:from:x-gm-message-state :from:to:cc:subject:date:message-id:reply-to; bh=WCVSB+XRsZ7SQNtY4yja+3s/+pc9NmYsZcZ0PdxMgUI=; b=qmLqp75OT/8vyDOrg+ab0OVR4IgSXv3GVfi83v0KDwZaJgtascj005p1T92eRmRE5C W0cD/CWJBwcnsgOAUNUqzz2aYUW8DJEl1TA82CoWRjq/75CpPkgJWuM+G9BcGV8eLVqM Ib+BgCyPzaeH1TDcVwn1H36XWeOeN8yWgm1GwtvyMism1MdHtQqGcFe4mGqKbnJoDPEW nBNeihl/Dq+c+tFOYAynBjvUqQJB4Sl1a3kGc1QexLnM4d1p6XwwnrZ2e0PYPCsLhZI/ 
SgZmp3f1P4H1etL9r6WCa4J70BEFeGevds97Xiz1CTQiLu3KCHTyR/UzGS/VueCV7OXK Y0SA== X-Gm-Message-State: AOJu0Yy8F/JuODQWKzkABxrHutX/9T6MxvbAMi4oy58STLV6Ny9uDtJJ E2aQEJY1r8Yazc3JO0zuHRk/l4hTO5uz8jfujfUXtTTNNp86Izp6Ve68cQ== X-Gm-Gg: ASbGnctwjEuVpDgY8kudxgHJVVz6rnuw0kLPuHEs8go/I43ko6Ia9s8+znIFlAVIAfD 7aaG3woM3tqivuCcQ0QGZec1n+ok3lS65DiQLzFaSC5yU07tv25o3rwOXKfkWe9xFizW36CBuoJ CgEEtU4fugx5E8FHW6TIWga35UoGk7UDDZNmcUFSPV52AoAz+uFl/fmgFNKqGD5O17fUBCoKxgC 3CRHsXGPIDx2z6tVWpQVM3p53/8aaBhkJ99Vr8q7ryNLvQ7JTmwYEso8YAzjk1cw6DcBIEWo0HU f6zTA8jcBvfI/FMR43WRgvr7zb0Dc7C99I5teTFafd6Mk0xZ6iOKl2uMOXhFxfhJrqU8LhOy2fh Tdc2MLonTXkofA+6s X-Google-Smtp-Source: AGHT+IHQhXVAEkBhLNb+RNQvU1EqAAuHwb1i98t6tEXfp+eO9w9eyLVhwBzLqBISSSeaYBXiSJbxdw== X-Received: by 2002:a05:600c:4fcc:b0:43c:fcb1:528a with SMTP id 5b1f17b1804b1-43cfcb1544bmr34793665e9.6.1741636493070; Mon, 10 Mar 2025 12:54:53 -0700 (PDT) Received: from MK2 (80-108-16-220.cable.dynamic.surfer.at. [80.108.16.220]) by smtp.gmail.com with ESMTPSA id 5b1f17b1804b1-43cfa0723c9sm41014835e9.6.2025.03.10.12.54.52 for (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Mon, 10 Mar 2025 12:54:52 -0700 (PDT) From: To: Date: Mon, 10 Mar 2025 20:54:52 +0100 Message-ID: <004201db91f6$4a450060$decf0120$@gmail.com> MIME-Version: 1.0 X-Mailer: Microsoft Outlook 16.0 Content-Language: en-at Thread-Index: AduR9kk0U2pLkHx0QAWykpv6/x3TUg== Subject: [FFmpeg-devel] [PATCH v2 FFmpeg 14/20] libavfilter/dnn/dnn_backend_torch: Similarity and Softmax calculation functions for CLIP/CLAP X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: Signed-off-by: MaximilianKaindl --- 
Review changes on top of the submitted v2:
 - calculate_similarity(): also catch std::exception, matching apply_softmax()
 - apply_softmax(): reject undefined tensors, make the segment bounds check
   overflow-safe, fix the "Invlid" typo, and log the unprocessed tail as a
   warning with a portable format specifier
 - add doxygen comments to both functions
The blob ids on the "index" line are those of the submitted patch and were
not regenerated.

 libavfilter/dnn/dnn_backend_torch.cpp | 115 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)

diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp
index 3a0ef931f9..12ba2674b3 100644
--- a/libavfilter/dnn/dnn_backend_torch.cpp
+++ b/libavfilter/dnn/dnn_backend_torch.cpp
@@ -381,6 +381,121 @@ static int copy_softmax_units(THModel *th_model, const int *softmax_units, int s
 
     return 0;
 }
+
+/**
+ * Compute the scaled similarity matrix between two tensors.
+ *
+ * Evaluates logit_scale * matmul(tensor2, transpose(tensor1)) and returns
+ * that product with dimensions 0 and 1 swapped.
+ *
+ * @param tensor1     first operand; reassigned to its L2-normalized form
+ *                    (along the last dimension) when normalize is true
+ * @param tensor2     second operand; reassigned likewise when normalize is true
+ * @param normalize   whether to L2-normalize both operands before multiplying
+ * @param logit_scale scalar factor applied to the matrix product
+ * @param ctx         logging context
+ * @return the similarity tensor, or an undefined tensor if an exception was caught
+ */
+static torch::Tensor calculate_similarity(torch::Tensor &tensor1, torch::Tensor &tensor2, bool normalize, float logit_scale, DnnContext *ctx)
+{
+    try {
+        if (normalize) {
+            tensor1 = torch::nn::functional::normalize(tensor1, torch::nn::functional::NormalizeFuncOptions().p(2).dim(-1));
+            tensor2 = torch::nn::functional::normalize(tensor2, torch::nn::functional::NormalizeFuncOptions().p(2).dim(-1));
+        }
+
+        // Compute similarity matrix
+        torch::Tensor similarity = logit_scale * torch::matmul(tensor2, tensor1.transpose(0, 1));
+        return similarity.transpose(0, 1);
+    } catch (const c10::Error &e) {
+        av_log(ctx, AV_LOG_ERROR, "Similarity computation failed: %s\n", e.what());
+        return torch::Tensor(); // undefined tensor signals failure to the caller
+    } catch (const std::exception &e) {
+        // Non-torch exceptions must not escape this helper either
+        av_log(ctx, AV_LOG_ERROR, "Similarity computation failed: %s\n", e.what());
+        return torch::Tensor();
+    }
+}
+
+/**
+ * Apply softmax along dimension 1, either over the whole dimension or per segment.
+ *
+ * The input is divided by temperature first when temperature is positive and
+ * not equal to 1. If softmax_units is NULL or softmax_units_count is not
+ * positive, one softmax spans all of dimension 1. Otherwise softmax_units[i]
+ * is the length of the i-th consecutive segment of dimension 1 and each
+ * segment is normalized on its own; columns past the last segment are copied
+ * from the temperature-scaled input without softmax and a warning is logged.
+ *
+ * @param input_tensor        tensor with at least two dimensions
+ * @param temperature         divisor applied before softmax; ignored if <= 0 or == 1
+ * @param softmax_units       segment lengths, or NULL for a single softmax
+ * @param softmax_units_count number of entries in softmax_units
+ * @param ctx                 logging context
+ * @return the processed tensor; input_tensor is returned unchanged on any error
+ */
+static torch::Tensor apply_softmax(torch::Tensor input_tensor, float temperature, const int *softmax_units, int softmax_units_count, DnnContext *ctx)
+{
+    try {
+        // Reject undefined, empty or too low-dimensional input
+        if (!input_tensor.defined() || input_tensor.numel() == 0 || input_tensor.dim() < 2) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid input tensor for softmax\n");
+            return input_tensor;
+        }
+
+        // Apply temperature if needed
+        torch::Tensor scaled_tensor;
+        if (temperature > 0.0f && temperature != 1.0f) {
+            scaled_tensor = input_tensor / temperature;
+        } else {
+            scaled_tensor = input_tensor;
+        }
+
+        // If no specific units are provided, apply softmax to the entire tensor
+        if (!softmax_units || softmax_units_count <= 0) {
+            return torch::nn::functional::softmax(scaled_tensor, torch::nn::functional::SoftmaxFuncOptions(1));
+        }
+
+        // Create a new output tensor with the same shape as the input
+        torch::Tensor result = torch::empty_like(scaled_tensor);
+        const auto width = scaled_tensor.size(1);
+        int offset = 0;
+
+        // Apply softmax to each specified segment
+        for (int i = 0; i < softmax_units_count; i++) {
+            int length = softmax_units[i];
+            // Test against the remaining width so that offset + length cannot overflow int
+            if (length <= 0 || length > width - offset) {
+                av_log(ctx, AV_LOG_ERROR, "Invalid Softmax units were given to softmax. Index invalid or out of Bounds.\n");
+                return input_tensor;
+            }
+
+            // Apply softmax to the segment and directly place it in the result tensor
+            result.slice(1, offset, offset + length) = torch::nn::functional::softmax(
+                scaled_tensor.slice(1, offset, offset + length), torch::nn::functional::SoftmaxFuncOptions(1));
+
+            // Move offset forward
+            offset += length;
+        }
+
+        // Copy remaining unprocessed elements without modification
+        if (offset < width) {
+            result.slice(1, offset, width) = scaled_tensor.slice(1, offset, width);
+            // Not fatal, a result is still returned: warn instead of reporting an error.
+            // The cast keeps the format string valid where long is narrower than 64 bits.
+            av_log(ctx, AV_LOG_WARNING, "Some tensor elements (%d to %lld) were not processed by softmax\n", offset,
+                   static_cast<long long>(width - 1));
+        }
+
+        return result;
+    } catch (const c10::Error &e) {
+        av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what());
+        return input_tensor; // Return original tensor on error
+    } catch (const std::exception &e) {
+        av_log(ctx, AV_LOG_ERROR, "Error applying softmax: %s\n", e.what());
+        return input_tensor; // Return original tensor on error
+    }
+}
 
 static int fill_model_input_th(THModel *th_model, THRequestItem *request)
 {
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".