Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions
@ 2025-03-08 14:58 m.kaindl0208
  2025-03-08 17:52 ` Sean McGovern
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: m.kaindl0208 @ 2025-03-08 14:58 UTC (permalink / raw)
  To: ffmpeg-devel

These functions will be used by the classify filter in the upcoming patches.

Try the new filters using my GitHub repo: https://github.com/MaximilianKaindl/DeepFFMPEGVideoClassification

Any feedback is appreciated!

Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com>
---
 libavutil/detection_bbox.c | 54 ++++++++++++++++++++++++++++++++++++++
 libavutil/detection_bbox.h | 31 ++++++++++++++++++++++
 2 files changed, 85 insertions(+)

diff --git a/libavutil/detection_bbox.c b/libavutil/detection_bbox.c index cb157b355b..378233121d 100644
--- a/libavutil/detection_bbox.c
+++ b/libavutil/detection_bbox.c
@@ -18,6 +18,7 @@
 
 #include "detection_bbox.h"
 #include "mem.h"
+#include "libavutil/avstring.h"
 
 AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size)  { @@ -71,3 +72,56 @@ AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32
 
     return header;
 }
+
+int av_detection_bbox_fill_with_best_labels(char **labels, float 
+*probabilities, int num_labels, AVDetectionBBox *bbox, int max_classes_per_box, float confidence_threshold) {
+    int i, j, minpos, ret;
+    float min;
+
+    if (!labels || !probabilities || !bbox) {
+        return AVERROR(EINVAL);
+    }
+
+    for (i = 0; i < num_labels; i++) {
+        if (probabilities[i] >= confidence_threshold) {
+            if (bbox->classify_count >= max_classes_per_box) {
+                // Find lowest probability classification
+                min = av_q2d(bbox->classify_confidences[0]);
+                minpos = 0;
+                for (j = 1; j < bbox->classify_count; j++) {
+                    float prob = av_q2d(bbox->classify_confidences[j]);
+                    if (prob < min) {
+                        min = prob;
+                        minpos = j;
+                    }
+                }
+
+                if (probabilities[i] > min) {
+                    ret = av_detection_bbox_set_content(bbox, labels[i], minpos, probabilities[i]);
+                    if (ret < 0)
+                        return ret;
+                }
+            } else {
+                ret = av_detection_bbox_set_content(bbox, labels[i], bbox->classify_count, probabilities[i]);
+                if (ret < 0)
+                    return ret;
+                bbox->classify_count++;
+            }
+        }
+    }
+    return 0;
+}
+
+int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label, 
+int index, float probability) {
+    // Set probability
+    bbox->classify_confidences[index] = av_make_q((int)(probability * 
+10000), 10000);
+
+    // Copy label with size checking
+    if (av_strlcpy(bbox->classify_labels[index], label, AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) >=
+        AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) {
+        av_log(NULL, AV_LOG_WARNING, "Label truncated in set_prob_and_label_of_bbox\n");
+    }
+
+    return 0;
+}
diff --git a/libavutil/detection_bbox.h b/libavutil/detection_bbox.h index 011988052c..27d749ad59 100644
--- a/libavutil/detection_bbox.h
+++ b/libavutil/detection_bbox.h
@@ -105,4 +105,35 @@ AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_s
  * AV_FRAME_DATA_DETECTION_BBOXES and initializes the variables.
  */
 AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes);
+
+/**
+ * Fills an AVDetectionBBox structure with the best labels based on probabilities.
+ *
+ * This function selects up to max_classes_per_box labels with the 
+highest probabilities
+ * that exceed the given confidence threshold, and assigns them to the bounding box.
+ *
+ * @param labels Array of label strings
+ * @param probabilities Array of probability values corresponding to 
+each label
+ * @param num_labels Number of elements in the labels and probabilities 
+arrays
+ * @param bbox Pointer to the AVDetectionBBox structure to be filled
+ * @param max_classes_per_box Maximum number of classes to assign to 
+the bounding box
+ * @param confidence_threshold Minimum probability value required for a 
+label to be considered
+ * @return 0 on success, negative error code on failure  */ int 
+av_detection_bbox_fill_with_best_labels(char **labels, float 
+*probabilities, int num_labels, AVDetectionBBox *bbox, int 
+max_classes_per_box, float confidence_threshold);
+
+/**
+ * Sets the content of an AVDetectionBBox at the specified index.
+ *
+ * This function assigns a label and its associated probability to the 
+specified index
+ * in the bounding box's internal storage.
+ *
+ * @param bbox Pointer to the AVDetectionBBox structure to modify
+ * @param label The class label to assign (will be copied internally)
+ * @param index The index at which to store the label and probability
+ * @param probability The confidence score/probability for this label
+ * @return 0 on success
+ */
+int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label, 
+int index, float probability);
+
 #endif
--
2.34.1


_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions
  2025-03-08 14:58 [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions m.kaindl0208
@ 2025-03-08 17:52 ` Sean McGovern
  2025-03-09 13:51 ` Michael Niedermayer
  2025-03-09 15:46 ` Lynne
  2 siblings, 0 replies; 5+ messages in thread
From: Sean McGovern @ 2025-03-08 17:52 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Hi,

On Sat, Mar 8, 2025 at 9:58 AM <m.kaindl0208@gmail.com> wrote:
>
> Those functions will be used by classify in the upcoming patches.
>
> Try the new filters using my Github Repo https://github.com/MaximilianKaindl/DeepFFMPEGVideoClassification.
>
> Any Feedback is appreciated!
>
> Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com>
> ---
>  libavutil/detection_bbox.c | 54 ++++++++++++++++++++++++++++++++++++++
>  libavutil/detection_bbox.h | 31 ++++++++++++++++++++++
>  2 files changed, 85 insertions(+)
>
> diff --git a/libavutil/detection_bbox.c b/libavutil/detection_bbox.c index cb157b355b..378233121d 100644
> --- a/libavutil/detection_bbox.c
> +++ b/libavutil/detection_bbox.c
> @@ -18,6 +18,7 @@
>
>  #include "detection_bbox.h"
>  #include "mem.h"
> +#include "libavutil/avstring.h"
>
>  AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size)  { @@ -71,3 +72,56 @@ AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32
>
>      return header;
>  }
> +
> +int av_detection_bbox_fill_with_best_labels(char **labels, float
> +*probabilities, int num_labels, AVDetectionBBox *bbox, int max_classes_per_box, float confidence_threshold) {
> +    int i, j, minpos, ret;
> +    float min;
> +
> +    if (!labels || !probabilities || !bbox) {
> +        return AVERROR(EINVAL);
> +    }
> +
> +    for (i = 0; i < num_labels; i++) {
> +        if (probabilities[i] >= confidence_threshold) {
> +            if (bbox->classify_count >= max_classes_per_box) {
> +                // Find lowest probability classification
> +                min = av_q2d(bbox->classify_confidences[0]);
> +                minpos = 0;
> +                for (j = 1; j < bbox->classify_count; j++) {
> +                    float prob = av_q2d(bbox->classify_confidences[j]);
> +                    if (prob < min) {
> +                        min = prob;
> +                        minpos = j;
> +                    }
> +                }
> +
> +                if (probabilities[i] > min) {
> +                    ret = av_detection_bbox_set_content(bbox, labels[i], minpos, probabilities[i]);
> +                    if (ret < 0)
> +                        return ret;
> +                }
> +            } else {
> +                ret = av_detection_bbox_set_content(bbox, labels[i], bbox->classify_count, probabilities[i]);
> +                if (ret < 0)
> +                    return ret;
> +                bbox->classify_count++;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label,
> +int index, float probability) {
> +    // Set probability
> +    bbox->classify_confidences[index] = av_make_q((int)(probability *
> +10000), 10000);
> +
> +    // Copy label with size checking
> +    if (av_strlcpy(bbox->classify_labels[index], label, AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) >=
> +        AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) {
> +        av_log(NULL, AV_LOG_WARNING, "Label truncated in set_prob_and_label_of_bbox\n");
> +    }
> +
> +    return 0;
> +}
> diff --git a/libavutil/detection_bbox.h b/libavutil/detection_bbox.h index 011988052c..27d749ad59 100644
> --- a/libavutil/detection_bbox.h
> +++ b/libavutil/detection_bbox.h
> @@ -105,4 +105,35 @@ AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_s
>   * AV_FRAME_DATA_DETECTION_BBOXES and initializes the variables.
>   */
>  AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes);
> +
> +/**
> + * Fills an AVDetectionBBox structure with the best labels based on probabilities.
> + *
> + * This function selects up to max_classes_per_box labels with the
> +highest probabilities
> + * that exceed the given confidence threshold, and assigns them to the bounding box.
> + *
> + * @param labels Array of label strings
> + * @param probabilities Array of probability values corresponding to
> +each label
> + * @param num_labels Number of elements in the labels and probabilities
> +arrays
> + * @param bbox Pointer to the AVDetectionBBox structure to be filled
> + * @param max_classes_per_box Maximum number of classes to assign to
> +the bounding box
> + * @param confidence_threshold Minimum probability value required for a
> +label to be considered
> + * @return 0 on success, negative error code on failure  */ int
> +av_detection_bbox_fill_with_best_labels(char **labels, float
> +*probabilities, int num_labels, AVDetectionBBox *bbox, int
> +max_classes_per_box, float confidence_threshold);
> +
> +/**
> + * Sets the content of an AVDetectionBBox at the specified index.
> + *
> + * This function assigns a label and its associated probability to the
> +specified index
> + * in the bounding box's internal storage.
> + *
> + * @param bbox Pointer to the AVDetectionBBox structure to modify
> + * @param label The class label to assign (will be copied internally)
> + * @param index The index at which to store the label and probability
> + * @param probability The confidence score/probability for this label
> + * @return 0 on success
> + */
> +int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label,
> +int index, float probability);
> +
>  #endif
> --
> 2.34.1
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

No comment on the patchset itself -- it's too far out of my wheelhouse.
Maybe don't spam the link to your Github repo in the commit messages though.

-- Sean McGovern
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions
  2025-03-08 14:58 [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions m.kaindl0208
  2025-03-08 17:52 ` Sean McGovern
@ 2025-03-09 13:51 ` Michael Niedermayer
  2025-03-09 15:46 ` Lynne
  2 siblings, 0 replies; 5+ messages in thread
From: Michael Niedermayer @ 2025-03-09 13:51 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1833 bytes --]

Hi

On Sat, Mar 08, 2025 at 03:58:04PM +0100, m.kaindl0208@gmail.com wrote:
> Those functions will be used by classify in the upcoming patches.
> 
> Try the new filters using my Github Repo https://github.com/MaximilianKaindl/DeepFFMPEGVideoClassification.
> 
> Any Feedback is appreciated!
> 
> Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com>
> ---
>  libavutil/detection_bbox.c | 54 ++++++++++++++++++++++++++++++++++++++
>  libavutil/detection_bbox.h | 31 ++++++++++++++++++++++
>  2 files changed, 85 insertions(+)
> 
> diff --git a/libavutil/detection_bbox.c b/libavutil/detection_bbox.c index cb157b355b..378233121d 100644
> --- a/libavutil/detection_bbox.c
> +++ b/libavutil/detection_bbox.c
> @@ -18,6 +18,7 @@
>  
>  #include "detection_bbox.h"
>  #include "mem.h"
> +#include "libavutil/avstring.h"
>  
>  AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size)  { @@ -71,3 +72,56 @@ AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32
>  
>      return header;
>  }
> +
> +int av_detection_bbox_fill_with_best_labels(char **labels, float 
> +*probabilities, int num_labels, AVDetectionBBox *bbox, int max_classes_per_box, float confidence_threshold) {
> +    int i, j, minpos, ret;

The patch is corrupted by line breaks, check your editor settings for line/word wrap
OR attach the patch
OR use git send-email

Applying: libavutil: add detectionbbox util functions
error: corrupt patch at line 121
error: could not build fake ancestor
Patch failed at 0001 libavutil: add detectionbbox util functions

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Elect your leaders based on what they did after the last election, not
based on what they say before an election.


[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions
  2025-03-08 14:58 [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions m.kaindl0208
  2025-03-08 17:52 ` Sean McGovern
  2025-03-09 13:51 ` Michael Niedermayer
@ 2025-03-09 15:46 ` Lynne
  2025-03-09 18:46   ` Maximilian Kaindl
  2 siblings, 1 reply; 5+ messages in thread
From: Lynne @ 2025-03-09 15:46 UTC (permalink / raw)
  To: ffmpeg-devel

On 08/03/2025 15:58, m.kaindl0208@gmail.com wrote:
> Those functions will be used by classify in the upcoming patches.
> 
> Try the new filters using my Github Repo https://github.com/MaximilianKaindl/DeepFFMPEGVideoClassification.
> 
> Any Feedback is appreciated!
> 
> Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com>
> ---
>   libavutil/detection_bbox.c | 54 ++++++++++++++++++++++++++++++++++++++
>   libavutil/detection_bbox.h | 31 ++++++++++++++++++++++
>   2 files changed, 85 insertions(+)
> 
> diff --git a/libavutil/detection_bbox.c b/libavutil/detection_bbox.c index cb157b355b..378233121d 100644
> --- a/libavutil/detection_bbox.c
> +++ b/libavutil/detection_bbox.c
> @@ -18,6 +18,7 @@
>   
>   #include "detection_bbox.h"
>   #include "mem.h"
> +#include "libavutil/avstring.h"
>   
>   AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size)  { @@ -71,3 +72,56 @@ AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32
>   
>       return header;
>   }
> +
> +int av_detection_bbox_fill_with_best_labels(char **labels, float
> +*probabilities, int num_labels, AVDetectionBBox *bbox, int max_classes_per_box, float confidence_threshold) {
> +    int i, j, minpos, ret;
> +    float min;
> +
> +    if (!labels || !probabilities || !bbox) {
> +        return AVERROR(EINVAL);
> +    }
> +
> +    for (i = 0; i < num_labels; i++) {
> +        if (probabilities[i] >= confidence_threshold) {
> +            if (bbox->classify_count >= max_classes_per_box) {
> +                // Find lowest probability classification
> +                min = av_q2d(bbox->classify_confidences[0]);
> +                minpos = 0;
> +                for (j = 1; j < bbox->classify_count; j++) {
> +                    float prob = av_q2d(bbox->classify_confidences[j]);
> +                    if (prob < min) {
> +                        min = prob;
> +                        minpos = j;
> +                    }
> +                }
> +
> +                if (probabilities[i] > min) {
> +                    ret = av_detection_bbox_set_content(bbox, labels[i], minpos, probabilities[i]);
> +                    if (ret < 0)
> +                        return ret;
> +                }
> +            } else {
> +                ret = av_detection_bbox_set_content(bbox, labels[i], bbox->classify_count, probabilities[i]);
> +                if (ret < 0)
> +                    return ret;
> +                bbox->classify_count++;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label,
> +int index, float probability) {
> +    // Set probability
> +    bbox->classify_confidences[index] = av_make_q((int)(probability *
> +10000), 10000);
> +
> +    // Copy label with size checking
> +    if (av_strlcpy(bbox->classify_labels[index], label, AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) >=
> +        AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) {
> +        av_log(NULL, AV_LOG_WARNING, "Label truncated in set_prob_and_label_of_bbox\n");
> +    }
> +
> +    return 0;
> +}
> diff --git a/libavutil/detection_bbox.h b/libavutil/detection_bbox.h index 011988052c..27d749ad59 100644
> --- a/libavutil/detection_bbox.h
> +++ b/libavutil/detection_bbox.h
> @@ -105,4 +105,35 @@ AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_s
>    * AV_FRAME_DATA_DETECTION_BBOXES and initializes the variables.
>    */
>   AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes);
> +
> +/**
> + * Fills an AVDetectionBBox structure with the best labels based on probabilities.
> + *
> + * This function selects up to max_classes_per_box labels with the
> +highest probabilities
> + * that exceed the given confidence threshold, and assigns them to the bounding box.
> + *
> + * @param labels Array of label strings
> + * @param probabilities Array of probability values corresponding to
> +each label
> + * @param num_labels Number of elements in the labels and probabilities
> +arrays
> + * @param bbox Pointer to the AVDetectionBBox structure to be filled
> + * @param max_classes_per_box Maximum number of classes to assign to
> +the bounding box
> + * @param confidence_threshold Minimum probability value required for a
> +label to be considered
> + * @return 0 on success, negative error code on failure  */ int
> +av_detection_bbox_fill_with_best_labels(char **labels, float
> +*probabilities, int num_labels, AVDetectionBBox *bbox, int
> +max_classes_per_box, float confidence_threshold);
> +
> +/**
> + * Sets the content of an AVDetectionBBox at the specified index.
> + *
> + * This function assigns a label and its associated probability to the
> +specified index
> + * in the bounding box's internal storage.
> + *
> + * @param bbox Pointer to the AVDetectionBBox structure to modify
> + * @param label The class label to assign (will be copied internally)
> + * @param index The index at which to store the label and probability
> + * @param probability The confidence score/probability for this label
> + * @return 0 on success
> + */
> +int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label,
> +int index, float probability);

This is outside the scope of the file IMO. Not something that should be 
in the public API.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions
  2025-03-09 15:46 ` Lynne
@ 2025-03-09 18:46   ` Maximilian Kaindl
  0 siblings, 0 replies; 5+ messages in thread
From: Maximilian Kaindl @ 2025-03-09 18:46 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Hi,

thank you all for the fast feedback.

Regarding the GitHub references in the footer, I apologize for the overly promotional nature - that was not my intention. I was rushing to post and immediately realized it was excessive.

Regarding Lynne's suggestion about relocating functions from the detection box to the classify filter - I had considered this approach initially but kept the code in the detection box class since the classify filter file is already quite substantial. While these functions could theoretically be used elsewhere, that is unlikely. I am comfortable with either implementation approach.

I will fix the formatting issues in the patches promptly and move the code to the classify filter as suggested. Since most of the patches need correction, I will submit everything as a v2 patch set.

Kind regards
________________________________
From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> on behalf of Lynne <dev@lynne.ee>
Sent: Sunday, March 9, 2025 4:46:31 PM
To: ffmpeg-devel@ffmpeg.org <ffmpeg-devel@ffmpeg.org>
Subject: Re: [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions

On 08/03/2025 15:58, m.kaindl0208@gmail.com wrote:
> Those functions will be used by classify in the upcoming patches.
>
> Try the new filters using my Github Repo https://github.com/MaximilianKaindl/DeepFFMPEGVideoClassification.
>
> Any Feedback is appreciated!
>
> Signed-off-by: MaximilianKaindl <m.kaindl0208@gmail.com>
> ---
>   libavutil/detection_bbox.c | 54 ++++++++++++++++++++++++++++++++++++++
>   libavutil/detection_bbox.h | 31 ++++++++++++++++++++++
>   2 files changed, 85 insertions(+)
>
> diff --git a/libavutil/detection_bbox.c b/libavutil/detection_bbox.c index cb157b355b..378233121d 100644
> --- a/libavutil/detection_bbox.c
> +++ b/libavutil/detection_bbox.c
> @@ -18,6 +18,7 @@
>
>   #include "detection_bbox.h"
>   #include "mem.h"
> +#include "libavutil/avstring.h"
>
>   AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size)  { @@ -71,3 +72,56 @@ AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32
>
>       return header;
>   }
> +
> +int av_detection_bbox_fill_with_best_labels(char **labels, float
> +*probabilities, int num_labels, AVDetectionBBox *bbox, int max_classes_per_box, float confidence_threshold) {
> +    int i, j, minpos, ret;
> +    float min;
> +
> +    if (!labels || !probabilities || !bbox) {
> +        return AVERROR(EINVAL);
> +    }
> +
> +    for (i = 0; i < num_labels; i++) {
> +        if (probabilities[i] >= confidence_threshold) {
> +            if (bbox->classify_count >= max_classes_per_box) {
> +                // Find lowest probability classification
> +                min = av_q2d(bbox->classify_confidences[0]);
> +                minpos = 0;
> +                for (j = 1; j < bbox->classify_count; j++) {
> +                    float prob = av_q2d(bbox->classify_confidences[j]);
> +                    if (prob < min) {
> +                        min = prob;
> +                        minpos = j;
> +                    }
> +                }
> +
> +                if (probabilities[i] > min) {
> +                    ret = av_detection_bbox_set_content(bbox, labels[i], minpos, probabilities[i]);
> +                    if (ret < 0)
> +                        return ret;
> +                }
> +            } else {
> +                ret = av_detection_bbox_set_content(bbox, labels[i], bbox->classify_count, probabilities[i]);
> +                if (ret < 0)
> +                    return ret;
> +                bbox->classify_count++;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label,
> +int index, float probability) {
> +    // Set probability
> +    bbox->classify_confidences[index] = av_make_q((int)(probability *
> +10000), 10000);
> +
> +    // Copy label with size checking
> +    if (av_strlcpy(bbox->classify_labels[index], label, AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) >=
> +        AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) {
> +        av_log(NULL, AV_LOG_WARNING, "Label truncated in set_prob_and_label_of_bbox\n");
> +    }
> +
> +    return 0;
> +}
> diff --git a/libavutil/detection_bbox.h b/libavutil/detection_bbox.h index 011988052c..27d749ad59 100644
> --- a/libavutil/detection_bbox.h
> +++ b/libavutil/detection_bbox.h
> @@ -105,4 +105,35 @@ AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_s
>    * AV_FRAME_DATA_DETECTION_BBOXES and initializes the variables.
>    */
>   AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes);
> +
> +/**
> + * Fills an AVDetectionBBox structure with the best labels based on probabilities.
> + *
> + * This function selects up to max_classes_per_box labels with the
> +highest probabilities
> + * that exceed the given confidence threshold, and assigns them to the bounding box.
> + *
> + * @param labels Array of label strings
> + * @param probabilities Array of probability values corresponding to
> +each label
> + * @param num_labels Number of elements in the labels and probabilities
> +arrays
> + * @param bbox Pointer to the AVDetectionBBox structure to be filled
> + * @param max_classes_per_box Maximum number of classes to assign to
> +the bounding box
> + * @param confidence_threshold Minimum probability value required for a
> +label to be considered
> + * @return 0 on success, negative error code on failure  */ int
> +av_detection_bbox_fill_with_best_labels(char **labels, float
> +*probabilities, int num_labels, AVDetectionBBox *bbox, int
> +max_classes_per_box, float confidence_threshold);
> +
> +/**
> + * Sets the content of an AVDetectionBBox at the specified index.
> + *
> + * This function assigns a label and its associated probability to the
> +specified index
> + * in the bounding box's internal storage.
> + *
> + * @param bbox Pointer to the AVDetectionBBox structure to modify
> + * @param label The class label to assign (will be copied internally)
> + * @param index The index at which to store the label and probability
> + * @param probability The confidence score/probability for this label
> + * @return 0 on success
> + */
> +int av_detection_bbox_set_content(AVDetectionBBox *bbox, char *label,
> +int index, float probability);

This is outside the scope of the file IMO. Not something that should be
in the public API.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-03-09 18:46 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-08 14:58 [FFmpeg-devel] [PATCH FFmpeg 1/15] libavutil: add detectionbbox util functions m.kaindl0208
2025-03-08 17:52 ` Sean McGovern
2025-03-09 13:51 ` Michael Niedermayer
2025-03-09 15:46 ` Lynne
2025-03-09 18:46   ` Maximilian Kaindl

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git