Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Lidong Yan <yldhome2d2-at-gmail.com@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Lidong Yan <502024330056@smail.nju.edu.cn>
Subject: [FFmpeg-devel] [PATCH v3 1/2] bloom: replace struct bloom_key * with struct bloom_keyvec
Date: Sat, 28 Jun 2025 12:19:20 +0800
Message-ID: <20250628041921.1097642-2-502024330056@smail.nju.edu.cn> (raw)
In-Reply-To: <20250627062154.1121530-1-502024330056@smail.nju.edu.cn>

The revision traversal limited by pathspec has optimization when
the pathspec has only one element. To support optimization for
multiple pathspec items, we need to modify the data structures
in struct rev_info.

struct rev_info uses bloom_keys and bloom_nr to store the bloom keys
corresponding to a single pathspec item. To allow struct rev_info
to store bloom keys for multiple pathspec items, a new data structure
`struct bloom_keyvec` is introduced. Each `struct bloom_keyvec`
corresponds to a single pathspec item.

In `struct rev_info`, replace bloom_keys and bloom_nr with bloom_keyvecs
and bloom_keyvec_nr. This commit still optimize one pathspec item, thus
bloom_keyvec_nr can only be 0 or 1.

New *_bloom_keyvec functions are added to create and destroy a keyvec.
bloom_filter_contains_vec() is added to check if all key in keyvec is
contained in a bloom filter. fill_bloom_keyvec_key() is added to
initialize a key in keyvec.

Signed-off-by: Lidong Yan <502024330056@smail.nju.edu.cn>
---
 bloom.c    | 31 +++++++++++++++++++++++++++++++
 bloom.h    | 20 ++++++++++++++++++++
 revision.c | 36 ++++++++++++++++++------------------
 revision.h |  6 +++---
 4 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/bloom.c b/bloom.c
index 0c8d2cebf9..8259cfce51 100644
--- a/bloom.c
+++ b/bloom.c
@@ -280,6 +280,25 @@ void deinit_bloom_filters(void)
 	deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
 }
 
+struct bloom_keyvec *create_bloom_keyvec(size_t count)
+{
+	struct bloom_keyvec *vec;
+	size_t sz = sizeof(struct bloom_keyvec);
+	sz += count * sizeof(struct bloom_key);
+	vec = (struct bloom_keyvec *)xcalloc(1, sz);
+	vec->count = count;
+	return vec;
+}
+
+void destroy_bloom_keyvec(struct bloom_keyvec *vec)
+{
+	if (!vec)
+		return;
+	for (size_t nr = 0; nr < vec->count; nr++)
+		clear_bloom_key(&vec->key[nr]);
+	free(vec);
+}
+
 static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
 		       const struct hashmap_entry *eptr,
 		       const struct hashmap_entry *entry_or_key,
@@ -540,3 +559,15 @@ int bloom_filter_contains(const struct bloom_filter *filter,
 
 	return 1;
 }
+
+int bloom_filter_contains_vec(const struct bloom_filter *filter,
+			      const struct bloom_keyvec *vec,
+			      const struct bloom_filter_settings *settings)
+{
+	int ret = 1;
+
+	for (size_t nr = 0; ret > 0 && nr < vec->count; nr++)
+		ret = bloom_filter_contains(filter, &vec->key[nr], settings);
+
+	return ret;
+}
diff --git a/bloom.h b/bloom.h
index 6e46489a20..9e4e832c8c 100644
--- a/bloom.h
+++ b/bloom.h
@@ -74,6 +74,11 @@ struct bloom_key {
 	uint32_t *hashes;
 };
 
+struct bloom_keyvec {
+	size_t count;
+	struct bloom_key key[FLEX_ARRAY];
+};
+
 int load_bloom_filter_from_graph(struct commit_graph *g,
 				 struct bloom_filter *filter,
 				 uint32_t graph_pos);
@@ -100,6 +105,17 @@ void add_key_to_filter(const struct bloom_key *key,
 void init_bloom_filters(void);
 void deinit_bloom_filters(void);
 
+struct bloom_keyvec *create_bloom_keyvec(size_t count);
+void destroy_bloom_keyvec(struct bloom_keyvec *vec);
+
+static inline void fill_bloom_keyvec_key(const char *data, size_t len,
+					 struct bloom_keyvec *vec, size_t nr,
+					 const struct bloom_filter_settings *settings)
+{
+	assert(nr < vec->count);
+	fill_bloom_key(data, len, &vec->key[nr], settings);
+}
+
 enum bloom_filter_computed {
 	BLOOM_NOT_COMPUTED = (1 << 0),
 	BLOOM_COMPUTED     = (1 << 1),
@@ -137,4 +153,8 @@ int bloom_filter_contains(const struct bloom_filter *filter,
 			  const struct bloom_key *key,
 			  const struct bloom_filter_settings *settings);
 
+int bloom_filter_contains_vec(const struct bloom_filter *filter,
+			      const struct bloom_keyvec *v,
+			      const struct bloom_filter_settings *settings);
+
 #endif
diff --git a/revision.c b/revision.c
index afee111196..3aa544c137 100644
--- a/revision.c
+++ b/revision.c
@@ -688,6 +688,7 @@ static int forbid_bloom_filters(struct pathspec *spec)
 static void prepare_to_use_bloom_filter(struct rev_info *revs)
 {
 	struct pathspec_item *pi;
+	struct bloom_keyvec *bloom_keyvec;
 	char *path_alloc = NULL;
 	const char *path, *p;
 	size_t len;
@@ -736,19 +737,21 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
 		p++;
 	}
 
-	revs->bloom_keys_nr = path_component_nr;
-	ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr);
+	revs->bloom_keyvecs_nr = 1;
+	CALLOC_ARRAY(revs->bloom_keyvecs, 1);
+	bloom_keyvec = create_bloom_keyvec(path_component_nr);
+	revs->bloom_keyvecs[0] = bloom_keyvec;
 
-	fill_bloom_key(path, len, &revs->bloom_keys[0],
-		       revs->bloom_filter_settings);
+	fill_bloom_keyvec_key(path, len, bloom_keyvec, 0,
+			      revs->bloom_filter_settings);
 	path_component_nr = 1;
 
 	p = path + len - 1;
 	while (p > path) {
 		if (*p == '/')
-			fill_bloom_key(path, p - path,
-				       &revs->bloom_keys[path_component_nr++],
-				       revs->bloom_filter_settings);
+			fill_bloom_keyvec_key(path, p - path, bloom_keyvec,
+					      path_component_nr++,
+					      revs->bloom_filter_settings);
 		p--;
 	}
 
@@ -779,11 +782,8 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
 		return -1;
 	}
 
-	for (j = 0; result && j < revs->bloom_keys_nr; j++) {
-		result = bloom_filter_contains(filter,
-					       &revs->bloom_keys[j],
-					       revs->bloom_filter_settings);
-	}
+	result = bloom_filter_contains_vec(filter, revs->bloom_keyvecs[0],
+					   revs->bloom_filter_settings);
 
 	if (result)
 		count_bloom_filter_maybe++;
@@ -823,7 +823,7 @@ static int rev_compare_tree(struct rev_info *revs,
 			return REV_TREE_SAME;
 	}
 
-	if (revs->bloom_keys_nr && !nth_parent) {
+	if (revs->bloom_keyvecs_nr && !nth_parent) {
 		bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
 
 		if (bloom_ret == 0)
@@ -850,7 +850,7 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
 	if (!t1)
 		return 0;
 
-	if (!nth_parent && revs->bloom_keys_nr) {
+	if (!nth_parent && revs->bloom_keyvecs_nr) {
 		bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
 		if (!bloom_ret)
 			return 1;
@@ -3230,10 +3230,10 @@ void release_revisions(struct rev_info *revs)
 	line_log_free(revs);
 	oidset_clear(&revs->missing_commits);
 
-	for (int i = 0; i < revs->bloom_keys_nr; i++)
-		clear_bloom_key(&revs->bloom_keys[i]);
-	FREE_AND_NULL(revs->bloom_keys);
-	revs->bloom_keys_nr = 0;
+	for (int i = 0; i < revs->bloom_keyvecs_nr; i++)
+		destroy_bloom_keyvec(revs->bloom_keyvecs[i]);
+	FREE_AND_NULL(revs->bloom_keyvecs);
+	revs->bloom_keyvecs_nr = 0;
 }
 
 static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child)
diff --git a/revision.h b/revision.h
index 6d369cdad6..ac843f58d0 100644
--- a/revision.h
+++ b/revision.h
@@ -62,7 +62,7 @@ struct repository;
 struct rev_info;
 struct string_list;
 struct saved_parents;
-struct bloom_key;
+struct bloom_keyvec;
 struct bloom_filter_settings;
 struct option;
 struct parse_opt_ctx_t;
@@ -360,8 +360,8 @@ struct rev_info {
 
 	/* Commit graph bloom filter fields */
 	/* The bloom filter key(s) for the pathspec */
-	struct bloom_key *bloom_keys;
-	int bloom_keys_nr;
+	struct bloom_keyvec **bloom_keyvecs;
+	int bloom_keyvecs_nr;
 
 	/*
 	 * The bloom filter settings used to generate the key.
-- 
2.50.0.108.g6ae0c543ae

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2025-06-28  4:20 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20250627062154.1121530-1-502024330056@smail.nju.edu.cn>
2025-06-28  4:19 ` [FFmpeg-devel] [PATCH v3 0/2] bloom: enable bloom filter optimization for multiple pathspec elements in revision traversal Lidong Yan
2025-06-28  4:19 ` Lidong Yan [this message]
2025-06-28  4:19 ` [FFmpeg-devel] [PATCH v3 2/2] bloom: optimize multiple pathspec items " Lidong Yan
2025-06-28  4:27   ` Lidong Yan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250628041921.1097642-2-502024330056@smail.nju.edu.cn \
    --to=yldhome2d2-at-gmail.com@ffmpeg.org \
    --cc=502024330056@smail.nju.edu.cn \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git