* [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table
@ 2025-04-22 7:42 Emma Worley
2025-04-22 7:42 ` [FFmpeg-devel] [PATCH v4 2/3] lavc/dxvenc: migrate DXT1 encoder to lavc hashtable Emma Worley
` (3 more replies)
0 siblings, 4 replies; 9+ messages in thread
From: Emma Worley @ 2025-04-22 7:42 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Emma Worley
Adds a generic hash table with the DXV encoder as an initial use case.
Signed-off-by: Emma Worley <emma@emma.gg>
---
libavcodec/Makefile | 2 +
libavcodec/hashtable.c | 192 +++++++++++++++++++++++++++++++++++
libavcodec/hashtable.h | 91 +++++++++++++++++
libavcodec/tests/hashtable.c | 110 ++++++++++++++++++++
4 files changed, 395 insertions(+)
create mode 100644 libavcodec/hashtable.c
create mode 100644 libavcodec/hashtable.h
create mode 100644 libavcodec/tests/hashtable.c
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7bd1dbec9a..8071c59378 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -42,6 +42,7 @@ OBJS = ac3_parser.o \
dv_profile.o \
encode.o \
get_buffer.o \
+ hashtable.o \
imgconvert.o \
jni.o \
lcevcdec.o \
@@ -1321,6 +1322,7 @@ TESTPROGS = avcodec \
bitstream_le \
celp_math \
codec_desc \
+ hashtable \
htmlsubtitles \
jpeg2000dwt \
mathops \
diff --git a/libavcodec/hashtable.c b/libavcodec/hashtable.c
new file mode 100644
index 0000000000..4d6d4a9506
--- /dev/null
+++ b/libavcodec/hashtable.c
@@ -0,0 +1,192 @@
+/*
+ * Generic hashtable
+ * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/crc.h"
+#include "libavutil/error.h"
+#include "libavutil/mem.h"
+#include "hashtable.h"
+
+#define ALIGN _Alignof(size_t)
+
+struct FFHashtableContext {
+ size_t key_size;
+ size_t key_size_aligned;
+ size_t val_size;
+ size_t val_size_aligned;
+ size_t entry_size;
+ size_t max_entries;
+ size_t utilization;
+ const AVCRC *crc;
+ uint8_t *table;
+ uint8_t *swapbuf;
+};
+
+#define ENTRY_PSL(entry) (entry)
+#define ENTRY_OCC(entry) (ENTRY_PSL(entry) + FFALIGN(sizeof(size_t), ALIGN))
+#define ENTRY_KEY(entry) (ENTRY_OCC(entry) + FFALIGN(sizeof(size_t), ALIGN))
+#define ENTRY_VAL(entry) (ENTRY_KEY(entry) + ctx->key_size_aligned)
+
+#define KEYS_EQUAL(k1, k2) !memcmp(k1, k2, ctx->key_size)
+
+int ff_hashtable_alloc(struct FFHashtableContext **ctx, size_t key_size, size_t val_size, size_t max_entries)
+{
+ struct FFHashtableContext *res = av_malloc(sizeof(struct FFHashtableContext));
+ if (!res)
+ return AVERROR(ENOMEM);
+ res->key_size = key_size;
+ res->key_size_aligned = FFALIGN(key_size, ALIGN);
+ res->val_size = val_size;
+ res->val_size_aligned = FFALIGN(val_size, ALIGN);
+ res->entry_size = FFALIGN(sizeof(size_t), ALIGN)
+ + FFALIGN(sizeof(size_t), ALIGN)
+ + res->key_size_aligned
+ + res->val_size_aligned;
+ res->max_entries = max_entries;
+ res->utilization = 0;
+ res->crc = av_crc_get_table(AV_CRC_32_IEEE);
+ if (!res->crc) {
+ ff_hashtable_freep(&res);
+ return AVERROR_BUG;
+ }
+ res->table = av_calloc(res->max_entries, res->entry_size);
+ if (!res->table) {
+ ff_hashtable_freep(&res);
+ return AVERROR(ENOMEM);
+ }
+ res->swapbuf = av_calloc(2, res->key_size_aligned + res->val_size_aligned);
+ if (!res->swapbuf) {
+ ff_hashtable_freep(&res);
+ return AVERROR(ENOMEM);
+ }
+ *ctx = res;
+ return 0;
+}
+
+static size_t hash_key(const struct FFHashtableContext *ctx, const void *key)
+{
+ return av_crc(ctx->crc, 0, key, ctx->key_size) % ctx->max_entries;
+}
+
+int ff_hashtable_get(const struct FFHashtableContext *ctx, const void *key, void *val)
+{
+ size_t hash = hash_key(ctx, key);
+
+ if (!ctx->utilization)
+ return 0;
+
+ for (size_t psl = 0; psl < ctx->max_entries; psl++) {
+ size_t wrapped_index = (hash + psl) % ctx->max_entries;
+ uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size;
+ if (!*ENTRY_OCC(entry) || *(size_t*)ENTRY_PSL(entry) < psl)
+ return 0;
+ if (KEYS_EQUAL(ENTRY_KEY(entry), key)) {
+ memcpy(val, ENTRY_VAL(entry), ctx->val_size);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int ff_hashtable_set(struct FFHashtableContext *ctx, const void *key, const void *val)
+{
+ int swapping = 0;
+ size_t psl = 0;
+ size_t hash = hash_key(ctx, key);
+ uint8_t *set = ctx->swapbuf;
+ uint8_t *tmp = ctx->swapbuf + ctx->key_size_aligned + ctx->val_size_aligned;
+
+ memcpy(set, key, ctx->key_size);
+ memcpy(set + ctx->key_size_aligned, val, ctx->val_size);
+
+ for (size_t i = 0; i < ctx->max_entries; i++) {
+ size_t wrapped_index = (hash + i) % ctx->max_entries;
+ uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size;
+ if (!*ENTRY_OCC(entry) || (!swapping && KEYS_EQUAL(ENTRY_KEY(entry), set))) {
+ if (!*ENTRY_OCC(entry))
+ ctx->utilization++;
+ *(size_t*)ENTRY_PSL(entry) = psl;
+ *ENTRY_OCC(entry) = 1;
+ memcpy(ENTRY_KEY(entry), set, ctx->key_size_aligned + ctx->val_size);
+ return 1;
+ }
+ if (*(size_t*)ENTRY_PSL(entry) < psl) {
+ if (ctx->utilization == ctx->max_entries)
+ return 0;
+ swapping = 1;
+ // set needs to swap with entry
+ memcpy(tmp, ENTRY_KEY(entry), ctx->key_size_aligned + ctx->val_size);
+ memcpy(ENTRY_KEY(entry), set, ctx->key_size_aligned + ctx->val_size);
+ FFSWAP(uint8_t*, set, tmp);
+ FFSWAP(size_t, psl, *(size_t*)ENTRY_PSL(entry));
+ }
+ psl++;
+ }
+ return 0;
+}
+
+int ff_hashtable_delete(struct FFHashtableContext *ctx, const void *key)
+{
+ uint8_t *next_entry;
+ size_t hash = hash_key(ctx, key);
+
+ if (!ctx->utilization)
+ return 0;
+
+ for (size_t psl = 0; psl < ctx->max_entries; psl++) {
+ size_t wrapped_index = (hash + psl) % ctx->max_entries;
+ uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size;
+ if (!*ENTRY_OCC(entry) || *(size_t*)ENTRY_PSL(entry) < psl)
+ return 0;
+ if (KEYS_EQUAL(ENTRY_KEY(entry), key)) {
+ *ENTRY_OCC(entry) = 0;
+ for (psl++; psl < ctx->max_entries; psl++) {
+ wrapped_index = (hash + psl) % ctx->max_entries;
+ next_entry = ctx->table + wrapped_index * ctx->entry_size;
+ if (!*ENTRY_OCC(next_entry) || !*(size_t*)ENTRY_PSL(next_entry)) {
+ ctx->utilization--;
+ return 1;
+ }
+ memcpy(entry, next_entry, ctx->entry_size);
+ (*(size_t*)ENTRY_PSL(entry))--;
+ *ENTRY_OCC(next_entry) = 0;
+ entry = next_entry;
+ }
+ }
+ };
+ return 0;
+}
+
+void ff_hashtable_clear(struct FFHashtableContext *ctx)
+{
+ memset(ctx->table, 0, ctx->entry_size * ctx->max_entries);
+}
+
+void ff_hashtable_freep(struct FFHashtableContext **ctx)
+{
+ if (*ctx) {
+ av_freep(&(*ctx)->table);
+ av_freep(&(*ctx)->swapbuf);
+ }
+ av_freep(ctx);
+}
diff --git a/libavcodec/hashtable.h b/libavcodec/hashtable.h
new file mode 100644
index 0000000000..fd28d3bdd7
--- /dev/null
+++ b/libavcodec/hashtable.h
@@ -0,0 +1,91 @@
+/*
+ * Generic hashtable
+ * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HASHTABLE_H
+#define AVCODEC_HASHTABLE_H
+
+#include <stddef.h>
+
+/* Implements a hash table using Robin Hood open addressing.
+ * See: https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
+ */
+
+struct FFHashtableContext;
+
+/**
+ * Creates a fixed-sized Robin Hood hash table.
+ *
+ * @param ctx context to allocate and initialize
+ * @param key_size size of key type in bytes
+ * @param val_size size of value type in bytes
+ * @param max_entries maximum number of key-value pairs to store
+ *
+ * @return zero on success, nonzero on error
+ */
+int ff_hashtable_alloc(struct FFHashtableContext **ctx, size_t key_size, size_t val_size, size_t max_entries);
+
+/**
+ * Looks up a value from a hash table given a key.
+ *
+ * @param ctx hash table context
+ * @param key pointer to key data
+ * @param val destination pointer for value data
+ *
+ * @return 1 if the key is found, zero if the key is not found
+ */
+int ff_hashtable_get(const struct FFHashtableContext *ctx, const void *key, void *val);
+
+/**
+ * Stores a value in a hash table given a key.
+ *
+ * @param ctx hash table context
+ * @param key pointer to key data
+ * @param val pointer for value data
+ *
+ * @return 1 if the key is written, zero if the key is not written due to the hash table reaching max capacity
+ */
+int ff_hashtable_set(struct FFHashtableContext *ctx, const void *key, const void *val);
+
+/**
+ * Deletes a value from a hash table given a key.
+ *
+ * @param ctx hash table context
+ * @param key pointer to key data
+ *
+ * @return 1 if the key is deleted, zero if the key is not deleted due to not being found
+ */
+int ff_hashtable_delete(struct FFHashtableContext *ctx, const void *key);
+
+/**
+ * Deletes all values from a hash table.
+ *
+ * @param ctx hash table context
+ */
+void ff_hashtable_clear(struct FFHashtableContext *ctx);
+
+/**
+ * Frees a hash table.
+ *
+ * @param ctx hash table context
+ */
+void ff_hashtable_freep(struct FFHashtableContext **ctx);
+
+#endif
diff --git a/libavcodec/tests/hashtable.c b/libavcodec/tests/hashtable.c
new file mode 100644
index 0000000000..367baf4f96
--- /dev/null
+++ b/libavcodec/tests/hashtable.c
@@ -0,0 +1,110 @@
+/*
+ * Generic hashtable tests
+ * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+#include "libavcodec/hashtable.h"
+
+int main(void)
+{
+ struct FFHashtableContext *ctx;
+ uint8_t k;
+ uint64_t v;
+
+ // impossibly large allocation should fail gracefully
+ av_assert0(ff_hashtable_alloc(&ctx, -1, -1, -1) < 0);
+
+ // hashtable can store up to 3 uint8_t->uint64_t entries
+ av_assert0(!ff_hashtable_alloc(&ctx, sizeof(k), sizeof(v), 3));
+
+ // unsuccessful deletes return 0
+ k = 1;
+ av_assert0(!ff_hashtable_delete(ctx, &k));
+
+ // unsuccessful gets return 0
+ k = 1;
+ av_assert0(!ff_hashtable_get(ctx, &k, &v));
+
+ // successful sets returns 1
+ k = 1;
+ v = 1;
+ av_assert0(ff_hashtable_set(ctx, &k, &v));
+
+ // get should now contain 1
+ k = 1;
+ v = 0;
+ av_assert0(ff_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 1);
+
+ // updating sets should return 1
+ k = 1;
+ v = 2;
+ av_assert0(ff_hashtable_set(ctx, &k, &v));
+
+ // get should now contain 2
+ k = 1;
+ v = 0;
+ av_assert0(ff_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 2);
+
+ // fill the table
+ k = 2;
+ v = 2;
+ av_assert0(ff_hashtable_set(ctx, &k, &v));
+ k = 3;
+ v = 3;
+ av_assert0(ff_hashtable_set(ctx, &k, &v));
+
+ // inserting sets on a full table should return 0
+ k = 4;
+ v = 4;
+ av_assert0(!ff_hashtable_set(ctx, &k, &v));
+
+ // updating sets on a full table should return 1
+ k = 1;
+ v = 4;
+ av_assert0(ff_hashtable_set(ctx, &k, &v));
+ v = 0;
+ av_assert0(ff_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 4);
+
+ // successful deletes should return 1
+ k = 1;
+ av_assert0(ff_hashtable_delete(ctx, &k));
+
+ // get should now return 0
+ av_assert0(!ff_hashtable_get(ctx, &k, &v));
+
+ // sanity check remaining keys
+ k = 2;
+ v = 0;
+ av_assert0(ff_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 2);
+ k = 3;
+ v = 0;
+ av_assert0(ff_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 3);
+
+ ff_hashtable_freep(&ctx);
+
+ return 0;
+}
--
2.48.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* [FFmpeg-devel] [PATCH v4 2/3] lavc/dxvenc: migrate DXT1 encoder to lavc hashtable
2025-04-22 7:42 [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table Emma Worley
@ 2025-04-22 7:42 ` Emma Worley
2025-04-22 7:42 ` [FFmpeg-devel] [PATCH v4 3/3] lavc/dxvenc: improve compatibility with Resolume products Emma Worley
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: Emma Worley @ 2025-04-22 7:42 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Emma Worley
Offers a modest performance gain due to the switch from naive linear
probling to robin hood.
Signed-off-by: Emma Worley <emma@emma.gg>
---
libavcodec/dxvenc.c | 121 ++++++++++++--------------------------------
1 file changed, 33 insertions(+), 88 deletions(-)
diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
index 808d8daedb..207ac1f24f 100644
--- a/libavcodec/dxvenc.c
+++ b/libavcodec/dxvenc.c
@@ -21,7 +21,7 @@
#include <stdint.h>
-#include "libavutil/crc.h"
+#include "libavcodec/hashtable.h"
#include "libavutil/imgutils.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
@@ -39,72 +39,9 @@
* appeared in the decompressed stream. Using a simple hash table (HT)
* significantly speeds up the lookback process while encoding.
*/
-#define LOOKBACK_HT_ELEMS 0x40000
+#define LOOKBACK_HT_ELEMS 0x20202
#define LOOKBACK_WORDS 0x20202
-typedef struct HTEntry {
- uint32_t key;
- uint32_t pos;
-} HTEntry;
-
-static void ht_init(HTEntry *ht)
-{
- for (size_t i = 0; i < LOOKBACK_HT_ELEMS; i++) {
- ht[i].pos = -1;
- }
-}
-
-static uint32_t ht_lookup_and_upsert(HTEntry *ht, const AVCRC *hash_ctx,
- uint32_t key, uint32_t pos)
-{
- uint32_t ret = -1;
- size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
- for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
- size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
- HTEntry *entry = &ht[wrapped_index];
- if (entry->key == key || entry->pos == -1) {
- ret = entry->pos;
- entry->key = key;
- entry->pos = pos;
- break;
- }
- }
- return ret;
-}
-
-static void ht_delete(HTEntry *ht, const AVCRC *hash_ctx,
- uint32_t key, uint32_t pos)
-{
- HTEntry *removed_entry = NULL;
- size_t removed_hash;
- size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
-
- for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
- size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
- HTEntry *entry = &ht[wrapped_index];
- if (entry->pos == -1)
- return;
- if (removed_entry) {
- size_t candidate_hash = av_crc(hash_ctx, 0, (uint8_t*)&entry->key, 4) % LOOKBACK_HT_ELEMS;
- if ((wrapped_index > removed_hash && (candidate_hash <= removed_hash || candidate_hash > wrapped_index)) ||
- (wrapped_index < removed_hash && (candidate_hash <= removed_hash && candidate_hash > wrapped_index))) {
- *removed_entry = *entry;
- entry->pos = -1;
- removed_entry = entry;
- removed_hash = wrapped_index;
- }
- } else if (entry->key == key) {
- if (entry->pos <= pos) {
- entry->pos = -1;
- removed_entry = entry;
- removed_hash = wrapped_index;
- } else {
- return;
- }
- }
- }
-}
-
typedef struct DXVEncContext {
AVClass *class;
@@ -121,10 +58,8 @@ typedef struct DXVEncContext {
DXVTextureFormat tex_fmt;
int (*compress_tex)(AVCodecContext *avctx);
- const AVCRC *crc_ctx;
-
- HTEntry color_lookback_ht[LOOKBACK_HT_ELEMS];
- HTEntry lut_lookback_ht[LOOKBACK_HT_ELEMS];
+ struct FFHashtableContext *color_ht;
+ struct FFHashtableContext *lut_ht;
} DXVEncContext;
/* Converts an index offset value to a 2-bit opcode and pushes it to a stream.
@@ -159,27 +94,32 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
DXVEncContext *ctx = avctx->priv_data;
PutByteContext *pbc = &ctx->pbc;
void *value;
- uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 2, op = 0;
+ uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0;
- ht_init(ctx->color_lookback_ht);
- ht_init(ctx->lut_lookback_ht);
+ ff_hashtable_clear(ctx->color_ht);
+ ff_hashtable_clear(ctx->lut_ht);
bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
+ ff_hashtable_set(ctx->color_ht, ctx->tex_data, &pos);
+ pos++;
bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + 4));
-
- ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data), 0);
- ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data + 4), 1);
+ ff_hashtable_set(ctx->lut_ht, ctx->tex_data + 4, &pos);
+ pos++;
while (pos + 2 <= ctx->tex_size / 4) {
idx = 0;
+ color_idx = 0;
+ lut_idx = 0;
color = AV_RL32(ctx->tex_data + pos * 4);
- prev_pos = ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, color, pos);
- color_idx = prev_pos != -1 ? pos - prev_pos : 0;
+ if (ff_hashtable_get(ctx->color_ht, &color, &prev_pos))
+ color_idx = pos - prev_pos;
+ ff_hashtable_set(ctx->color_ht, &color, &pos);
+
if (pos >= LOOKBACK_WORDS) {
uint32_t old_pos = pos - LOOKBACK_WORDS;
- uint32_t old_color = AV_RL32(ctx->tex_data + old_pos * 4);
- ht_delete(ctx->color_lookback_ht, ctx->crc_ctx, old_color, old_pos);
+ if (ff_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
+ ff_hashtable_delete(ctx->color_ht, ctx->tex_data + old_pos * 4);
}
pos++;
@@ -188,13 +128,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
idx = color_idx;
} else {
idx = 0;
- prev_pos = ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, lut, pos);
- lut_idx = prev_pos != -1 ? pos - prev_pos : 0;
+ if (ff_hashtable_get(ctx->lut_ht, &lut, &prev_pos))
+ lut_idx = pos - prev_pos;
+ ff_hashtable_set(ctx->lut_ht, &lut, &pos);
}
if (pos >= LOOKBACK_WORDS) {
uint32_t old_pos = pos - LOOKBACK_WORDS;
- uint32_t old_lut = AV_RL32(ctx->tex_data + old_pos * 4);
- ht_delete(ctx->lut_lookback_ht, ctx->crc_ctx, old_lut, old_pos);
+ if (ff_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
+ ff_hashtable_delete(ctx->lut_ht, ctx->tex_data + old_pos * 4);
}
pos++;
@@ -306,11 +247,12 @@ static av_cold int dxv_init(AVCodecContext *avctx)
return AVERROR(ENOMEM);
}
- ctx->crc_ctx = av_crc_get_table(AV_CRC_32_IEEE);
- if (!ctx->crc_ctx) {
- av_log(avctx, AV_LOG_ERROR, "Could not initialize CRC table.\n");
- return AVERROR_BUG;
- }
+ ret = ff_hashtable_alloc(&ctx->color_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
+ if (ret < 0)
+ return ret;
+ ret = ff_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
+ if (ret < 0)
+ return ret;
return 0;
}
@@ -321,6 +263,9 @@ static av_cold int dxv_close(AVCodecContext *avctx)
av_freep(&ctx->tex_data);
+ ff_hashtable_freep(&ctx->color_ht);
+ ff_hashtable_freep(&ctx->lut_ht);
+
return 0;
}
--
2.48.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* [FFmpeg-devel] [PATCH v4 3/3] lavc/dxvenc: improve compatibility with Resolume products
2025-04-22 7:42 [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table Emma Worley
2025-04-22 7:42 ` [FFmpeg-devel] [PATCH v4 2/3] lavc/dxvenc: migrate DXT1 encoder to lavc hashtable Emma Worley
@ 2025-04-22 7:42 ` Emma Worley
2025-04-22 7:59 ` Andreas Rheinhardt
2025-04-22 17:24 ` [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table Nicolas George
2025-04-22 22:02 ` Nicolas George
3 siblings, 1 reply; 9+ messages in thread
From: Emma Worley @ 2025-04-22 7:42 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Emma Worley
Improves compatibility with Resolume products by adding an additional
hashtable for DXT color+LUT combinations, and padding the DXT texture
dimensions to the next largest multiple of 16. Produces identical
packets to Resolume Alley in manual tests.
Signed-off-by: Emma Worley <emma@emma.gg>
---
libavcodec/dxvenc.c | 131 +++++++++++++++++++++++-------------
tests/ref/fate/dxv3enc-dxt1 | 2 +-
2 files changed, 86 insertions(+), 47 deletions(-)
diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
index 207ac1f24f..48548ae811 100644
--- a/libavcodec/dxvenc.c
+++ b/libavcodec/dxvenc.c
@@ -34,6 +34,11 @@
#define DXV_HEADER_LENGTH 12
+/*
+ * Resolume will refuse to display frames that are not padded to 16x16 pixels.
+ */
+#define DXV_ALIGN(x) FFALIGN(x, 16)
+
/*
* DXV uses LZ-like back-references to avoid copying words that have already
* appeared in the decompressed stream. Using a simple hash table (HT)
@@ -60,6 +65,7 @@ typedef struct DXVEncContext {
struct FFHashtableContext *color_ht;
struct FFHashtableContext *lut_ht;
+ struct FFHashtableContext *combo_ht;
} DXVEncContext;
/* Converts an index offset value to a 2-bit opcode and pushes it to a stream.
@@ -94,10 +100,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
DXVEncContext *ctx = avctx->priv_data;
PutByteContext *pbc = &ctx->pbc;
void *value;
- uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0;
+ uint64_t combo;
+ uint32_t color, lut, idx, combo_idx, prev_pos, old_pos, state = 16, pos = 0, op = 0;
ff_hashtable_clear(ctx->color_ht);
ff_hashtable_clear(ctx->lut_ht);
+ ff_hashtable_clear(ctx->combo_ht);
+
+ ff_hashtable_set(ctx->combo_ht, ctx->tex_data, &pos);
bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
ff_hashtable_set(ctx->color_ht, ctx->tex_data, &pos);
@@ -107,51 +117,46 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
pos++;
while (pos + 2 <= ctx->tex_size / 4) {
- idx = 0;
- color_idx = 0;
- lut_idx = 0;
+ combo = AV_RL64(ctx->tex_data + pos * 4);
+ combo_idx = ff_hashtable_get(ctx->combo_ht, &combo, &prev_pos) ? pos - prev_pos : 0;
+ idx = combo_idx;
+ PUSH_OP(2);
+ if (pos >= LOOKBACK_WORDS) {
+ old_pos = pos - LOOKBACK_WORDS;
+ if (ff_hashtable_get(ctx->combo_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
+ ff_hashtable_delete(ctx->combo_ht, ctx->tex_data + old_pos * 4);
+ }
+ ff_hashtable_set(ctx->combo_ht, &combo, &pos);
color = AV_RL32(ctx->tex_data + pos * 4);
- if (ff_hashtable_get(ctx->color_ht, &color, &prev_pos))
- color_idx = pos - prev_pos;
- ff_hashtable_set(ctx->color_ht, &color, &pos);
-
+ if (!combo_idx) {
+ idx = ff_hashtable_get(ctx->color_ht, &color, &prev_pos) ? pos - prev_pos : 0;
+ PUSH_OP(2);
+ if (!idx)
+ bytestream2_put_le32(pbc, color);
+ }
if (pos >= LOOKBACK_WORDS) {
- uint32_t old_pos = pos - LOOKBACK_WORDS;
+ old_pos = pos - LOOKBACK_WORDS;
if (ff_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
ff_hashtable_delete(ctx->color_ht, ctx->tex_data + old_pos * 4);
}
+ ff_hashtable_set(ctx->color_ht, &color, &pos);
pos++;
lut = AV_RL32(ctx->tex_data + pos * 4);
- if (color_idx && lut == AV_RL32(ctx->tex_data + (pos - color_idx) * 4)) {
- idx = color_idx;
- } else {
- idx = 0;
- if (ff_hashtable_get(ctx->lut_ht, &lut, &prev_pos))
- lut_idx = pos - prev_pos;
- ff_hashtable_set(ctx->lut_ht, &lut, &pos);
+ if (!combo_idx) {
+ idx = ff_hashtable_get(ctx->lut_ht, &lut, &prev_pos) ? pos - prev_pos : 0;
+ PUSH_OP(2);
+ if (!idx)
+ bytestream2_put_le32(pbc, lut);
}
if (pos >= LOOKBACK_WORDS) {
- uint32_t old_pos = pos - LOOKBACK_WORDS;
+ old_pos = pos - LOOKBACK_WORDS;
if (ff_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
ff_hashtable_delete(ctx->lut_ht, ctx->tex_data + old_pos * 4);
}
+ ff_hashtable_set(ctx->lut_ht, &lut, &pos);
pos++;
-
- PUSH_OP(2);
-
- if (!idx) {
- idx = color_idx;
- PUSH_OP(2);
- if (!idx)
- bytestream2_put_le32(pbc, color);
-
- idx = lut_idx;
- PUSH_OP(2);
- if (!idx)
- bytestream2_put_le32(pbc, lut);
- }
}
return 0;
@@ -172,12 +177,50 @@ static int dxv_encode(AVCodecContext *avctx, AVPacket *pkt,
return ret;
if (ctx->enc.tex_funct) {
+ uint8_t *safe_data[4] = {frame->data[0], 0, 0, 0};
+ int safe_linesize[4] = {frame->linesize[0], 0, 0, 0};
+
+ if (avctx->width != DXV_ALIGN(avctx->width) || avctx->height != DXV_ALIGN(avctx->height)) {
+ ret = av_image_alloc(
+ safe_data,
+ safe_linesize,
+ DXV_ALIGN(avctx->width),
+ DXV_ALIGN(avctx->height),
+ avctx->pix_fmt,
+ 1);
+ if (ret < 0)
+ return ret;
+
+ av_image_copy2(
+ safe_data,
+ safe_linesize,
+ frame->data,
+ frame->linesize,
+ avctx->pix_fmt,
+ avctx->width,
+ avctx->height);
+
+ if (avctx->width != DXV_ALIGN(avctx->width)) {
+ for (int y = 0; y < avctx->height; y++) {
+ memset(safe_data[0] + y * safe_linesize[0] + frame->linesize[0], 0, safe_linesize[0] - frame->linesize[0]);
+ }
+ }
+ if (avctx->height != DXV_ALIGN(avctx->height)) {
+ for (int y = avctx->height; y < DXV_ALIGN(avctx->height); y++) {
+ memset(safe_data[0] + y * safe_linesize[0], 0, safe_linesize[0]);
+ }
+ }
+ }
+
ctx->enc.tex_data.out = ctx->tex_data;
- ctx->enc.frame_data.in = frame->data[0];
- ctx->enc.stride = frame->linesize[0];
- ctx->enc.width = avctx->width;
- ctx->enc.height = avctx->height;
+ ctx->enc.frame_data.in = safe_data[0];
+ ctx->enc.stride = safe_linesize[0];
+ ctx->enc.width = DXV_ALIGN(avctx->width);
+ ctx->enc.height = DXV_ALIGN(avctx->height);
ff_texturedsp_exec_compress_threads(avctx, &ctx->enc);
+
+ if (safe_data[0] != frame->data[0])
+ av_freep(&safe_data[0]);
} else {
/* unimplemented: YCoCg formats */
return AVERROR_INVALIDDATA;
@@ -216,14 +259,6 @@ static av_cold int dxv_init(AVCodecContext *avctx)
return ret;
}
- if (avctx->width % TEXTURE_BLOCK_W || avctx->height % TEXTURE_BLOCK_H) {
- av_log(avctx,
- AV_LOG_ERROR,
- "Video size %dx%d is not multiple of "AV_STRINGIFY(TEXTURE_BLOCK_W)"x"AV_STRINGIFY(TEXTURE_BLOCK_H)".\n",
- avctx->width, avctx->height);
- return AVERROR_INVALIDDATA;
- }
-
ff_texturedspenc_init(&texdsp);
switch (ctx->tex_fmt) {
@@ -237,10 +272,10 @@ static av_cold int dxv_init(AVCodecContext *avctx)
return AVERROR_INVALIDDATA;
}
ctx->enc.raw_ratio = 16;
- ctx->tex_size = avctx->width / TEXTURE_BLOCK_W *
- avctx->height / TEXTURE_BLOCK_H *
+ ctx->tex_size = DXV_ALIGN(avctx->width) / TEXTURE_BLOCK_W *
+ DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H *
ctx->enc.tex_ratio;
- ctx->enc.slice_count = av_clip(avctx->thread_count, 1, avctx->height / TEXTURE_BLOCK_H);
+ ctx->enc.slice_count = av_clip(avctx->thread_count, 1, DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H);
ctx->tex_data = av_malloc(ctx->tex_size);
if (!ctx->tex_data) {
@@ -251,6 +286,9 @@ static av_cold int dxv_init(AVCodecContext *avctx)
if (ret < 0)
return ret;
ret = ff_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
+ if (ret < 0)
+ return ret;
+ ret = ff_hashtable_alloc(&ctx->combo_ht, sizeof(uint64_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
if (ret < 0)
return ret;
@@ -265,6 +303,7 @@ static av_cold int dxv_close(AVCodecContext *avctx)
ff_hashtable_freep(&ctx->color_ht);
ff_hashtable_freep(&ctx->lut_ht);
+ ff_hashtable_freep(&ctx->combo_ht);
return 0;
}
diff --git a/tests/ref/fate/dxv3enc-dxt1 b/tests/ref/fate/dxv3enc-dxt1
index 74849a8031..e09000e181 100644
--- a/tests/ref/fate/dxv3enc-dxt1
+++ b/tests/ref/fate/dxv3enc-dxt1
@@ -3,4 +3,4 @@
#codec_id 0: dxv
#dimensions 0: 1920x1080
#sar 0: 1/1
-0, 0, 0, 1, 76521, 0xed387a5e
+0, 0, 0, 1, 76190, 0x0e6f0326
--
2.48.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 3/3] lavc/dxvenc: improve compatibility with Resolume products
2025-04-22 7:42 ` [FFmpeg-devel] [PATCH v4 3/3] lavc/dxvenc: improve compatibility with Resolume products Emma Worley
@ 2025-04-22 7:59 ` Andreas Rheinhardt
2025-04-22 8:14 ` Emma Worley
0 siblings, 1 reply; 9+ messages in thread
From: Andreas Rheinhardt @ 2025-04-22 7:59 UTC (permalink / raw)
To: ffmpeg-devel
Emma Worley:
> Improves compatibility with Resolume products by adding an additional
> hashtable for DXT color+LUT combinations, and padding the DXT texture
> dimensions to the next largest multiple of 16. Produces identical
> packets to Resolume Alley in manual tests.
>
> Signed-off-by: Emma Worley <emma@emma.gg>
> ---
> libavcodec/dxvenc.c | 131 +++++++++++++++++++++++-------------
> tests/ref/fate/dxv3enc-dxt1 | 2 +-
> 2 files changed, 86 insertions(+), 47 deletions(-)
>
> diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
> index 207ac1f24f..48548ae811 100644
> --- a/libavcodec/dxvenc.c
> +++ b/libavcodec/dxvenc.c
> @@ -34,6 +34,11 @@
>
> #define DXV_HEADER_LENGTH 12
>
> +/*
> + * Resolume will refuse to display frames that are not padded to 16x16 pixels.
> + */
> +#define DXV_ALIGN(x) FFALIGN(x, 16)
> +
> /*
> * DXV uses LZ-like back-references to avoid copying words that have already
> * appeared in the decompressed stream. Using a simple hash table (HT)
> @@ -60,6 +65,7 @@ typedef struct DXVEncContext {
>
> struct FFHashtableContext *color_ht;
> struct FFHashtableContext *lut_ht;
> + struct FFHashtableContext *combo_ht;
> } DXVEncContext;
>
> /* Converts an index offset value to a 2-bit opcode and pushes it to a stream.
> @@ -94,10 +100,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
> DXVEncContext *ctx = avctx->priv_data;
> PutByteContext *pbc = &ctx->pbc;
> void *value;
> - uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0;
> + uint64_t combo;
> + uint32_t color, lut, idx, combo_idx, prev_pos, old_pos, state = 16, pos = 0, op = 0;
>
> ff_hashtable_clear(ctx->color_ht);
> ff_hashtable_clear(ctx->lut_ht);
> + ff_hashtable_clear(ctx->combo_ht);
> +
> + ff_hashtable_set(ctx->combo_ht, ctx->tex_data, &pos);
>
> bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
> ff_hashtable_set(ctx->color_ht, ctx->tex_data, &pos);
> @@ -107,51 +117,46 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
> pos++;
>
> while (pos + 2 <= ctx->tex_size / 4) {
> - idx = 0;
> - color_idx = 0;
> - lut_idx = 0;
> + combo = AV_RL64(ctx->tex_data + pos * 4);
> + combo_idx = ff_hashtable_get(ctx->combo_ht, &combo, &prev_pos) ? pos - prev_pos : 0;
> + idx = combo_idx;
> + PUSH_OP(2);
> + if (pos >= LOOKBACK_WORDS) {
> + old_pos = pos - LOOKBACK_WORDS;
> + if (ff_hashtable_get(ctx->combo_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
> + ff_hashtable_delete(ctx->combo_ht, ctx->tex_data + old_pos * 4);
> + }
> + ff_hashtable_set(ctx->combo_ht, &combo, &pos);
>
> color = AV_RL32(ctx->tex_data + pos * 4);
> - if (ff_hashtable_get(ctx->color_ht, &color, &prev_pos))
> - color_idx = pos - prev_pos;
> - ff_hashtable_set(ctx->color_ht, &color, &pos);
> -
> + if (!combo_idx) {
> + idx = ff_hashtable_get(ctx->color_ht, &color, &prev_pos) ? pos - prev_pos : 0;
> + PUSH_OP(2);
> + if (!idx)
> + bytestream2_put_le32(pbc, color);
> + }
> if (pos >= LOOKBACK_WORDS) {
> - uint32_t old_pos = pos - LOOKBACK_WORDS;
> + old_pos = pos - LOOKBACK_WORDS;
> if (ff_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
> ff_hashtable_delete(ctx->color_ht, ctx->tex_data + old_pos * 4);
> }
> + ff_hashtable_set(ctx->color_ht, &color, &pos);
> pos++;
>
> lut = AV_RL32(ctx->tex_data + pos * 4);
> - if (color_idx && lut == AV_RL32(ctx->tex_data + (pos - color_idx) * 4)) {
> - idx = color_idx;
> - } else {
> - idx = 0;
> - if (ff_hashtable_get(ctx->lut_ht, &lut, &prev_pos))
> - lut_idx = pos - prev_pos;
> - ff_hashtable_set(ctx->lut_ht, &lut, &pos);
> + if (!combo_idx) {
> + idx = ff_hashtable_get(ctx->lut_ht, &lut, &prev_pos) ? pos - prev_pos : 0;
> + PUSH_OP(2);
> + if (!idx)
> + bytestream2_put_le32(pbc, lut);
> }
> if (pos >= LOOKBACK_WORDS) {
> - uint32_t old_pos = pos - LOOKBACK_WORDS;
> + old_pos = pos - LOOKBACK_WORDS;
> if (ff_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
> ff_hashtable_delete(ctx->lut_ht, ctx->tex_data + old_pos * 4);
> }
> + ff_hashtable_set(ctx->lut_ht, &lut, &pos);
> pos++;
> -
> - PUSH_OP(2);
> -
> - if (!idx) {
> - idx = color_idx;
> - PUSH_OP(2);
> - if (!idx)
> - bytestream2_put_le32(pbc, color);
> -
> - idx = lut_idx;
> - PUSH_OP(2);
> - if (!idx)
> - bytestream2_put_le32(pbc, lut);
> - }
> }
>
> return 0;
> @@ -172,12 +177,50 @@ static int dxv_encode(AVCodecContext *avctx, AVPacket *pkt,
> return ret;
>
> if (ctx->enc.tex_funct) {
> + uint8_t *safe_data[4] = {frame->data[0], 0, 0, 0};
> + int safe_linesize[4] = {frame->linesize[0], 0, 0, 0};
> +
> + if (avctx->width != DXV_ALIGN(avctx->width) || avctx->height != DXV_ALIGN(avctx->height)) {
> + ret = av_image_alloc(
> + safe_data,
> + safe_linesize,
> + DXV_ALIGN(avctx->width),
> + DXV_ALIGN(avctx->height),
> + avctx->pix_fmt,
> + 1);
> + if (ret < 0)
> + return ret;
> +
> + av_image_copy2(
> + safe_data,
> + safe_linesize,
> + frame->data,
> + frame->linesize,
> + avctx->pix_fmt,
> + avctx->width,
> + avctx->height);
> +
> + if (avctx->width != DXV_ALIGN(avctx->width)) {
> + for (int y = 0; y < avctx->height; y++) {
> + memset(safe_data[0] + y * safe_linesize[0] + frame->linesize[0], 0, safe_linesize[0] - frame->linesize[0]);
> + }
> + }
> + if (avctx->height != DXV_ALIGN(avctx->height)) {
> + for (int y = avctx->height; y < DXV_ALIGN(avctx->height); y++) {
> + memset(safe_data[0] + y * safe_linesize[0], 0, safe_linesize[0]);
> + }
> + }
> + }
Did you try to avoid the above by modifying
ff_texturedsp_exec_compress_threads()?
> +
> ctx->enc.tex_data.out = ctx->tex_data;
> - ctx->enc.frame_data.in = frame->data[0];
> - ctx->enc.stride = frame->linesize[0];
> - ctx->enc.width = avctx->width;
> - ctx->enc.height = avctx->height;
> + ctx->enc.frame_data.in = safe_data[0];
> + ctx->enc.stride = safe_linesize[0];
> + ctx->enc.width = DXV_ALIGN(avctx->width);
> + ctx->enc.height = DXV_ALIGN(avctx->height);
> ff_texturedsp_exec_compress_threads(avctx, &ctx->enc);
> +
> + if (safe_data[0] != frame->data[0])
> + av_freep(&safe_data[0]);
> } else {
> /* unimplemented: YCoCg formats */
> return AVERROR_INVALIDDATA;
> @@ -216,14 +259,6 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> return ret;
> }
>
> - if (avctx->width % TEXTURE_BLOCK_W || avctx->height % TEXTURE_BLOCK_H) {
> - av_log(avctx,
> - AV_LOG_ERROR,
> - "Video size %dx%d is not multiple of "AV_STRINGIFY(TEXTURE_BLOCK_W)"x"AV_STRINGIFY(TEXTURE_BLOCK_H)".\n",
> - avctx->width, avctx->height);
> - return AVERROR_INVALIDDATA;
> - }
> -
> ff_texturedspenc_init(&texdsp);
>
> switch (ctx->tex_fmt) {
> @@ -237,10 +272,10 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> return AVERROR_INVALIDDATA;
> }
> ctx->enc.raw_ratio = 16;
> - ctx->tex_size = avctx->width / TEXTURE_BLOCK_W *
> - avctx->height / TEXTURE_BLOCK_H *
> + ctx->tex_size = DXV_ALIGN(avctx->width) / TEXTURE_BLOCK_W *
> + DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H *
> ctx->enc.tex_ratio;
> - ctx->enc.slice_count = av_clip(avctx->thread_count, 1, avctx->height / TEXTURE_BLOCK_H);
> + ctx->enc.slice_count = av_clip(avctx->thread_count, 1, DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H);
>
> ctx->tex_data = av_malloc(ctx->tex_size);
> if (!ctx->tex_data) {
> @@ -251,6 +286,9 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> if (ret < 0)
> return ret;
> ret = ff_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> + if (ret < 0)
> + return ret;
> + ret = ff_hashtable_alloc(&ctx->combo_ht, sizeof(uint64_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> if (ret < 0)
> return ret;
>
> @@ -265,6 +303,7 @@ static av_cold int dxv_close(AVCodecContext *avctx)
>
> ff_hashtable_freep(&ctx->color_ht);
> ff_hashtable_freep(&ctx->lut_ht);
> + ff_hashtable_freep(&ctx->combo_ht);
>
> return 0;
> }
> diff --git a/tests/ref/fate/dxv3enc-dxt1 b/tests/ref/fate/dxv3enc-dxt1
> index 74849a8031..e09000e181 100644
> --- a/tests/ref/fate/dxv3enc-dxt1
> +++ b/tests/ref/fate/dxv3enc-dxt1
> @@ -3,4 +3,4 @@
> #codec_id 0: dxv
> #dimensions 0: 1920x1080
> #sar 0: 1/1
> -0, 0, 0, 1, 76521, 0xed387a5e
> +0, 0, 0, 1, 76190, 0x0e6f0326
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 3/3] lavc/dxvenc: improve compatibility with Resolume products
2025-04-22 7:59 ` Andreas Rheinhardt
@ 2025-04-22 8:14 ` Emma Worley
0 siblings, 0 replies; 9+ messages in thread
From: Emma Worley @ 2025-04-22 8:14 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Tue, Apr 22, 2025 at 1:00 AM Andreas Rheinhardt
<andreas.rheinhardt@outlook.com> wrote:
> Did you try to avoid the above by modifying
> ff_texturedsp_exec_compress_threads()?
I considered it but adding support to substitute in padding seemed
like a rather large refactor.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table
2025-04-22 7:42 [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table Emma Worley
2025-04-22 7:42 ` [FFmpeg-devel] [PATCH v4 2/3] lavc/dxvenc: migrate DXT1 encoder to lavc hashtable Emma Worley
2025-04-22 7:42 ` [FFmpeg-devel] [PATCH v4 3/3] lavc/dxvenc: improve compatibility with Resolume products Emma Worley
@ 2025-04-22 17:24 ` Nicolas George
2025-04-22 17:57 ` Emma Worley
2025-04-22 22:02 ` Nicolas George
3 siblings, 1 reply; 9+ messages in thread
From: Nicolas George @ 2025-04-22 17:24 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Emma Worley (HE12025-04-22):
> Adds a generic hash table with the DXV encoder as an initial use case.
Hi.
This code is already really good. I have some local remarks that will
not require a lot of work from you and make it better. And I have some
global remarks that would require a lot of work from you and might shave
a few cycles. For the rice wine of clarity, I have decided to send them
separately. This is the mail about the later.
After writing most of what follows I thought of checking how this data
structure is used in practice. What I wrote is more relevant as the size
of the entries grows, but in practice the entries are very small, which
means the current version is probably close to the best. Still, I wrote
it, I might as well send it.
As I said, the code is already really good. I do not consider necessary
to act on what I will say here, or even to read it. But it may contain
ideas to do even better, for you or somebody else, for now or for later.
This is about hash tables and algorithmic.
Vocabulary as I will use it here:
Hash bucket, or bucket: integer-indexed cell where the hash function
directs us to start searching for a key.
Entry: key and its associated value, as wanted by the code that uses the
hash table.
Entry slot, or slot: memory that can store an entry, especially relevant
if pre-allocated.
We all know the core difficulty with hash tables: different keys can map
to the same hash value and therefore hash bucket. There are two kinds of
solutions to this collision issue:
- try to find another hash bucket to find an available entry slot;
- make hash buckets capable of holding multiple entries, typically by
the means of some kind of linked list.
In the most C and simplistic implementation, hash buckets and entry
slots are the same, and the code, common to add, get, del, just tries
the next bucket until it finds the right key or a hole.
In the most Java/GLib implementation, hash buckets are just pointers and
each entry is a separately allocated slot. (Though I am sure the
efficient Java hash tables are written much more in the C style.)
The issue with the second way is that it requires an extra number
(pointer/index, whatever) per bucket and per entry slot.
The first way has two problems. Clustering ruins the efficiency the
table. And deletion becomes hard, because just leaving a hole would
prevent from finding an entry that has the be stored after the hole
compared to its ideal place.
Clustering can be reduced with a secondary hash function (or even more
subtle): instead of looking at bucket h+1, look at bucket h+h'. Make
sure h' and the size of the hash table do not have common factors. But
that does not solve the issue of deletions.
I was not familiar with this “Robin Hood” algorithm. From what I
understand, it is a solution to both clustering and deletion. Excellent.
Except it costs a number per bucket/slot.
If we consider spending extra memory, we have to consider if the linked
list solution might not be more efficient.
This hash table has constant size. That makes it easy to pre-allocate
all slots in a big array. It can be the same array as the buckets or a
separate one.
If we keep buckets = slots and if we accept to move an entry on
occasion, then we can make it work with just one number per
entry/bucket. When adding a new entry, if the bucket is not empty, check
if it is a collision: same hash value? If it is a collision, then take
any empty bucket/slot (keeping them chained is easy) for the new entry
and link it to the existing one. If it is not a collision, then take the
bucket/slot for the new entry after moving the entry that was there into
an empty bucket.
What I described above is not very nice, but it is the best I know / can
find that only costs one number per bucket/slot. If we can afford to
spend more memory on it, we can do better by the virtue of breaking the
buckets = slots equivalence.
It is especially interesting if the entries are large, because this
version does not require copying entries. Also, we can have more buckets
than slots, reducing collisions.
All of this is very dependant on specifics. In particular, cache
locality can make a solution that seems slightly worse actually better.
It would require extensive benchmarking.
Regards,
--
Nicolas George
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table
2025-04-22 17:24 ` [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table Nicolas George
@ 2025-04-22 17:57 ` Emma Worley
2025-04-22 22:04 ` Nicolas George
0 siblings, 1 reply; 9+ messages in thread
From: Emma Worley @ 2025-04-22 17:57 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Perhaps I can add a `mode` enum parameter to the FFHashtableContext to
control behavior? Then we can benchmark different behaviors on a
per-use-case basis.
On Tue, Apr 22, 2025 at 10:24 AM Nicolas George <george@nsup.org> wrote:
>
> Emma Worley (HE12025-04-22):
> > Adds a generic hash table with the DXV encoder as an initial use case.
>
> Hi.
>
> This code is already really good. I have some local remarks that will
> not require a lot of work from you and make it better. And I have some
> global remarks that would require a lot of work from you and might shave
> a few cycles. For the rice wine of clarity, I have decided to send them
> separately. This is the mail about the later.
>
> After writing most of what follows I thought of checking how this data
> structure is used in practice. What I wrote is more relevant as the size
> of the entries grows, but in practice the entries are very small, which
> means the current version is probably close to the best. Still, I wrote
> it, I might as well send it.
>
> As I said, the code is already really good. I do not consider necessary
> to act on what I will say here, or even to read it. But it may contain
> ideas to do even better, for you or somebody else, for now or for later.
>
> This is about hash tables and algorithmic.
>
> Vocabulary as I will use it here:
>
> Hash bucket, or bucket: integer-indexed cell where the hash function
> directs us to start searching for a key.
>
> Entry: key and its associated value, as wanted by the code that uses the
> hash table.
>
> Entry slot, or slot: memory that can store an entry, especially relevant
> if pre-allocated.
>
> We all know the core difficulty with hash tables: different keys can map
> to the same hash value and therefore hash bucket. There are two kinds of
> solutions to this collision issue:
>
> - try to find another hash bucket to find an available entry slot;
>
> - make hash buckets capable of holding multiple entries, typically by
> the means of some kind of linked list.
>
> In the most C and simplistic implementation, hash buckets and entry
> slots are the same, and the code, common to add, get, del, just tries
> the next bucket until it finds the right key or a hole.
>
> In the most Java/GLib implementation, hash buckets are just pointers and
> each entry is a separately allocated slot. (Though I am sure the
> efficient Java hash tables are written much more in the C style.)
>
> The issue with the second way is that it requires an extra number
> (pointer/index, whatever) per bucket and per entry slot.
>
> The first way has two problems. Clustering ruins the efficiency the
> table. And deletion becomes hard, because just leaving a hole would
> prevent from finding an entry that has the be stored after the hole
> compared to its ideal place.
>
> Clustering can be reduced with a secondary hash function (or even more
> subtle): instead of looking at bucket h+1, look at bucket h+h'. Make
> sure h' and the size of the hash table do not have common factors. But
> that does not solve the issue of deletions.
>
> I was not familiar with this “Robin Hood” algorithm. From what I
> understand, it is a solution to both clustering and deletion. Excellent.
>
> Except it costs a number per bucket/slot.
>
> If we consider spending extra memory, we have to consider if the linked
> list solution might not be more efficient.
>
> This hash table has constant size. That makes it easy to pre-allocate
> all slots in a big array. It can be the same array as the buckets or a
> separate one.
>
> If we keep buckets = slots and if we accept to move an entry on
> occasion, then we can make it work with just one number per
> entry/bucket. When adding a new entry, if the bucket is not empty, check
> if it is a collision: same hash value? If it is a collision, then take
> any empty bucket/slot (keeping them chained is easy) for the new entry
> and link it to the existing one. If it is not a collision, then take the
> bucket/slot for the new entry after moving the entry that was there into
> an empty bucket.
>
> What I described above is not very nice, but it is the best I know / can
> find that only costs one number per bucket/slot. If we can afford to
> spend more memory on it, we can do better by the virtue of breaking the
> buckets = slots equivalence.
>
> It is especially interesting if the entries are large, because this
> version does not require copying entries. Also, we can have more buckets
> than slots, reducing collisions.
>
> All of this is very dependant on specifics. In particular, cache
> locality can make a solution that seems slightly worse actually better.
> It would require extensive benchmarking.
>
> Regards,
>
> --
> Nicolas George
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table
2025-04-22 7:42 [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table Emma Worley
` (2 preceding siblings ...)
2025-04-22 17:24 ` [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table Nicolas George
@ 2025-04-22 22:02 ` Nicolas George
3 siblings, 0 replies; 9+ messages in thread
From: Nicolas George @ 2025-04-22 22:02 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Emma Worley (HE12025-04-22):
> Adds a generic hash table with the DXV encoder as an initial use case.
>
> Signed-off-by: Emma Worley <emma@emma.gg>
> ---
> libavcodec/Makefile | 2 +
> libavcodec/hashtable.c | 192 +++++++++++++++++++++++++++++++++++
> libavcodec/hashtable.h | 91 +++++++++++++++++
Thanks.
Since it is no longer public and the only user for now has small entries
and a bounded total size, I would suggest to change back all the
unsigned integers that I-do-not-remember-who had you replace with size_t
and see the effect on speed. I suspect it might be non-negligible. You
can use a typedef to make switching between size_t and unsigned easy.
> libavcodec/tests/hashtable.c | 110 ++++++++++++++++++++
> 4 files changed, 395 insertions(+)
> create mode 100644 libavcodec/hashtable.c
> create mode 100644 libavcodec/hashtable.h
> create mode 100644 libavcodec/tests/hashtable.c
>
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 7bd1dbec9a..8071c59378 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -42,6 +42,7 @@ OBJS = ac3_parser.o \
> dv_profile.o \
> encode.o \
> get_buffer.o \
> + hashtable.o \
> imgconvert.o \
> jni.o \
> lcevcdec.o \
> @@ -1321,6 +1322,7 @@ TESTPROGS = avcodec \
> bitstream_le \
> celp_math \
> codec_desc \
> + hashtable \
> htmlsubtitles \
> jpeg2000dwt \
> mathops \
> diff --git a/libavcodec/hashtable.c b/libavcodec/hashtable.c
> new file mode 100644
> index 0000000000..4d6d4a9506
> --- /dev/null
> +++ b/libavcodec/hashtable.c
> @@ -0,0 +1,192 @@
> +/*
> + * Generic hashtable
> + * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <stdint.h>
> +#include <string.h>
> +
> +#include "libavutil/crc.h"
> +#include "libavutil/error.h"
> +#include "libavutil/mem.h"
> +#include "hashtable.h"
> +
> +#define ALIGN _Alignof(size_t)
> +
> +struct FFHashtableContext {
> + size_t key_size;
> + size_t key_size_aligned;
> + size_t val_size;
> + size_t val_size_aligned;
> + size_t entry_size;
> + size_t max_entries;
> + size_t utilization;
Nit: nb_entries?
> + const AVCRC *crc;
> + uint8_t *table;
> + uint8_t *swapbuf;
> +};
> +
> +#define ENTRY_PSL(entry) (entry)
Except for the definition of the next macro, it is always used with
*(size_t*) in front, you might as well make it part of the macro.
> +#define ENTRY_OCC(entry) (ENTRY_PSL(entry) + FFALIGN(sizeof(size_t), ALIGN))
Is it a flag that tells if the bucket is occupied? Maybe add a comment.
Also, you could make *ENTRY_PSL(entry) one more than it is currently,
zero for an empty bucket. It saves one field. And less memory without
more complex code should be faster.
> +#define ENTRY_KEY(entry) (ENTRY_OCC(entry) + FFALIGN(sizeof(size_t), ALIGN))
> +#define ENTRY_VAL(entry) (ENTRY_KEY(entry) + ctx->key_size_aligned)
> +
> +#define KEYS_EQUAL(k1, k2) !memcmp(k1, k2, ctx->key_size)
Not necessary here but usually a good practice to protect macros with
parentheses.
> +
> +int ff_hashtable_alloc(struct FFHashtableContext **ctx, size_t key_size, size_t val_size, size_t max_entries)
> +{
> + struct FFHashtableContext *res = av_malloc(sizeof(struct FFHashtableContext));
> + if (!res)
> + return AVERROR(ENOMEM);
> + res->key_size = key_size;
> + res->key_size_aligned = FFALIGN(key_size, ALIGN);
> + res->val_size = val_size;
> + res->val_size_aligned = FFALIGN(val_size, ALIGN);
> + res->entry_size = FFALIGN(sizeof(size_t), ALIGN)
> + + FFALIGN(sizeof(size_t), ALIGN)
> + + res->key_size_aligned
> + + res->val_size_aligned;
> + res->max_entries = max_entries;
> + res->utilization = 0;
> + res->crc = av_crc_get_table(AV_CRC_32_IEEE);
> + if (!res->crc) {
> + ff_hashtable_freep(&res);
> + return AVERROR_BUG;
> + }
> + res->table = av_calloc(res->max_entries, res->entry_size);
> + if (!res->table) {
> + ff_hashtable_freep(&res);
> + return AVERROR(ENOMEM);
> + }
> + res->swapbuf = av_calloc(2, res->key_size_aligned + res->val_size_aligned);
> + if (!res->swapbuf) {
> + ff_hashtable_freep(&res);
> + return AVERROR(ENOMEM);
> + }
Since they will not be resized individually, you can allocate these in
the same segment as the table itself.
> + *ctx = res;
> + return 0;
> +}
> +
> +static size_t hash_key(const struct FFHashtableContext *ctx, const void *key)
> +{
> + return av_crc(ctx->crc, 0, key, ctx->key_size) % ctx->max_entries;
> +}
> +
> +int ff_hashtable_get(const struct FFHashtableContext *ctx, const void *key, void *val)
> +{
> + size_t hash = hash_key(ctx, key);
> +
> + if (!ctx->utilization)
> + return 0;
Does this optimization make a lot of difference? If not, utilization can
be removed entirely.
I expect the compiler will do the right thing, but better compute hash
after checking utilization anyway.
> +
> + for (size_t psl = 0; psl < ctx->max_entries; psl++) {
> + size_t wrapped_index = (hash + psl) % ctx->max_entries;
> + uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size;
> + if (!*ENTRY_OCC(entry) || *(size_t*)ENTRY_PSL(entry) < psl)
> + return 0;
> + if (KEYS_EQUAL(ENTRY_KEY(entry), key)) {
> + memcpy(val, ENTRY_VAL(entry), ctx->val_size);
For a generic hash table, I would recommend returning a pointer to the
internal memory, valid until the next modification of the table. But
since the keys are smaller than pointers...
> + return 1;
> + }
> + }
> + return 0;
> +}
> +
> +int ff_hashtable_set(struct FFHashtableContext *ctx, const void *key, const void *val)
> +{
> + int swapping = 0;
> + size_t psl = 0;
A few comments explaining the semantic of psl would probably be useful.
> + size_t hash = hash_key(ctx, key);
> + uint8_t *set = ctx->swapbuf;
> + uint8_t *tmp = ctx->swapbuf + ctx->key_size_aligned + ctx->val_size_aligned;
> +
> + memcpy(set, key, ctx->key_size);
> + memcpy(set + ctx->key_size_aligned, val, ctx->val_size);
> +
> + for (size_t i = 0; i < ctx->max_entries; i++) {
> + size_t wrapped_index = (hash + i) % ctx->max_entries;
if (++wrapped_index == max_entries) wrapped_index = 0 might be faster.
> + uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size;
> + if (!*ENTRY_OCC(entry) || (!swapping && KEYS_EQUAL(ENTRY_KEY(entry), set))) {
> + if (!*ENTRY_OCC(entry))
> + ctx->utilization++;
> + *(size_t*)ENTRY_PSL(entry) = psl;
> + *ENTRY_OCC(entry) = 1;
> + memcpy(ENTRY_KEY(entry), set, ctx->key_size_aligned + ctx->val_size);
> + return 1;
> + }
> + if (*(size_t*)ENTRY_PSL(entry) < psl) {
> + if (ctx->utilization == ctx->max_entries)
> + return 0;
> + swapping = 1;
> + // set needs to swap with entry
> + memcpy(tmp, ENTRY_KEY(entry), ctx->key_size_aligned + ctx->val_size);
> + memcpy(ENTRY_KEY(entry), set, ctx->key_size_aligned + ctx->val_size);
I suspect copying until val_size_aligned might be faster.
Also, FFSWAP()ing the contents of entry and set (by blocks of size_t)
would make tmp and associated memory unnecessary. But I cannot predict
if it would make things slower or faster.
I kind of hoped to find a way to get rid of swapbuf entirely, but I did
not.
> + FFSWAP(uint8_t*, set, tmp);
> + FFSWAP(size_t, psl, *(size_t*)ENTRY_PSL(entry));
> + }
> + psl++;
> + }
> + return 0;
> +}
> +
> +int ff_hashtable_delete(struct FFHashtableContext *ctx, const void *key)
> +{
> + uint8_t *next_entry;
> + size_t hash = hash_key(ctx, key);
> +
> + if (!ctx->utilization)
> + return 0;
> +
> + for (size_t psl = 0; psl < ctx->max_entries; psl++) {
> + size_t wrapped_index = (hash + psl) % ctx->max_entries;
> + uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size;
> + if (!*ENTRY_OCC(entry) || *(size_t*)ENTRY_PSL(entry) < psl)
> + return 0;
> + if (KEYS_EQUAL(ENTRY_KEY(entry), key)) {
> + *ENTRY_OCC(entry) = 0;
> + for (psl++; psl < ctx->max_entries; psl++) {
> + wrapped_index = (hash + psl) % ctx->max_entries;
> + next_entry = ctx->table + wrapped_index * ctx->entry_size;
> + if (!*ENTRY_OCC(next_entry) || !*(size_t*)ENTRY_PSL(next_entry)) {
> + ctx->utilization--;
> + return 1;
> + }
> + memcpy(entry, next_entry, ctx->entry_size);
> + (*(size_t*)ENTRY_PSL(entry))--;
> + *ENTRY_OCC(next_entry) = 0;
> + entry = next_entry;
> + }
> + }
> + };
> + return 0;
> +}
I need to wrap this mail. I assume testing confirms the code does what
it should.
> +
> +void ff_hashtable_clear(struct FFHashtableContext *ctx)
> +{
> + memset(ctx->table, 0, ctx->entry_size * ctx->max_entries);
> +}
> +
> +void ff_hashtable_freep(struct FFHashtableContext **ctx)
> +{
> + if (*ctx) {
> + av_freep(&(*ctx)->table);
> + av_freep(&(*ctx)->swapbuf);
> + }
> + av_freep(ctx);
> +}
> diff --git a/libavcodec/hashtable.h b/libavcodec/hashtable.h
> new file mode 100644
> index 0000000000..fd28d3bdd7
> --- /dev/null
> +++ b/libavcodec/hashtable.h
> @@ -0,0 +1,91 @@
> +/*
> + * Generic hashtable
> + * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
Some argued against having names in the copyright notice of the public
headers. This is no longer public, but it could become.
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_HASHTABLE_H
> +#define AVCODEC_HASHTABLE_H
> +
> +#include <stddef.h>
> +
> +/* Implements a hash table using Robin Hood open addressing.
> + * See: https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
> + */
> +
> +struct FFHashtableContext;
We usually typedef too.
> +
> +/**
> + * Creates a fixed-sized Robin Hood hash table.
We usually use some kind of weird impersonal / imperative: Create. Same
below.
> + *
> + * @param ctx context to allocate and initialize
> + * @param key_size size of key type in bytes
> + * @param val_size size of value type in bytes
> + * @param max_entries maximum number of key-value pairs to store
> + *
> + * @return zero on success, nonzero on error
> + */
> +int ff_hashtable_alloc(struct FFHashtableContext **ctx, size_t key_size, size_t val_size, size_t max_entries);
> +
> +/**
> + * Looks up a value from a hash table given a key.
> + *
> + * @param ctx hash table context
> + * @param key pointer to key data
> + * @param val destination pointer for value data
> + *
> + * @return 1 if the key is found, zero if the key is not found
> + */
> +int ff_hashtable_get(const struct FFHashtableContext *ctx, const void *key, void *val);
> +
> +/**
> + * Stores a value in a hash table given a key.
> + *
> + * @param ctx hash table context
> + * @param key pointer to key data
> + * @param val pointer for value data
> + *
> + * @return 1 if the key is written, zero if the key is not written due to the hash table reaching max capacity
> + */
> +int ff_hashtable_set(struct FFHashtableContext *ctx, const void *key, const void *val);
> +
> +/**
> + * Deletes a value from a hash table given a key.
> + *
> + * @param ctx hash table context
> + * @param key pointer to key data
> + *
> + * @return 1 if the key is deleted, zero if the key is not deleted due to not being found
> + */
> +int ff_hashtable_delete(struct FFHashtableContext *ctx, const void *key);
> +
> +/**
> + * Deletes all values from a hash table.
> + *
> + * @param ctx hash table context
> + */
> +void ff_hashtable_clear(struct FFHashtableContext *ctx);
> +
> +/**
> + * Frees a hash table.
> + *
> + * @param ctx hash table context
> + */
> +void ff_hashtable_freep(struct FFHashtableContext **ctx);
> +
> +#endif
> diff --git a/libavcodec/tests/hashtable.c b/libavcodec/tests/hashtable.c
> new file mode 100644
> index 0000000000..367baf4f96
> --- /dev/null
> +++ b/libavcodec/tests/hashtable.c
> @@ -0,0 +1,110 @@
> +/*
> + * Generic hashtable tests
> + * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <stdint.h>
> +
> +#include "libavutil/avassert.h"
> +#include "libavcodec/hashtable.h"
> +
> +int main(void)
> +{
> + struct FFHashtableContext *ctx;
> + uint8_t k;
> + uint64_t v;
> +
> + // impossibly large allocation should fail gracefully
> + av_assert0(ff_hashtable_alloc(&ctx, -1, -1, -1) < 0);
> +
> + // hashtable can store up to 3 uint8_t->uint64_t entries
> + av_assert0(!ff_hashtable_alloc(&ctx, sizeof(k), sizeof(v), 3));
> +
> + // unsuccessful deletes return 0
> + k = 1;
> + av_assert0(!ff_hashtable_delete(ctx, &k));
> +
> + // unsuccessful gets return 0
> + k = 1;
> + av_assert0(!ff_hashtable_get(ctx, &k, &v));
> +
> + // successful sets returns 1
> + k = 1;
> + v = 1;
> + av_assert0(ff_hashtable_set(ctx, &k, &v));
> +
> + // get should now contain 1
> + k = 1;
> + v = 0;
> + av_assert0(ff_hashtable_get(ctx, &k, &v));
> + av_assert0(v == 1);
> +
> + // updating sets should return 1
> + k = 1;
> + v = 2;
> + av_assert0(ff_hashtable_set(ctx, &k, &v));
> +
> + // get should now contain 2
> + k = 1;
> + v = 0;
> + av_assert0(ff_hashtable_get(ctx, &k, &v));
> + av_assert0(v == 2);
> +
> + // fill the table
> + k = 2;
> + v = 2;
> + av_assert0(ff_hashtable_set(ctx, &k, &v));
> + k = 3;
> + v = 3;
> + av_assert0(ff_hashtable_set(ctx, &k, &v));
> +
> + // inserting sets on a full table should return 0
> + k = 4;
> + v = 4;
> + av_assert0(!ff_hashtable_set(ctx, &k, &v));
> +
> + // updating sets on a full table should return 1
> + k = 1;
> + v = 4;
> + av_assert0(ff_hashtable_set(ctx, &k, &v));
> + v = 0;
> + av_assert0(ff_hashtable_get(ctx, &k, &v));
> + av_assert0(v == 4);
> +
> + // successful deletes should return 1
> + k = 1;
> + av_assert0(ff_hashtable_delete(ctx, &k));
> +
> + // get should now return 0
> + av_assert0(!ff_hashtable_get(ctx, &k, &v));
> +
> + // sanity check remaining keys
> + k = 2;
> + v = 0;
> + av_assert0(ff_hashtable_get(ctx, &k, &v));
> + av_assert0(v == 2);
> + k = 3;
> + v = 0;
> + av_assert0(ff_hashtable_get(ctx, &k, &v));
> + av_assert0(v == 3);
> +
> + ff_hashtable_freep(&ctx);
> +
> + return 0;
> +}
I do not consider any of my suggestions blocking. Thanks for the
patches.
Regards,
--
Nicolas George
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table
2025-04-22 17:57 ` Emma Worley
@ 2025-04-22 22:04 ` Nicolas George
0 siblings, 0 replies; 9+ messages in thread
From: Nicolas George @ 2025-04-22 22:04 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Emma Worley (HE12025-04-22):
> Perhaps I can add a `mode` enum parameter to the FFHashtableContext to
> control behavior? Then we can benchmark different behaviors on a
> per-use-case basis.
For benchmarks, I think #ifdef might be less hassle.
Anyway, I repeat: I do not consider it mandatory, and it probably is
only relevant for larger entries.
Regards,
--
Nicolas George
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2025-04-22 22:04 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-22 7:42 [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table Emma Worley
2025-04-22 7:42 ` [FFmpeg-devel] [PATCH v4 2/3] lavc/dxvenc: migrate DXT1 encoder to lavc hashtable Emma Worley
2025-04-22 7:42 ` [FFmpeg-devel] [PATCH v4 3/3] lavc/dxvenc: improve compatibility with Resolume products Emma Worley
2025-04-22 7:59 ` Andreas Rheinhardt
2025-04-22 8:14 ` Emma Worley
2025-04-22 17:24 ` [FFmpeg-devel] [PATCH v4 1/3] lavc/hashtable: create generic robin hood hash table Nicolas George
2025-04-22 17:57 ` Emma Worley
2025-04-22 22:04 ` Nicolas George
2025-04-22 22:02 ` Nicolas George
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git