* [FFmpeg-devel] [PATCH v3 1/3] lavu/hashtable: create generic robin hood hash table
@ 2025-04-21 2:33 Emma Worley
2025-04-21 2:33 ` [FFmpeg-devel] [PATCH v3 2/3] lavc/dxvenc: migrate DXT1 encoder to lavu hashtable Emma Worley
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Emma Worley @ 2025-04-21 2:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Emma Worley
Adds a general purpose hash table with the DXV encoder as an initial use case.
Signed-off-by: Emma Worley <emma@emma.gg>
---
libavutil/Makefile | 2 +
libavutil/hashtable.c | 192 ++++++++++++++++++++++++++++++++++++
libavutil/hashtable.h | 91 +++++++++++++++++
libavutil/tests/hashtable.c | 110 +++++++++++++++++++++
4 files changed, 395 insertions(+)
create mode 100644 libavutil/hashtable.c
create mode 100644 libavutil/hashtable.h
create mode 100644 libavutil/tests/hashtable.c
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 9ef118016b..43cb32c9d6 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -144,6 +144,7 @@ OBJS = adler32.o \
fixed_dsp.o \
frame.o \
hash.o \
+ hashtable.o \
hdr_dynamic_metadata.o \
hdr_dynamic_vivid_metadata.o \
hmac.o \
@@ -272,6 +273,7 @@ TESTPROGS = adler32 \
file \
fifo \
hash \
+ hashtable \
hmac \
hwdevice \
integer \
diff --git a/libavutil/hashtable.c b/libavutil/hashtable.c
new file mode 100644
index 0000000000..155a264665
--- /dev/null
+++ b/libavutil/hashtable.c
@@ -0,0 +1,192 @@
+/*
+ * Generic hashtable
+ * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "crc.h"
+#include "error.h"
+#include "mem.h"
+#include "hashtable.h"
+
+#define ALIGN _Alignof(size_t)
+
+struct AVHashtableContext {
+ size_t key_size;
+ size_t key_size_aligned;
+ size_t val_size;
+ size_t val_size_aligned;
+ size_t entry_size;
+ size_t max_entries;
+ size_t utilization;
+ const AVCRC *crc;
+ uint8_t *table;
+ uint8_t *swapbuf;
+};
+
+#define ENTRY_PSL(entry) (entry)
+#define ENTRY_OCC(entry) (ENTRY_PSL(entry) + FFALIGN(sizeof(size_t), ALIGN))
+#define ENTRY_KEY(entry) (ENTRY_OCC(entry) + FFALIGN(sizeof(size_t), ALIGN))
+#define ENTRY_VAL(entry) (ENTRY_KEY(entry) + ctx->key_size_aligned)
+
+#define KEYS_EQUAL(k1, k2) !memcmp(k1, k2, ctx->key_size)
+
+int av_hashtable_alloc(struct AVHashtableContext **ctx, size_t key_size, size_t val_size, size_t max_entries)
+{
+ struct AVHashtableContext *res = av_malloc(sizeof(struct AVHashtableContext));
+ if (!res)
+ return AVERROR(ENOMEM);
+ res->key_size = key_size;
+ res->key_size_aligned = FFALIGN(key_size, ALIGN);
+ res->val_size = val_size;
+ res->val_size_aligned = FFALIGN(val_size, ALIGN);
+ res->entry_size = FFALIGN(sizeof(size_t), ALIGN)
+ + FFALIGN(sizeof(size_t), ALIGN)
+ + res->key_size_aligned
+ + res->val_size_aligned;
+ res->max_entries = max_entries;
+ res->utilization = 0;
+ res->crc = av_crc_get_table(AV_CRC_32_IEEE);
+ if (!res->crc) {
+ av_hashtable_freep(&res);
+ return AVERROR_BUG;
+ }
+ res->table = av_calloc(res->max_entries, res->entry_size);
+ if (!res->table) {
+ av_hashtable_freep(&res);
+ return AVERROR(ENOMEM);
+ }
+ res->swapbuf = av_calloc(2, res->key_size_aligned + res->val_size_aligned);
+ if (!res->swapbuf) {
+ av_hashtable_freep(&res);
+ return AVERROR(ENOMEM);
+ }
+ *ctx = res;
+ return 0;
+}
+
+static size_t hash_key(const struct AVHashtableContext *ctx, const void *key)
+{
+ return av_crc(ctx->crc, 0, key, ctx->key_size) % ctx->max_entries;
+}
+
+int av_hashtable_get(const struct AVHashtableContext *ctx, const void *key, void *val)
+{
+ size_t hash = hash_key(ctx, key);
+
+ if (!ctx->utilization)
+ return 0;
+
+ for (size_t psl = 0; psl < ctx->max_entries; psl++) {
+ size_t wrapped_index = (hash + psl) % ctx->max_entries;
+ uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size;
+ if (!*ENTRY_OCC(entry) || *(size_t*)ENTRY_PSL(entry) < psl)
+ return 0;
+ if (KEYS_EQUAL(ENTRY_KEY(entry), key)) {
+ memcpy(val, ENTRY_VAL(entry), ctx->val_size);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int av_hashtable_set(struct AVHashtableContext *ctx, const void *key, const void *val)
+{
+ int swapping = 0;
+ size_t psl = 0;
+ size_t hash = hash_key(ctx, key);
+ uint8_t *set = ctx->swapbuf;
+ uint8_t *tmp = ctx->swapbuf + ctx->key_size_aligned + ctx->val_size_aligned;
+
+ memcpy(set, key, ctx->key_size);
+ memcpy(set + ctx->key_size_aligned, val, ctx->val_size);
+
+ for (size_t i = 0; i < ctx->max_entries; i++) {
+ size_t wrapped_index = (hash + i) % ctx->max_entries;
+ uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size;
+ if (!*ENTRY_OCC(entry) || (!swapping && KEYS_EQUAL(ENTRY_KEY(entry), set))) {
+ if (!*ENTRY_OCC(entry))
+ ctx->utilization++;
+ *(size_t*)ENTRY_PSL(entry) = psl;
+ *ENTRY_OCC(entry) = 1;
+ memcpy(ENTRY_KEY(entry), set, ctx->key_size_aligned + ctx->val_size);
+ return 1;
+ }
+ if (*(size_t*)ENTRY_PSL(entry) < psl) {
+ if (ctx->utilization == ctx->max_entries)
+ return 0;
+ swapping = 1;
+ // set needs to swap with entry
+ memcpy(tmp, ENTRY_KEY(entry), ctx->key_size_aligned + ctx->val_size);
+ memcpy(ENTRY_KEY(entry), set, ctx->key_size_aligned + ctx->val_size);
+ FFSWAP(uint8_t*, set, tmp);
+ FFSWAP(size_t, psl, *(size_t*)ENTRY_PSL(entry));
+ }
+ psl++;
+ }
+ return 0;
+}
+
+int av_hashtable_delete(struct AVHashtableContext *ctx, const void *key)
+{
+ uint8_t *next_entry;
+ size_t hash = hash_key(ctx, key);
+
+ if (!ctx->utilization)
+ return 0;
+
+ for (size_t psl = 0; psl < ctx->max_entries; psl++) {
+ size_t wrapped_index = (hash + psl) % ctx->max_entries;
+ uint8_t *entry = ctx->table + wrapped_index * ctx->entry_size;
+ if (!*ENTRY_OCC(entry) || *(size_t*)ENTRY_PSL(entry) < psl)
+ return 0;
+ if (KEYS_EQUAL(ENTRY_KEY(entry), key)) {
+ *ENTRY_OCC(entry) = 0;
+ for (psl++; psl < ctx->max_entries; psl++) {
+ wrapped_index = (hash + psl) % ctx->max_entries;
+ next_entry = ctx->table + wrapped_index * ctx->entry_size;
+ if (!*ENTRY_OCC(next_entry) || !*(size_t*)ENTRY_PSL(next_entry)) {
+ ctx->utilization--;
+ return 1;
+ }
+ memcpy(entry, next_entry, ctx->entry_size);
+ (*(size_t*)ENTRY_PSL(entry))--;
+ *ENTRY_OCC(next_entry) = 0;
+ entry = next_entry;
+ }
+ }
+ };
+ return 0;
+}
+
+void av_hashtable_clear(struct AVHashtableContext *ctx)
+{
+ memset(ctx->table, 0, ctx->entry_size * ctx->max_entries);
+}
+
+void av_hashtable_freep(struct AVHashtableContext **ctx)
+{
+ if (*ctx) {
+ av_freep(&(*ctx)->table);
+ av_freep(&(*ctx)->swapbuf);
+ }
+ av_freep(ctx);
+}
diff --git a/libavutil/hashtable.h b/libavutil/hashtable.h
new file mode 100644
index 0000000000..8ce6faafe3
--- /dev/null
+++ b/libavutil/hashtable.h
@@ -0,0 +1,91 @@
+/*
+ * Generic hashtable
+ * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HASHTABLE_H
+#define AVUTIL_HASHTABLE_H
+
+#include <stddef.h>
+
+/* Implements a hash table using Robin Hood open addressing.
+ * See: https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf
+ */
+
+struct AVHashtableContext;
+
+/**
+ * Creates a fixed-sized Robin Hood hash table.
+ *
+ * @param ctx context to allocate and initialize
+ * @param key_size size of key type in bytes
+ * @param val_size size of value type in bytes
+ * @param max_entries maximum number of key-value pairs to store
+ *
+ * @return zero on success, nonzero on error
+ */
+int av_hashtable_alloc(struct AVHashtableContext **ctx, size_t key_size, size_t val_size, size_t max_entries);
+
+/**
+ * Looks up a value from a hash table given a key.
+ *
+ * @param ctx hash table context
+ * @param key pointer to key data
+ * @param val destination pointer for value data
+ *
+ * @return 1 if the key is found, zero if the key is not found
+ */
+int av_hashtable_get(const struct AVHashtableContext *ctx, const void *key, void *val);
+
+/**
+ * Stores a value in a hash table given a key.
+ *
+ * @param ctx hash table context
+ * @param key pointer to key data
+ * @param val pointer for value data
+ *
+ * @return 1 if the key is written, zero if the key is not written due to the hash table reaching max capacity
+ */
+int av_hashtable_set(struct AVHashtableContext *ctx, const void *key, const void *val);
+
+/**
+ * Deletes a value from a hash table given a key.
+ *
+ * @param ctx hash table context
+ * @param key pointer to key data
+ *
+ * @return 1 if the key is deleted, zero if the key is not deleted due to not being found
+ */
+int av_hashtable_delete(struct AVHashtableContext *ctx, const void *key);
+
+/**
+ * Deletes all values from a hash table.
+ *
+ * @param ctx hash table context
+ */
+void av_hashtable_clear(struct AVHashtableContext *ctx);
+
+/**
+ * Frees a hash table.
+ *
+ * @param ctx hash table context
+ */
+void av_hashtable_freep(struct AVHashtableContext **ctx);
+
+#endif
diff --git a/libavutil/tests/hashtable.c b/libavutil/tests/hashtable.c
new file mode 100644
index 0000000000..1e37d7663b
--- /dev/null
+++ b/libavutil/tests/hashtable.c
@@ -0,0 +1,110 @@
+/*
+ * Generic hashtable tests
+ * Copyright (C) 2024 Connor Worley <connorbworley@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/hashtable.h"
+
+int main(void)
+{
+ struct AVHashtableContext *ctx;
+ uint8_t k;
+ uint64_t v;
+
+ // impossibly large allocation should fail gracefully
+ av_assert0(av_hashtable_alloc(&ctx, -1, -1, -1) < 0);
+
+ // hashtable can store up to 3 uint8_t->uint64_t entries
+ av_assert0(!av_hashtable_alloc(&ctx, sizeof(k), sizeof(v), 3));
+
+ // unsuccessful deletes return 0
+ k = 1;
+ av_assert0(!av_hashtable_delete(ctx, &k));
+
+ // unsuccessful gets return 0
+ k = 1;
+ av_assert0(!av_hashtable_get(ctx, &k, &v));
+
+ // successful sets returns 1
+ k = 1;
+ v = 1;
+ av_assert0(av_hashtable_set(ctx, &k, &v));
+
+ // get should now contain 1
+ k = 1;
+ v = 0;
+ av_assert0(av_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 1);
+
+ // updating sets should return 1
+ k = 1;
+ v = 2;
+ av_assert0(av_hashtable_set(ctx, &k, &v));
+
+ // get should now contain 2
+ k = 1;
+ v = 0;
+ av_assert0(av_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 2);
+
+ // fill the table
+ k = 2;
+ v = 2;
+ av_assert0(av_hashtable_set(ctx, &k, &v));
+ k = 3;
+ v = 3;
+ av_assert0(av_hashtable_set(ctx, &k, &v));
+
+ // inserting sets on a full table should return 0
+ k = 4;
+ v = 4;
+ av_assert0(!av_hashtable_set(ctx, &k, &v));
+
+ // updating sets on a full table should return 1
+ k = 1;
+ v = 4;
+ av_assert0(av_hashtable_set(ctx, &k, &v));
+ v = 0;
+ av_assert0(av_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 4);
+
+ // successful deletes should return 1
+ k = 1;
+ av_assert0(av_hashtable_delete(ctx, &k));
+
+ // get should now return 0
+ av_assert0(!av_hashtable_get(ctx, &k, &v));
+
+ // sanity check remaining keys
+ k = 2;
+ v = 0;
+ av_assert0(av_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 2);
+ k = 3;
+ v = 0;
+ av_assert0(av_hashtable_get(ctx, &k, &v));
+ av_assert0(v == 3);
+
+ av_hashtable_freep(&ctx);
+
+ return 0;
+}
--
2.48.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v3 2/3] lavc/dxvenc: migrate DXT1 encoder to lavu hashtable
2025-04-21 2:33 [FFmpeg-devel] [PATCH v3 1/3] lavu/hashtable: create generic robin hood hash table Emma Worley
@ 2025-04-21 2:33 ` Emma Worley
2025-04-21 10:41 ` Nicolas George
2025-04-21 2:33 ` [FFmpeg-devel] [PATCH v3 3/3] lavc/dxvenc: improve compatibility with Resolume products Emma Worley
2025-04-21 10:40 ` [FFmpeg-devel] [PATCH v3 1/3] lavu/hashtable: create generic robin hood hash table Nicolas George
2 siblings, 1 reply; 7+ messages in thread
From: Emma Worley @ 2025-04-21 2:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Emma Worley
Offers a modest performance gain due to the switch from naive linear
probling to robin hood.
Signed-off-by: Emma Worley <emma@emma.gg>
---
libavcodec/dxvenc.c | 121 ++++++++++++--------------------------------
1 file changed, 33 insertions(+), 88 deletions(-)
diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
index 808d8daedb..c64b4d5f15 100644
--- a/libavcodec/dxvenc.c
+++ b/libavcodec/dxvenc.c
@@ -21,7 +21,7 @@
#include <stdint.h>
-#include "libavutil/crc.h"
+#include "libavutil/hashtable.h"
#include "libavutil/imgutils.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
@@ -39,72 +39,9 @@
* appeared in the decompressed stream. Using a simple hash table (HT)
* significantly speeds up the lookback process while encoding.
*/
-#define LOOKBACK_HT_ELEMS 0x40000
+#define LOOKBACK_HT_ELEMS 0x20202
#define LOOKBACK_WORDS 0x20202
-typedef struct HTEntry {
- uint32_t key;
- uint32_t pos;
-} HTEntry;
-
-static void ht_init(HTEntry *ht)
-{
- for (size_t i = 0; i < LOOKBACK_HT_ELEMS; i++) {
- ht[i].pos = -1;
- }
-}
-
-static uint32_t ht_lookup_and_upsert(HTEntry *ht, const AVCRC *hash_ctx,
- uint32_t key, uint32_t pos)
-{
- uint32_t ret = -1;
- size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
- for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
- size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
- HTEntry *entry = &ht[wrapped_index];
- if (entry->key == key || entry->pos == -1) {
- ret = entry->pos;
- entry->key = key;
- entry->pos = pos;
- break;
- }
- }
- return ret;
-}
-
-static void ht_delete(HTEntry *ht, const AVCRC *hash_ctx,
- uint32_t key, uint32_t pos)
-{
- HTEntry *removed_entry = NULL;
- size_t removed_hash;
- size_t hash = av_crc(hash_ctx, 0, (uint8_t*)&key, 4) % LOOKBACK_HT_ELEMS;
-
- for (size_t i = hash; i < hash + LOOKBACK_HT_ELEMS; i++) {
- size_t wrapped_index = i % LOOKBACK_HT_ELEMS;
- HTEntry *entry = &ht[wrapped_index];
- if (entry->pos == -1)
- return;
- if (removed_entry) {
- size_t candidate_hash = av_crc(hash_ctx, 0, (uint8_t*)&entry->key, 4) % LOOKBACK_HT_ELEMS;
- if ((wrapped_index > removed_hash && (candidate_hash <= removed_hash || candidate_hash > wrapped_index)) ||
- (wrapped_index < removed_hash && (candidate_hash <= removed_hash && candidate_hash > wrapped_index))) {
- *removed_entry = *entry;
- entry->pos = -1;
- removed_entry = entry;
- removed_hash = wrapped_index;
- }
- } else if (entry->key == key) {
- if (entry->pos <= pos) {
- entry->pos = -1;
- removed_entry = entry;
- removed_hash = wrapped_index;
- } else {
- return;
- }
- }
- }
-}
-
typedef struct DXVEncContext {
AVClass *class;
@@ -121,10 +58,8 @@ typedef struct DXVEncContext {
DXVTextureFormat tex_fmt;
int (*compress_tex)(AVCodecContext *avctx);
- const AVCRC *crc_ctx;
-
- HTEntry color_lookback_ht[LOOKBACK_HT_ELEMS];
- HTEntry lut_lookback_ht[LOOKBACK_HT_ELEMS];
+ struct AVHashtableContext *color_ht;
+ struct AVHashtableContext *lut_ht;
} DXVEncContext;
/* Converts an index offset value to a 2-bit opcode and pushes it to a stream.
@@ -159,27 +94,32 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
DXVEncContext *ctx = avctx->priv_data;
PutByteContext *pbc = &ctx->pbc;
void *value;
- uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 2, op = 0;
+ uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0;
- ht_init(ctx->color_lookback_ht);
- ht_init(ctx->lut_lookback_ht);
+ av_hashtable_clear(ctx->color_ht);
+ av_hashtable_clear(ctx->lut_ht);
bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
+ av_hashtable_set(ctx->color_ht, ctx->tex_data, &pos);
+ pos++;
bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data + 4));
-
- ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data), 0);
- ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, AV_RL32(ctx->tex_data + 4), 1);
+ av_hashtable_set(ctx->lut_ht, ctx->tex_data + 4, &pos);
+ pos++;
while (pos + 2 <= ctx->tex_size / 4) {
idx = 0;
+ color_idx = 0;
+ lut_idx = 0;
color = AV_RL32(ctx->tex_data + pos * 4);
- prev_pos = ht_lookup_and_upsert(ctx->color_lookback_ht, ctx->crc_ctx, color, pos);
- color_idx = prev_pos != -1 ? pos - prev_pos : 0;
+ if (av_hashtable_get(ctx->color_ht, &color, &prev_pos))
+ color_idx = pos - prev_pos;
+ av_hashtable_set(ctx->color_ht, &color, &pos);
+
if (pos >= LOOKBACK_WORDS) {
uint32_t old_pos = pos - LOOKBACK_WORDS;
- uint32_t old_color = AV_RL32(ctx->tex_data + old_pos * 4);
- ht_delete(ctx->color_lookback_ht, ctx->crc_ctx, old_color, old_pos);
+ if (av_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
+ av_hashtable_delete(ctx->color_ht, ctx->tex_data + old_pos * 4);
}
pos++;
@@ -188,13 +128,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
idx = color_idx;
} else {
idx = 0;
- prev_pos = ht_lookup_and_upsert(ctx->lut_lookback_ht, ctx->crc_ctx, lut, pos);
- lut_idx = prev_pos != -1 ? pos - prev_pos : 0;
+ if (av_hashtable_get(ctx->lut_ht, &lut, &prev_pos))
+ lut_idx = pos - prev_pos;
+ av_hashtable_set(ctx->lut_ht, &lut, &pos);
}
if (pos >= LOOKBACK_WORDS) {
uint32_t old_pos = pos - LOOKBACK_WORDS;
- uint32_t old_lut = AV_RL32(ctx->tex_data + old_pos * 4);
- ht_delete(ctx->lut_lookback_ht, ctx->crc_ctx, old_lut, old_pos);
+ if (av_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
+ av_hashtable_delete(ctx->lut_ht, ctx->tex_data + old_pos * 4);
}
pos++;
@@ -306,11 +247,12 @@ static av_cold int dxv_init(AVCodecContext *avctx)
return AVERROR(ENOMEM);
}
- ctx->crc_ctx = av_crc_get_table(AV_CRC_32_IEEE);
- if (!ctx->crc_ctx) {
- av_log(avctx, AV_LOG_ERROR, "Could not initialize CRC table.\n");
- return AVERROR_BUG;
- }
+ ret = av_hashtable_alloc(&ctx->color_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
+ if (ret < 0)
+ return ret;
+ ret = av_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
+ if (ret < 0)
+ return ret;
return 0;
}
@@ -321,6 +263,9 @@ static av_cold int dxv_close(AVCodecContext *avctx)
av_freep(&ctx->tex_data);
+ av_hashtable_freep(&ctx->color_ht);
+ av_hashtable_freep(&ctx->lut_ht);
+
return 0;
}
--
2.48.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v3 3/3] lavc/dxvenc: improve compatibility with Resolume products
2025-04-21 2:33 [FFmpeg-devel] [PATCH v3 1/3] lavu/hashtable: create generic robin hood hash table Emma Worley
2025-04-21 2:33 ` [FFmpeg-devel] [PATCH v3 2/3] lavc/dxvenc: migrate DXT1 encoder to lavu hashtable Emma Worley
@ 2025-04-21 2:33 ` Emma Worley
2025-04-21 11:19 ` Andreas Rheinhardt
2025-04-21 10:40 ` [FFmpeg-devel] [PATCH v3 1/3] lavu/hashtable: create generic robin hood hash table Nicolas George
2 siblings, 1 reply; 7+ messages in thread
From: Emma Worley @ 2025-04-21 2:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Emma Worley
Improves compatibility with Resolume products by adding an additional hashtable
for DXT color+LUT combinations, and padding the DXT texture to the next largest
16x16 multiple. Produces identical packets to Resolume Alley in manual tests.
Signed-off-by: Emma Worley <emma@emma.gg>
---
libavcodec/dxvenc.c | 105 ++++++++++++++++++++----------------
tests/ref/fate/dxv3enc-dxt1 | 2 +-
2 files changed, 60 insertions(+), 47 deletions(-)
diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
index c64b4d5f15..737ccfff3e 100644
--- a/libavcodec/dxvenc.c
+++ b/libavcodec/dxvenc.c
@@ -34,6 +34,11 @@
#define DXV_HEADER_LENGTH 12
+/*
+ * Resolume will refuse to display frames that are not padded to 16x16 pixels.
+ */
+#define DXV_ALIGN(x) FFALIGN(x, 16)
+
/*
* DXV uses LZ-like back-references to avoid copying words that have already
* appeared in the decompressed stream. Using a simple hash table (HT)
@@ -60,6 +65,7 @@ typedef struct DXVEncContext {
struct AVHashtableContext *color_ht;
struct AVHashtableContext *lut_ht;
+ struct AVHashtableContext *combo_ht;
} DXVEncContext;
/* Converts an index offset value to a 2-bit opcode and pushes it to a stream.
@@ -94,10 +100,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
DXVEncContext *ctx = avctx->priv_data;
PutByteContext *pbc = &ctx->pbc;
void *value;
- uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0;
+ uint64_t combo;
+ uint32_t color, lut, idx, combo_idx, prev_pos, old_pos, state = 16, pos = 0, op = 0;
av_hashtable_clear(ctx->color_ht);
av_hashtable_clear(ctx->lut_ht);
+ av_hashtable_clear(ctx->combo_ht);
+
+ av_hashtable_set(ctx->combo_ht, ctx->tex_data, &pos);
bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
av_hashtable_set(ctx->color_ht, ctx->tex_data, &pos);
@@ -107,51 +117,46 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
pos++;
while (pos + 2 <= ctx->tex_size / 4) {
- idx = 0;
- color_idx = 0;
- lut_idx = 0;
+ combo = AV_RL64(ctx->tex_data + pos * 4);
+ combo_idx = av_hashtable_get(ctx->combo_ht, &combo, &prev_pos) ? pos - prev_pos : 0;
+ idx = combo_idx;
+ PUSH_OP(2);
+ if (pos >= LOOKBACK_WORDS) {
+ old_pos = pos - LOOKBACK_WORDS;
+ if (av_hashtable_get(ctx->combo_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
+ av_hashtable_delete(ctx->combo_ht, ctx->tex_data + old_pos * 4);
+ }
+ av_hashtable_set(ctx->combo_ht, &combo, &pos);
color = AV_RL32(ctx->tex_data + pos * 4);
- if (av_hashtable_get(ctx->color_ht, &color, &prev_pos))
- color_idx = pos - prev_pos;
- av_hashtable_set(ctx->color_ht, &color, &pos);
-
+ if (!combo_idx) {
+ idx = av_hashtable_get(ctx->color_ht, &color, &prev_pos) ? pos - prev_pos : 0;
+ PUSH_OP(2);
+ if (!idx)
+ bytestream2_put_le32(pbc, color);
+ }
if (pos >= LOOKBACK_WORDS) {
- uint32_t old_pos = pos - LOOKBACK_WORDS;
+ old_pos = pos - LOOKBACK_WORDS;
if (av_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
av_hashtable_delete(ctx->color_ht, ctx->tex_data + old_pos * 4);
}
+ av_hashtable_set(ctx->color_ht, &color, &pos);
pos++;
lut = AV_RL32(ctx->tex_data + pos * 4);
- if (color_idx && lut == AV_RL32(ctx->tex_data + (pos - color_idx) * 4)) {
- idx = color_idx;
- } else {
- idx = 0;
- if (av_hashtable_get(ctx->lut_ht, &lut, &prev_pos))
- lut_idx = pos - prev_pos;
- av_hashtable_set(ctx->lut_ht, &lut, &pos);
+ if (!combo_idx) {
+ idx = av_hashtable_get(ctx->lut_ht, &lut, &prev_pos) ? pos - prev_pos : 0;
+ PUSH_OP(2);
+ if (!idx)
+ bytestream2_put_le32(pbc, lut);
}
if (pos >= LOOKBACK_WORDS) {
- uint32_t old_pos = pos - LOOKBACK_WORDS;
+ old_pos = pos - LOOKBACK_WORDS;
if (av_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
av_hashtable_delete(ctx->lut_ht, ctx->tex_data + old_pos * 4);
}
+ av_hashtable_set(ctx->lut_ht, &lut, &pos);
pos++;
-
- PUSH_OP(2);
-
- if (!idx) {
- idx = color_idx;
- PUSH_OP(2);
- if (!idx)
- bytestream2_put_le32(pbc, color);
-
- idx = lut_idx;
- PUSH_OP(2);
- if (!idx)
- bytestream2_put_le32(pbc, lut);
- }
}
return 0;
@@ -172,11 +177,23 @@ static int dxv_encode(AVCodecContext *avctx, AVPacket *pkt,
return ret;
if (ctx->enc.tex_funct) {
+ uint8_t *dst_data[4] = {av_mallocz(DXV_ALIGN(avctx->width) * DXV_ALIGN(avctx->height) * 4), 0, 0, 0};
+ int dst_linesize[4] = {DXV_ALIGN(avctx->width) * 4, 0, 0, 0};
+
+ av_image_copy2(
+ dst_data,
+ dst_linesize,
+ frame->data,
+ frame->linesize,
+ frame->format,
+ avctx->width,
+ avctx->height);
+
ctx->enc.tex_data.out = ctx->tex_data;
- ctx->enc.frame_data.in = frame->data[0];
- ctx->enc.stride = frame->linesize[0];
- ctx->enc.width = avctx->width;
- ctx->enc.height = avctx->height;
+ ctx->enc.frame_data.in = dst_data[0];
+ ctx->enc.stride = dst_linesize[0];
+ ctx->enc.width = DXV_ALIGN(avctx->width);
+ ctx->enc.height = DXV_ALIGN(avctx->height);
ff_texturedsp_exec_compress_threads(avctx, &ctx->enc);
} else {
/* unimplemented: YCoCg formats */
@@ -216,14 +233,6 @@ static av_cold int dxv_init(AVCodecContext *avctx)
return ret;
}
- if (avctx->width % TEXTURE_BLOCK_W || avctx->height % TEXTURE_BLOCK_H) {
- av_log(avctx,
- AV_LOG_ERROR,
- "Video size %dx%d is not multiple of "AV_STRINGIFY(TEXTURE_BLOCK_W)"x"AV_STRINGIFY(TEXTURE_BLOCK_H)".\n",
- avctx->width, avctx->height);
- return AVERROR_INVALIDDATA;
- }
-
ff_texturedspenc_init(&texdsp);
switch (ctx->tex_fmt) {
@@ -237,10 +246,10 @@ static av_cold int dxv_init(AVCodecContext *avctx)
return AVERROR_INVALIDDATA;
}
ctx->enc.raw_ratio = 16;
- ctx->tex_size = avctx->width / TEXTURE_BLOCK_W *
- avctx->height / TEXTURE_BLOCK_H *
+ ctx->tex_size = DXV_ALIGN(avctx->width) / TEXTURE_BLOCK_W *
+ DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H *
ctx->enc.tex_ratio;
- ctx->enc.slice_count = av_clip(avctx->thread_count, 1, avctx->height / TEXTURE_BLOCK_H);
+ ctx->enc.slice_count = av_clip(avctx->thread_count, 1, DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H);
ctx->tex_data = av_malloc(ctx->tex_size);
if (!ctx->tex_data) {
@@ -251,6 +260,9 @@ static av_cold int dxv_init(AVCodecContext *avctx)
if (ret < 0)
return ret;
ret = av_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
+ if (ret < 0)
+ return ret;
+ ret = av_hashtable_alloc(&ctx->combo_ht, sizeof(uint64_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
if (ret < 0)
return ret;
@@ -265,6 +277,7 @@ static av_cold int dxv_close(AVCodecContext *avctx)
av_hashtable_freep(&ctx->color_ht);
av_hashtable_freep(&ctx->lut_ht);
+ av_hashtable_freep(&ctx->combo_ht);
return 0;
}
diff --git a/tests/ref/fate/dxv3enc-dxt1 b/tests/ref/fate/dxv3enc-dxt1
index 74849a8031..e09000e181 100644
--- a/tests/ref/fate/dxv3enc-dxt1
+++ b/tests/ref/fate/dxv3enc-dxt1
@@ -3,4 +3,4 @@
#codec_id 0: dxv
#dimensions 0: 1920x1080
#sar 0: 1/1
-0, 0, 0, 1, 76521, 0xed387a5e
+0, 0, 0, 1, 76190, 0x0e6f0326
--
2.48.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 1/3] lavu/hashtable: create generic robin hood hash table
2025-04-21 2:33 [FFmpeg-devel] [PATCH v3 1/3] lavu/hashtable: create generic robin hood hash table Emma Worley
2025-04-21 2:33 ` [FFmpeg-devel] [PATCH v3 2/3] lavc/dxvenc: migrate DXT1 encoder to lavu hashtable Emma Worley
2025-04-21 2:33 ` [FFmpeg-devel] [PATCH v3 3/3] lavc/dxvenc: improve compatibility with Resolume products Emma Worley
@ 2025-04-21 10:40 ` Nicolas George
2 siblings, 0 replies; 7+ messages in thread
From: Nicolas George @ 2025-04-21 10:40 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Emma Worley (HE12025-04-20):
> Adds a general purpose hash table with the DXV encoder as an initial use case.
I still think it is too specific to be called general purpose and
exposed as a public API. Better move it in lavc and rename it
ff_hashtable.
Regards,
--
Nicolas George
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 2/3] lavc/dxvenc: migrate DXT1 encoder to lavu hashtable
2025-04-21 2:33 ` [FFmpeg-devel] [PATCH v3 2/3] lavc/dxvenc: migrate DXT1 encoder to lavu hashtable Emma Worley
@ 2025-04-21 10:41 ` Nicolas George
0 siblings, 0 replies; 7+ messages in thread
From: Nicolas George @ 2025-04-21 10:41 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Emma Worley (HE12025-04-20):
> Offers a modest performance gain due to the switch from naive linear
> probling to robin hood.
Thanks for splitting the commits. I do not know this encoder, I cannot
comment on the specifics of the patch.
Regards,
--
Nicolas George
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 3/3] lavc/dxvenc: improve compatibility with Resolume products
2025-04-21 2:33 ` [FFmpeg-devel] [PATCH v3 3/3] lavc/dxvenc: improve compatibility with Resolume products Emma Worley
@ 2025-04-21 11:19 ` Andreas Rheinhardt
2025-04-21 16:59 ` Emma Worley
0 siblings, 1 reply; 7+ messages in thread
From: Andreas Rheinhardt @ 2025-04-21 11:19 UTC (permalink / raw)
To: ffmpeg-devel
Emma Worley:
> Improves compatibility with Resolume products by adding an additional hashtable
> for DXT color+LUT combinations, and padding the DXT texture to the next largest
> 16x16 multiple. Produces identical packets to Resolume Alley in manual tests.
>
Are the files that this encoder currently produces invalid?
> Signed-off-by: Emma Worley <emma@emma.gg>
> ---
> libavcodec/dxvenc.c | 105 ++++++++++++++++++++----------------
> tests/ref/fate/dxv3enc-dxt1 | 2 +-
> 2 files changed, 60 insertions(+), 47 deletions(-)
>
> diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
> index c64b4d5f15..737ccfff3e 100644
> --- a/libavcodec/dxvenc.c
> +++ b/libavcodec/dxvenc.c
> @@ -34,6 +34,11 @@
>
> #define DXV_HEADER_LENGTH 12
>
> +/*
> + * Resolume will refuse to display frames that are not padded to 16x16 pixels.
> + */
> +#define DXV_ALIGN(x) FFALIGN(x, 16)
> +
> /*
> * DXV uses LZ-like back-references to avoid copying words that have already
> * appeared in the decompressed stream. Using a simple hash table (HT)
> @@ -60,6 +65,7 @@ typedef struct DXVEncContext {
>
> struct AVHashtableContext *color_ht;
> struct AVHashtableContext *lut_ht;
> + struct AVHashtableContext *combo_ht;
> } DXVEncContext;
>
> /* Converts an index offset value to a 2-bit opcode and pushes it to a stream.
> @@ -94,10 +100,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
> DXVEncContext *ctx = avctx->priv_data;
> PutByteContext *pbc = &ctx->pbc;
> void *value;
> - uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16, pos = 0, op = 0;
> + uint64_t combo;
> + uint32_t color, lut, idx, combo_idx, prev_pos, old_pos, state = 16, pos = 0, op = 0;
>
> av_hashtable_clear(ctx->color_ht);
> av_hashtable_clear(ctx->lut_ht);
> + av_hashtable_clear(ctx->combo_ht);
> +
> + av_hashtable_set(ctx->combo_ht, ctx->tex_data, &pos);
>
> bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
> av_hashtable_set(ctx->color_ht, ctx->tex_data, &pos);
> @@ -107,51 +117,46 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
> pos++;
>
> while (pos + 2 <= ctx->tex_size / 4) {
> - idx = 0;
> - color_idx = 0;
> - lut_idx = 0;
> + combo = AV_RL64(ctx->tex_data + pos * 4);
> + combo_idx = av_hashtable_get(ctx->combo_ht, &combo, &prev_pos) ? pos - prev_pos : 0;
> + idx = combo_idx;
> + PUSH_OP(2);
> + if (pos >= LOOKBACK_WORDS) {
> + old_pos = pos - LOOKBACK_WORDS;
> + if (av_hashtable_get(ctx->combo_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
> + av_hashtable_delete(ctx->combo_ht, ctx->tex_data + old_pos * 4);
> + }
> + av_hashtable_set(ctx->combo_ht, &combo, &pos);
>
> color = AV_RL32(ctx->tex_data + pos * 4);
> - if (av_hashtable_get(ctx->color_ht, &color, &prev_pos))
> - color_idx = pos - prev_pos;
> - av_hashtable_set(ctx->color_ht, &color, &pos);
> -
> + if (!combo_idx) {
> + idx = av_hashtable_get(ctx->color_ht, &color, &prev_pos) ? pos - prev_pos : 0;
> + PUSH_OP(2);
> + if (!idx)
> + bytestream2_put_le32(pbc, color);
> + }
> if (pos >= LOOKBACK_WORDS) {
> - uint32_t old_pos = pos - LOOKBACK_WORDS;
> + old_pos = pos - LOOKBACK_WORDS;
> if (av_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
> av_hashtable_delete(ctx->color_ht, ctx->tex_data + old_pos * 4);
> }
> + av_hashtable_set(ctx->color_ht, &color, &pos);
> pos++;
>
> lut = AV_RL32(ctx->tex_data + pos * 4);
> - if (color_idx && lut == AV_RL32(ctx->tex_data + (pos - color_idx) * 4)) {
> - idx = color_idx;
> - } else {
> - idx = 0;
> - if (av_hashtable_get(ctx->lut_ht, &lut, &prev_pos))
> - lut_idx = pos - prev_pos;
> - av_hashtable_set(ctx->lut_ht, &lut, &pos);
> + if (!combo_idx) {
> + idx = av_hashtable_get(ctx->lut_ht, &lut, &prev_pos) ? pos - prev_pos : 0;
> + PUSH_OP(2);
> + if (!idx)
> + bytestream2_put_le32(pbc, lut);
> }
> if (pos >= LOOKBACK_WORDS) {
> - uint32_t old_pos = pos - LOOKBACK_WORDS;
> + old_pos = pos - LOOKBACK_WORDS;
> if (av_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos * 4, &prev_pos) && prev_pos <= old_pos)
> av_hashtable_delete(ctx->lut_ht, ctx->tex_data + old_pos * 4);
> }
> + av_hashtable_set(ctx->lut_ht, &lut, &pos);
> pos++;
> -
> - PUSH_OP(2);
> -
> - if (!idx) {
> - idx = color_idx;
> - PUSH_OP(2);
> - if (!idx)
> - bytestream2_put_le32(pbc, color);
> -
> - idx = lut_idx;
> - PUSH_OP(2);
> - if (!idx)
> - bytestream2_put_le32(pbc, lut);
> - }
> }
>
> return 0;
> @@ -172,11 +177,23 @@ static int dxv_encode(AVCodecContext *avctx, AVPacket *pkt,
> return ret;
>
> if (ctx->enc.tex_funct) {
> + uint8_t *dst_data[4] = {av_mallocz(DXV_ALIGN(avctx->width) * DXV_ALIGN(avctx->height) * 4), 0, 0, 0};
> + int dst_linesize[4] = {DXV_ALIGN(avctx->width) * 4, 0, 0, 0};
> +
> + av_image_copy2(
> + dst_data,
> + dst_linesize,
> + frame->data,
> + frame->linesize,
> + frame->format,
> + avctx->width,
> + avctx->height);
1. Unchecked allocation.
2. The buffer leaks unconditionally.
3. The copy is definitely unnecessary if the input is already properly
aligned.
4. Can't we avoid the copy in case the input is not properly aligned?
5. Most of dst_data will be initially zeroed only to be overwritten with
the final value again. This is wasteful.
> +
> ctx->enc.tex_data.out = ctx->tex_data;
> - ctx->enc.frame_data.in = frame->data[0];
> - ctx->enc.stride = frame->linesize[0];
> - ctx->enc.width = avctx->width;
> - ctx->enc.height = avctx->height;
> + ctx->enc.frame_data.in = dst_data[0];
> + ctx->enc.stride = dst_linesize[0];
> + ctx->enc.width = DXV_ALIGN(avctx->width);
> + ctx->enc.height = DXV_ALIGN(avctx->height);
> ff_texturedsp_exec_compress_threads(avctx, &ctx->enc);
> } else {
> /* unimplemented: YCoCg formats */
> @@ -216,14 +233,6 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> return ret;
> }
>
> - if (avctx->width % TEXTURE_BLOCK_W || avctx->height % TEXTURE_BLOCK_H) {
> - av_log(avctx,
> - AV_LOG_ERROR,
> - "Video size %dx%d is not multiple of "AV_STRINGIFY(TEXTURE_BLOCK_W)"x"AV_STRINGIFY(TEXTURE_BLOCK_H)".\n",
> - avctx->width, avctx->height);
> - return AVERROR_INVALIDDATA;
> - }
> -
> ff_texturedspenc_init(&texdsp);
>
> switch (ctx->tex_fmt) {
> @@ -237,10 +246,10 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> return AVERROR_INVALIDDATA;
> }
> ctx->enc.raw_ratio = 16;
> - ctx->tex_size = avctx->width / TEXTURE_BLOCK_W *
> - avctx->height / TEXTURE_BLOCK_H *
> + ctx->tex_size = DXV_ALIGN(avctx->width) / TEXTURE_BLOCK_W *
> + DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H *
> ctx->enc.tex_ratio;
> - ctx->enc.slice_count = av_clip(avctx->thread_count, 1, avctx->height / TEXTURE_BLOCK_H);
> + ctx->enc.slice_count = av_clip(avctx->thread_count, 1, DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H);
>
> ctx->tex_data = av_malloc(ctx->tex_size);
> if (!ctx->tex_data) {
> @@ -251,6 +260,9 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> if (ret < 0)
> return ret;
> ret = av_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> + if (ret < 0)
> + return ret;
> + ret = av_hashtable_alloc(&ctx->combo_ht, sizeof(uint64_t), sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> if (ret < 0)
> return ret;
>
> @@ -265,6 +277,7 @@ static av_cold int dxv_close(AVCodecContext *avctx)
>
> av_hashtable_freep(&ctx->color_ht);
> av_hashtable_freep(&ctx->lut_ht);
> + av_hashtable_freep(&ctx->combo_ht);
>
> return 0;
> }
> diff --git a/tests/ref/fate/dxv3enc-dxt1 b/tests/ref/fate/dxv3enc-dxt1
> index 74849a8031..e09000e181 100644
> --- a/tests/ref/fate/dxv3enc-dxt1
> +++ b/tests/ref/fate/dxv3enc-dxt1
> @@ -3,4 +3,4 @@
> #codec_id 0: dxv
> #dimensions 0: 1920x1080
> #sar 0: 1/1
> -0, 0, 0, 1, 76521, 0xed387a5e
> +0, 0, 0, 1, 76190, 0x0e6f0326
Is padding the reason for this change or is it the new hashtable? Or both?
- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v3 3/3] lavc/dxvenc: improve compatibility with Resolume products
2025-04-21 11:19 ` Andreas Rheinhardt
@ 2025-04-21 16:59 ` Emma Worley
0 siblings, 0 replies; 7+ messages in thread
From: Emma Worley @ 2025-04-21 16:59 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Mon, Apr 21, 2025 at 4:19 AM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:
> Emma Worley:
> > Improves compatibility with Resolume products by adding an additional
> hashtable
> > for DXT color+LUT combinations, and padding the DXT texture to the next
> largest
> > 16x16 multiple. Produces identical packets to Resolume Alley in manual
> tests.
> >
>
> Are the files that this encoder currently produces invalid?
>
Yes, if they are unaligned.
>
> > Signed-off-by: Emma Worley <emma@emma.gg>
> > ---
> > libavcodec/dxvenc.c | 105 ++++++++++++++++++++----------------
> > tests/ref/fate/dxv3enc-dxt1 | 2 +-
> > 2 files changed, 60 insertions(+), 47 deletions(-)
> >
> > diff --git a/libavcodec/dxvenc.c b/libavcodec/dxvenc.c
> > index c64b4d5f15..737ccfff3e 100644
> > --- a/libavcodec/dxvenc.c
> > +++ b/libavcodec/dxvenc.c
> > @@ -34,6 +34,11 @@
> >
> > #define DXV_HEADER_LENGTH 12
> >
> > +/*
> > + * Resolume will refuse to display frames that are not padded to 16x16
> pixels.
> > + */
> > +#define DXV_ALIGN(x) FFALIGN(x, 16)
> > +
> > /*
> > * DXV uses LZ-like back-references to avoid copying words that have
> already
> > * appeared in the decompressed stream. Using a simple hash table (HT)
> > @@ -60,6 +65,7 @@ typedef struct DXVEncContext {
> >
> > struct AVHashtableContext *color_ht;
> > struct AVHashtableContext *lut_ht;
> > + struct AVHashtableContext *combo_ht;
> > } DXVEncContext;
> >
> > /* Converts an index offset value to a 2-bit opcode and pushes it to a
> stream.
> > @@ -94,10 +100,14 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
> > DXVEncContext *ctx = avctx->priv_data;
> > PutByteContext *pbc = &ctx->pbc;
> > void *value;
> > - uint32_t color, lut, idx, color_idx, lut_idx, prev_pos, state = 16,
> pos = 0, op = 0;
> > + uint64_t combo;
> > + uint32_t color, lut, idx, combo_idx, prev_pos, old_pos, state = 16,
> pos = 0, op = 0;
> >
> > av_hashtable_clear(ctx->color_ht);
> > av_hashtable_clear(ctx->lut_ht);
> > + av_hashtable_clear(ctx->combo_ht);
> > +
> > + av_hashtable_set(ctx->combo_ht, ctx->tex_data, &pos);
> >
> > bytestream2_put_le32(pbc, AV_RL32(ctx->tex_data));
> > av_hashtable_set(ctx->color_ht, ctx->tex_data, &pos);
> > @@ -107,51 +117,46 @@ static int dxv_compress_dxt1(AVCodecContext *avctx)
> > pos++;
> >
> > while (pos + 2 <= ctx->tex_size / 4) {
> > - idx = 0;
> > - color_idx = 0;
> > - lut_idx = 0;
> > + combo = AV_RL64(ctx->tex_data + pos * 4);
> > + combo_idx = av_hashtable_get(ctx->combo_ht, &combo, &prev_pos)
> ? pos - prev_pos : 0;
> > + idx = combo_idx;
> > + PUSH_OP(2);
> > + if (pos >= LOOKBACK_WORDS) {
> > + old_pos = pos - LOOKBACK_WORDS;
> > + if (av_hashtable_get(ctx->combo_ht, ctx->tex_data + old_pos
> * 4, &prev_pos) && prev_pos <= old_pos)
> > + av_hashtable_delete(ctx->combo_ht, ctx->tex_data +
> old_pos * 4);
> > + }
> > + av_hashtable_set(ctx->combo_ht, &combo, &pos);
> >
> > color = AV_RL32(ctx->tex_data + pos * 4);
> > - if (av_hashtable_get(ctx->color_ht, &color, &prev_pos))
> > - color_idx = pos - prev_pos;
> > - av_hashtable_set(ctx->color_ht, &color, &pos);
> > -
> > + if (!combo_idx) {
> > + idx = av_hashtable_get(ctx->color_ht, &color, &prev_pos) ?
> pos - prev_pos : 0;
> > + PUSH_OP(2);
> > + if (!idx)
> > + bytestream2_put_le32(pbc, color);
> > + }
> > if (pos >= LOOKBACK_WORDS) {
> > - uint32_t old_pos = pos - LOOKBACK_WORDS;
> > + old_pos = pos - LOOKBACK_WORDS;
> > if (av_hashtable_get(ctx->color_ht, ctx->tex_data + old_pos
> * 4, &prev_pos) && prev_pos <= old_pos)
> > av_hashtable_delete(ctx->color_ht, ctx->tex_data +
> old_pos * 4);
> > }
> > + av_hashtable_set(ctx->color_ht, &color, &pos);
> > pos++;
> >
> > lut = AV_RL32(ctx->tex_data + pos * 4);
> > - if (color_idx && lut == AV_RL32(ctx->tex_data + (pos -
> color_idx) * 4)) {
> > - idx = color_idx;
> > - } else {
> > - idx = 0;
> > - if (av_hashtable_get(ctx->lut_ht, &lut, &prev_pos))
> > - lut_idx = pos - prev_pos;
> > - av_hashtable_set(ctx->lut_ht, &lut, &pos);
> > + if (!combo_idx) {
> > + idx = av_hashtable_get(ctx->lut_ht, &lut, &prev_pos) ? pos
> - prev_pos : 0;
> > + PUSH_OP(2);
> > + if (!idx)
> > + bytestream2_put_le32(pbc, lut);
> > }
> > if (pos >= LOOKBACK_WORDS) {
> > - uint32_t old_pos = pos - LOOKBACK_WORDS;
> > + old_pos = pos - LOOKBACK_WORDS;
> > if (av_hashtable_get(ctx->lut_ht, ctx->tex_data + old_pos *
> 4, &prev_pos) && prev_pos <= old_pos)
> > av_hashtable_delete(ctx->lut_ht, ctx->tex_data +
> old_pos * 4);
> > }
> > + av_hashtable_set(ctx->lut_ht, &lut, &pos);
> > pos++;
> > -
> > - PUSH_OP(2);
> > -
> > - if (!idx) {
> > - idx = color_idx;
> > - PUSH_OP(2);
> > - if (!idx)
> > - bytestream2_put_le32(pbc, color);
> > -
> > - idx = lut_idx;
> > - PUSH_OP(2);
> > - if (!idx)
> > - bytestream2_put_le32(pbc, lut);
> > - }
> > }
> >
> > return 0;
> > @@ -172,11 +177,23 @@ static int dxv_encode(AVCodecContext *avctx,
> AVPacket *pkt,
> > return ret;
> >
> > if (ctx->enc.tex_funct) {
> > + uint8_t *dst_data[4] = {av_mallocz(DXV_ALIGN(avctx->width) *
> DXV_ALIGN(avctx->height) * 4), 0, 0, 0};
> > + int dst_linesize[4] = {DXV_ALIGN(avctx->width) * 4, 0, 0, 0};
> > +
> > + av_image_copy2(
> > + dst_data,
> > + dst_linesize,
> > + frame->data,
> > + frame->linesize,
> > + frame->format,
> > + avctx->width,
> > + avctx->height);
>
> 1. Unchecked allocation.
> 2. The buffer leaks unconditionally.
> 3. The copy is definitely unnecessary if the input is already properly
> aligned.
> 4. Can't we avoid the copy in case the input is not properly aligned?
5. Most of dst_data will be initially zeroed only to be overwritten with
> the final value again. This is wasteful.
>
Is there an idiomatic way to do a partial fill here?
> > +
> > ctx->enc.tex_data.out = ctx->tex_data;
> > - ctx->enc.frame_data.in = frame->data[0];
> > - ctx->enc.stride = frame->linesize[0];
> > - ctx->enc.width = avctx->width;
> > - ctx->enc.height = avctx->height;
> > + ctx->enc.frame_data.in = dst_data[0];
> > + ctx->enc.stride = dst_linesize[0];
> > + ctx->enc.width = DXV_ALIGN(avctx->width);
> > + ctx->enc.height = DXV_ALIGN(avctx->height);
> > ff_texturedsp_exec_compress_threads(avctx, &ctx->enc);
> > } else {
> > /* unimplemented: YCoCg formats */
> > @@ -216,14 +233,6 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> > return ret;
> > }
> >
> > - if (avctx->width % TEXTURE_BLOCK_W || avctx->height %
> TEXTURE_BLOCK_H) {
> > - av_log(avctx,
> > - AV_LOG_ERROR,
> > - "Video size %dx%d is not multiple of
> "AV_STRINGIFY(TEXTURE_BLOCK_W)"x"AV_STRINGIFY(TEXTURE_BLOCK_H)".\n",
> > - avctx->width, avctx->height);
> > - return AVERROR_INVALIDDATA;
> > - }
> > -
> > ff_texturedspenc_init(&texdsp);
> >
> > switch (ctx->tex_fmt) {
> > @@ -237,10 +246,10 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> > return AVERROR_INVALIDDATA;
> > }
> > ctx->enc.raw_ratio = 16;
> > - ctx->tex_size = avctx->width / TEXTURE_BLOCK_W *
> > - avctx->height / TEXTURE_BLOCK_H *
> > + ctx->tex_size = DXV_ALIGN(avctx->width) / TEXTURE_BLOCK_W *
> > + DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H *
> > ctx->enc.tex_ratio;
> > - ctx->enc.slice_count = av_clip(avctx->thread_count, 1,
> avctx->height / TEXTURE_BLOCK_H);
> > + ctx->enc.slice_count = av_clip(avctx->thread_count, 1,
> DXV_ALIGN(avctx->height) / TEXTURE_BLOCK_H);
> >
> > ctx->tex_data = av_malloc(ctx->tex_size);
> > if (!ctx->tex_data) {
> > @@ -251,6 +260,9 @@ static av_cold int dxv_init(AVCodecContext *avctx)
> > if (ret < 0)
> > return ret;
> > ret = av_hashtable_alloc(&ctx->lut_ht, sizeof(uint32_t),
> sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> > + if (ret < 0)
> > + return ret;
> > + ret = av_hashtable_alloc(&ctx->combo_ht, sizeof(uint64_t),
> sizeof(uint32_t), LOOKBACK_HT_ELEMS);
> > if (ret < 0)
> > return ret;
> >
> > @@ -265,6 +277,7 @@ static av_cold int dxv_close(AVCodecContext *avctx)
> >
> > av_hashtable_freep(&ctx->color_ht);
> > av_hashtable_freep(&ctx->lut_ht);
> > + av_hashtable_freep(&ctx->combo_ht);
> >
> > return 0;
> > }
> > diff --git a/tests/ref/fate/dxv3enc-dxt1 b/tests/ref/fate/dxv3enc-dxt1
> > index 74849a8031..e09000e181 100644
> > --- a/tests/ref/fate/dxv3enc-dxt1
> > +++ b/tests/ref/fate/dxv3enc-dxt1
> > @@ -3,4 +3,4 @@
> > #codec_id 0: dxv
> > #dimensions 0: 1920x1080
> > #sar 0: 1/1
> > -0, 0, 0, 1, 76521, 0xed387a5e
> > +0, 0, 0, 1, 76190, 0x0e6f0326
>
> Is padding the reason for this change or is it the new hashtable? Or both?
>
Both. For more context, the padding is solely for player compatibility
while the new hashtable optimize output file size.
With both I see identical packets to the de facto closed source encoder
(Resolume Alley) given the same DXT1 texture going into dxv_compress_dxt1.
>
> - Andreas
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-04-21 16:59 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-21 2:33 [FFmpeg-devel] [PATCH v3 1/3] lavu/hashtable: create generic robin hood hash table Emma Worley
2025-04-21 2:33 ` [FFmpeg-devel] [PATCH v3 2/3] lavc/dxvenc: migrate DXT1 encoder to lavu hashtable Emma Worley
2025-04-21 10:41 ` Nicolas George
2025-04-21 2:33 ` [FFmpeg-devel] [PATCH v3 3/3] lavc/dxvenc: improve compatibility with Resolume products Emma Worley
2025-04-21 11:19 ` Andreas Rheinhardt
2025-04-21 16:59 ` Emma Worley
2025-04-21 10:40 ` [FFmpeg-devel] [PATCH v3 1/3] lavu/hashtable: create generic robin hood hash table Nicolas George
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git