* [FFmpeg-devel] [PATCH] MULTI VLC decoding boost
@ 2023-08-28 17:36 Paul B Mahol
2023-09-04 16:08 ` Paul B Mahol
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Paul B Mahol @ 2023-08-28 17:36 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 179 bytes --]
Patches attached.
Thanks to kurosu for pointing out the unmerged branches.
With these patches, the UNCACHED_PATH for x86_32 is broken for the 2 codecs they touch.
The fix is trivial and will be done later.
[-- Attachment #2: 0002-avcodec-magicyuv-add-vlc-multi-support.patch --]
[-- Type: text/x-patch, Size: 6174 bytes --]
From 2feb559ce6384c408a5d5008722b0a20262e976a Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Mon, 28 Aug 2023 12:20:15 +0200
Subject: [PATCH 2/3] avcodec/magicyuv: add vlc multi support
Gives a nice speed boost: depending on the encoded content, decoding is
30% to 60% faster.
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
libavcodec/magicyuv.c | 64 ++++++++++++++++++++++---------------------
1 file changed, 33 insertions(+), 31 deletions(-)
diff --git a/libavcodec/magicyuv.c b/libavcodec/magicyuv.c
index 7898cd5be4..7a73837248 100644
--- a/libavcodec/magicyuv.c
+++ b/libavcodec/magicyuv.c
@@ -34,6 +34,8 @@
#include "lossless_videodsp.h"
#include "thread.h"
+#define VLC_BITS 12
+
typedef struct Slice {
uint32_t start;
uint32_t size;
@@ -67,13 +69,14 @@ typedef struct MagicYUVContext {
Slice *slices[4]; // slice bitstream positions for each plane
unsigned int slices_size[4]; // slice sizes for each plane
VLC vlc[4]; // VLC for each plane
+ VLC_MULTI multi[4]; // Buffer for joint VLC data
int (*magy_decode_slice)(AVCodecContext *avctx, void *tdata,
int j, int threadnr);
LLVidDSPContext llviddsp;
} MagicYUVContext;
static int huff_build(const uint8_t len[], uint16_t codes_pos[33],
- VLC *vlc, int nb_elems, void *logctx)
+ VLC *vlc, VLC_MULTI *multi, int nb_elems, void *logctx)
{
HuffEntry he[4096];
@@ -84,7 +87,8 @@ static int huff_build(const uint8_t len[], uint16_t codes_pos[33],
he[--codes_pos[len[i]]] = (HuffEntry){ len[i], i };
ff_free_vlc(vlc);
- return ff_init_vlc_from_lengths(vlc, FFMIN(he[0].len, 12), nb_elems,
+ ff_free_vlc_multi(multi);
+ return ff_init_vlc_multi_from_lengths(vlc, multi, FFMIN(he[0].len, VLC_BITS), nb_elems, nb_elems,
&he[0].len, sizeof(he[0]),
&he[0].sym, sizeof(he[0]), sizeof(he[0].sym),
0, 0, logctx);
@@ -111,6 +115,21 @@ static void magicyuv_median_pred16(uint16_t *dst, const uint16_t *src1,
*left_top = lt;
}
+#define READ_PLANE(dst, plane, b, c) \
+{ \
+ for (x = 0; CACHED_BITSTREAM_READER && x < width-c && get_bits_left(&gb) > 0;) {\
+ ret = get_vlc_multi(&gb, (uint8_t *)dst + x * b, multi, \
+ vlc, vlc_bits, 3); \
+ if (ret > 0) \
+ x += ret; \
+ if (ret <= 0) \
+ return AVERROR_INVALIDDATA; \
+ } \
+ for (; x < width && get_bits_left(&gb) > 0; x++) \
+ dst[x] = get_vlc2(&gb, vlc, vlc_bits, 3); \
+ dst += stride; \
+}
+
static int magy_decode_slice10(AVCodecContext *avctx, void *tdata,
int j, int threadnr)
{
@@ -130,6 +149,9 @@ static int magy_decode_slice10(AVCodecContext *avctx, void *tdata,
int sheight = AV_CEIL_RSHIFT(s->slice_height, s->vshift[i]);
ptrdiff_t fake_stride = (p->linesize[i] / 2) * (1 + interlaced);
ptrdiff_t stride = p->linesize[i] / 2;
+ const VLC_MULTI_ELEM *const multi = s->multi[i].table;
+ const VLCElem *const vlc = s->vlc[i].table;
+ const int vlc_bits = s->vlc[i].bits;
int flags, pred;
int ret = init_get_bits8(&gb, s->buf + s->slices[i][j].start,
s->slices[i][j].size);
@@ -151,20 +173,8 @@ static int magy_decode_slice10(AVCodecContext *avctx, void *tdata,
dst += stride;
}
} else {
- for (k = 0; k < height; k++) {
- for (x = 0; x < width; x++) {
- int pix;
- if (get_bits_left(&gb) <= 0)
- return AVERROR_INVALIDDATA;
-
- pix = get_vlc2(&gb, s->vlc[i].table, s->vlc[i].bits, 3);
- if (pix < 0)
- return AVERROR_INVALIDDATA;
-
- dst[x] = pix;
- }
- dst += stride;
- }
+ for (k = 0; k < height; k++)
+ READ_PLANE(dst, i, 2, 3)
}
switch (pred) {
@@ -261,6 +271,9 @@ static int magy_decode_slice(AVCodecContext *avctx, void *tdata,
ptrdiff_t fake_stride = p->linesize[i] * (1 + interlaced);
ptrdiff_t stride = p->linesize[i];
const uint8_t *slice = s->buf + s->slices[i][j].start;
+ const VLC_MULTI_ELEM *const multi = s->multi[i].table;
+ const VLCElem *const vlc = s->vlc[i].table;
+ const int vlc_bits = s->vlc[i].bits;
int flags, pred;
flags = bytestream_get_byte(&slice);
@@ -280,20 +293,8 @@ static int magy_decode_slice(AVCodecContext *avctx, void *tdata,
if (ret < 0)
return ret;
- for (k = 0; k < height; k++) {
- for (x = 0; x < width; x++) {
- int pix;
- if (get_bits_left(&gb) <= 0)
- return AVERROR_INVALIDDATA;
-
- pix = get_vlc2(&gb, s->vlc[i].table, s->vlc[i].bits, 3);
- if (pix < 0)
- return AVERROR_INVALIDDATA;
-
- dst[x] = pix;
- }
- dst += stride;
- }
+ for (k = 0; k < height; k++)
+ READ_PLANE(dst, i, 1, 5)
}
switch (pred) {
@@ -407,7 +408,7 @@ static int build_huffman(AVCodecContext *avctx, const uint8_t *table,
if (j == max) {
j = 0;
- if (huff_build(len, length_count, &s->vlc[i], max, avctx)) {
+ if (huff_build(len, length_count, &s->vlc[i], &s->multi[i], max, avctx)) {
av_log(avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n");
return AVERROR_INVALIDDATA;
}
@@ -687,6 +688,7 @@ static av_cold int magy_decode_end(AVCodecContext *avctx)
av_freep(&s->slices[i]);
s->slices_size[i] = 0;
ff_free_vlc(&s->vlc[i]);
+ ff_free_vlc_multi(&s->multi[i]);
}
return 0;
--
2.39.1
[-- Attachment #3: 0001-avcodec-add-multi-vlc-reader.patch --]
[-- Type: text/x-patch, Size: 12037 bytes --]
From a5ade51373bb9871772244f2d135ff1b8ff0ff61 Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Sun, 27 Aug 2023 21:18:46 +0200
Subject: [PATCH 1/3] avcodec: add multi vlc reader
Heavily based on and inspired by Christophe's cache branches.
Co-Authored-by: Christophe Gisquet
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
libavcodec/bitstream.h | 2 +
libavcodec/bitstream_template.h | 29 +++++++
libavcodec/get_bits.h | 1 +
libavcodec/vlc.c | 137 ++++++++++++++++++++++++++++++++
libavcodec/vlc.h | 53 ++++++++++++
5 files changed, 222 insertions(+)
diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h
index b60f0c296d..35b7873b9c 100644
--- a/libavcodec/bitstream.h
+++ b/libavcodec/bitstream.h
@@ -102,6 +102,7 @@
# define bits_decode210 bits_decode210_le
# define bits_apply_sign bits_apply_sign_le
# define bits_read_vlc bits_read_vlc_le
+# define bits_read_vlc_multi bits_read_vlc_multi_le
#elif defined(BITS_DEFAULT_BE)
@@ -130,6 +131,7 @@
# define bits_decode210 bits_decode210_be
# define bits_apply_sign bits_apply_sign_be
# define bits_read_vlc bits_read_vlc_be
+# define bits_read_vlc_multi bits_read_vlc_multi_be
#endif
diff --git a/libavcodec/bitstream_template.h b/libavcodec/bitstream_template.h
index 30bea84add..0308e3a924 100644
--- a/libavcodec/bitstream_template.h
+++ b/libavcodec/bitstream_template.h
@@ -520,6 +520,35 @@ static inline int BS_FUNC(read_vlc)(BSCTX *bc, const VLCElem *table,
return code;
}
+static inline int BS_FUNC(read_vlc_multi)(BSCTX *bc, uint8_t *dst,
+ const VLC_MULTI_ELEM *const Jtable,
+ const VLCElem *const table,
+ const int bits, const int max_depth)
+{
+ unsigned idx = BS_FUNC(peek)(bc, bits);
+ int ret, nb_bits, code, n = Jtable[idx].len;
+ if (Jtable[idx].num) {
+ AV_COPY64U(dst, Jtable[idx].val);
+ ret = Jtable[idx].num;
+ } else {
+ code = table[idx].sym;
+ n = table[idx].len;
+ if (max_depth > 1 && n < 0) {
+ BS_FUNC(priv_skip_remaining)(bc, bits);
+ code = BS_FUNC(priv_set_idx)(bc, code, &n, &nb_bits, table);
+ if (max_depth > 2 && n < 0) {
+ BS_FUNC(priv_skip_remaining)(bc, nb_bits);
+ code = BS_FUNC(priv_set_idx)(bc, code, &n, &nb_bits, table);
+ }
+ }
+ AV_WN16(dst, code);
+ ret = n > 0;
+ }
+ BS_FUNC(priv_skip_remaining)(bc, n);
+
+ return ret;
+}
+
#undef BSCTX
#undef BS_FUNC
#undef BS_JOIN3
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 65dc080ddb..581e3d44a4 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -93,6 +93,7 @@ typedef BitstreamContext GetBitContext;
#define init_get_bits8 bits_init8
#define align_get_bits bits_align
#define get_vlc2 bits_read_vlc
+#define get_vlc_multi bits_read_vlc_multi
#define init_get_bits8_le(s, buffer, byte_size) bits_init8_le((BitstreamContextLE*)s, buffer, byte_size)
#define get_bits_le(s, n) bits_read_le((BitstreamContextLE*)s, n)
diff --git a/libavcodec/vlc.c b/libavcodec/vlc.c
index 96f2b28c7e..840bf8ab76 100644
--- a/libavcodec/vlc.c
+++ b/libavcodec/vlc.c
@@ -30,6 +30,7 @@
#include "libavutil/avassert.h"
#include "libavutil/error.h"
#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavutil/macros.h"
#include "libavutil/mem.h"
@@ -372,6 +373,142 @@ fail:
return AVERROR_INVALIDDATA;
}
+static void add_level(VLC_MULTI_ELEM *table, const int nb_elems,
+ const int num, const int numbits,
+ const VLCcode *buf,
+ uint32_t curcode, int curlen,
+ int curlimit, int curlevel,
+ const int minlen, const int max,
+ unsigned* levelcnt, VLC_MULTI_ELEM *info)
+{
+ if (nb_elems > 256 && curlevel > 2)
+ return; // No room
+ for (int i = num-1; i > max; i--) {
+ for (int j = 0; j < 2; j++) {
+ int newlimit, sym;
+ int t = j ? i-1 : i;
+ int l = buf[t].bits;
+ uint32_t code;
+
+ sym = buf[t].symbol;
+ if (l > curlimit)
+ return;
+ code = curcode + (buf[t].code >> curlen);
+ newlimit = curlimit - l;
+ l += curlen;
+ if (nb_elems>256) AV_WN16(info->val+2*curlevel, sym);
+ else info->val[curlevel] = sym&0xFF;
+
+ if (curlevel) { // let's not add single entries
+ uint32_t val = code >> (32 - numbits);
+ uint32_t nb = val + (1U << (numbits - l));
+ info->len = l;
+ info->num = curlevel+1;
+ for (; val < nb; val++)
+ AV_COPY64(table+val, info);
+ levelcnt[curlevel-1]++;
+ }
+
+ if (curlevel+1 < VLC_MULTI_MAX_SYMBOLS && newlimit >= minlen) {
+ add_level(table, nb_elems, num, numbits, buf,
+ code, l, newlimit, curlevel+1,
+ minlen, max, levelcnt, info);
+ }
+ }
+ }
+}
+
+static int vlc_multi_gen(VLC_MULTI_ELEM *table, const VLC *single,
+ const int nb_elems, const int nb_codes, const int numbits,
+ VLCcode *buf, void *logctx)
+{
+ int minbits, maxbits, max = nb_codes-1;
+ unsigned count[VLC_MULTI_MAX_SYMBOLS-1] = { 0, };
+ VLC_MULTI_ELEM info = { { 0, }, 0, };
+
+ minbits = buf[nb_codes-1].bits;
+ maxbits = FFMIN(buf[0].bits, numbits);
+
+ while (max >= nb_codes/2) {
+ if (buf[max].bits+minbits > maxbits)
+ break;
+ max--;
+ }
+
+ for (int j = 0; j < 1<<numbits; j++) {
+ table[j].len = single->table[j].len;
+ table[j].num = single->table[j].len > 0 ? 1 : 0;
+ AV_WN16(table[j].val, single->table[j].sym);
+ }
+
+ add_level(table, nb_elems, nb_codes, numbits, buf,
+ 0, 0, numbits, 0, minbits, max, count, &info);
+
+ av_log(NULL, AV_LOG_DEBUG, "Joint: %d/%d/%d/%d/%d codes min=%ubits max=%u\n",
+ count[0], count[1], count[2], count[3], count[4], minbits, max);
+
+ return 0;
+}
+
+int ff_init_vlc_multi_from_lengths(VLC *vlc, VLC_MULTI *multi, int nb_bits, int nb_elems,
+ int nb_codes, const int8_t *lens, int lens_wrap,
+ const void *symbols, int symbols_wrap, int symbols_size,
+ int offset, int flags, void *logctx)
+{
+ VLCcode localbuf[LOCALBUF_ELEMS], *buf = localbuf;
+ uint64_t code;
+ int ret, j, len_max = FFMIN(32, 3 * nb_bits);
+
+ ret = vlc_common_init(vlc, nb_bits, nb_codes, &buf, flags);
+ if (ret < 0)
+ return ret;
+
+ multi->table = av_malloc(sizeof(*multi->table) << nb_bits);
+ if (!multi->table)
+ return AVERROR(ENOMEM);
+
+ j = code = 0;
+ for (int i = 0; i < nb_codes; i++, lens += lens_wrap) {
+ int len = *lens;
+ if (len > 0) {
+ unsigned sym;
+
+ buf[j].bits = len;
+ if (symbols)
+ GET_DATA(sym, symbols, i, symbols_wrap, symbols_size)
+ else
+ sym = i;
+ buf[j].symbol = sym + offset;
+ buf[j++].code = code;
+ } else if (len < 0) {
+ len = -len;
+ } else
+ continue;
+ if (len > len_max || code & ((1U << (32 - len)) - 1)) {
+ av_log(logctx, AV_LOG_ERROR, "Invalid VLC (length %u)\n", len);
+ goto fail;
+ }
+ code += 1U << (32 - len);
+ if (code > UINT32_MAX + 1ULL) {
+ av_log(logctx, AV_LOG_ERROR, "Overdetermined VLC tree\n");
+ goto fail;
+ }
+ }
+ ret = vlc_common_end(vlc, nb_bits, j, buf, flags, localbuf);
+ if (ret < 0)
+ goto fail;
+ return vlc_multi_gen(multi->table, vlc, nb_elems, j, nb_bits, buf, logctx);
+fail:
+ if (buf != localbuf)
+ av_free(buf);
+ return AVERROR_INVALIDDATA;
+}
+
+void ff_free_vlc_multi(VLC_MULTI *vlc)
+{
+ av_freep(&vlc->table);
+}
+
void ff_free_vlc(VLC *vlc)
{
av_freep(&vlc->table);
diff --git a/libavcodec/vlc.h b/libavcodec/vlc.h
index e63c484755..46063862f6 100644
--- a/libavcodec/vlc.h
+++ b/libavcodec/vlc.h
@@ -21,6 +21,8 @@
#include <stdint.h>
+#define VLC_MULTI_MAX_SYMBOLS 6
+
// When changing this, be sure to also update tableprint_vlc.h accordingly.
typedef int16_t VLCBaseType;
@@ -34,6 +36,17 @@ typedef struct VLC {
int table_size, table_allocated;
} VLC;
+typedef struct VLC_MULTI_ELEM {
+ uint8_t val[VLC_MULTI_MAX_SYMBOLS];
+ int8_t len; // -31,32
+ uint8_t num;
+} VLC_MULTI_ELEM;
+
+typedef struct VLC_MULTI {
+ VLC_MULTI_ELEM *table;
+ int table_size, table_allocated;
+} VLC_MULTI;
+
typedef struct RL_VLC_ELEM {
int16_t level;
int8_t len;
@@ -89,6 +102,46 @@ int ff_init_vlc_from_lengths(VLC *vlc, int nb_bits, int nb_codes,
const void *symbols, int symbols_wrap, int symbols_size,
int offset, int flags, void *logctx);
+/**
+ * Build VLC decoding tables suitable for use with get_vlc_multi()
+ *
+ * This function takes lengths and symbols and calculates the codes from them.
+ * For this the input lengths and symbols have to be sorted according to "left
+ * nodes in the corresponding tree first".
+ *
+ * @param[in,out] vlc The VLC to be initialized; table and table_allocated
+ * must have been set when initializing a static VLC,
+ * otherwise this will be treated as uninitialized.
+ * @param[in,out] multi The VLC_MULTI to be initialized; table and table_allocated
+ * must have been set when initializing a static VLC,
+ * otherwise this will be treated as uninitialized.
+ * @param[in] nb_bits The number of bits to use for the VLC table;
+ * higher values take up more memory and cache, but
+ * allow to read codes with fewer reads.
+ * @param[in] nb_elems The max possible number of elements.
+ * @param[in] nb_codes The number of provided length and (if supplied) symbol
+ * entries.
+ * @param[in] lens The lengths of the codes. Entries > 0 correspond to
+ * valid codes; entries == 0 will be skipped and entries
+ * with len < 0 indicate that the tree is incomplete and
+ * has an open end of length -len at this position.
+ * @param[in] lens_wrap Stride (in bytes) of the lengths.
+ * @param[in] symbols The symbols, i.e. what is returned from get_vlc2()
+ * when the corresponding code is encountered.
+ * May be NULL, then 0, 1, 2, 3, 4,... will be used.
+ * @param[in] symbols_wrap Stride (in bytes) of the symbols.
+ * @param[in] symbols_size Size of the symbols. 1 and 2 are supported.
+ * @param[in] offset An offset to apply to all the valid symbols.
+ * @param[in] flags A combination of the INIT_VLC_* flags; notice that
+ * INIT_VLC_INPUT_LE is pointless and ignored.
+ */
+int ff_init_vlc_multi_from_lengths(VLC *vlc, VLC_MULTI *multi, int nb_bits, int nb_elems,
+ int nb_codes, const int8_t *lens, int lens_wrap,
+ const void *symbols, int symbols_wrap, int symbols_size,
+ int offset, int flags, void *logctx);
+
+
+void ff_free_vlc_multi(VLC_MULTI *vlc);
void ff_free_vlc(VLC *vlc);
/* If INIT_VLC_INPUT_LE is set, the LSB bit of the codes used to
--
2.39.1
[-- Attachment #4: 0003-avcodec-utvideodec-add-vlc-multi-support.patch --]
[-- Type: text/x-patch, Size: 7624 bytes --]
From 4250d74dad2bfb4c8d01fc26c9635c56293fc74c Mon Sep 17 00:00:00 2001
From: Christophe Gisquet <christophe.gisquet@gmail.com>
Date: Sun, 9 Jul 2017 12:56:35 +0000
Subject: [PATCH 3/3] avcodec/utvideodec: add vlc multi support
Faster decoding, on average 50% faster overall.
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
libavcodec/utvideo.h | 1 +
libavcodec/utvideodec.c | 91 ++++++++++++++++++++---------------------
2 files changed, 45 insertions(+), 47 deletions(-)
diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
index 9da9329ff3..d274b6586d 100644
--- a/libavcodec/utvideo.h
+++ b/libavcodec/utvideo.h
@@ -80,6 +80,7 @@ typedef struct UtvideoContext {
ptrdiff_t slice_stride;
uint8_t *slice_bits, *slice_buffer[4];
+ void *buffer;
int slice_bits_size;
const uint8_t *packed_stream[4][256];
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index 1f00c58950..0b0352b7ec 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -46,7 +46,7 @@ typedef struct HuffEntry {
} HuffEntry;
static int build_huff(UtvideoContext *c, const uint8_t *src, VLC *vlc,
- int *fsym, unsigned nb_elems)
+ VLC_MULTI *multi, int *fsym, unsigned nb_elems)
{
int i;
HuffEntry he[1024];
@@ -82,11 +82,34 @@ static int build_huff(UtvideoContext *c, const uint8_t *src, VLC *vlc,
he[--codes_count[bits[i]]] = (HuffEntry) { bits[i], i };
#define VLC_BITS 11
- return ff_init_vlc_from_lengths(vlc, VLC_BITS, codes_count[0],
+ return ff_init_vlc_multi_from_lengths(vlc, multi, VLC_BITS, nb_elems, codes_count[0],
&he[0].len, sizeof(*he),
&he[0].sym, sizeof(*he), 2, 0, 0, c->avctx);
}
+#define READ_PLANE(b, end) \
+{ \
+ buf = !use_pred ? dest : c->buffer; \
+ for (i = 0; CACHED_BITSTREAM_READER && i < width-end && get_bits_left(&gb) > 0;) {\
+ ret = get_vlc_multi(&gb, (uint8_t *)buf + i * b, multi.table, \
+ vlc.table, VLC_BITS, 3); \
+ if (ret > 0) \
+ i += ret; \
+ if (ret <= 0) \
+ goto fail; \
+ } \
+ for (; i < width && get_bits_left(&gb) > 0; i++) \
+ buf[i] = get_vlc2(&gb, vlc.table, VLC_BITS, 3); \
+ if (use_pred) { \
+ if (b == 2) \
+ c->llviddsp.add_left_pred_int16((uint16_t *)dest, (const uint16_t *)buf, 0x3ff, width, prev); \
+ else \
+ c->llviddsp.add_left_pred((uint8_t *)dest, (const uint8_t *)buf, width, prev); \
+ } \
+ prev = dest[width-1]; \
+ dest += stride; \
+}
+
static int decode_plane10(UtvideoContext *c, int plane_no,
uint16_t *dst, ptrdiff_t stride,
int width, int height,
@@ -95,11 +118,12 @@ static int decode_plane10(UtvideoContext *c, int plane_no,
{
int i, j, slice, pix, ret;
int sstart, send;
+ VLC_MULTI multi;
VLC vlc;
GetBitContext gb;
int prev, fsym;
- if ((ret = build_huff(c, huff, &vlc, &fsym, 1024)) < 0) {
+ if ((ret = build_huff(c, huff, &vlc, &multi, &fsym, 1024)) < 0) {
av_log(c->avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n");
return ret;
}
@@ -131,7 +155,7 @@ static int decode_plane10(UtvideoContext *c, int plane_no,
send = 0;
for (slice = 0; slice < c->slices; slice++) {
- uint16_t *dest;
+ uint16_t *dest, *buf;
int slice_data_start, slice_data_end, slice_size;
sstart = send;
@@ -156,37 +180,20 @@ static int decode_plane10(UtvideoContext *c, int plane_no,
init_get_bits(&gb, c->slice_bits, slice_size * 8);
prev = 0x200;
- for (j = sstart; j < send; j++) {
- for (i = 0; i < width; i++) {
- pix = get_vlc2(&gb, vlc.table, VLC_BITS, 3);
- if (pix < 0) {
- av_log(c->avctx, AV_LOG_ERROR, "Decoding error\n");
- goto fail;
- }
- if (use_pred) {
- prev += pix;
- prev &= 0x3FF;
- pix = prev;
- }
- dest[i] = pix;
- }
- dest += stride;
- if (get_bits_left(&gb) < 0) {
- av_log(c->avctx, AV_LOG_ERROR,
- "Slice decoding ran out of bits\n");
- goto fail;
- }
- }
+ for (j = sstart; j < send; j++)
+ READ_PLANE(2, 3)
if (get_bits_left(&gb) > 32)
av_log(c->avctx, AV_LOG_WARNING,
"%d bits left after decoding slice\n", get_bits_left(&gb));
}
ff_free_vlc(&vlc);
+ ff_free_vlc_multi(&multi);
return 0;
fail:
ff_free_vlc(&vlc);
+ ff_free_vlc_multi(&multi);
return AVERROR_INVALIDDATA;
}
@@ -207,6 +214,7 @@ static int decode_plane(UtvideoContext *c, int plane_no,
{
int i, j, slice, pix;
int sstart, send;
+ VLC_MULTI multi;
VLC vlc;
GetBitContext gb;
int ret, prev, fsym;
@@ -259,7 +267,7 @@ static int decode_plane(UtvideoContext *c, int plane_no,
return 0;
}
- if (build_huff(c, src, &vlc, &fsym, 256)) {
+ if (build_huff(c, src, &vlc, &multi, &fsym, 256)) {
av_log(c->avctx, AV_LOG_ERROR, "Cannot build Huffman codes\n");
return AVERROR_INVALIDDATA;
}
@@ -292,7 +300,7 @@ static int decode_plane(UtvideoContext *c, int plane_no,
send = 0;
for (slice = 0; slice < c->slices; slice++) {
- uint8_t *dest;
+ uint8_t *dest, *buf;
int slice_data_start, slice_data_end, slice_size;
sstart = send;
@@ -317,36 +325,20 @@ static int decode_plane(UtvideoContext *c, int plane_no,
init_get_bits(&gb, c->slice_bits, slice_size * 8);
prev = 0x80;
- for (j = sstart; j < send; j++) {
- for (i = 0; i < width; i++) {
- pix = get_vlc2(&gb, vlc.table, VLC_BITS, 3);
- if (pix < 0) {
- av_log(c->avctx, AV_LOG_ERROR, "Decoding error\n");
- goto fail;
- }
- if (use_pred) {
- prev += pix;
- pix = prev;
- }
- dest[i] = pix;
- }
- if (get_bits_left(&gb) < 0) {
- av_log(c->avctx, AV_LOG_ERROR,
- "Slice decoding ran out of bits\n");
- goto fail;
- }
- dest += stride;
- }
+ for (j = sstart; j < send; j++)
+ READ_PLANE(1, 5)
if (get_bits_left(&gb) > 32)
av_log(c->avctx, AV_LOG_WARNING,
"%d bits left after decoding slice\n", get_bits_left(&gb));
}
ff_free_vlc(&vlc);
+ ff_free_vlc_multi(&multi);
return 0;
fail:
ff_free_vlc(&vlc);
+ ff_free_vlc_multi(&multi);
return AVERROR_INVALIDDATA;
}
@@ -992,6 +984,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
return AVERROR_INVALIDDATA;
}
+ c->buffer = av_calloc(avctx->width, c->pro?2:1);
+ if (!c->buffer)
+ return AVERROR(ENOMEM);
+
av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &h_shift, &v_shift);
if ((avctx->width & ((1<<h_shift)-1)) ||
(avctx->height & ((1<<v_shift)-1))) {
@@ -1047,6 +1043,7 @@ static av_cold int decode_end(AVCodecContext *avctx)
UtvideoContext * const c = avctx->priv_data;
av_freep(&c->slice_bits);
+ av_freep(&c->buffer);
return 0;
}
--
2.39.1
[-- Attachment #5: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH] MULTI VLC decoding boost
2023-08-28 17:36 [FFmpeg-devel] [PATCH] MULTI VLC decoding boost Paul B Mahol
@ 2023-09-04 16:08 ` Paul B Mahol
2023-09-14 22:01 ` Michael Niedermayer
2023-10-22 18:01 ` Michael Niedermayer
2 siblings, 0 replies; 5+ messages in thread
From: Paul B Mahol @ 2023-09-04 16:08 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Will apply soon.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH] MULTI VLC decoding boost
2023-08-28 17:36 [FFmpeg-devel] [PATCH] MULTI VLC decoding boost Paul B Mahol
2023-09-04 16:08 ` Paul B Mahol
@ 2023-09-14 22:01 ` Michael Niedermayer
2023-09-14 22:05 ` Paul B Mahol
2023-10-22 18:01 ` Michael Niedermayer
2 siblings, 1 reply; 5+ messages in thread
From: Michael Niedermayer @ 2023-09-14 22:01 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 2804 bytes --]
On Mon, Aug 28, 2023 at 07:36:17PM +0200, Paul B Mahol wrote:
> Patches attached.
>
> Thanks for kurosu for pointing unmerged branches.
>
> The UNCACHED_PATH for x86_32 is broken with this for 2 codecs it touches.
> Fix is trivial, to be fixed later.
[...]
> +int ff_init_vlc_multi_from_lengths(VLC *vlc, VLC_MULTI *multi, int nb_bits, int nb_elems,
> + int nb_codes, const int8_t *lens, int lens_wrap,
> + const void *symbols, int symbols_wrap, int symbols_size,
> + int offset, int flags, void *logctx)
> +{
> + VLCcode localbuf[LOCALBUF_ELEMS], *buf = localbuf;
> + uint64_t code;
> + int ret, j, len_max = FFMIN(32, 3 * nb_bits);
> +
> + ret = vlc_common_init(vlc, nb_bits, nb_codes, &buf, flags);
> + if (ret < 0)
> + return ret;
> +
> + multi->table = av_malloc(sizeof(*multi->table) << nb_bits);
> + if (!multi->table)
> + return AVERROR(ENOMEM);
> +
> + j = code = 0;
> + for (int i = 0; i < nb_codes; i++, lens += lens_wrap) {
> + int len = *lens;
> + if (len > 0) {
> + unsigned sym;
> +
> + buf[j].bits = len;
> + if (symbols)
> + GET_DATA(sym, symbols, i, symbols_wrap, symbols_size)
> + else
> + sym = i;
> + buf[j].symbol = sym + offset;
> + buf[j++].code = code;
> + } else if (len < 0) {
> + len = -len;
> + } else
> + continue;
> + if (len > len_max || code & ((1U << (32 - len)) - 1)) {
> + av_log(logctx, AV_LOG_ERROR, "Invalid VLC (length %u)\n", len);
> + goto fail;
> + }
> + code += 1U << (32 - len);
> + if (code > UINT32_MAX + 1ULL) {
> + av_log(logctx, AV_LOG_ERROR, "Overdetermined VLC tree\n");
> + goto fail;
> + }
> + }
> + ret = vlc_common_end(vlc, nb_bits, j, buf, flags, localbuf);
> + if (ret < 0)
> + goto fail;
> + return vlc_multi_gen(multi->table, vlc, nb_elems, j, nb_bits, buf, logctx);
> +fail:
> + if (buf != localbuf)
> + av_free(buf);
> + return AVERROR_INVALIDDATA;
> +}
This is copied and pasted from
int ff_vlc_init_from_lengths(VLC *vlc, int nb_bits, int nb_codes,
leading to code duplication; it would be better if you could
factor the duplication out.
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
"You are 36 times more likely to die in a bathtub than at the hands of a
terrorist. Also, you are 2.5 times more likely to become a president and
2 times more likely to become an astronaut, than to die in a terrorist
attack." -- Thoughty2
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH] MULTI VLC decoding boost
2023-09-14 22:01 ` Michael Niedermayer
@ 2023-09-14 22:05 ` Paul B Mahol
0 siblings, 0 replies; 5+ messages in thread
From: Paul B Mahol @ 2023-09-14 22:05 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Fri, Sep 15, 2023 at 12:01 AM Michael Niedermayer <michael@niedermayer.cc>
wrote:
> On Mon, Aug 28, 2023 at 07:36:17PM +0200, Paul B Mahol wrote:
> > Patches attached.
> >
> > Thanks for kurosu for pointing unmerged branches.
> >
> > The UNCACHED_PATH for x86_32 is broken with this for 2 codecs it touches.
> > Fix is trivial, to be fixed later.
> [...]
>
> > +int ff_init_vlc_multi_from_lengths(VLC *vlc, VLC_MULTI *multi, int
> nb_bits, int nb_elems,
> > + int nb_codes, const int8_t *lens, int
> lens_wrap,
> > + const void *symbols, int
> symbols_wrap, int symbols_size,
> > + int offset, int flags, void *logctx)
> > +{
> > + VLCcode localbuf[LOCALBUF_ELEMS], *buf = localbuf;
> > + uint64_t code;
> > + int ret, j, len_max = FFMIN(32, 3 * nb_bits);
> > +
> > + ret = vlc_common_init(vlc, nb_bits, nb_codes, &buf, flags);
> > + if (ret < 0)
> > + return ret;
> > +
> > + multi->table = av_malloc(sizeof(*multi->table) << nb_bits);
> > + if (!multi->table)
> > + return AVERROR(ENOMEM);
> > +
> > + j = code = 0;
> > + for (int i = 0; i < nb_codes; i++, lens += lens_wrap) {
> > + int len = *lens;
> > + if (len > 0) {
> > + unsigned sym;
> > +
> > + buf[j].bits = len;
> > + if (symbols)
> > + GET_DATA(sym, symbols, i, symbols_wrap, symbols_size)
> > + else
> > + sym = i;
> > + buf[j].symbol = sym + offset;
> > + buf[j++].code = code;
> > + } else if (len < 0) {
> > + len = -len;
> > + } else
> > + continue;
> > + if (len > len_max || code & ((1U << (32 - len)) - 1)) {
> > + av_log(logctx, AV_LOG_ERROR, "Invalid VLC (length %u)\n",
> len);
> > + goto fail;
> > + }
> > + code += 1U << (32 - len);
> > + if (code > UINT32_MAX + 1ULL) {
> > + av_log(logctx, AV_LOG_ERROR, "Overdetermined VLC tree\n");
> > + goto fail;
> > + }
> > + }
> > + ret = vlc_common_end(vlc, nb_bits, j, buf, flags, localbuf);
> > + if (ret < 0)
> > + goto fail;
> > + return vlc_multi_gen(multi->table, vlc, nb_elems, j, nb_bits, buf,
> logctx);
> > +fail:
> > + if (buf != localbuf)
> > + av_free(buf);
> > + return AVERROR_INVALIDDATA;
> > +}
>
> this is copy and pasted from
>
> int ff_vlc_init_from_lengths(VLC *vlc, int nb_bits, int nb_codes,
>
> leading to code duplication, it would be better if you could
> factor the duplication out
>
> thx
>
-1
>
> [...]
>
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> "You are 36 times more likely to die in a bathtub than at the hands of a
> terrorist. Also, you are 2.5 times more likely to become a president and
> 2 times more likely to become an astronaut, than to die in a terrorist
> attack." -- Thoughty2
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH] MULTI VLC decoding boost
2023-08-28 17:36 [FFmpeg-devel] [PATCH] MULTI VLC decoding boost Paul B Mahol
2023-09-04 16:08 ` Paul B Mahol
2023-09-14 22:01 ` Michael Niedermayer
@ 2023-10-22 18:01 ` Michael Niedermayer
2 siblings, 0 replies; 5+ messages in thread
From: Michael Niedermayer @ 2023-10-22 18:01 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 2864 bytes --]
On Mon, Aug 28, 2023 at 07:36:17PM +0200, Paul B Mahol wrote:
> Patches attached.
>
> Thanks for kurosu for pointing unmerged branches.
>
[...]
> +static void add_level(VLC_MULTI_ELEM *table, const int nb_elems,
> + const int num, const int numbits,
> + const VLCcode *buf,
> + uint32_t curcode, int curlen,
> + int curlimit, int curlevel,
> + const int minlen, const int max,
> + unsigned* levelcnt, VLC_MULTI_ELEM *info)
> +{
> + if (nb_elems > 256 && curlevel > 2)
> + return; // No room
this and
> + for (int i = num-1; i > max; i--) {
> + for (int j = 0; j < 2; j++) {
> + int newlimit, sym;
> + int t = j ? i-1 : i;
> + int l = buf[t].bits;
> + uint32_t code;
> +
> + sym = buf[t].symbol;
> + if (l > curlimit)
> + return;
> + code = curcode + (buf[t].code >> curlen);
> + newlimit = curlimit - l;
> + l += curlen;
> + if (nb_elems>256) AV_WN16(info->val+2*curlevel, sym);
> + else info->val[curlevel] = sym&0xFF;
> +
> + if (curlevel) { // let's not add single entries
> + uint32_t val = code >> (32 - numbits);
> + uint32_t nb = val + (1U << (numbits - l));
> + info->len = l;
> + info->num = curlevel+1;
> + for (; val < nb; val++)
> + AV_COPY64(table+val, info);
> + levelcnt[curlevel-1]++;
> + }
> +
> + if (curlevel+1 < VLC_MULTI_MAX_SYMBOLS && newlimit >= minlen) {
this are 2 checks doing the same thing for 8 and 16 bit
what mess is this ?
For 8-bit we have VLC_MULTI_MAX_SYMBOLS (6) entries of space in the array, so we skip anything beyond that.
For 16-bit we have VLC_MULTI_MAX_SYMBOLS/2 entries of space, which is 3, and the skip is instead
done inside add_level() above with a hardcoded literal number
(nb_elems > 256 is the check for whether it is 8- or 16-bit).
why is such totally hacked up code pushed with standing objections and no
review ?
Yes, I'll fix this one, but I have the feeling this code has more surprises.
> + add_level(table, nb_elems, num, numbits, buf,
> + code, l, newlimit, curlevel+1,
> + minlen, max, levelcnt, info);
> + }
> + }
> + }
> +}
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Dictatorship: All citizens are under surveillance, all their steps and
actions recorded, for the politicians to enforce control.
Democracy: All politicians are under surveillance, all their steps and
actions recorded, for the citizens to enforce control.
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-10-22 18:01 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-28 17:36 [FFmpeg-devel] [PATCH] MULTI VLC decoding boost Paul B Mahol
2023-09-04 16:08 ` Paul B Mahol
2023-09-14 22:01 ` Michael Niedermayer
2023-09-14 22:05 ` Paul B Mahol
2023-10-22 18:01 ` Michael Niedermayer
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git