* [FFmpeg-devel] [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb, code
@ 2025-03-12 3:10 Andreas Rheinhardt
2025-03-12 4:50 ` Lynne
0 siblings, 1 reply; 4+ messages in thread
From: Andreas Rheinhardt @ 2025-03-12 3:10 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 29 bytes --]
Patches attached.
- Andreas
[-- Attachment #2: 0001-avcodec-vc2enc-Use-LUT-to-assemble-interleaved-golom.patch --]
[-- Type: text/x-patch, Size: 2977 bytes --]
From 362a2cdad8717c016cd05e8d782260bd1aa0751a Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 12 Mar 2025 03:26:09 +0100
Subject: [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb
code
Up until now, the encoder processed only one bit at a time.
With this patch, it is eight bits.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/vc2enc.c | 45 +++++++++++++++++++++++++++++++++------------
1 file changed, 33 insertions(+), 12 deletions(-)
diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c
index 4728a48938..1fe973f4cd 100644
--- a/libavcodec/vc2enc.c
+++ b/libavcodec/vc2enc.c
@@ -22,6 +22,7 @@
#include "libavutil/mem.h"
#include "libavutil/pixdesc.h"
#include "libavutil/opt.h"
+#include "libavutil/thread.h"
#include "libavutil/version.h"
#include "codec_internal.h"
#include "dirac.h"
@@ -186,22 +187,39 @@ typedef struct VC2EncContext {
enum DiracParseCodes last_parse_code;
} VC2EncContext;
+/// x_k x_{k-1} ... x_0 -> 0 x_k 0 x_{k - 1} ... 0 x_0
+static uint16_t interleaved_ue_golomb_tab[256];
+/// 1 x_{k-1} ... x_0 -> 0 0 0 x_{k - 1} ... 0 x_0
+static uint16_t top_interleaved_ue_golomb_tab[256];
+/// 1 x_{k-1} ... x_0 -> 2 * k
+static uint8_t golomb_len_tab[256];
+
+static av_cold void vc2_init_static_data(void)
+{
+ interleaved_ue_golomb_tab[1] = 1;
+ for (unsigned i = 2; i < 256; ++i) {
+ golomb_len_tab[i] = golomb_len_tab[i >> 1] + 2;
+ interleaved_ue_golomb_tab[i] = (interleaved_ue_golomb_tab[i >> 1] << 2) | (i & 1);
+ top_interleaved_ue_golomb_tab[i] = interleaved_ue_golomb_tab[i] ^ (1 << golomb_len_tab[i]);
+ }
+}
+
static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val)
{
- int i;
- int bits = av_log2(++val);
- unsigned topbit = 1 << bits;
- uint64_t pbits = 0;
-
- for (i = 0; i < bits; i++) {
- topbit >>= 1;
- av_assert2(pbits <= UINT64_MAX>>3);
- pbits <<= 2;
- if (val & topbit)
- pbits |= 0x1;
+ uint64_t pbits = 1;
+ int bits = 1;
+
+ ++val;
+
+ while (val >> 8) {
+ pbits |= (uint64_t)interleaved_ue_golomb_tab[val & 0xff] << bits;
+ val >>= 8;
+ bits += 16;
}
+ pbits |= (uint64_t)top_interleaved_ue_golomb_tab[val] << bits;
+ bits += golomb_len_tab[val];
- put_bits63(pb, 2 * bits + 1, (pbits << 1) | 1);
+ put_bits63(pb, bits, pbits);
}
static av_always_inline int count_vc2_ue_uint(uint32_t val)
@@ -1003,6 +1021,7 @@ static av_cold int vc2_encode_end(AVCodecContext *avctx)
static av_cold int vc2_encode_init(AVCodecContext *avctx)
{
+ static AVOnce init_static_once = AV_ONCE_INIT;
Plane *p;
SubBand *b;
int i, level, o, shift;
@@ -1165,6 +1184,8 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx)
}
}
+ ff_thread_once(&init_static_once, vc2_init_static_data);
+
return 0;
}
--
2.45.2
[-- Attachment #3: 0002-avcodec-vc2enc-Avoid-excessive-inlining.patch --]
[-- Type: text/x-patch, Size: 1832 bytes --]
From 389a64c00bc8244186db1abb25be8dd5ec452df7 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Wed, 12 Mar 2025 03:56:03 +0100
Subject: [PATCH 2/2] avcodec/vc2enc: Avoid excessive inlining
There is no reason to inline put_vc2_ue_uint() everywhere;
only one call site is actually hot: The one in encode_subband()
(which accounts for 35735040 of 35739495 calls to said function
in a FATE run). Uninline all the others.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/vc2enc.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c
index 1fe973f4cd..d05df64911 100644
--- a/libavcodec/vc2enc.c
+++ b/libavcodec/vc2enc.c
@@ -204,7 +204,7 @@ static av_cold void vc2_init_static_data(void)
}
}
-static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val)
+static av_always_inline void put_vc2_ue_uint_inline(PutBitContext *pb, uint32_t val)
{
uint64_t pbits = 1;
int bits = 1;
@@ -222,6 +222,11 @@ static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val)
put_bits63(pb, bits, pbits);
}
+static av_noinline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val)
+{
+ put_vc2_ue_uint_inline(pb, val);
+}
+
static av_always_inline int count_vc2_ue_uint(uint32_t val)
{
return 2 * av_log2(val + 1) + 1;
@@ -545,7 +550,7 @@ static void encode_subband(const VC2EncContext *s, PutBitContext *pb,
for (y = top; y < bottom; y++) {
for (x = left; x < right; x++) {
uint32_t c_abs = QUANT(FFABS(coeff[x]), q_m, q_a, q_s);
- put_vc2_ue_uint(pb, c_abs);
+ put_vc2_ue_uint_inline(pb, c_abs);
if (c_abs)
put_bits(pb, 1, coeff[x] < 0);
}
--
2.45.2
[-- Attachment #4: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb, code
2025-03-12 3:10 [FFmpeg-devel] [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb, code Andreas Rheinhardt
@ 2025-03-12 4:50 ` Lynne
2025-03-12 5:27 ` Andreas Rheinhardt
0 siblings, 1 reply; 4+ messages in thread
From: Lynne @ 2025-03-12 4:50 UTC (permalink / raw)
To: ffmpeg-devel
[-- Attachment #1.1.1.1: Type: text/plain, Size: 742 bytes --]
On 12/03/2025 04:10, Andreas Rheinhardt wrote:
> Patches attached.
>
> - Andreas
First patch is wild, it's surprising no one considered inverting the way
decoder parses codes for an encoder yet.
Rather than ORing and using put_bits63, I think it would make more sense
to write out each chunk using put_bits sequentially. It might be
possible to reverse the lookups such that you get the MSBs first so you
wouldn't need to reverse them out of place in a small array.
But either way, LGTM. Feel free to explore this in a follow-up.
Second patch seems a bit pointless. It's just one single call you're
uninlining? Chasing to save a few extra bytes of binary surely doesn't
deserve having a wrapper function for uninlining.
[-- Attachment #1.1.1.2: OpenPGP public key --]
[-- Type: application/pgp-keys, Size: 637 bytes --]
[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 236 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb, code
2025-03-12 4:50 ` Lynne
@ 2025-03-12 5:27 ` Andreas Rheinhardt
2025-03-12 13:46 ` Lynne
0 siblings, 1 reply; 4+ messages in thread
From: Andreas Rheinhardt @ 2025-03-12 5:27 UTC (permalink / raw)
To: ffmpeg-devel
Lynne:
> On 12/03/2025 04:10, Andreas Rheinhardt wrote:
>> Patches attached.
>>
>> - Andreas
>
> First patch is wild, its surprising no one considered inverting the way
> decoder parses codes for an encoder yet.
I didn't even look at the decoder.
(It is actually surprising that it took until
512e597932dfe05cf5665192efbe2c93c2e36af2 for the original code to be
improved.)
> Rather than ORing and using put_bits63, I think it would make more sense
> to write out each chunk using put_bits sequentially. It might be
> possible to reverse the lookups such that you get the MSBs first so you
> wouldn't need to reverse them out of place in a small array.
> But either way, LGTM. Feel free to explore this in a follow-up.
I don't think that writing them sequentially will improve anything: In
order to be able to use a LUT, I would have to shift the bits starting
with the MSBs into position; and then there would be the internal shifts
and checks inside put_bits().
Apart from that: put_bits63() is the same as put_bits() when BUF_BITS is
64 (see ede2b391cc516f4f93621f6a214b3410b231f582).
>
> Second patch seems a bit pointless. It's just one single call you're
> uninlining? Chasing to save a few extra bytes of binary surely don't
> deserve having a wrapper function for uninlining.
>
I am uninlining all calls besides the hot one. 31 callsites.
For GCC, this reduced codesize from 2c36 to 25b1 (15% saved), for clang from
4b08 to 3338 (32% saved).
- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [FFmpeg-devel] [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb, code
2025-03-12 5:27 ` Andreas Rheinhardt
@ 2025-03-12 13:46 ` Lynne
0 siblings, 0 replies; 4+ messages in thread
From: Lynne @ 2025-03-12 13:46 UTC (permalink / raw)
To: ffmpeg-devel
On 12/03/2025 06:27, Andreas Rheinhardt wrote:
> Lynne:
>> On 12/03/2025 04:10, Andreas Rheinhardt wrote:
>>> Patches attached.
>>>
>>> - Andreas
>>
>> First patch is wild, its surprising no one considered inverting the way
>> decoder parses codes for an encoder yet.
>
> I didn't even look at the decoder.
> (It is actually surprising that it took until
> 512e597932dfe05cf5665192efbe2c93c2e36af2 for the original code to be
> improved.)
>
>> Rather than ORing and using put_bits63, I think it would make more sense
>> to write out each chunk using put_bits sequentially. It might be
>> possible to reverse the lookups such that you get the MSBs first so you
>> wouldn't need to reverse them out of place in a small array.
>> But either way, LGTM. Feel free to explore this in a follow-up.
>
> I don't think that writing them sequentially will improve anything: In
> order to be able to use a LUT, I would have to shift the bits starting
> with the MSBs into position; and then there would be the internal shifts
> and checks inside put_bits().
> Apart from that: put_bits63() is the same as put_bits() when BUF_BITS is
> 64 (see ede2b391cc516f4f93621f6a214b3410b231f582).
>
>>
>> Second patch seems a bit pointless. It's just one single call you're
>> uninlining? Chasing to save a few extra bytes of binary surely don't
>> deserve having a wrapper function for uninlining.
>>
>
> I am uninlining all calls besides the hot one. 31 callsites.
> For GCC, this reduced codesize 2c36 to 25b1 (15% saved), for clang from
> 4b08 to 3338 (32% saved).
Oh, it was late and I didn't read carefully.
Both patches LGTM.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-03-12 13:46 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-12 3:10 [FFmpeg-devel] [PATCH 1/2] avcodec/vc2enc: Use LUT to assemble interleaved golomb, code Andreas Rheinhardt
2025-03-12 4:50 ` Lynne
2025-03-12 5:27 ` Andreas Rheinhardt
2025-03-12 13:46 ` Lynne
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git