From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> To: ffmpeg-devel@ffmpeg.org Subject: Re: [FFmpeg-devel] [PATCH 3/7] proresdec2: use VLC for level instead of EC switch Date: Fri, 8 Sep 2023 11:58:16 +0200 Message-ID: <AS8P250MB074416FD282155D73239E4368FEDA@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM> (raw) In-Reply-To: <AS8P250MB0744FE441F1B5B9BC63049E68FEDA@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM> Andreas Rheinhardt: > Christophe Gisquet: >> x86/x64: 61/52 -> 55/46 >> Around 7-10% speedup. >> >> Run and DC do not lend themselves to such changes, likely because >> their distribution is less skewed, and need larger average vlc read >> iterations. >> --- >> libavcodec/proresdec.h | 1 + >> libavcodec/proresdec2.c | 77 ++++++++++++++++++++++++++++++++++------- >> 2 files changed, 66 insertions(+), 12 deletions(-) >> >> diff --git a/libavcodec/proresdec.h b/libavcodec/proresdec.h >> index 1e48752e6f..7ebacaeb21 100644 >> --- a/libavcodec/proresdec.h >> +++ b/libavcodec/proresdec.h >> @@ -22,6 +22,7 @@ >> #ifndef AVCODEC_PRORESDEC_H >> #define AVCODEC_PRORESDEC_H >> >> +#define CACHED_BITSTREAM_READER 1 > > This should be in the commit switching to the cached bitstream reader. Correction: This header is included in videotoolbox.c and there is other stuff that also includes get_bits.h included in said file (and currently gets included before proresdec.h). This means that proresdec2.c and videotoolbox.c will have different opinions on what a GetBitContext is: It will be the non-cached one in videotoolbox.c and the cached one in proresdec2.c. This will work in practice, because ProresContext does not need the complete GetBitContext type at all (it does not contain a GetBitContext at all), so offsets are not affected. But it is nevertheless undefined behaviour and could become dangerous when using LTO. So you should switch the type of the pointer to BitstreamContextBE* in proresdec2.h. Furthermore, you can either include bitstream.h in proresdec.h or (IMO better) use a forward declaration and struct BitstreamContextBE* in the function pointer without including get_bits.h in the header at all. > >> #include "get_bits.h" >> #include "blockdsp.h" >> #include "proresdsp.h" >> diff --git a/libavcodec/proresdec2.c b/libavcodec/proresdec2.c >> index 65e8b01755..91c689d9ef 100644 >> --- a/libavcodec/proresdec2.c >> +++ b/libavcodec/proresdec2.c >> @@ -24,17 +24,17 @@ >> * Known FOURCCs: 'apch' (HQ), 'apcn' (SD), 'apcs' (LT), 'apco' (Proxy), 'ap4h' (4444), 'ap4x' (4444 XQ) >> */ >> >> -#define CACHED_BITSTREAM_READER 1 >> +//#define DEBUG >> >> #include "config_components.h" >> >> #include "libavutil/internal.h" >> #include "libavutil/mem_internal.h" >> +#include "libavutil/thread.h" >> >> #include "avcodec.h" >> #include "codec_internal.h" >> #include "decode.h" >> -#include "get_bits.h" >> #include "hwaccel_internal.h" >> #include "hwconfig.h" >> #include "idctdsp.h" >> @@ -129,8 +129,64 @@ static void unpack_alpha_12(GetBitContext *gb, uint16_t *dst, int num_coeffs, >> } >> } >> >> +#define AC_BITS 12 >> +#define PRORES_LEV_BITS 9 >> + >> +static const uint8_t ac_info[] = { 0x04, 0x0A, 0x05, 0x06, 0x28, 0x4C }; >> +static VLC ac_vlc[6]; >> + >> +static av_cold void init_vlcs(void) >> +{ >> + int i; >> + for (i = 0; i < sizeof(ac_info); i++) { > > FF_ARRAY_ELEMS() is cleaner; also we support and prefer declarations > inside for-loops: for (int i = 0; > >> + uint32_t ac_codes[1<<AC_BITS]; >> + uint8_t ac_bits[1<<AC_BITS]; >> + unsigned int rice_order, exp_order, switch_bits, switch_val; >> + int ac, max_bits = 0, codebook = ac_info[i]; >> + >> + /* number of prefix bits to switch between Rice and expGolomb */ >> + switch_bits = (codebook & 3); >> + rice_order = codebook >> 5; /* rice code order */ >> + exp_order = (codebook >> 2) & 7; /* exp golomb code order */ >> + >> + switch_val = (switch_bits+1) << rice_order; >> + >> + // Values are actually transformed, but this is more a wrapping >> + for (ac = 0; ac <1<<AC_BITS; ac++) { >> + int exponent, bits, val = ac; >> + unsigned int code; >> + >> + if (val >= switch_val) { >> + val += (1 << exp_order) - switch_val; >> + exponent = av_log2(val); >> + bits = exponent+1+switch_bits-exp_order/*0*/ + exponent+1/*val*/; >> + code = val; >> + } else if (rice_order) { >> + bits = (val >> rice_order)/*0*/ + 1/*1*/ + rice_order/*val*/; >> + code = (1 << rice_order) | val; >> + } else { >> + bits = val/*0*/ + 1/*1*/; >> + code = 1; >> + } >> + if (bits > max_bits) max_bits = bits; >> + ac_bits [ac] = bits; >> + ac_codes[ac] = code; >> + } >> + >> + ff_free_vlc(ac_vlc+i); > > This is unnecessary, as the VLC is initially blank and is not > initialized multiple times. > >> + >> + if (init_vlc(ac_vlc+i, PRORES_LEV_BITS, 1<<AC_BITS, >> + ac_bits, 1, 1, ac_codes, 4, 4, 0) < 0) { >> + av_log(NULL, AV_LOG_ERROR, "Error for %d(0x%02X), max bits %d\n", >> + i, codebook, max_bits); >> + break; //return AVERROR_BUG; > > This is not how you initialize a static table (you miss the > INIT_VLC_USE_NEW_STATIC flag and don't set the static store buffer). > Search for INIT_VLC_STATIC_OVERLONG for an idea of how to do it. > >> + } >> + } >> +} >> + >> static av_cold int decode_init(AVCodecContext *avctx) >> { >> + static AVOnce init_static_once = AV_ONCE_INIT; >> int ret = 0; >> ProresContext *ctx = avctx->priv_data; >> uint8_t idct_permutation[64]; >> @@ -184,6 +240,9 @@ static av_cold int decode_init(AVCodecContext *avctx) >> >> ctx->pix_fmt = AV_PIX_FMT_NONE; >> >> + // init dc_tables >> + ff_thread_once(&init_static_once, init_vlcs); >> + >> if (avctx->bits_per_raw_sample == 10){ >> ctx->unpack_alpha = unpack_alpha_10; >> } else if (avctx->bits_per_raw_sample == 12){ >> @@ -510,7 +569,7 @@ static av_always_inline int decode_dc_coeffs(GetBitContext *gb, int16_t *out, >> return 0; >> } >> >> -// adaptive codebook switching lut according to previous run/level values >> +// adaptive codebook switching lut according to previous run values >> static const char run_to_cb[16][4] = { >> { 2, 0, -1, 1 }, { 2, 0, -1, 1 }, { 1, 0, 0, 0 }, { 1, 0, 0, 0 }, { 0, 0, 1, -1 }, >> { 1, 1, 1, 0 }, { 1, 1, 1, 0 }, { 1, 1, 1, 0 }, { 1, 1, 1, 0 }, >> @@ -518,12 +577,6 @@ static const char run_to_cb[16][4] = { >> { 0, 2, 3, -4 } >> }; >> >> -static const char lev_to_cb[10][4] = { >> - { 0, 0, 1, -1 }, { 2, 0, 0, -1 }, { 1, 0, 0, 0 }, { 2, 0, -1, 1 }, { 0, 0, 1, -1 }, >> - { 0, 1, 2, -2 }, { 0, 1, 2, -2 }, { 0, 1, 2, -2 }, { 0, 1, 2, -2 }, >> - { 0, 2, 3, -4 } >> -}; >> - >> static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContext *gb, >> int16_t *out, int blocks_per_slice) >> { >> @@ -540,8 +593,9 @@ static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex >> block_mask = blocks_per_slice - 1; >> >> for (pos = block_mask;;) { >> + static const uint8_t ctx_to_tbl[] = { 0, 1, 2, 3, 0, 4, 4, 4, 4, 5 }; >> + const VLC* tbl = ac_vlc + ctx_to_tbl[FFMIN(level, 9)]; >> unsigned int runcb = FFMIN(run, 15); >> - unsigned int levcb = FFMIN(level, 9); >> bits_rem = get_bits_left(gb); >> if (!bits_rem || (bits_rem < 16 && !show_bits(gb, bits_rem))) >> break; >> @@ -554,8 +608,7 @@ static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex >> return AVERROR_INVALIDDATA; >> } >> >> - DECODE_CODEWORD2(level, lev_to_cb[levcb][0], lev_to_cb[levcb][1], >> - lev_to_cb[levcb][2], lev_to_cb[levcb][3]); >> + level = get_vlc2(gb, tbl->table, PRORES_LEV_BITS, 3); >> level += 1; >> >> i = pos >> log2_block_count; > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2023-09-08 9:57 UTC|newest] Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top 2023-09-08 8:15 [FFmpeg-devel] [PATCH 1/7] proresdec2: port and fix for cached reader Christophe Gisquet 2023-09-08 8:15 ` [FFmpeg-devel] [PATCH 2/7] proresdec2: store precomputed EC parameters Christophe Gisquet 2023-09-08 8:39 ` Andreas Rheinhardt 2023-09-08 8:15 ` [FFmpeg-devel] [PATCH 3/7] proresdec2: use VLC for level instead of EC switch Christophe Gisquet 2023-09-08 8:44 ` Andreas Rheinhardt 2023-09-08 9:58 ` Andreas Rheinhardt [this message] 2023-09-10 15:28 ` Christophe Gisquet 2023-09-10 15:41 ` Andreas Rheinhardt 2023-09-10 15:56 ` Christophe Gisquet 2023-09-08 8:15 ` [FFmpeg-devel] [PATCH 4/7] proresdec2: offset VLCs by 1 to avoid 1 add Christophe Gisquet 2023-09-08 8:15 ` [FFmpeg-devel] [PATCH 5/7] proresdec2: use VLC for small runs and levels Christophe Gisquet 2023-09-08 8:15 ` [FFmpeg-devel] [PATCH 6/7] proresdec2: remove a useless DC codebook entry Christophe Gisquet 2023-09-08 9:08 ` Andreas Rheinhardt 2023-09-08 8:15 ` [FFmpeg-devel] [PATCH 7/7] prores: use VLC LUTs Christophe Gisquet 2023-09-08 9:20 ` Andreas Rheinhardt 2023-09-08 9:58 ` Christophe Gisquet 2023-09-08 8:20 ` [FFmpeg-devel] [PATCH 1/7] proresdec2: port and fix for cached reader Christophe Gisquet 2023-09-08 8:30 ` Andreas Rheinhardt 2023-09-08 8:34 ` Andreas Rheinhardt 2023-09-11 20:54 ` Christophe Gisquet 2023-09-08 8:36 ` Andreas Rheinhardt 2023-09-08 21:00 ` Michael Niedermayer
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=AS8P250MB074416FD282155D73239E4368FEDA@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM \ --to=andreas.rheinhardt@outlook.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git