From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id 083CE47307 for ; Sun, 19 Jan 2025 20:44:30 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id A2BC468B4B4; Sun, 19 Jan 2025 22:44:27 +0200 (EET) Received: from relay5-d.mail.gandi.net (relay5-d.mail.gandi.net [217.70.183.197]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 20C4568A9C0 for ; Sun, 19 Jan 2025 22:44:21 +0200 (EET) Received: by mail.gandi.net (Postfix) with ESMTPSA id 51D8A1C0003 for ; Sun, 19 Jan 2025 20:44:20 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=niedermayer.cc; s=gm1; t=1737319460; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding; bh=E6mMvXQystwnwFowHLHIR7wdpcZ+GqwbHoBor6H52n8=; b=G5Hx2xKnmsHjFru/98ARnAK7+X+b4XbaQC4r6PGyulP74x4acSJyV//j/Lx3VGzfzitCR7 xMIsUAP/VI3kUB6HpHcPYM8BsFcuxx44cRacT3/92F7W429KBJ4bgdXv+x03RAORU7wALe BYNMtwrh/7+Gh5kHiplE89eUbKrQs5sTlqMrVS+W6nPm+gK+qRfVxb/rPOhHcDcxw9S7uK 0a/PTpjsphxvPpaZ+LNBfO0F5B5z2n0ZulC2RkvzUk4+w1VhDOr9XwLMd8I2VSeGFAt6aZ AFiWtRPbo/91j0iO4R8PZDSCR4/go5irDLGqyZjaC34TpYxdhAoGB7gmPRuevg== From: Michael Niedermayer To: FFmpeg development discussions and patches Date: Sun, 19 Jan 2025 21:44:19 +0100 Message-ID: <20250119204419.4078171-1-michael@niedermayer.cc> X-Mailer: git-send-email 2.48.1 MIME-Version: 1.0 X-GND-Sasl: michael@niedermayer.cc Subject: [FFmpeg-devel] [PATCH] avcodec/ffv1: NOT FOR GIT experiment to store exponent and mantisse seperately X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Content-Type: text/plain; 
charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: Storing them separately performs slightly worse (compression-wise) in every variant I tried. It would also likely be slower. If someone has ideas, they are welcome. This is the result of several hours of tuning: first the sign bit storage (there's little to gain here as it's 1% of the bitstream), then the exponent. After I was done tuning the predictor and context for the exponent, I tried combining sign and exponent and it performed better, so this implementation has them combined. With the mantissa we then achieve 534mb, while the combined integer coder achieves 531mb without RCT and 503mb with RCT. The RCT works with integers, not sign/exp/mantissa vectors. But if we apply the RCT as if these were integers, the split coder gives us 527mb, so the integer coder + integer RCT with some tweaks still performs best and is also likely faster, but I'll probably try again after sleeping over this. Also, the quantization tables still need to be tried with the split coder. Signed-off-by: Michael Niedermayer --- libavcodec/ffv1.h | 2 + libavcodec/ffv1enc_template.c | 115 +++++++++++++++++++++++++++------- 2 files changed, 95 insertions(+), 22 deletions(-) diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h index 3254ae1b56b..934c271954b 100644 --- a/libavcodec/ffv1.h +++ b/libavcodec/ffv1.h @@ -106,6 +106,8 @@ typedef struct FFV1SliceContext { }; }; uint16_t fltmap[4][65536]; + + uint8_t newstate[256*256*256*16]; } FFV1SliceContext; typedef struct FFV1Context { diff --git a/libavcodec/ffv1enc_template.c b/libavcodec/ffv1enc_template.c index 4c7cd2e647c..23d6dbe2d05 100644 --- a/libavcodec/ffv1enc_template.c +++ b/libavcodec/ffv1enc_template.c @@ -148,6 +148,9 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc, memset(RENAME(sc->sample_buffer), 0, ring_size * MAX_PLANES * (w + 6) * 
sizeof(*RENAME(sc->sample_buffer))); + if (!sc->newstate[sizeof(sc->newstate)-1]) + memset(sc->newstate, 128, sizeof(sc->newstate)); + if (sc->remap) { memset(sc->fltmap, 0, sizeof(sc->fltmap)); @@ -221,39 +224,107 @@ static int RENAME(encode_rgb_frame)(FFV1Context *f, FFV1SliceContext *sc, r = *((const uint16_t *)(src[2] + x*2 + stride[2]*y)); } - if (sc->remap) { - r = sc->fltmap[0][r]; - g = sc->fltmap[1][g]; - b = sc->fltmap[2][b]; - if (transparency) - a = sc->fltmap[3][a]; - } + if (!(0x8000&r)) + r ^= 0x7FFF; + if (!(0x8000&g)) + g ^= 0x7FFF; + if (!(0x8000&b)) + b ^= 0x7FFF; - if (sc->slice_coding_mode != 1) { - b -= g; - r -= g; - g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2; - b += offset; - r += offset; - } +// if (sc->remap) { +// r = sc->fltmap[0][r]; +// g = sc->fltmap[1][g]; +// b = sc->fltmap[2][b]; +// if (transparency) +// a = sc->fltmap[3][a]; +// } +// +// if (sc->slice_coding_mode != 1) { +// b -= g; +// r -= g; +// g += (b * sc->slice_rct_by_coef + r * sc->slice_rct_ry_coef) >> 2; +// b += offset; +// r += offset; +// } sample[0][0][x] = g; sample[1][0][x] = b; sample[2][0][x] = r; sample[3][0][x] = a; } + +// Using the exponent context for the sign helps alot for the sign compression +// Combining sign and exponent simply performs better than seperate sign and exponent + for (p = 0; p < 3 + transparency; p++) { - int ret; sample[p][0][-1] = sample[p][1][0 ]; sample[p][1][ w] = sample[p][1][w-1]; - if (lbd && sc->slice_coding_mode == 0) - ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p], (p + 1) / 2, 9, ac, pass1); - else - ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p], (p + 1) / 2, - bits + (sc->slice_coding_mode != 1), ac, pass1); - if (ret < 0) - return ret; + for (x = 0; x < w; x++) { + uint8_t * state = sc->newstate; + int X = sample[p][0][x]; + int L = sample[p][0][x-1]; + int LL= sample[p][0][x-2]; + int TT= sample[p][2][x]; + int T = sample[p][1][x]; + int TL= sample[p][1][x-1]; + int TR= 
sample[p][1][x+1]; +// int sign_ctx = (sample[p][0][x-1]>>15) + 2*(sample[p][1][x]>>15) + 4*p; +// put_rac(&sc->c, sc->newstate + sign_ctx, sample[p][0][x] >> 15); + + int sign_ctx = (L>>15) + (T>>15) + 3*p; +// put_rac(&sc->c, state + sign_ctx, X >> 15); +// state += 4*4; + + int LE = (L >>10)&63; + int LLE= (LL>>10)&31; + int TE = (T >>10)&63; + int TTE= (TT>>10)&31; + int TLE= (TL>>10)&31; + int TRE= (TR>>10)&31; + int pred = (LE+TE)/2; +// int pred = mid_pred(LE, TE, LE + TE - TLE); + int diff = ((X>>10)&63) - pred; + int exp_ctx = (TLE&1) + 8*(LE) + 8*(TE) + 2*(TRE&1) + 4*(LE&1) + 8*128*p; +// put_rac(&sc->c, state + exp_ctx + 32*8*64*4*4, X >> 15); + put_symbol_inline(&sc->c, state + 32*exp_ctx, diff, 1, NULL, NULL); + + state += 32*8*128*4; + + int LM = (L &1023); + int LLM= (LL &1023); + int TM = (TL &1023); + int TTM= (TT &1023); + int TLM= (TL &1023); + int TRM= (TR &1023); + pred = (TM+LM)/2; //bad + pred = ((L+T)/2)&1023; +// pred = ((L+T-TL))&1023; + pred = mid_pred(L,T,L+T-TL)&1023; + + int mant_ctx = 2*(LE) + 2*(TE) + 2*128*p; //we loose 0.1% by not including LE&1 but it halfs context space +// mant_ctx += 2*128*4*((LM>>9) + 2*(TM>>9)) ; + mant_ctx = (TLE&1) + 8*(LE) + 8*(TE) + 2*(TRE&1) + 4*(LE&1) + 8*128*p; + mant_ctx += 8*128*4*((TM>>8) + 4*(LM>>8)); + + diff = fold((X&1023) - pred, 10); + put_symbol_inline(&sc->c, state + 32*mant_ctx, diff, 1, NULL, NULL); + } } + + + +// for (p = 0; p < 3 + transparency; p++) { +// int ret; +// sample[p][0][-1] = sample[p][1][0 ]; +// sample[p][1][ w] = sample[p][1][w-1]; +// if (lbd && sc->slice_coding_mode == 0) +// ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p], (p + 1) / 2, 9, ac, pass1); +// else +// ret = RENAME(encode_line)(f, sc, f->avctx, w, sample[p], (p + 1) / 2, +// bits + (sc->slice_coding_mode != 1), ac, pass1); +// if (ret < 0) +// return ret; +// } } return 0; } -- 2.48.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org 
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".