From f5257ca9ed821e9fb3dd9edc3487da4d06ba47a3 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Wed, 25 Oct 2023 09:58:24 +0200 Subject: [PATCH 1/4] avcodec/mlpenc: replace naive rematrix with brute-force search Signed-off-by: Paul B Mahol --- libavcodec/mlpenc.c | 183 +++++++++++++++++++++++++++++++------------- 1 file changed, 129 insertions(+), 54 deletions(-) diff --git a/libavcodec/mlpenc.c b/libavcodec/mlpenc.c index 6b801605db..27ef5f2c82 100644 --- a/libavcodec/mlpenc.c +++ b/libavcodec/mlpenc.c @@ -136,7 +136,8 @@ typedef struct MLPEncodeContext { int min_restart_interval; ///< Min interval of access units in between two major frames. int cur_restart_interval; int lpc_coeff_precision; - int rematrix_precision; + int rematrix_search_step; /**< log2 of the step between candidate coefficients (used as 1 << value) */ + int rematrix_search_limit; /**< log2 of the coefficient search bound (used as 1 << value) */ int lpc_type; int lpc_passes; int prediction_order; @@ -1399,79 +1400,150 @@ static void determine_filters(MLPEncodeContext *ctx, MLPSubstream *s) set_filter(ctx, s, ch, 0); } +static int invert2x2(const int32_t *s, int32_t *d) /* Writes d = adjugate(s) * (1 << 28) / det(s) for the 2x2 matrix s; returns -1 if the determinant is zero, otherwise 0. */ +{ + int64_t det; + + d[0] = +s[3]; + d[1] = -s[1]; + d[2] = -s[2]; + d[3] = +s[0]; + + det = (int64_t)s[0] * d[0] + (int64_t)s[1] * d[2]; + if (det == 0LL) + return -1; + + d[0] = (d[0] * (1LL << 28)) / det; + d[1] = (d[1] * (1LL << 28)) / det; + d[2] = (d[2] * (1LL << 28)) / det; + d[3] = (d[3] * (1LL << 28)) / det; /* NOTE(review): each 64-bit quotient above is narrowed to int32_t with no range check -- confirm callers cannot overflow it. */ + + return 0; +} + static int estimate_coeff(MLPEncodeContext *ctx, MLPSubstream *s, - MatrixParams *mp, - int ch0, int ch1) + MatrixParams *mp, int ch0, int ch1) /* Searches candidate 2x2 matrices for channels ch0/ch1 and stores the best one in mp; returns 1 if one was stored, 0 if none qualified. */ { - int32_t maxl = INT32_MIN, maxr = INT32_MIN, minl = INT32_MAX, minr = INT32_MAX; - int64_t summ = 0, sums = 0, suml = 0, sumr = 0, enl = 0, enr = 0; - const int shift = 14 - ctx->rematrix_precision; - int32_t cf0, cf1, e[4], d[4], ml, mr; - int i, count = 0; + const int search_limit = 1 << ctx->rematrix_search_limit; + const int search_step = 1 << ctx->rematrix_search_step; + int32_t best[4], d[4], e[4], count = 0, chan = -1; + uint64_t best_sum = UINT64_MAX; + int32_t v[2], inc; - for (int j = 
0; j <= ctx->cur_restart_interval; j++) { - DecodingParams *dp = &s->b[j].decoding_params; - const int32_t *ch[2]; + v[0] = 0; + v[1] = 0; - ch[0] = dp->sample_buffer[ch0]; - ch[1] = dp->sample_buffer[ch1]; + inc = search_step; - for (int i = 0; i < dp->blocksize; i++) { - int32_t lm = ch[0][i], rm = ch[1][i]; + while (1) { /* each pass evaluates the current (v[0], v[1]) pair for both values of c, then advances the pair */ + for (int c = 0; c < 2; c++) { + uint64_t sum = 0; + + if (c) { /* c == 1: lt keeps lm unchanged, rt mixes lm and rm with (v[1], v[0]) */ + e[0] = 1 << 14; + e[1] = 0 << 14; + e[2] = v[1]; + e[3] = v[0]; + } else { /* c == 0: lt mixes lm and rm with (v[0], v[1]), rt keeps rm unchanged */ + e[0] = v[0]; + e[1] = v[1]; + e[2] = 0 << 14; + e[3] = 1 << 14; + } - enl += FFABS(lm); - enr += FFABS(rm); + if (invert2x2(e, d)) { + sum = UINT64_MAX; + goto next; + } - summ += FFABS(lm + rm); - sums += FFABS(lm - rm); + for (int i = 0; i < 4; i++) { + if (d[i] != av_clip_intp2(d[i], 15)) { /* reject the candidate when an inverse coefficient is altered by av_clip_intp2(..., 15) */ + sum = UINT64_MAX; + goto next; + } + } - suml += lm; - sumr += rm; + for (int j = 0; j <= ctx->cur_restart_interval; j++) { + DecodingParams *dp = &s->b[j].decoding_params; + const int32_t *ch[2]; - maxl = FFMAX(maxl, lm); - maxr = FFMAX(maxr, rm); + ch[0] = dp->sample_buffer[ch0]; + ch[1] = dp->sample_buffer[ch1]; - minl = FFMIN(minl, lm); - minr = FFMIN(minr, rm); - } - } + for (int i = 0; i < dp->blocksize; i++) { + const int64_t lm = ch[0][i], rm = ch[1][i]; + int64_t lt, rt, v = 0; /* NOTE(review): this v shadows the outer v[2] */ - summ -= FFABS(suml + sumr); - sums -= FFABS(suml - sumr); + lt = ((lm * e[0]) >> 14) + ((rm * e[1]) >> 14); + rt = ((lm * e[2]) >> 14) + ((rm * e[3]) >> 14); - ml = maxl - minl; - mr = maxr - minr; + if (FFABS(lt) > (1LL << 23) || + FFABS(rt) > (1LL << 23)) { + sum = UINT64_MAX; + goto next; + } - if (!summ && !sums) - return 0; + if (c) + v += FFABS(rt); + else + v += FFABS(lt); + sum += v; + if (sum > best_sum) /* this candidate can no longer beat the best one found so far */ + goto next; + + if ((((lt * d[0]) >> 14) + ((rt * d[1]) >> 14)) != lm) { /* the inverse matrix must reproduce the original sample exactly */ + sum = UINT64_MAX; + goto next; + } - if (!ml || !mr) - return 0; + if ((((lt * d[2]) >> 14) + ((rt * d[3]) >> 14)) != rm) { + sum = UINT64_MAX; + goto next; + } + } + } - if ((FFABS(ml) + FFABS(mr)) >= (1 << 24)) - return 0; +next: + if (sum < 
best_sum) { + chan = c; + best_sum = sum; + memcpy(best, e, sizeof(e)); + } + } - cf0 = (FFMIN(FFABS(mr), FFABS(ml)) * (1LL << 14)) / FFMAX(FFABS(ml), FFABS(mr)); - cf0 = (cf0 >> shift) << shift; - cf1 = -cf0; + v[1] += inc; + + if (v[1] < -search_limit) { + if (v[0] > search_limit) { + v[0] = -search_step; + } else if (v[0] >= 0) { + v[0] += search_step; + } else if (v[0] >= -search_limit) { + v[0] -= search_step; + } else { + break; + } - if (sums > summ) - FFSWAP(int32_t, cf0, cf1); + inc = search_step; + } else if (v[1] > search_limit) { + v[1] = 0; + inc = -search_step; + } - count = 1; - i = enl < enr; - mp->outch[0] = ch0 + i; + if (best_sum == 0ULL) /* a zero sum cannot be improved, so stop searching */ + break; + } - d[!i] = cf0; - d[ i] = 1 << 14; - e[!i] = cf1; - e[ i] = 1 << 14; + if (chan < 0) + return 0; - mp->coeff[0][ch0] = av_clip_intp2(d[0], 15); - mp->coeff[0][ch1] = av_clip_intp2(d[1], 15); + mp->outch[0] = chan; /* NOTE(review): stores 0 or 1 rather than a value derived from ch0/ch1 -- presumably callers pass ch0 == 0 and ch1 == 1; confirm */ + memcpy(e, best, sizeof(e)); + invert2x2(e, d); /* recompute the inverse of the winning matrix; it already inverted successfully during the search */ + count = 1; - mp->forco[0][ch0] = av_clip_intp2(e[0], 15); - mp->forco[0][ch1] = av_clip_intp2(e[1], 15); + mp->coeff[0][ch0] = d[chan * 2 + 0]; mp->coeff[0][ch1] = d[chan * 2 + 1]; + mp->forco[0][ch0] = e[chan * 2 + 0]; mp->forco[0][ch1] = e[chan * 2 + 1]; return count; } @@ -2060,11 +2132,13 @@ static void set_major_params(MLPEncodeContext *ctx, MLPSubstream *s) for (int index = 0; index < s->b[ctx->restart_intervals-1].seq_size; index++) { memcpy(&s->b[index].major_decoding_params, &s->b[index].decoding_params, sizeof(DecodingParams)); + for (int ch = 0; ch <= rh->max_matrix_channel; ch++) { int8_t shift = s->b[index].decoding_params.output_shift[ch]; max_shift = FFMAX(max_shift, shift); } + for (int ch = rh->min_channel; ch <= rh->max_channel; ch++) { uint8_t huff_lsbs = s->b[index].channel_params[ch].huff_lsbs; @@ -2277,7 +2351,8 @@ static const AVOption mlp_options[] = { { "prediction_order", "Search method for selecting prediction order", OFFSET(prediction_order), AV_OPT_TYPE_INT, {.i64 = ORDER_METHOD_EST }, ORDER_METHOD_EST, 
ORDER_METHOD_SEARCH, FLAGS, "predm" }, { "estimation", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = ORDER_METHOD_EST }, 0, 0, FLAGS, "predm" }, { "search", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = ORDER_METHOD_SEARCH }, 0, 0, FLAGS, "predm" }, -{ "rematrix_precision", "Rematrix coefficient precision", OFFSET(rematrix_precision), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 14, FLAGS }, +{ "rematrix_limit", "Rematrix search limit precision", OFFSET(rematrix_search_limit), AV_OPT_TYPE_INT, {.i64 = 16 }, 14, 20, FLAGS }, /* exponent: estimate_coeff() uses 1 << rematrix_search_limit as its search bound */ +{ "rematrix_step", "Rematrix search step precision", OFFSET(rematrix_search_step), AV_OPT_TYPE_INT, {.i64 = 10 }, 1, 14, FLAGS }, /* exponent: estimate_coeff() uses 1 << rematrix_search_step as its step */ { NULL }, }; -- 2.42.0