From: Paul B Mahol <onemda@gmail.com> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH] tta decoder improvements Date: Wed, 16 Aug 2023 12:47:36 +0200 Message-ID: <CAPYw7P6of-NO0ZgXR8hv1tHZ_T2HJfdjU_xo6LA2Cim88MqPHA@mail.gmail.com> (raw) [-- Attachment #1: Type: text/plain, Size: 16 bytes --] Patch attached. [-- Attachment #2: 0001-avcodec-tta-switch-to-planar-sample-formats.patch --] [-- Type: text/x-patch, Size: 10571 bytes --] From 2b6ac4f7093157533b7f279a78a73bfabeb98cf0 Mon Sep 17 00:00:00 2001 From: Paul B Mahol <onemda@gmail.com> Date: Tue, 15 Aug 2023 21:13:59 +0200 Subject: [PATCH] avcodec/tta: switch to planar sample formats Makes decoding few percent faster. Also fix code style while here. Signed-off-by: Paul B Mahol <onemda@gmail.com> --- libavcodec/tta.c | 167 +++++++++++++++++++++++++++++++---------------- 1 file changed, 109 insertions(+), 58 deletions(-) diff --git a/libavcodec/tta.c b/libavcodec/tta.c index 3e89571f16..6add4106d3 100644 --- a/libavcodec/tta.c +++ b/libavcodec/tta.c @@ -55,7 +55,7 @@ typedef struct TTAContext { unsigned data_length; int frame_length, last_frame_length; - int32_t *decode_buffer; + int32_t **decode_buffer; uint8_t crc_pass[8]; uint8_t *pass; @@ -107,10 +107,16 @@ static int allocate_buffers(AVCodecContext *avctx) TTAContext *s = avctx->priv_data; if (s->bps < 3) { - s->decode_buffer = av_calloc(s->frame_length, - sizeof(*s->decode_buffer) * s->channels); + s->decode_buffer = av_calloc(s->channels, sizeof(*s->decode_buffer)); if (!s->decode_buffer) return AVERROR(ENOMEM); + + for (int ch = 0; ch < s->channels; ch++) { + s->decode_buffer[ch] = av_calloc(s->frame_length, + sizeof(*s->decode_buffer[ch])); + if (!s->decode_buffer[ch]) + return AVERROR(ENOMEM); + } } else s->decode_buffer = NULL; s->ch_ctx = av_malloc_array(avctx->ch_layout.nb_channels, sizeof(*s->ch_ctx)); @@ -181,14 +187,14 @@ static av_cold int tta_decode_init(AVCodecContext * avctx) } switch(s->bps) 
{ - case 1: avctx->sample_fmt = AV_SAMPLE_FMT_U8; break; + case 1: avctx->sample_fmt = AV_SAMPLE_FMT_U8P; break; case 2: - avctx->sample_fmt = AV_SAMPLE_FMT_S16; + avctx->sample_fmt = AV_SAMPLE_FMT_S16P; break; case 3: - avctx->sample_fmt = AV_SAMPLE_FMT_S32; + avctx->sample_fmt = AV_SAMPLE_FMT_S32P; break; - //case 4: avctx->sample_fmt = AV_SAMPLE_FMT_S32; break; + //case 4: avctx->sample_fmt = AV_SAMPLE_FMT_S32P; break; default: av_log(avctx, AV_LOG_ERROR, "Invalid/unsupported sample format.\n"); return AVERROR_INVALIDDATA; @@ -231,10 +237,10 @@ static int tta_decode_frame(AVCodecContext *avctx, AVFrame *frame, const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; TTAContext *s = avctx->priv_data; + const int bps = s->bps; GetBitContext gb; int i, ret; int cur_chan = 0, framelen = s->frame_length; - uint32_t *p; if (avctx->err_recognition & AV_EF_CRCCHECK) { if (buf_size < 4 || @@ -251,14 +257,13 @@ static int tta_decode_frame(AVCodecContext *avctx, AVFrame *frame, return ret; // decode directly to output buffer for 24-bit sample format - if (s->bps == 3) - s->decode_buffer = (int32_t *)frame->data[0]; + if (bps == 3) + s->decode_buffer = (int32_t **)frame->extended_data; // init per channel states for (i = 0; i < s->channels; i++) { TTAFilter *filter = &s->ch_ctx[i].filter; - s->ch_ctx[i].predictor = 0; - ff_tta_filter_init(filter, ff_tta_filter_configs[s->bps-1]); + ff_tta_filter_init(filter, ff_tta_filter_configs[bps-1]); if (s->format == FORMAT_ENCRYPTED) { int i; for (i = 0; i < 8; i++) @@ -268,9 +273,8 @@ static int tta_decode_frame(AVCodecContext *avctx, AVFrame *frame, } i = 0; - for (p = s->decode_buffer; (int32_t*)p < s->decode_buffer + (framelen * s->channels); p++) { - int32_t *predictor = &s->ch_ctx[cur_chan].predictor; - TTAFilter *filter = &s->ch_ctx[cur_chan].filter; + for (int j = 0; j < framelen * s->channels; j++) { + int32_t *p = s->decode_buffer[cur_chan] + i; TTARice *rice = &s->ch_ctx[cur_chan].rice; uint32_t unary, depth, k; 
int32_t value; @@ -306,44 +310,24 @@ static int tta_decode_frame(AVCodecContext *avctx, AVFrame *frame, rice->sum1 += value - (rice->sum1 >> 4); if (rice->k1 > 0 && rice->sum1 < ff_tta_shift_16[rice->k1]) rice->k1--; - else if(rice->sum1 > ff_tta_shift_16[rice->k1 + 1]) + else if (rice->sum1 > ff_tta_shift_16[rice->k1 + 1]) rice->k1++; value += ff_tta_shift_1[rice->k0]; default: rice->sum0 += value - (rice->sum0 >> 4); if (rice->k0 > 0 && rice->sum0 < ff_tta_shift_16[rice->k0]) rice->k0--; - else if(rice->sum0 > ff_tta_shift_16[rice->k0 + 1]) + else if (rice->sum0 > ff_tta_shift_16[rice->k0 + 1]) rice->k0++; } // extract coded value *p = 1 + ((value >> 1) ^ ((value & 1) - 1)); - // run hybrid filter - s->dsp.filter_process(filter->qm, filter->dx, filter->dl, &filter->error, p, - filter->shift, filter->round); - - // fixed order prediction -#define PRED(x, k) (int32_t)((((uint64_t)(x) << (k)) - (x)) >> (k)) - switch (s->bps) { - case 1: *p += PRED(*predictor, 4); break; - case 2: - case 3: *p += PRED(*predictor, 5); break; - case 4: *p += *predictor; break; - } - *predictor = *p; - // flip channels if (cur_chan < (s->channels-1)) cur_chan++; else { - // decorrelate in case of multiple channels - if (s->channels > 1) { - int32_t *r = p - 1; - for (*p += *r / 2; r > (int32_t*)p - s->channels; r--) - *r = *(r + 1) - *r; - } cur_chan = 0; i++; // check for last frame @@ -354,6 +338,64 @@ static int tta_decode_frame(AVCodecContext *avctx, AVFrame *frame, } } + // run hybrid filter + for (int ch = 0; ch < s->channels; ch++) { + TTAFilter *filter = &s->ch_ctx[ch].filter; + const int32_t shift = filter->shift; + const int32_t round = filter->round; + int32_t *p = s->decode_buffer[ch]; + int32_t error = filter->error; + int32_t *qm = filter->qm; + int32_t *dx = filter->dx; + int32_t *dl = filter->dl; + + for (int n = 0; n < framelen; n++) { + s->dsp.filter_process(qm, dx, dl, + &error, &p[n], + shift, round); + } + } + + // fixed order prediction +#define PRED(x, k) 
(int32_t)((((uint64_t)(x) << (k)) - (x)) >> (k)) + for (int ch = 0; ch < s->channels; ch++) { + int32_t *p = s->decode_buffer[ch]; + int32_t predictor = 0; + + switch (bps) { + case 1: + for (int n = 0; n < framelen; n++) { + p[n] += PRED(predictor, 4); + predictor = p[n]; + } + break; + case 2: + case 3: + for (int n = 0; n < framelen; n++) { + p[n] += PRED(predictor, 5); + predictor = p[n]; + } + break; + } + } + + // decorrelate in case of multiple channels + if (s->channels > 1) { + int32_t *a = s->decode_buffer[s->channels-1]; + int32_t *b = s->decode_buffer[s->channels-2]; + + for (int n = 0; n < framelen; n++) + a[n] += b[n] / 2; + + for (int ch = s->channels - 1; ch >= 1; ch--) { + int32_t *b = s->decode_buffer[ch-1]; + int32_t *c = s->decode_buffer[ch ]; + + for (int n = 0; n < framelen; n++) + b[n] = c[n] - b[n]; + } + } + align_get_bits(&gb); if (get_bits_left(&gb) < 32) { ret = AVERROR_INVALIDDATA; @@ -362,31 +404,34 @@ static int tta_decode_frame(AVCodecContext *avctx, AVFrame *frame, skip_bits_long(&gb, 32); // frame crc // convert to output buffer - switch (s->bps) { - case 1: { - uint8_t *samples = (uint8_t *)frame->data[0]; - p = s->decode_buffer; - for (i = 0; i < framelen * s->channels; i++) - samples[i] = p[i] + 0x80; - break; + switch (bps) { + case 1: + for (int ch = 0; ch < s->channels; ch++) { + uint8_t *samples = (uint8_t *)frame->extended_data[ch]; + int32_t *p = s->decode_buffer[ch]; + for (i = 0; i < framelen; i++) + samples[i] = p[i] + 0x80; } - case 2: { - int16_t *samples = (int16_t *)frame->data[0]; - p = s->decode_buffer; - for (i = 0; i < framelen * s->channels; i++) - samples[i] = p[i]; break; + case 2: + for (int ch = 0; ch < s->channels; ch++) { + int16_t *samples = (int16_t *)frame->extended_data[ch]; + int32_t *p = s->decode_buffer[ch]; + for (i = 0; i < framelen; i++) + samples[i] = p[i]; } - case 3: { - // shift samples for 24-bit sample format - int32_t *samples = (int32_t *)frame->data[0]; + break; + case 3: + for (int ch 
= 0; ch < s->channels; ch++) { + // shift samples for 24-bit sample format + int32_t *samples = (int32_t *)frame->extended_data[ch]; - for (i = 0; i < framelen * s->channels; i++) - samples[i] = samples[i] * 256U; + for (i = 0; i < framelen; i++) + samples[i] = samples[i] * 256U; + } // reset decode buffer s->decode_buffer = NULL; break; - } } *got_frame_ptr = 1; @@ -394,16 +439,22 @@ static int tta_decode_frame(AVCodecContext *avctx, AVFrame *frame, return buf_size; error: // reset decode buffer - if (s->bps == 3) + if (bps == 3) s->decode_buffer = NULL; return ret; } -static av_cold int tta_decode_close(AVCodecContext *avctx) { +static av_cold int tta_decode_close(AVCodecContext *avctx) +{ TTAContext *s = avctx->priv_data; - if (s->bps < 3) + if (s->bps < 3) { + if (s->decode_buffer) { + for (int ch = 0; ch < s->channels; ch++) + av_freep(&s->decode_buffer[ch]); + } av_freep(&s->decode_buffer); + } s->decode_buffer = NULL; av_freep(&s->ch_ctx); -- 2.39.1 [-- Attachment #3: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads: [~2023-08-16 10:48 UTC | newest] Thread overview: 3+ messages / expand [flat | nested] mbox.gz Atom feed top 2023-08-16 10:47 Paul B Mahol [this message] 2023-08-16 16:49 ` Michael Niedermayer 2023-08-16 18:14 ` Michael Niedermayer
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=CAPYw7P6of-NO0ZgXR8hv1tHZ_T2HJfdjU_xo6LA2Cim88MqPHA@mail.gmail.com \ --to=onemda@gmail.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git