From 920bfa1e105c2c3a8951fc713614e0edf90fdc54 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Sun, 22 Jun 2025 07:37:45 +0200 Subject: [PATCH] avcodec/dfpwmenc: Correctly pad input Before this patch, the DFPWM1a encoder was marked as supporting variable frame sizes. The DFPWM1a format converts eight bytes of input into one output byte and so the encoder simply computed the (padded) number of output bytes as frame->nb_samples * frame->ch_layout.nb_channels / 8 + (frame->nb_samples % 8 > 0 ? 1 : 0) This has several bugs: a) The additional byte leads to eight additional input bytes being read; this can read into the frame's padding, i.e. the data can be uninitialized. b) The criterion for whether one should pad is wrong: nb_samples * nb_channels should be tested for divisibility by eight. c) The created frames can be undecodable (at least with our decoder): Our decoder requires the number of bits per frame to be divisible by the number of channels, yet the above approach does not guarantee this. d) The padding will be added in the middle of the stream (potentially for every packet). This commit fixes all of this by removing the variable frame size capability and using AVCodecInternal.pad_samples to pad the last frame so that nb_samples * nb_channels is always a multiple of eight. The lavf-dfpwm FATE-test was affected by a). The frames originated from lavfi and were part of an audio frame pool, so that the padding contained data from an earlier (bigger) frame. Now the last frame is properly filled with silence. 
Reported-by: Paul B Mahol Signed-off-by: Andreas Rheinhardt --- libavcodec/dfpwmenc.c | 11 +++++++---- tests/ref/lavf/dfpwm | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/libavcodec/dfpwmenc.c b/libavcodec/dfpwmenc.c index ee3005fa5c..31aaa96abc 100644 --- a/libavcodec/dfpwmenc.c +++ b/libavcodec/dfpwmenc.c @@ -25,11 +25,11 @@ * DFPWM1a encoder */ -#include "libavutil/internal.h" #include "avcodec.h" #include "codec_id.h" #include "codec_internal.h" #include "encode.h" +#include "internal.h" typedef struct { int fq, q, s, lt; @@ -85,6 +85,10 @@ static av_cold int dfpwm_enc_init(struct AVCodecContext *ctx) state->lt = -128; ctx->bits_per_coded_sample = 1; + // Pad so that nb_samples * nb_channels is always a multiple of eight. + ctx->internal->pad_samples = (const uint8_t[]){ 1, 8, 4, 8, 2, 8, 4, 8 }[ctx->ch_layout.nb_channels & 7]; + if (ctx->frame_size <= 0 || ctx->frame_size * ctx->ch_layout.nb_channels % 8U) + ctx->frame_size = 4096; return 0; } @@ -93,7 +97,7 @@ static int dfpwm_enc_frame(struct AVCodecContext *ctx, struct AVPacket *packet, const struct AVFrame *frame, int *got_packet) { DFPWMState *state = ctx->priv_data; - int size = frame->nb_samples * frame->ch_layout.nb_channels / 8 + (frame->nb_samples % 8 > 0 ? 
1 : 0); + int size = frame->nb_samples * frame->ch_layout.nb_channels / 8U; int ret = ff_get_encode_buffer(ctx, packet, size, 0); if (ret) { @@ -112,10 +116,9 @@ const FFCodec ff_dfpwm_encoder = { CODEC_LONG_NAME("DFPWM1a audio"), .p.type = AVMEDIA_TYPE_AUDIO, .p.id = AV_CODEC_ID_DFPWM, + .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, .priv_data_size = sizeof(DFPWMState), .init = dfpwm_enc_init, FF_CODEC_ENCODE_CB(dfpwm_enc_frame), CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_U8), - .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_VARIABLE_FRAME_SIZE | - AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, }; diff --git a/tests/ref/lavf/dfpwm b/tests/ref/lavf/dfpwm index b9423bc1c1..32e2b8f906 100644 --- a/tests/ref/lavf/dfpwm +++ b/tests/ref/lavf/dfpwm @@ -1,3 +1,3 @@ -c216a2b5576f3e7f31516854bbb41eb8 *tests/data/lavf/lavf.dfpwm +5f070e76586d0ddac277ad10dfd43d7e *tests/data/lavf/lavf.dfpwm 5513 tests/data/lavf/lavf.dfpwm -tests/data/lavf/lavf.dfpwm CRC=0x226be6b3 +tests/data/lavf/lavf.dfpwm CRC=0x21dfe683 -- 2.45.2