From 920bfa1e105c2c3a8951fc713614e0edf90fdc54 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 22 Jun 2025 07:37:45 +0200
Subject: [PATCH] avcodec/dfpwmenc: Correctly pad input

Before this patch, the DFPWM1a encoder was marked as supporting
variable frame sizes. The DFPWM1a format converts eight bytes
of input into one output byte and so it simply padded the number
of data output by
frame->nb_samples * frame->ch_layout.nb_channels / 8 +
(frame->nb_samples % 8 > 0 ? 1 : 0)
This has several bugs:
a) The additional byte leads to eight additional input byte being
read; this can read into the frame's padding, i.e. the data can
be uninitialized.
b) The criterion for whether one should pad is wrong:
nb_samples * nb_channels should be tested for divisibility by eight.
c) The created frames can be undecodable (at least with our decoder):
Our decoder requires the number of bits per frame to divisible by
the number of channels, yet the above approach does not guarantee this.
d) The padding will be added in the middle of the stream (potentially
for every packet).

This commit fixes all of this by removing the variable frame size cap
and using AVCodecInternal.pad_samples to pad the last frame so that
nb_samples * nb_channels is always a multiple of eight.
The lavf-dfpwm FATE-test was affected by a). The frames originated from
lavfi and were part of an audio frame pool, so that the padding
contained data from an earlier (bigger) frame. Now the last frame is
properly filled with silence.

Reported-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/dfpwmenc.c | 11 +++++++----
 tests/ref/lavf/dfpwm  |  4 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/libavcodec/dfpwmenc.c b/libavcodec/dfpwmenc.c
index ee3005fa5c..31aaa96abc 100644
--- a/libavcodec/dfpwmenc.c
+++ b/libavcodec/dfpwmenc.c
@@ -25,11 +25,11 @@
  * DFPWM1a encoder
  */
 
-#include "libavutil/internal.h"
 #include "avcodec.h"
 #include "codec_id.h"
 #include "codec_internal.h"
 #include "encode.h"
+#include "internal.h"
 
 typedef struct {
     int fq, q, s, lt;
@@ -85,6 +85,10 @@ static av_cold int dfpwm_enc_init(struct AVCodecContext *ctx)
     state->lt = -128;
 
     ctx->bits_per_coded_sample = 1;
+    // Pad so that nb_samples * nb_channels is always a multiple of eight.
+    ctx->internal->pad_samples = (const uint8_t[]){ 1, 8, 4, 8, 2, 8, 4, 8 }[ctx->ch_layout.nb_channels & 7];
+    if (ctx->frame_size <= 0 || ctx->frame_size * ctx->ch_layout.nb_channels % 8U)
+        ctx->frame_size = 4096;
 
     return 0;
 }
@@ -93,7 +97,7 @@ static int dfpwm_enc_frame(struct AVCodecContext *ctx, struct AVPacket *packet,
     const struct AVFrame *frame, int *got_packet)
 {
     DFPWMState *state = ctx->priv_data;
-    int size = frame->nb_samples * frame->ch_layout.nb_channels / 8 + (frame->nb_samples % 8 > 0 ? 1 : 0);
+    int size = frame->nb_samples * frame->ch_layout.nb_channels / 8U;
     int ret = ff_get_encode_buffer(ctx, packet, size, 0);
 
     if (ret) {
@@ -112,10 +116,9 @@ const FFCodec ff_dfpwm_encoder = {
     CODEC_LONG_NAME("DFPWM1a audio"),
     .p.type          = AVMEDIA_TYPE_AUDIO,
     .p.id            = AV_CODEC_ID_DFPWM,
+    .p.capabilities  = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
     .priv_data_size  = sizeof(DFPWMState),
     .init            = dfpwm_enc_init,
     FF_CODEC_ENCODE_CB(dfpwm_enc_frame),
     CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_U8),
-    .p.capabilities  = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_VARIABLE_FRAME_SIZE |
-                       AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
 };
diff --git a/tests/ref/lavf/dfpwm b/tests/ref/lavf/dfpwm
index b9423bc1c1..32e2b8f906 100644
--- a/tests/ref/lavf/dfpwm
+++ b/tests/ref/lavf/dfpwm
@@ -1,3 +1,3 @@
-c216a2b5576f3e7f31516854bbb41eb8 *tests/data/lavf/lavf.dfpwm
+5f070e76586d0ddac277ad10dfd43d7e *tests/data/lavf/lavf.dfpwm
 5513 tests/data/lavf/lavf.dfpwm
-tests/data/lavf/lavf.dfpwm CRC=0x226be6b3
+tests/data/lavf/lavf.dfpwm CRC=0x21dfe683
-- 
2.45.2