* [FFmpeg-devel] [PATCH 1/2] libavcodec/flacdec: Implement decoding of 32 bit-per-sample PCM
2022-07-26 19:41 [FFmpeg-devel] [PATCH 0/2] 32bps FLAC patches Martijn van Beurden
@ 2022-07-26 19:41 ` Martijn van Beurden
2022-07-26 19:41 ` [FFmpeg-devel] [PATCH 2/2] libavcodec/flacenc: Implement encoding " Martijn van Beurden
1 sibling, 0 replies; 3+ messages in thread
From: Martijn van Beurden @ 2022-07-26 19:41 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Martijn van Beurden
Add decoding of FLAC files coding for 32 bit-per-sample PCM to libavcodec.
---
libavcodec/flac.c | 4 +-
libavcodec/flacdec.c | 248 ++++++++++++++++++++++++++++++++++++++----
libavcodec/get_bits.h | 12 ++
libavcodec/mathops.h | 9 ++
4 files changed, 250 insertions(+), 23 deletions(-)
diff --git a/libavcodec/flac.c b/libavcodec/flac.c
index dd68830622..f326d8fa5c 100644
--- a/libavcodec/flac.c
+++ b/libavcodec/flac.c
@@ -27,7 +27,7 @@
#include "flac.h"
#include "flacdata.h"
-static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 0 };
+static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 32 };
static const AVChannelLayout flac_channel_layouts[8] = {
AV_CHANNEL_LAYOUT_MONO,
@@ -81,7 +81,7 @@ int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
/* bits per sample */
bps_code = get_bits(gb, 3);
- if (bps_code == 3 || bps_code == 7) {
+ if (bps_code == 3) {
av_log(avctx, AV_LOG_ERROR + log_level_offset,
"invalid sample size code (%d)\n",
bps_code);
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 17f1821c50..403cfa419e 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -63,6 +63,9 @@ typedef struct FLACContext {
int32_t *decoded[FLAC_MAX_CHANNELS]; ///< decoded samples
uint8_t *decoded_buffer;
unsigned int decoded_buffer_size;
+ int64_t *decoded_33bps; ///< decoded samples for a 33 bps subframe
+ uint8_t *decoded_buffer_33bps;
+ unsigned int decoded_buffer_size_33bps;
int buggy_lpc; ///< use workaround for old lavc encoded files
FLACDSPContext dsp;
@@ -154,6 +157,24 @@ static int allocate_buffers(FLACContext *s)
s->flac_stream_info.channels,
s->flac_stream_info.max_blocksize,
AV_SAMPLE_FMT_S32P, 0);
+ /* 32 bps stereo streams may contain a 33 bps subframe, which is decoded
+ * into a separate 64-bit buffer (decoded_33bps) */
+ if (ret >= 0 && s->flac_stream_info.bps == 32 && s->flac_stream_info.channels == 2) {
+ buf_size = av_samples_get_buffer_size(NULL, 1,
+ s->flac_stream_info.max_blocksize,
+ AV_SAMPLE_FMT_S64P, 0);
+ if (buf_size < 0)
+ return buf_size;
+
+ av_fast_malloc(&s->decoded_buffer_33bps, &s->decoded_buffer_size_33bps, buf_size);
+ /* check the buffer that was just (re)allocated, not the 32-bit one */
+ if (!s->decoded_buffer_33bps)
+ return AVERROR(ENOMEM);
+
+ ret = av_samples_fill_arrays((uint8_t **)&s->decoded_33bps, NULL,
+ s->decoded_buffer_33bps,
+ 1,
+ s->flac_stream_info.max_blocksize,
+ AV_SAMPLE_FMT_S64P, 0);
+
+ }
return ret < 0 ? ret : 0;
}
@@ -331,6 +352,94 @@ static int decode_subframe_fixed(FLACContext *s, int32_t *decoded,
return 0;
}
+/**
+ * Decode a fixed-predictor subframe whose intermediate sums may not fit
+ * in 32 bits (the caller selects this when bps + pred_order > 32).
+ * Warm-up samples are read with bps bits each, the residuals are decoded
+ * in place into decoded[], and the prediction is then evaluated in 64-bit
+ * arithmetic before being stored back as int32_t.
+ *
+ * @return 0 on success, a negative error code from decode_residuals(), or
+ * AVERROR_INVALIDDATA for a predictor order above 4
+ */
+static int decode_subframe_fixed_wide(FLACContext *s, int32_t *decoded,
+ int pred_order, int bps)
+{
+ const int blocksize = s->blocksize;
+ int i;
+ int ret;
+
+ /* warm up samples */
+ for (i = 0; i < pred_order; i++) {
+ decoded[i] = get_sbits_long(&s->gb, bps);
+ }
+
+ if ((ret = decode_residuals(s, decoded, pred_order)) < 0)
+ return ret;
+
+ switch (pred_order) {
+ case 0:
+ break;
+ case 1:
+ /* widen like the higher orders so that a corrupt stream cannot
+ * trigger signed 32-bit overflow in the addition */
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = (int64_t)decoded[i] + (int64_t)decoded[i-1];
+ break;
+ case 2:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = (int64_t)decoded[i] + 2*(int64_t)decoded[i-1] - (int64_t)decoded[i-2];
+ break;
+ case 3:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = (int64_t)decoded[i] + 3*(int64_t)decoded[i-1] - 3*(int64_t)decoded[i-2] + (int64_t)decoded[i-3];
+ break;
+ case 4:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = (int64_t)decoded[i] + 4*(int64_t)decoded[i-1] - 6*(int64_t)decoded[i-2] + 4*(int64_t)decoded[i-3] - (int64_t)decoded[i-4];
+ break;
+ default:
+ av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);
+ return AVERROR_INVALIDDATA;
+ }
+
+ return 0;
+}
+
+
+/**
+ * Decode a fixed-predictor subframe with 33 bits per sample.
+ * Warm-up samples are read as 33-bit signed values into decoded[]; the
+ * residuals are decoded into the separate 32-bit residual[] buffer and
+ * the reconstructed samples are written to decoded[] as int64_t.
+ *
+ * @return 0 on success, a negative error code from decode_residuals(), or
+ * AVERROR_INVALIDDATA for a predictor order above 4
+ */
+static int decode_subframe_fixed_33bps(FLACContext *s, int64_t *decoded,
+ int32_t *residual, int pred_order)
+{
+ const int blocksize = s->blocksize;
+ int i;
+ int ret;
+
+ /* warm up samples */
+ for (i = 0; i < pred_order; i++) {
+ decoded[i] = get_sbits64(&s->gb, 33);
+ }
+
+ if ((ret = decode_residuals(s, residual, pred_order)) < 0)
+ return ret;
+
+ switch (pred_order) {
+ case 0:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = residual[i];
+ break;
+ case 1:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = residual[i] + decoded[i-1];
+ break;
+ case 2:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = residual[i] + 2*decoded[i-1] - decoded[i-2];
+ break;
+ case 3:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = residual[i] + 3*decoded[i-1] - 3*decoded[i-2] + decoded[i-3];
+ break;
+ case 4:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = residual[i] + 4*decoded[i-1] - 6*decoded[i-2] + 4*decoded[i-3] - decoded[i-4];
+ break;
+ default:
+ av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);
+ return AVERROR_INVALIDDATA;
+ }
+
+ return 0;
+}
+
static void lpc_analyze_remodulate(SUINT32 *decoded, const int coeffs[32],
int order, int qlevel, int len, int bps)
{
@@ -402,12 +511,53 @@ static int decode_subframe_lpc(FLACContext *s, int32_t *decoded, int pred_order,
return 0;
}
+/**
+ * Decode an LPC subframe with 33 bits per sample.
+ * Reads pred_order warm-up samples (33-bit signed), the coefficient
+ * precision, the quantisation level and the coefficients, decodes the
+ * residuals into residual[] and reconstructs the samples into decoded[].
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA for a coefficient precision
+ * of 16 or a negative qlevel, or a negative error code from
+ * decode_residuals()
+ */
+static int decode_subframe_lpc_33bps(FLACContext *s, int64_t *decoded,
+ int32_t *residual, int pred_order)
+{
+ int i, j, ret;
+ int coeff_prec, qlevel;
+ int coeffs[32];
+
+ /* warm up samples */
+ for (i = 0; i < pred_order; i++) {
+ decoded[i] = get_sbits64(&s->gb, 33);
+ }
+
+ coeff_prec = get_bits(&s->gb, 4) + 1;
+ if (coeff_prec == 16) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid coeff precision\n");
+ return AVERROR_INVALIDDATA;
+ }
+ qlevel = get_sbits(&s->gb, 5);
+ if (qlevel < 0) {
+ av_log(s->avctx, AV_LOG_ERROR, "qlevel %d not supported, maybe buggy stream\n",
+ qlevel);
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* coefficients are stored in reverse order so that coeffs[j] lines up
+ * with decoded[j] in the prediction loop below */
+ for (i = 0; i < pred_order; i++) {
+ coeffs[pred_order - i - 1] = get_sbits(&s->gb, coeff_prec);
+ }
+
+ if ((ret = decode_residuals(s, residual, pred_order)) < 0)
+ return ret;
+
+ /* decoded is advanced by one sample per iteration: decoded[0..pred_order-1]
+ * are the preceding samples and, since j == pred_order after the inner
+ * loop, decoded[j] is the sample being reconstructed */
+ for (i = pred_order; i < s->blocksize; i++, decoded++) {
+ int64_t sum = 0;
+ for (j = 0; j < pred_order; j++)
+ sum += (int64_t)coeffs[j] * decoded[j];
+ decoded[j] = residual[i] + (sum >> qlevel);
+ }
+
+ return 0;
+}
+
static inline int decode_subframe(FLACContext *s, int channel)
{
int32_t *decoded = s->decoded[channel];
int type, wasted = 0;
int bps = s->flac_stream_info.bps;
- int i, tmp, ret;
+ int i, ret;
if (channel == 0) {
if (s->ch_mode == FLAC_CHMODE_RIGHT_SIDE)
@@ -436,34 +586,63 @@ static inline int decode_subframe(FLACContext *s, int channel)
wasted = 1 + get_unary(&s->gb, 1, get_bits_left(&s->gb));
bps -= wasted;
}
- if (bps > 32) {
- avpriv_report_missing_feature(s->avctx, "Decorrelated bit depth > 32");
- return AVERROR_PATCHWELCOME;
- }
//FIXME use av_log2 for types
if (type == 0) {
- tmp = get_sbits_long(&s->gb, bps);
- for (i = 0; i < s->blocksize; i++)
- decoded[i] = tmp;
+ if (bps < 33) {
+ int32_t tmp = get_sbits_long(&s->gb, bps);
+ for (i = 0; i < s->blocksize; i++)
+ decoded[i] = tmp;
+ } else {
+ int64_t tmp = get_sbits64(&s->gb, 33);
+ for (i = 0; i < s->blocksize; i++)
+ s->decoded_33bps[i] = tmp;
+ }
} else if (type == 1) {
- for (i = 0; i < s->blocksize; i++)
- decoded[i] = get_sbits_long(&s->gb, bps);
+ if (bps < 33) {
+ for (i = 0; i < s->blocksize; i++)
+ decoded[i] = get_sbits_long(&s->gb, bps);
+ } else {
+ for (i = 0; i < s->blocksize; i++)
+ s->decoded_33bps[i] = get_sbits64(&s->gb, 33);
+ }
} else if ((type >= 8) && (type <= 12)) {
- if ((ret = decode_subframe_fixed(s, decoded, type & ~0x8, bps)) < 0)
- return ret;
+ int order = type & ~0x8;
+ if (bps < 33) {
+ if (bps + order <= 32) {
+ if ((ret = decode_subframe_fixed(s, decoded, order, bps)) < 0)
+ return ret;
+ } else {
+ if ((ret = decode_subframe_fixed_wide(s, decoded, order, bps)) < 0)
+ return ret;
+ }
+ } else {
+ if ((ret = decode_subframe_fixed_33bps(s, s->decoded_33bps, decoded, order)) < 0)
+ return ret;
+ }
} else if (type >= 32) {
- if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1, bps)) < 0)
- return ret;
+ if (bps < 33) {
+ if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1, bps)) < 0)
+ return ret;
+ } else {
+ if ((ret = decode_subframe_lpc_33bps(s, s->decoded_33bps, decoded, (type & ~0x20)+1)) < 0)
+ return ret;
+ }
} else {
av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n");
return AVERROR_INVALIDDATA;
}
- if (wasted && wasted < 32) {
- int i;
- for (i = 0; i < s->blocksize; i++)
- decoded[i] = (unsigned)decoded[i] << wasted;
+ if (wasted) {
+ if (wasted+bps == 33) {
+ int i;
+ for (i = 0; i < s->blocksize; i++)
+ s->decoded_33bps[i] = (uint64_t)decoded[i] << wasted;
+ } else if (wasted < 32) {
+ int i;
+ for (i = 0; i < s->blocksize; i++)
+ decoded[i] = (unsigned)decoded[i] << wasted;
+ }
}
return 0;
@@ -554,6 +733,26 @@ static int decode_frame(FLACContext *s)
return 0;
}
+/**
+ * Undo stereo decorrelation when one channel was decoded as 33 bps.
+ * The 33-bit channel is read from decoded_33bps and the result is written
+ * back into the 32-bit decoded[0] / decoded[1] buffers. Any ch_mode other
+ * than the three handled below leaves the buffers untouched.
+ *
+ * @param len number of samples to process per channel
+ */
+static void decorrelate_33bps(int ch_mode, int32_t **decoded, int64_t *decoded_33bps, int len)
+{
+ int i;
+ if (ch_mode == FLAC_CHMODE_LEFT_SIDE ) {
+ /* decoded[0] is kept; decoded[1] = decoded[0] - 33-bit channel */
+ for (i = 0; i < len; i++)
+ decoded[1][i] = decoded[0][i] - decoded_33bps[i];
+ } else if (ch_mode == FLAC_CHMODE_RIGHT_SIDE ) {
+ /* decoded[1] is kept; decoded[0] = decoded[1] + 33-bit channel */
+ for (i = 0; i < len; i++)
+ decoded[0][i] = decoded[1][i] + decoded_33bps[i];
+ } else if (ch_mode == FLAC_CHMODE_MID_SIDE ) {
+ for (i = 0; i < len; i++) {
+ /* a is unsigned so the intermediate arithmetic wraps instead of
+ * overflowing */
+ uint64_t a = decoded[0][i];
+ int64_t b = decoded_33bps[i];
+ a -= b >> 1;
+ decoded[0][i] = (a + b);
+ decoded[1][i] = a;
+ }
+ }
+}
+
static int flac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
int *got_frame_ptr, AVPacket *avpkt)
{
@@ -618,9 +817,15 @@ static int flac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
if ((ret = ff_thread_get_buffer(avctx, frame, 0)) < 0)
return ret;
- s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
- s->flac_stream_info.channels,
- s->blocksize, s->sample_shift);
+ if (s->flac_stream_info.bps == 32 && s->ch_mode > 0) {
+ decorrelate_33bps(s->ch_mode, s->decoded, s->decoded_33bps, s->blocksize);
+ s->dsp.decorrelate[0](frame->data, s->decoded, s->flac_stream_info.channels,
+ s->blocksize, s->sample_shift);
+ } else {
+ s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
+ s->flac_stream_info.channels,
+ s->blocksize, s->sample_shift);
+ }
if (bytes_read > buf_size) {
av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", bytes_read - buf_size);
@@ -641,6 +846,7 @@ static av_cold int flac_decode_close(AVCodecContext *avctx)
FLACContext *s = avctx->priv_data;
av_freep(&s->decoded_buffer);
+ av_freep(&s->decoded_buffer_33bps);
return 0;
}
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 992765dc92..52d13b8242 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -596,6 +596,18 @@ static inline int get_sbits_long(GetBitContext *s, int n)
return sign_extend(get_bits_long(s, n), n);
}
+/**
+ * Read 0-64 bits as a signed integer.
+ *
+ * @param n number of bits to read
+ * @return the value read with get_bits64(), sign-extended from n bits;
+ * 0 when n is 0 (nothing is read in that case)
+ */
+static inline int64_t get_sbits64(GetBitContext *s, int n)
+{
+ // sign_extend(x, 0) is undefined
+ if (!n)
+ return 0;
+
+ return sign_extend64(get_bits64(s, n), n);
+}
+
/**
* Show 0-32 bits.
*/
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index f81d21f9c4..8a82d9d086 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -137,6 +137,15 @@ static inline av_const int sign_extend(int val, unsigned bits)
}
#endif
+#ifndef sign_extend64
+/**
+ * Sign-extend the low `bits` bits of val to 64 bits.
+ * The value is shifted up as unsigned so that bit (bits - 1) lands in the
+ * sign position, then shifted back down as signed.
+ * bits must not be 0: the shift count would then be 64, which is undefined.
+ */
+static inline av_const int64_t sign_extend64(int64_t val, unsigned bits)
+{
+ unsigned shift = 8 * sizeof(int64_t) - bits;
+ union { uint64_t u; int64_t s; } v = { (uint64_t) val << shift };
+ return v.s >> shift;
+}
+#endif
+
#ifndef zero_extend
static inline av_const unsigned zero_extend(unsigned val, unsigned bits)
{
--
2.30.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* [FFmpeg-devel] [PATCH 2/2] libavcodec/flacenc: Implement encoding of 32 bit-per-sample PCM
2022-07-26 19:41 [FFmpeg-devel] [PATCH 0/2] 32bps FLAC patches Martijn van Beurden
2022-07-26 19:41 ` [FFmpeg-devel] [PATCH 1/2] libavcodec/flacdec: Implement decoding of 32 bit-per-sample PCM Martijn van Beurden
@ 2022-07-26 19:41 ` Martijn van Beurden
1 sibling, 0 replies; 3+ messages in thread
From: Martijn van Beurden @ 2022-07-26 19:41 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Martijn van Beurden
Add encoding of 32 bit-per-sample PCM to FLAC files to libavcodec.
Coding to this format is at this point considered experimental and
-strict -2 is needed to get ffmpeg to encode such files.
---
libavcodec/flacenc.c | 499 ++++++++++++++++++++++++++++++++--------
libavcodec/put_bits.h | 7 +
libavcodec/put_golomb.h | 19 +-
3 files changed, 418 insertions(+), 107 deletions(-)
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 9350e42dbc..4ed0a3961a 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -96,6 +96,7 @@ typedef struct FlacSubframe {
typedef struct FlacFrame {
FlacSubframe subframes[FLAC_MAX_CHANNELS];
+ int64_t samples_33bps[FLAC_MAX_BLOCKSIZE];
int blocksize;
int bs_code[2];
uint8_t crc8;
@@ -255,10 +256,23 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
s->bps_code = 4;
break;
case AV_SAMPLE_FMT_S32:
- if (avctx->bits_per_raw_sample != 24)
- av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
- avctx->bits_per_raw_sample = 24;
- s->bps_code = 6;
+ if (avctx->bits_per_raw_sample <= 24) {
+ if (avctx->bits_per_raw_sample < 24)
+ av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
+ avctx->bits_per_raw_sample = 24;
+ s->bps_code = 6;
+ } else if (avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+ av_log(avctx, AV_LOG_WARNING,
+ "encoding as 24 bits-per-sample, more is considered "
+ "experimental. Add -strict %d if you want to encode "
+ "more than 24 bits-per-sample\n",
+ FF_COMPLIANCE_EXPERIMENTAL);
+ avctx->bits_per_raw_sample = 24;
+ s->bps_code = 6;
+ } else {
+ avctx->bits_per_raw_sample = 32;
+ s->bps_code = 7;
+ }
break;
}
@@ -507,8 +521,7 @@ static uint64_t rice_count_exact(const int32_t *res, int n, int k)
uint64_t count = 0;
for (i = 0; i < n; i++) {
- int32_t v = -2 * res[i] - 1;
- v ^= v >> 31;
+ unsigned v = ((unsigned)(res[i]) << 1) ^ (res[i] >> 31);
count += (v >> k) + 1 + k;
}
return count;
@@ -687,8 +700,8 @@ static uint64_t calc_rice_params(RiceContext *rc,
tmp_rc.coding_mode = rc->coding_mode;
- for (i = 0; i < n; i++)
- udata[i] = (2 * data[i]) ^ (data[i] >> 31);
+ for (i = pred_order; i < n; i++)
+ udata[i] = ((unsigned)(data[i]) << 1) ^ (data[i] >> 31);
calc_sum_top(pmax, exact ? kmax : 0, udata, n, pred_order, sums);
@@ -786,6 +799,180 @@ static void encode_residual_fixed(int32_t *res, const int32_t *smp, int n,
}
+/**
+ * Compute fixed-predictor residuals for 32-bit samples, with range check.
+ * The first `order` entries of res are copied from smp (warm-up samples);
+ * the remaining entries receive the residuals, computed in 64 bits.
+ * Orders 0 to 3 are handled explicitly; any other order uses the
+ * 4th-order predictor.
+ *
+ * @return 0 on success, 1 as soon as a residual does not fit (res is then
+ * only partially written)
+ */
+static int encode_residual_fixed_with_residual_limit(int32_t *res, const int32_t *smp,
+ int n, int order)
+{
+ /* This function checks for every residual whether it can be
+ * contained in <INT32_MIN,INT32_MAX]. In case it doesn't, the
+ * function that called this function has to try something else */
+ int i;
+ int64_t res64;
+
+ for (i = 0; i < order; i++)
+ res[i] = smp[i];
+
+ if (order == 0) {
+ for (i = order; i < n; i++) {
+ /* the residual equals the sample, so only INT32_MIN is out of range */
+ if (smp[i] == INT32_MIN)
+ return 1;
+ res[i] = smp[i];
+ }
+ } else if (order == 1) {
+ for (i = order; i < n; i++) {
+ res64 = (int64_t)smp[i] - smp[i-1];
+ if (res64 <= INT32_MIN || res64 > INT32_MAX)
+ return 1;
+ res[i] = res64;
+ }
+ } else if (order == 2) {
+ for (i = order; i < n; i++) {
+ res64 = (int64_t)smp[i] - 2*(int64_t)smp[i-1] + smp[i-2];
+ if (res64 <= INT32_MIN || res64 > INT32_MAX)
+ return 1;
+ res[i] = res64;
+ }
+ } else if (order == 3) {
+ for (i = order; i < n; i++) {
+ res64 = (int64_t)smp[i] - 3*(int64_t)smp[i-1] + 3*(int64_t)smp[i-2] - smp[i-3];
+ if (res64 <= INT32_MIN || res64 > INT32_MAX)
+ return 1;
+ res[i] = res64;
+ }
+ } else {
+ for (i = order; i < n; i++) {
+ res64 = (int64_t)smp[i] - 4*(int64_t)smp[i-1] + 6*(int64_t)smp[i-2] - 4*(int64_t)smp[i-3] + smp[i-4];
+ if (res64 <= INT32_MIN || res64 > INT32_MAX)
+ return 1;
+ res[i] = res64;
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * Compute fixed-predictor residuals for 64-bit (33 bps) samples, with
+ * range check. Unlike the 32-bit variant, the first `order` entries of
+ * res are not written here; only res[order..n-1] receive residuals.
+ * Orders 0 to 3 are handled explicitly; any other order uses the
+ * 4th-order predictor.
+ *
+ * @return 0 on success, 1 as soon as a residual does not fit (res is then
+ * only partially written)
+ */
+static int encode_residual_fixed_with_residual_limit_33bps(int32_t *res, const int64_t *smp,
+ int n, int order)
+{
+ /* This function checks for every residual whether it can be
+ * contained in <INT32_MIN,INT32_MAX]. In case it doesn't, the
+ * function that called this function has to try something else */
+ int i;
+ int64_t res64;
+
+ if (order == 0) {
+ for (i = order; i < n; i++) {
+ if (smp[i] <= INT32_MIN || smp[i] > INT32_MAX)
+ return 1;
+ res[i] = smp[i];
+ }
+ } else if (order == 1) {
+ for (i = order; i < n; i++) {
+ res64 = smp[i] - smp[i-1];
+ if (res64 <= INT32_MIN || res64 > INT32_MAX)
+ return 1;
+ res[i] = res64;
+ }
+ } else if (order == 2) {
+ for (i = order; i < n; i++) {
+ res64 = smp[i] - 2*smp[i-1] + smp[i-2];
+ if (res64 <= INT32_MIN || res64 > INT32_MAX)
+ return 1;
+ res[i] = res64;
+ }
+ } else if (order == 3) {
+ for (i = order; i < n; i++) {
+ res64 = smp[i] - 3*smp[i-1] + 3*smp[i-2] - smp[i-3];
+ if (res64 <= INT32_MIN || res64 > INT32_MAX)
+ return 1;
+ res[i] = res64;
+ }
+ } else {
+ for (i = order; i < n; i++) {
+ res64 = smp[i] - 4*smp[i-1] + 6*smp[i-2] - 4*smp[i-3] + smp[i-4];
+ if (res64 <= INT32_MIN || res64 > INT32_MAX)
+ return 1;
+ res[i] = res64;
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * Compute LPC residuals for 32-bit samples, with range check.
+ * The first `order` entries of res are copied from smp; for the rest the
+ * prediction is accumulated in 64 bits, shifted right by `shift` and
+ * subtracted from the sample.
+ *
+ * @return 0 on success, 1 as soon as a residual does not fit (res is then
+ * only partially written)
+ */
+static int lpc_encode_with_residual_limit(int32_t *res, const int32_t *smp, int len,
+ int order, int32_t *coefs, int shift)
+{
+ /* This function checks for every residual whether it can be
+ * contained in <INT32_MIN,INT32_MAX]. In case it doesn't, the
+ * function that called this function has to try something else */
+ for (int i = 0; i < order; i++)
+ res[i] = smp[i];
+ for (int i = order; i < len; i++) {
+ int64_t p = 0, tmp;
+ /* coefs[0] applies to the most recent sample, smp[i-1] */
+ for (int j = 0; j < order; j++)
+ p += (int64_t)coefs[j]*smp[(i-1)-j];
+ p >>= shift;
+ tmp = smp[i] - p;
+ if (tmp <= INT32_MIN || tmp > INT32_MAX)
+ return 1;
+ res[i] = tmp;
+ }
+ return 0;
+}
+
+/**
+ * Compute LPC residuals for 64-bit (33 bps) samples, with range check.
+ * Unlike the 32-bit variant, the first `order` entries of res are not
+ * written here; only res[order..len-1] receive residuals.
+ *
+ * @return 0 on success, 1 as soon as a residual does not fit (res is then
+ * only partially written)
+ */
+static int lpc_encode_with_residual_limit_33bps(int32_t *res, const int64_t *smp, int len,
+ int order, int32_t *coefs, int shift)
+{
+ /* This function checks for every residual whether it can be
+ * contained in <INT32_MIN,INT32_MAX]. In case it doesn't, the
+ * function that called this function has to try something else */
+ for (int i = order; i < len; i++) {
+ int64_t p = 0, tmp;
+ /* coefs[0] applies to the most recent sample, smp[i-1] */
+ for (int j = 0; j < order; j++)
+ p += (int64_t)coefs[j]*smp[(i-1)-j];
+ p >>= shift;
+ tmp = smp[i] - p;
+ if (tmp <= INT32_MIN || tmp > INT32_MAX)
+ return 1;
+ res[i] = tmp;
+ }
+ return 0;
+}
+
+/**
+ * Compute LPC residuals using the cheapest routine that is safe for the
+ * given bit depth and predictor.
+ * - bps > 32: range-checked 64-bit path reading smp_33bps
+ * - worst-case residual bound above INT32_MAX: range-checked 32-bit path
+ * - otherwise the lpc16_encode or lpc32_encode DSP routine, selected by
+ * bps + lpc_coeff_precision + av_log2(order)
+ *
+ * @return 0 if res was filled, 1 if a range-checked path found a residual
+ * that does not fit
+ */
+static int lpc_encode_choose_datapath(FlacEncodeContext *s, int32_t bps,
+ int32_t *res, const int32_t *smp,
+ const int64_t *smp_33bps, int len,
+ int order, int32_t *coefs, int shift)
+{
+ uint64_t max_residual_value = 0;
+ int64_t max_sample_value = ((int64_t)(1) << (bps-1));
+ /* This calculates the max size of any residual with the current
+ * predictor, so we know whether we need to check the residual */
+ for (int i = 0; i < order; i++)
+ max_residual_value += FFABS(max_sample_value * coefs[i]);
+ max_residual_value >>= shift;
+ max_residual_value += max_sample_value;
+ if (bps > 32) {
+ if (lpc_encode_with_residual_limit_33bps(res, smp_33bps, len, order, coefs, shift))
+ return 1;
+ } else if (max_residual_value > INT32_MAX) {
+ if (lpc_encode_with_residual_limit(res, smp, len, order, coefs, shift))
+ return 1;
+ } else if (bps + s->options.lpc_coeff_precision + av_log2(order) <= 32) {
+ s->flac_dsp.lpc16_encode(res, smp, len, order, coefs, shift);
+ } else {
+ s->flac_dsp.lpc32_encode(res, smp, len, order, coefs, shift);
+ }
+ return 0;
+}
+
+/* Turn the current subframe into a verbatim one and return its exact bit
+ * count from the enclosing function. Relies on s, sub, res, smp and n being
+ * in scope at the point of use. For more than 32 bits per sample nothing is
+ * copied into res. Wrapped in do/while(0) so that "DEFAULT_TO_VERBATIM();"
+ * is a single statement. */
+#define DEFAULT_TO_VERBATIM() \
+do { \
+ sub->type = sub->type_code = FLAC_SUBFRAME_VERBATIM; \
+ if (sub->obits <= 32) \
+ memcpy(res, smp, n * sizeof(int32_t)); \
+ return subframe_count_exact(s, sub, 0); \
+} while (0)
+
static int encode_residual_ch(FlacEncodeContext *s, int ch)
{
int i, n;
@@ -795,28 +982,38 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
int shift[MAX_LPC_ORDER];
int32_t *res, *smp;
+ int64_t *smp_33bps;
- frame = &s->frame;
- sub = &frame->subframes[ch];
- res = sub->residual;
- smp = sub->samples;
- n = frame->blocksize;
+ frame = &s->frame;
+ sub = &frame->subframes[ch];
+ res = sub->residual;
+ smp = sub->samples;
+ smp_33bps = frame->samples_33bps;
+ n = frame->blocksize;
/* CONSTANT */
- for (i = 1; i < n; i++)
- if(smp[i] != smp[0])
- break;
- if (i == n) {
- sub->type = sub->type_code = FLAC_SUBFRAME_CONSTANT;
- res[0] = smp[0];
- return subframe_count_exact(s, sub, 0);
+ if (sub->obits > 32) {
+ for (i = 1; i < n; i++)
+ if(smp_33bps[i] != smp_33bps[0])
+ break;
+ if (i == n) {
+ sub->type = sub->type_code = FLAC_SUBFRAME_CONSTANT;
+ return subframe_count_exact(s, sub, 0);
+ }
+ } else {
+ for (i = 1; i < n; i++)
+ if(smp[i] != smp[0])
+ break;
+ if (i == n) {
+ sub->type = sub->type_code = FLAC_SUBFRAME_CONSTANT;
+ res[0] = smp[0];
+ return subframe_count_exact(s, sub, 0);
+ }
}
/* VERBATIM */
if (frame->verbatim_only || n < 5) {
- sub->type = sub->type_code = FLAC_SUBFRAME_VERBATIM;
- memcpy(res, smp, n * sizeof(int32_t));
- return subframe_count_exact(s, sub, 0);
+ DEFAULT_TO_VERBATIM();
}
min_order = s->options.min_prediction_order;
@@ -833,15 +1030,32 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
opt_order = 0;
bits[0] = UINT32_MAX;
for (i = min_order; i <= max_order; i++) {
- encode_residual_fixed(res, smp, n, i);
+ if (sub->obits == 33) {
+ if (encode_residual_fixed_with_residual_limit_33bps(res, smp_33bps, n, i))
+ continue;
+ } else if (sub->obits + i >= 32) {
+ if (encode_residual_fixed_with_residual_limit(res, smp, n, i))
+ continue;
+ } else
+ encode_residual_fixed(res, smp, n, i);
bits[i] = find_subframe_rice_params(s, sub, i);
if (bits[i] < bits[opt_order])
opt_order = i;
}
+ if (opt_order == 0 && bits[0] == UINT32_MAX) {
+ /* No predictor found with residuals within <INT32_MIN,INT32_MAX],
+ * so encode a verbatim subframe instead */
+ DEFAULT_TO_VERBATIM();
+ }
sub->order = opt_order;
sub->type_code = sub->type | sub->order;
if (sub->order != max_order) {
- encode_residual_fixed(res, smp, n, sub->order);
+ /* choose the path from the order that is re-encoded; the loop index
+ * i is past max_order at this point */
+ if (sub->obits == 33)
+ encode_residual_fixed_with_residual_limit_33bps(res, smp_33bps, n, sub->order);
+ else if (sub->obits + sub->order >= 32)
+ encode_residual_fixed_with_residual_limit(res, smp, n, sub->order);
+ else
+ encode_residual_fixed(res, smp, n, sub->order);
find_subframe_rice_params(s, sub, sub->order);
}
return subframe_count_exact(s, sub, sub->order);
@@ -849,6 +1063,14 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
/* LPC */
sub->type = FLAC_SUBFRAME_LPC;
+ if (sub->obits == 33)
+ /* As ff_lpc_calc_coefs is shared with other codecs and the LSB
+ * probably isn't predictable anyway, throw away LSB for analysis
+ * so it fits 32 bit int and existing function can be used
+ * unmodified */
+ for (i = 0; i < n; i++)
+ smp[i] = smp_33bps[i] >> 1;
+
opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, smp, n, min_order, max_order,
s->options.lpc_coeff_precision, coefs, shift, s->options.lpc_type,
s->options.lpc_passes, omethod,
@@ -869,13 +1091,8 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
order = av_clip(order, min_order - 1, max_order - 1);
if (order == last_order)
continue;
- if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(order) <= 32) {
- s->flac_dsp.lpc16_encode(res, smp, n, order+1, coefs[order],
- shift[order]);
- } else {
- s->flac_dsp.lpc32_encode(res, smp, n, order+1, coefs[order],
- shift[order]);
- }
+ /* i counts search levels here; the predictor under test is `order` */
+ if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, order+1, coefs[order], shift[order]))
+ continue;
bits[i] = find_subframe_rice_params(s, sub, order+1);
if (bits[i] < bits[opt_index]) {
opt_index = i;
@@ -889,11 +1106,8 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
opt_order = 0;
bits[0] = UINT32_MAX;
for (i = min_order-1; i < max_order; i++) {
- if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(i) <= 32) {
- s->flac_dsp.lpc16_encode(res, smp, n, i+1, coefs[i], shift[i]);
- } else {
- s->flac_dsp.lpc32_encode(res, smp, n, i+1, coefs[i], shift[i]);
- }
+ if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, i+1, coefs[i], shift[i]))
+ continue;
bits[i] = find_subframe_rice_params(s, sub, i+1);
if (bits[i] < bits[opt_order])
opt_order = i;
@@ -911,11 +1125,8 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
for (i = last-step; i <= last+step; i += step) {
if (i < min_order-1 || i >= max_order || bits[i] < UINT32_MAX)
continue;
- if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(i) <= 32) {
- s->flac_dsp.lpc32_encode(res, smp, n, i+1, coefs[i], shift[i]);
- } else {
- s->flac_dsp.lpc16_encode(res, smp, n, i+1, coefs[i], shift[i]);
- }
+ if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, i+1, coefs[i], shift[i]))
+ continue;
bits[i] = find_subframe_rice_params(s, sub, i+1);
if (bits[i] < bits[opt_order])
opt_order = i;
@@ -952,11 +1163,8 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
if (diffsum >8)
continue;
- if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(opt_order - 1) <= 32) {
- s->flac_dsp.lpc16_encode(res, smp, n, opt_order, lpc_try, shift[opt_order-1]);
- } else {
- s->flac_dsp.lpc32_encode(res, smp, n, opt_order, lpc_try, shift[opt_order-1]);
- }
+ /* encode with the candidate coefficients being scored, lpc_try */
+ if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, opt_order, lpc_try, shift[opt_order-1]))
+ continue;
score = find_subframe_rice_params(s, sub, opt_order);
if (score < best_score) {
best_score = score;
@@ -973,10 +1181,10 @@ static int encode_residual_ch(FlacEncodeContext *s, int ch)
for (i = 0; i < sub->order; i++)
sub->coefs[i] = coefs[sub->order-1][i];
- if (s->bps_code * 4 + s->options.lpc_coeff_precision + av_log2(opt_order) <= 32) {
- s->flac_dsp.lpc16_encode(res, smp, n, sub->order, sub->coefs, sub->shift);
- } else {
- s->flac_dsp.lpc32_encode(res, smp, n, sub->order, sub->coefs, sub->shift);
+ if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, sub->order, coefs[sub->order-1], shift[sub->order-1])) {
+ /* No predictor found with residuals within <INT32_MIN,INT32_MAX],
+ * so encode a verbatim subframe instead */
+ DEFAULT_TO_VERBATIM();
}
find_subframe_rice_params(s, sub, sub->order);
@@ -1043,54 +1251,88 @@ static int encode_frame(FlacEncodeContext *s)
static void remove_wasted_bits(FlacEncodeContext *s)
{
- int ch, i;
+ int ch, i, wasted_bits;
for (ch = 0; ch < s->channels; ch++) {
FlacSubframe *sub = &s->frame.subframes[ch];
- int32_t v = 0;
- for (i = 0; i < s->frame.blocksize; i++) {
- v |= sub->samples[i];
- if (v & 1)
- break;
- }
+ if (sub->obits > 32) {
+ int64_t v = 0;
+ for (i = 0; i < s->frame.blocksize; i++) {
+ v |= s->frame.samples_33bps[i];
+ if (v & 1)
+ break;
+ }
+
+ /* no wasted bits in this channel: move on to the next one
+ * instead of leaving the function */
+ if (!v || (v & 1))
+ continue;
- if (v && !(v & 1)) {
v = ff_ctz(v);
+ /* If any wasted bits are found, samples are moved
+ * from frame.samples_33bps to frame.subframes[ch] */
for (i = 0; i < s->frame.blocksize; i++)
- sub->samples[i] >>= v;
+ sub->samples[i] = s->frame.samples_33bps[i] >> v;
+ wasted_bits = v;
+ } else {
+ int32_t v = 0;
+ for (i = 0; i < s->frame.blocksize; i++) {
+ v |= sub->samples[i];
+ if (v & 1)
+ break;
+ }
+
+ /* no wasted bits in this channel: move on to the next one
+ * instead of leaving the function */
+ if (!v || (v & 1))
+ continue;
- sub->wasted = v;
- sub->obits -= v;
+ v = ff_ctz(v);
- /* for 24-bit, check if removing wasted bits makes the range better
- suited for using RICE instead of RICE2 for entropy coding */
- if (sub->obits <= 17)
- sub->rc.coding_mode = CODING_MODE_RICE;
+ for (i = 0; i < s->frame.blocksize; i++)
+ sub->samples[i] >>= v;
+ wasted_bits = v;
}
+
+ sub->wasted = wasted_bits;
+ sub->obits -= wasted_bits;
+
+ /* for 24-bit, check if removing wasted bits makes the range better
+ * suited for using RICE instead of RICE2 for entropy coding */
+ if (sub->obits <= 17)
+ sub->rc.coding_mode = CODING_MODE_RICE;
}
}
static int estimate_stereo_mode(const int32_t *left_ch, const int32_t *right_ch, int n,
- int max_rice_param)
+ int max_rice_param, int bps)
{
int i, best;
- int32_t lt, rt;
uint64_t sum[4];
uint64_t score[4];
int k;
/* calculate sum of 2nd order residual for each channel */
sum[0] = sum[1] = sum[2] = sum[3] = 0;
- for (i = 2; i < n; i++) {
- lt = left_ch[i] - 2*left_ch[i-1] + left_ch[i-2];
- rt = right_ch[i] - 2*right_ch[i-1] + right_ch[i-2];
- sum[2] += FFABS((lt + rt) >> 1);
- sum[3] += FFABS(lt - rt);
- sum[0] += FFABS(lt);
- sum[1] += FFABS(rt);
+ if(bps < 30) {
+ int32_t lt, rt;
+ for (i = 2; i < n; i++) {
+ lt = left_ch[i] - 2*left_ch[i-1] + left_ch[i-2];
+ rt = right_ch[i] - 2*right_ch[i-1] + right_ch[i-2];
+ sum[2] += FFABS((lt + rt) >> 1);
+ sum[3] += FFABS(lt - rt);
+ sum[0] += FFABS(lt);
+ sum[1] += FFABS(rt);
+ }
+ } else {
+ int64_t lt, rt;
+ for (i = 2; i < n; i++) {
+ lt = (int64_t)left_ch[i] - 2*(int64_t)left_ch[i-1] + left_ch[i-2];
+ rt = (int64_t)right_ch[i] - 2*(int64_t)right_ch[i-1] + right_ch[i-2];
+ sum[2] += FFABS((lt + rt) >> 1);
+ sum[3] += FFABS(lt - rt);
+ sum[0] += FFABS(lt);
+ sum[1] += FFABS(rt);
+ }
}
/* estimate bit counts */
for (i = 0; i < 4; i++) {
@@ -1121,12 +1363,14 @@ static void channel_decorrelation(FlacEncodeContext *s)
{
FlacFrame *frame;
int32_t *left, *right;
+ int64_t *side_33bps;
int i, n;
- frame = &s->frame;
- n = frame->blocksize;
- left = frame->subframes[0].samples;
- right = frame->subframes[1].samples;
+ frame = &s->frame;
+ n = frame->blocksize;
+ left = frame->subframes[0].samples;
+ right = frame->subframes[1].samples;
+ side_33bps = frame->samples_33bps;
if (s->channels != 2) {
frame->ch_mode = FLAC_CHMODE_INDEPENDENT;
@@ -1135,29 +1379,49 @@ static void channel_decorrelation(FlacEncodeContext *s)
if (s->options.ch_mode < 0) {
int max_rice_param = (1 << frame->subframes[0].rc.coding_mode) - 2;
- frame->ch_mode = estimate_stereo_mode(left, right, n, max_rice_param);
+ frame->ch_mode = estimate_stereo_mode(left, right, n, max_rice_param, s->avctx->bits_per_raw_sample);
} else
frame->ch_mode = s->options.ch_mode;
/* perform decorrelation and adjust bits-per-sample */
if (frame->ch_mode == FLAC_CHMODE_INDEPENDENT)
return;
- if (frame->ch_mode == FLAC_CHMODE_MID_SIDE) {
- int32_t tmp;
- for (i = 0; i < n; i++) {
- tmp = left[i];
- left[i] = (tmp + right[i]) >> 1;
- right[i] = tmp - right[i];
+ if(s->avctx->bits_per_raw_sample == 32) {
+ if (frame->ch_mode == FLAC_CHMODE_MID_SIDE) {
+ int64_t tmp;
+ for (i = 0; i < n; i++) {
+ tmp = left[i];
+ left[i] = (tmp + right[i]) >> 1;
+ side_33bps[i] = tmp - right[i];
+ }
+ frame->subframes[1].obits++;
+ } else if (frame->ch_mode == FLAC_CHMODE_LEFT_SIDE) {
+ for (i = 0; i < n; i++)
+ side_33bps[i] = (int64_t)left[i] - right[i];
+ frame->subframes[1].obits++;
+ } else {
+ for (i = 0; i < n; i++)
+ side_33bps[i] = (int64_t)left[i] - right[i];
+ frame->subframes[0].obits++;
}
- frame->subframes[1].obits++;
- } else if (frame->ch_mode == FLAC_CHMODE_LEFT_SIDE) {
- for (i = 0; i < n; i++)
- right[i] = left[i] - right[i];
- frame->subframes[1].obits++;
} else {
- for (i = 0; i < n; i++)
- left[i] -= right[i];
- frame->subframes[0].obits++;
+ if (frame->ch_mode == FLAC_CHMODE_MID_SIDE) {
+ int32_t tmp;
+ for (i = 0; i < n; i++) {
+ tmp = left[i];
+ left[i] = (tmp + right[i]) >> 1;
+ right[i] = tmp - right[i];
+ }
+ frame->subframes[1].obits++;
+ } else if (frame->ch_mode == FLAC_CHMODE_LEFT_SIDE) {
+ for (i = 0; i < n; i++)
+ right[i] = left[i] - right[i];
+ frame->subframes[1].obits++;
+ } else {
+ for (i = 0; i < n; i++)
+ left[i] -= right[i];
+ frame->subframes[0].obits++;
+ }
}
}
@@ -1226,14 +1490,38 @@ static void write_subframes(FlacEncodeContext *s)
/* subframe */
if (sub->type == FLAC_SUBFRAME_CONSTANT) {
- put_sbits(&s->pb, sub->obits, res[0]);
+ if(sub->obits == 33)
+ put_sbits64(&s->pb, 33, s->frame.samples_33bps[0]);
+ else if(sub->obits == 32)
+ put_bits32(&s->pb, res[0]);
+ else
+ put_sbits(&s->pb, sub->obits, res[0]);
} else if (sub->type == FLAC_SUBFRAME_VERBATIM) {
- while (res < frame_end)
- put_sbits(&s->pb, sub->obits, *res++);
+ if (sub->obits == 33) {
+ int64_t *res64 = s->frame.samples_33bps;
+ int64_t *frame_end64 = &s->frame.samples_33bps[s->frame.blocksize];
+ while (res64 < frame_end64)
+ put_sbits64(&s->pb, 33, (*res64++));
+ } else if (sub->obits == 32) {
+ while (res < frame_end)
+ put_bits32(&s->pb, *res++);
+ } else {
+ while (res < frame_end)
+ put_sbits(&s->pb, sub->obits, *res++);
+ }
} else {
/* warm-up samples */
- for (i = 0; i < sub->order; i++)
- put_sbits(&s->pb, sub->obits, *res++);
+ if (sub->obits == 33) {
+ for (i = 0; i < sub->order; i++)
+ put_sbits64(&s->pb, 33, s->frame.samples_33bps[i]);
+ res += sub->order;
+ } else if (sub->obits == 32) {
+ for (i = 0; i < sub->order; i++)
+ put_bits32(&s->pb, *res++);
+ } else {
+ for (i = 0; i < sub->order; i++)
+ put_sbits(&s->pb, sub->obits, *res++);
+ }
/* LPC coefficients */
if (sub->type == FLAC_SUBFRAME_LPC) {
@@ -1258,7 +1546,7 @@ static void write_subframes(FlacEncodeContext *s)
int k = sub->rc.params[p];
put_bits(&s->pb, sub->rc.coding_mode, k);
while (res < part_end)
- set_sr_golomb_flac(&s->pb, *res++, k, INT32_MAX, 0);
+ set_sr_golomb_flac(&s->pb, *res++, k);
part_end = FFMIN(frame_end, part_end + psize);
}
}
@@ -1306,7 +1594,7 @@ static int update_md5_sum(FlacEncodeContext *s, const void *samples)
(const uint16_t *) samples, buf_size / 2);
buf = s->md5_buffer;
#endif
- } else {
+ } else if (s->avctx->bits_per_raw_sample <= 24) {
int i;
const int32_t *samples0 = samples;
uint8_t *tmp = s->md5_buffer;
@@ -1316,6 +1604,15 @@ static int update_md5_sum(FlacEncodeContext *s, const void *samples)
AV_WL24(tmp + 3*i, v);
}
buf = s->md5_buffer;
+ } else {
+ /* s->avctx->bits_per_raw_sample <= 32 */
+ int i;
+ const int32_t *samples0 = samples;
+ uint8_t *tmp = s->md5_buffer;
+
+ for (i = 0; i < s->frame.blocksize * s->channels; i++)
+ AV_WL32(tmp + 4*i, samples0[i]);
+ buf = s->md5_buffer;
}
av_md5_update(s->md5ctx, buf, buf_size);
diff --git a/libavcodec/put_bits.h b/libavcodec/put_bits.h
index 4b4f977ad5..11360fe7a4 100644
--- a/libavcodec/put_bits.h
+++ b/libavcodec/put_bits.h
@@ -363,6 +363,13 @@ static inline void put_bits64(PutBitContext *s, int n, uint64_t value)
}
}
+/**
+ * Write the low n bits of a signed value.
+ * The value is masked to n bits before being handed to put_bits64().
+ *
+ * @param n number of bits to write, 0 to 64
+ */
+static inline void put_sbits64(PutBitContext *pb, int n, int64_t value)
+{
+ av_assert2(n >= 0 && n <= 64);
+
+ /* UINT64_MAX << 64 is undefined; a full 64-bit write needs no mask */
+ put_bits64(pb, n, n == 64 ? (uint64_t)(value)
+ : (uint64_t)(value) & (~(UINT64_MAX << n)));
+}
+
/**
* Return the pointer to the byte where the bitstream writer will put
* the next bit.
diff --git a/libavcodec/put_golomb.h b/libavcodec/put_golomb.h
index 9ca911fc3c..ca4b182ae8 100644
--- a/libavcodec/put_golomb.h
+++ b/libavcodec/put_golomb.h
@@ -154,15 +154,22 @@ static inline void set_sr_golomb(PutBitContext *pb, int i, int k, int limit,
/**
* write signed golomb rice code (flac).
*/
-static inline void set_sr_golomb_flac(PutBitContext *pb, int i, int k,
- int limit, int esc_len)
+static inline void set_sr_golomb_flac(PutBitContext *pb, int i, int k)
{
- int v;
+ unsigned v, e;
- v = -2 * i - 1;
- v ^= (v >> 31);
+ /* fold the sign into the lowest bit: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ... */
+ v = ((unsigned)(i) << 1) ^ (i >> 31);
- set_ur_golomb_jpegls(pb, v, k, limit, esc_len);
+ /* unary part: (v >> k) zero bits followed by a one bit, written in
+ * pieces of at most 31 bits per put_bits() call */
+ e = (v >> k) + 1;
+ while (e > 31) {
+ put_bits(pb, 31, 0);
+ e -= 31;
+ }
+ put_bits(pb, e, 1);
+ /* binary part: the k low bits of v; skipped for k == 0, where the
+ * mask shift count would be 32 */
+ if (k) {
+ unsigned mask = UINT32_MAX >> (32-k);
+ put_bits(pb, k, v & mask);
+ }
}
#endif /* AVCODEC_PUT_GOLOMB_H */
--
2.30.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread