[FFmpeg-devel] [PATCH] avcodec/aacenc: add bitrate threshold for PNS and improve attack detection (PR #20815)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

* [FFmpeg-devel] [PATCH] avcodec/aacenc: add bitrate threshold for PNS and improve attack detection (PR #20815)
@ 2025-11-01 21:52 Agent45 via ffmpeg-devel
  0 siblings, 0 replies; only message in thread
From: Agent45 via ffmpeg-devel @ 2025-11-01 21:52 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Agent45

PR #20815 opened by Agent45
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20815
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20815.patch

- Fixes indexing errors in attack detection logic and introduces a state flag (next_attack0_zero) to stabilize attack[0] prediction across frames. This reduces vertical line artifacts in periodic signals such as trumpet.
- Changes PSY_LAME_NUM_SUBBLOCKS from 3 to 2 to ensure full coverage of all 1024 MDCT samples, with each subblock containing exactly 64 samples—matching LAME’s empirical design. Also adjusts the attack threshold presets. Together, these changes improve the handling of periodic signals, especially under low bitrate conditions.
- Disables PNS when the per-channel bitrate is 64 kbps or higher (PNS remains enabled only below that limit). This avoids unnecessary noise substitution in high-bitrate scenarios where it may degrade quality.
This resolves issue #20200.


From 458a942481151ede27478e6ac6d9d2866d438b84 Mon Sep 17 00:00:00 2001
From: Agent45 <agent45@noreply.code.ffmpeg.org>
Date: Sat, 1 Nov 2025 19:49:05 +0000
Subject: [PATCH 1/2] avcodec/aacenc: add bitrate threshold for PNS

---
 libavcodec/aaccoder.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 7f1c4cdcc1..ddebdfd53d 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -58,6 +58,9 @@
  * replace low energy non zero bands */
 #define NOISE_LAMBDA_REPLACE 1.948f
 
+/* Bitrate threshold (in bits/sec/channel) above which PNS is disabled. */
+#define PNS_BITRATE_LIMIT 64000.0f
+
 #include "libavcodec/aaccoder_trellis.h"
 
 typedef float (*quantize_and_encode_band_func)(struct AACEncContext *s, PutBitContext *pb,
@@ -513,6 +516,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
         ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
         : (avctx->bit_rate / avctx->ch_layout.nb_channels);
 
+	int pns_at_low_bitrate = frame_bit_rate < PNS_BITRATE_LIMIT;
     frame_bit_rate *= 1.15f;
 
     if (avctx->cutoff > 0) {
@@ -536,7 +540,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
             const int start = wstart+sce->ics.swb_offset[g];
             const float freq = (start-wstart)*freq_mult;
             const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
-            if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
+            if (!pns_at_low_bitrate || freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
                 if (!sce->zeroes[w*16+g])
                     prev_sf = sce->sf_idx[w*16+g];
                 continue;
@@ -649,6 +653,7 @@ static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelEleme
         ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
         : (avctx->bit_rate / avctx->ch_layout.nb_channels);
 
+	int pns_at_low_bitrate = frame_bit_rate < PNS_BITRATE_LIMIT;
     frame_bit_rate *= 1.15f;
 
     if (avctx->cutoff > 0) {
@@ -667,7 +672,7 @@ static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelEleme
             const int start = sce->ics.swb_offset[g];
             const float freq = start*freq_mult;
             const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
-            if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
+            if (!pns_at_low_bitrate || freq < NOISE_LOW_LIMIT || start >= cutoff) {
                 sce->can_pns[w*16+g] = 0;
                 continue;
             }
-- 
2.49.1


From 81a985d6a92eb411ba495fb05a3a962d181649ea Mon Sep 17 00:00:00 2001
From: Agent45 <agent45@noreply.code.ffmpeg.org>
Date: Sat, 1 Nov 2025 20:41:24 +0000
Subject: [PATCH 2/2] avcodec/aacpsy: fix attack detection logic and subblock
 indexing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix several indexing errors in attack detection logic and refine transient handling in the AAC psychoacoustic model.

- Change PSY_LAME_NUM_SUBBLOCKS from 3 to 2 to ensure full coverage of all 1024 MDCT samples, with each subblock containing exactly 1024 / (8 * 2) = 64 samples—matching LAME’s empirical design.
- Introduce next_attack0_zero state flag to stabilize attack[0] prediction across frames.
- Adjust attack threshold presets.

These changes improve the handling of periodic signals such as trumpet, especially under low bitrate conditions.
---
 libavcodec/aacpsy.c | 64 +++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 31 deletions(-)

diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c
index ed03cb68ac..f91ba45a52 100644
--- a/libavcodec/aacpsy.c
+++ b/libavcodec/aacpsy.c
@@ -97,7 +97,7 @@ enum {
 #define AAC_BLOCK_SIZE_LONG 1024    ///< long block size
 #define AAC_BLOCK_SIZE_SHORT 128    ///< short block size
 #define AAC_NUM_BLOCKS_SHORT 8      ///< number of blocks in a short sequence
-#define PSY_LAME_NUM_SUBBLOCKS 3    ///< Number of sub-blocks in each short block
+#define PSY_LAME_NUM_SUBBLOCKS 2    ///< Number of sub-blocks in each short block
 
 /**
  * @}
@@ -133,6 +133,7 @@ typedef struct AacPsyChannel{
     float attack_threshold;              ///< attack threshold for this channel
     float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS];
     int   prev_attack;                   ///< attack value for the last short block in the previous sequence
+    int   next_attack0_zero;          ///< whether attack[0] of the next frame is zero
 }AacPsyChannel;
 
 /**
@@ -181,19 +182,19 @@ typedef struct PsyLamePreset {
 static const PsyLamePreset psy_abr_map[] = {
 /* TODO: Tuning. These were taken from LAME. */
 /* kbps/ch st_lrm   */
-    {  8,  6.60},
-    { 16,  6.60},
-    { 24,  6.60},
-    { 32,  6.60},
-    { 40,  6.60},
-    { 48,  6.60},
-    { 56,  6.60},
-    { 64,  6.40},
-    { 80,  6.00},
-    { 96,  5.60},
-    {112,  5.20},
-    {128,  5.20},
-    {160,  5.20}
+    {  8,  7.60},
+    { 16,  7.60},
+    { 24,  7.60},
+    { 32,  7.60},
+    { 40,  7.60},
+    { 48,  7.60},
+    { 56,  7.60},
+    { 64,  7.40},
+    { 80,  7.00},
+    { 96,  6.60},
+    {112,  6.20},
+    {128,  6.20},
+    {160,  6.20}
 };
 
 /**
@@ -900,8 +901,8 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
         /* Calculate the energies of each sub-shortblock */
         for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
             energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
-            assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
-            attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
+            assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS - 2)] > 0);
+            attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS - 2)];
             energy_short[0] += energy_subshort[i];
         }
 
@@ -912,17 +913,12 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
                 p = FFMAX(p, fabsf(*pf));
             pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
             energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
-            /* NOTE: The indexes below are [i + 3 - 2] in the LAME source.
-             *       Obviously the 3 and 2 have some significance, or this would be just [i + 1]
-             *       (which is what we use here). What the 3 stands for is ambiguous, as it is both
-             *       number of short blocks, and the number of sub-short blocks.
-             *       It seems that LAME is comparing each sub-block to sub-block + 1 in the
-             *       previous block.
-             */
-            if (p > energy_subshort[i + 1])
-                p = p / energy_subshort[i + 1];
-            else if (energy_subshort[i + 1] > p * 10.0f)
-                p = energy_subshort[i + 1] / (p * 10.0f);
+            
+            /* NOTE: The indexes below are [i + 3 - 2] in the LAME source. Compare each sub-block to sub-block - 2 */
+            if (p > energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2])
+                p = p / energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2];
+            else if (energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] > p * 10.0f)
+                p = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS - 2] / (p * 10.0f);
             else
                 p = 0.0;
             attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
@@ -943,7 +939,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
             const float v = energy_short[i];
             const float m = FFMAX(u, v);
             if (m < 40000) {                          /* (2) */
-                if (u < 1.7f * v && v < 1.7f * u) {   /* (1) */
+                if (u < 2.3f * v && v < 2.3f * u) {   /* (1) */
                     if (i == 1 && attacks[0] < attacks[i])
                         attacks[0] = 0;
                     attacks[i] = 0;
@@ -951,13 +947,19 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
             }
             att_sum += attacks[i];
         }
+		if (pch->next_attack0_zero)
+            attacks[0] = 0;
+		if (attacks[AAC_NUM_BLOCKS_SHORT] == 0)
+            pch->next_attack0_zero = 1;
+        else
+			pch->next_attack0_zero = 0;
 
         if (attacks[0] <= pch->prev_attack)
             attacks[0] = 0;
 
         att_sum += attacks[0];
-        /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */
-        if (pch->prev_attack == 3 || att_sum) {
+		/* If the previous attack happened in the last sub-block of the previous sequence, or if there's a new attack, use short window */
+        if (pch->prev_attack == PSY_LAME_NUM_SUBBLOCKS || att_sum) {
             uselongblock = 0;
 
             for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++)
@@ -1007,7 +1009,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
     }
     pch->next_grouping = window_grouping[grouping];
 
-    pch->prev_attack = attacks[8];
+    pch->prev_attack = attacks[AAC_NUM_BLOCKS_SHORT - 1];
 
     return wi;
 }
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2025-11-01 21:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-01 21:52 [FFmpeg-devel] [PATCH] avcodec/aacenc: add bitrate threshold for PNS and improve attack detection (PR #20815) Agent45 via ffmpeg-devel

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git