* [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II
@ 2025-06-13 16:37 Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 02/13] avfilter/f_ebur128: simplify sample cache array Niklas Haas
` (11 more replies)
0 siblings, 12 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
Use the transposed direct form II biquad structure instead of direct form I.
See af_biquads.c for the math. Also eliminate an unnecessary indirection.
---
libavfilter/f_ebur128.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 768f062bac..173a4f75ca 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -686,17 +686,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
/* Y[i] = X[i]*b0 + X[i-1]*b1 + X[i-2]*b2 - Y[i-1]*a1 - Y[i-2]*a2 */
#define FILTER(Y, X, NUM, DEN) do { \
double *dst = ebur128->Y + ch*3; \
- double *src = ebur128->X + ch*3; \
- dst[2] = dst[1]; \
- dst[1] = dst[0]; \
- dst[0] = src[0]*NUM[0] + src[1]*NUM[1] + src[2]*NUM[2] \
- - dst[1]*DEN[1] - dst[2]*DEN[2]; \
+ double src = ebur128->X[ch*3] ; \
+ double dst0 = NUM[0] * src + dst[1]; \
+ dst[1] = NUM[1] * src + dst[2] - DEN[1] * dst0; \
+ dst[2] = NUM[2] * src - DEN[2] * dst0; \
+ dst[0] = dst0; \
} while (0)
// TODO: merge both filters in one?
FILTER(y, x, ebur128->pre_b, ebur128->pre_a); // apply pre-filter
- ebur128->x[ch * 3 + 2] = ebur128->x[ch * 3 + 1];
- ebur128->x[ch * 3 + 1] = ebur128->x[ch * 3 ];
FILTER(z, y, ebur128->rlb_b, ebur128->rlb_a); // apply RLB-filter
bin = ebur128->z[ch * 3] * ebur128->z[ch * 3];
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 02/13] avfilter/f_ebur128: simplify sample cache array
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 03/13] avfilter/f_ebur128: use structs for biquad weights Niklas Haas
` (10 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
We don't need an X sample cache anymore, and we can also simplify the
access macro slightly.
---
libavfilter/f_ebur128.c | 29 +++++++++++------------------
1 file changed, 11 insertions(+), 18 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 173a4f75ca..d0707e9ef9 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -115,7 +115,6 @@ typedef struct EBUR128Context {
/* Filter caches.
* The mult by 3 in the following is for X[i], X[i-1] and X[i-2] */
- double *x; ///< 3 input samples cache for each channel
double *y; ///< 3 pre-filter samples cache for each channel
double *z; ///< 3 RLB-filter samples cache for each channel
double pre_b[3]; ///< pre-filter numerator coefficients
@@ -446,11 +445,10 @@ static int config_audio_output(AVFilterLink *outlink)
AV_CH_SURROUND_DIRECT_LEFT |AV_CH_SURROUND_DIRECT_RIGHT)
ebur128->nb_channels = nb_channels;
- ebur128->x = av_calloc(nb_channels, 3 * sizeof(*ebur128->x));
ebur128->y = av_calloc(nb_channels, 3 * sizeof(*ebur128->y));
ebur128->z = av_calloc(nb_channels, 3 * sizeof(*ebur128->z));
ebur128->ch_weighting = av_calloc(nb_channels, sizeof(*ebur128->ch_weighting));
- if (!ebur128->ch_weighting || !ebur128->x || !ebur128->y || !ebur128->z)
+ if (!ebur128->ch_weighting || !ebur128->y || !ebur128->z)
return AVERROR(ENOMEM);
#define I400_BINS(x) ((x) * 4 / 10)
@@ -673,34 +671,30 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
MOVE_TO_NEXT_CACHED_ENTRY(3000);
for (ch = 0; ch < nb_channels; ch++) {
- double bin;
-
if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS)
ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], fabs(samples[idx_insample * nb_channels + ch]));
- ebur128->x[ch * 3] = samples[idx_insample * nb_channels + ch]; // set X[i]
-
if (!ebur128->ch_weighting[ch])
continue;
/* Y[i] = X[i]*b0 + X[i-1]*b1 + X[i-2]*b2 - Y[i-1]*a1 - Y[i-2]*a2 */
-#define FILTER(Y, X, NUM, DEN) do { \
- double *dst = ebur128->Y + ch*3; \
- double src = ebur128->X[ch*3] ; \
- double dst0 = NUM[0] * src + dst[1]; \
- dst[1] = NUM[1] * src + dst[2] - DEN[1] * dst0; \
- dst[2] = NUM[2] * src - DEN[2] * dst0; \
- dst[0] = dst0; \
+#define FILTER(DST, SRC, NUM, DEN) do { \
+ const double tmp = DST[0] = NUM[0] * SRC + DST[1]; \
+ DST[1] = NUM[1] * SRC + DST[2] - DEN[1] * tmp; \
+ DST[2] = NUM[2] * SRC - DEN[2] * tmp; \
} while (0)
+ const double x = samples[idx_insample * nb_channels + ch];
+ double *restrict y = &ebur128->y[3 * ch];
+ double *restrict z = &ebur128->z[3 * ch];
+
// TODO: merge both filters in one?
FILTER(y, x, ebur128->pre_b, ebur128->pre_a); // apply pre-filter
- FILTER(z, y, ebur128->rlb_b, ebur128->rlb_a); // apply RLB-filter
-
- bin = ebur128->z[ch * 3] * ebur128->z[ch * 3];
+ FILTER(z, *y, ebur128->rlb_b, ebur128->rlb_a); // apply RLB-filter
/* add the new value, and limit the sum to the cache size (400ms or 3s)
* by removing the oldest one */
+ double bin = *z * *z;
ebur128->i400.sum [ch] = ebur128->i400.sum [ch] + bin - ebur128->i400.cache [ch][bin_id_400];
ebur128->i3000.sum[ch] = ebur128->i3000.sum[ch] + bin - ebur128->i3000.cache[ch][bin_id_3000];
@@ -1073,7 +1067,6 @@ static av_cold void uninit(AVFilterContext *ctx)
}
av_freep(&ebur128->y_line_ref);
- av_freep(&ebur128->x);
av_freep(&ebur128->y);
av_freep(&ebur128->z);
av_freep(&ebur128->ch_weighting);
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 03/13] avfilter/f_ebur128: use structs for biquad weights
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 02/13] avfilter/f_ebur128: simplify sample cache array Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 04/13] avfilter/f_ebur128: use a single packed array for the integrator cache Niklas Haas
` (9 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
Simplifies the code a bit. In particular, the copy to the stack is marginally
faster.
---
libavfilter/f_ebur128.c | 52 +++++++++++++++++++++++------------------
1 file changed, 29 insertions(+), 23 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index d0707e9ef9..776329db1c 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -75,6 +75,11 @@ struct integrator {
struct rect { int x, y, w, h; };
+struct biquad {
+ double b0, b1, b2;
+ double a1, a2;
+};
+
typedef struct EBUR128Context {
const AVClass *class; ///< AVClass context for log and options purpose
@@ -117,10 +122,8 @@ typedef struct EBUR128Context {
* The mult by 3 in the following is for X[i], X[i-1] and X[i-2] */
double *y; ///< 3 pre-filter samples cache for each channel
double *z; ///< 3 RLB-filter samples cache for each channel
- double pre_b[3]; ///< pre-filter numerator coefficients
- double pre_a[3]; ///< pre-filter denominator coefficients
- double rlb_b[3]; ///< rlb-filter numerator coefficients
- double rlb_a[3]; ///< rlb-filter denominator coefficients
+ struct biquad pre;
+ struct biquad rlb;
struct integrator i400; ///< 400ms integrator, used for Momentary loudness (M), and Integrated loudness (I)
struct integrator i3000; ///< 3s integrator, used for Short term loudness (S), and Loudness Range (LRA)
@@ -405,21 +408,21 @@ static int config_audio_input(AVFilterLink *inlink)
double a0 = 1.0 + K / Q + K * K;
- ebur128->pre_b[0] = (Vh + Vb * K / Q + K * K) / a0;
- ebur128->pre_b[1] = 2.0 * (K * K - Vh) / a0;
- ebur128->pre_b[2] = (Vh - Vb * K / Q + K * K) / a0;
- ebur128->pre_a[1] = 2.0 * (K * K - 1.0) / a0;
- ebur128->pre_a[2] = (1.0 - K / Q + K * K) / a0;
+ ebur128->pre.b0 = (Vh + Vb * K / Q + K * K) / a0;
+ ebur128->pre.b1 = 2.0 * (K * K - Vh) / a0;
+ ebur128->pre.b2 = (Vh - Vb * K / Q + K * K) / a0;
+ ebur128->pre.a1 = 2.0 * (K * K - 1.0) / a0;
+ ebur128->pre.a2 = (1.0 - K / Q + K * K) / a0;
f0 = 38.13547087602444;
Q = 0.5003270373238773;
K = tan(M_PI * f0 / (double)inlink->sample_rate);
- ebur128->rlb_b[0] = 1.0;
- ebur128->rlb_b[1] = -2.0;
- ebur128->rlb_b[2] = 1.0;
- ebur128->rlb_a[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K);
- ebur128->rlb_a[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K);
+ ebur128->rlb.b0 = 1.0;
+ ebur128->rlb.b1 = -2.0;
+ ebur128->rlb.b2 = 1.0;
+ ebur128->rlb.a1 = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K);
+ ebur128->rlb.a2 = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K);
/* Force 100ms framing in case of metadata injection: the frames must have
* a granularity of the window overlap to be accurately exploited.
@@ -654,6 +657,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
}
#endif
+ const struct biquad pre = ebur128->pre;
+ const struct biquad rlb = ebur128->rlb;
+
for (idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
const int bin_id_400 = ebur128->i400.cache_pos;
const int bin_id_3000 = ebur128->i3000.cache_pos;
@@ -678,10 +684,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
continue;
/* Y[i] = X[i]*b0 + X[i-1]*b1 + X[i-2]*b2 - Y[i-1]*a1 - Y[i-2]*a2 */
-#define FILTER(DST, SRC, NUM, DEN) do { \
- const double tmp = DST[0] = NUM[0] * SRC + DST[1]; \
- DST[1] = NUM[1] * SRC + DST[2] - DEN[1] * tmp; \
- DST[2] = NUM[2] * SRC - DEN[2] * tmp; \
+#define FILTER(DST, SRC, FILT) do { \
+ const double tmp = DST[0] = FILT.b0 * SRC + DST[1]; \
+ DST[1] = FILT.b1 * SRC + DST[2] - FILT.a1 * tmp; \
+ DST[2] = FILT.b2 * SRC - FILT.a2 * tmp; \
} while (0)
const double x = samples[idx_insample * nb_channels + ch];
@@ -689,14 +695,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
double *restrict z = &ebur128->z[3 * ch];
// TODO: merge both filters in one?
- FILTER(y, x, ebur128->pre_b, ebur128->pre_a); // apply pre-filter
- FILTER(z, *y, ebur128->rlb_b, ebur128->rlb_a); // apply RLB-filter
+ FILTER(y, x, pre); // apply pre-filter
+ FILTER(z, *y, rlb); // apply RLB-filter
/* add the new value, and limit the sum to the cache size (400ms or 3s)
* by removing the oldest one */
- double bin = *z * *z;
- ebur128->i400.sum [ch] = ebur128->i400.sum [ch] + bin - ebur128->i400.cache [ch][bin_id_400];
- ebur128->i3000.sum[ch] = ebur128->i3000.sum[ch] + bin - ebur128->i3000.cache[ch][bin_id_3000];
+ const double bin = *z * *z;
+ ebur128->i400.sum [ch] += bin - ebur128->i400.cache [ch][bin_id_400];
+ ebur128->i3000.sum[ch] += bin - ebur128->i3000.cache[ch][bin_id_3000];
/* override old cache entry with the new value */
ebur128->i400.cache [ch][bin_id_400 ] = bin;
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 04/13] avfilter/f_ebur128: use a single packed array for the integrator cache
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 02/13] avfilter/f_ebur128: simplify sample cache array Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 03/13] avfilter/f_ebur128: use structs for biquad weights Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 05/13] avfilter/f_ebur128: move weights and cache to EBUR128DSPContext Niklas Haas
` (8 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
Instead of having a planar array for each channel, use a single packed array.
This will help processing multiple channels in parallel, as we can directly
load all channels' data in a single load instruction.
Also improves memory locality of data, as the loop order is:
for (samples) {
for (channels) {
process sample
}
}
---
libavfilter/f_ebur128.c | 36 ++++++++++--------------------------
1 file changed, 10 insertions(+), 26 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 776329db1c..9f7c080750 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -62,7 +62,7 @@ struct hist_entry {
};
struct integrator {
- double **cache; ///< window of filtered samples (N ms)
+ double *cache; ///< window of filtered samples (N ms)
int cache_pos; ///< focus on the last added bin in the cache array
int cache_size;
double *sum; ///< sum of the last N ms filtered samples (cache content)
@@ -457,10 +457,12 @@ static int config_audio_output(AVFilterLink *outlink)
#define I400_BINS(x) ((x) * 4 / 10)
#define I3000_BINS(x) ((x) * 3)
+ ebur128->i400.cache_size = I400_BINS(outlink->sample_rate);
+ ebur128->i3000.cache_size = I3000_BINS(outlink->sample_rate);
ebur128->i400.sum = av_calloc(nb_channels, sizeof(*ebur128->i400.sum));
ebur128->i3000.sum = av_calloc(nb_channels, sizeof(*ebur128->i3000.sum));
- ebur128->i400.cache = av_calloc(nb_channels, sizeof(*ebur128->i400.cache));
- ebur128->i3000.cache = av_calloc(nb_channels, sizeof(*ebur128->i3000.cache));
+ ebur128->i400.cache = av_calloc(nb_channels * ebur128->i400.cache_size, sizeof(*ebur128->i400.cache));
+ ebur128->i3000.cache = av_calloc(nb_channels * ebur128->i3000.cache_size, sizeof(*ebur128->i3000.cache));
if (!ebur128->i400.sum || !ebur128->i3000.sum ||
!ebur128->i400.cache || !ebur128->i3000.cache)
return AVERROR(ENOMEM);
@@ -475,17 +477,6 @@ static int config_audio_output(AVFilterLink *outlink)
} else {
ebur128->ch_weighting[i] = 1.0;
}
-
- if (!ebur128->ch_weighting[i])
- continue;
-
- /* bins buffer for the two integration window (400ms and 3s) */
- ebur128->i400.cache_size = I400_BINS(outlink->sample_rate);
- ebur128->i3000.cache_size = I3000_BINS(outlink->sample_rate);
- ebur128->i400.cache[i] = av_calloc(ebur128->i400.cache_size, sizeof(*ebur128->i400.cache[0]));
- ebur128->i3000.cache[i] = av_calloc(ebur128->i3000.cache_size, sizeof(*ebur128->i3000.cache[0]));
- if (!ebur128->i400.cache[i] || !ebur128->i3000.cache[i])
- return AVERROR(ENOMEM);
}
#if CONFIG_SWRESAMPLE
@@ -663,6 +654,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
for (idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
const int bin_id_400 = ebur128->i400.cache_pos;
const int bin_id_3000 = ebur128->i3000.cache_pos;
+ double *restrict cache_400 = &ebur128->i400.cache[bin_id_400 * nb_channels];
+ double *restrict cache_3000 = &ebur128->i3000.cache[bin_id_3000 * nb_channels];
#define MOVE_TO_NEXT_CACHED_ENTRY(time) do { \
ebur128->i##time.cache_pos++; \
@@ -701,12 +694,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
/* add the new value, and limit the sum to the cache size (400ms or 3s)
* by removing the oldest one */
const double bin = *z * *z;
- ebur128->i400.sum [ch] += bin - ebur128->i400.cache [ch][bin_id_400];
- ebur128->i3000.sum[ch] += bin - ebur128->i3000.cache[ch][bin_id_3000];
-
- /* override old cache entry with the new value */
- ebur128->i400.cache [ch][bin_id_400 ] = bin;
- ebur128->i3000.cache[ch][bin_id_3000] = bin;
+ ebur128->i400.sum [ch] += bin - cache_400[ch];
+ ebur128->i3000.sum[ch] += bin - cache_3000[ch];
+ cache_400[ch] = cache_3000[ch] = bin;
}
#define FIND_PEAK(global, sp, ptype) do { \
@@ -1083,12 +1073,6 @@ static av_cold void uninit(AVFilterContext *ctx)
av_freep(&ebur128->i3000.sum);
av_freep(&ebur128->i400.histogram);
av_freep(&ebur128->i3000.histogram);
- for (int i = 0; i < ebur128->nb_channels; i++) {
- if (ebur128->i400.cache)
- av_freep(&ebur128->i400.cache[i]);
- if (ebur128->i3000.cache)
- av_freep(&ebur128->i3000.cache[i]);
- }
av_freep(&ebur128->i400.cache);
av_freep(&ebur128->i3000.cache);
av_frame_free(&ebur128->outpicref);
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 05/13] avfilter/f_ebur128: move weights and cache to EBUR128DSPContext
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
` (2 preceding siblings ...)
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 04/13] avfilter/f_ebur128: use a single packed array for the integrator cache Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 06/13] avfilter/f_ebur128: split off C implementation to separate function Niklas Haas
` (7 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
---
libavfilter/f_ebur128.c | 53 +++++++++++++++++------------------------
libavfilter/f_ebur128.h | 35 +++++++++++++++++++++++++++
2 files changed, 57 insertions(+), 31 deletions(-)
create mode 100644 libavfilter/f_ebur128.h
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 9f7c080750..c3328dc520 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -43,6 +43,8 @@
#include "formats.h"
#include "video.h"
+#include "f_ebur128.h"
+
#define ABS_THRES -70 ///< silence gate: we discard anything below this absolute (LUFS) threshold
#define ABS_UP_THRES 10 ///< upper loud limit to consider (ABS_THRES being the minimum)
#define HIST_GRAIN 100 ///< defines histogram precision
@@ -75,13 +77,9 @@ struct integrator {
struct rect { int x, y, w, h; };
-struct biquad {
- double b0, b1, b2;
- double a1, a2;
-};
-
typedef struct EBUR128Context {
const AVClass *class; ///< AVClass context for log and options purpose
+ EBUR128DSPContext dsp;
/* peak metering */
int peak_mode; ///< enabled peak modes
@@ -118,13 +116,6 @@ typedef struct EBUR128Context {
int idx_insample; ///< current sample position of processed samples in single input frame
AVFrame *insamples; ///< input samples reference, updated regularly
- /* Filter caches.
- * The mult by 3 in the following is for X[i], X[i-1] and X[i-2] */
- double *y; ///< 3 pre-filter samples cache for each channel
- double *z; ///< 3 RLB-filter samples cache for each channel
- struct biquad pre;
- struct biquad rlb;
-
struct integrator i400; ///< 400ms integrator, used for Momentary loudness (M), and Integrated loudness (I)
struct integrator i3000; ///< 3s integrator, used for Short term loudness (S), and Loudness Range (LRA)
@@ -408,21 +399,21 @@ static int config_audio_input(AVFilterLink *inlink)
double a0 = 1.0 + K / Q + K * K;
- ebur128->pre.b0 = (Vh + Vb * K / Q + K * K) / a0;
- ebur128->pre.b1 = 2.0 * (K * K - Vh) / a0;
- ebur128->pre.b2 = (Vh - Vb * K / Q + K * K) / a0;
- ebur128->pre.a1 = 2.0 * (K * K - 1.0) / a0;
- ebur128->pre.a2 = (1.0 - K / Q + K * K) / a0;
+ ebur128->dsp.pre.b0 = (Vh + Vb * K / Q + K * K) / a0;
+ ebur128->dsp.pre.b1 = 2.0 * (K * K - Vh) / a0;
+ ebur128->dsp.pre.b2 = (Vh - Vb * K / Q + K * K) / a0;
+ ebur128->dsp.pre.a1 = 2.0 * (K * K - 1.0) / a0;
+ ebur128->dsp.pre.a2 = (1.0 - K / Q + K * K) / a0;
f0 = 38.13547087602444;
Q = 0.5003270373238773;
K = tan(M_PI * f0 / (double)inlink->sample_rate);
- ebur128->rlb.b0 = 1.0;
- ebur128->rlb.b1 = -2.0;
- ebur128->rlb.b2 = 1.0;
- ebur128->rlb.a1 = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K);
- ebur128->rlb.a2 = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K);
+ ebur128->dsp.rlb.b0 = 1.0;
+ ebur128->dsp.rlb.b1 = -2.0;
+ ebur128->dsp.rlb.b2 = 1.0;
+ ebur128->dsp.rlb.a1 = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K);
+ ebur128->dsp.rlb.a2 = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K);
/* Force 100ms framing in case of metadata injection: the frames must have
* a granularity of the window overlap to be accurately exploited.
@@ -448,10 +439,10 @@ static int config_audio_output(AVFilterLink *outlink)
AV_CH_SURROUND_DIRECT_LEFT |AV_CH_SURROUND_DIRECT_RIGHT)
ebur128->nb_channels = nb_channels;
- ebur128->y = av_calloc(nb_channels, 3 * sizeof(*ebur128->y));
- ebur128->z = av_calloc(nb_channels, 3 * sizeof(*ebur128->z));
+ ebur128->dsp.y = av_calloc(nb_channels, 3 * sizeof(*ebur128->dsp.y));
+ ebur128->dsp.z = av_calloc(nb_channels, 3 * sizeof(*ebur128->dsp.z));
ebur128->ch_weighting = av_calloc(nb_channels, sizeof(*ebur128->ch_weighting));
- if (!ebur128->ch_weighting || !ebur128->y || !ebur128->z)
+ if (!ebur128->ch_weighting || !ebur128->dsp.y || !ebur128->dsp.z)
return AVERROR(ENOMEM);
#define I400_BINS(x) ((x) * 4 / 10)
@@ -648,8 +639,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
}
#endif
- const struct biquad pre = ebur128->pre;
- const struct biquad rlb = ebur128->rlb;
+ const EBUR128Biquad pre = ebur128->dsp.pre;
+ const EBUR128Biquad rlb = ebur128->dsp.rlb;
for (idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
const int bin_id_400 = ebur128->i400.cache_pos;
@@ -684,8 +675,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
} while (0)
const double x = samples[idx_insample * nb_channels + ch];
- double *restrict y = &ebur128->y[3 * ch];
- double *restrict z = &ebur128->z[3 * ch];
+ double *restrict y = &ebur128->dsp.y[3 * ch];
+ double *restrict z = &ebur128->dsp.z[3 * ch];
// TODO: merge both filters in one?
FILTER(y, x, pre); // apply pre-filter
@@ -1063,8 +1054,8 @@ static av_cold void uninit(AVFilterContext *ctx)
}
av_freep(&ebur128->y_line_ref);
- av_freep(&ebur128->y);
- av_freep(&ebur128->z);
+ av_freep(&ebur128->dsp.y);
+ av_freep(&ebur128->dsp.z);
av_freep(&ebur128->ch_weighting);
av_freep(&ebur128->true_peaks);
av_freep(&ebur128->sample_peaks);
diff --git a/libavfilter/f_ebur128.h b/libavfilter/f_ebur128.h
new file mode 100644
index 0000000000..a8247e5aa0
--- /dev/null
+++ b/libavfilter/f_ebur128.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ * Copyright (c) 2025 Niklas Haas
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+typedef struct EBUR128Biquad {
+ double b0, b1, b2;
+ double a1, a2;
+} EBUR128Biquad;
+
+typedef struct EBUR128DSPContext {
+ /* Filter data */
+ EBUR128Biquad pre;
+ EBUR128Biquad rlb;
+
+ /* Cache of 3 samples for each channel */
+ double *y; /* after pre-filter */
+ double *z; /* after RLB-filter */
+} EBUR128DSPContext;
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 06/13] avfilter/f_ebur128: split off C implementation to separate function
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
` (3 preceding siblings ...)
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 05/13] avfilter/f_ebur128: move weights and cache to EBUR128DSPContext Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation Niklas Haas
` (6 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
I decided to separate out the peak measurement loop, both to avoid bloating
the function signature and because it is only conditionally used.
---
libavfilter/f_ebur128.c | 83 ++++++++++++++++++++++++-----------------
libavfilter/f_ebur128.h | 3 ++
2 files changed, 52 insertions(+), 34 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index c3328dc520..b9e210c05a 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -609,11 +609,48 @@ static int gate_update(struct integrator *integ, double power,
return gate_hist_pos;
}
+void ff_ebur128_filter_channels_c(const EBUR128DSPContext *dsp,
+ const double *restrict samples,
+ double *restrict cache_400,
+ double *restrict cache_3000,
+ double *restrict sum_400,
+ double *restrict sum_3000,
+ const int nb_channels)
+{
+ const EBUR128Biquad pre = dsp->pre;
+ const EBUR128Biquad rlb = dsp->rlb;
+
+ for (int ch = 0; ch < nb_channels; ch++) {
+ /* Y[i] = X[i]*b0 + X[i-1]*b1 + X[i-2]*b2 - Y[i-1]*a1 - Y[i-2]*a2 */
+#define FILTER(DST, SRC, FILT) do { \
+ const double tmp = DST[0] = FILT.b0 * SRC + DST[1]; \
+ DST[1] = FILT.b1 * SRC + DST[2] - FILT.a1 * tmp; \
+ DST[2] = FILT.b2 * SRC - FILT.a2 * tmp; \
+} while (0)
+
+ const double x = samples[ch];
+ double *restrict y = &dsp->y[3 * ch];
+ double *restrict z = &dsp->z[3 * ch];
+
+ // TODO: merge both filters in one?
+ FILTER(y, x, pre); // apply pre-filter
+ FILTER(z, *y, rlb); // apply RLB-filter
+
+ /* add the new value, and limit the sum to the cache size (400ms or 3s)
+ * by removing the oldest one */
+ const double bin = *z * *z;
+ sum_400 [ch] += bin - cache_400[ch];
+ sum_3000[ch] += bin - cache_3000[ch];
+ cache_400[ch] = cache_3000[ch] = bin;
+ }
+}
+
static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
{
int i, ch, idx_insample, ret;
AVFilterContext *ctx = inlink->dst;
EBUR128Context *ebur128 = ctx->priv;
+ const EBUR128DSPContext *dsp = &ebur128->dsp;
const int nb_channels = ebur128->nb_channels;
const int nb_samples = insamples->nb_samples;
const double *samples = (double *)insamples->data[0];
@@ -639,14 +676,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
}
#endif
- const EBUR128Biquad pre = ebur128->dsp.pre;
- const EBUR128Biquad rlb = ebur128->dsp.rlb;
-
for (idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
const int bin_id_400 = ebur128->i400.cache_pos;
const int bin_id_3000 = ebur128->i3000.cache_pos;
- double *restrict cache_400 = &ebur128->i400.cache[bin_id_400 * nb_channels];
- double *restrict cache_3000 = &ebur128->i3000.cache[bin_id_3000 * nb_channels];
#define MOVE_TO_NEXT_CACHED_ENTRY(time) do { \
ebur128->i##time.cache_pos++; \
@@ -660,35 +692,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
MOVE_TO_NEXT_CACHED_ENTRY(400);
MOVE_TO_NEXT_CACHED_ENTRY(3000);
- for (ch = 0; ch < nb_channels; ch++) {
- if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS)
- ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], fabs(samples[idx_insample * nb_channels + ch]));
-
- if (!ebur128->ch_weighting[ch])
- continue;
-
- /* Y[i] = X[i]*b0 + X[i-1]*b1 + X[i-2]*b2 - Y[i-1]*a1 - Y[i-2]*a2 */
-#define FILTER(DST, SRC, FILT) do { \
- const double tmp = DST[0] = FILT.b0 * SRC + DST[1]; \
- DST[1] = FILT.b1 * SRC + DST[2] - FILT.a1 * tmp; \
- DST[2] = FILT.b2 * SRC - FILT.a2 * tmp; \
-} while (0)
-
- const double x = samples[idx_insample * nb_channels + ch];
- double *restrict y = &ebur128->dsp.y[3 * ch];
- double *restrict z = &ebur128->dsp.z[3 * ch];
-
- // TODO: merge both filters in one?
- FILTER(y, x, pre); // apply pre-filter
- FILTER(z, *y, rlb); // apply RLB-filter
-
- /* add the new value, and limit the sum to the cache size (400ms or 3s)
- * by removing the oldest one */
- const double bin = *z * *z;
- ebur128->i400.sum [ch] += bin - cache_400[ch];
- ebur128->i3000.sum[ch] += bin - cache_3000[ch];
- cache_400[ch] = cache_3000[ch] = bin;
- }
+ ff_ebur128_filter_channels_c(dsp, &samples[idx_insample * nb_channels],
+ &ebur128->i400.cache[bin_id_400 * nb_channels],
+ &ebur128->i3000.cache[bin_id_3000 * nb_channels],
+ ebur128->i400.sum, ebur128->i3000.sum,
+ nb_channels);
#define FIND_PEAK(global, sp, ptype) do { \
int ch; \
@@ -701,6 +709,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
} \
} while (0)
+ if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS) {
+ for (ch = 0; ch < nb_channels; ch++) {
+ const double sample = samples[idx_insample * nb_channels + ch];
+ ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], fabs(sample));
+ }
+ }
+
FIND_PEAK(ebur128->sample_peak, ebur128->sample_peaks, SAMPLES);
FIND_PEAK(ebur128->true_peak, ebur128->true_peaks, TRUE);
diff --git a/libavfilter/f_ebur128.h b/libavfilter/f_ebur128.h
index a8247e5aa0..dfc2e7c75b 100644
--- a/libavfilter/f_ebur128.h
+++ b/libavfilter/f_ebur128.h
@@ -33,3 +33,6 @@ typedef struct EBUR128DSPContext {
double *y; /* after pre-filter */
double *z; /* after RLB-filter */
} EBUR128DSPContext;
+
+void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
+ double *, double *, double *, double *, int);
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
` (4 preceding siblings ...)
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 06/13] avfilter/f_ebur128: split off C implementation to separate function Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-14 1:12 ` Michael Niedermayer
2025-06-14 1:20 ` James Almer
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 08/13] avfilter/f_ebur128: remove pointless macro Niklas Haas
` (5 subsequent siblings)
11 siblings, 2 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
Processes two channels in parallel, using 128-bit XMM registers.
In theory, we could go up to YMM registers to process 4 channels, but this is
not a gain except for relatively high channel counts (e.g. 7.1), and also
complicates the sample load/store operations considerably.
I decided to only add an AVX variant, since the C code is not slow enough
to justify a separate function just for ancient CPUs.
---
libavfilter/f_ebur128.c | 15 ++--
libavfilter/f_ebur128.h | 16 ++++
libavfilter/x86/Makefile | 2 +
libavfilter/x86/f_ebur128.asm | 142 +++++++++++++++++++++++++++++++
libavfilter/x86/f_ebur128_init.c | 35 ++++++++
5 files changed, 205 insertions(+), 5 deletions(-)
create mode 100644 libavfilter/x86/f_ebur128.asm
create mode 100644 libavfilter/x86/f_ebur128_init.c
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index b9e210c05a..2d94cefce7 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -579,6 +579,11 @@ static av_cold int init(AVFilterContext *ctx)
/* summary */
av_log(ctx, AV_LOG_VERBOSE, "EBU +%d scale\n", ebur128->meter);
+ ebur128->dsp.filter_channels = ff_ebur128_filter_channels_c;
+#if ARCH_X86
+ ff_ebur128_init_x86(&ebur128->dsp);
+#endif
+
return 0;
}
@@ -692,11 +697,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
MOVE_TO_NEXT_CACHED_ENTRY(400);
MOVE_TO_NEXT_CACHED_ENTRY(3000);
- ff_ebur128_filter_channels_c(dsp, &samples[idx_insample * nb_channels],
- &ebur128->i400.cache[bin_id_400 * nb_channels],
- &ebur128->i3000.cache[bin_id_3000 * nb_channels],
- ebur128->i400.sum, ebur128->i3000.sum,
- nb_channels);
+ dsp->filter_channels(dsp, &samples[idx_insample * nb_channels],
+ &ebur128->i400.cache[bin_id_400 * nb_channels],
+ &ebur128->i3000.cache[bin_id_3000 * nb_channels],
+ ebur128->i400.sum, ebur128->i3000.sum,
+ nb_channels);
#define FIND_PEAK(global, sp, ptype) do { \
int ch; \
diff --git a/libavfilter/f_ebur128.h b/libavfilter/f_ebur128.h
index dfc2e7c75b..decf3917af 100644
--- a/libavfilter/f_ebur128.h
+++ b/libavfilter/f_ebur128.h
@@ -19,6 +19,9 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <assert.h>
+#include <stddef.h>
+
typedef struct EBUR128Biquad {
double b0, b1, b2;
double a1, a2;
@@ -32,7 +35,20 @@ typedef struct EBUR128DSPContext {
/* Cache of 3 samples for each channel */
double *y; /* after pre-filter */
double *z; /* after RLB-filter */
+
+ /* DSP functions */
+ void (*filter_channels)(const struct EBUR128DSPContext *dsp,
+ const double *samples,
+ double *cache_400, double *cache_3000,
+ double *sum_400, double *sum_3000,
+ int nb_channels);
} EBUR128DSPContext;
+static_assert(offsetof(EBUR128DSPContext, pre) == 0, "struct layout mismatch");
+static_assert(offsetof(EBUR128DSPContext, rlb) == 5 * sizeof(double), "struct layout mismatch");
+static_assert(offsetof(EBUR128DSPContext, y) == 10 * sizeof(double), "struct layout mismatch");
+
+void ff_ebur128_init_x86(EBUR128DSPContext *dsp);
+
void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
double *, double *, double *, double *, int);
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 0d9a28a935..0efe3f8d2c 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -7,6 +7,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o
+OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128_init.o
OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o
OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o
@@ -52,6 +53,7 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
+X86ASM-OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128.o
X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o
X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
diff --git a/libavfilter/x86/f_ebur128.asm b/libavfilter/x86/f_ebur128.asm
new file mode 100644
index 0000000000..28add77f70
--- /dev/null
+++ b/libavfilter/x86/f_ebur128.asm
@@ -0,0 +1,142 @@
+;*****************************************************************************
+;* x86-optimized functions for ebur128 filter
+;*
+;* Copyright (C) 2018 Paul B Mahol
+;* Copyright (C) 2018 Henrik Gramner
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+cextern ebur128_filter_channels_c
+
+struc Biquad
+ .b0 resq 1
+ .b1 resq 1
+ .b2 resq 1
+ .a1 resq 1
+ .a2 resq 1
+endstruc
+
+struc DSP
+ .pre resq 5
+ .rlb resq 5
+ .y resq 1
+ .z resq 1
+endstruc
+
+SECTION .text
+
+; MOVNQ num, dst, src
+; Move <num> doubles from src to dst: a single qword via movsd when num == 1,
+; otherwise a full 128-bit unaligned move via movupd.
+%macro MOVNQ 3 ; num, dst, src
+%if %1 == 1
+ movsd %2, %3
+%else
+ movupd %2, %3
+%endif
+%endmacro
+
+; FILTER: one biquad update step for one or two channels.
+; %1-%3 receive the new Y[0], Y[1], Y[2]; %4 is the input X (clobbered);
+; %5-%9 hold the coefficients; %10 points at the per-channel filter state
+; (3 doubles = 24 bytes per channel) and is advanced past the channels
+; processed; %11 is the number of channels (1 or 2).
+%macro FILTER 11 ; y0, y1, y2, x, b0, b1, b2, a1, a2, state, num_channels
+ ; Y[0] := b0 * X + Y1
+ ; Y[1] := b1 * X + Y2 - a1 * Y[0]
+ ; Y[2] := b2 * X - a2 * Y[0]
+ ; load the previous Y1/Y2 of the first channel into the low lane
+ movsd %1, [%10 + 8]
+ movsd %3, [%10 + 16]
+%if %11 > 1
+ ; the second channel's state starts 24 bytes later and uses the high lane
+ movhpd %1, [%10 + 32]
+ movhpd %3, [%10 + 40]
+%endif
+
+ ; %1 = Y[0]
+ mulpd %2, %5, %4
+ addpd %1, %2
+
+ ; %2 = Y[1]
+ mulpd %2, %8, %1
+ subpd %3, %2
+ mulpd %2, %6, %4
+ addpd %2, %3
+
+ ; %3 = Y[2]; X (%4) is overwritten here and must not be reused afterwards
+ mulpd %3, %7, %4
+ mulpd %4, %9, %1
+ subpd %3, %4
+
+ ; write the updated state back: low lane to the first channel ...
+ movlpd [%10 + 0], %1
+ movlpd [%10 + 8], %2
+ movlpd [%10 + 16], %3
+%if %11 > 1
+ ; ... and the high lane to the second channel
+ movhpd [%10 + 24], %1
+ movhpd [%10 + 32], %2
+ movhpd [%10 + 40], %3
+%endif
+ ; step the state pointer over the channels just processed
+ add %10, 24 * %11
+%endmacro
+
+; filter_channels num_channels
+; Run <num_channels> (1 or 2) channels of the current input sample through
+; both biquads (coefficients in m4-m8, then m9-m13; state pointers in r7q and
+; r8q), square the result and fold it into the two caches and running sums:
+;     sum   += bin - cache
+;     cache  = bin
+; Clobbers m0-m3 and advances samplesq, the cache pointers and the sum
+; pointers past the channels processed.
+%macro filter_channels 1 ; num_channels
+    MOVNQ %1, m3, [samplesq]
+    add samplesq, 8 * %1
+
+    FILTER m0, m1, m2, m3, m4, m5, m6, m7, m8, r7q, %1
+    FILTER m3, m1, m2, m0, m9, m10, m11, m12, m13, r8q, %1
+
+    ; update sum and cache
+    ; Every memory access goes through MOVNQ so that the single-channel case
+    ; touches exactly 8 bytes. A packed memory operand (subpd/addpd reg, [mem])
+    ; always loads 16 bytes and would read past the last channel's slot.
+    mulpd m3, m3
+    MOVNQ %1, m0, [cache400q]
+    MOVNQ %1, m1, [cache3000q]
+    subpd m0, m3, m0
+    subpd m1, m3, m1
+    MOVNQ %1, [cache400q], m3
+    MOVNQ %1, [cache3000q], m3
+    add cache400q, 8 * %1
+    add cache3000q, 8 * %1
+    MOVNQ %1, m2, [sum400q]
+    addpd m0, m2
+    MOVNQ %1, m2, [sum3000q]
+    addpd m1, m2
+    MOVNQ %1, [sum400q], m0
+    MOVNQ %1, [sum3000q], m1
+    add sum400q, 8 * %1
+    add sum3000q, 8 * %1
+%endmacro
+
+INIT_XMM avx
+; void ff_ebur128_filter_channels_avx(const EBUR128DSPContext *dsp,
+;                                     const double *samples,
+;                                     double *cache_400, double *cache_3000,
+;                                     double *sum_400, double *sum_3000,
+;                                     int nb_channels)
+; Filters one input sample of every channel, two channels per iteration, with
+; a single-channel step first when the channel count is odd.
+; NOTE(review): this asks for 14 vector and 9 general purpose registers, which
+; presumably limits it to x86-64 builds - confirm the build guards.
+cglobal ebur128_filter_channels, 7, 9, 14, dsp, samples, cache400, cache3000, sum400, sum3000, channels
+    ; Broadcast every biquad coefficient to both lanes. movddup is used
+    ; instead of vpbroadcastq because the latter is an AVX2 instruction and
+    ; this function is selected on CPUs that only advertise AVX.
+    movddup m4, [dspq + DSP.pre + Biquad.b0]
+    movddup m5, [dspq + DSP.pre + Biquad.b1]
+    movddup m6, [dspq + DSP.pre + Biquad.b2]
+    movddup m7, [dspq + DSP.pre + Biquad.a1]
+    movddup m8, [dspq + DSP.pre + Biquad.a2]
+
+    movddup m9, [dspq + DSP.rlb + Biquad.b0]
+    movddup m10, [dspq + DSP.rlb + Biquad.b1]
+    movddup m11, [dspq + DSP.rlb + Biquad.b2]
+    movddup m12, [dspq + DSP.rlb + Biquad.a1]
+    movddup m13, [dspq + DSP.rlb + Biquad.a2]
+
+    mov r7q, [dspq + DSP.y] ; state after the pre-filter
+    mov r8q, [dspq + DSP.z] ; state after the RLB-filter
+
+    ; handle odd channel count
+    test channelsd, 1
+    jnz .tail
+
+.loop:
+    filter_channels 2
+    sub channelsd, 2
+    jg .loop
+    RET
+
+.tail:
+    ; one channel on its own, then continue pairwise if any are left
+    filter_channels 1
+    dec channelsd
+    test channelsd, channelsd
+    jnz .loop
+    RET
diff --git a/libavfilter/x86/f_ebur128_init.c b/libavfilter/x86/f_ebur128_init.c
new file mode 100644
index 0000000000..c5edfde03a
--- /dev/null
+++ b/libavfilter/x86/f_ebur128_init.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/f_ebur128.h"
+
+/* Prototype of the AVX filter_channels implementation; same signature as
+ * EBUR128DSPContext.filter_channels. */
+void ff_ebur128_filter_channels_avx(const EBUR128DSPContext *, const double *,
+ double *, double *, double *, double *, int);
+
+/**
+ * Replace dsp->filter_channels with the AVX implementation when
+ * EXTERNAL_AVX() accepts the flags returned by av_get_cpu_flags();
+ * otherwise the context is left untouched.
+ */
+av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (EXTERNAL_AVX(cpu_flags))
+ dsp->filter_channels = ff_ebur128_filter_channels_avx;
+}
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 08/13] avfilter/f_ebur128: remove pointless macro
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
` (5 preceding siblings ...)
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 09/13] avfilter/f_ebur128: move true peak calculation out of main loop Niklas Haas
` (4 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
This macro neither shortens the code nor aids readability.
---
libavfilter/f_ebur128.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 2d94cefce7..2e1eedd855 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -682,20 +682,18 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
#endif
for (idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
- const int bin_id_400 = ebur128->i400.cache_pos;
- const int bin_id_3000 = ebur128->i3000.cache_pos;
-
-#define MOVE_TO_NEXT_CACHED_ENTRY(time) do { \
- ebur128->i##time.cache_pos++; \
- if (ebur128->i##time.cache_pos == \
- ebur128->i##time.cache_size) { \
- ebur128->i##time.filled = 1; \
- ebur128->i##time.cache_pos = 0; \
- } \
-} while (0)
+ const int bin_id_400 = ebur128->i400.cache_pos++;
+ const int bin_id_3000 = ebur128->i3000.cache_pos++;
+
+ if (ebur128->i400.cache_pos == ebur128->i400.cache_size) {
+ ebur128->i400.filled = 1;
+ ebur128->i400.cache_pos = 0;
+ }
- MOVE_TO_NEXT_CACHED_ENTRY(400);
- MOVE_TO_NEXT_CACHED_ENTRY(3000);
+ if (ebur128->i3000.cache_pos == ebur128->i3000.cache_size) {
+ ebur128->i3000.filled = 1;
+ ebur128->i3000.cache_pos = 0;
+ }
dsp->filter_channels(dsp, &samples[idx_insample * nb_channels],
&ebur128->i400.cache[bin_id_400 * nb_channels],
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 09/13] avfilter/f_ebur128: move true peak calculation out of main loop
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
` (6 preceding siblings ...)
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 08/13] avfilter/f_ebur128: remove pointless macro Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 10/13] avfilter/f_ebur128: lift sample " Niklas Haas
` (3 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
Easier to read, less convoluted, and ~30% faster. Most importantly, this
avoids the redundant recalculation of the true peak on every single
sample, by moving the FIND_PEAK() loop out of the main loop. (Note that
FIND_PEAK() does not depend on the current sample index at all, so there is no
reason for it to ever be recomputed here.)
---
libavfilter/f_ebur128.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 2e1eedd855..23092b597f 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -668,16 +668,22 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
(const uint8_t **)insamples->data, nb_samples);
if (ret < 0)
return ret;
- for (ch = 0; ch < nb_channels; ch++)
- ebur128->true_peaks_per_frame[ch] = 0.0;
- for (idx_insample = 0; idx_insample < ret; idx_insample++) {
- for (ch = 0; ch < nb_channels; ch++) {
- ebur128->true_peaks[ch] = FFMAX(ebur128->true_peaks[ch], fabs(*swr_samples));
- ebur128->true_peaks_per_frame[ch] = FFMAX(ebur128->true_peaks_per_frame[ch],
- fabs(*swr_samples));
- swr_samples++;
+
+ double maxpeak = 0.0;
+ for (int ch = 0; ch < nb_channels; ch++) {
+ double tp = ebur128->true_peaks[ch];
+ double tppf = 0.0;
+ for (int i = 0; i < ret; i++) {
+ const double sample = fabs(swr_samples[i * nb_channels]);
+ tp = FFMAX(tp, sample);
+ tppf = FFMAX(tppf, sample);
}
+ maxpeak = FFMAX(maxpeak, tp);
+ ebur128->true_peaks[ch] = tp;
+ ebur128->true_peaks_per_frame[ch] = tppf;
}
+
+ ebur128->true_peak = DBFS(maxpeak);
}
#endif
@@ -720,7 +726,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
}
FIND_PEAK(ebur128->sample_peak, ebur128->sample_peaks, SAMPLES);
- FIND_PEAK(ebur128->true_peak, ebur128->true_peaks, TRUE);
/* For integrated loudness, gating blocks are 400ms long with 75%
* overlap (see BS.1770-2 p5), so a re-computation is needed each 100ms
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 10/13] avfilter/f_ebur128: lift sample peak calculation out of main loop
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
` (7 preceding siblings ...)
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 09/13] avfilter/f_ebur128: move true peak calculation out of main loop Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 11/13] avfilter/f_ebur128: move variable declarations to usage site Niklas Haas
` (2 subsequent siblings)
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
This is substantially faster (~55%) than the transposed loop, and also
avoids an unnecessary macro.
---
libavfilter/f_ebur128.c | 38 ++++++++++++++++++--------------------
1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 23092b597f..4051b1ea95 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -687,6 +687,24 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
}
#endif
+ if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS) {
+ double maxpeak = 0.0;
+ for (int ch = 0; ch < nb_channels; ch++) {
+ const double *restrict samples_ch = &samples[ch];
+ double sp = ebur128->sample_peaks[ch];
+
+ for (int i = ebur128->idx_insample; i < nb_samples; i++) {
+ const double sample = fabs(samples_ch[nb_channels * i]);
+ sp = FFMAX(sp, sample);
+ }
+ maxpeak = FFMAX(maxpeak, sp);
+ ebur128->sample_peaks[ch] = sp;
+ }
+
+ ebur128->sample_peak = DBFS(maxpeak);
+ }
+
+
for (idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
const int bin_id_400 = ebur128->i400.cache_pos++;
const int bin_id_3000 = ebur128->i3000.cache_pos++;
@@ -707,26 +725,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
ebur128->i400.sum, ebur128->i3000.sum,
nb_channels);
-#define FIND_PEAK(global, sp, ptype) do { \
- int ch; \
- double maxpeak; \
- maxpeak = 0.0; \
- if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) { \
- for (ch = 0; ch < ebur128->nb_channels; ch++) \
- maxpeak = FFMAX(maxpeak, sp[ch]); \
- global = DBFS(maxpeak); \
- } \
-} while (0)
-
- if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS) {
- for (ch = 0; ch < nb_channels; ch++) {
- const double sample = samples[idx_insample * nb_channels + ch];
- ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], fabs(sample));
- }
- }
-
- FIND_PEAK(ebur128->sample_peak, ebur128->sample_peaks, SAMPLES);
-
/* For integrated loudness, gating blocks are 400ms long with 75%
* overlap (see BS.1770-2 p5), so a re-computation is needed each 100ms
* (4800 samples at 48kHz). */
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 11/13] avfilter/f_ebur128: move variable declarations to usage site
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
` (8 preceding siblings ...)
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 10/13] avfilter/f_ebur128: lift sample " Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 12/13] avfilter/f_ebur128: move true peak calculation to DSP function Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 13/13] avfilter/x86/f_ebur128: implement AVX true peak calculation Niklas Haas
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
This is actually allowed by non-ancient versions of C.
---
libavfilter/f_ebur128.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 4051b1ea95..1fb7129271 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -652,7 +652,7 @@ void ff_ebur128_filter_channels_c(const EBUR128DSPContext *dsp,
static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
{
- int i, ch, idx_insample, ret;
+ int ret;
AVFilterContext *ctx = inlink->dst;
EBUR128Context *ebur128 = ctx->priv;
const EBUR128DSPContext *dsp = &ebur128->dsp;
@@ -705,7 +705,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
}
- for (idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
+ for (int idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
const int bin_id_400 = ebur128->i400.cache_pos++;
const int bin_id_3000 = ebur128->i3000.cache_pos++;
@@ -741,7 +741,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
#define COMPUTE_LOUDNESS(m, time) do { \
if (ebur128->i##time.filled) { \
/* weighting sum of the last <time> ms */ \
- for (ch = 0; ch < nb_channels; ch++) \
+ for (int ch = 0; ch < nb_channels; ch++) \
power_##time += ebur128->ch_weighting[ch] * ebur128->i##time.sum[ch]; \
power_##time /= I##time##_BINS(inlink->sample_rate); \
} \
@@ -762,7 +762,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
/* compute integrated loudness by summing the histogram values
* above the relative threshold */
- for (i = gate_hist_pos; i < HIST_SIZE; i++) {
+ for (int i = gate_hist_pos; i < HIST_SIZE; i++) {
const unsigned nb_v = ebur128->i400.histogram[i].count;
nb_integrated += nb_v;
integrated_sum += nb_v * ebur128->i400.histogram[i].energy;
@@ -788,7 +788,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
int gate_hist_pos = gate_update(&ebur128->i3000, power_3000,
loudness_3000, LRA_GATE_THRES);
- for (i = gate_hist_pos; i < HIST_SIZE; i++)
+ for (int i = gate_hist_pos; i < HIST_SIZE; i++)
nb_powers += ebur128->i3000.histogram[i].count;
if (nb_powers) {
uint64_t n, nb_pow;
@@ -796,7 +796,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
/* get lower loudness to consider */
n = 0;
nb_pow = LRA_LOWER_PRC * nb_powers * 0.01 + 0.5;
- for (i = gate_hist_pos; i < HIST_SIZE; i++) {
+ for (int i = gate_hist_pos; i < HIST_SIZE; i++) {
n += ebur128->i3000.histogram[i].count;
if (n >= nb_pow) {
ebur128->lra_low = ebur128->i3000.histogram[i].loudness;
@@ -807,7 +807,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
/* get higher loudness to consider */
n = nb_powers;
nb_pow = LRA_HIGHER_PRC * nb_powers * 0.01 + 0.5;
- for (i = HIST_SIZE - 1; i >= 0; i--) {
+ for (int i = HIST_SIZE - 1; i >= 0; i--) {
n -= FFMIN(n, ebur128->i3000.histogram[i].count);
if (n < nb_pow) {
ebur128->lra_high = ebur128->i3000.histogram[i].loudness;
@@ -909,7 +909,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) { \
double max_peak = 0.0; \
char key[64]; \
- for (ch = 0; ch < nb_channels; ch++) { \
+ for (int ch = 0; ch < nb_channels; ch++) { \
snprintf(key, sizeof(key), \
META_PREFIX AV_STRINGIFY(name) "_peaks_ch%d", ch); \
max_peak = fmax(max_peak, ebur128->name##_peaks[ch]); \
@@ -948,7 +948,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
#define PRINT_PEAKS(str, sp, ptype) do { \
if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) { \
av_log(ctx, ebur128->loglevel, " " str ":"); \
- for (ch = 0; ch < nb_channels; ch++) \
+ for (int ch = 0; ch < nb_channels; ch++) \
av_log(ctx, ebur128->loglevel, " %5.1f", DBFS(sp[ch])); \
av_log(ctx, ebur128->loglevel, " dBFS"); \
} \
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 12/13] avfilter/f_ebur128: move true peak calculation to DSP function
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
` (9 preceding siblings ...)
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 11/13] avfilter/f_ebur128: move variable declarations to usage site Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 13/13] avfilter/x86/f_ebur128: implement AVX true peak calculation Niklas Haas
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
---
libavfilter/f_ebur128.c | 43 ++++++++++++++++++++++++++---------------
libavfilter/f_ebur128.h | 4 ++++
2 files changed, 31 insertions(+), 16 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 1fb7129271..0adc89c823 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -580,6 +580,8 @@ static av_cold int init(AVFilterContext *ctx)
av_log(ctx, AV_LOG_VERBOSE, "EBU +%d scale\n", ebur128->meter);
ebur128->dsp.filter_channels = ff_ebur128_filter_channels_c;
+ ebur128->dsp.true_peak = ff_ebur128_true_peak_c;
+
#if ARCH_X86
ff_ebur128_init_x86(&ebur128->dsp);
#endif
@@ -650,6 +652,28 @@ void ff_ebur128_filter_channels_c(const EBUR128DSPContext *dsp,
}
}
+/**
+ * Update the per-channel true peaks from a block of interleaved samples.
+ *
+ * @param true_peaks           running peak of each channel, updated in place
+ * @param true_peaks_per_frame peak of each channel within this block only
+ *                             (overwritten)
+ * @param nb_channels          number of interleaved channels
+ * @param samples              nb_samples * nb_channels interleaved samples
+ * @param nb_samples           number of samples per channel
+ * @return the largest running peak over all channels, as a linear value
+ *         (the caller converts it with DBFS())
+ */
+double ff_ebur128_true_peak_c(double *restrict true_peaks,
+                              double *restrict true_peaks_per_frame,
+                              const int nb_channels, const double *samples,
+                              const int nb_samples)
+{
+    double maxpeak = 0.0;
+    for (int ch = 0; ch < nb_channels; ch++) {
+        /* The samples are interleaved: channel ch starts at offset ch and
+         * advances by nb_channels. Without this offset every channel would
+         * report the peak of channel 0. */
+        const double *samples_ch = &samples[ch];
+        double tp = true_peaks[ch];
+        double tppf = 0.0;
+        for (int i = 0; i < nb_samples; i++) {
+            const double sample = fabs(samples_ch[i * nb_channels]);
+            tp = FFMAX(tp, sample);
+            tppf = FFMAX(tppf, sample);
+        }
+        maxpeak = FFMAX(maxpeak, tp);
+        true_peaks[ch] = tp;
+        true_peaks_per_frame[ch] = tppf;
+    }
+
+    return maxpeak;
+}
+
static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
{
int ret;
@@ -669,21 +693,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
if (ret < 0)
return ret;
- double maxpeak = 0.0;
- for (int ch = 0; ch < nb_channels; ch++) {
- double tp = ebur128->true_peaks[ch];
- double tppf = 0.0;
- for (int i = 0; i < ret; i++) {
- const double sample = fabs(swr_samples[i * nb_channels]);
- tp = FFMAX(tp, sample);
- tppf = FFMAX(tppf, sample);
- }
- maxpeak = FFMAX(maxpeak, tp);
- ebur128->true_peaks[ch] = tp;
- ebur128->true_peaks_per_frame[ch] = tppf;
- }
-
- ebur128->true_peak = DBFS(maxpeak);
+ ebur128->true_peak = DBFS(dsp->true_peak(ebur128->true_peaks,
+ ebur128->true_peaks_per_frame,
+ nb_channels, swr_samples, ret));
}
#endif
@@ -704,7 +716,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
ebur128->sample_peak = DBFS(maxpeak);
}
-
for (int idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
const int bin_id_400 = ebur128->i400.cache_pos++;
const int bin_id_3000 = ebur128->i3000.cache_pos++;
diff --git a/libavfilter/f_ebur128.h b/libavfilter/f_ebur128.h
index decf3917af..78238e4bef 100644
--- a/libavfilter/f_ebur128.h
+++ b/libavfilter/f_ebur128.h
@@ -42,6 +42,8 @@ typedef struct EBUR128DSPContext {
double *cache_400, double *cache_3000,
double *sum_400, double *sum_3000,
int nb_channels);
+
+ double (*true_peak)(double *, double *, int, const double *, int);
} EBUR128DSPContext;
static_assert(offsetof(EBUR128DSPContext, pre) == 0, "struct layout mismatch");
@@ -52,3 +54,5 @@ void ff_ebur128_init_x86(EBUR128DSPContext *dsp);
void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
double *, double *, double *, double *, int);
+
+double ff_ebur128_true_peak_c(double *, double *, int, const double *, int);
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 13/13] avfilter/x86/f_ebur128: implement AVX true peak calculation
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
` (10 preceding siblings ...)
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 12/13] avfilter/f_ebur128: move true peak calculation to DSP function Niklas Haas
@ 2025-06-13 16:37 ` Niklas Haas
11 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-13 16:37 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
Stereo only, for simplicity. Slightly faster than the C code.
---
libavfilter/f_ebur128.c | 8 +++-----
libavfilter/f_ebur128.h | 2 +-
libavfilter/x86/f_ebur128.asm | 25 +++++++++++++++++++++++++
libavfilter/x86/f_ebur128_init.c | 9 +++++++--
4 files changed, 36 insertions(+), 8 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 0adc89c823..c64f6ed032 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -502,6 +502,9 @@ static int config_audio_output(AVFilterLink *outlink)
return AVERROR(ENOMEM);
}
+#if ARCH_X86
+ ff_ebur128_init_x86(&ebur128->dsp, nb_channels);
+#endif
return 0;
}
@@ -581,11 +584,6 @@ static av_cold int init(AVFilterContext *ctx)
ebur128->dsp.filter_channels = ff_ebur128_filter_channels_c;
ebur128->dsp.true_peak = ff_ebur128_true_peak_c;
-
-#if ARCH_X86
- ff_ebur128_init_x86(&ebur128->dsp);
-#endif
-
return 0;
}
diff --git a/libavfilter/f_ebur128.h b/libavfilter/f_ebur128.h
index 78238e4bef..9d167a3d4a 100644
--- a/libavfilter/f_ebur128.h
+++ b/libavfilter/f_ebur128.h
@@ -50,7 +50,7 @@ static_assert(offsetof(EBUR128DSPContext, pre) == 0, "struct l
static_assert(offsetof(EBUR128DSPContext, rlb) == 5 * sizeof(double), "struct layout mismatch");
static_assert(offsetof(EBUR128DSPContext, y) == 10 * sizeof(double), "struct layout mismatch");
-void ff_ebur128_init_x86(EBUR128DSPContext *dsp);
+void ff_ebur128_init_x86(EBUR128DSPContext *dsp, int nb_channels);
void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
double *, double *, double *, double *, int);
diff --git a/libavfilter/x86/f_ebur128.asm b/libavfilter/x86/f_ebur128.asm
index 28add77f70..99502ebe9d 100644
--- a/libavfilter/x86/f_ebur128.asm
+++ b/libavfilter/x86/f_ebur128.asm
@@ -40,6 +40,10 @@ struc DSP
.z resq 1
endstruc
+SECTION_RODATA
+
+abs_mask: dq 0x7FFFFFFFFFFFFFFF
+
SECTION .text
%macro MOVNQ 3 ; num, dst, src
@@ -140,3 +144,24 @@ cglobal ebur128_filter_channels, 7, 9, 14, dsp, samples, cache400, cache3000, su
test channelsd, channelsd
jnz .loop
RET
+
+; double ff_ebur128_true_peak_stereo_avx(double *tp, double *tppf, int channels,
+;                                        const double *samples, int nb_samples)
+; Stereo-only true peak update: the low lane tracks the first channel and the
+; high lane the second. tp is the running peak (updated in place), tppf the
+; peak of this block only. Returns the larger of the two running peaks.
+; Uses m0-m4, hence five vector registers are declared.
+cglobal ebur128_true_peak_stereo, 5, 5, 5, tp, tppf, channels, samples, nb_samples
+    ; movddup instead of vpbroadcastq, which is an AVX2 instruction
+    movddup m4, [abs_mask]
+    pxor m0, m0 ; maxpeak
+    movupd m1, [tpq] ; tp
+    pxor m2, m2 ; tppf
+    ; nothing to read for an empty block: keep tp and report tppf = 0
+    test nb_samplesd, nb_samplesd
+    jle .end
+.inner:
+    movupd m3, [samplesq]
+    add samplesq, 16
+    pand m3, m4 ; clear the sign bits, i.e. fabs()
+    maxpd m1, m3
+    maxpd m2, m3
+    dec nb_samplesd
+    jg .inner
+.end:
+    movupd [tpq], m1
+    movupd [tppfq], m2
+    ; horizontal maximum of both lanes
+    maxpd m0, m1
+    shufpd m1, m0, m0, 1
+    maxpd m0, m1
+    ; the double result is returned in xmm0 (m0); no copy to rax is needed
+    RET
diff --git a/libavfilter/x86/f_ebur128_init.c b/libavfilter/x86/f_ebur128_init.c
index c5edfde03a..324faf7c31 100644
--- a/libavfilter/x86/f_ebur128_init.c
+++ b/libavfilter/x86/f_ebur128_init.c
@@ -26,10 +26,15 @@
void ff_ebur128_filter_channels_avx(const EBUR128DSPContext *, const double *,
double *, double *, double *, double *, int);
-av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp)
+double ff_ebur128_true_peak_stereo_avx(double *, double *, int, const double *, int);
+
+av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp, int nb_channels)
{
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_AVX(cpu_flags))
+ if (EXTERNAL_AVX(cpu_flags)) {
dsp->filter_channels = ff_ebur128_filter_channels_avx;
+ if (nb_channels == 2)
+ dsp->true_peak = ff_ebur128_true_peak_stereo_avx;
+ }
}
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation Niklas Haas
@ 2025-06-14 1:12 ` Michael Niedermayer
2025-06-16 11:19 ` Niklas Haas
2025-06-14 1:20 ` James Almer
1 sibling, 1 reply; 18+ messages in thread
From: Michael Niedermayer @ 2025-06-14 1:12 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 2703 bytes --]
Hi Niklas
On Fri, Jun 13, 2025 at 06:37:50PM +0200, Niklas Haas wrote:
> From: Niklas Haas <git@haasn.dev>
>
> Processes two channels in parallel, using 128-bit XMM registers.
>
> In theory, we could go up to YMM registers to process 4 channels, but this is
> not a gain except for relatively high channel counts (e.g. 7.1), and also
> complicates the sample load/store operations considerably.
>
> I decided to only add an AVX variant, since the C code is not substantially
> slower enough to justify a separate function just for ancient CPUs.
> ---
> libavfilter/f_ebur128.c | 15 ++--
> libavfilter/f_ebur128.h | 16 ++++
> libavfilter/x86/Makefile | 2 +
> libavfilter/x86/f_ebur128.asm | 142 +++++++++++++++++++++++++++++++
> libavfilter/x86/f_ebur128_init.c | 35 ++++++++
> 5 files changed, 205 insertions(+), 5 deletions(-)
> create mode 100644 libavfilter/x86/f_ebur128.asm
> create mode 100644 libavfilter/x86/f_ebur128_init.c
This breaks the build on x86-32:
src/libavfilter/x86/f_ebur128.asm:116: error: symbol `m8' undefined
src/libavfilter/x86/f_ebur128.asm:118: error: symbol `m9' undefined
src/libavfilter/x86/f_ebur128.asm:119: error: symbol `m10' undefined
src/libavfilter/x86/f_ebur128.asm:120: error: symbol `m11' undefined
src/libavfilter/x86/f_ebur128.asm:121: error: symbol `m12' undefined
src/libavfilter/x86/f_ebur128.asm:122: error: symbol `m13' undefined
src/libavfilter/x86/f_ebur128.asm:124: error: symbol `r7q' undefined
src/libavfilter/x86/f_ebur128.asm:125: error: symbol `r8q' undefined
src/libavfilter/x86/f_ebur128.asm:132: error: symbol `r7q' undefined
src/libavfilter/x86/f_ebur128.asm:91: ... from macro `filter_channels' defined here
src/libavfilter/x86/f_ebur128.asm:57: ... from macro `FILTER' defined here
src//libavutil/x86/x86inc.asm:1654: ... from macro `movsd' defined here
src//libavutil/x86/x86inc.asm:1501: ... from macro `RUN_AVX_INSTR' defined here
src/libavfilter/x86/f_ebur128.asm:132: error: symbol `r7q' undefined
src/libavfilter/x86/f_ebur128.asm:91: ... from macro `filter_channels' defined here
src/libavfilter/x86/f_ebur128.asm:58: ... from macro `FILTER' defined here
src//libavutil/x86/x86inc.asm:1654: ... from macro `movsd' defined here
src//libavutil/x86/x86inc.asm:1501: ... from macro `RUN_AVX_INSTR' defined here
src/libavfilter/x86/f_ebur128.asm:132: error: symbol `r7q' undefined
...
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Concerning the gods, I have no means of knowing whether they exist or not
or of what sort they may be, because of the obscurity of the subject, and
the brevity of human life -- Protagoras
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation Niklas Haas
2025-06-14 1:12 ` Michael Niedermayer
@ 2025-06-14 1:20 ` James Almer
2025-06-16 11:15 ` Niklas Haas
1 sibling, 1 reply; 18+ messages in thread
From: James Almer @ 2025-06-14 1:20 UTC (permalink / raw)
To: ffmpeg-devel
[-- Attachment #1.1.1: Type: text/plain, Size: 11602 bytes --]
On 6/13/2025 1:37 PM, Niklas Haas wrote:
> From: Niklas Haas <git@haasn.dev>
>
> Processes two channels in parallel, using 128-bit XMM registers.
>
> In theory, we could go up to YMM registers to process 4 channels, but this is
> not a gain except for relatively high channel counts (e.g. 7.1), and also
> complicates the sample load/store operations considerably.
>
> I decided to only add an AVX variant, since the C code is not substantially
> slower enough to justify a separate function just for ancient CPUs.
> ---
> libavfilter/f_ebur128.c | 15 ++--
> libavfilter/f_ebur128.h | 16 ++++
> libavfilter/x86/Makefile | 2 +
> libavfilter/x86/f_ebur128.asm | 142 +++++++++++++++++++++++++++++++
> libavfilter/x86/f_ebur128_init.c | 35 ++++++++
> 5 files changed, 205 insertions(+), 5 deletions(-)
> create mode 100644 libavfilter/x86/f_ebur128.asm
> create mode 100644 libavfilter/x86/f_ebur128_init.c
>
> diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
> index b9e210c05a..2d94cefce7 100644
> --- a/libavfilter/f_ebur128.c
> +++ b/libavfilter/f_ebur128.c
> @@ -579,6 +579,11 @@ static av_cold int init(AVFilterContext *ctx)
> /* summary */
> av_log(ctx, AV_LOG_VERBOSE, "EBU +%d scale\n", ebur128->meter);
>
> + ebur128->dsp.filter_channels = ff_ebur128_filter_channels_c;
> +#if ARCH_X86
> + ff_ebur128_init_x86(&ebur128->dsp);
> +#endif
> +
> return 0;
> }
>
> @@ -692,11 +697,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
> MOVE_TO_NEXT_CACHED_ENTRY(400);
> MOVE_TO_NEXT_CACHED_ENTRY(3000);
>
> - ff_ebur128_filter_channels_c(dsp, &samples[idx_insample * nb_channels],
> - &ebur128->i400.cache[bin_id_400 * nb_channels],
> - &ebur128->i3000.cache[bin_id_3000 * nb_channels],
> - ebur128->i400.sum, ebur128->i3000.sum,
> - nb_channels);
> + dsp->filter_channels(dsp, &samples[idx_insample * nb_channels],
> + &ebur128->i400.cache[bin_id_400 * nb_channels],
> + &ebur128->i3000.cache[bin_id_3000 * nb_channels],
> + ebur128->i400.sum, ebur128->i3000.sum,
> + nb_channels);
>
> #define FIND_PEAK(global, sp, ptype) do { \
> int ch; \
> diff --git a/libavfilter/f_ebur128.h b/libavfilter/f_ebur128.h
> index dfc2e7c75b..decf3917af 100644
> --- a/libavfilter/f_ebur128.h
> +++ b/libavfilter/f_ebur128.h
> @@ -19,6 +19,9 @@
> * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> */
>
> +#include <assert.h>
> +#include <stddef.h>
> +
> typedef struct EBUR128Biquad {
> double b0, b1, b2;
> double a1, a2;
> @@ -32,7 +35,20 @@ typedef struct EBUR128DSPContext {
> /* Cache of 3 samples for each channel */
> double *y; /* after pre-filter */
> double *z; /* after RLB-filter */
> +
> + /* DSP functions */
> + void (*filter_channels)(const struct EBUR128DSPContext *dsp,
> + const double *samples,
> + double *cache_400, double *cache_3000,
> + double *sum_400, double *sum_3000,
> + int nb_channels);
> } EBUR128DSPContext;
>
> +static_assert(offsetof(EBUR128DSPContext, pre) == 0, "struct layout mismatch");
> +static_assert(offsetof(EBUR128DSPContext, rlb) == 5 * sizeof(double), "struct layout mismatch");
> +static_assert(offsetof(EBUR128DSPContext, y) == 10 * sizeof(double), "struct layout mismatch");
> +
> +void ff_ebur128_init_x86(EBUR128DSPContext *dsp);
> +
> void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
> double *, double *, double *, double *, int);
> diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
> index 0d9a28a935..0efe3f8d2c 100644
> --- a/libavfilter/x86/Makefile
> +++ b/libavfilter/x86/Makefile
> @@ -7,6 +7,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
> OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
> OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
> OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o
> +OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128_init.o
> OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o
> OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
> OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o
> @@ -52,6 +53,7 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
> X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
> X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
> X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
> +X86ASM-OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128.o
> X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
> X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o
> X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
> diff --git a/libavfilter/x86/f_ebur128.asm b/libavfilter/x86/f_ebur128.asm
> new file mode 100644
> index 0000000000..28add77f70
> --- /dev/null
> +++ b/libavfilter/x86/f_ebur128.asm
> @@ -0,0 +1,142 @@
> +;*****************************************************************************
> +;* x86-optimized functions for overlay filter
> +;*
> +;* Copyright (C) 2018 Paul B Mahol
> +;* Copyright (C) 2018 Henrik Gramner
> +;*
> +;* This file is part of FFmpeg.
> +;*
> +;* FFmpeg is free software; you can redistribute it and/or
> +;* modify it under the terms of the GNU Lesser General Public
> +;* License as published by the Free Software Foundation; either
> +;* version 2.1 of the License, or (at your option) any later version.
> +;*
> +;* FFmpeg is distributed in the hope that it will be useful,
> +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +;* Lesser General Public License for more details.
> +;*
> +;* You should have received a copy of the GNU Lesser General Public
> +;* License along with FFmpeg; if not, write to the Free Software
> +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +;*****************************************************************************
> +
> +%include "libavutil/x86/x86util.asm"
> +
> +cextern ebur128_filter_channels_c
> +
> +struc Biquad
> + .b0 resq 1
> + .b1 resq 1
> + .b2 resq 1
> + .a1 resq 1
> + .a2 resq 1
> +endstruc
> +
> +struc DSP
> + .pre resq 5
> + .rlb resq 5
> + .y resq 1
> + .z resq 1
> +endstruc
> +
> +SECTION .text
> +
> +%macro MOVNQ 3 ; num, dst, src
> +%if %1 == 1
> + movsd %2, %3
> +%else
> + movupd %2, %3
> +%endif
> +%endmacro
> +
> +%macro FILTER 11 ; y0, y1, y2, x, b0, b1, b2, a1, a2, samples, num_channels
> + ; Y[0] := b0 * X + Y1
> + ; Y[1] := b1 * X + Y2 - a1 * Y[0]
> + ; Y[2] := b2 * X - a2 * Y[0]
> + movsd %1, [%10 + 8]
> + movsd %3, [%10 + 16]
> +%if %11 > 1
> + movhpd %1, [%10 + 32]
> + movhpd %3, [%10 + 40]
> +%endif
> +
> + mulpd %2, %5, %4
> + addpd %1, %2
> +
> + mulpd %2, %8, %1
> + subpd %3, %2
> + mulpd %2, %6, %4
> + addpd %2, %3
> +
> + mulpd %3, %7, %4
> + mulpd %4, %9, %1
> + subpd %3, %4
> +
> + movlpd [%10 + 0], %1
> + movlpd [%10 + 8], %2
> + movlpd [%10 + 16], %3
movsd?
> +%if %11 > 1
> + movhpd [%10 + 24], %1
> + movhpd [%10 + 32], %2
> + movhpd [%10 + 40], %3
> +%endif
> + add %10, 24 * %11
> +%endmacro
> +
> +%macro filter_channels 1 ; num_channels
> + MOVNQ %1, m3, [samplesq]
> + add samplesq, 8 * %1
> +
> + FILTER m0, m1, m2, m3, m4, m5, m6, m7, m8, r7q, %1
> + FILTER m3, m1, m2, m0, m9, m10, m11, m12, m13, r8q, %1
> +
> + ; update sum and cache
> + mulpd m3, m3
> + subpd m0, m3, [cache400q]
> + subpd m1, m3, [cache3000q]
> + MOVNQ %1, [cache400q], m3
> + MOVNQ %1, [cache3000q], m3
> + add cache400q, 8 * %1
> + add cache3000q, 8 * %1
> + addpd m0, [sum400q]
> + addpd m1, [sum3000q]
> + MOVNQ %1, [sum400q], m0
> + MOVNQ %1, [sum3000q], m1
> + add sum400q, 8 * %1
> + add sum3000q, 8 * %1
> +%endmacro
> +
> +INIT_XMM avx
> +cglobal ebur128_filter_channels, 7, 9, 14, dsp, samples, cache400, cache3000, sum400, sum3000, channels
> + vpbroadcastq m4, [dspq + DSP.pre + Biquad.b0]
vpbroadcastq is AVX2, not AVX. Use movddup instead.
> + vpbroadcastq m5, [dspq + DSP.pre + Biquad.b1]
> + vpbroadcastq m6, [dspq + DSP.pre + Biquad.b2]
> + vpbroadcastq m7, [dspq + DSP.pre + Biquad.a1]
> + vpbroadcastq m8, [dspq + DSP.pre + Biquad.a2]
> +
> + vpbroadcastq m9, [dspq + DSP.rlb + Biquad.b0]
> + vpbroadcastq m10, [dspq + DSP.rlb + Biquad.b1]
> + vpbroadcastq m11, [dspq + DSP.rlb + Biquad.b2]
> + vpbroadcastq m12, [dspq + DSP.rlb + Biquad.a1]
> + vpbroadcastq m13, [dspq + DSP.rlb + Biquad.a2]
> +
> + mov r7q, [dspq + DSP.y]
> + mov r8q, [dspq + DSP.z]
> +
> + ; handle odd channel count
> + test channelsd, 1
> + jnz .tail
> +
> +.loop:
> + filter_channels 2
> + sub channelsd, 2
> + jg .loop
> + RET
> +
> +.tail:
> + filter_channels 1
> + dec channelsd
> + test channelsd, channelsd
> + jnz .loop
> + RET
> diff --git a/libavfilter/x86/f_ebur128_init.c b/libavfilter/x86/f_ebur128_init.c
> new file mode 100644
> index 0000000000..c5edfde03a
> --- /dev/null
> +++ b/libavfilter/x86/f_ebur128_init.c
> @@ -0,0 +1,35 @@
> +/*
> + * Copyright (c) 2018 Paul B Mahol
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/attributes.h"
> +#include "libavutil/cpu.h"
> +#include "libavutil/x86/cpu.h"
> +#include "libavfilter/f_ebur128.h"
> +
> +void ff_ebur128_filter_channels_avx(const EBUR128DSPContext *, const double *,
> + double *, double *, double *, double *, int);
> +
> +av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp)
> +{
> + int cpu_flags = av_get_cpu_flags();
> +
> + if (EXTERNAL_AVX(cpu_flags))
> + dsp->filter_channels = ff_ebur128_filter_channels_avx;
> +}
[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 495 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation
2025-06-14 1:20 ` James Almer
@ 2025-06-16 11:15 ` Niklas Haas
0 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-16 11:15 UTC (permalink / raw)
To: ffmpeg-devel
On Fri, 13 Jun 2025 22:20:03 -0300 James Almer <jamrial-at-gmail.com@ffmpeg.org> wrote:
> On 6/13/2025 1:37 PM, Niklas Haas wrote:
> > From: Niklas Haas <git@haasn.dev>
> >
> > Processes two channels in parallel, using 128-bit XMM registers.
> >
> > In theory, we could go up to YMM registers to process 4 channels, but this is
> > not a gain except for relatively high channel counts (e.g. 7.1), and also
> > complicates the sample load/store operations considerably.
> >
> > I decided to only add an AVX variant, since the C code is not substantially
> > slower enough to justify a separate function just for ancient CPUs.
> > ---
> > libavfilter/f_ebur128.c | 15 ++--
> > libavfilter/f_ebur128.h | 16 ++++
> > libavfilter/x86/Makefile | 2 +
> > libavfilter/x86/f_ebur128.asm | 142 +++++++++++++++++++++++++++++++
> > libavfilter/x86/f_ebur128_init.c | 35 ++++++++
> > 5 files changed, 205 insertions(+), 5 deletions(-)
> > create mode 100644 libavfilter/x86/f_ebur128.asm
> > create mode 100644 libavfilter/x86/f_ebur128_init.c
> >
> > diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
> > index b9e210c05a..2d94cefce7 100644
> > --- a/libavfilter/f_ebur128.c
> > +++ b/libavfilter/f_ebur128.c
> > @@ -579,6 +579,11 @@ static av_cold int init(AVFilterContext *ctx)
> > /* summary */
> > av_log(ctx, AV_LOG_VERBOSE, "EBU +%d scale\n", ebur128->meter);
> >
> > + ebur128->dsp.filter_channels = ff_ebur128_filter_channels_c;
> > +#if ARCH_X86
> > + ff_ebur128_init_x86(&ebur128->dsp);
> > +#endif
> > +
> > return 0;
> > }
> >
> > @@ -692,11 +697,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
> > MOVE_TO_NEXT_CACHED_ENTRY(400);
> > MOVE_TO_NEXT_CACHED_ENTRY(3000);
> >
> > - ff_ebur128_filter_channels_c(dsp, &samples[idx_insample * nb_channels],
> > - &ebur128->i400.cache[bin_id_400 * nb_channels],
> > - &ebur128->i3000.cache[bin_id_3000 * nb_channels],
> > - ebur128->i400.sum, ebur128->i3000.sum,
> > - nb_channels);
> > + dsp->filter_channels(dsp, &samples[idx_insample * nb_channels],
> > + &ebur128->i400.cache[bin_id_400 * nb_channels],
> > + &ebur128->i3000.cache[bin_id_3000 * nb_channels],
> > + ebur128->i400.sum, ebur128->i3000.sum,
> > + nb_channels);
> >
> > #define FIND_PEAK(global, sp, ptype) do { \
> > int ch; \
> > diff --git a/libavfilter/f_ebur128.h b/libavfilter/f_ebur128.h
> > index dfc2e7c75b..decf3917af 100644
> > --- a/libavfilter/f_ebur128.h
> > +++ b/libavfilter/f_ebur128.h
> > @@ -19,6 +19,9 @@
> > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > */
> >
> > +#include <assert.h>
> > +#include <stddef.h>
> > +
> > typedef struct EBUR128Biquad {
> > double b0, b1, b2;
> > double a1, a2;
> > @@ -32,7 +35,20 @@ typedef struct EBUR128DSPContext {
> > /* Cache of 3 samples for each channel */
> > double *y; /* after pre-filter */
> > double *z; /* after RLB-filter */
> > +
> > + /* DSP functions */
> > + void (*filter_channels)(const struct EBUR128DSPContext *dsp,
> > + const double *samples,
> > + double *cache_400, double *cache_3000,
> > + double *sum_400, double *sum_3000,
> > + int nb_channels);
> > } EBUR128DSPContext;
> >
> > +static_assert(offsetof(EBUR128DSPContext, pre) == 0, "struct layout mismatch");
> > +static_assert(offsetof(EBUR128DSPContext, rlb) == 5 * sizeof(double), "struct layout mismatch");
> > +static_assert(offsetof(EBUR128DSPContext, y) == 10 * sizeof(double), "struct layout mismatch");
> > +
> > +void ff_ebur128_init_x86(EBUR128DSPContext *dsp);
> > +
> > void ff_ebur128_filter_channels_c(const EBUR128DSPContext *, const double *,
> > double *, double *, double *, double *, int);
> > diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
> > index 0d9a28a935..0efe3f8d2c 100644
> > --- a/libavfilter/x86/Makefile
> > +++ b/libavfilter/x86/Makefile
> > @@ -7,6 +7,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend_init.o
> > OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif_init.o
> > OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp_init.o
> > OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution_init.o
> > +OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128_init.o
> > OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq_init.o
> > OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp_init.o
> > OBJS-$(CONFIG_GBLUR_FILTER) += x86/vf_gblur_init.o
> > @@ -52,6 +53,7 @@ X86ASM-OBJS-$(CONFIG_BLEND_FILTER) += x86/vf_blend.o
> > X86ASM-OBJS-$(CONFIG_BWDIF_FILTER) += x86/vf_bwdif.o
> > X86ASM-OBJS-$(CONFIG_COLORSPACE_FILTER) += x86/colorspacedsp.o
> > X86ASM-OBJS-$(CONFIG_CONVOLUTION_FILTER) += x86/vf_convolution.o
> > +X86ASM-OBJS-$(CONFIG_EBUR128_FILTER) += x86/f_ebur128.o
> > X86ASM-OBJS-$(CONFIG_EQ_FILTER) += x86/vf_eq.o
> > X86ASM-OBJS-$(CONFIG_FRAMERATE_FILTER) += x86/vf_framerate.o
> > X86ASM-OBJS-$(CONFIG_FSPP_FILTER) += x86/vf_fspp.o
> > diff --git a/libavfilter/x86/f_ebur128.asm b/libavfilter/x86/f_ebur128.asm
> > new file mode 100644
> > index 0000000000..28add77f70
> > --- /dev/null
> > +++ b/libavfilter/x86/f_ebur128.asm
> > @@ -0,0 +1,142 @@
> > +;*****************************************************************************
> > +;* x86-optimized functions for overlay filter
> > +;*
> > +;* Copyright (C) 2018 Paul B Mahol
> > +;* Copyright (C) 2018 Henrik Gramner
> > +;*
> > +;* This file is part of FFmpeg.
> > +;*
> > +;* FFmpeg is free software; you can redistribute it and/or
> > +;* modify it under the terms of the GNU Lesser General Public
> > +;* License as published by the Free Software Foundation; either
> > +;* version 2.1 of the License, or (at your option) any later version.
> > +;*
> > +;* FFmpeg is distributed in the hope that it will be useful,
> > +;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > +;* Lesser General Public License for more details.
> > +;*
> > +;* You should have received a copy of the GNU Lesser General Public
> > +;* License along with FFmpeg; if not, write to the Free Software
> > +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > +;*****************************************************************************
> > +
> > +%include "libavutil/x86/x86util.asm"
> > +
> > +cextern ebur128_filter_channels_c
> > +
> > +struc Biquad
> > + .b0 resq 1
> > + .b1 resq 1
> > + .b2 resq 1
> > + .a1 resq 1
> > + .a2 resq 1
> > +endstruc
> > +
> > +struc DSP
> > + .pre resq 5
> > + .rlb resq 5
> > + .y resq 1
> > + .z resq 1
> > +endstruc
> > +
> > +SECTION .text
> > +
> > +%macro MOVNQ 3 ; num, dst, src
> > +%if %1 == 1
> > + movsd %2, %3
> > +%else
> > + movupd %2, %3
> > +%endif
> > +%endmacro
> > +
> > +%macro FILTER 11 ; y0, y1, y2, x, b0, b1, b2, a1, a2, samples, num_channels
> > + ; Y[0] := b0 * X + Y1
> > + ; Y[1] := b1 * X + Y2 - a1 * Y[0]
> > + ; Y[2] := b2 * X - a2 * Y[0]
> > + movsd %1, [%10 + 8]
> > + movsd %3, [%10 + 16]
> > +%if %11 > 1
> > + movhpd %1, [%10 + 32]
> > + movhpd %3, [%10 + 40]
> > +%endif
> > +
> > + mulpd %2, %5, %4
> > + addpd %1, %2
> > +
> > + mulpd %2, %8, %1
> > + subpd %3, %2
> > + mulpd %2, %6, %4
> > + addpd %2, %3
> > +
> > + mulpd %3, %7, %4
> > + mulpd %4, %9, %1
> > + subpd %3, %4
> > +
> > + movlpd [%10 + 0], %1
> > + movlpd [%10 + 8], %2
> > + movlpd [%10 + 16], %3
>
> movsd?
Changed.
>
> > +%if %11 > 1
> > + movhpd [%10 + 24], %1
> > + movhpd [%10 + 32], %2
> > + movhpd [%10 + 40], %3
> > +%endif
> > + add %10, 24 * %11
> > +%endmacro
> > +
> > +%macro filter_channels 1 ; num_channels
> > + MOVNQ %1, m3, [samplesq]
> > + add samplesq, 8 * %1
> > +
> > + FILTER m0, m1, m2, m3, m4, m5, m6, m7, m8, r7q, %1
> > + FILTER m3, m1, m2, m0, m9, m10, m11, m12, m13, r8q, %1
> > +
> > + ; update sum and cache
> > + mulpd m3, m3
> > + subpd m0, m3, [cache400q]
> > + subpd m1, m3, [cache3000q]
> > + MOVNQ %1, [cache400q], m3
> > + MOVNQ %1, [cache3000q], m3
> > + add cache400q, 8 * %1
> > + add cache3000q, 8 * %1
> > + addpd m0, [sum400q]
> > + addpd m1, [sum3000q]
> > + MOVNQ %1, [sum400q], m0
> > + MOVNQ %1, [sum3000q], m1
> > + add sum400q, 8 * %1
> > + add sum3000q, 8 * %1
> > +%endmacro
> > +
> > +INIT_XMM avx
> > +cglobal ebur128_filter_channels, 7, 9, 14, dsp, samples, cache400, cache3000, sum400, sum3000, channels
> > + vpbroadcastq m4, [dspq + DSP.pre + Biquad.b0]
>
> vpbroadcastq is AVX2, not AVX. Use movddup instead.
Fixed.
>
> > + vpbroadcastq m5, [dspq + DSP.pre + Biquad.b1]
> > + vpbroadcastq m6, [dspq + DSP.pre + Biquad.b2]
> > + vpbroadcastq m7, [dspq + DSP.pre + Biquad.a1]
> > + vpbroadcastq m8, [dspq + DSP.pre + Biquad.a2]
> > +
> > + vpbroadcastq m9, [dspq + DSP.rlb + Biquad.b0]
> > + vpbroadcastq m10, [dspq + DSP.rlb + Biquad.b1]
> > + vpbroadcastq m11, [dspq + DSP.rlb + Biquad.b2]
> > + vpbroadcastq m12, [dspq + DSP.rlb + Biquad.a1]
> > + vpbroadcastq m13, [dspq + DSP.rlb + Biquad.a2]
> > +
> > + mov r7q, [dspq + DSP.y]
> > + mov r8q, [dspq + DSP.z]
> > +
> > + ; handle odd channel count
> > + test channelsd, 1
> > + jnz .tail
> > +
> > +.loop:
> > + filter_channels 2
> > + sub channelsd, 2
> > + jg .loop
> > + RET
> > +
> > +.tail:
> > + filter_channels 1
> > + dec channelsd
> > + test channelsd, channelsd
> > + jnz .loop
> > + RET
> > diff --git a/libavfilter/x86/f_ebur128_init.c b/libavfilter/x86/f_ebur128_init.c
> > new file mode 100644
> > index 0000000000..c5edfde03a
> > --- /dev/null
> > +++ b/libavfilter/x86/f_ebur128_init.c
> > @@ -0,0 +1,35 @@
> > +/*
> > + * Copyright (c) 2018 Paul B Mahol
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + */
> > +
> > +#include "libavutil/attributes.h"
> > +#include "libavutil/cpu.h"
> > +#include "libavutil/x86/cpu.h"
> > +#include "libavfilter/f_ebur128.h"
> > +
> > +void ff_ebur128_filter_channels_avx(const EBUR128DSPContext *, const double *,
> > + double *, double *, double *, double *, int);
> > +
> > +av_cold void ff_ebur128_init_x86(EBUR128DSPContext *dsp)
> > +{
> > + int cpu_flags = av_get_cpu_flags();
> > +
> > + if (EXTERNAL_AVX(cpu_flags))
> > + dsp->filter_channels = ff_ebur128_filter_channels_avx;
> > +}
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation
2025-06-14 1:12 ` Michael Niedermayer
@ 2025-06-16 11:19 ` Niklas Haas
0 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-16 11:19 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Sat, 14 Jun 2025 03:12:22 +0200 Michael Niedermayer <michael@niedermayer.cc> wrote:
> Hi Niklas
>
> On Fri, Jun 13, 2025 at 06:37:50PM +0200, Niklas Haas wrote:
> > From: Niklas Haas <git@haasn.dev>
> >
> > Processes two channels in parallel, using 128-bit XMM registers.
> >
> > In theory, we could go up to YMM registers to process 4 channels, but this is
> > not a gain except for relatively high channel counts (e.g. 7.1), and also
> > complicates the sample load/store operations considerably.
> >
> > I decided to only add an AVX variant, since the C code is not substantially
> > slower enough to justify a separate function just for ancient CPUs.
> > ---
> > libavfilter/f_ebur128.c | 15 ++--
> > libavfilter/f_ebur128.h | 16 ++++
> > libavfilter/x86/Makefile | 2 +
> > libavfilter/x86/f_ebur128.asm | 142 +++++++++++++++++++++++++++++++
> > libavfilter/x86/f_ebur128_init.c | 35 ++++++++
> > 5 files changed, 205 insertions(+), 5 deletions(-)
> > create mode 100644 libavfilter/x86/f_ebur128.asm
> > create mode 100644 libavfilter/x86/f_ebur128_init.c
>
> breaks x86-32
Fixed by enabling this only on 64-bit x86.
>
> src/libavfilter/x86/f_ebur128.asm:116: error: symbol `m8' undefined
> src/libavfilter/x86/f_ebur128.asm:118: error: symbol `m9' undefined
> src/libavfilter/x86/f_ebur128.asm:119: error: symbol `m10' undefined
> src/libavfilter/x86/f_ebur128.asm:120: error: symbol `m11' undefined
> src/libavfilter/x86/f_ebur128.asm:121: error: symbol `m12' undefined
> src/libavfilter/x86/f_ebur128.asm:122: error: symbol `m13' undefined
> src/libavfilter/x86/f_ebur128.asm:124: error: symbol `r7q' undefined
> src/libavfilter/x86/f_ebur128.asm:125: error: symbol `r8q' undefined
> src/libavfilter/x86/f_ebur128.asm:132: error: symbol `r7q' undefined
> src/libavfilter/x86/f_ebur128.asm:91: ... from macro `filter_channels' defined here
> src/libavfilter/x86/f_ebur128.asm:57: ... from macro `FILTER' defined here
> src//libavutil/x86/x86inc.asm:1654: ... from macro `movsd' defined here
> src//libavutil/x86/x86inc.asm:1501: ... from macro `RUN_AVX_INSTR' defined here
> src/libavfilter/x86/f_ebur128.asm:132: error: symbol `r7q' undefined
> src/libavfilter/x86/f_ebur128.asm:91: ... from macro `filter_channels' defined here
> src/libavfilter/x86/f_ebur128.asm:58: ... from macro `FILTER' defined here
> src//libavutil/x86/x86inc.asm:1654: ... from macro `movsd' defined here
> src//libavutil/x86/x86inc.asm:1501: ... from macro `RUN_AVX_INSTR' defined here
> src/libavfilter/x86/f_ebur128.asm:132: error: symbol `r7q' undefined
> ...
>
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Concerning the gods, I have no means of knowing whether they exist or not
> or of what sort they may be, because of the obscurity of the subject, and
> the brevity of human life -- Protagoras
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH v2 11/13] avfilter/f_ebur128: move variable declarations to usage site
2025-06-16 11:19 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
@ 2025-06-16 11:19 ` Niklas Haas
0 siblings, 0 replies; 18+ messages in thread
From: Niklas Haas @ 2025-06-16 11:19 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Niklas Haas
From: Niklas Haas <git@haasn.dev>
Declaring variables at their point of use, including in `for` loop initializers, is allowed since C99, i.e. by any non-ancient version of C.
---
libavfilter/f_ebur128.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index 4051b1ea95..1fb7129271 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -652,7 +652,7 @@ void ff_ebur128_filter_channels_c(const EBUR128DSPContext *dsp,
static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
{
- int i, ch, idx_insample, ret;
+ int ret;
AVFilterContext *ctx = inlink->dst;
EBUR128Context *ebur128 = ctx->priv;
const EBUR128DSPContext *dsp = &ebur128->dsp;
@@ -705,7 +705,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
}
- for (idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
+ for (int idx_insample = ebur128->idx_insample; idx_insample < nb_samples; idx_insample++) {
const int bin_id_400 = ebur128->i400.cache_pos++;
const int bin_id_3000 = ebur128->i3000.cache_pos++;
@@ -741,7 +741,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
#define COMPUTE_LOUDNESS(m, time) do { \
if (ebur128->i##time.filled) { \
/* weighting sum of the last <time> ms */ \
- for (ch = 0; ch < nb_channels; ch++) \
+ for (int ch = 0; ch < nb_channels; ch++) \
power_##time += ebur128->ch_weighting[ch] * ebur128->i##time.sum[ch]; \
power_##time /= I##time##_BINS(inlink->sample_rate); \
} \
@@ -762,7 +762,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
/* compute integrated loudness by summing the histogram values
* above the relative threshold */
- for (i = gate_hist_pos; i < HIST_SIZE; i++) {
+ for (int i = gate_hist_pos; i < HIST_SIZE; i++) {
const unsigned nb_v = ebur128->i400.histogram[i].count;
nb_integrated += nb_v;
integrated_sum += nb_v * ebur128->i400.histogram[i].energy;
@@ -788,7 +788,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
int gate_hist_pos = gate_update(&ebur128->i3000, power_3000,
loudness_3000, LRA_GATE_THRES);
- for (i = gate_hist_pos; i < HIST_SIZE; i++)
+ for (int i = gate_hist_pos; i < HIST_SIZE; i++)
nb_powers += ebur128->i3000.histogram[i].count;
if (nb_powers) {
uint64_t n, nb_pow;
@@ -796,7 +796,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
/* get lower loudness to consider */
n = 0;
nb_pow = LRA_LOWER_PRC * nb_powers * 0.01 + 0.5;
- for (i = gate_hist_pos; i < HIST_SIZE; i++) {
+ for (int i = gate_hist_pos; i < HIST_SIZE; i++) {
n += ebur128->i3000.histogram[i].count;
if (n >= nb_pow) {
ebur128->lra_low = ebur128->i3000.histogram[i].loudness;
@@ -807,7 +807,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
/* get higher loudness to consider */
n = nb_powers;
nb_pow = LRA_HIGHER_PRC * nb_powers * 0.01 + 0.5;
- for (i = HIST_SIZE - 1; i >= 0; i--) {
+ for (int i = HIST_SIZE - 1; i >= 0; i--) {
n -= FFMIN(n, ebur128->i3000.histogram[i].count);
if (n < nb_pow) {
ebur128->lra_high = ebur128->i3000.histogram[i].loudness;
@@ -909,7 +909,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) { \
double max_peak = 0.0; \
char key[64]; \
- for (ch = 0; ch < nb_channels; ch++) { \
+ for (int ch = 0; ch < nb_channels; ch++) { \
snprintf(key, sizeof(key), \
META_PREFIX AV_STRINGIFY(name) "_peaks_ch%d", ch); \
max_peak = fmax(max_peak, ebur128->name##_peaks[ch]); \
@@ -948,7 +948,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
#define PRINT_PEAKS(str, sp, ptype) do { \
if (ebur128->peak_mode & PEAK_MODE_ ## ptype ## _PEAKS) { \
av_log(ctx, ebur128->loglevel, " " str ":"); \
- for (ch = 0; ch < nb_channels; ch++) \
+ for (int ch = 0; ch < nb_channels; ch++) \
av_log(ctx, ebur128->loglevel, " %5.1f", DBFS(sp[ch])); \
av_log(ctx, ebur128->loglevel, " dBFS"); \
} \
--
2.49.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2025-06-16 11:21 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-06-13 16:37 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 02/13] avfilter/f_ebur128: simplify sample cache array Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 03/13] avfilter/f_ebur128: use structs for biquad weights Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 04/13] avfilter/f_ebur128: use a single packed array for the integrator cache Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 05/13] avfilter/f_ebur128: move weights and cache to EBUR128DSPContext Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 06/13] avfilter/f_ebur128: split off C implementation to separate function Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 07/13] avfilter/x86/f_ebur128: add x86 AVX implementation Niklas Haas
2025-06-14 1:12 ` Michael Niedermayer
2025-06-16 11:19 ` Niklas Haas
2025-06-14 1:20 ` James Almer
2025-06-16 11:15 ` Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 08/13] avfilter/f_ebur128: remove pointless macro Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 09/13] avfilter/f_ebur128: move true peak calculation out of main loop Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 10/13] avfilter/f_ebur128: lift sample " Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 11/13] avfilter/f_ebur128: move variable declarations to usage site Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 12/13] avfilter/f_ebur128: move true peak calculation to DSP function Niklas Haas
2025-06-13 16:37 ` [FFmpeg-devel] [PATCH v2 13/13] avfilter/x86/f_ebur128: implement AVX true peak calculation Niklas Haas
2025-06-16 11:19 [FFmpeg-devel] [PATCH v2 01/13] avfilter/f_ebur128: use transformed direct form II Niklas Haas
2025-06-16 11:19 ` [FFmpeg-devel] [PATCH v2 11/13] avfilter/f_ebur128: move variable declarations to usage site Niklas Haas
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git