* [FFmpeg-devel] [PATCH 2/2] checkasm/flacdsp: sanitize lpc arguments
2024-05-11 19:46 [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: run lpc benchmarks with an unmodified buffer James Almer
@ 2024-05-11 19:46 ` James Almer
2024-05-11 20:31 ` [FFmpeg-devel] [PATCH 2/2 v2] " James Almer
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 3/8] x86/flacdsp: add a SSE4 version of lpc16 James Almer
` (7 subsequent siblings)
8 siblings, 1 reply; 18+ messages in thread
From: James Almer @ 2024-05-11 19:46 UTC (permalink / raw)
To: ffmpeg-devel
Fixes signed integer overflows as reported by ubsan.
Signed-off-by: James Almer <jamrial@gmail.com>
---
tests/checkasm/flacdsp.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
index 6561b4ed20..bf25cea39c 100644
--- a/tests/checkasm/flacdsp.c
+++ b/tests/checkasm/flacdsp.c
@@ -54,9 +54,10 @@ static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **ne
bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
}
-static void check_lpc(int pred_order)
+static void check_lpc(int pred_order, int bps)
{
int qlevel = rnd() % 16;
+ int coeff_prec = rnd() % 16;
LOCAL_ALIGNED_16(int32_t, coeffs, [32]);
LOCAL_ALIGNED_16(int32_t, dst, [BUF_SIZE]);
LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]);
@@ -64,11 +65,13 @@ static void check_lpc(int pred_order)
declare_func(void, int32_t *, const int[32], int, int, int);
+ if (bps <= 16)
+ coeff_prec = av_clip(coeff_prec, 0, 32 - bps - av_log2(pred_order));
+
for (int i = 0; i < 32; i++)
- coeffs[i] = rnd();
+ coeffs[i] = av_mod_uintp2(rnd(), coeff_prec);
for (int i = 0; i < BUF_SIZE; i++)
- dst[i] = rnd();
-
+ dst[i] = rnd() & ((1LL << bps) - 1);
memcpy(dst0, dst, BUF_SIZE * sizeof (int32_t));
memcpy(dst1, dst, BUF_SIZE * sizeof (int32_t));
call_ref(dst0, coeffs, pred_order, qlevel, BUF_SIZE);
@@ -116,10 +119,10 @@ void checkasm_check_flacdsp(void)
for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++)
if (check_func(h.lpc16, "flac_lpc_16_%d", pred_orders[i]))
- check_lpc(pred_orders[i]);
+ check_lpc(pred_orders[i], 16);
for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++)
if (check_func(h.lpc32, "flac_lpc_32_%d", pred_orders[i]))
- check_lpc(pred_orders[i]);
+ check_lpc(pred_orders[i], 32);
report("lpc");
}
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 2/2 v2] checkasm/flacdsp: sanitize lpc arguments
2024-05-11 19:46 ` [FFmpeg-devel] [PATCH 2/2] checkasm/flacdsp: sanitize lpc arguments James Almer
@ 2024-05-11 20:31 ` James Almer
0 siblings, 0 replies; 18+ messages in thread
From: James Almer @ 2024-05-11 20:31 UTC (permalink / raw)
To: ffmpeg-devel
Fixes signed integer overflows as reported by ubsan.
Signed-off-by: James Almer <jamrial@gmail.com>
---
Now allowing negative values.
tests/checkasm/flacdsp.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
index 6561b4ed20..d694c1093b 100644
--- a/tests/checkasm/flacdsp.c
+++ b/tests/checkasm/flacdsp.c
@@ -21,6 +21,7 @@
#include <string.h>
#include "checkasm.h"
#include "libavcodec/flacdsp.h"
+#include "libavcodec/mathops.h"
#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
@@ -54,9 +55,10 @@ static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **ne
bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
}
-static void check_lpc(int pred_order)
+static void check_lpc(int pred_order, int bps)
{
int qlevel = rnd() % 16;
+ int coeff_prec = (rnd() % 15) + 1;
LOCAL_ALIGNED_16(int32_t, coeffs, [32]);
LOCAL_ALIGNED_16(int32_t, dst, [BUF_SIZE]);
LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]);
@@ -64,10 +66,13 @@ static void check_lpc(int pred_order)
declare_func(void, int32_t *, const int[32], int, int, int);
+ if (bps <= 16)
+ coeff_prec = av_clip(coeff_prec, 0, 32 - bps - av_log2(pred_order));
+
for (int i = 0; i < 32; i++)
- coeffs[i] = rnd();
+ coeffs[i] = sign_extend(rnd(), coeff_prec);
for (int i = 0; i < BUF_SIZE; i++)
- dst[i] = rnd();
+ dst[i] = sign_extend(rnd(), bps);
memcpy(dst0, dst, BUF_SIZE * sizeof (int32_t));
memcpy(dst1, dst, BUF_SIZE * sizeof (int32_t));
@@ -116,10 +121,10 @@ void checkasm_check_flacdsp(void)
for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++)
if (check_func(h.lpc16, "flac_lpc_16_%d", pred_orders[i]))
- check_lpc(pred_orders[i]);
+ check_lpc(pred_orders[i], 16);
for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++)
if (check_func(h.lpc32, "flac_lpc_32_%d", pred_orders[i]))
- check_lpc(pred_orders[i]);
+ check_lpc(pred_orders[i], 32);
report("lpc");
}
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 3/8] x86/flacdsp: add a SSE4 version of lpc16
2024-05-11 19:46 [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: run lpc benchmarks with an unmodified buffer James Almer
2024-05-11 19:46 ` [FFmpeg-devel] [PATCH 2/2] checkasm/flacdsp: sanitize lpc arguments James Almer
@ 2024-05-12 16:06 ` James Almer
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 4/8] avcodec/flacdsp: split off wasted bit handling into dsp functions James Almer
` (6 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: James Almer @ 2024-05-12 16:06 UTC (permalink / raw)
To: ffmpeg-devel
flac_lpc_16_13_c: 2841.3
flac_lpc_16_13_sse4: 2151.8
flac_lpc_16_16_c: 3382.8
flac_lpc_16_16_sse4: 2228.3
flac_lpc_16_29_c: 5800.3
flac_lpc_16_29_sse4: 3727.3
flac_lpc_16_32_c: 5972.8
flac_lpc_16_32_sse4: 4052.3
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/x86/flacdsp.asm | 13 +++++++------
libavcodec/x86/flacdsp_init.c | 3 +++
2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 4b2fd65435..f38eb7db76 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -38,9 +38,9 @@ SECTION .text
%endif
%endmacro
-%macro LPC_32 1
+%macro LPC_32 3
INIT_XMM %1
-cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
+cglobal flac_lpc_%2, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
sub lend, pred_orderd
jle .ret
movsxdifnidn pred_orderq, pred_orderd
@@ -67,14 +67,14 @@ ALIGN 16
jl .loop_order
.end_order:
PMACSDQL m2, m0, m1, m2, m0
- psrlq m2, m4
+ %3 m2, m4
movd m0, [decodedq]
paddd m0, m2
movd [decodedq], m0
sub lend, 2
jl .ret
PMACSDQL m3, m1, m0, m3, m1
- psrlq m3, m4
+ %3 m3, m4
movd m1, [decodedq+4]
paddd m1, m3
movd [decodedq+4], m1
@@ -83,10 +83,11 @@ ALIGN 16
RET
%endmacro
+LPC_32 sse4, 16, psrad
+LPC_32 sse4, 32, psrlq
%if HAVE_XOP_EXTERNAL
-LPC_32 xop
+LPC_32 xop, 32, psrlq
%endif
-LPC_32 sse4
;----------------------------------------------------------------------------------
;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 87daed7005..dee4bf88fc 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -23,6 +23,8 @@
#include "libavutil/x86/cpu.h"
#include "config.h"
+void ff_flac_lpc_16_sse4(int32_t *samples, const int coeffs[32], int order,
+ int qlevel, int len);
void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
@@ -93,6 +95,7 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
}
}
if (EXTERNAL_SSE4(cpu_flags)) {
+ c->lpc16 = ff_flac_lpc_16_sse4;
c->lpc32 = ff_flac_lpc_32_sse4;
}
if (EXTERNAL_AVX(cpu_flags)) {
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 4/8] avcodec/flacdsp: split off wasted bit handling into dsp functions
2024-05-11 19:46 [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: run lpc benchmarks with an unmodified buffer James Almer
2024-05-11 19:46 ` [FFmpeg-devel] [PATCH 2/2] checkasm/flacdsp: sanitize lpc arguments James Almer
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 3/8] x86/flacdsp: add a SSE4 version of lpc16 James Almer
@ 2024-05-12 16:06 ` James Almer
2024-05-12 16:15 ` Andreas Rheinhardt
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 5/8] checkasm/flacdsp: add a test for wasted32 James Almer
` (5 subsequent siblings)
8 siblings, 1 reply; 18+ messages in thread
From: James Almer @ 2024-05-12 16:06 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/flacdec.c | 8 ++------
libavcodec/flacdsp.c | 20 ++++++++++++++++++++
libavcodec/flacdsp.h | 3 +++
3 files changed, 25 insertions(+), 6 deletions(-)
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 91bbdc657d..6e6a2896b4 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -602,13 +602,9 @@ static inline int decode_subframe(FLACContext *s, int channel)
if (wasted) {
if (wasted+bps == 33) {
- int i;
- for (i = 0; i < s->blocksize; i++)
- s->decoded_33bps[i] = (uint64_t)decoded[i] << wasted;
+ s->dsp.wasted33(s->decoded_33bps, decoded, wasted, s->blocksize);
} else if (wasted < 32) {
- int i;
- for (i = 0; i < s->blocksize; i++)
- decoded[i] = (unsigned)decoded[i] << wasted;
+ s->dsp.wasted32(decoded, wasted, s->blocksize);
}
}
diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
index 71b4ac44aa..610831348a 100644
--- a/libavcodec/flacdsp.c
+++ b/libavcodec/flacdsp.c
@@ -84,11 +84,31 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
}
+static void flac_wasted_32_c(int32_t *decoded, int wasted, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ decoded[i] = (unsigned)decoded[i] << wasted;
+}
+
+static void flac_wasted_33_c(int64_t *decoded, const int32_t *residual,
+ int wasted, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ decoded[i] = (uint64_t)residual[i] << wasted;
+}
+
av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
{
c->lpc16 = flac_lpc_16_c;
c->lpc32 = flac_lpc_32_c;
+ c->wasted32 = flac_wasted_32_c;
+ c->wasted33 = flac_wasted_33_c;
+
switch (fmt) {
case AV_SAMPLE_FMT_S32:
c->decorrelate[0] = flac_decorrelate_indep_c_32;
diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 15149c026e..5a59c0c864 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -30,6 +30,9 @@ typedef struct FLACDSPContext {
int qlevel, int len);
void (*lpc32)(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
+ void (*wasted32)(int32_t *decoded, int wasted, int len);
+ void (*wasted33)(int64_t *decoded, const int32_t *residual,
+ int wasted, int len);
void (*lpc16_encode)(int32_t *res, const int32_t *smp, int len, int order,
const int32_t coefs[32], int shift);
void (*lpc32_encode)(int32_t *res, const int32_t *smp, int len, int order,
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 4/8] avcodec/flacdsp: split off wasted bit handling into dsp functions
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 4/8] avcodec/flacdsp: split off wasted bit handling into dsp functions James Almer
@ 2024-05-12 16:15 ` Andreas Rheinhardt
2024-05-12 16:44 ` James Almer
0 siblings, 1 reply; 18+ messages in thread
From: Andreas Rheinhardt @ 2024-05-12 16:15 UTC (permalink / raw)
To: ffmpeg-devel
James Almer:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> libavcodec/flacdec.c | 8 ++------
> libavcodec/flacdsp.c | 20 ++++++++++++++++++++
> libavcodec/flacdsp.h | 3 +++
> 3 files changed, 25 insertions(+), 6 deletions(-)
>
> diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
> index 91bbdc657d..6e6a2896b4 100644
> --- a/libavcodec/flacdec.c
> +++ b/libavcodec/flacdec.c
> @@ -602,13 +602,9 @@ static inline int decode_subframe(FLACContext *s, int channel)
>
> if (wasted) {
> if (wasted+bps == 33) {
> - int i;
> - for (i = 0; i < s->blocksize; i++)
> - s->decoded_33bps[i] = (uint64_t)decoded[i] << wasted;
> + s->dsp.wasted33(s->decoded_33bps, decoded, wasted, s->blocksize);
> } else if (wasted < 32) {
> - int i;
> - for (i = 0; i < s->blocksize; i++)
> - decoded[i] = (unsigned)decoded[i] << wasted;
> + s->dsp.wasted32(decoded, wasted, s->blocksize);
> }
> }
>
> diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
> index 71b4ac44aa..610831348a 100644
> --- a/libavcodec/flacdsp.c
> +++ b/libavcodec/flacdsp.c
> @@ -84,11 +84,31 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
>
> }
>
> +static void flac_wasted_32_c(int32_t *decoded, int wasted, int len)
> +{
> + int i;
> +
> + for (i = 0; i < len; i++)
> + decoded[i] = (unsigned)decoded[i] << wasted;
> +}
> +
> +static void flac_wasted_33_c(int64_t *decoded, const int32_t *residual,
> + int wasted, int len)
> +{
> + int i;
> +
> + for (i = 0; i < len; i++)
for (int i = 0
> + decoded[i] = (uint64_t)residual[i] << wasted;
> +}
> +
> av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
> {
> c->lpc16 = flac_lpc_16_c;
> c->lpc32 = flac_lpc_32_c;
>
> + c->wasted32 = flac_wasted_32_c;
> + c->wasted33 = flac_wasted_33_c;
> +
> switch (fmt) {
> case AV_SAMPLE_FMT_S32:
> c->decorrelate[0] = flac_decorrelate_indep_c_32;
> diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
> index 15149c026e..5a59c0c864 100644
> --- a/libavcodec/flacdsp.h
> +++ b/libavcodec/flacdsp.h
> @@ -30,6 +30,9 @@ typedef struct FLACDSPContext {
> int qlevel, int len);
> void (*lpc32)(int32_t *samples, const int coeffs[32], int order,
> int qlevel, int len);
> + void (*wasted32)(int32_t *decoded, int wasted, int len);
> + void (*wasted33)(int64_t *decoded, const int32_t *residual,
> + int wasted, int len);
> void (*lpc16_encode)(int32_t *res, const int32_t *smp, int len, int order,
> const int32_t coefs[32], int shift);
> void (*lpc32_encode)(int32_t *res, const int32_t *smp, int len, int order,
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 4/8] avcodec/flacdsp: split off wasted bit handling into dsp functions
2024-05-12 16:15 ` Andreas Rheinhardt
@ 2024-05-12 16:44 ` James Almer
0 siblings, 0 replies; 18+ messages in thread
From: James Almer @ 2024-05-12 16:44 UTC (permalink / raw)
To: ffmpeg-devel
On 5/12/2024 1:15 PM, Andreas Rheinhardt wrote:
> James Almer:
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>> libavcodec/flacdec.c | 8 ++------
>> libavcodec/flacdsp.c | 20 ++++++++++++++++++++
>> libavcodec/flacdsp.h | 3 +++
>> 3 files changed, 25 insertions(+), 6 deletions(-)
>>
>> diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
>> index 91bbdc657d..6e6a2896b4 100644
>> --- a/libavcodec/flacdec.c
>> +++ b/libavcodec/flacdec.c
>> @@ -602,13 +602,9 @@ static inline int decode_subframe(FLACContext *s, int channel)
>>
>> if (wasted) {
>> if (wasted+bps == 33) {
>> - int i;
>> - for (i = 0; i < s->blocksize; i++)
>> - s->decoded_33bps[i] = (uint64_t)decoded[i] << wasted;
>> + s->dsp.wasted33(s->decoded_33bps, decoded, wasted, s->blocksize);
>> } else if (wasted < 32) {
>> - int i;
>> - for (i = 0; i < s->blocksize; i++)
>> - decoded[i] = (unsigned)decoded[i] << wasted;
>> + s->dsp.wasted32(decoded, wasted, s->blocksize);
>> }
>> }
>>
>> diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
>> index 71b4ac44aa..610831348a 100644
>> --- a/libavcodec/flacdsp.c
>> +++ b/libavcodec/flacdsp.c
>> @@ -84,11 +84,31 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
>>
>> }
>>
>> +static void flac_wasted_32_c(int32_t *decoded, int wasted, int len)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < len; i++)
>> + decoded[i] = (unsigned)decoded[i] << wasted;
>> +}
>> +
>> +static void flac_wasted_33_c(int64_t *decoded, const int32_t *residual,
>> + int wasted, int len)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < len; i++)
>
> for (int i = 0
Fixed locally.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 5/8] checkasm/flacdsp: add a test for wasted32
2024-05-11 19:46 [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: run lpc benchmarks with an unmodified buffer James Almer
` (2 preceding siblings ...)
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 4/8] avcodec/flacdsp: split off wasted bit handling into dsp functions James Almer
@ 2024-05-12 16:06 ` James Almer
2024-05-12 16:38 ` Rémi Denis-Courmont
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 6/8] x86/flacdsp: add a SSE2 version of wasted32 James Almer
` (4 subsequent siblings)
8 siblings, 1 reply; 18+ messages in thread
From: James Almer @ 2024-05-12 16:06 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
tests/checkasm/flacdsp.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
index d694c1093b..343bee41ed 100644
--- a/tests/checkasm/flacdsp.c
+++ b/tests/checkasm/flacdsp.c
@@ -83,6 +83,27 @@ static void check_lpc(int pred_order, int bps)
bench_new(dst, coeffs, pred_order, qlevel, BUF_SIZE);
}
+static void check_wasted32(void)
+{
+ int wasted = rnd() % 32;
+ LOCAL_ALIGNED_16(int32_t, dst, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]);
+
+ declare_func(void, int32_t *, int, int);
+
+ for (int i = 0; i < BUF_SIZE; i++)
+ dst[i] = rnd();
+
+ memcpy(dst0, dst, BUF_SIZE * sizeof (int32_t));
+ memcpy(dst1, dst, BUF_SIZE * sizeof (int32_t));
+ call_ref(dst0, wasted, BUF_SIZE);
+ call_new(dst1, wasted, BUF_SIZE);
+ if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0)
+ fail();
+ bench_new(dst, wasted, BUF_SIZE);
+}
+
void checkasm_check_flacdsp(void)
{
LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]);
@@ -126,5 +147,8 @@ void checkasm_check_flacdsp(void)
if (check_func(h.lpc32, "flac_lpc_32_%d", pred_orders[i]))
check_lpc(pred_orders[i], 32);
+ if (check_func(h.wasted32, "flac_wasted_32"))
+ check_wasted32();
+
report("lpc");
}
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 5/8] checkasm/flacdsp: add a test for wasted32
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 5/8] checkasm/flacdsp: add a test for wasted32 James Almer
@ 2024-05-12 16:38 ` Rémi Denis-Courmont
2024-05-12 16:42 ` [FFmpeg-devel] [PATCH 5/8 v2] " James Almer
0 siblings, 1 reply; 18+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-12 16:38 UTC (permalink / raw)
To: ffmpeg-devel
On Sunday, 12 May 2024, 19:06:54 EEST, James Almer wrote:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> tests/checkasm/flacdsp.c | 24 ++++++++++++++++++++++++
> 1 file changed, 24 insertions(+)
>
> diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
> index d694c1093b..343bee41ed 100644
> --- a/tests/checkasm/flacdsp.c
> +++ b/tests/checkasm/flacdsp.c
> @@ -83,6 +83,27 @@ static void check_lpc(int pred_order, int bps)
> bench_new(dst, coeffs, pred_order, qlevel, BUF_SIZE);
> }
>
> +static void check_wasted32(void)
> +{
> + int wasted = rnd() % 32;
> + LOCAL_ALIGNED_16(int32_t, dst, [BUF_SIZE]);
> + LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]);
> + LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]);
> +
> + declare_func(void, int32_t *, int, int);
> +
> + for (int i = 0; i < BUF_SIZE; i++)
> + dst[i] = rnd();
> +
> + memcpy(dst0, dst, BUF_SIZE * sizeof (int32_t));
> + memcpy(dst1, dst, BUF_SIZE * sizeof (int32_t));
> + call_ref(dst0, wasted, BUF_SIZE);
> + call_new(dst1, wasted, BUF_SIZE);
> + if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0)
> + fail();
> + bench_new(dst, wasted, BUF_SIZE);
> +}
> +
> void checkasm_check_flacdsp(void)
> {
> LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]);
> @@ -126,5 +147,8 @@ void checkasm_check_flacdsp(void)
> if (check_func(h.lpc32, "flac_lpc_32_%d", pred_orders[i]))
> check_lpc(pred_orders[i], 32);
>
> + if (check_func(h.wasted32, "flac_wasted_32"))
> + check_wasted32();
> +
> report("lpc");
Missing report?
> }
--
Rémi Denis-Courmont
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 5/8 v2] checkasm/flacdsp: add a test for wasted32
2024-05-12 16:38 ` Rémi Denis-Courmont
@ 2024-05-12 16:42 ` James Almer
0 siblings, 0 replies; 18+ messages in thread
From: James Almer @ 2024-05-12 16:42 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
tests/checkasm/flacdsp.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
index d694c1093b..ee0897ed01 100644
--- a/tests/checkasm/flacdsp.c
+++ b/tests/checkasm/flacdsp.c
@@ -83,6 +83,27 @@ static void check_lpc(int pred_order, int bps)
bench_new(dst, coeffs, pred_order, qlevel, BUF_SIZE);
}
+static void check_wasted32(void)
+{
+ int wasted = rnd() % 32;
+ LOCAL_ALIGNED_16(int32_t, dst, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int32_t, dst0, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int32_t, dst1, [BUF_SIZE]);
+
+ declare_func(void, int32_t *, int, int);
+
+ for (int i = 0; i < BUF_SIZE; i++)
+ dst[i] = rnd();
+
+ memcpy(dst0, dst, BUF_SIZE * sizeof (int32_t));
+ memcpy(dst1, dst, BUF_SIZE * sizeof (int32_t));
+ call_ref(dst0, wasted, BUF_SIZE);
+ call_new(dst1, wasted, BUF_SIZE);
+ if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int32_t)) != 0)
+ fail();
+ bench_new(dst, wasted, BUF_SIZE);
+}
+
void checkasm_check_flacdsp(void)
{
LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]);
@@ -127,4 +148,9 @@ void checkasm_check_flacdsp(void)
check_lpc(pred_orders[i], 32);
report("lpc");
+
+ if (check_func(h.wasted32, "flac_wasted_32"))
+ check_wasted32();
+
+ report("wasted");
}
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 6/8] x86/flacdsp: add a SSE2 version of wasted32
2024-05-11 19:46 [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: run lpc benchmarks with an unmodified buffer James Almer
` (3 preceding siblings ...)
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 5/8] checkasm/flacdsp: add a test for wasted32 James Almer
@ 2024-05-12 16:06 ` James Almer
2024-05-12 18:51 ` [FFmpeg-devel] [PATCH 6/8 v2] " James Almer
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 7/8] checkasm/flacdsp: add a test for wasted33 James Almer
` (3 subsequent siblings)
8 siblings, 1 reply; 18+ messages in thread
From: James Almer @ 2024-05-12 16:06 UTC (permalink / raw)
To: ffmpeg-devel
flac_wasted_32_c: 851.3
flac_wasted_32_sse2: 53.3
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/x86/flacdsp.asm | 15 +++++++++++++++
libavcodec/x86/flacdsp_init.c | 3 +++
2 files changed, 18 insertions(+)
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index f38eb7db76..3a940059c7 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -89,6 +89,21 @@ LPC_32 sse4, 32, psrlq
LPC_32 xop, 32, psrlq
%endif
+INIT_XMM sse2
+cglobal flac_wasted_32, 3,3,2, decoded, wasted, len
+ shl lend, 2
+ lea decodedq, [decodedq+lenq]
+ neg lenq
+ movd m1, wastedd
+ALIGN 16
+.loop:
+ mova m0, [decodedq+lenq]
+ pslld m0, m1
+ mova [decodedq+lenq], m0
+ add lenq, mmsize
+ jl .loop
+ RET
+
;----------------------------------------------------------------------------------
;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
; int len, int shift);
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index dee4bf88fc..67aa118760 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -30,6 +30,8 @@ void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
+void ff_flac_wasted_32_sse2(int32_t *decoded, int wasted, int len);
+
#define DECORRELATE_FUNCS(fmt, opt) \
void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
int len, int shift); \
@@ -63,6 +65,7 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE2(cpu_flags)) {
+ c->wasted32 = ff_flac_wasted_32_sse2;
if (fmt == AV_SAMPLE_FMT_S16) {
c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 6/8 v2] x86/flacdsp: add a SSE2 version of wasted32
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 6/8] x86/flacdsp: add a SSE2 version of wasted32 James Almer
@ 2024-05-12 18:51 ` James Almer
2024-05-12 20:22 ` Lynne via ffmpeg-devel
0 siblings, 1 reply; 18+ messages in thread
From: James Almer @ 2024-05-12 18:51 UTC (permalink / raw)
To: ffmpeg-devel
flac_wasted_32_c: 851.3
flac_wasted_32_sse2: 41.3
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/x86/flacdsp.asm | 24 ++++++++++++++++++++++++
libavcodec/x86/flacdsp_init.c | 3 +++
2 files changed, 27 insertions(+)
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index f38eb7db76..21b2439bc0 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -89,6 +89,30 @@ LPC_32 sse4, 32, psrlq
LPC_32 xop, 32, psrlq
%endif
+INIT_XMM sse2
+cglobal flac_wasted_32, 3,3,5, decoded, wasted, len
+ shl lend, 2
+ add decodedq, lenq
+ neg lenq
+ movd m4, wastedd
+ALIGN 16
+.loop:
+ mova m0, [decodedq+lenq+mmsize*0]
+ mova m1, [decodedq+lenq+mmsize*1]
+ mova m2, [decodedq+lenq+mmsize*2]
+ mova m3, [decodedq+lenq+mmsize*3]
+ pslld m0, m4
+ pslld m1, m4
+ pslld m2, m4
+ pslld m3, m4
+ mova [decodedq+lenq+mmsize*0], m0
+ mova [decodedq+lenq+mmsize*1], m1
+ mova [decodedq+lenq+mmsize*2], m2
+ mova [decodedq+lenq+mmsize*3], m3
+ add lenq, mmsize * 4
+ jl .loop
+ RET
+
;----------------------------------------------------------------------------------
;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
; int len, int shift);
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index dee4bf88fc..67aa118760 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -30,6 +30,8 @@ void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
+void ff_flac_wasted_32_sse2(int32_t *decoded, int wasted, int len);
+
#define DECORRELATE_FUNCS(fmt, opt) \
void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
int len, int shift); \
@@ -63,6 +65,7 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE2(cpu_flags)) {
+ c->wasted32 = ff_flac_wasted_32_sse2;
if (fmt == AV_SAMPLE_FMT_S16) {
c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [FFmpeg-devel] [PATCH 6/8 v2] x86/flacdsp: add a SSE2 version of wasted32
2024-05-12 18:51 ` [FFmpeg-devel] [PATCH 6/8 v2] " James Almer
@ 2024-05-12 20:22 ` Lynne via ffmpeg-devel
0 siblings, 0 replies; 18+ messages in thread
From: Lynne via ffmpeg-devel @ 2024-05-12 20:22 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Lynne
On 12/05/2024 20:51, James Almer wrote:
> flac_wasted_32_c: 851.3
> flac_wasted_32_sse2: 41.3
>
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> libavcodec/x86/flacdsp.asm | 24 ++++++++++++++++++++++++
> libavcodec/x86/flacdsp_init.c | 3 +++
> 2 files changed, 27 insertions(+)
>
> diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
> index f38eb7db76..21b2439bc0 100644
> --- a/libavcodec/x86/flacdsp.asm
> +++ b/libavcodec/x86/flacdsp.asm
> @@ -89,6 +89,30 @@ LPC_32 sse4, 32, psrlq
> LPC_32 xop, 32, psrlq
> %endif
>
> +INIT_XMM sse2
> +cglobal flac_wasted_32, 3,3,5, decoded, wasted, len
> + shl lend, 2
> + add decodedq, lenq
> + neg lenq
> + movd m4, wastedd
> +ALIGN 16
> +.loop:
> + mova m0, [decodedq+lenq+mmsize*0]
> + mova m1, [decodedq+lenq+mmsize*1]
> + mova m2, [decodedq+lenq+mmsize*2]
> + mova m3, [decodedq+lenq+mmsize*3]
> + pslld m0, m4
> + pslld m1, m4
> + pslld m2, m4
> + pslld m3, m4
> + mova [decodedq+lenq+mmsize*0], m0
> + mova [decodedq+lenq+mmsize*1], m1
> + mova [decodedq+lenq+mmsize*2], m2
> + mova [decodedq+lenq+mmsize*3], m3
> + add lenq, mmsize * 4
> + jl .loop
> + RET
Looks good
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 7/8] checkasm/flacdsp: add a test for wasted33
2024-05-11 19:46 [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: run lpc benchmarks with an unmodified buffer James Almer
` (4 preceding siblings ...)
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 6/8] x86/flacdsp: add a SSE2 version of wasted32 James Almer
@ 2024-05-12 16:06 ` James Almer
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 8/8] x86/flacdsp: add SSE4 and AVX2 versions of wasted33 James Almer
` (2 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: James Almer @ 2024-05-12 16:06 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
tests/checkasm/flacdsp.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
index 343bee41ed..6abdd255a3 100644
--- a/tests/checkasm/flacdsp.c
+++ b/tests/checkasm/flacdsp.c
@@ -104,6 +104,25 @@ static void check_wasted32(void)
bench_new(dst, wasted, BUF_SIZE);
}
+static void check_wasted33(void)
+{
+ int wasted = rnd() % 33;
+ LOCAL_ALIGNED_16(int32_t, residuals, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int64_t, dst0, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int64_t, dst1, [BUF_SIZE]);
+
+ declare_func(void, int64_t *, const int32_t *, int, int);
+
+ for (int i = 0; i < BUF_SIZE; i++)
+ residuals[i] = rnd();
+
+ call_ref(dst0, residuals, wasted, BUF_SIZE);
+ call_new(dst1, residuals, wasted, BUF_SIZE);
+ if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int64_t)) != 0)
+ fail();
+ bench_new(dst0, residuals, wasted, BUF_SIZE);
+}
+
void checkasm_check_flacdsp(void)
{
LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]);
@@ -149,6 +168,8 @@ void checkasm_check_flacdsp(void)
if (check_func(h.wasted32, "flac_wasted_32"))
check_wasted32();
+ if (check_func(h.wasted33, "flac_wasted_33"))
+ check_wasted33();
report("lpc");
}
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 8/8] x86/flacdsp: add SSE4 and AVX2 versions of wasted33
2024-05-11 19:46 [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: run lpc benchmarks with an unmodified buffer James Almer
` (5 preceding siblings ...)
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 7/8] checkasm/flacdsp: add a test for wasted33 James Almer
@ 2024-05-12 16:06 ` James Almer
2024-05-12 18:53 ` [FFmpeg-devel] [PATCH 8/8 v2] x86/flacdsp: add an SSE4 version " James Almer
2024-05-12 20:36 ` [FFmpeg-devel] [PATCH 09/10] avcodec/flacdsp: split off lpc33 into a dsp function James Almer
2024-05-12 20:36 ` [FFmpeg-devel] [PATCH 10/10] checkasm/flacdsp: add a test for lpc33 James Almer
8 siblings, 1 reply; 18+ messages in thread
From: James Almer @ 2024-05-12 16:06 UTC (permalink / raw)
To: ffmpeg-devel
flac_wasted_33_c: 214.1
flac_wasted_33_sse4: 133.6
flac_wasted_33_avx2: 93.1
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/x86/flacdsp.asm | 24 ++++++++++++++++++++++++
libavcodec/x86/flacdsp_init.c | 6 ++++++
2 files changed, 30 insertions(+)
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 3a940059c7..84cd4dd465 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -104,6 +104,30 @@ ALIGN 16
jl .loop
RET
+%macro WASTED_33 1
+cglobal flac_wasted_33, 4,4,2, decoded, residuals, wasted, len
+ shl lend, 2
+ lea decodedq, [decodedq+lenq*2]
+ add residualsq, lenq
+ neg lenq
+ movd xm1, wastedd
+ALIGN 16
+.loop:
+ pmovsxdq m0, [residualsq+lenq]
+ psllq m0, xm1
+ mov%1 [decodedq+lenq*2], m0
+ add lenq, mmsize / 2
+ jl .loop
+ RET
+%endmacro
+
+INIT_XMM sse4
+WASTED_33 a
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+WASTED_33 u
+%endif
+
;----------------------------------------------------------------------------------
;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
; int len, int shift);
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 67aa118760..22482f8787 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -31,6 +31,8 @@ void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
void ff_flac_wasted_32_sse2(int32_t *decoded, int wasted, int len);
+void ff_flac_wasted_33_sse4(int64_t *decoded, const int32_t *residual, int wasted, int len);
+void ff_flac_wasted_33_avx2(int64_t *decoded, const int32_t *residual, int wasted, int len);
#define DECORRELATE_FUNCS(fmt, opt) \
void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
@@ -100,6 +102,7 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
if (EXTERNAL_SSE4(cpu_flags)) {
c->lpc16 = ff_flac_lpc_16_sse4;
c->lpc32 = ff_flac_lpc_32_sse4;
+ c->wasted33 = ff_flac_wasted_33_sse4;
}
if (EXTERNAL_AVX(cpu_flags)) {
if (fmt == AV_SAMPLE_FMT_S16) {
@@ -117,5 +120,8 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
if (EXTERNAL_XOP(cpu_flags)) {
c->lpc32 = ff_flac_lpc_32_xop;
}
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ c->wasted33 = ff_flac_wasted_33_avx2;
+ }
#endif /* HAVE_X86ASM */
}
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 8/8 v2] x86/flacdsp: add an SSE4 version of wasted33
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 8/8] x86/flacdsp: add SSE4 and AVX2 versions of wasted33 James Almer
@ 2024-05-12 18:53 ` James Almer
0 siblings, 0 replies; 18+ messages in thread
From: James Almer @ 2024-05-12 18:53 UTC (permalink / raw)
To: ffmpeg-devel
flac_wasted_33_c: 214.1
flac_wasted_33_sse4: 103.2
Signed-off-by: James Almer <jamrial@gmail.com>
---
Removed the AVX2 version, as the lane crossing in pmovsxdq negated pretty much
all of the speedup gained from processing twice the amount of data.
libavcodec/x86/flacdsp.asm | 25 +++++++++++++++++++++++++
libavcodec/x86/flacdsp_init.c | 2 ++
2 files changed, 27 insertions(+)
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 21b2439bc0..15fcec4f08 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -113,6 +113,31 @@ ALIGN 16
jl .loop
RET
+INIT_XMM sse4
+cglobal flac_wasted_33, 4,4,5, decoded, residuals, wasted, len
+ shl lend, 2
+ lea decodedq, [decodedq+lenq*2]
+ add residualsq, lenq
+ neg lenq
+ movd m4, wastedd
+ALIGN 16
+.loop:
+ pmovsxdq m0, [residualsq+lenq+mmsize*0]
+ pmovsxdq m1, [residualsq+lenq+mmsize/2]
+ pmovsxdq m2, [residualsq+lenq+mmsize*1]
+ pmovsxdq m3, [residualsq+lenq+mmsize*1+mmsize/2]
+ psllq m0, m4
+ psllq m1, m4
+ psllq m2, m4
+ psllq m3, m4
+ mova [decodedq+lenq*2+mmsize*0], m0
+ mova [decodedq+lenq*2+mmsize*1], m1
+ mova [decodedq+lenq*2+mmsize*2], m2
+ mova [decodedq+lenq*2+mmsize*3], m3
+ add lenq, mmsize * 2
+ jl .loop
+ RET
+
;----------------------------------------------------------------------------------
;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
; int len, int shift);
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 67aa118760..fa993d3466 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -31,6 +31,7 @@ void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
void ff_flac_wasted_32_sse2(int32_t *decoded, int wasted, int len);
+void ff_flac_wasted_33_sse4(int64_t *decoded, const int32_t *residual, int wasted, int len);
#define DECORRELATE_FUNCS(fmt, opt) \
void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
@@ -100,6 +101,7 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
if (EXTERNAL_SSE4(cpu_flags)) {
c->lpc16 = ff_flac_lpc_16_sse4;
c->lpc32 = ff_flac_lpc_32_sse4;
+ c->wasted33 = ff_flac_wasted_33_sse4;
}
if (EXTERNAL_AVX(cpu_flags)) {
if (fmt == AV_SAMPLE_FMT_S16) {
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 09/10] avcodec/flacdsp: split off lpc33 into a dsp function
2024-05-11 19:46 [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: run lpc benchmarks with an unmodified buffer James Almer
` (6 preceding siblings ...)
2024-05-12 16:06 ` [FFmpeg-devel] [PATCH 8/8] x86/flacdsp: add SSE4 and AVX2 versions of wasted33 James Almer
@ 2024-05-12 20:36 ` James Almer
2024-05-12 20:36 ` [FFmpeg-devel] [PATCH 10/10] checkasm/flacdsp: add a test for lpc33 James Almer
8 siblings, 0 replies; 18+ messages in thread
From: James Almer @ 2024-05-12 20:36 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/flacdec.c | 7 +------
libavcodec/flacdsp.c | 15 +++++++++++++++
libavcodec/flacdsp.h | 2 ++
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 6e6a2896b4..460d3bc01e 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -509,12 +509,7 @@ static int decode_subframe_lpc_33bps(FLACContext *s, int64_t *decoded,
if ((ret = decode_residuals(s, residual, pred_order)) < 0)
return ret;
- for (i = pred_order; i < s->blocksize; i++, decoded++) {
- int64_t sum = 0;
- for (j = 0; j < pred_order; j++)
- sum += (int64_t)coeffs[j] * (uint64_t)decoded[j];
- decoded[j] = residual[i] + (sum >> qlevel);
- }
+ s->dsp.lpc33(decoded, residual, coeffs, pred_order, qlevel, s->blocksize);
return 0;
}
diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
index 27d3e923ee..f5362bf66f 100644
--- a/libavcodec/flacdsp.c
+++ b/libavcodec/flacdsp.c
@@ -84,6 +84,20 @@ static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
}
+static void flac_lpc_33_c(int64_t *decoded, const int32_t *residual,
+ const int coeffs[32], int pred_order,
+ int qlevel, int len)
+{
+ int i, j;
+
+ for (i = pred_order; i < len; i++, decoded++) {
+ int64_t sum = 0;
+ for (j = 0; j < pred_order; j++)
+ sum += (int64_t)coeffs[j] * (uint64_t)decoded[j];
+ decoded[j] = residual[i] + (sum >> qlevel);
+ }
+}
+
static void flac_wasted_32_c(int32_t *decoded, int wasted, int len)
{
for (int i = 0; i < len; i++)
@@ -101,6 +115,7 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int cha
{
c->lpc16 = flac_lpc_16_c;
c->lpc32 = flac_lpc_32_c;
+ c->lpc33 = flac_lpc_33_c;
c->wasted32 = flac_wasted_32_c;
c->wasted33 = flac_wasted_33_c;
diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 5a59c0c864..3b7b35a112 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -30,6 +30,8 @@ typedef struct FLACDSPContext {
int qlevel, int len);
void (*lpc32)(int32_t *samples, const int coeffs[32], int order,
int qlevel, int len);
+ void (*lpc33)(int64_t *samples, const int32_t *residual, const int coeffs[32],
+ int pred_order, int qlevel, int len);
void (*wasted32)(int32_t *decoded, int wasted, int len);
void (*wasted33)(int64_t *decoded, const int32_t *residual,
int wasted, int len);
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread
* [FFmpeg-devel] [PATCH 10/10] checkasm/flacdsp: add a test for lpc33
2024-05-11 19:46 [FFmpeg-devel] [PATCH 1/2] checkasm/flacdsp: run lpc benchmarks with an unmodified buffer James Almer
` (7 preceding siblings ...)
2024-05-12 20:36 ` [FFmpeg-devel] [PATCH 09/10] avcodec/flacdsp: split off lpc33 into a dsp function James Almer
@ 2024-05-12 20:36 ` James Almer
8 siblings, 0 replies; 18+ messages in thread
From: James Almer @ 2024-05-12 20:36 UTC (permalink / raw)
To: ffmpeg-devel
Signed-off-by: James Almer <jamrial@gmail.com>
---
tests/checkasm/flacdsp.c | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
index 96b7d05f7e..6f8e8817b5 100644
--- a/tests/checkasm/flacdsp.c
+++ b/tests/checkasm/flacdsp.c
@@ -83,6 +83,35 @@ static void check_lpc(int pred_order, int bps)
bench_new(dst, coeffs, pred_order, qlevel, BUF_SIZE);
}
+static void check_lpc33(int pred_order)
+{
+ int qlevel = rnd() % 16;
+ int coeff_prec = (rnd() % 15) + 1;
+ LOCAL_ALIGNED_16(int64_t, dst, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int64_t, dst0, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int64_t, dst1, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int32_t, residuals, [BUF_SIZE]);
+ LOCAL_ALIGNED_16(int32_t, coeffs, [32]);
+
+ declare_func(void, int64_t *, const int32_t *, const int[32], int, int, int);
+
+ for (int i = 0; i < 32; i++)
+ coeffs[i] = sign_extend(rnd(), coeff_prec);
+
+ for (int i = 0; i < BUF_SIZE; i++) {
+ residuals[i] = sign_extend(rnd(), pred_order);
+ dst[i] = sign_extend64(((int64_t)rnd() << 1) | (rnd() & 1), 33);
+ }
+
+ memcpy(dst0, dst, BUF_SIZE * sizeof (int64_t));
+ memcpy(dst1, dst, BUF_SIZE * sizeof (int64_t));
+ call_ref(dst0, residuals, coeffs, pred_order, qlevel, BUF_SIZE);
+ call_new(dst1, residuals, coeffs, pred_order, qlevel, BUF_SIZE);
+ if (memcmp(dst0, dst1, BUF_SIZE * sizeof (int64_t)) != 0)
+ fail();
+ bench_new(dst, residuals, coeffs, pred_order, qlevel, BUF_SIZE);
+}
+
static void check_wasted32(void)
{
int wasted = rnd() % 32;
@@ -165,6 +194,9 @@ void checkasm_check_flacdsp(void)
for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++)
if (check_func(h.lpc32, "flac_lpc_32_%d", pred_orders[i]))
check_lpc(pred_orders[i], 32);
+ for (i = 0; i < FF_ARRAY_ELEMS(pred_orders); i++)
+ if (check_func(h.lpc33, "flac_lpc_33_%d", pred_orders[i]))
+ check_lpc33(pred_orders[i]);
report("lpc");
--
2.45.0
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 18+ messages in thread