* [FFmpeg-devel] [PATCH] x86/aacencdsp: add SSE2 and AVX versions of quantize_bands
@ 2024-06-04 1:23 James Almer
2024-06-04 1:42 ` Andreas Rheinhardt
2024-06-04 6:37 ` Rémi Denis-Courmont
0 siblings, 2 replies; 5+ messages in thread
From: James Almer @ 2024-06-04 1:23 UTC (permalink / raw)
To: ffmpeg-devel
quant_bands_signed_sse2: 417.0
quant_bands_signed_avx: 202.0
Signed-off-by: James Almer <jamrial@gmail.com>
---
libavcodec/aacenc.h | 2 +-
libavcodec/x86/aacencdsp.asm | 27 ++++++++++++++++++++++++---
libavcodec/x86/aacencdsp_init.c | 6 ++++++
tests/checkasm/aacencdsp.c | 4 ++--
4 files changed, 33 insertions(+), 6 deletions(-)
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index d07960620e..ae15f91e06 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -242,7 +242,7 @@ typedef struct AACEncContext {
enum RawDataBlockType cur_type; ///< channel group type cur_channel belongs to
AudioFrameQueue afq;
- DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients
+ DECLARE_ALIGNED(32, int, qcoefs)[96]; ///< quantized coefficients
DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
uint16_t quantize_band_cost_cache_generation;
diff --git a/libavcodec/x86/aacencdsp.asm b/libavcodec/x86/aacencdsp.asm
index 0d3ba4b89d..99be2d87f5 100644
--- a/libavcodec/x86/aacencdsp.asm
+++ b/libavcodec/x86/aacencdsp.asm
@@ -53,8 +53,19 @@ cglobal abs_pow34, 3, 3, 3, out, in, size
; int size, int is_signed, int maxval, const float Q34,
; const float rounding)
;*******************************************************************
-INIT_XMM sse2
+%macro AAC_QUANTIZE_BANDS 0
cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
+%if mmsize == 32
+ vbroadcastss m0, Q34m
+ vbroadcastss m1, roundingm
+%if UNIX64 == 0
+ cvtsi2ss xm3, dword maxvalm
+%else
+ cvtsi2ss xm3, maxvald
+%endif
+ shufps xm3, xm3, xm3, 0
+ vinsertf128 m3, m3, xm3, 1
+%else ; mmsize == 16
%if UNIX64 == 0
movss m0, Q34m
movss m1, roundingm
@@ -65,9 +76,13 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
shufps m0, m0, 0
shufps m1, m1, 0
shufps m3, m3, 0
+%endif
shl is_signedd, 31
- movd m4, is_signedd
- shufps m4, m4, 0
+ movd xm4, is_signedd
+ shufps xm4, xm4, xm4, 0
+%if mmsize == 32
+ vinsertf128 m4, m4, xm4, 1
+%endif
shl sized, 2
add inq, sizeq
add outq, sizeq
@@ -84,3 +99,9 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
add sizeq, mmsize
jl .loop
RET
+%endmacro
+
+INIT_XMM sse2
+AAC_QUANTIZE_BANDS
+INIT_YMM avx
+AAC_QUANTIZE_BANDS
diff --git a/libavcodec/x86/aacencdsp_init.c b/libavcodec/x86/aacencdsp_init.c
index e0d8dec4f8..cf17dbf91d 100644
--- a/libavcodec/x86/aacencdsp_init.c
+++ b/libavcodec/x86/aacencdsp_init.c
@@ -30,6 +30,9 @@ void ff_abs_pow34_sse(float *out, const float *in, const int size);
void ff_aac_quantize_bands_sse2(int *out, const float *in, const float *scaled,
int size, int is_signed, int maxval, const float Q34,
const float rounding);
+void ff_aac_quantize_bands_avx(int *out, const float *in, const float *scaled,
+ int size, int is_signed, int maxval, const float Q34,
+ const float rounding);
av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
{
@@ -40,4 +43,7 @@ av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
if (EXTERNAL_SSE2(cpu_flags))
s->quant_bands = ff_aac_quantize_bands_sse2;
+
+ if (EXTERNAL_AVX_FAST(cpu_flags))
+ s->quant_bands = ff_aac_quantize_bands_avx;
}
diff --git a/tests/checkasm/aacencdsp.c b/tests/checkasm/aacencdsp.c
index 791dd30320..5308a2ac03 100644
--- a/tests/checkasm/aacencdsp.c
+++ b/tests/checkasm/aacencdsp.c
@@ -81,8 +81,8 @@ static void test_quant_bands(AACEncDSPContext *s)
for (int sign = 0; sign <= 1; sign++) {
if (check_func(s->quant_bands, "quant_bands_%s",
sign ? "signed" : "unsigned")) {
- LOCAL_ALIGNED_16(int, out, [BUF_SIZE]);
- LOCAL_ALIGNED_16(int, out2, [BUF_SIZE]);
+ LOCAL_ALIGNED_32(int, out, [BUF_SIZE]);
+ LOCAL_ALIGNED_32(int, out2, [BUF_SIZE]);
call_ref(out, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
call_new(out2, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
--
2.45.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH] x86/aacencdsp: add SSE2 and AVX versions of quantize_bands
2024-06-04 1:23 [FFmpeg-devel] [PATCH] x86/aacencdsp: add SSE2 and AVX versions of quantize_bands James Almer
@ 2024-06-04 1:42 ` Andreas Rheinhardt
2024-06-04 1:45 ` James Almer
2024-06-04 6:37 ` Rémi Denis-Courmont
1 sibling, 1 reply; 5+ messages in thread
From: Andreas Rheinhardt @ 2024-06-04 1:42 UTC (permalink / raw)
To: ffmpeg-devel
James Almer:
> quant_bands_signed_sse2: 417.0
> quant_bands_signed_avx: 202.0
Missing benchmark numbers for the C code.
>
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
> libavcodec/aacenc.h | 2 +-
> libavcodec/x86/aacencdsp.asm | 27 ++++++++++++++++++++++++---
> libavcodec/x86/aacencdsp_init.c | 6 ++++++
> tests/checkasm/aacencdsp.c | 4 ++--
> 4 files changed, 33 insertions(+), 6 deletions(-)
>
> diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
> index d07960620e..ae15f91e06 100644
> --- a/libavcodec/aacenc.h
> +++ b/libavcodec/aacenc.h
> @@ -242,7 +242,7 @@ typedef struct AACEncContext {
> enum RawDataBlockType cur_type; ///< channel group type cur_channel belongs to
>
> AudioFrameQueue afq;
> - DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients
> + DECLARE_ALIGNED(32, int, qcoefs)[96]; ///< quantized coefficients
> DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
>
> uint16_t quantize_band_cost_cache_generation;
> diff --git a/libavcodec/x86/aacencdsp.asm b/libavcodec/x86/aacencdsp.asm
> index 0d3ba4b89d..99be2d87f5 100644
> --- a/libavcodec/x86/aacencdsp.asm
> +++ b/libavcodec/x86/aacencdsp.asm
> @@ -53,8 +53,19 @@ cglobal abs_pow34, 3, 3, 3, out, in, size
> ; int size, int is_signed, int maxval, const float Q34,
> ; const float rounding)
> ;*******************************************************************
> -INIT_XMM sse2
> +%macro AAC_QUANTIZE_BANDS 0
> cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
> +%if mmsize == 32
> + vbroadcastss m0, Q34m
> + vbroadcastss m1, roundingm
> +%if UNIX64 == 0
> + cvtsi2ss xm3, dword maxvalm
> +%else
> + cvtsi2ss xm3, maxvald
> +%endif
> + shufps xm3, xm3, xm3, 0
> + vinsertf128 m3, m3, xm3, 1
> +%else ; mmsize == 16
> %if UNIX64 == 0
> movss m0, Q34m
> movss m1, roundingm
> @@ -65,9 +76,13 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
> shufps m0, m0, 0
> shufps m1, m1, 0
> shufps m3, m3, 0
> +%endif
> shl is_signedd, 31
> - movd m4, is_signedd
> - shufps m4, m4, 0
> + movd xm4, is_signedd
> + shufps xm4, xm4, xm4, 0
> +%if mmsize == 32
> + vinsertf128 m4, m4, xm4, 1
> +%endif
> shl sized, 2
> add inq, sizeq
> add outq, sizeq
> @@ -84,3 +99,9 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
> add sizeq, mmsize
> jl .loop
> RET
> +%endmacro
> +
> +INIT_XMM sse2
> +AAC_QUANTIZE_BANDS
> +INIT_YMM avx
> +AAC_QUANTIZE_BANDS
> diff --git a/libavcodec/x86/aacencdsp_init.c b/libavcodec/x86/aacencdsp_init.c
> index e0d8dec4f8..cf17dbf91d 100644
> --- a/libavcodec/x86/aacencdsp_init.c
> +++ b/libavcodec/x86/aacencdsp_init.c
> @@ -30,6 +30,9 @@ void ff_abs_pow34_sse(float *out, const float *in, const int size);
> void ff_aac_quantize_bands_sse2(int *out, const float *in, const float *scaled,
> int size, int is_signed, int maxval, const float Q34,
> const float rounding);
> +void ff_aac_quantize_bands_avx(int *out, const float *in, const float *scaled,
> + int size, int is_signed, int maxval, const float Q34,
> + const float rounding);
>
> av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
> {
> @@ -40,4 +43,7 @@ av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
>
> if (EXTERNAL_SSE2(cpu_flags))
> s->quant_bands = ff_aac_quantize_bands_sse2;
Seems like the commit message is wrong: You are not adding an SSE2 version.
> +
> + if (EXTERNAL_AVX_FAST(cpu_flags))
> + s->quant_bands = ff_aac_quantize_bands_avx;
> }
> diff --git a/tests/checkasm/aacencdsp.c b/tests/checkasm/aacencdsp.c
> index 791dd30320..5308a2ac03 100644
> --- a/tests/checkasm/aacencdsp.c
> +++ b/tests/checkasm/aacencdsp.c
> @@ -81,8 +81,8 @@ static void test_quant_bands(AACEncDSPContext *s)
> for (int sign = 0; sign <= 1; sign++) {
> if (check_func(s->quant_bands, "quant_bands_%s",
> sign ? "signed" : "unsigned")) {
> - LOCAL_ALIGNED_16(int, out, [BUF_SIZE]);
> - LOCAL_ALIGNED_16(int, out2, [BUF_SIZE]);
> + LOCAL_ALIGNED_32(int, out, [BUF_SIZE]);
> + LOCAL_ALIGNED_32(int, out2, [BUF_SIZE]);
>
> call_ref(out, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
> call_new(out2, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH] x86/aacencdsp: add SSE2 and AVX versions of quantize_bands
2024-06-04 1:42 ` Andreas Rheinhardt
@ 2024-06-04 1:45 ` James Almer
0 siblings, 0 replies; 5+ messages in thread
From: James Almer @ 2024-06-04 1:45 UTC (permalink / raw)
To: ffmpeg-devel
On 6/3/2024 10:42 PM, Andreas Rheinhardt wrote:
> James Almer:
>> quant_bands_signed_sse2: 417.0
>> quant_bands_signed_avx: 202.0
>
> Missing benchmark numbers for the C code
About 1670. And it doesn't matter, as I'm only adding the AVX version
(the subject is wrong, copy-paste fail), so I mentioned the SSE2 number as a
comparison to the existing SIMD version.
But sure, I can add the C one before pushing.
>
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>> libavcodec/aacenc.h | 2 +-
>> libavcodec/x86/aacencdsp.asm | 27 ++++++++++++++++++++++++---
>> libavcodec/x86/aacencdsp_init.c | 6 ++++++
>> tests/checkasm/aacencdsp.c | 4 ++--
>> 4 files changed, 33 insertions(+), 6 deletions(-)
>>
>> diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
>> index d07960620e..ae15f91e06 100644
>> --- a/libavcodec/aacenc.h
>> +++ b/libavcodec/aacenc.h
>> @@ -242,7 +242,7 @@ typedef struct AACEncContext {
>> enum RawDataBlockType cur_type; ///< channel group type cur_channel belongs to
>>
>> AudioFrameQueue afq;
>> - DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients
>> + DECLARE_ALIGNED(32, int, qcoefs)[96]; ///< quantized coefficients
>> DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
>>
>> uint16_t quantize_band_cost_cache_generation;
>> diff --git a/libavcodec/x86/aacencdsp.asm b/libavcodec/x86/aacencdsp.asm
>> index 0d3ba4b89d..99be2d87f5 100644
>> --- a/libavcodec/x86/aacencdsp.asm
>> +++ b/libavcodec/x86/aacencdsp.asm
>> @@ -53,8 +53,19 @@ cglobal abs_pow34, 3, 3, 3, out, in, size
>> ; int size, int is_signed, int maxval, const float Q34,
>> ; const float rounding)
>> ;*******************************************************************
>> -INIT_XMM sse2
>> +%macro AAC_QUANTIZE_BANDS 0
>> cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
>> +%if mmsize == 32
>> + vbroadcastss m0, Q34m
>> + vbroadcastss m1, roundingm
>> +%if UNIX64 == 0
>> + cvtsi2ss xm3, dword maxvalm
>> +%else
>> + cvtsi2ss xm3, maxvald
>> +%endif
>> + shufps xm3, xm3, xm3, 0
>> + vinsertf128 m3, m3, xm3, 1
>> +%else ; mmsize == 16
>> %if UNIX64 == 0
>> movss m0, Q34m
>> movss m1, roundingm
>> @@ -65,9 +76,13 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
>> shufps m0, m0, 0
>> shufps m1, m1, 0
>> shufps m3, m3, 0
>> +%endif
>> shl is_signedd, 31
>> - movd m4, is_signedd
>> - shufps m4, m4, 0
>> + movd xm4, is_signedd
>> + shufps xm4, xm4, xm4, 0
>> +%if mmsize == 32
>> + vinsertf128 m4, m4, xm4, 1
>> +%endif
>> shl sized, 2
>> add inq, sizeq
>> add outq, sizeq
>> @@ -84,3 +99,9 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
>> add sizeq, mmsize
>> jl .loop
>> RET
>> +%endmacro
>> +
>> +INIT_XMM sse2
>> +AAC_QUANTIZE_BANDS
>> +INIT_YMM avx
>> +AAC_QUANTIZE_BANDS
>> diff --git a/libavcodec/x86/aacencdsp_init.c b/libavcodec/x86/aacencdsp_init.c
>> index e0d8dec4f8..cf17dbf91d 100644
>> --- a/libavcodec/x86/aacencdsp_init.c
>> +++ b/libavcodec/x86/aacencdsp_init.c
>> @@ -30,6 +30,9 @@ void ff_abs_pow34_sse(float *out, const float *in, const int size);
>> void ff_aac_quantize_bands_sse2(int *out, const float *in, const float *scaled,
>> int size, int is_signed, int maxval, const float Q34,
>> const float rounding);
>> +void ff_aac_quantize_bands_avx(int *out, const float *in, const float *scaled,
>> + int size, int is_signed, int maxval, const float Q34,
>> + const float rounding);
>>
>> av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
>> {
>> @@ -40,4 +43,7 @@ av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
>>
>> if (EXTERNAL_SSE2(cpu_flags))
>> s->quant_bands = ff_aac_quantize_bands_sse2;
>
> Seems like the commit message is wrong: You are not adding an SSE2 version.
>
>> +
>> + if (EXTERNAL_AVX_FAST(cpu_flags))
>> + s->quant_bands = ff_aac_quantize_bands_avx;
>> }
>> diff --git a/tests/checkasm/aacencdsp.c b/tests/checkasm/aacencdsp.c
>> index 791dd30320..5308a2ac03 100644
>> --- a/tests/checkasm/aacencdsp.c
>> +++ b/tests/checkasm/aacencdsp.c
>> @@ -81,8 +81,8 @@ static void test_quant_bands(AACEncDSPContext *s)
>> for (int sign = 0; sign <= 1; sign++) {
>> if (check_func(s->quant_bands, "quant_bands_%s",
>> sign ? "signed" : "unsigned")) {
>> - LOCAL_ALIGNED_16(int, out, [BUF_SIZE]);
>> - LOCAL_ALIGNED_16(int, out2, [BUF_SIZE]);
>> + LOCAL_ALIGNED_32(int, out, [BUF_SIZE]);
>> + LOCAL_ALIGNED_32(int, out2, [BUF_SIZE]);
>>
>> call_ref(out, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
>> call_new(out2, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [FFmpeg-devel] [PATCH] x86/aacencdsp: add SSE2 and AVX versions of quantize_bands
2024-06-04 1:23 [FFmpeg-devel] [PATCH] x86/aacencdsp: add SSE2 and AVX versions of quantize_bands James Almer
2024-06-04 1:42 ` Andreas Rheinhardt
@ 2024-06-04 6:37 ` Rémi Denis-Courmont
2024-06-04 15:52 ` James Almer
1 sibling, 1 reply; 5+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-04 6:37 UTC (permalink / raw)
To: FFmpeg development discussions and patches
Le 4 juin 2024 04:23:43 GMT+03:00, James Almer <jamrial@gmail.com> a écrit :
>quant_bands_signed_sse2: 417.0
>quant_bands_signed_avx: 202.0
What about unsigned?
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-06-04 15:52 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-04 1:23 [FFmpeg-devel] [PATCH] x86/aacencdsp: add SSE2 and AVX versions of quantize_bands James Almer
2024-06-04 1:42 ` Andreas Rheinhardt
2024-06-04 1:45 ` James Almer
2024-06-04 6:37 ` Rémi Denis-Courmont
2024-06-04 15:52 ` James Almer
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git