* [FFmpeg-devel] [PATCH] avcodec: flac x86 asm fix
@ 2022-09-03 21:45 Paul B Mahol
2022-09-05 11:19 ` James Almer
0 siblings, 1 reply; 2+ messages in thread
From: Paul B Mahol @ 2022-09-03 21:45 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 16 bytes --]
Patch attached.
[-- Attachment #2: 0001-avcodec-x86-flacdsp-fix-bug-in-decorrelation.patch --]
[-- Type: text/x-patch, Size: 6071 bytes --]
From 0f220489eb6402c6be3bc1d897c95fa9bc10431c Mon Sep 17 00:00:00 2001
From: Paul B Mahol <onemda@gmail.com>
Date: Sat, 3 Sep 2022 23:41:38 +0200
Subject: [PATCH] avcodec/x86/flacdsp: fix bug in decorrelation
Fixes #9297
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
libavcodec/x86/flacdsp.asm | 23 +++++++++++++++-----
libavcodec/x86/flacdsp_init.c | 41 ++++++++++++++++++++++-------------
2 files changed, 44 insertions(+), 20 deletions(-)
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 7138611526..6d755f4972 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -23,6 +23,10 @@
%include "libavutil/x86/x86util.asm"
+SECTION_RODATA
+
+vector: db 0,1,4,5,8,9,12,13,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,4,5,8,9,12,13,
+
SECTION .text
%macro PMACSDQL 5
@@ -89,6 +93,9 @@ LPC_32 sse4
;----------------------------------------------------------------------------------
%macro FLAC_DECORRELATE_16 3-4
cglobal flac_decorrelate_%1_16, 2, 4, 4, out, in0, in1, len
+%ifidn %1, indep2
+ VBROADCASTI128 m2, [vector]
+%endif
%if ARCH_X86_32
mov lend, lenm
%endif
@@ -112,11 +119,17 @@ align 16
%endif
%ifnidn %1, indep2
p%4d m2, m0, m1
+ packssdw m%2, m%2
+ packssdw m%3, m%3
+ punpcklwd m%2, m%3
+ psllw m%2, m3
+%else
+ pslld m%2, m3
+ pslld m%3, m3
+ pshufb m%2, m%2, m2
+ pshufb m%3, m%3, m2
+ punpcklwd m%2, m%3
%endif
- packssdw m%2, m%2
- packssdw m%3, m%3
- punpcklwd m%2, m%3
- psllw m%2, m3
mova [outq + lenq], m%2
add lenq, 16
jl .loop
@@ -292,7 +305,7 @@ align 16
REP_RET
%endmacro
-INIT_XMM sse2
+INIT_XMM ssse3
FLAC_DECORRELATE_16 indep2, 0, 1 ; Reuse stereo 16bits macro
FLAC_DECORRELATE_INDEP 32, 2, 3, d
FLAC_DECORRELATE_INDEP 16, 4, 3, w
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 2deaf3117f..48e3e7c55c 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -34,7 +34,9 @@ void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int chann
void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
int len, int shift); \
void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
- int len, int shift); \
+ int len, int shift);
+
+#define DECORRELATE_IFUNCS(fmt, opt) \
void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
int len, int shift); \
void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
@@ -48,6 +50,10 @@ DECORRELATE_FUNCS(16, sse2);
DECORRELATE_FUNCS(16, avx);
DECORRELATE_FUNCS(32, sse2);
DECORRELATE_FUNCS(32, avx);
+DECORRELATE_IFUNCS(16, ssse3);
+DECORRELATE_IFUNCS(16, avx);
+DECORRELATE_IFUNCS(32, ssse3);
+DECORRELATE_IFUNCS(32, avx);
av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
{
@@ -55,30 +61,35 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE2(cpu_flags)) {
+ if (fmt == AV_SAMPLE_FMT_S16) {
+ c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
+ c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
+ c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
+ } else if (fmt == AV_SAMPLE_FMT_S32) {
+ c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
+ c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
+ c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
+ }
+ }
+ if (EXTERNAL_SSSE3(cpu_flags)) {
if (fmt == AV_SAMPLE_FMT_S16) {
if (channels == 2)
- c->decorrelate[0] = ff_flac_decorrelate_indep2_16_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep2_16_ssse3;
else if (channels == 4)
- c->decorrelate[0] = ff_flac_decorrelate_indep4_16_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep4_16_ssse3;
else if (channels == 6)
- c->decorrelate[0] = ff_flac_decorrelate_indep6_16_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep6_16_ssse3;
else if (ARCH_X86_64 && channels == 8)
- c->decorrelate[0] = ff_flac_decorrelate_indep8_16_sse2;
- c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
- c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
- c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep8_16_ssse3;
} else if (fmt == AV_SAMPLE_FMT_S32) {
if (channels == 2)
- c->decorrelate[0] = ff_flac_decorrelate_indep2_32_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep2_32_ssse3;
else if (channels == 4)
- c->decorrelate[0] = ff_flac_decorrelate_indep4_32_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep4_32_ssse3;
else if (channels == 6)
- c->decorrelate[0] = ff_flac_decorrelate_indep6_32_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep6_32_ssse3;
else if (ARCH_X86_64 && channels == 8)
- c->decorrelate[0] = ff_flac_decorrelate_indep8_32_sse2;
- c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
- c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
- c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
+ c->decorrelate[0] = ff_flac_decorrelate_indep8_32_ssse3;
}
}
if (EXTERNAL_SSE4(cpu_flags)) {
--
2.37.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avcodec: flac x86 asm fix
2022-09-03 21:45 [FFmpeg-devel] [PATCH] avcodec: flac x86 asm fix Paul B Mahol
@ 2022-09-05 11:19 ` James Almer
0 siblings, 0 replies; 2+ messages in thread
From: James Almer @ 2022-09-05 11:19 UTC (permalink / raw)
To: ffmpeg-devel
On 9/3/2022 6:45 PM, Paul B Mahol wrote:
> Patch attached.
[...]
> From 0f220489eb6402c6be3bc1d897c95fa9bc10431c Mon Sep 17 00:00:00 2001
> From: Paul B Mahol <onemda@gmail.com>
> Date: Sat, 3 Sep 2022 23:41:38 +0200
> Subject: [PATCH] avcodec/x86/flacdsp: fix bug in decorrelation
>
> Fixes #9297
>
> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
> libavcodec/x86/flacdsp.asm | 23 +++++++++++++++-----
> libavcodec/x86/flacdsp_init.c | 41 ++++++++++++++++++++++-------------
> 2 files changed, 44 insertions(+), 20 deletions(-)
>
> diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
> index 7138611526..6d755f4972 100644
> --- a/libavcodec/x86/flacdsp.asm
> +++ b/libavcodec/x86/flacdsp.asm
> @@ -23,6 +23,10 @@
>
> %include "libavutil/x86/x86util.asm"
>
> +SECTION_RODATA
> +
> +vector: db 0,1,4,5,8,9,12,13,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,4,5,8,9,12,13,
> +
> SECTION .text
>
> %macro PMACSDQL 5
> @@ -89,6 +93,9 @@ LPC_32 sse4
> ;----------------------------------------------------------------------------------
> %macro FLAC_DECORRELATE_16 3-4
> cglobal flac_decorrelate_%1_16, 2, 4, 4, out, in0, in1, len
> +%ifidn %1, indep2
> + VBROADCASTI128 m2, [vector]
You're loading 16 bytes, yet vector is 32 bytes. Are you sure this is
ok? The upper 16 bytes look different, which makes me think you meant to
use them.
> +%endif
> %if ARCH_X86_32
> mov lend, lenm
> %endif
> @@ -112,11 +119,17 @@ align 16
> %endif
> %ifnidn %1, indep2
> p%4d m2, m0, m1
> + packssdw m%2, m%2
> + packssdw m%3, m%3
> + punpcklwd m%2, m%3
> + psllw m%2, m3
> +%else
> + pslld m%2, m3
> + pslld m%3, m3
> + pshufb m%2, m%2, m2
> + pshufb m%3, m%3, m2
> + punpcklwd m%2, m%3
> %endif
> - packssdw m%2, m%2
> - packssdw m%3, m%3
> - punpcklwd m%2, m%3
> - psllw m%2, m3
> mova [outq + lenq], m%2
> add lenq, 16
> jl .loop
> @@ -292,7 +305,7 @@ align 16
> REP_RET
> %endmacro
>
> -INIT_XMM sse2
> +INIT_XMM ssse3
Why are you turning all the unrelated indep functions into ssse3 when
you only changed the FLAC_DECORRELATE_16 macro to fix the stereo 16bit
indep path?
And did you try using FLAC_DECORRELATE_INDEP for it instead? I did
mention below i purposely used a different macro for that specific
scenario (probably to save some instructions), so the fix could have
been using the proper indep macro.
> FLAC_DECORRELATE_16 indep2, 0, 1 ; Reuse stereo 16bits macro
> FLAC_DECORRELATE_INDEP 32, 2, 3, d
> FLAC_DECORRELATE_INDEP 16, 4, 3, w
> diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
> index 2deaf3117f..48e3e7c55c 100644
> --- a/libavcodec/x86/flacdsp_init.c
> +++ b/libavcodec/x86/flacdsp_init.c
> @@ -34,7 +34,9 @@ void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int chann
> void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
> int len, int shift); \
> void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
> - int len, int shift); \
> + int len, int shift);
> +
> +#define DECORRELATE_IFUNCS(fmt, opt) \
> void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
> int len, int shift); \
> void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
> @@ -48,6 +50,10 @@ DECORRELATE_FUNCS(16, sse2);
> DECORRELATE_FUNCS(16, avx);
> DECORRELATE_FUNCS(32, sse2);
> DECORRELATE_FUNCS(32, avx);
> +DECORRELATE_IFUNCS(16, ssse3);
> +DECORRELATE_IFUNCS(16, avx);
> +DECORRELATE_IFUNCS(32, ssse3);
> +DECORRELATE_IFUNCS(32, avx);
>
> av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels)
> {
> @@ -55,30 +61,35 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
> int cpu_flags = av_get_cpu_flags();
>
> if (EXTERNAL_SSE2(cpu_flags)) {
> + if (fmt == AV_SAMPLE_FMT_S16) {
> + c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
> + c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
> + c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
> + } else if (fmt == AV_SAMPLE_FMT_S32) {
> + c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
> + c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
> + c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
> + }
> + }
> + if (EXTERNAL_SSSE3(cpu_flags)) {
> if (fmt == AV_SAMPLE_FMT_S16) {
> if (channels == 2)
> - c->decorrelate[0] = ff_flac_decorrelate_indep2_16_sse2;
> + c->decorrelate[0] = ff_flac_decorrelate_indep2_16_ssse3;
> else if (channels == 4)
> - c->decorrelate[0] = ff_flac_decorrelate_indep4_16_sse2;
> + c->decorrelate[0] = ff_flac_decorrelate_indep4_16_ssse3;
> else if (channels == 6)
> - c->decorrelate[0] = ff_flac_decorrelate_indep6_16_sse2;
> + c->decorrelate[0] = ff_flac_decorrelate_indep6_16_ssse3;
> else if (ARCH_X86_64 && channels == 8)
> - c->decorrelate[0] = ff_flac_decorrelate_indep8_16_sse2;
> - c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
> - c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
> - c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
> + c->decorrelate[0] = ff_flac_decorrelate_indep8_16_ssse3;
> } else if (fmt == AV_SAMPLE_FMT_S32) {
> if (channels == 2)
> - c->decorrelate[0] = ff_flac_decorrelate_indep2_32_sse2;
> + c->decorrelate[0] = ff_flac_decorrelate_indep2_32_ssse3;
> else if (channels == 4)
> - c->decorrelate[0] = ff_flac_decorrelate_indep4_32_sse2;
> + c->decorrelate[0] = ff_flac_decorrelate_indep4_32_ssse3;
> else if (channels == 6)
> - c->decorrelate[0] = ff_flac_decorrelate_indep6_32_sse2;
> + c->decorrelate[0] = ff_flac_decorrelate_indep6_32_ssse3;
> else if (ARCH_X86_64 && channels == 8)
> - c->decorrelate[0] = ff_flac_decorrelate_indep8_32_sse2;
> - c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
> - c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
> - c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
> + c->decorrelate[0] = ff_flac_decorrelate_indep8_32_ssse3;
> }
> }
> if (EXTERNAL_SSE4(cpu_flags)) {
> --
> 2.37.2
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-09-05 11:19 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-03 21:45 [FFmpeg-devel] [PATCH] avcodec: flac x86 asm fix Paul B Mahol
2022-09-05 11:19 ` James Almer
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git