From: Michael Niedermayer <michael@niedermayer.cc>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: Re: [FFmpeg-devel] [PATCH 3/3] swscale/x86/yuv2yuvX: Process tails by jumping back into the main loop.
Date: Sat, 15 Jul 2023 22:39:53 +0200
Message-ID: <20230715203953.GU1093384@pb2> (raw)
In-Reply-To: <20230714100847.475017-1-alankelly@google.com>
[-- Attachment #1.1: Type: text/plain, Size: 6714 bytes --]
On Fri, Jul 14, 2023 at 12:08:46PM +0200, Alan Kelly wrote:
> ---
> libswscale/x86/swscale.c | 11 ++++-------
> libswscale/x86/yuv2yuvX.asm | 12 ++++++++++--
> 2 files changed, 14 insertions(+), 9 deletions(-)
This patch seems to segfault with the following command:
./ffmpeg_g -i mm-short.mpg -an -vcodec snow -t 0.2 -bitexact -pix_fmt yuv410p -s 199x199 -vstrict -2 -y snow3914-199-410.avi
Thread 79 "ffmpeg_g" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffaffef700 (LWP 23533)]
0x000055555658a0f6 in ff_yuv2yuvX_sse3 ()
(gdb) bt
#0 0x000055555658a0f6 in ff_yuv2yuvX_sse3 ()
#1 0x0000555556585bc6 in chr_planar_vscale ()
#2 0x00005555565817d1 in scale_internal ()
#3 0x00005555565827d9 in ff_sws_slice_worker ()
#4 0x000055555662b06e in thread_worker ()
#5 0x00007ffff75fc6db in start_thread (arg=0x7fffaffef700) at pthread_create.c:463
#6 0x00007fffed12861f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) disassemble $rip-32,$rip+32
Dump of assembler code from 0x55555658a0d6 to 0x55555658a116:
0x000055555658a0d6 <ff_yuv2yuvX_sse3+86>: std
0x000055555658a0d7 <ff_yuv2yuvX_sse3+87>: fldenv 0xf(%rsi)
0x000055555658a0da <ff_yuv2yuvX_sse3+90>: outsl %ds:(%rsi),(%dx)
0x000055555658a0db <ff_yuv2yuvX_sse3+91>: sti
0x000055555658a0dc <ff_yuv2yuvX_sse3+92>: psraw $0x4,%xmm7
0x000055555658a0e1 <ff_yuv2yuvX_sse3+97>: movdqa %xmm7,%xmm4
0x000055555658a0e5 <ff_yuv2yuvX_sse3+101>: movdqa %xmm7,%xmm3
0x000055555658a0e9 <ff_yuv2yuvX_sse3+105>: movdqa %xmm7,%xmm6
0x000055555658a0ed <ff_yuv2yuvX_sse3+109>: movdqa %xmm7,%xmm1
0x000055555658a0f1 <ff_yuv2yuvX_sse3+113>: movddup 0x8(%rsi),%xmm0
=> 0x000055555658a0f6 <ff_yuv2yuvX_sse3+118>: movdqa (%rdx,%rax,2),%xmm2
0x000055555658a0fb <ff_yuv2yuvX_sse3+123>: pmulhw %xmm0,%xmm2
0x000055555658a0ff <ff_yuv2yuvX_sse3+127>: movdqa 0x10(%rdx,%rax,2),%xmm5
0x000055555658a105 <ff_yuv2yuvX_sse3+133>: pmulhw %xmm0,%xmm5
0x000055555658a109 <ff_yuv2yuvX_sse3+137>: paddw %xmm2,%xmm3
0x000055555658a10d <ff_yuv2yuvX_sse3+141>: paddw %xmm5,%xmm4
0x000055555658a111 <ff_yuv2yuvX_sse3+145>: movdqa 0x20(%rdx,%rax,2),%xmm2
End of assembler dump.
(gdb) info all-registers
rax 0x12 18
rbx 0x32 50
rcx 0x555557915480 93825029723264
rdx 0x555557687680 93825027044992
rsi 0x555557666658 93825026909784
rdi 0x555557666658 93825026909784
rbp 0x55555765b880 0x55555765b880
rsp 0x7fffaffee7a8 0x7fffaffee7a8
r8 0x20 32
r9 0x32 50
r10 0x555556589860 93825009227872
r11 0x5555576f9dc0 93825027513792
r12 0x55555763b280 93825026732672
r13 0x555557666658 93825026909784
r14 0x5555577b5800 93825028282368
r15 0x555557622640 93825026631232
rip 0x55555658a0f6 0x55555658a0f6 <ff_yuv2yuvX_sse3+118>
eflags 0x10297 [ CF PF AF SF IF RF ]
cs 0x33 51
ss 0x2b 43
ds 0x0 0
es 0x0 0
fs 0x0 0
gs 0x0 0
st0 0 (raw 0x00000000000000000000)
st1 0 (raw 0x00000000000000000000)
st2 0 (raw 0x00000000000000000000)
st3 0 (raw 0x00000000000000000000)
st4 0 (raw 0x00000000000000000000)
st5 0 (raw 0x00000000000000000000)
st6 0 (raw 0x00000000000000000000)
st7 0 (raw 0x00000000000000000000)
fctrl 0xffff 65535
fstat 0xffff 65535
ftag 0xaaaa 43690
fiseg 0x1 1
fioff 0x0 0
foseg 0x5646 22086
fooff 0xa 10
fop 0x7ff 2047
mxcsr 0x1fa8 [ OE PE IM DM ZM OM UM PM ]
>
> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> index 52423a1199..71434f58d3 100644
> --- a/libswscale/x86/swscale.c
> +++ b/libswscale/x86/swscale.c
> @@ -202,17 +202,14 @@ static void yuv2yuvX_ ##opt(const int16_t *filter, int filterSize, \
> const int16_t **src, uint8_t *dest, int dstW, \
> const uint8_t *dither, int offset) \
> { \
> - int remainder = (dstW % step); \
> - int pixelsProcessed = dstW - remainder; \
> if(((uintptr_t)dest) & 15){ \
> yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset); \
> return; \
> } \
> - if(pixelsProcessed > 0) \
> - ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, pixelsProcessed + offset, dither, offset); \
> - if(remainder > 0){ \
> - yuv2yuvX_ ##tail(filter, filterSize, src, dest, dstW, dither, offset); \
> - } \
> + if (dstW >= step) \
> + ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + offset, dither, offset); \
> + else \
> + yuv2yuvX_ ##tail(filter, filterSize, src, dest, dstW, dither, offset); \
> return; \
> }
>
> diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
> index 57bfa09d66..ad0e8bd448 100644
> --- a/libswscale/x86/yuv2yuvX.asm
> +++ b/libswscale/x86/yuv2yuvX.asm
> @@ -54,6 +54,8 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
> %else
> movq xm3, [ditherq]
> %endif ; avx2
> + mov ditherq, dstWq
> + sub dstWq, mmsize * unroll
>
> %if cpuflag(avx512)
> mova m15, [permutation]
> @@ -131,8 +133,14 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, dstW, dither, offset
> add offsetq, mmsize * unroll
> mov filterSizeq, filterq
> cmp offsetq, dstWq
> - jb .outerloop
> - RET
> + jb .outerloop
> +
> + mov dstWq, offsetq
> + mov offsetq, ditherq
> + sub offsetq, mmsize * unroll
> + cmp dstWq, ditherq
> + jb .outerloop
> + REP_RET
> %endmacro
>
> INIT_MMX mmxext
> --
> 2.41.0.255.g8b1d071c50-goog
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Into a blind darkness they enter who follow after the Ignorance,
they as if into a greater darkness enter who devote themselves
to the Knowledge alone. -- Isha Upanishad
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2023-07-15 20:40 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-14 10:08 Alan Kelly
2023-07-15 20:39 ` Michael Niedermayer [this message]
2023-07-17 9:29 ` Alan Kelly
2023-07-17 9:30 ` Alan Kelly
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230715203953.GU1093384@pb2 \
--to=michael@niedermayer.cc \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git