Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm.
@ 2024-04-26 12:21 heng.hu.1989
  2024-05-02 20:07 ` hu heng
  0 siblings, 1 reply; 3+ messages in thread
From: heng.hu.1989 @ 2024-04-26 12:21 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: huheng

From: huheng <heng.hu.1989@gmail.com>

rename old inline yuv2yuvX to yuv2yuv_X, to avoid conflicts with
the names of standalone asm functions. When ffmpeg is compiled with
--disable-x86asm, using the scale function will cause the video to
be blurred. The reason is that when disable-x86asm, INLINE_MMXEXT
is 1 and use_mmx_vfilter is 1, but c->yuv2planeX uses the c language
version, which causes a problem of mismatch with the vfilter. This
problem has persisted from version 4.4 to the present. Fix it by using
inline yuv2yuv_X_mmxext, that can maintain the consistency of
use_mmx_vfilter.

reproduce the issue:
1. ./configure --disable-x86asm --enable-gpl --enable-libx264
2. ./ffmpeg -i input.mp4 -vf "scale=1280x720" -c:v libx264 output.mp4
the output.mp4 is abnormal

Signed-off-by: huheng <heng.hu.1989@gmail.com>
---
 libswscale/x86/swscale.c          |  6 +++-
 libswscale/x86/swscale_template.c | 53 +++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index ff16398988..1bb9d1d51a 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -452,8 +452,12 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
     int cpu_flags = av_get_cpu_flags();
 
 #if HAVE_MMXEXT_INLINE
-    if (INLINE_MMXEXT(cpu_flags))
+    if (INLINE_MMXEXT(cpu_flags)) {
         sws_init_swscale_mmxext(c);
+        if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
+            c->yuv2planeX = yuv2yuv_X_mmxext;
+        }
+    }
 #endif
     if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
 #if HAVE_MMXEXT_EXTERNAL
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 6190fcb4fe..1b8794480d 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -33,6 +33,59 @@
 #define MOVNTQ2 "movntq "
 #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
 
+static void RENAME(yuv2yuv_X)(const int16_t *filter, int filterSize,
+                           const int16_t **src, uint8_t *dest, int dstW,
+                           const uint8_t *dither, int offset)
+{
+    filterSize--;
+    __asm__ volatile(
+        "movd %0, %%mm1\n\t"
+        "punpcklwd %%mm1, %%mm1\n\t"
+        "punpckldq %%mm1, %%mm1\n\t"
+        "psllw        $3, %%mm1\n\t"
+        "paddw     %%mm1, %%mm3\n\t"
+        "paddw     %%mm1, %%mm4\n\t"
+        "psraw        $4, %%mm3\n\t"
+        "psraw        $4, %%mm4\n\t"
+        ::"m"(filterSize)
+     );
+
+    __asm__ volatile(\
+        "movq    %%mm3, %%mm6\n\t"
+        "movq    %%mm4, %%mm7\n\t"
+        "movl %3, %%ecx\n\t"
+        "mov                                 %0, %%"FF_REG_d"       \n\t"\
+        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\
+        ".p2align                             4                     \n\t" /* FIXME Unroll? */\
+        "1:                                                         \n\t"\
+        "movq                      8(%%"FF_REG_d"), %%mm0           \n\t" /* filterCoeff */\
+        "movq                (%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* srcData */\
+        "movq               8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* srcData */\
+        "add                                $16, %%"FF_REG_d"       \n\t"\
+        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\
+        "test                         %%"FF_REG_S", %%"FF_REG_S"    \n\t"\
+        "pmulhw                           %%mm0, %%mm2      \n\t"\
+        "pmulhw                           %%mm0, %%mm5      \n\t"\
+        "paddw                            %%mm2, %%mm3      \n\t"\
+        "paddw                            %%mm5, %%mm4      \n\t"\
+        " jnz                                1b             \n\t"\
+        "psraw                               $3, %%mm3      \n\t"\
+        "psraw                               $3, %%mm4      \n\t"\
+        "packuswb                         %%mm4, %%mm3      \n\t"
+        MOVNTQ2 "                         %%mm3, (%1, %%"FF_REG_c")\n\t"
+        "add                          $8, %%"FF_REG_c"      \n\t"\
+        "cmp                          %2, %%"FF_REG_c"      \n\t"\
+        "movq    %%mm6, %%mm3\n\t"
+        "movq    %%mm7, %%mm4\n\t"
+        "mov                                 %0, %%"FF_REG_d"     \n\t"\
+        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
+        "jb                                  1b                   \n\t"\
+        :: "g" (filter),
+           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
+        : "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
+    );
+}
+
 #define YSCALEYUV2PACKEDX_UV \
     __asm__ volatile(\
         "xor                %%"FF_REG_a", %%"FF_REG_a"  \n\t"\
-- 
2.20.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm.
  2024-04-26 12:21 [FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm heng.hu.1989
@ 2024-05-02 20:07 ` hu heng
  2024-05-04  0:21   ` Michael Niedermayer
  0 siblings, 1 reply; 3+ messages in thread
From: hu heng @ 2024-05-02 20:07 UTC (permalink / raw)
  To: ffmpeg-devel

<heng.hu.1989@gmail.com> 于2024年4月26日周五 20:21写道:
>
> From: huheng <heng.hu.1989@gmail.com>
>
> rename old inline yuv2yuvX to yuv2yuv_X, to avoid conflicts with
> the names of standalone asm functions. When ffmpeg is compiled with
> --disable-x86asm, using the scale function will cause the video to
> be blurred. The reason is that when disable-x86asm, INLINE_MMXEXT
> is 1 and use_mmx_vfilter is 1, but c->yuv2planeX uses the c language
> version, which causes a problem of mismatch with the vfilter. This
> problem has persisted from version 4.4 to the present. Fix it by using
> inline yuv2yuv_X_mmxext, that can maintain the consistency of
> use_mmx_vfilter.
>
> reproduce the issue:
> 1. ./configure --disable-x86asm --enable-gpl --enable-libx264
> 2. ./ffmpeg -i input.mp4 -vf "scale=1280x720" -c:v libx264 output.mp4
> the output.mp4 is abnormal
>
> Signed-off-by: huheng <heng.hu.1989@gmail.com>
> ---
>  libswscale/x86/swscale.c          |  6 +++-
>  libswscale/x86/swscale_template.c | 53 +++++++++++++++++++++++++++++++
>  2 files changed, 58 insertions(+), 1 deletion(-)
>
> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> index ff16398988..1bb9d1d51a 100644
> --- a/libswscale/x86/swscale.c
> +++ b/libswscale/x86/swscale.c
> @@ -452,8 +452,12 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
>      int cpu_flags = av_get_cpu_flags();
>
>  #if HAVE_MMXEXT_INLINE
> -    if (INLINE_MMXEXT(cpu_flags))
> +    if (INLINE_MMXEXT(cpu_flags)) {
>          sws_init_swscale_mmxext(c);
> +        if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
> +            c->yuv2planeX = yuv2yuv_X_mmxext;
> +        }
> +    }
>  #endif
>      if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
>  #if HAVE_MMXEXT_EXTERNAL
> diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
> index 6190fcb4fe..1b8794480d 100644
> --- a/libswscale/x86/swscale_template.c
> +++ b/libswscale/x86/swscale_template.c
> @@ -33,6 +33,59 @@
>  #define MOVNTQ2 "movntq "
>  #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
>
> +static void RENAME(yuv2yuv_X)(const int16_t *filter, int filterSize,
> +                           const int16_t **src, uint8_t *dest, int dstW,
> +                           const uint8_t *dither, int offset)
> +{
> +    filterSize--;
> +    __asm__ volatile(
> +        "movd %0, %%mm1\n\t"
> +        "punpcklwd %%mm1, %%mm1\n\t"
> +        "punpckldq %%mm1, %%mm1\n\t"
> +        "psllw        $3, %%mm1\n\t"
> +        "paddw     %%mm1, %%mm3\n\t"
> +        "paddw     %%mm1, %%mm4\n\t"
> +        "psraw        $4, %%mm3\n\t"
> +        "psraw        $4, %%mm4\n\t"
> +        ::"m"(filterSize)
> +     );
> +
> +    __asm__ volatile(\
> +        "movq    %%mm3, %%mm6\n\t"
> +        "movq    %%mm4, %%mm7\n\t"
> +        "movl %3, %%ecx\n\t"
> +        "mov                                 %0, %%"FF_REG_d"       \n\t"\
> +        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\
> +        ".p2align                             4                     \n\t" /* FIXME Unroll? */\
> +        "1:                                                         \n\t"\
> +        "movq                      8(%%"FF_REG_d"), %%mm0           \n\t" /* filterCoeff */\
> +        "movq                (%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* srcData */\
> +        "movq               8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* srcData */\
> +        "add                                $16, %%"FF_REG_d"       \n\t"\
> +        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\
> +        "test                         %%"FF_REG_S", %%"FF_REG_S"    \n\t"\
> +        "pmulhw                           %%mm0, %%mm2      \n\t"\
> +        "pmulhw                           %%mm0, %%mm5      \n\t"\
> +        "paddw                            %%mm2, %%mm3      \n\t"\
> +        "paddw                            %%mm5, %%mm4      \n\t"\
> +        " jnz                                1b             \n\t"\
> +        "psraw                               $3, %%mm3      \n\t"\
> +        "psraw                               $3, %%mm4      \n\t"\
> +        "packuswb                         %%mm4, %%mm3      \n\t"
> +        MOVNTQ2 "                         %%mm3, (%1, %%"FF_REG_c")\n\t"
> +        "add                          $8, %%"FF_REG_c"      \n\t"\
> +        "cmp                          %2, %%"FF_REG_c"      \n\t"\
> +        "movq    %%mm6, %%mm3\n\t"
> +        "movq    %%mm7, %%mm4\n\t"
> +        "mov                                 %0, %%"FF_REG_d"     \n\t"\
> +        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
> +        "jb                                  1b                   \n\t"\
> +        :: "g" (filter),
> +           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
> +        : "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
> +    );
> +}
> +
>  #define YSCALEYUV2PACKEDX_UV \
>      __asm__ volatile(\
>          "xor                %%"FF_REG_a", %%"FF_REG_a"  \n\t"\
> --
> 2.20.1
>
Request for review and some additional information:
I found that when compiling ffmpeg with the '-disable-x86asm' option,
there is an issue with the scale function causing video distortion.
This issue is very easily reproducible. I used bisect to identify the
problem and found that it was introduced after commit 554c2bc7086f49.
This commit moved the inline assembly function yuv2yuvX_sse3 to
standalone assembly, which broke the scale function under the
'disable-x86asm' option. My patch restores the original inline
assembly code under the 'disable-x86asm' option, fixing this issue.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm.
  2024-05-02 20:07 ` hu heng
@ 2024-05-04  0:21   ` Michael Niedermayer
  0 siblings, 0 replies; 3+ messages in thread
From: Michael Niedermayer @ 2024-05-04  0:21 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 6695 bytes --]

On Fri, May 03, 2024 at 04:07:36AM +0800, hu heng wrote:
> <heng.hu.1989@gmail.com> 于2024年4月26日周五 20:21写道:
> >
> > From: huheng <heng.hu.1989@gmail.com>
> >
> > rename old inline yuv2yuvX to yuv2yuv_X, to avoid conflicts with
> > the names of standalone asm functions. When ffmpeg is compiled with
> > --disable-x86asm, using the scale function will cause the video to
> > be blurred. The reason is that when disable-x86asm, INLINE_MMXEXT
> > is 1 and use_mmx_vfilter is 1, but c->yuv2planeX uses the c language
> > version, which causes a problem of mismatch with the vfilter. This
> > problem has persisted from version 4.4 to the present. Fix it by using
> > inline yuv2yuv_X_mmxext, that can maintain the consistency of
> > use_mmx_vfilter.
> >
> > reproduce the issue:
> > 1. ./configure --disable-x86asm --enable-gpl --enable-libx264
> > 2. ./ffmpeg -i input.mp4 -vf "scale=1280x720" -c:v libx264 output.mp4
> > the output.mp4 is abnormal
> >
> > Signed-off-by: huheng <heng.hu.1989@gmail.com>
> > ---
> >  libswscale/x86/swscale.c          |  6 +++-
> >  libswscale/x86/swscale_template.c | 53 +++++++++++++++++++++++++++++++
> >  2 files changed, 58 insertions(+), 1 deletion(-)
> >
> > diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> > index ff16398988..1bb9d1d51a 100644
> > --- a/libswscale/x86/swscale.c
> > +++ b/libswscale/x86/swscale.c
> > @@ -452,8 +452,12 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
> >      int cpu_flags = av_get_cpu_flags();
> >
> >  #if HAVE_MMXEXT_INLINE
> > -    if (INLINE_MMXEXT(cpu_flags))
> > +    if (INLINE_MMXEXT(cpu_flags)) {
> >          sws_init_swscale_mmxext(c);
> > +        if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
> > +            c->yuv2planeX = yuv2yuv_X_mmxext;
> > +        }
> > +    }
> >  #endif
> >      if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
> >  #if HAVE_MMXEXT_EXTERNAL
> > diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
> > index 6190fcb4fe..1b8794480d 100644
> > --- a/libswscale/x86/swscale_template.c
> > +++ b/libswscale/x86/swscale_template.c
> > @@ -33,6 +33,59 @@
> >  #define MOVNTQ2 "movntq "
> >  #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
> >
> > +static void RENAME(yuv2yuv_X)(const int16_t *filter, int filterSize,
> > +                           const int16_t **src, uint8_t *dest, int dstW,
> > +                           const uint8_t *dither, int offset)
> > +{
> > +    filterSize--;
> > +    __asm__ volatile(
> > +        "movd %0, %%mm1\n\t"
> > +        "punpcklwd %%mm1, %%mm1\n\t"
> > +        "punpckldq %%mm1, %%mm1\n\t"
> > +        "psllw        $3, %%mm1\n\t"
> > +        "paddw     %%mm1, %%mm3\n\t"
> > +        "paddw     %%mm1, %%mm4\n\t"
> > +        "psraw        $4, %%mm3\n\t"
> > +        "psraw        $4, %%mm4\n\t"
> > +        ::"m"(filterSize)
> > +     );
> > +
> > +    __asm__ volatile(\
> > +        "movq    %%mm3, %%mm6\n\t"
> > +        "movq    %%mm4, %%mm7\n\t"
> > +        "movl %3, %%ecx\n\t"
> > +        "mov                                 %0, %%"FF_REG_d"       \n\t"\
> > +        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\
> > +        ".p2align                             4                     \n\t" /* FIXME Unroll? */\
> > +        "1:                                                         \n\t"\
> > +        "movq                      8(%%"FF_REG_d"), %%mm0           \n\t" /* filterCoeff */\
> > +        "movq                (%%"FF_REG_S", %%"FF_REG_c", 2), %%mm2 \n\t" /* srcData */\
> > +        "movq               8(%%"FF_REG_S", %%"FF_REG_c", 2), %%mm5 \n\t" /* srcData */\
> > +        "add                                $16, %%"FF_REG_d"       \n\t"\
> > +        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"    \n\t"\
> > +        "test                         %%"FF_REG_S", %%"FF_REG_S"    \n\t"\
> > +        "pmulhw                           %%mm0, %%mm2      \n\t"\
> > +        "pmulhw                           %%mm0, %%mm5      \n\t"\
> > +        "paddw                            %%mm2, %%mm3      \n\t"\
> > +        "paddw                            %%mm5, %%mm4      \n\t"\
> > +        " jnz                                1b             \n\t"\
> > +        "psraw                               $3, %%mm3      \n\t"\
> > +        "psraw                               $3, %%mm4      \n\t"\
> > +        "packuswb                         %%mm4, %%mm3      \n\t"
> > +        MOVNTQ2 "                         %%mm3, (%1, %%"FF_REG_c")\n\t"
> > +        "add                          $8, %%"FF_REG_c"      \n\t"\
> > +        "cmp                          %2, %%"FF_REG_c"      \n\t"\
> > +        "movq    %%mm6, %%mm3\n\t"
> > +        "movq    %%mm7, %%mm4\n\t"
> > +        "mov                                 %0, %%"FF_REG_d"     \n\t"\
> > +        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"  \n\t"\
> > +        "jb                                  1b                   \n\t"\
> > +        :: "g" (filter),
> > +           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
> > +        : "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
> > +    );
> > +}
> > +
> >  #define YSCALEYUV2PACKEDX_UV \
> >      __asm__ volatile(\
> >          "xor                %%"FF_REG_a", %%"FF_REG_a"  \n\t"\
> > --
> > 2.20.1
> >
> Request for review and some additional information:
> I found that when compiling ffmpeg with the '-disable-x86asm' option,
> there is an issue with the scale function causing video distortion.
> This issue is very easily reproducible. I used bisect to identify the
> problem and found that it was introduced after commit 554c2bc7086f49.
> This commit moved the inline assembly function yuv2yuvX_sse3 to
> standalone assembly, which broke the scale function under the
> 'disable-x86asm' option. My patch restores the original inline
> assembly code under the 'disable-x86asm' option, fixing this issue.

Having a inline asm AND a external asm implementation of the same
thing is not ideal

I think moving all asm to yasm/nasm fits more into FFmpegs direction.
that would likely make things consistent and this sort of bug disappear

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Any man who breaks a law that conscience tells him is unjust and willingly 
accepts the penalty by staying in jail in order to arouse the conscience of 
the community on the injustice of the law is at that moment expressing the 
very highest respect for law. - Martin Luther King Jr

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-05-04  0:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-26 12:21 [FFmpeg-devel] [PATCH v1] scale: Bring back the old yuv2yuvX, use it when disable-x86asm heng.hu.1989
2024-05-02 20:07 ` hu heng
2024-05-04  0:21   ` Michael Niedermayer

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git