[FFmpeg-devel] [PATCH] avcodec/x86/mathops: clip constants used with shift instructions within inline assembly

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

* [FFmpeg-devel] [PATCH] avcodec/x86/mathops: clip constants used with shift instructions within inline assembly
@ 2023-07-16 15:19 James Almer
  2023-07-16 16:00 ` Andreas Rheinhardt
  0 siblings, 1 reply; 4+ messages in thread
From: James Almer @ 2023-07-16 15:19 UTC (permalink / raw)
  To: ffmpeg-devel

From: Rémi Denis-Courmont <remi@remlab.net>

Fixes assembling with binutils as >= 2.41

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavcodec/x86/mathops.h | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
index 6298f5ed19..ca7e2dffc1 100644
--- a/libavcodec/x86/mathops.h
+++ b/libavcodec/x86/mathops.h
@@ -35,12 +35,20 @@
 static av_always_inline av_const int MULL(int a, int b, unsigned shift)
 {
     int rt, dummy;
+    if (__builtin_constant_p(shift))
     __asm__ (
         "imull %3               \n\t"
         "shrdl %4, %%edx, %%eax \n\t"
         :"=a"(rt), "=d"(dummy)
-        :"a"(a), "rm"(b), "ci"((uint8_t)shift)
+        :"a"(a), "rm"(b), "i"(shift & 0x1F)
     );
+    else
+        __asm__ (
+            "imull %3               \n\t"
+            "shrdl %4, %%edx, %%eax \n\t"
+            :"=a"(rt), "=d"(dummy)
+            :"a"(a), "rm"(b), "c"((uint8_t)shift)
+        );
     return rt;
 }
 
@@ -113,19 +121,31 @@ __asm__ volatile(\
 // avoid +32 for shift optimization (gcc should do that ...)
 #define NEG_SSR32 NEG_SSR32
 static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
+    if (__builtin_constant_p(s))
     __asm__ ("sarl %1, %0\n\t"
          : "+r" (a)
-         : "ic" ((uint8_t)(-s))
+         : "i" (-s & 0x1F)
     );
+    else
+        __asm__ ("sarl %1, %0\n\t"
+               : "+r" (a)
+               : "c" ((uint8_t)(-s))
+        );
     return a;
 }
 
 #define NEG_USR32 NEG_USR32
 static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+    if (__builtin_constant_p(s))
     __asm__ ("shrl %1, %0\n\t"
          : "+r" (a)
-         : "ic" ((uint8_t)(-s))
+         : "i" (-s & 0x1F)
     );
+    else
+        __asm__ ("shrl %1, %0\n\t"
+               : "+r" (a)
+               : "c" ((uint8_t)(-s))
+        );
     return a;
 }
 
-- 
2.41.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/x86/mathops: clip constants used with shift instructions within inline assembly
  2023-07-16 15:19 [FFmpeg-devel] [PATCH] avcodec/x86/mathops: clip constants used with shift instructions within inline assembly James Almer
@ 2023-07-16 16:00 ` Andreas Rheinhardt
  2023-07-16 16:04   ` James Almer
  2023-07-16 16:39   ` Rémi Denis-Courmont
  0 siblings, 2 replies; 4+ messages in thread
From: Andreas Rheinhardt @ 2023-07-16 16:00 UTC (permalink / raw)
  To: ffmpeg-devel

James Almer:
> From: Rémi Denis-Courmont <remi@remlab.net>
> 
> Fixes assembling with binutil as >= 2.41
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavcodec/x86/mathops.h | 26 +++++++++++++++++++++++---
>  1 file changed, 23 insertions(+), 3 deletions(-)
> 
> diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
> index 6298f5ed19..ca7e2dffc1 100644
> --- a/libavcodec/x86/mathops.h
> +++ b/libavcodec/x86/mathops.h
> @@ -35,12 +35,20 @@
>  static av_always_inline av_const int MULL(int a, int b, unsigned shift)
>  {
>      int rt, dummy;
> +    if (__builtin_constant_p(shift))

We actually have av_builtin_constant_p. Is it guaranteed that all
compilers supporting inline ASM also support __builtin_constant_p?

>      __asm__ (
>          "imull %3               \n\t"
>          "shrdl %4, %%edx, %%eax \n\t"
>          :"=a"(rt), "=d"(dummy)
> -        :"a"(a), "rm"(b), "ci"((uint8_t)shift)
> +        :"a"(a), "rm"(b), "i"(shift & 0x1F)
>      );
> +    else
> +        __asm__ (
> +            "imull %3               \n\t"
> +            "shrdl %4, %%edx, %%eax \n\t"
> +            :"=a"(rt), "=d"(dummy)
> +            :"a"(a), "rm"(b), "c"((uint8_t)shift)
> +        );
>      return rt;
>  }
>  
> @@ -113,19 +121,31 @@ __asm__ volatile(\
>  // avoid +32 for shift optimization (gcc should do that ...)
>  #define NEG_SSR32 NEG_SSR32
>  static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
> +    if (__builtin_constant_p(s))
>      __asm__ ("sarl %1, %0\n\t"
>           : "+r" (a)
> -         : "ic" ((uint8_t)(-s))
> +         : "i" (-s & 0x1F)
>      );
> +    else
> +        __asm__ ("sarl %1, %0\n\t"
> +               : "+r" (a)
> +               : "c" ((uint8_t)(-s))
> +        );
>      return a;
>  }
>  
>  #define NEG_USR32 NEG_USR32
>  static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
> +    if (__builtin_constant_p(s))
>      __asm__ ("shrl %1, %0\n\t"
>           : "+r" (a)
> -         : "ic" ((uint8_t)(-s))
> +         : "i" (-s & 0x1F)
>      );
> +    else
> +        __asm__ ("shrl %1, %0\n\t"
> +               : "+r" (a)
> +               : "c" ((uint8_t)(-s))
> +        );
>      return a;
>  }
>  

Does this have a performance or codesize impact?
And is the inline ASM actually any good? (When I comment the inline ASM
of NEG_USR32 out, code size actually increases with GCC 11, suggesting
that the inline ASM may be counterproductive as it impairs the compiler's
ability to optimize.)

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/x86/mathops: clip constants used with shift instructions within inline assembly
  2023-07-16 16:00 ` Andreas Rheinhardt
@ 2023-07-16 16:04   ` James Almer
  2023-07-16 16:39   ` Rémi Denis-Courmont
  1 sibling, 0 replies; 4+ messages in thread
From: James Almer @ 2023-07-16 16:04 UTC (permalink / raw)
  To: ffmpeg-devel

On 7/16/2023 1:00 PM, Andreas Rheinhardt wrote:
> James Almer:
>> From: Rémi Denis-Courmont <remi@remlab.net>
>>
>> Fixes assembling with binutil as >= 2.41
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>   libavcodec/x86/mathops.h | 26 +++++++++++++++++++++++---
>>   1 file changed, 23 insertions(+), 3 deletions(-)
>>
>> diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
>> index 6298f5ed19..ca7e2dffc1 100644
>> --- a/libavcodec/x86/mathops.h
>> +++ b/libavcodec/x86/mathops.h
>> @@ -35,12 +35,20 @@
>>   static av_always_inline av_const int MULL(int a, int b, unsigned shift)
>>   {
>>       int rt, dummy;
>> +    if (__builtin_constant_p(shift))
> 
> We actually have av_builtin_constant_p. Is it guaranteed that all
> compilers supporting inline ASM also support __builtin_constant_p?

I can use av_builtin_constant_p() if you want, but it will be expanded 
to __builtin_constant_p() in all supported compilers, judging by how 
it's defined.

> 
>>       __asm__ (
>>           "imull %3               \n\t"
>>           "shrdl %4, %%edx, %%eax \n\t"
>>           :"=a"(rt), "=d"(dummy)
>> -        :"a"(a), "rm"(b), "ci"((uint8_t)shift)
>> +        :"a"(a), "rm"(b), "i"(shift & 0x1F)
>>       );
>> +    else
>> +        __asm__ (
>> +            "imull %3               \n\t"
>> +            "shrdl %4, %%edx, %%eax \n\t"
>> +            :"=a"(rt), "=d"(dummy)
>> +            :"a"(a), "rm"(b), "c"((uint8_t)shift)
>> +        );
>>       return rt;
>>   }
>>   
>> @@ -113,19 +121,31 @@ __asm__ volatile(\
>>   // avoid +32 for shift optimization (gcc should do that ...)
>>   #define NEG_SSR32 NEG_SSR32
>>   static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
>> +    if (__builtin_constant_p(s))
>>       __asm__ ("sarl %1, %0\n\t"
>>            : "+r" (a)
>> -         : "ic" ((uint8_t)(-s))
>> +         : "i" (-s & 0x1F)
>>       );
>> +    else
>> +        __asm__ ("sarl %1, %0\n\t"
>> +               : "+r" (a)
>> +               : "c" ((uint8_t)(-s))
>> +        );
>>       return a;
>>   }
>>   
>>   #define NEG_USR32 NEG_USR32
>>   static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
>> +    if (__builtin_constant_p(s))
>>       __asm__ ("shrl %1, %0\n\t"
>>            : "+r" (a)
>> -         : "ic" ((uint8_t)(-s))
>> +         : "i" (-s & 0x1F)
>>       );
>> +    else
>> +        __asm__ ("shrl %1, %0\n\t"
>> +               : "+r" (a)
>> +               : "c" ((uint8_t)(-s))
>> +        );
>>       return a;
>>   }
>>   
> 
> Does this have a performance or codesize impact?

It should behave the same as it has until now.

> And is the inline ASM actually any good? (When I comment the inline ASM
> of NEG_USR32 out, code size actually increases with GCC 11, suggesting
> that the inline ASM may be counterproductive as it impairs the compilers
> ability to optimize.)

I did not test nor check if removing this ricing is better or not. It
can be looked at later. Right now, I want lavc to compile with binutils 2.41.

> 
> - Andreas
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [FFmpeg-devel] [PATCH] avcodec/x86/mathops: clip constants used with shift instructions within inline assembly
  2023-07-16 16:00 ` Andreas Rheinhardt
  2023-07-16 16:04   ` James Almer
@ 2023-07-16 16:39   ` Rémi Denis-Courmont
  1 sibling, 0 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2023-07-16 16:39 UTC (permalink / raw)
  To: ffmpeg-devel

Le sunnuntaina 16. heinäkuuta 2023, 19.00.35 EEST Andreas Rheinhardt a écrit :
> > @@ -113,19 +121,31 @@ __asm__ volatile(\
> > 
> >  // avoid +32 for shift optimization (gcc should do that ...)
> >  #define NEG_SSR32 NEG_SSR32
> >  static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
> > 
> > +    if (__builtin_constant_p(s))
> > 
> >      __asm__ ("sarl %1, %0\n\t"
> >      
> >           : "+r" (a)
> > 
> > -         : "ic" ((uint8_t)(-s))
> > +         : "i" (-s & 0x1F)
> > 
> >      );
> > 
> > +    else
> > +        __asm__ ("sarl %1, %0\n\t"
> > +               : "+r" (a)
> > +               : "c" ((uint8_t)(-s))
> > +        );
> > 
> >      return a;
> >  
> >  }
> >  
> >  #define NEG_USR32 NEG_USR32
> >  static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
> > 
> > +    if (__builtin_constant_p(s))
> > 
> >      __asm__ ("shrl %1, %0\n\t"
> >      
> >           : "+r" (a)
> > 
> > -         : "ic" ((uint8_t)(-s))
> > +         : "i" (-s & 0x1F)
> > 
> >      );
> > 
> > +    else
> > +        __asm__ ("shrl %1, %0\n\t"
> > +               : "+r" (a)
> > +               : "c" ((uint8_t)(-s))
> > +        );
> > 
> >      return a;
> >  
> >  }
> 
> Does this have a performance or codesize impact?

It performs the masking at compilation time rather than run-time, thus saving 
one masking instruction and sparing an implicit clobber on ECX.

However, since we are dealing with constants, the shift amount could
*presumably* just as well be fixed in the calling code. Not that I'd know.

> And is the inline ASM actually any good?

To be honest, even if it's good, it inhibits instruction scheduling by the 
compiler. So IMO the threshold should be for assembler to be strictly *better* 
than the C code.

(And I don't know the answer to that question.)

-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-07-16 16:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-16 15:19 [FFmpeg-devel] [PATCH] avcodec/x86/mathops: clip constants used with shift instructions within inline assembly James Almer
2023-07-16 16:00 ` Andreas Rheinhardt
2023-07-16 16:04   ` James Almer
2023-07-16 16:39   ` Rémi Denis-Courmont

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git