Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] avutil/aes: use pthread_once to fill the static tables
@ 2025-04-07  0:00 James Almer
  2025-04-07 16:26 ` [FFmpeg-devel] [PATCH 2/2] avutil/x86/aes: remove a few branches James Almer
  0 siblings, 1 reply; 2+ messages in thread
From: James Almer @ 2025-04-07  0:00 UTC (permalink / raw)
  To: ffmpeg-devel

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavutil/aes.c | 53 ++++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 23 deletions(-)

diff --git a/libavutil/aes.c b/libavutil/aes.c
index 1f4cceb700..5f31412149 100644
--- a/libavutil/aes.c
+++ b/libavutil/aes.c
@@ -29,6 +29,7 @@
 #include "intreadwrite.h"
 #include "macros.h"
 #include "mem.h"
+#include "thread.h"
 
 const int av_aes_size= sizeof(AVAES);
 
@@ -197,6 +198,34 @@ static void init_multbl2(uint32_t tbl[][256], const int c[4],
     }
 }
 
+static AVOnce aes_static_init = AV_ONCE_INIT;
+
+static void aes_init_static(void)
+{
+    uint8_t log8[256];
+    uint8_t alog8[512];
+    int i, j = 1;
+
+    for (i = 0; i < 255; i++) {
+        alog8[i] = alog8[i + 255] = j;
+        log8[j] = i;
+        j ^= j + j;
+        if (j > 255)
+            j ^= 0x11B;
+    }
+    for (i = 0; i < 256; i++) {
+        j = i ? alog8[255 - log8[i]] : 0;
+        j ^= (j << 1) ^ (j << 2) ^ (j << 3) ^ (j << 4);
+        j = (j ^ (j >> 8) ^ 99) & 255;
+        inv_sbox[j] = i;
+        sbox[i]     = j;
+    }
+    init_multbl2(dec_multbl, (const int[4]) { 0xe, 0x9, 0xd, 0xb },
+                 log8, alog8, inv_sbox);
+    init_multbl2(enc_multbl, (const int[4]) { 0x2, 0x1, 0x1, 0x3 },
+                 log8, alog8, sbox);
+}
+
 // this is based on the reference AES code by Paulo Barreto and Vincent Rijmen
 int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
 {
@@ -204,34 +233,12 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
     uint8_t tk[8][4];
     int KC = key_bits >> 5;
     int rounds = KC + 6;
-    uint8_t log8[256];
-    uint8_t alog8[512];
 
     a->crypt = decrypt ? aes_decrypt : aes_encrypt;
     if (ARCH_X86)
         ff_init_aes_x86(a, decrypt);
 
-    if (!enc_multbl[FF_ARRAY_ELEMS(enc_multbl) - 1][FF_ARRAY_ELEMS(enc_multbl[0]) - 1]) {
-        j = 1;
-        for (i = 0; i < 255; i++) {
-            alog8[i] = alog8[i + 255] = j;
-            log8[j] = i;
-            j ^= j + j;
-            if (j > 255)
-                j ^= 0x11B;
-        }
-        for (i = 0; i < 256; i++) {
-            j = i ? alog8[255 - log8[i]] : 0;
-            j ^= (j << 1) ^ (j << 2) ^ (j << 3) ^ (j << 4);
-            j = (j ^ (j >> 8) ^ 99) & 255;
-            inv_sbox[j] = i;
-            sbox[i]     = j;
-        }
-        init_multbl2(dec_multbl, (const int[4]) { 0xe, 0x9, 0xd, 0xb },
-                     log8, alog8, inv_sbox);
-        init_multbl2(enc_multbl, (const int[4]) { 0x2, 0x1, 0x1, 0x3 },
-                     log8, alog8, sbox);
-    }
+    ff_thread_once(&aes_static_init, aes_init_static);
 
     if (key_bits != 128 && key_bits != 192 && key_bits != 256)
         return AVERROR(EINVAL);
-- 
2.49.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [FFmpeg-devel] [PATCH 2/2] avutil/x86/aes: remove a few branches
  2025-04-07  0:00 [FFmpeg-devel] [PATCH] avutil/aes: use pthread_once to fill the static tables James Almer
@ 2025-04-07 16:26 ` James Almer
  0 siblings, 0 replies; 2+ messages in thread
From: James Almer @ 2025-04-07 16:26 UTC (permalink / raw)
  To: ffmpeg-devel

The rounds value is constant and can be one of three hardcoded values, so
instead of checking it on every loop iteration, just split the function
into three separate implementations, one for each value.

Before:
aes_decrypt_128_aesni:                                  93.8 (47.58x)
aes_decrypt_192_aesni:                                 106.9 (49.30x)
aes_decrypt_256_aesni:                                 109.8 (56.50x)
aes_encrypt_128_aesni:                                  93.2 (47.70x)
aes_encrypt_192_aesni:                                 111.1 (48.36x)
aes_encrypt_256_aesni:                                 113.6 (56.27x)

After:
aes_decrypt_128_aesni:                                  71.5 (63.31x)
aes_decrypt_192_aesni:                                  96.8 (55.64x)
aes_decrypt_256_aesni:                                 106.1 (58.51x)
aes_encrypt_128_aesni:                                  81.3 (55.92x)
aes_encrypt_192_aesni:                                  91.2 (59.78x)
aes_encrypt_256_aesni:                                 109.0 (58.26x)

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavutil/aes.c          |  3 +--
 libavutil/x86/aes.asm    | 24 +++++++++++++-----------
 libavutil/x86/aes_init.c | 22 ++++++++++++++++++----
 3 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/libavutil/aes.c b/libavutil/aes.c
index 5f31412149..52a250bc00 100644
--- a/libavutil/aes.c
+++ b/libavutil/aes.c
@@ -234,6 +234,7 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
     int KC = key_bits >> 5;
     int rounds = KC + 6;
 
+    a->rounds = rounds;
     a->crypt = decrypt ? aes_decrypt : aes_encrypt;
     if (ARCH_X86)
         ff_init_aes_x86(a, decrypt);
@@ -243,8 +244,6 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
     if (key_bits != 128 && key_bits != 192 && key_bits != 256)
         return AVERROR(EINVAL);
 
-    a->rounds = rounds;
-
     memcpy(tk, key, KC * 4);
     memcpy(a->round_key[0].u8, key, KC * 4);
 
diff --git a/libavutil/x86/aes.asm b/libavutil/x86/aes.asm
index 7084c46055..e985a94685 100644
--- a/libavutil/x86/aes.asm
+++ b/libavutil/x86/aes.asm
@@ -26,12 +26,11 @@ SECTION .text
 ; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
 ;                     int count, uint8_t *iv, int rounds)
 ;-----------------------------------------------------------------------------
-%macro AES_CRYPT 1
-cglobal aes_%1rypt, 6,6,2
+%macro AES_CRYPT 2
+cglobal aes_%1rypt_%2, 5, 5, 2
     test     r3d, r3d
     je .ret
     shl      r3d, 4
-    add      r5d, r5d
     add       r0, 0x60
     add       r2, r3
     add       r1, r3
@@ -45,16 +44,15 @@ cglobal aes_%1rypt, 6,6,2
 %ifidn %1, enc
     pxor      m0, m1
 %endif
-    pxor      m0, [r0+8*r5-0x60]
-    cmp      r5d, 24
-    je .rounds12
-    jl .rounds10
+    pxor      m0, [r0+8*2*%2-0x60]
+%if %2 > 12
     aes%1     m0, [r0+0x70]
     aes%1     m0, [r0+0x60]
-.rounds12:
+%endif
+%if %2 > 10
     aes%1     m0, [r0+0x50]
     aes%1     m0, [r0+0x40]
-.rounds10:
+%endif
     aes%1     m0, [r0+0x30]
     aes%1     m0, [r0+0x20]
     aes%1     m0, [r0+0x10]
@@ -90,6 +88,10 @@ cglobal aes_%1rypt, 6,6,2
 
 %if HAVE_AESNI_EXTERNAL
 INIT_XMM aesni
-AES_CRYPT enc
-AES_CRYPT dec
+AES_CRYPT enc, 10
+AES_CRYPT enc, 12
+AES_CRYPT enc, 14
+AES_CRYPT dec, 10
+AES_CRYPT dec, 12
+AES_CRYPT dec, 14
 %endif
diff --git a/libavutil/x86/aes_init.c b/libavutil/x86/aes_init.c
index 0ac8c20239..c3e2003c07 100644
--- a/libavutil/x86/aes_init.c
+++ b/libavutil/x86/aes_init.c
@@ -22,15 +22,29 @@
 #include "libavutil/aes_internal.h"
 #include "libavutil/x86/cpu.h"
 
-void ff_aes_decrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
+void ff_aes_decrypt_10_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
                           int count, uint8_t *iv, int rounds);
-void ff_aes_encrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
+void ff_aes_decrypt_12_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
                           int count, uint8_t *iv, int rounds);
+void ff_aes_decrypt_14_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
+                             int count, uint8_t *iv, int rounds);
+void ff_aes_encrypt_10_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
+                             int count, uint8_t *iv, int rounds);
+void ff_aes_encrypt_12_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
+                             int count, uint8_t *iv, int rounds);
+void ff_aes_encrypt_14_aesni(AVAES *a, uint8_t *dst, const uint8_t *src,
+                             int count, uint8_t *iv, int rounds);
 
 void ff_init_aes_x86(AVAES *a, int decrypt)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_AESNI(cpu_flags))
-        a->crypt = decrypt ? ff_aes_decrypt_aesni : ff_aes_encrypt_aesni;
+    if (EXTERNAL_AESNI(cpu_flags)) {
+        if (a->rounds == 10)
+            a->crypt = decrypt ? ff_aes_decrypt_10_aesni : ff_aes_encrypt_10_aesni;
+        else if (a->rounds == 12)
+            a->crypt = decrypt ? ff_aes_decrypt_12_aesni : ff_aes_encrypt_12_aesni;
+        else if (a->rounds == 14)
+            a->crypt = decrypt ? ff_aes_decrypt_14_aesni : ff_aes_encrypt_14_aesni;
+    }
 }
-- 
2.49.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-04-07 16:26 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-07  0:00 [FFmpeg-devel] [PATCH] avutil/aes: use pthread_once to fill the static tables James Almer
2025-04-07 16:26 ` [FFmpeg-devel] [PATCH 2/2] avutil/x86/aes: remove a few branches James Almer

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git