From: James Almer <jamrial@gmail.com> To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH 2/2] avutil/x86/aes: remove a few branches Date: Mon, 7 Apr 2025 13:26:32 -0300 Message-ID: <20250407162632.1142-1-jamrial@gmail.com> (raw) In-Reply-To: <20250407000004.7306-1-jamrial@gmail.com> The rounds value is constant and can be one of three hardcoded values, so instead of checking it on every loop, just split the function into three different implementations for each value. Before: aes_decrypt_128_aesni: 93.8 (47.58x) aes_decrypt_192_aesni: 106.9 (49.30x) aes_decrypt_256_aesni: 109.8 (56.50x) aes_encrypt_128_aesni: 93.2 (47.70x) aes_encrypt_192_aesni: 111.1 (48.36x) aes_encrypt_256_aesni: 113.6 (56.27x) After: aes_decrypt_128_aesni: 71.5 (63.31x) aes_decrypt_192_aesni: 96.8 (55.64x) aes_decrypt_256_aesni: 106.1 (58.51x) aes_encrypt_128_aesni: 81.3 (55.92x) aes_encrypt_192_aesni: 91.2 (59.78x) aes_encrypt_256_aesni: 109.0 (58.26x) Signed-off-by: James Almer <jamrial@gmail.com> --- libavutil/aes.c | 3 +-- libavutil/x86/aes.asm | 24 +++++++++++++----------- libavutil/x86/aes_init.c | 22 ++++++++++++++++++---- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/libavutil/aes.c b/libavutil/aes.c index 5f31412149..52a250bc00 100644 --- a/libavutil/aes.c +++ b/libavutil/aes.c @@ -234,6 +234,7 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt) int KC = key_bits >> 5; int rounds = KC + 6; + a->rounds = rounds; a->crypt = decrypt ? aes_decrypt : aes_encrypt; if (ARCH_X86) ff_init_aes_x86(a, decrypt); @@ -243,8 +244,6 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt) if (key_bits != 128 && key_bits != 192 && key_bits != 256) return AVERROR(EINVAL); - a->rounds = rounds; - memcpy(tk, key, KC * 4); memcpy(a->round_key[0].u8, key, KC * 4); diff --git a/libavutil/x86/aes.asm b/libavutil/x86/aes.asm index 7084c46055..e985a94685 100644 --- a/libavutil/x86/aes.asm +++ b/libavutil/x86/aes.asm @@ -26,12 +26,11 @@ SECTION .text ; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src, ; int count, uint8_t *iv, int rounds) ;----------------------------------------------------------------------------- -%macro AES_CRYPT 1 -cglobal aes_%1rypt, 6,6,2 +%macro AES_CRYPT 2 +cglobal aes_%1rypt_%2, 5, 5, 2 test r3d, r3d je .ret shl r3d, 4 - add r5d, r5d add r0, 0x60 add r2, r3 add r1, r3 @@ -45,16 +44,15 @@ cglobal aes_%1rypt, 6,6,2 %ifidn %1, enc pxor m0, m1 %endif - pxor m0, [r0+8*r5-0x60] - cmp r5d, 24 - je .rounds12 - jl .rounds10 + pxor m0, [r0+8*2*%2-0x60] +%if %2 > 12 aes%1 m0, [r0+0x70] aes%1 m0, [r0+0x60] -.rounds12: +%endif +%if %2 > 10 aes%1 m0, [r0+0x50] aes%1 m0, [r0+0x40] -.rounds10: +%endif aes%1 m0, [r0+0x30] aes%1 m0, [r0+0x20] aes%1 m0, [r0+0x10] @@ -90,6 +88,10 @@ cglobal aes_%1rypt, 6,6,2 %if HAVE_AESNI_EXTERNAL INIT_XMM aesni -AES_CRYPT enc -AES_CRYPT dec +AES_CRYPT enc, 10 +AES_CRYPT enc, 12 +AES_CRYPT enc, 14 +AES_CRYPT dec, 10 +AES_CRYPT dec, 12 +AES_CRYPT dec, 14 %endif diff --git a/libavutil/x86/aes_init.c b/libavutil/x86/aes_init.c index 0ac8c20239..c3e2003c07 100644 --- a/libavutil/x86/aes_init.c +++ b/libavutil/x86/aes_init.c @@ -22,15 +22,29 @@ #include "libavutil/aes_internal.h" #include "libavutil/x86/cpu.h" -void ff_aes_decrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, +void ff_aes_decrypt_10_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int rounds); -void ff_aes_encrypt_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, +void ff_aes_decrypt_12_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int rounds); +void ff_aes_decrypt_14_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, + int count, uint8_t *iv, int rounds); +void ff_aes_encrypt_10_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, + int count, uint8_t *iv, int rounds); +void ff_aes_encrypt_12_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, + int count, uint8_t *iv, int rounds); +void ff_aes_encrypt_14_aesni(AVAES *a, uint8_t *dst, const uint8_t *src, + int count, uint8_t *iv, int rounds); void ff_init_aes_x86(AVAES *a, int decrypt) { int cpu_flags = av_get_cpu_flags(); - if (EXTERNAL_AESNI(cpu_flags)) - a->crypt = decrypt ? ff_aes_decrypt_aesni : ff_aes_encrypt_aesni; + if (EXTERNAL_AESNI(cpu_flags)) { + if (a->rounds == 10) + a->crypt = decrypt ? ff_aes_decrypt_10_aesni : ff_aes_encrypt_10_aesni; + else if (a->rounds == 12) + a->crypt = decrypt ? ff_aes_decrypt_12_aesni : ff_aes_encrypt_12_aesni; + else if (a->rounds == 14) + a->crypt = decrypt ? ff_aes_decrypt_14_aesni : ff_aes_encrypt_14_aesni; + } } -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
prev parent reply other threads:[~2025-04-07 16:26 UTC|newest] Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top 2025-04-07 0:00 [FFmpeg-devel] [PATCH] avutil/aes: use pthread_once to fill the static tables James Almer 2025-04-07 16:26 ` James Almer [this message]
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20250407162632.1142-1-jamrial@gmail.com \ --to=jamrial@gmail.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git