* [FFmpeg-devel] [PATCH] lavu/x86: add Intel CET support (PR #20919)
@ 2025-11-14 11:58 brad via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: brad via ffmpeg-devel @ 2025-11-14 11:58 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: brad
PR #20919 opened by brad
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20919
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20919.patch
Co-authored-by: Sebastien Marie <semarie@kapouay.eu.org>
From c6926060a98f335e069c3e631b96cc4e0b8e758e Mon Sep 17 00:00:00 2001
From: Brad Smith <brad@comstyle.com>
Date: Fri, 14 Nov 2025 05:01:17 -0500
Subject: [PATCH] lavu/x86: add Intel CET support
Co-authored-by: Sebastien Marie <semarie@kapouay.eu.org>
---
configure | 6 ++++++
libavutil/x86/tx_float.asm | 24 ++++++++++++++++++++++++
libavutil/x86/x86inc.asm | 7 +++++++
3 files changed, 37 insertions(+)
diff --git a/configure b/configure
index 659b428cfc..f1bcfcf729 100755
--- a/configure
+++ b/configure
@@ -2258,6 +2258,7 @@ ARCH_EXT_LIST_X86_SIMD="
avx2
avx512
avx512icl
+ cet
fma3
fma4
mmx
@@ -6639,6 +6640,11 @@ EOF
enabled ssse3 && check_inline_asm ssse3_inline '"pabsw %xmm0, %xmm0"'
enabled mmxext && check_inline_asm mmxext_inline '"pmaxub %mm0, %mm1"'
+ # check whether Intel CET is in use
+ if enabled x86_64; then
+ check_cpp_condition cet "stddef.h" "defined(__CET__)"
+ fi
+
probe_x86asm(){
x86asmexe_probe=$1
if test_cmd $x86asmexe_probe -v; then
diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm
index c030147ce8..7f31b05c60 100644
--- a/libavutil/x86/tx_float.asm
+++ b/libavutil/x86/tx_float.asm
@@ -135,6 +135,7 @@ SECTION .text
; %1 - coefficients (r0.reim, r1.reim)
; %2 - temporary
%macro FFT2 2
+ _CET_ENDBR
shufps %2, %1, %1, q3322
shufps %1, %1, %1, q1100
@@ -148,6 +149,7 @@ SECTION .text
; %2 - odd coefficients (r1.reim, r3.reim, r5.reim, r7.reim)
; %3 - temporary
%macro FFT4 3
+ _CET_ENDBR
subps %3, %1, %2 ; r1234, [r5678]
addps %1, %1, %2 ; t1234, [t5678]
@@ -171,6 +173,7 @@ SECTION .text
; %5 - temporary
; %6 - temporary
%macro FFT8 6
+ _CET_ENDBR
addps %5, %1, %3 ; q1-8
addps %6, %2, %4 ; k1-8
@@ -212,6 +215,7 @@ SECTION .text
; %3 - temporary
; %4 - temporary
%macro FFT8_AVX 4
+ _CET_ENDBR
subps %3, %1, %2 ; r1234, r5678
addps %1, %1, %2 ; q1234, q5678
@@ -251,6 +255,7 @@ SECTION .text
; %5, %6 - temporary
; %7, %8 - temporary (optional)
%macro FFT16 6-8
+ _CET_ENDBR
FFT4 %3, %4, %5
%if %0 > 7
FFT8_AVX %1, %2, %6, %7
@@ -320,6 +325,7 @@ SECTION .text
; xm14 - out[0]
; xm15 - out[10, 5]
%macro FFT15 0
+ _CET_ENDBR
shufps xm1, xm0, xm0, q3223 ; in[1].imrereim
shufps xm0, xm0, xm0, q1001 ; in[0].imrereim
@@ -438,6 +444,7 @@ SECTION .text
; Output is slightly permuted such that tx2,3's coefficients are interleaved
; on a 2-point basis (look at `doc/transforms.md`)
%macro SPLIT_RADIX_COMBINE 17
+ _CET_ENDBR
%if %1 && mmsize == 32
vperm2f128 %14, %6, %7, 0x20 ; m2[0], m2[1], m3[0], m3[1] even
vperm2f128 %16, %9, %8, 0x20 ; m2[0], m2[1], m3[0], m3[1] odd
@@ -517,6 +524,7 @@ SECTION .text
; however, if the twiddles aren't needed after this, the registers they use
; can be used as any of the temporary registers.
%macro SPLIT_RADIX_COMBINE_HALF 10
+ _CET_ENDBR
%if %1
shufps %8, %6, %6, q2200 ; cos00224466
shufps %9, %7, %7, q1133 ; wim77553311
@@ -559,6 +567,7 @@ SECTION .text
; Same as above, tries REALLY hard to use 2 temporary registers.
%macro SPLIT_RADIX_COMBINE_LITE 9
+ _CET_ENDBR
%if %1
shufps %8, %6, %6, q2200 ; cos00224466
shufps %9, %7, %7, q1133 ; wim77553311
@@ -607,6 +616,7 @@ SECTION .text
%endmacro
%macro SPLIT_RADIX_COMBINE_64 0
+ _CET_ENDBR
SPLIT_RADIX_COMBINE_LITE 1, m0, m1, tx1_e0, tx2_e0, tw_e, tw_o, tmp1, tmp2
movaps [outq + 0*mmsize], m0
@@ -648,6 +658,7 @@ SECTION .text
; combine loop
; %1 must contain len*2, %2 must contain len*4, %3 must contain len*6
%macro SPLIT_RADIX_LOAD_COMBINE_4 8
+ _CET_ENDBR
movaps m8, [rtabq + (%5)*mmsize + %7]
vperm2f128 m9, m9, [itabq - (%5)*mmsize + %8], 0x23
@@ -693,6 +704,7 @@ SECTION .text
%else
%define offset_i 0
%endif
+ _CET_ENDBR
SPLIT_RADIX_LOAD_COMBINE_4 %1, 2*%1, %2, 0, 0, offset_c, offset_r, offset_i
SPLIT_RADIX_LOAD_COMBINE_4 %1, 2*%1, %2, 1, 1, offset_c, offset_r, offset_i
@@ -705,6 +717,7 @@ SECTION .text
; a full combine+deinterleave loop
; %3 must contain len*2, %4 must contain len*4, %5 must contain len*6
%macro SPLIT_RADIX_COMBINE_DEINTERLEAVE_2 6
+ _CET_ENDBR
movaps m8, [rtabq + (0 + %2)*mmsize]
vperm2f128 m9, m9, [itabq - (0 + %2)*mmsize], 0x23
@@ -807,6 +820,7 @@ SECTION .text
%else
%define offset 0
%endif
+ _CET_ENDBR
SPLIT_RADIX_COMBINE_DEINTERLEAVE_2 0, 0, %1, %1*2, %2, offset
SPLIT_RADIX_COMBINE_DEINTERLEAVE_2 4, 2, %1, %1*2, %2, offset
%endmacro
@@ -826,6 +840,7 @@ cglobal fft2_float, 4, 4, 2, ctx, out, in, stride
%macro FFT4_FN 3
INIT_XMM sse2
+ _CET_ENDBR
%if %3
cglobal fft4_ %+ %1 %+ _asm_float, 0, 0, 0, ctx, out, in, stride
%else
@@ -862,6 +877,7 @@ FFT4_FN inv, 1, 1
%macro FFT8_SSE_FN 1
INIT_XMM sse3
+ _CET_ENDBR
%if %1
cglobal fft8_asm_float, 0, 0, 0, ctx, out, in, stride, tmp
movaps m0, [inq + 0*mmsize]
@@ -907,6 +923,7 @@ FFT8_SSE_FN 1
%macro FFT8_AVX_FN 1
INIT_YMM avx
+ _CET_ENDBR
%if %1
cglobal fft8_asm_float, 0, 0, 0, ctx, out, in, stride, tmp
movaps m0, [inq + 0*mmsize]
@@ -947,6 +964,7 @@ FFT8_AVX_FN 1
%macro FFT16_FN 2
INIT_YMM %1
+ _CET_ENDBR
%if %2
cglobal fft16_asm_float, 0, 0, 0, ctx, out, in, stride, tmp
movaps m0, [inq + 0*mmsize]
@@ -998,6 +1016,7 @@ FFT16_FN fma3, 1
%macro FFT32_FN 2
INIT_YMM %1
+ _CET_ENDBR
%if %2
cglobal fft32_asm_float, 0, 0, 0, ctx, out, in, stride, tmp
movaps m4, [inq + 4*mmsize]
@@ -1084,6 +1103,7 @@ FFT32_FN fma3, 1
%macro FFT_SPLIT_RADIX_DEF 1-2
ALIGN 16
+ _CET_ENDBR
.%1 %+ pt:
PUSH lenq
mov lenq, (%1/4)
@@ -1122,6 +1142,7 @@ ALIGN 16
%macro FFT_SPLIT_RADIX_FN 2
INIT_YMM %1
+ _CET_ENDBR
%if %2
cglobal fft_sr_asm_float, 0, 0, 0, ctx, out, in, stride, len, lut, itab, rtab, tgt, tmp
%else
@@ -1523,6 +1544,7 @@ FFT_SPLIT_RADIX_FN avx2, 1
%macro FFT15_FN 2
INIT_YMM avx2
+ _CET_ENDBR
cglobal fft15_ %+ %2, 4, 10, 16, ctx, out, in, stride, len, lut, tmp, tgt5, stride3, stride5
mov lutq, [ctxq + AVTXContext.map]
@@ -1586,6 +1608,7 @@ FFT15_FN 1, ns_float
%macro IMDCT_FN 1
INIT_YMM %1
+ _CET_ENDBR
cglobal mdct_inv_float, 4, 14, 16, 320, ctx, out, in, stride, len, lut, exp, t1, t2, t3, \
t4, t5, btmp
movsxd lenq, dword [ctxq + AVTXContext.len]
@@ -1773,6 +1796,7 @@ IMDCT_FN avx2
%macro PFA_15_FN 2
INIT_YMM %1
+ _CET_ENDBR
%if %2
cglobal fft_pfa_15xM_asm_float, 0, 0, 0, ctx, out, in, stride, len, lut, buf, map, tgt, tmp, \
tgt5, stride3, stride5, btmp
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index e61d924bc1..069a11433e 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -46,6 +46,12 @@
%endif
%endif
+%if HAVE_CET
+ %define _CET_ENDBR endbr64
+%else
+ %define _CET_ENDBR
+%endif
+
%define WIN64 0
%define UNIX64 0
%if ARCH_X86_64
@@ -849,6 +855,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
%endif
align function_align
%2:
+ _CET_ENDBR
RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer
%xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
%assign stack_offset 0 ; stack pointer offset relative to the return address
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2025-11-14 11:59 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-14 11:58 [FFmpeg-devel] [PATCH] lavu/x86: add Intel CET support (PR #20919) brad via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git