From: mkver via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: mkver <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] avcodec/x86/me_cmp: Avoid MMX in (n)sse (PR #20822)
Date: Sun, 02 Nov 2025 22:06:51 -0000
Message-ID: <176212121236.25.5438835603124650670@2cb04c0e5124> (raw)
PR #20822 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20822
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20822.patch
>From 8c9f4f695859f018109294b6712b9f97eb777ed6 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 2 Nov 2025 17:43:10 +0100
Subject: [PATCH 1/5] avcodec/x86/me_cmp: Avoid unnecessary instruction
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/me_cmp.asm | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index 7825c8ef71..cf4ee941f7 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -282,9 +282,6 @@ HADAMARD8_DIFF 9
%macro SUM_SQUARED_ERRORS 1
cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
-%if %1 == mmsize
- shr hd, 1
-%endif
pxor m0, m0 ; mm0 = 0
pxor m7, m7 ; mm7 holds the sum
@@ -334,11 +331,12 @@ cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
%if %1 == mmsize
lea pix1q, [pix1q + 2*lsizeq]
lea pix2q, [pix2q + 2*lsizeq]
+ sub hd, 2
%else
add pix1q, lsizeq
add pix2q, lsizeq
-%endif
dec hd
+%endif
jnz .next2lines
HADDD m7, m1
--
2.49.1
>From 205a0e1a49d168fd07c55d152b6fc5fc0706aeb8 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 2 Nov 2025 17:50:15 +0100
Subject: [PATCH 2/5] avcodec/x86/me_cmp: Rename registers
This will avoid using xmm registers that are non-volatile (callee-saved)
on Win64 in the next commit.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/me_cmp.asm | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index cf4ee941f7..03db905346 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -283,7 +283,7 @@ HADAMARD8_DIFF 9
%macro SUM_SQUARED_ERRORS 1
cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
pxor m0, m0 ; mm0 = 0
- pxor m7, m7 ; mm7 holds the sum
+ pxor m5, m5 ; m5 holds the sum
.next2lines: ; FIXME why are these unaligned movs? pix1[] is aligned
movu m1, [pix1q] ; m1 = pix1[0][0-15], [0-7] for mmx
@@ -299,12 +299,12 @@ cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
; todo: mm1-mm2, mm3-mm4
; algo: subtract mm1 from mm2 with saturation and vice versa
; OR the result to get the absolute difference
- mova m5, m1
- mova m6, m3
+ mova m6, m1
+ mova m7, m3
psubusb m1, m2
psubusb m3, m4
- psubusb m2, m5
- psubusb m4, m6
+ psubusb m2, m6
+ psubusb m4, m7
por m2, m1
por m4, m3
@@ -325,8 +325,8 @@ cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
paddd m1, m2
paddd m3, m4
- paddd m7, m1
- paddd m7, m3
+ paddd m5, m1
+ paddd m5, m3
%if %1 == mmsize
lea pix1q, [pix1q + 2*lsizeq]
@@ -339,8 +339,8 @@ cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
%endif
jnz .next2lines
- HADDD m7, m1
- movd eax, m7 ; return value
+ HADDD m5, m1
+ movd eax, m5 ; return value
RET
%endmacro
--
2.49.1
>From d2e5fe5863476a420ae97c07e37e4003d6af3d61 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 2 Nov 2025 18:02:27 +0100
Subject: [PATCH 3/5] avcodec/x86/me_cmp: Add ff_sse8_sse2()
Benchmarks:
sse_1_c: 51.9 ( 1.00x)
sse_1_mmx: 16.5 ( 3.15x)
sse_1_sse2: 9.7 ( 5.36x)
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/me_cmp.asm | 20 ++++++++++++++++++--
libavcodec/x86/me_cmp_init.c | 3 +++
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index 03db905346..57a09a2b75 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -281,11 +281,25 @@ HADAMARD8_DIFF 9
; ptrdiff_t line_size, int h)
%macro SUM_SQUARED_ERRORS 1
-cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
+cglobal sse%1, 5,5,%1 < mmsize ? 6 : 8, v, pix1, pix2, lsize, h
pxor m0, m0 ; mm0 = 0
pxor m5, m5 ; m5 holds the sum
.next2lines: ; FIXME why are these unaligned movs? pix1[] is aligned
+%if %1 < mmsize
+ movh m1, [pix1q]
+ movh m2, [pix2q]
+ movh m3, [pix1q+lsizeq]
+ movh m4, [pix2q+lsizeq]
+ punpcklbw m1, m0
+ punpcklbw m2, m0
+ punpcklbw m3, m0
+ punpcklbw m4, m0
+ psubw m1, m2
+ psubw m3, m4
+ pmaddwd m1, m1
+ pmaddwd m3, m3
+%else
movu m1, [pix1q] ; m1 = pix1[0][0-15], [0-7] for mmx
movu m2, [pix2q] ; m2 = pix2[0][0-15], [0-7] for mmx
%if %1 == mmsize
@@ -325,10 +339,11 @@ cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
paddd m1, m2
paddd m3, m4
+%endif
paddd m5, m1
paddd m5, m3
-%if %1 == mmsize
+%if %1 <= mmsize
lea pix1q, [pix1q + 2*lsizeq]
lea pix2q, [pix2q + 2*lsizeq]
sub hd, 2
@@ -351,6 +366,7 @@ INIT_MMX mmx
SUM_SQUARED_ERRORS 16
INIT_XMM sse2
+SUM_SQUARED_ERRORS 8
SUM_SQUARED_ERRORS 16
;-----------------------------------------------
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index 9b23cbe4dc..dd5ffe0f45 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -32,6 +32,8 @@ int ff_sum_abs_dctelem_sse2(const int16_t *block);
int ff_sum_abs_dctelem_ssse3(const int16_t *block);
int ff_sse8_mmx(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
+int ff_sse8_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
+ ptrdiff_t stride, int h);
int ff_sse16_mmx(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_sse16_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
@@ -152,6 +154,7 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
if (EXTERNAL_SSE2(cpu_flags)) {
c->sse[0] = ff_sse16_sse2;
+ c->sse[1] = ff_sse8_sse2;
c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
c->pix_abs[0][0] = ff_sad16_sse2;
--
2.49.1
>From e5d144288852433a7546c8360563089e612b451b Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 2 Nov 2025 21:24:49 +0100
Subject: [PATCH 4/5] avcodec/x86/me_cmp: Port nsse{8,16} to SSSE3
Even nsse8 has to operate on eight words and therefore gains
a lot from xmm registers (and pabsw).
Old benchmarks:
nsse_0_c: 359.2 ( 1.00x)
nsse_0_mmx: 151.8 ( 2.37x)
nsse_1_c: 151.2 ( 1.00x)
nsse_1_mmx: 77.5 ( 1.95x)
New benchmarks:
nsse_0_c: 358.8 ( 1.00x)
nsse_0_ssse3: 62.2 ( 5.77x)
nsse_1_c: 151.2 ( 1.00x)
nsse_1_ssse3: 33.6 ( 4.50x)
The MMX nsse functions have been removed.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/me_cmp.asm | 106 +++++++++++++++++++----------------
libavcodec/x86/me_cmp_init.c | 38 ++++++-------
2 files changed, 75 insertions(+), 69 deletions(-)
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index 57a09a2b75..770a6f22ec 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -23,10 +23,15 @@
%include "libavutil/x86/x86util.asm"
+SECTION_RODATA
+
cextern pb_1
cextern pb_80
cextern pw_2
+pb_unpack1: db 0, 0xFF, 1, 0xFF, 2, 0xFF, 3, 0xFF, 4, 0xFF, 5, 0xFF, 6, 0xFF, 0xFF, 0xFF
+pb_unpack2: db 1, 0xFF, 2, 0xFF, 3, 0xFF, 4, 0xFF, 5, 0xFF, 6, 0xFF, 7, 0xFF, 0xFF, 0xFF
+
SECTION .text
%macro DIFF_PIXELS_1 4
@@ -403,19 +408,16 @@ INIT_XMM ssse3
SUM_ABS_DCTELEM 6, 2
;------------------------------------------------------------------------------
-; int ff_hf_noise*_mmx(const uint8_t *pix1, ptrdiff_t lsize, int h)
+; int ff_hf_noise*_ssse3(const uint8_t *pix1, ptrdiff_t lsize, int h)
;------------------------------------------------------------------------------
-; %1 = 8/16. %2-5=m#
-%macro HF_NOISE_PART1 5
- mova m%2, [pix1q]
-%if %1 == 8
+; %1 = 8/16, %2-5=m#, %6 = src
+%macro HF_NOISE_PART1 6
+%if %1 == mmsize
+ movu m%2, [%6]
mova m%3, m%2
- psllq m%2, 8
- psrlq m%3, 8
- psrlq m%2, 8
-%else
- mova m%3, [pix1q+1]
-%endif
+ pslldq m%2, 1
+ psrldq m%3, 1
+ psrldq m%2, 1
mova m%4, m%2
mova m%5, m%3
punpcklbw m%2, m7
@@ -424,57 +426,65 @@ SUM_ABS_DCTELEM 6, 2
punpckhbw m%5, m7
psubw m%2, m%3
psubw m%4, m%5
+%else
+ movh m%2, [%6]
+ pshufb m%3, m%2, m5
+ pshufb m%2, m%2, m4
+ psubw m%2, m%3
+%endif
%endmacro
-; %1-2 = m#
-%macro HF_NOISE_PART2 4
- psubw m%1, m%3
- psubw m%2, m%4
- pxor m3, m3
- pxor m1, m1
- pcmpgtw m3, m%1
- pcmpgtw m1, m%2
- pxor m%1, m3
- pxor m%2, m1
- psubw m%1, m3
- psubw m%2, m1
- paddw m%2, m%1
- paddw m6, m%2
+; %1 = 8/16, %2-5 = m#
+%macro HF_NOISE_PART2 5
+%if %1 == mmsize
+ psubw m%2, m%3
+ psubw m%4, m%5
+ pabsw m%2, m%2
+ pabsw m%4, m%4
+ paddw m%2, m%4
+%else
+ psubw m%2, m%3
+ pabsw m%2, m%2
+%endif
+ paddw m0, m%2
%endmacro
; %1 = 8/16
%macro HF_NOISE 1
-cglobal hf_noise%1, 3,3,0, pix1, lsize, h
+cglobal hf_noise%1, 3,3,(%1 == 8) ? 6 : 8, pix1, lsize, h
+%if %1 == 8
+ mova m4, [pb_unpack1]
+ mova m5, [pb_unpack2]
+%else
+ pxor m4, m4
+%endif
sub hd, 2
- pxor m7, m7
- pxor m6, m6
- HF_NOISE_PART1 %1, 0, 1, 2, 3
- add pix1q, lsizeq
- HF_NOISE_PART1 %1, 4, 1, 5, 3
- HF_NOISE_PART2 0, 2, 4, 5
- add pix1q, lsizeq
+ pxor m0, m0
+ HF_NOISE_PART1 %1, 1, 2, 5, 7, pix1q
+ HF_NOISE_PART1 %1, 3, 2, 6, 7, pix1q+lsizeq
+ lea pix1q, [pix1q+2*lsizeq]
+ HF_NOISE_PART2 %1, 1, 3, 5, 6
.loop:
- HF_NOISE_PART1 %1, 0, 1, 2, 3
- HF_NOISE_PART2 4, 5, 0, 2
- add pix1q, lsizeq
- HF_NOISE_PART1 %1, 4, 1, 5, 3
- HF_NOISE_PART2 0, 2, 4, 5
- add pix1q, lsizeq
+ HF_NOISE_PART1 %1, 1, 2, 5, 7, pix1q
+ HF_NOISE_PART2 %1, 3, 1, 6, 5
+ HF_NOISE_PART1 %1, 3, 2, 6, 7, pix1q+lsizeq
+ lea pix1q, [pix1q+2*lsizeq]
+ HF_NOISE_PART2 %1, 1, 3, 5, 6
sub hd, 2
jne .loop
- mova m0, m6
- punpcklwd m0, m7
- punpckhwd m6, m7
- paddd m6, m0
- mova m0, m6
- psrlq m6, 32
- paddd m0, m6
- movd eax, m0 ; eax = result of hf_noise8;
+%if %1 == 8
+ pxor m4, m4
+%endif
+ movhlps m1, m0
+ paddw m0, m1
+ punpcklwd m0, m4
+ HADDD m0, m1
+ movd eax, m0 ; eax = result of hf_noise;
RET ; return eax;
%endmacro
-INIT_MMX mmx
+INIT_XMM ssse3
HF_NOISE 8
HF_NOISE 16
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index dd5ffe0f45..e166af8dab 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -38,8 +38,8 @@ int ff_sse16_mmx(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_sse16_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
-int ff_hf_noise8_mmx(const uint8_t *pix1, ptrdiff_t stride, int h);
-int ff_hf_noise16_mmx(const uint8_t *pix1, ptrdiff_t stride, int h);
+int ff_hf_noise8_ssse3(const uint8_t *pix1, ptrdiff_t stride, int h);
+int ff_hf_noise16_ssse3(const uint8_t *pix1, ptrdiff_t stride, int h);
int ff_sad8_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_sad16_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
@@ -86,17 +86,12 @@ hadamard_func(sse2)
hadamard_func(ssse3)
#if HAVE_X86ASM
-static int nsse16_mmx(MPVEncContext *c, const uint8_t *pix1, const uint8_t *pix2,
- ptrdiff_t stride, int h)
+static int nsse16_ssse3(MPVEncContext *c, const uint8_t *pix1, const uint8_t *pix2,
+ ptrdiff_t stride, int h)
{
- int score1, score2;
-
- if (c)
- score1 = c->sse_cmp[0](c, pix1, pix2, stride, h);
- else
- score1 = ff_sse16_mmx(c, pix1, pix2, stride, h);
- score2 = ff_hf_noise16_mmx(pix1, stride, h) + ff_hf_noise8_mmx(pix1+8, stride, h)
- - ff_hf_noise16_mmx(pix2, stride, h) - ff_hf_noise8_mmx(pix2+8, stride, h);
+ int score1 = ff_sse16_sse2(c, pix1, pix2, stride, h);
+ int score2 = ff_hf_noise16_ssse3(pix1, stride, h) -
+ ff_hf_noise16_ssse3(pix2, stride, h);
if (c)
return score1 + FFABS(score2) * c->c.avctx->nsse_weight;
@@ -104,12 +99,12 @@ static int nsse16_mmx(MPVEncContext *c, const uint8_t *pix1, const uint8_t *pix2
return score1 + FFABS(score2) * 8;
}
-static int nsse8_mmx(MPVEncContext *c, const uint8_t *pix1, const uint8_t *pix2,
- ptrdiff_t stride, int h)
+static int nsse8_ssse3(MPVEncContext *c, const uint8_t *pix1, const uint8_t *pix2,
+ ptrdiff_t stride, int h)
{
- int score1 = ff_sse8_mmx(c, pix1, pix2, stride, h);
- int score2 = ff_hf_noise8_mmx(pix1, stride, h) -
- ff_hf_noise8_mmx(pix2, stride, h);
+ int score1 = ff_sse8_sse2(c, pix1, pix2, stride, h);
+ int score2 = ff_hf_noise8_ssse3(pix1, stride, h) -
+ ff_hf_noise8_ssse3(pix2, stride, h);
if (c)
return score1 + FFABS(score2) * c->c.avctx->nsse_weight;
@@ -121,14 +116,11 @@ static int nsse8_mmx(MPVEncContext *c, const uint8_t *pix1, const uint8_t *pix2,
av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
{
+#if HAVE_X86ASM
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMX(cpu_flags)) {
c->sse[1] = ff_sse8_mmx;
-#if HAVE_X86ASM
- c->nsse[0] = nsse16_mmx;
- c->nsse[1] = nsse8_mmx;
-#endif
}
if (EXTERNAL_MMXEXT(cpu_flags)) {
@@ -191,10 +183,14 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
}
if (EXTERNAL_SSSE3(cpu_flags)) {
+ c->nsse[0] = nsse16_ssse3;
+ c->nsse[1] = nsse8_ssse3;
+
c->sum_abs_dctelem = ff_sum_abs_dctelem_ssse3;
#if HAVE_ALIGNED_STACK
c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
#endif
}
+#endif
}
--
2.49.1
>From 22067082c467e4f55c7c2b968b4b486674865490 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 2 Nov 2025 21:36:25 +0100
Subject: [PATCH 5/5] avcodec/x86/me_cmp: Remove MMX sse functions
They are overridden by the SSE2 versions and are no longer needed now that
the nsse MMX functions have been removed. Saves 240B here.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/me_cmp.asm | 29 ++++++-----------------------
libavcodec/x86/me_cmp_init.c | 8 --------
2 files changed, 6 insertions(+), 31 deletions(-)
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index 770a6f22ec..4545eae276 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -282,8 +282,8 @@ INIT_XMM ssse3
%define ABS_SUM_8x8 ABS_SUM_8x8_64
HADAMARD8_DIFF 9
-; int ff_sse*_*(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
-; ptrdiff_t line_size, int h)
+; int ff_sse*_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
+; ptrdiff_t line_size, int h)
%macro SUM_SQUARED_ERRORS 1
cglobal sse%1, 5,5,%1 < mmsize ? 6 : 8, v, pix1, pix2, lsize, h
@@ -305,15 +305,10 @@ cglobal sse%1, 5,5,%1 < mmsize ? 6 : 8, v, pix1, pix2, lsize, h
pmaddwd m1, m1
pmaddwd m3, m3
%else
- movu m1, [pix1q] ; m1 = pix1[0][0-15], [0-7] for mmx
- movu m2, [pix2q] ; m2 = pix2[0][0-15], [0-7] for mmx
-%if %1 == mmsize
- movu m3, [pix1q+lsizeq] ; m3 = pix1[1][0-15], [0-7] for mmx
- movu m4, [pix2q+lsizeq] ; m4 = pix2[1][0-15], [0-7] for mmx
-%else ; %1 / 2 == mmsize; mmx only
- mova m3, [pix1q+8] ; m3 = pix1[0][8-15]
- mova m4, [pix2q+8] ; m4 = pix2[0][8-15]
-%endif
+ movu m1, [pix1q] ; m1 = pix1[0][0-15]
+ movu m2, [pix2q] ; m2 = pix2[0][0-15]
+ movu m3, [pix1q+lsizeq] ; m3 = pix1[1][0-15]
+ movu m4, [pix2q+lsizeq] ; m4 = pix2[1][0-15]
; todo: mm1-mm2, mm3-mm4
; algo: subtract mm1 from mm2 with saturation and vice versa
@@ -348,15 +343,9 @@ cglobal sse%1, 5,5,%1 < mmsize ? 6 : 8, v, pix1, pix2, lsize, h
paddd m5, m1
paddd m5, m3
-%if %1 <= mmsize
lea pix1q, [pix1q + 2*lsizeq]
lea pix2q, [pix2q + 2*lsizeq]
sub hd, 2
-%else
- add pix1q, lsizeq
- add pix2q, lsizeq
- dec hd
-%endif
jnz .next2lines
HADDD m5, m1
@@ -364,12 +353,6 @@ cglobal sse%1, 5,5,%1 < mmsize ? 6 : 8, v, pix1, pix2, lsize, h
RET
%endmacro
-INIT_MMX mmx
-SUM_SQUARED_ERRORS 8
-
-INIT_MMX mmx
-SUM_SQUARED_ERRORS 16
-
INIT_XMM sse2
SUM_SQUARED_ERRORS 8
SUM_SQUARED_ERRORS 16
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index e166af8dab..d4503eef3b 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -30,12 +30,8 @@
int ff_sum_abs_dctelem_sse2(const int16_t *block);
int ff_sum_abs_dctelem_ssse3(const int16_t *block);
-int ff_sse8_mmx(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
- ptrdiff_t stride, int h);
int ff_sse8_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
-int ff_sse16_mmx(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
- ptrdiff_t stride, int h);
int ff_sse16_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
ptrdiff_t stride, int h);
int ff_hf_noise8_ssse3(const uint8_t *pix1, ptrdiff_t stride, int h);
@@ -119,10 +115,6 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
#if HAVE_X86ASM
int cpu_flags = av_get_cpu_flags();
- if (EXTERNAL_MMX(cpu_flags)) {
- c->sse[1] = ff_sse8_mmx;
- }
-
if (EXTERNAL_MMXEXT(cpu_flags)) {
#if !HAVE_ALIGNED_STACK
c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
--
2.49.1
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads:[~2025-11-02 22:07 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=176212121236.25.5438835603124650670@2cb04c0e5124 \
--to=ffmpeg-devel@ffmpeg.org \
--cc=code@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git