* [FFmpeg-devel] [PATCH 1/2] fate/ffmpeg: Add bitexact flag for ffmpeg-input-r test
@ 2023-09-04 10:30 Andreas Rheinhardt
2023-09-04 10:33 ` [FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_vp3: Merge into hpeldsp Andreas Rheinhardt
0 siblings, 1 reply; 3+ messages in thread
From: Andreas Rheinhardt @ 2023-09-04 10:30 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Andreas Rheinhardt
Fixes the test when the non-bitexact MMXEXT versions of
the hpeldsp functions are used.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
tests/fate/ffmpeg.mak | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/fate/ffmpeg.mak b/tests/fate/ffmpeg.mak
index 2b3135b6a6..04500d53a0 100644
--- a/tests/fate/ffmpeg.mak
+++ b/tests/fate/ffmpeg.mak
@@ -232,7 +232,7 @@ fate-time_base: CMD = md5 -i $(TARGET_SAMPLES)/mpeg2/dvd_single_frame.vob -an -s
FATE_SAMPLES_FFMPEG-yes += $(FATE_TIME_BASE-yes)
# test -r used as an input option
-fate-ffmpeg-input-r: CMD = framecrc -r 27 -idct simple -i $(TARGET_SAMPLES)/mpeg2/sony-ct3.bs
+fate-ffmpeg-input-r: CMD = framecrc -r 27 -idct simple -bitexact -i $(TARGET_SAMPLES)/mpeg2/sony-ct3.bs
FATE_SAMPLES_FFMPEG-$(call FRAMECRC, MPEGVIDEO, MPEG2VIDEO) += fate-ffmpeg-input-r
# file with completely undecodable TTA audio stream
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* [FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_vp3: Merge into hpeldsp
2023-09-04 10:30 [FFmpeg-devel] [PATCH 1/2] fate/ffmpeg: Add bitexact flag for ffmpeg-input-r test Andreas Rheinhardt
@ 2023-09-04 10:33 ` Andreas Rheinhardt
2023-09-06 9:37 ` Andreas Rheinhardt
0 siblings, 1 reply; 3+ messages in thread
From: Andreas Rheinhardt @ 2023-09-04 10:33 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Andreas Rheinhardt
Once upon a time, 413abbe16465a7b49472ac110e42939e853e24a1
added versions of some put_no_rnd_pixels functions for use
in VP3 and Theora (with an explicit check so that they are
only used for VP3 and Theora). When this was moved to hpeldsp
(from dsputil) in 3ced55d51c2e65b37e50d500dff88bcd80e01b9c,
the check was replaced by a check for the bitexact flag
(and a CONFIG_VP3_DECODER compile-time check), so that
these functions were now used for other codecs as well.
Later commit 1dfc3cf89d0eb026af28be46294b85d79499ffb5
split off the "VP3-specific bits into a separate file",
yet these bits were not really VP3-specific bits at all
any more. (The error was repeated in commit
0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553.) Commit
1dfc3cf89d0eb026af28be46294b85d79499ffb5 has not been reverted
so far, because doing so would have made merging future
changes from Libav (from where it originated) harder;
yet Libav is no more, so this commit effectively reverts it.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/Makefile | 2 -
libavcodec/x86/hpeldsp.asm | 77 ++++++++++++++++++++++++
libavcodec/x86/hpeldsp.h | 4 --
libavcodec/x86/hpeldsp_init.c | 14 +++--
libavcodec/x86/hpeldsp_vp3.asm | 99 -------------------------------
libavcodec/x86/hpeldsp_vp3_init.c | 43 --------------
6 files changed, 86 insertions(+), 153 deletions(-)
delete mode 100644 libavcodec/x86/hpeldsp_vp3.asm
delete mode 100644 libavcodec/x86/hpeldsp_vp3_init.c
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 118daca333..b4cc5e0d08 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -75,7 +75,6 @@ OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp_init.o
OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o
OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \
x86/vp9dsp_init_10bpp.o \
@@ -192,7 +191,6 @@ X86ASM-OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp.o
X86ASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
X86ASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
X86ASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
-X86ASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o
X86ASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
X86ASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
x86/vp9intrapred_16bpp.o \
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 7a2b7135d8..3bc278618c 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -165,6 +165,47 @@ cglobal put_no_rnd_pixels8_x2, 4,5
RET
+; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+INIT_MMX mmxext
+cglobal put_no_rnd_pixels8_x2_exact, 4,5
+ lea r4, [r2*3]
+ pcmpeqb m6, m6
+.loop:
+ mova m0, [r1]
+ mova m2, [r1+r2]
+ mova m1, [r1+1]
+ mova m3, [r1+r2+1]
+ pxor m0, m6
+ pxor m2, m6
+ pxor m1, m6
+ pxor m3, m6
+ PAVGB m0, m1
+ PAVGB m2, m3
+ pxor m0, m6
+ pxor m2, m6
+ mova [r0], m0
+ mova [r0+r2], m2
+ mova m0, [r1+r2*2]
+ mova m1, [r1+r2*2+1]
+ mova m2, [r1+r4]
+ mova m3, [r1+r4+1]
+ pxor m0, m6
+ pxor m1, m6
+ pxor m2, m6
+ pxor m3, m6
+ PAVGB m0, m1
+ PAVGB m2, m3
+ pxor m0, m6
+ pxor m2, m6
+ mova [r0+r2*2], m0
+ mova [r0+r4], m2
+ lea r1, [r1+r2*4]
+ lea r0, [r0+r2*4]
+ sub r3d, 4
+ jg .loop
+ RET
+
+
; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro PUT_PIXELS8_Y2 0
%if cpuflag(sse2)
@@ -235,6 +276,42 @@ cglobal put_no_rnd_pixels8_y2, 4,5
RET
+; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+INIT_MMX mmxext
+cglobal put_no_rnd_pixels8_y2_exact, 4,5
+ lea r4, [r2*3]
+ mova m0, [r1]
+ pcmpeqb m6, m6
+ add r1, r2
+ pxor m0, m6
+.loop:
+ mova m1, [r1]
+ mova m2, [r1+r2]
+ pxor m1, m6
+ pxor m2, m6
+ PAVGB m0, m1
+ PAVGB m1, m2
+ pxor m0, m6
+ pxor m1, m6
+ mova [r0], m0
+ mova [r0+r2], m1
+ mova m1, [r1+r2*2]
+ mova m0, [r1+r4]
+ pxor m1, m6
+ pxor m0, m6
+ PAVGB m2, m1
+ PAVGB m1, m0
+ pxor m2, m6
+ pxor m1, m6
+ mova [r0+r2*2], m2
+ mova [r0+r4], m1
+ lea r1, [r1+r2*4]
+ lea r0, [r0+r2*4]
+ sub r3d, 4
+ jg .loop
+ RET
+
+
; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8_X2 0
%if cpuflag(sse2)
diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h
index fd740da72e..ac7e625fda 100644
--- a/libavcodec/x86/hpeldsp.h
+++ b/libavcodec/x86/hpeldsp.h
@@ -22,8 +22,6 @@
#include <stddef.h>
#include <stdint.h>
-#include "libavcodec/hpeldsp.h"
-
void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
@@ -50,6 +48,4 @@ void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
-
#endif /* AVCODEC_X86_HPELDSP_H */
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index 09c48c341e..f08c66f5c8 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -22,8 +22,6 @@
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
*/
-#include "config_components.h"
-
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
@@ -47,10 +45,16 @@ void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
+ const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -183,6 +187,9 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
+ c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
+ c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+
if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
@@ -235,7 +242,4 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
if (EXTERNAL_SSSE3(cpu_flags))
hpeldsp_init_ssse3(c, flags);
-
- if (CONFIG_VP3_DECODER)
- ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
}
diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
deleted file mode 100644
index e580133e45..0000000000
--- a/libavcodec/x86/hpeldsp_vp3.asm
+++ /dev/null
@@ -1,99 +0,0 @@
-;******************************************************************************
-;* SIMD-optimized halfpel functions for VP3
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;******************************************************************************
-
-%include "libavutil/x86/x86util.asm"
-
-SECTION .text
-
-; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-INIT_MMX mmxext
-cglobal put_no_rnd_pixels8_x2_exact, 4,5
- lea r4, [r2*3]
- pcmpeqb m6, m6
-.loop:
- mova m0, [r1]
- mova m2, [r1+r2]
- mova m1, [r1+1]
- mova m3, [r1+r2+1]
- pxor m0, m6
- pxor m2, m6
- pxor m1, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0], m0
- mova [r0+r2], m2
- mova m0, [r1+r2*2]
- mova m1, [r1+r2*2+1]
- mova m2, [r1+r4]
- mova m3, [r1+r4+1]
- pxor m0, m6
- pxor m1, m6
- pxor m2, m6
- pxor m3, m6
- PAVGB m0, m1
- PAVGB m2, m3
- pxor m0, m6
- pxor m2, m6
- mova [r0+r2*2], m0
- mova [r0+r4], m2
- lea r1, [r1+r2*4]
- lea r0, [r0+r2*4]
- sub r3d, 4
- jg .loop
- RET
-
-
-; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-INIT_MMX mmxext
-cglobal put_no_rnd_pixels8_y2_exact, 4,5
- lea r4, [r2*3]
- mova m0, [r1]
- pcmpeqb m6, m6
- add r1, r2
- pxor m0, m6
-.loop:
- mova m1, [r1]
- mova m2, [r1+r2]
- pxor m1, m6
- pxor m2, m6
- PAVGB m0, m1
- PAVGB m1, m2
- pxor m0, m6
- pxor m1, m6
- mova [r0], m0
- mova [r0+r2], m1
- mova m1, [r1+r2*2]
- mova m0, [r1+r4]
- pxor m1, m6
- pxor m0, m6
- PAVGB m2, m1
- PAVGB m1, m0
- pxor m2, m6
- pxor m1, m6
- mova [r0+r2*2], m2
- mova [r0+r4], m1
- lea r1, [r1+r2*4]
- lea r0, [r0+r2*4]
- sub r3d, 4
- jg .loop
- RET
diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c
deleted file mode 100644
index 1dbd1ba6f9..0000000000
--- a/libavcodec/x86/hpeldsp_vp3_init.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/attributes.h"
-#include "libavutil/cpu.h"
-#include "libavutil/x86/cpu.h"
-
-#include "libavcodec/avcodec.h"
-#include "libavcodec/hpeldsp.h"
-
-#include "hpeldsp.h"
-
-void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
-void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
- const uint8_t *pixels,
- ptrdiff_t line_size, int h);
-
-av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
-{
- if (EXTERNAL_MMXEXT(cpu_flags)) {
- if (flags & AV_CODEC_FLAG_BITEXACT) {
- c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
- c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
- }
- }
-}
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_vp3: Merge into hpeldsp
2023-09-04 10:33 ` [FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_vp3: Merge into hpeldsp Andreas Rheinhardt
@ 2023-09-06 9:37 ` Andreas Rheinhardt
0 siblings, 0 replies; 3+ messages in thread
From: Andreas Rheinhardt @ 2023-09-06 9:37 UTC (permalink / raw)
To: ffmpeg-devel
Andreas Rheinhardt:
> Once upon a time, 413abbe16465a7b49472ac110e42939e853e24a1
> added versions of some put_no_rnd_pixels functions for use
> in VP3 and Theora (with an explicit check so that they are
> only used for VP3 and Theora). When this was moved to hpeldsp
> (from dsputil) in 3ced55d51c2e65b37e50d500dff88bcd80e01b9c,
> the check was replaced by a check for the bitexact flag
> (and a CONFIG_VP3_DECODER compile-time check), so that
> these functions were now used for other codecs as well.
>
> Later commit 1dfc3cf89d0eb026af28be46294b85d79499ffb5
> split off the "VP3-specific bits into a separate file",
> yet these bits were not really VP3-specific bits at all
> any more. (The error was repeated in commit
> 0a39c9ac0bfd7345fe676b4e2707d9cec3cbb553.) Commit
> 1dfc3cf89d0eb026af28be46294b85d79499ffb5 has not been reverted
> so far, because doing so would have made merging future
> changes from Libav (from where it originated) harder;
> yet Libav is no more, so this commit effectively reverts it.
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
> libavcodec/x86/Makefile | 2 -
> libavcodec/x86/hpeldsp.asm | 77 ++++++++++++++++++++++++
> libavcodec/x86/hpeldsp.h | 4 --
> libavcodec/x86/hpeldsp_init.c | 14 +++--
> libavcodec/x86/hpeldsp_vp3.asm | 99 -------------------------------
> libavcodec/x86/hpeldsp_vp3_init.c | 43 --------------
> 6 files changed, 86 insertions(+), 153 deletions(-)
> delete mode 100644 libavcodec/x86/hpeldsp_vp3.asm
> delete mode 100644 libavcodec/x86/hpeldsp_vp3_init.c
>
> diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
> index 118daca333..b4cc5e0d08 100644
> --- a/libavcodec/x86/Makefile
> +++ b/libavcodec/x86/Makefile
> @@ -75,7 +75,6 @@ OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp_init.o
> OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
> OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc_init.o
> OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o
> -OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3_init.o
> OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp_init.o
> OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o \
> x86/vp9dsp_init_10bpp.o \
> @@ -192,7 +191,6 @@ X86ASM-OBJS-$(CONFIG_UTVIDEO_DECODER) += x86/utvideodsp.o
> X86ASM-OBJS-$(CONFIG_V210_ENCODER) += x86/v210enc.o
> X86ASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
> X86ASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
> -X86ASM-OBJS-$(CONFIG_VP3_DECODER) += x86/hpeldsp_vp3.o
> X86ASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp6dsp.o
> X86ASM-OBJS-$(CONFIG_VP9_DECODER) += x86/vp9intrapred.o \
> x86/vp9intrapred_16bpp.o \
> diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
> index 7a2b7135d8..3bc278618c 100644
> --- a/libavcodec/x86/hpeldsp.asm
> +++ b/libavcodec/x86/hpeldsp.asm
> @@ -165,6 +165,47 @@ cglobal put_no_rnd_pixels8_x2, 4,5
> RET
>
>
> +; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> +INIT_MMX mmxext
> +cglobal put_no_rnd_pixels8_x2_exact, 4,5
> + lea r4, [r2*3]
> + pcmpeqb m6, m6
> +.loop:
> + mova m0, [r1]
> + mova m2, [r1+r2]
> + mova m1, [r1+1]
> + mova m3, [r1+r2+1]
> + pxor m0, m6
> + pxor m2, m6
> + pxor m1, m6
> + pxor m3, m6
> + PAVGB m0, m1
> + PAVGB m2, m3
> + pxor m0, m6
> + pxor m2, m6
> + mova [r0], m0
> + mova [r0+r2], m2
> + mova m0, [r1+r2*2]
> + mova m1, [r1+r2*2+1]
> + mova m2, [r1+r4]
> + mova m3, [r1+r4+1]
> + pxor m0, m6
> + pxor m1, m6
> + pxor m2, m6
> + pxor m3, m6
> + PAVGB m0, m1
> + PAVGB m2, m3
> + pxor m0, m6
> + pxor m2, m6
> + mova [r0+r2*2], m0
> + mova [r0+r4], m2
> + lea r1, [r1+r2*4]
> + lea r0, [r0+r2*4]
> + sub r3d, 4
> + jg .loop
> + RET
> +
> +
> ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> %macro PUT_PIXELS8_Y2 0
> %if cpuflag(sse2)
> @@ -235,6 +276,42 @@ cglobal put_no_rnd_pixels8_y2, 4,5
> RET
>
>
> +; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> +INIT_MMX mmxext
> +cglobal put_no_rnd_pixels8_y2_exact, 4,5
> + lea r4, [r2*3]
> + mova m0, [r1]
> + pcmpeqb m6, m6
> + add r1, r2
> + pxor m0, m6
> +.loop:
> + mova m1, [r1]
> + mova m2, [r1+r2]
> + pxor m1, m6
> + pxor m2, m6
> + PAVGB m0, m1
> + PAVGB m1, m2
> + pxor m0, m6
> + pxor m1, m6
> + mova [r0], m0
> + mova [r0+r2], m1
> + mova m1, [r1+r2*2]
> + mova m0, [r1+r4]
> + pxor m1, m6
> + pxor m0, m6
> + PAVGB m2, m1
> + PAVGB m1, m0
> + pxor m2, m6
> + pxor m1, m6
> + mova [r0+r2*2], m2
> + mova [r0+r4], m1
> + lea r1, [r1+r2*4]
> + lea r0, [r0+r2*4]
> + sub r3d, 4
> + jg .loop
> + RET
> +
> +
> ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> %macro AVG_PIXELS8_X2 0
> %if cpuflag(sse2)
> diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h
> index fd740da72e..ac7e625fda 100644
> --- a/libavcodec/x86/hpeldsp.h
> +++ b/libavcodec/x86/hpeldsp.h
> @@ -22,8 +22,6 @@
> #include <stddef.h>
> #include <stdint.h>
>
> -#include "libavcodec/hpeldsp.h"
> -
> void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
>
> @@ -50,6 +48,4 @@ void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
> void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
>
> -void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags);
> -
> #endif /* AVCODEC_X86_HPELDSP_H */
> diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
> index 09c48c341e..f08c66f5c8 100644
> --- a/libavcodec/x86/hpeldsp_init.c
> +++ b/libavcodec/x86/hpeldsp_init.c
> @@ -22,8 +22,6 @@
> * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
> */
>
> -#include "config_components.h"
> -
> #include "libavutil/attributes.h"
> #include "libavutil/cpu.h"
> #include "libavutil/x86/cpu.h"
> @@ -47,10 +45,16 @@ void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> +void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
> + const uint8_t *pixels,
> + ptrdiff_t line_size, int h);
> void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> +void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
> + const uint8_t *pixels,
> + ptrdiff_t line_size, int h);
> void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
> ptrdiff_t line_size, int h);
> void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
> @@ -183,6 +187,9 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
> c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
> c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
>
> + c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
> + c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
> +
> if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
> c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
> c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
> @@ -235,7 +242,4 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
>
> if (EXTERNAL_SSSE3(cpu_flags))
> hpeldsp_init_ssse3(c, flags);
> -
> - if (CONFIG_VP3_DECODER)
> - ff_hpeldsp_vp3_init_x86(c, cpu_flags, flags);
> }
> diff --git a/libavcodec/x86/hpeldsp_vp3.asm b/libavcodec/x86/hpeldsp_vp3.asm
> deleted file mode 100644
> index e580133e45..0000000000
> --- a/libavcodec/x86/hpeldsp_vp3.asm
> +++ /dev/null
> @@ -1,99 +0,0 @@
> -;******************************************************************************
> -;* SIMD-optimized halfpel functions for VP3
> -;*
> -;* This file is part of FFmpeg.
> -;*
> -;* FFmpeg is free software; you can redistribute it and/or
> -;* modify it under the terms of the GNU Lesser General Public
> -;* License as published by the Free Software Foundation; either
> -;* version 2.1 of the License, or (at your option) any later version.
> -;*
> -;* FFmpeg is distributed in the hope that it will be useful,
> -;* but WITHOUT ANY WARRANTY; without even the implied warranty of
> -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> -;* Lesser General Public License for more details.
> -;*
> -;* You should have received a copy of the GNU Lesser General Public
> -;* License along with FFmpeg; if not, write to the Free Software
> -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> -;******************************************************************************
> -
> -%include "libavutil/x86/x86util.asm"
> -
> -SECTION .text
> -
> -; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> -INIT_MMX mmxext
> -cglobal put_no_rnd_pixels8_x2_exact, 4,5
> - lea r4, [r2*3]
> - pcmpeqb m6, m6
> -.loop:
> - mova m0, [r1]
> - mova m2, [r1+r2]
> - mova m1, [r1+1]
> - mova m3, [r1+r2+1]
> - pxor m0, m6
> - pxor m2, m6
> - pxor m1, m6
> - pxor m3, m6
> - PAVGB m0, m1
> - PAVGB m2, m3
> - pxor m0, m6
> - pxor m2, m6
> - mova [r0], m0
> - mova [r0+r2], m2
> - mova m0, [r1+r2*2]
> - mova m1, [r1+r2*2+1]
> - mova m2, [r1+r4]
> - mova m3, [r1+r4+1]
> - pxor m0, m6
> - pxor m1, m6
> - pxor m2, m6
> - pxor m3, m6
> - PAVGB m0, m1
> - PAVGB m2, m3
> - pxor m0, m6
> - pxor m2, m6
> - mova [r0+r2*2], m0
> - mova [r0+r4], m2
> - lea r1, [r1+r2*4]
> - lea r0, [r0+r2*4]
> - sub r3d, 4
> - jg .loop
> - RET
> -
> -
> -; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
> -INIT_MMX mmxext
> -cglobal put_no_rnd_pixels8_y2_exact, 4,5
> - lea r4, [r2*3]
> - mova m0, [r1]
> - pcmpeqb m6, m6
> - add r1, r2
> - pxor m0, m6
> -.loop:
> - mova m1, [r1]
> - mova m2, [r1+r2]
> - pxor m1, m6
> - pxor m2, m6
> - PAVGB m0, m1
> - PAVGB m1, m2
> - pxor m0, m6
> - pxor m1, m6
> - mova [r0], m0
> - mova [r0+r2], m1
> - mova m1, [r1+r2*2]
> - mova m0, [r1+r4]
> - pxor m1, m6
> - pxor m0, m6
> - PAVGB m2, m1
> - PAVGB m1, m0
> - pxor m2, m6
> - pxor m1, m6
> - mova [r0+r2*2], m2
> - mova [r0+r4], m1
> - lea r1, [r1+r2*4]
> - lea r0, [r0+r2*4]
> - sub r3d, 4
> - jg .loop
> - RET
> diff --git a/libavcodec/x86/hpeldsp_vp3_init.c b/libavcodec/x86/hpeldsp_vp3_init.c
> deleted file mode 100644
> index 1dbd1ba6f9..0000000000
> --- a/libavcodec/x86/hpeldsp_vp3_init.c
> +++ /dev/null
> @@ -1,43 +0,0 @@
> -/*
> - * This file is part of FFmpeg.
> - *
> - * FFmpeg is free software; you can redistribute it and/or
> - * modify it under the terms of the GNU Lesser General Public
> - * License as published by the Free Software Foundation; either
> - * version 2.1 of the License, or (at your option) any later version.
> - *
> - * FFmpeg is distributed in the hope that it will be useful,
> - * but WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - * Lesser General Public License for more details.
> - *
> - * You should have received a copy of the GNU Lesser General Public
> - * License along with FFmpeg; if not, write to the Free Software
> - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> - */
> -
> -#include "libavutil/attributes.h"
> -#include "libavutil/cpu.h"
> -#include "libavutil/x86/cpu.h"
> -
> -#include "libavcodec/avcodec.h"
> -#include "libavcodec/hpeldsp.h"
> -
> -#include "hpeldsp.h"
> -
> -void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
> - const uint8_t *pixels,
> - ptrdiff_t line_size, int h);
> -void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
> - const uint8_t *pixels,
> - ptrdiff_t line_size, int h);
> -
> -av_cold void ff_hpeldsp_vp3_init_x86(HpelDSPContext *c, int cpu_flags, int flags)
> -{
> - if (EXTERNAL_MMXEXT(cpu_flags)) {
> - if (flags & AV_CODEC_FLAG_BITEXACT) {
> - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
> - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
> - }
> - }
> -}
Will apply this tomorrow unless there are objections.
- Andreas
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-09-06 9:51 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-04 10:30 [FFmpeg-devel] [PATCH 1/2] fate/ffmpeg: Add bitexact flag for ffmpeg-input-r test Andreas Rheinhardt
2023-09-04 10:33 ` [FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_vp3: Merge into hpeldsp Andreas Rheinhardt
2023-09-06 9:37 ` Andreas Rheinhardt
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git