* [FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_init: Avoid using ff_avg_pixels16_mmx
2024-02-16 20:29 [FFmpeg-devel] [PATCH 1/2] avcodec/x86/fpel: Remove declarations of inexistent functions Andreas Rheinhardt
@ 2024-02-16 20:30 ` Andreas Rheinhardt
2024-02-17 0:12 ` [FFmpeg-devel] [PATCH 3/4] avcodec/h264qpel_template: Mark pointers as non-aliasing Andreas Rheinhardt
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Andreas Rheinhardt @ 2024-02-16 20:30 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Andreas Rheinhardt
Use ff_avg_pixels16_mmxext or ff_avg_pixels16_sse2
(for users with SSE2_FAST) instead.
This also allows removing ff_avg_pixels16_mmx,
as this was its last remaining user.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/fpel.asm | 1 -
libavcodec/x86/fpel.h | 2 --
libavcodec/x86/hpeldsp_init.c | 15 +++++++--------
3 files changed, 7 insertions(+), 11 deletions(-)
diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm
index ebe8e43750..ecaca3c080 100644
--- a/libavcodec/x86/fpel.asm
+++ b/libavcodec/x86/fpel.asm
@@ -92,7 +92,6 @@ INIT_MMX mmx
OP_PIXELS put, 4
OP_PIXELS put, 8
OP_PIXELS put, 16
-OP_PIXELS avg, 16
INIT_MMX mmxext
OP_PIXELS avg, 4
diff --git a/libavcodec/x86/fpel.h b/libavcodec/x86/fpel.h
index 90f7051a48..c533ca40b2 100644
--- a/libavcodec/x86/fpel.h
+++ b/libavcodec/x86/fpel.h
@@ -26,8 +26,6 @@ void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h);
void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index f08c66f5c8..4a0513d06d 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -62,11 +62,9 @@ void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-#define avg_pixels16_mmx ff_avg_pixels16_mmx
#define put_pixels8_mmx ff_put_pixels8_mmx
#define put_pixels16_mmx ff_put_pixels16_mmx
#define put_pixels8_xy2_mmx ff_put_pixels8_xy2_mmx
-#define avg_no_rnd_pixels16_mmx ff_avg_pixels16_mmx
#define put_no_rnd_pixels8_mmx ff_put_pixels8_mmx
#define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
@@ -138,7 +136,6 @@ HPELDSP_AVG_PIXELS16(_mmxext)
if (HAVE_MMX_EXTERNAL) \
c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU
-#if HAVE_MMX_INLINE
#define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU) \
do { \
SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU); \
@@ -149,10 +146,6 @@ HPELDSP_AVG_PIXELS16(_mmxext)
c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
} while (0)
-#else
-#define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU) SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)
-#define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU) ((void)0)
-#endif
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
do { \
SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU); \
@@ -161,11 +154,14 @@ HPELDSP_AVG_PIXELS16(_mmxext)
static void hpeldsp_init_mmx(HpelDSPContext *c, int flags)
{
+#if HAVE_MMX_INLINE
SET_HPEL_FUNCS03(put, [0], 16, mmx);
SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
- SET_HPEL_FUNCS(avg_no_rnd, , 16, mmx);
+ SET_HPEL_FUNCS12(avg_no_rnd, , 16, mmx);
+ c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_mmx;
SET_HPEL_FUNCS03(put, [1], 8, mmx);
SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx);
+#endif
}
static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
@@ -190,6 +186,8 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
+ c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_mmxext;
+
if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
@@ -214,6 +212,7 @@ static void hpeldsp_init_sse2_fast(HpelDSPContext *c, int flags)
c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_sse2;
c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_sse2;
c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_sse2;
+ c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_sse2;
#endif /* HAVE_SSE2_EXTERNAL */
}
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* [FFmpeg-devel] [PATCH 3/4] avcodec/h264qpel_template: Mark pointers as non-aliasing
2024-02-16 20:29 [FFmpeg-devel] [PATCH 1/2] avcodec/x86/fpel: Remove declarations of inexistent functions Andreas Rheinhardt
2024-02-16 20:30 ` [FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_init: Avoid using ff_avg_pixels16_mmx Andreas Rheinhardt
@ 2024-02-17 0:12 ` Andreas Rheinhardt
2024-02-17 0:12 ` [FFmpeg-devel] [PATCH 4/4] avcodec/x86/h264_qpel: Remove put_h264_qpel[48]_mmxext Andreas Rheinhardt
2024-02-18 23:14 ` [FFmpeg-devel] [PATCH 1/2] avcodec/x86/fpel: Remove declarations of inexistent functions Andreas Rheinhardt
3 siblings, 0 replies; 5+ messages in thread
From: Andreas Rheinhardt @ 2024-02-17 0:12 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Andreas Rheinhardt
Marking the pointers as non-aliasing allows the compiler to combine
two reads and writes of adjacent 32-bit memory locations into single
64-bit reads and writes.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/h264qpel_template.c | 93 +++++++++++++++++++---------------
1 file changed, 52 insertions(+), 41 deletions(-)
diff --git a/libavcodec/h264qpel_template.c b/libavcodec/h264qpel_template.c
index 27c5b8f17f..61fa55c689 100644
--- a/libavcodec/h264qpel_template.c
+++ b/libavcodec/h264qpel_template.c
@@ -26,7 +26,7 @@
#include "hpel_template.c"
#include "pel_template.c"
-static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *av_restrict src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
@@ -37,7 +37,7 @@ static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstSt
}
}
-static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *av_restrict src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
@@ -48,7 +48,7 @@ static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstSt
}
}
-static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *av_restrict src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
@@ -60,7 +60,7 @@ static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstSt
}
}
-static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *av_restrict src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
@@ -75,12 +75,13 @@ static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstS
}
#define H264_LOWPASS(OPNAME, OP, OP2) \
-static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *p_dst, const uint8_t *p_src, int dstStride, int srcStride){\
+static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *p_dst, const uint8_t *av_restrict p_src, int dstStride, int srcStride)\
+{\
const int h=2;\
INIT_CLIP\
int i;\
pixel *dst = (pixel*)p_dst;\
- const pixel *src = (const pixel*)p_src;\
+ const pixel *av_restrict src = (const pixel*)p_src;\
dstStride >>= sizeof(pixel)-1;\
srcStride >>= sizeof(pixel)-1;\
for(i=0; i<h; i++)\
@@ -92,12 +93,13 @@ static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *p_dst, const
}\
}\
\
-static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
+static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, const uint8_t *av_restrict _src, int dstStride, int srcStride)\
+{\
const int w=2;\
INIT_CLIP\
int i;\
pixel *dst = (pixel*)_dst;\
- const pixel *src = (const pixel*)_src;\
+ const pixel *av_restrict src = (const pixel*)_src;\
dstStride >>= sizeof(pixel)-1;\
srcStride >>= sizeof(pixel)-1;\
for(i=0; i<w; i++)\
@@ -116,14 +118,15 @@ static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, const
}\
}\
\
-static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
+static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *av_restrict _src, int dstStride, int tmpStride, int srcStride)\
+{\
const int h=2;\
const int w=2;\
const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
INIT_CLIP\
int i;\
pixel *dst = (pixel*)_dst;\
- const pixel *src = (const pixel*)_src;\
+ const pixel *av_restrict src = (const pixel*)_src;\
dstStride >>= sizeof(pixel)-1;\
srcStride >>= sizeof(pixel)-1;\
src -= 2*srcStride;\
@@ -150,12 +153,13 @@ static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, pixel
tmp++;\
}\
}\
-static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, const uint8_t *av_restrict _src, int dstStride, int srcStride)\
+{\
const int h=4;\
INIT_CLIP\
int i;\
pixel *dst = (pixel*)_dst;\
- const pixel *src = (const pixel*)_src;\
+ const pixel *av_restrict src = (const pixel*)_src;\
dstStride >>= sizeof(pixel)-1;\
srcStride >>= sizeof(pixel)-1;\
for(i=0; i<h; i++)\
@@ -169,12 +173,13 @@ static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, const uint8_t *_
}\
}\
\
-static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, const uint8_t *av_restrict _src, int dstStride, int srcStride)\
+{\
const int w=4;\
INIT_CLIP\
int i;\
pixel *dst = (pixel*)_dst;\
- const pixel *src = (const pixel*)_src;\
+ const pixel *av_restrict src = (const pixel*)_src;\
dstStride >>= sizeof(pixel)-1;\
srcStride >>= sizeof(pixel)-1;\
for(i=0; i<w; i++)\
@@ -197,14 +202,15 @@ static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, const uint8_t *_
}\
}\
\
-static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *av_restrict _src, int dstStride, int tmpStride, int srcStride)\
+{\
const int h=4;\
const int w=4;\
const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
INIT_CLIP\
int i;\
pixel *dst = (pixel*)_dst;\
- const pixel *src = (const pixel*)_src;\
+ const pixel *av_restrict src = (const pixel*)_src;\
dstStride >>= sizeof(pixel)-1;\
srcStride >>= sizeof(pixel)-1;\
src -= 2*srcStride;\
@@ -238,12 +244,13 @@ static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp,
}\
}\
\
-static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, const uint8_t *av_restrict _src, int dstStride, int srcStride)\
+{\
const int h=8;\
INIT_CLIP\
int i;\
pixel *dst = (pixel*)_dst;\
- const pixel *src = (const pixel*)_src;\
+ const pixel *av_restrict src = (const pixel*)_src;\
dstStride >>= sizeof(pixel)-1;\
srcStride >>= sizeof(pixel)-1;\
for(i=0; i<h; i++)\
@@ -261,12 +268,13 @@ static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, const uint8_t *_
}\
}\
\
-static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, const uint8_t *av_restrict _src, int dstStride, int srcStride)\
+{\
const int w=8;\
INIT_CLIP\
int i;\
pixel *dst = (pixel*)_dst;\
- const pixel *src = (const pixel*)_src;\
+ const pixel *av_restrict src = (const pixel*)_src;\
dstStride >>= sizeof(pixel)-1;\
srcStride >>= sizeof(pixel)-1;\
for(i=0; i<w; i++)\
@@ -297,14 +305,15 @@ static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, const uint8_t *_
}\
}\
\
-static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *av_restrict _src, int dstStride, int tmpStride, int srcStride)\
+{\
const int h=8;\
const int w=8;\
const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
INIT_CLIP\
int i;\
pixel *dst = (pixel*)_dst;\
- const pixel *src = (const pixel*)_src;\
+ const pixel *av_restrict src = (const pixel*)_src;\
dstStride >>= sizeof(pixel)-1;\
srcStride >>= sizeof(pixel)-1;\
src -= 2*srcStride;\
@@ -350,7 +359,8 @@ static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp,
}\
}\
\
-static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, const uint8_t *av_restrict src, int dstStride, int srcStride)\
+{\
FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\
FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
src += 8*srcStride;\
@@ -359,7 +369,8 @@ static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, const uint8_t *s
FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
}\
\
-static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, const uint8_t *av_restrict src, int dstStride, int srcStride)\
+{\
FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\
FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
src += 8*srcStride;\
@@ -368,7 +379,7 @@ static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, const uint8_t *s
FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
}\
\
-static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, pixeltmp *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, pixeltmp *tmp, const uint8_t *av_restrict src, int dstStride, int tmpStride, int srcStride){\
FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\
FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
src += 8*srcStride;\
@@ -378,31 +389,31 @@ static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, pixeltmp *tmp,
}\
#define H264_MC(OPNAME, SIZE) \
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t half[SIZE*SIZE*sizeof(pixel)];\
FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t half[SIZE*SIZE*sizeof(pixel)];\
FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
@@ -412,7 +423,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, const uint
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
@@ -420,7 +431,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, const uint
FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
@@ -430,7 +441,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, const uint
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
@@ -442,7 +453,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, const uint
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
@@ -454,7 +465,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, const uint
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
@@ -466,7 +477,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, const uint
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
@@ -478,13 +489,13 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, const uint
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
@@ -494,7 +505,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, const uint
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
@@ -504,7 +515,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, const uint
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
@@ -517,7 +528,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, const uint
FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\
}\
\
-static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, const uint8_t *av_restrict src, ptrdiff_t stride)\
{\
uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread
* [FFmpeg-devel] [PATCH 4/4] avcodec/x86/h264_qpel: Remove put_h264_qpel[48]_mmxext
2024-02-16 20:29 [FFmpeg-devel] [PATCH 1/2] avcodec/x86/fpel: Remove declarations of inexistent functions Andreas Rheinhardt
2024-02-16 20:30 ` [FFmpeg-devel] [PATCH 2/2] avcodec/x86/hpeldsp_init: Avoid using ff_avg_pixels16_mmx Andreas Rheinhardt
2024-02-17 0:12 ` [FFmpeg-devel] [PATCH 3/4] avcodec/h264qpel_template: Mark pointers as non-aliasing Andreas Rheinhardt
@ 2024-02-17 0:12 ` Andreas Rheinhardt
2024-02-18 23:14 ` [FFmpeg-devel] [PATCH 1/2] avcodec/x86/fpel: Remove declarations of inexistent functions Andreas Rheinhardt
3 siblings, 0 replies; 5+ messages in thread
From: Andreas Rheinhardt @ 2024-02-17 0:12 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Andreas Rheinhardt
These functions are not faster than the C versions.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/x86/fpel.asm | 1 -
libavcodec/x86/fpel.h | 2 --
libavcodec/x86/h264_qpel.c | 32 +++++++++++++++++++++-----------
3 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm
index ecaca3c080..278d1410fc 100644
--- a/libavcodec/x86/fpel.asm
+++ b/libavcodec/x86/fpel.asm
@@ -89,7 +89,6 @@ cglobal %1_pixels%2, 4,5,4
%endmacro
INIT_MMX mmx
-OP_PIXELS put, 4
OP_PIXELS put, 8
OP_PIXELS put, 16
diff --git a/libavcodec/x86/fpel.h b/libavcodec/x86/fpel.h
index c533ca40b2..47ffc8eec7 100644
--- a/libavcodec/x86/fpel.h
+++ b/libavcodec/x86/fpel.h
@@ -30,8 +30,6 @@ void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_put_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h);
void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index 2df4c11f82..d69ccda89c 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -47,8 +47,8 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t
#define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
#define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext
#define ff_put_pixels16_mmxext ff_put_pixels16_mmx
-#define ff_put_pixels8_mmxext ff_put_pixels8_mmx
-#define ff_put_pixels4_mmxext ff_put_pixels4_mmx
+#define ff_put_pixels8_mmxext(...)
+#define ff_put_pixels4_mmxext(...)
#define DEF_QPEL(OPNAME)\
void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
@@ -217,11 +217,10 @@ static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src,
{
ff_avg_pixels16_sse2(dst, src, stride, 16);
}
-#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext
#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext
#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
+static void av_unused OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
}\
@@ -424,16 +423,20 @@ LUMA_MC_816(10, mc33, sse2)
#endif /* HAVE_X86ASM */
-#define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX) \
+#define SET_QPEL_FUNCS123(PFX, IDX, SIZE, CPU, PREFIX) \
do { \
- c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
} while (0)
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
+#define SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX) \
do { \
- SET_QPEL_FUNCS0123(PFX, IDX, SIZE, CPU, PREFIX); \
+ c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
+ SET_QPEL_FUNCS123(PFX, IDX, SIZE, CPU, PREFIX); \
+ } while (0)
+#define SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX) \
+ do { \
+ SET_QPEL_FUNCS123(PFX, IDX, SIZE, CPU, PREFIX); \
c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
@@ -447,6 +450,11 @@ LUMA_MC_816(10, mc33, sse2)
c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
} while (0)
+#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
+ do { \
+ c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
+ SET_QPEL_FUNCS_1PP(PFX, IDX, SIZE, CPU, PREFIX); \
+ } while (0)
#define H264_QPEL_FUNCS(x, y, CPU) \
do { \
@@ -473,8 +481,8 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
if (EXTERNAL_MMXEXT(cpu_flags)) {
if (!high_bit_depth) {
SET_QPEL_FUNCS0123(put_h264_qpel, 0, 16, mmxext, );
- SET_QPEL_FUNCS0123(put_h264_qpel, 1, 8, mmxext, );
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, );
+ SET_QPEL_FUNCS123 (put_h264_qpel, 1, 8, mmxext, );
+ SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, );
SET_QPEL_FUNCS0123(avg_h264_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS0123(avg_h264_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, );
@@ -513,7 +521,9 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth)
if (EXTERNAL_SSE2_FAST(cpu_flags)) {
if (!high_bit_depth) {
- H264_QPEL_FUNCS(0, 0, sse2);
+ c->put_h264_qpel_pixels_tab[0][0] = put_h264_qpel16_mc00_sse2;
+ c->avg_h264_qpel_pixels_tab[0][0] = avg_h264_qpel16_mc00_sse2;
+ c->avg_h264_qpel_pixels_tab[1][0] = avg_h264_qpel8_mc00_sse2;
}
}
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 5+ messages in thread