From e9f30cea6185b9f9def4f5110d0067751acdf8d0 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Thu, 3 Apr 2025 18:44:47 +0200 Subject: [PATCH 13/23] avcodec/x86/hevc/dsp_init: Make ff_hevc_put_bi_[eq]?pel* funcs static Given that there are actually ASM functions of this type, one can't simply remove the ff_ prefix from the definitions and declare them as static. Yet one can do nearly that if one keeps the ff_ prefix and removes the declarations for the (now static) functions defined in dsp_init.c from hevcdsp.h and if one defines the functions in the correct order (smaller width first) so that no forward declarations are necessary (which was already true). The new declarations avoid nested macros to simplify things. It nevertheless turned out to be beneficial line-wise. (It would be possible to avoid most of these declarations: It is legal to repeat a function declaration without static if the first declaration declared a function as static. So if the macros simply declared all the functions that they call, one could avoid declarations for the functions that are called. While this is legal C, it unfortunately clashes with GCC's -Wredundant-decls (which configure enables) and it is also ugly, as these macro definitions would provide declarations used in ff_hevc_dsp_init_x86().) 
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/hevc/dsp_init.c | 8 +- libavcodec/x86/hevcdsp.h | 224 ++++++++++++++------------------- 2 files changed, 95 insertions(+), 137 deletions(-) diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c index f8057dee9f..dc4157b071 100644 --- a/libavcodec/x86/hevc/dsp_init.c +++ b/libavcodec/x86/hevc/dsp_init.c @@ -217,7 +217,7 @@ static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dst } \ } #define mc_rep_bi_func(name, bitd, step, W, opt) \ -void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \ +static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \ ptrdiff_t _srcstride, const int16_t *_src2, \ int height, intptr_t mx, intptr_t my, int width) \ { \ @@ -257,7 +257,7 @@ static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dsts height, mx, my, width); \ } #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \ -void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ +static void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ ptrdiff_t _srcstride, const int16_t *src2, \ int height, intptr_t mx, intptr_t my, int width) \ { \ @@ -284,7 +284,7 @@ static void hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src } #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ -void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ +static void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ ptrdiff_t _srcstride, const int16_t *src2, \ int height, intptr_t mx, intptr_t my, int width) \ { \ @@ -320,7 +320,7 @@ static void hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, } 
#define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ -void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ +static void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ ptrdiff_t _srcstride, const int16_t *src2, \ int height, intptr_t mx, intptr_t my, int width) \ { \ diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h index b89a67f2e9..b18d9449d7 100644 --- a/libavcodec/x86/hevcdsp.h +++ b/libavcodec/x86/hevcdsp.h @@ -37,35 +37,17 @@ dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt; dst ## _bi_w[idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt -#define PEL_PROTOTYPE(name, D, opt) \ -void ff_hevc_put_bi_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); \ +typedef void bi_pel_func(uint8_t *_dst, ptrdiff_t _dststride, + const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, + int height, intptr_t mx, intptr_t my, int width); +#define BI_PEL_PROTOTYPE(name, W, D, opt) \ +bi_pel_func ff_hevc_put_bi_ ## name ## W ## _ ## D ## _##opt /////////////////////////////////////////////////////////////////////////////// // MC functions /////////////////////////////////////////////////////////////////////////////// -#define EPEL_PROTOTYPES(fname, bitd, opt) \ - PEL_PROTOTYPE(fname##4, bitd, opt); \ - PEL_PROTOTYPE(fname##6, bitd, opt); \ - PEL_PROTOTYPE(fname##8, bitd, opt); \ - PEL_PROTOTYPE(fname##12, bitd, opt); \ - PEL_PROTOTYPE(fname##16, bitd, opt); \ - PEL_PROTOTYPE(fname##24, bitd, opt); \ - PEL_PROTOTYPE(fname##32, bitd, opt); \ - PEL_PROTOTYPE(fname##48, bitd, opt); \ - PEL_PROTOTYPE(fname##64, bitd, opt) - -#define QPEL_PROTOTYPES(fname, bitd, opt) \ - PEL_PROTOTYPE(fname##4, bitd, opt); \ - PEL_PROTOTYPE(fname##8, bitd, opt); \ - PEL_PROTOTYPE(fname##12, bitd, 
opt); \ - PEL_PROTOTYPE(fname##16, bitd, opt); \ - PEL_PROTOTYPE(fname##24, bitd, opt); \ - PEL_PROTOTYPE(fname##32, bitd, opt); \ - PEL_PROTOTYPE(fname##48, bitd, opt); \ - PEL_PROTOTYPE(fname##64, bitd, opt) - #define WEIGHTING_PROTOTYPE(width, bitd, opt) \ void ff_hevc_put_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, int height, int denom, int _wx, int _ox); \ void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, const int16_t *_src2, int height, int denom, int _wx0, int _wx1, int _ox0, int _ox1) @@ -83,125 +65,101 @@ void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, /////////////////////////////////////////////////////////////////////////////// -// QPEL_PIXELS EPEL_PIXELS +// EPEL_PIXELS /////////////////////////////////////////////////////////////////////////////// -EPEL_PROTOTYPES(pel_pixels , 8, sse4); -EPEL_PROTOTYPES(pel_pixels , 10, sse4); -EPEL_PROTOTYPES(pel_pixels , 12, sse4); - -void ff_hevc_put_bi_pel_pixels16_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels24_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); - -void 
ff_hevc_put_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); + +BI_PEL_PROTOTYPE(pel_pixels, 4, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 4, 10, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 4, 12, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 6, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 6, 10, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 6, 12, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 8, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 8, 10, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 8, 12, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 12, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 16, 8, sse4); +BI_PEL_PROTOTYPE(pel_pixels, 16, 10, avx2); +BI_PEL_PROTOTYPE(pel_pixels, 32, 8, avx2); /////////////////////////////////////////////////////////////////////////////// // EPEL /////////////////////////////////////////////////////////////////////////////// -EPEL_PROTOTYPES(epel_h , 8, sse4); -EPEL_PROTOTYPES(epel_h , 10, sse4); -EPEL_PROTOTYPES(epel_h , 12, sse4); - -EPEL_PROTOTYPES(epel_v , 8, sse4); -EPEL_PROTOTYPES(epel_v , 10, sse4); -EPEL_PROTOTYPES(epel_v , 12, sse4); - -EPEL_PROTOTYPES(epel_hv , 8, sse4); -EPEL_PROTOTYPES(epel_hv , 10, sse4); 
-EPEL_PROTOTYPES(epel_hv , 12, sse4); - -PEL_PROTOTYPE(epel_h16, 8, avx2); -PEL_PROTOTYPE(epel_h24, 8, avx2); -PEL_PROTOTYPE(epel_h32, 8, avx2); -PEL_PROTOTYPE(epel_h48, 8, avx2); -PEL_PROTOTYPE(epel_h64, 8, avx2); - -PEL_PROTOTYPE(epel_h16,10, avx2); -PEL_PROTOTYPE(epel_h24,10, avx2); -PEL_PROTOTYPE(epel_h32,10, avx2); -PEL_PROTOTYPE(epel_h48,10, avx2); -PEL_PROTOTYPE(epel_h64,10, avx2); - -PEL_PROTOTYPE(epel_v16, 8, avx2); -PEL_PROTOTYPE(epel_v24, 8, avx2); -PEL_PROTOTYPE(epel_v32, 8, avx2); -PEL_PROTOTYPE(epel_v48, 8, avx2); -PEL_PROTOTYPE(epel_v64, 8, avx2); - -PEL_PROTOTYPE(epel_v16,10, avx2); -PEL_PROTOTYPE(epel_v24,10, avx2); -PEL_PROTOTYPE(epel_v32,10, avx2); -PEL_PROTOTYPE(epel_v48,10, avx2); -PEL_PROTOTYPE(epel_v64,10, avx2); - -PEL_PROTOTYPE(epel_hv16, 8, avx2); -PEL_PROTOTYPE(epel_hv24, 8, avx2); -PEL_PROTOTYPE(epel_hv32, 8, avx2); -PEL_PROTOTYPE(epel_hv48, 8, avx2); -PEL_PROTOTYPE(epel_hv64, 8, avx2); - -PEL_PROTOTYPE(epel_hv16,10, avx2); -PEL_PROTOTYPE(epel_hv24,10, avx2); -PEL_PROTOTYPE(epel_hv32,10, avx2); -PEL_PROTOTYPE(epel_hv48,10, avx2); -PEL_PROTOTYPE(epel_hv64,10, avx2); + +BI_PEL_PROTOTYPE(epel_h, 4, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 4, 10, sse4); +BI_PEL_PROTOTYPE(epel_h, 4, 12, sse4); +BI_PEL_PROTOTYPE(epel_h, 6, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 6, 10, sse4); +BI_PEL_PROTOTYPE(epel_h, 6, 12, sse4); +BI_PEL_PROTOTYPE(epel_h, 8, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 8, 10, sse4); +BI_PEL_PROTOTYPE(epel_h, 8, 12, sse4); +BI_PEL_PROTOTYPE(epel_h, 12, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 16, 8, sse4); +BI_PEL_PROTOTYPE(epel_h, 16, 10, avx2); +BI_PEL_PROTOTYPE(epel_h, 32, 8, avx2); + +BI_PEL_PROTOTYPE(epel_hv, 4, 8, sse4); +BI_PEL_PROTOTYPE(epel_hv, 4, 10, sse4); +BI_PEL_PROTOTYPE(epel_hv, 4, 12, sse4); +BI_PEL_PROTOTYPE(epel_hv, 6, 8, sse4); +BI_PEL_PROTOTYPE(epel_hv, 6, 10, sse4); +BI_PEL_PROTOTYPE(epel_hv, 6, 12, sse4); +BI_PEL_PROTOTYPE(epel_hv, 8, 8, sse4); +BI_PEL_PROTOTYPE(epel_hv, 8, 10, sse4); +BI_PEL_PROTOTYPE(epel_hv, 8, 12, sse4); 
+BI_PEL_PROTOTYPE(epel_hv, 16, 8, sse4); +BI_PEL_PROTOTYPE(epel_hv, 16, 10, avx2); +BI_PEL_PROTOTYPE(epel_hv, 32, 8, avx2); + +BI_PEL_PROTOTYPE(epel_v, 4, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 4, 10, sse4); +BI_PEL_PROTOTYPE(epel_v, 4, 12, sse4); +BI_PEL_PROTOTYPE(epel_v, 6, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 6, 10, sse4); +BI_PEL_PROTOTYPE(epel_v, 6, 12, sse4); +BI_PEL_PROTOTYPE(epel_v, 8, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 8, 10, sse4); +BI_PEL_PROTOTYPE(epel_v, 8, 12, sse4); +BI_PEL_PROTOTYPE(epel_v, 12, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 16, 8, sse4); +BI_PEL_PROTOTYPE(epel_v, 16, 10, avx2); +BI_PEL_PROTOTYPE(epel_v, 32, 8, avx2); /////////////////////////////////////////////////////////////////////////////// // QPEL /////////////////////////////////////////////////////////////////////////////// -QPEL_PROTOTYPES(qpel_h , 8, sse4); -QPEL_PROTOTYPES(qpel_h , 10, sse4); -QPEL_PROTOTYPES(qpel_h , 12, sse4); - -QPEL_PROTOTYPES(qpel_v, 8, sse4); -QPEL_PROTOTYPES(qpel_v, 10, sse4); -QPEL_PROTOTYPES(qpel_v, 12, sse4); - -QPEL_PROTOTYPES(qpel_hv, 8, sse4); -QPEL_PROTOTYPES(qpel_hv, 10, sse4); -QPEL_PROTOTYPES(qpel_hv, 12, sse4); - -PEL_PROTOTYPE(qpel_h16, 8, avx2); -PEL_PROTOTYPE(qpel_h24, 8, avx2); -PEL_PROTOTYPE(qpel_h32, 8, avx2); -PEL_PROTOTYPE(qpel_h48, 8, avx2); -PEL_PROTOTYPE(qpel_h64, 8, avx2); - -PEL_PROTOTYPE(qpel_h16,10, avx2); -PEL_PROTOTYPE(qpel_h24,10, avx2); -PEL_PROTOTYPE(qpel_h32,10, avx2); -PEL_PROTOTYPE(qpel_h48,10, avx2); -PEL_PROTOTYPE(qpel_h64,10, avx2); - -PEL_PROTOTYPE(qpel_v16, 8, avx2); -PEL_PROTOTYPE(qpel_v24, 8, avx2); -PEL_PROTOTYPE(qpel_v32, 8, avx2); -PEL_PROTOTYPE(qpel_v48, 8, avx2); -PEL_PROTOTYPE(qpel_v64, 8, avx2); - -PEL_PROTOTYPE(qpel_v16,10, avx2); -PEL_PROTOTYPE(qpel_v24,10, avx2); -PEL_PROTOTYPE(qpel_v32,10, avx2); -PEL_PROTOTYPE(qpel_v48,10, avx2); -PEL_PROTOTYPE(qpel_v64,10, avx2); - -PEL_PROTOTYPE(qpel_hv16, 8, avx2); -PEL_PROTOTYPE(qpel_hv24, 8, avx2); -PEL_PROTOTYPE(qpel_hv32, 8, avx2); -PEL_PROTOTYPE(qpel_hv48, 8, avx2); 
-PEL_PROTOTYPE(qpel_hv64, 8, avx2); - -PEL_PROTOTYPE(qpel_hv16,10, avx2); -PEL_PROTOTYPE(qpel_hv24,10, avx2); -PEL_PROTOTYPE(qpel_hv32,10, avx2); -PEL_PROTOTYPE(qpel_hv48,10, avx2); -PEL_PROTOTYPE(qpel_hv64,10, avx2); + +BI_PEL_PROTOTYPE(qpel_h, 4, 8, sse4); +BI_PEL_PROTOTYPE(qpel_h, 4, 10, sse4); +BI_PEL_PROTOTYPE(qpel_h, 4, 12, sse4); +BI_PEL_PROTOTYPE(qpel_h, 8, 8, sse4); +BI_PEL_PROTOTYPE(qpel_h, 8, 10, sse4); +BI_PEL_PROTOTYPE(qpel_h, 8, 12, sse4); +BI_PEL_PROTOTYPE(qpel_h, 12, 8, sse4); +BI_PEL_PROTOTYPE(qpel_h, 16, 8, sse4); +BI_PEL_PROTOTYPE(qpel_h, 16, 10, avx2); +BI_PEL_PROTOTYPE(qpel_h, 32, 8, avx2); + +BI_PEL_PROTOTYPE(qpel_hv, 4, 8, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 4, 10, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 4, 12, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 8, 8, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 8, 10, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 8, 12, sse4); +BI_PEL_PROTOTYPE(qpel_hv, 16, 10, avx2); + +BI_PEL_PROTOTYPE(qpel_v, 4, 8, sse4); +BI_PEL_PROTOTYPE(qpel_v, 4, 10, sse4); +BI_PEL_PROTOTYPE(qpel_v, 4, 12, sse4); +BI_PEL_PROTOTYPE(qpel_v, 8, 8, sse4); +BI_PEL_PROTOTYPE(qpel_v, 8, 10, sse4); +BI_PEL_PROTOTYPE(qpel_v, 8, 12, sse4); +BI_PEL_PROTOTYPE(qpel_v, 12, 8, sse4); +BI_PEL_PROTOTYPE(qpel_v, 16, 8, sse4); +BI_PEL_PROTOTYPE(qpel_v, 16, 10, avx2); +BI_PEL_PROTOTYPE(qpel_v, 32, 8, avx2); WEIGHTING_PROTOTYPES(8, sse4); WEIGHTING_PROTOTYPES(10, sse4); -- 2.45.2