From 1f4940ce3e6ecba1c34c760bf17a73907749a071 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Thu, 3 Apr 2025 16:55:18 +0200 Subject: [PATCH 11/23] avcodec/x86/hevc/dsp_init: Make ff_hevc_put_uni_[qe]?pel* funcs static Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/hevc/dsp_init.c | 166 +++++++++++++++++---------------- libavcodec/x86/hevcdsp.h | 10 +- 2 files changed, 88 insertions(+), 88 deletions(-) diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c index b9b4837461..f2ca51b6b0 100644 --- a/libavcodec/x86/hevc/dsp_init.c +++ b/libavcodec/x86/hevc/dsp_init.c @@ -100,7 +100,7 @@ void ff_hevc_put_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, } #define FW_PUT_UNI(p, a, b, depth, opt) \ -void ff_hevc_put_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \ +static void hevc_put_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \ const uint8_t *src, ptrdiff_t srcstride, \ int height, intptr_t mx, intptr_t my, int width) \ { \ @@ -203,7 +203,7 @@ void ff_hevc_put_##name##W##_##bitd##_##opt(int16_t *_dst, } \ } #define mc_rep_uni_func(name, bitd, step, W, opt) \ -void ff_hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \ +static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \ const uint8_t *_src, ptrdiff_t _srcstride, int height, \ intptr_t mx, intptr_t my, int width) \ { \ @@ -212,7 +212,7 @@ void ff_hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststri for (i = 0; i < W; i += step) { \ const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \ dst = _dst + (i * ((bitd + 7) / 8)); \ - ff_hevc_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \ + hevc_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \ height, mx, my, width); \ } \ } @@ -247,12 +247,12 @@ void ff_hevc_put_##name##W##_##bitd##_##opt(int16_t *dst, _srcstride, height, mx, my, width); \ } #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \ -void ff_hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \ +static void hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \ const uint8_t *src, ptrdiff_t _srcstride, int height, \ intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); \ - ff_hevc_put_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ + hevc_put_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); \ + hevc_put_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ src + (step1 * ((bitd + 7) / 8)), _srcstride, \ height, mx, my, width); \ } @@ -295,13 +295,13 @@ void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride } #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ -void ff_hevc_put_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \ +static void hevc_put_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \ const uint8_t *src, ptrdiff_t _srcstride, int height, \ intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \ + hevc_put_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \ height, mx, my, width); \ - ff_hevc_put_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \ + hevc_put_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \ height, mx, my, width); \ } @@ -331,13 +331,13 @@ void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, } #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ -void ff_hevc_put_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \ +static void hevc_put_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \ const uint8_t *src, ptrdiff_t _srcstride, int height, \ intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ + hevc_put_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ height, mx, my, width); \ - ff_hevc_put_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ + hevc_put_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ height, mx, my, width); \ } @@ -365,11 +365,11 @@ mc_rep_mixs_10(qpel_v , 24, 16, 8, avx2, sse4, 32) mc_rep_mixs_10(qpel_hv, 24, 16, 8, avx2, sse4, 32) +mc_rep_funcs(pel_pixels, 8, 32, 64, avx2) + mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit -mc_rep_funcs(pel_pixels, 8, 32, 64, avx2) - mc_rep_func(pel_pixels, 10, 16, 32, avx2) mc_rep_func(pel_pixels, 10, 16, 48, avx2) mc_rep_func(pel_pixels, 10, 32, 64, avx2) @@ -880,10 +880,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2; c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2; } +#if HAVE_AVX2_EXTERNAL if (EXTERNAL_AVX2_FAST(cpu_flags)) { c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2; c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2; - if (ARCH_X86_64) { + +#if ARCH_X86_64 c->put_hevc_epel[7][0][0] = ff_hevc_put_pel_pixels32_8_avx2; c->put_hevc_epel[8][0][0] = ff_hevc_put_pel_pixels48_8_avx2; c->put_hevc_epel[9][0][0] = ff_hevc_put_pel_pixels64_8_avx2; @@ -892,13 +894,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel[8][0][0] = ff_hevc_put_pel_pixels48_8_avx2; c->put_hevc_qpel[9][0][0] = ff_hevc_put_pel_pixels64_8_avx2; - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_uni_pel_pixels32_8_avx2; - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_uni_pel_pixels48_8_avx2; - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_uni_pel_pixels64_8_avx2; + c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2; + c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2; + c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2; - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_uni_pel_pixels32_8_avx2; - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_uni_pel_pixels48_8_avx2; - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_uni_pel_pixels64_8_avx2; + c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels32_8_avx2; + c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels48_8_avx2; + c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels64_8_avx2; c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2; c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2; @@ -912,9 +914,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_epel[8][0][1] = ff_hevc_put_epel_h48_8_avx2; c->put_hevc_epel[9][0][1] = ff_hevc_put_epel_h64_8_avx2; - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_uni_epel_h32_8_avx2; - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_uni_epel_h48_8_avx2; - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_uni_epel_h64_8_avx2; + c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_8_avx2; + c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_8_avx2; + c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_8_avx2; c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_8_avx2; c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_8_avx2; @@ -924,9 +926,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_epel[8][1][0] = ff_hevc_put_epel_v48_8_avx2; c->put_hevc_epel[9][1][0] = ff_hevc_put_epel_v64_8_avx2; - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_uni_epel_v32_8_avx2; - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_uni_epel_v48_8_avx2; - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_uni_epel_v64_8_avx2; + c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_8_avx2; + c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_8_avx2; + c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_8_avx2; c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_8_avx2; c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_8_avx2; @@ -936,9 +938,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_epel[8][1][1] = ff_hevc_put_epel_hv48_8_avx2; c->put_hevc_epel[9][1][1] = ff_hevc_put_epel_hv64_8_avx2; - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_uni_epel_hv32_8_avx2; - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_uni_epel_hv48_8_avx2; - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_uni_epel_hv64_8_avx2; + c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_8_avx2; + c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_8_avx2; + c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_8_avx2; c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_8_avx2; c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_8_avx2; @@ -952,13 +954,13 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel[8][1][0] = ff_hevc_put_qpel_v48_8_avx2; c->put_hevc_qpel[9][1][0] = ff_hevc_put_qpel_v64_8_avx2; - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_uni_qpel_h32_8_avx2; - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_uni_qpel_h48_8_avx2; - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_uni_qpel_h64_8_avx2; + c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_8_avx2; + c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_8_avx2; + c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_8_avx2; - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_uni_qpel_v32_8_avx2; - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_uni_qpel_v48_8_avx2; - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_uni_qpel_v64_8_avx2; + c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_8_avx2; + c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_8_avx2; + c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_8_avx2; c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_8_avx2; c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_8_avx2; @@ -967,7 +969,8 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_8_avx2; c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_8_avx2; c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_8_avx2; - } +#endif /* ARCH_X86_64 */ + SAO_BAND_INIT(8, avx2); c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_8_avx2; @@ -976,6 +979,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->add_residual[3] = ff_hevc_add_residual_32_8_avx2; } +#endif /* HAVE_AVX2_EXTERNAL */ if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) { c->put_hevc_qpel[1][0][1] = ff_hevc_put_qpel_h4_8_avx512icl; c->put_hevc_qpel[3][0][1] = ff_hevc_put_qpel_h8_8_avx512icl; @@ -1049,10 +1053,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) if (EXTERNAL_AVX2(cpu_flags)) { c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2; } +#if HAVE_AVX2_EXTERNAL if (EXTERNAL_AVX2_FAST(cpu_flags)) { c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2; c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2; - if (ARCH_X86_64) { + +#if ARCH_X86_64 c->put_hevc_epel[5][0][0] = ff_hevc_put_pel_pixels16_10_avx2; c->put_hevc_epel[6][0][0] = ff_hevc_put_pel_pixels24_10_avx2; c->put_hevc_epel[7][0][0] = ff_hevc_put_pel_pixels32_10_avx2; @@ -1065,17 +1071,17 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel[8][0][0] = ff_hevc_put_pel_pixels48_10_avx2; c->put_hevc_qpel[9][0][0] = ff_hevc_put_pel_pixels64_10_avx2; - c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_uni_pel_pixels32_8_avx2; - c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_uni_pel_pixels48_8_avx2; - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_uni_pel_pixels64_8_avx2; - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_uni_pel_pixels96_8_avx2; - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_uni_pel_pixels128_8_avx2; + c->put_hevc_epel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2; + c->put_hevc_epel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2; + c->put_hevc_epel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2; + c->put_hevc_epel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2; + c->put_hevc_epel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2; - c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_uni_pel_pixels32_8_avx2; - c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_uni_pel_pixels48_8_avx2; - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_uni_pel_pixels64_8_avx2; - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_uni_pel_pixels96_8_avx2; - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_uni_pel_pixels128_8_avx2; + c->put_hevc_qpel_uni[5][0][0] = hevc_put_uni_pel_pixels32_8_avx2; + c->put_hevc_qpel_uni[6][0][0] = hevc_put_uni_pel_pixels48_8_avx2; + c->put_hevc_qpel_uni[7][0][0] = hevc_put_uni_pel_pixels64_8_avx2; + c->put_hevc_qpel_uni[8][0][0] = hevc_put_uni_pel_pixels96_8_avx2; + c->put_hevc_qpel_uni[9][0][0] = hevc_put_uni_pel_pixels128_8_avx2; c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2; c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2; @@ -1094,11 +1100,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_epel[8][0][1] = ff_hevc_put_epel_h48_10_avx2; c->put_hevc_epel[9][0][1] = ff_hevc_put_epel_h64_10_avx2; - c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_uni_epel_h16_10_avx2; - c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_uni_epel_h24_10_avx2; - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_uni_epel_h32_10_avx2; - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_uni_epel_h48_10_avx2; - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_uni_epel_h64_10_avx2; + c->put_hevc_epel_uni[5][0][1] = hevc_put_uni_epel_h16_10_avx2; + c->put_hevc_epel_uni[6][0][1] = hevc_put_uni_epel_h24_10_avx2; + c->put_hevc_epel_uni[7][0][1] = hevc_put_uni_epel_h32_10_avx2; + c->put_hevc_epel_uni[8][0][1] = hevc_put_uni_epel_h48_10_avx2; + c->put_hevc_epel_uni[9][0][1] = hevc_put_uni_epel_h64_10_avx2; c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_bi_epel_h16_10_avx2; c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_bi_epel_h24_10_avx2; @@ -1112,11 +1118,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_epel[8][1][0] = ff_hevc_put_epel_v48_10_avx2; c->put_hevc_epel[9][1][0] = ff_hevc_put_epel_v64_10_avx2; - c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_uni_epel_v16_10_avx2; - c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_uni_epel_v24_10_avx2; - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_uni_epel_v32_10_avx2; - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_uni_epel_v48_10_avx2; - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_uni_epel_v64_10_avx2; + c->put_hevc_epel_uni[5][1][0] = hevc_put_uni_epel_v16_10_avx2; + c->put_hevc_epel_uni[6][1][0] = hevc_put_uni_epel_v24_10_avx2; + c->put_hevc_epel_uni[7][1][0] = hevc_put_uni_epel_v32_10_avx2; + c->put_hevc_epel_uni[8][1][0] = hevc_put_uni_epel_v48_10_avx2; + c->put_hevc_epel_uni[9][1][0] = hevc_put_uni_epel_v64_10_avx2; c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_bi_epel_v16_10_avx2; c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_bi_epel_v24_10_avx2; @@ -1130,11 +1136,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_epel[8][1][1] = ff_hevc_put_epel_hv48_10_avx2; c->put_hevc_epel[9][1][1] = ff_hevc_put_epel_hv64_10_avx2; - c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_uni_epel_hv16_10_avx2; - c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_uni_epel_hv24_10_avx2; - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_uni_epel_hv32_10_avx2; - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_uni_epel_hv48_10_avx2; - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_uni_epel_hv64_10_avx2; + c->put_hevc_epel_uni[5][1][1] = hevc_put_uni_epel_hv16_10_avx2; + c->put_hevc_epel_uni[6][1][1] = hevc_put_uni_epel_hv24_10_avx2; + c->put_hevc_epel_uni[7][1][1] = hevc_put_uni_epel_hv32_10_avx2; + c->put_hevc_epel_uni[8][1][1] = hevc_put_uni_epel_hv48_10_avx2; + c->put_hevc_epel_uni[9][1][1] = hevc_put_uni_epel_hv64_10_avx2; c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_bi_epel_hv16_10_avx2; c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_bi_epel_hv24_10_avx2; @@ -1148,11 +1154,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel[8][0][1] = ff_hevc_put_qpel_h48_10_avx2; c->put_hevc_qpel[9][0][1] = ff_hevc_put_qpel_h64_10_avx2; - c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_uni_qpel_h16_10_avx2; - c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_uni_qpel_h24_10_avx2; - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_uni_qpel_h32_10_avx2; - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_uni_qpel_h48_10_avx2; - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_uni_qpel_h64_10_avx2; + c->put_hevc_qpel_uni[5][0][1] = hevc_put_uni_qpel_h16_10_avx2; + c->put_hevc_qpel_uni[6][0][1] = hevc_put_uni_qpel_h24_10_avx2; + c->put_hevc_qpel_uni[7][0][1] = hevc_put_uni_qpel_h32_10_avx2; + c->put_hevc_qpel_uni[8][0][1] = hevc_put_uni_qpel_h48_10_avx2; + c->put_hevc_qpel_uni[9][0][1] = hevc_put_uni_qpel_h64_10_avx2; c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_bi_qpel_h16_10_avx2; c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_bi_qpel_h24_10_avx2; @@ -1166,11 +1172,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel[8][1][0] = ff_hevc_put_qpel_v48_10_avx2; c->put_hevc_qpel[9][1][0] = ff_hevc_put_qpel_v64_10_avx2; - c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_uni_qpel_v16_10_avx2; - c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_uni_qpel_v24_10_avx2; - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_uni_qpel_v32_10_avx2; - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_uni_qpel_v48_10_avx2; - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_uni_qpel_v64_10_avx2; + c->put_hevc_qpel_uni[5][1][0] = hevc_put_uni_qpel_v16_10_avx2; + c->put_hevc_qpel_uni[6][1][0] = hevc_put_uni_qpel_v24_10_avx2; + c->put_hevc_qpel_uni[7][1][0] = hevc_put_uni_qpel_v32_10_avx2; + c->put_hevc_qpel_uni[8][1][0] = hevc_put_uni_qpel_v48_10_avx2; + c->put_hevc_qpel_uni[9][1][0] = hevc_put_uni_qpel_v64_10_avx2; c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_bi_qpel_v16_10_avx2; c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_bi_qpel_v24_10_avx2; @@ -1184,24 +1190,26 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel[8][1][1] = ff_hevc_put_qpel_hv48_10_avx2; c->put_hevc_qpel[9][1][1] = ff_hevc_put_qpel_hv64_10_avx2; - c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_uni_qpel_hv16_10_avx2; - c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_uni_qpel_hv24_10_avx2; - c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_uni_qpel_hv32_10_avx2; - c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_uni_qpel_hv48_10_avx2; - c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_uni_qpel_hv64_10_avx2; + c->put_hevc_qpel_uni[5][1][1] = hevc_put_uni_qpel_hv16_10_avx2; + c->put_hevc_qpel_uni[6][1][1] = hevc_put_uni_qpel_hv24_10_avx2; + c->put_hevc_qpel_uni[7][1][1] = hevc_put_uni_qpel_hv32_10_avx2; + c->put_hevc_qpel_uni[8][1][1] = hevc_put_uni_qpel_hv48_10_avx2; + c->put_hevc_qpel_uni[9][1][1] = hevc_put_uni_qpel_hv64_10_avx2; c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_bi_qpel_hv16_10_avx2; c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_bi_qpel_hv24_10_avx2; c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_bi_qpel_hv32_10_avx2; c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_bi_qpel_hv48_10_avx2; c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_bi_qpel_hv64_10_avx2; - } +#endif /* ARCH_X86_64 */ + SAO_BAND_INIT(10, avx2); SAO_EDGE_INIT(10, avx2); c->add_residual[2] = ff_hevc_add_residual_16_10_avx2; c->add_residual[3] = ff_hevc_add_residual_32_10_avx2; } +#endif /* HAVE_AVX2_EXTERNAL */ } else if (bit_depth == 12) { if (EXTERNAL_MMXEXT(cpu_flags)) { c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_mmxext; diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h index 2627585bda..4bf74ef99e 100644 --- a/libavcodec/x86/hevcdsp.h +++ b/libavcodec/x86/hevcdsp.h @@ -32,7 +32,7 @@ #define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt) \ dst[idx1][idx2][idx3] = ff_hevc_put_ ## name ## _ ## D ## _##opt; \ dst ## _bi[idx1][idx2][idx3] = ff_hevc_put_bi_ ## name ## _ ## D ## _##opt; \ -dst ## _uni[idx1][idx2][idx3] = ff_hevc_put_uni_ ## name ## _ ## D ## _##opt; \ +dst ## _uni[idx1][idx2][idx3] = hevc_put_uni_ ## name ## _ ## D ## _##opt; \ dst ## _uni_w[idx1][idx2][idx3] = hevc_put_uni_w_ ## name ## _ ## D ## _##opt; \ dst ## _bi_w[idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt @@ -40,7 +40,6 @@ dst ## _bi_w[idx1][idx2][idx3] = hevc_put_bi_w_ ## name ## _ ## D ## _##opt #define PEL_PROTOTYPE(name, D, opt) \ void ff_hevc_put_ ## name ## _ ## D ## _##opt(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); \ void ff_hevc_put_bi_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); \ -void ff_hevc_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); \ /////////////////////////////////////////////////////////////////////////////// @@ -105,13 +104,6 @@ void ff_hevc_put_pel_pixels64_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff -void ff_hevc_put_uni_pel_pixels32_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_uni_pel_pixels48_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_uni_pel_pixels64_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_uni_pel_pixels96_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); //used for 10bit -void ff_hevc_put_uni_pel_pixels128_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width);//used for 10bit - - void ff_hevc_put_bi_pel_pixels16_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); void ff_hevc_put_bi_pel_pixels24_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); void ff_hevc_put_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -- 2.45.2