From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> To: ffmpeg-devel@ffmpeg.org Subject: Re: [FFmpeg-devel] [PATCH] avcodec/x86/hevc/dsp_init: Rename, ff_hevc_put_hevc->ff_hevc_put Date: Wed, 12 Mar 2025 04:52:13 +0100 Message-ID: <GV1P250MB0737A5A3C9610524F6B2BA5E8FD02@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM> (raw) In-Reply-To: <GV1P250MB0737C57ED55B47AC44C042798FD02@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM> [-- Attachment #1: Type: text/plain, Size: 88 bytes --] Andreas Rheinhardt: > Patch attached. > > - Andreas > Now truely attached. - Andreas [-- Attachment #2: 0001-avcodec-x86-hevc-dsp_init-Rename-ff_hevc_put_hevc-ff.patch --] [-- Type: text/x-patch, Size: 84395 bytes --] From 778df2069a87d32763f052908a6405c96ac9f30e Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Date: Thu, 23 Jun 2022 03:19:49 +0200 Subject: [PATCH] avcodec/x86/hevc/dsp_init: Rename ff_hevc_put_hevc->ff_hevc_put Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- libavcodec/x86/hevc/dsp_init.c | 614 ++++++++++++++++----------------- libavcodec/x86/hevc/mc.asm | 24 +- libavcodec/x86/hevcdsp.h | 86 ++--- 3 files changed, 362 insertions(+), 362 deletions(-) diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c index 2c0fca303e..ef0e707747 100644 --- a/libavcodec/x86/hevc/dsp_init.c +++ b/libavcodec/x86/hevc/dsp_init.c @@ -92,15 +92,15 @@ IDCT_FUNCS(avx) const uint8_t *vf = ff_hevc_ ## f ## _filters[my]; #define FW_PUT(p, a, b, depth, opt) \ -void ff_hevc_put_hevc_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \ - int height, intptr_t mx, intptr_t my,int width) \ +void ff_hevc_put_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, \ + int height, intptr_t mx, intptr_t my,int width) \ { \ DECL_HV_FILTER(p) \ ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \ } #define FW_PUT_UNI(p, a, b, depth, opt) \ -void ff_hevc_put_hevc_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \ +void ff_hevc_put_uni_ ## a ## _ ## depth ## _##opt(uint8_t *dst, ptrdiff_t dststride, \ const uint8_t *src, ptrdiff_t srcstride, \ int height, intptr_t mx, intptr_t my, int width) \ { \ @@ -190,36 +190,36 @@ FW_QPEL_HV(16, 10, avx2) #endif #define mc_rep_func(name, bitd, step, W, opt) \ -void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, \ - const uint8_t *_src, ptrdiff_t _srcstride, int height, \ - intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_##name##W##_##bitd##_##opt(int16_t *_dst, \ + const uint8_t *_src, ptrdiff_t _srcstride, int height, \ + intptr_t mx, intptr_t my, int width) \ { \ int i; \ int16_t *dst; \ for (i = 0; i < W; i += step) { \ const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \ dst = _dst + i; \ - ff_hevc_put_hevc_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \ + ff_hevc_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \ } \ } #define mc_rep_uni_func(name, bitd, step, W, opt) \ -void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \ - const uint8_t *_src, ptrdiff_t _srcstride, int height, \ - intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, \ + const uint8_t *_src, ptrdiff_t _srcstride, int height, \ + intptr_t mx, intptr_t my, int width) \ { \ int i; \ uint8_t *dst; \ for (i = 0; i < W; i += step) { \ const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \ dst = _dst + (i * ((bitd + 7) / 8)); \ - ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \ - height, mx, my, width); \ + ff_hevc_put_uni_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, \ + height, mx, my, width); \ } \ } #define mc_rep_bi_func(name, bitd, step, W, opt) \ -void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \ - ptrdiff_t _srcstride, const int16_t *_src2, \ - int height, intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const uint8_t *_src, \ + ptrdiff_t _srcstride, const int16_t *_src2, \ + int height, intptr_t mx, intptr_t my, int width) \ { \ int i; \ uint8_t *dst; \ @@ -227,8 +227,8 @@ void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dst const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \ const int16_t *src2 = _src2 + i; \ dst = _dst + (i * ((bitd + 7) / 8)); \ - ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \ - height, mx, my, width); \ + ff_hevc_put_bi_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, \ + height, mx, my, width); \ } \ } @@ -238,33 +238,33 @@ void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dst mc_rep_bi_func(name, bitd, step, W, opt) #define mc_rep_func2(name, bitd, step1, step2, W, opt) \ -void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *dst, \ - const uint8_t *src, ptrdiff_t _srcstride, int height, \ - intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_##name##W##_##bitd##_##opt(int16_t *dst, \ + const uint8_t *src, ptrdiff_t _srcstride, int height, \ + intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_hevc_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \ - ff_hevc_put_hevc_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \ - _srcstride, height, mx, my, width); \ + ff_hevc_put_##name##step1##_##bitd##_##opt(dst, src, _srcstride, height, mx, my, width); \ + ff_hevc_put_##name##step2##_##bitd##_##opt(dst + step1, src + (step1 * ((bitd + 7) / 8)), \ + _srcstride, height, mx, my, width); \ } #define mc_rep_uni_func2(name, bitd, step1, step2, W, opt) \ -void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \ - const uint8_t *src, ptrdiff_t _srcstride, int height, \ - intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_uni_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, \ + const uint8_t *src, ptrdiff_t _srcstride, int height, \ + intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_hevc_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width);\ - ff_hevc_put_hevc_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ - src + (step1 * ((bitd + 7) / 8)), _srcstride, \ - height, mx, my, width); \ + ff_hevc_put_uni_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, height, mx, my, width); \ + ff_hevc_put_uni_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ + src + (step1 * ((bitd + 7) / 8)), _srcstride, \ + height, mx, my, width); \ } #define mc_rep_bi_func2(name, bitd, step1, step2, W, opt) \ -void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ +void ff_hevc_put_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ ptrdiff_t _srcstride, const int16_t *src2, \ int height, intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_hevc_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\ - ff_hevc_put_hevc_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ - src + (step1 * ((bitd + 7) / 8)), _srcstride, \ - src2 + step1, height, mx, my, width); \ + ff_hevc_put_bi_##name##step1##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, height, mx, my, width);\ + ff_hevc_put_bi_##name##step2##_##bitd##_##opt(dst + (step1 * ((bitd + 7) / 8)), dststride, \ + src + (step1 * ((bitd + 7) / 8)), _srcstride, \ + src2 + step1, height, mx, my, width); \ } #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \ @@ -275,34 +275,34 @@ void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dsts #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL #define mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ -void ff_hevc_put_hevc_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \ - int height, intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_##name##width1##_10_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \ + int height, intptr_t mx, intptr_t my, int width) \ \ { \ - ff_hevc_put_hevc_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \ - ff_hevc_put_hevc_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \ + ff_hevc_put_##name##width2##_10_##opt1(dst, src, _srcstride, height, mx, my, width); \ + ff_hevc_put_##name##width3##_10_##opt2(dst+ width2, src+ width4, _srcstride, height, mx, my, width); \ } #define mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ -void ff_hevc_put_hevc_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ - ptrdiff_t _srcstride, const int16_t *src2, \ - int height, intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_bi_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ + ptrdiff_t _srcstride, const int16_t *src2, \ + int height, intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_hevc_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \ - height, mx, my, width); \ - ff_hevc_put_hevc_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2,\ - height, mx, my, width); \ + ff_hevc_put_bi_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, src2, \ + height, mx, my, width); \ + ff_hevc_put_bi_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, src2+width2, \ + height, mx, my, width); \ } #define mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ -void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \ - const uint8_t *src, ptrdiff_t _srcstride, int height, \ - intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dststride, \ + const uint8_t *src, ptrdiff_t _srcstride, int height, \ + intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_hevc_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \ - height, mx, my, width); \ - ff_hevc_put_hevc_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \ - height, mx, my, width); \ + ff_hevc_put_uni_##name##width2##_10_##opt1(dst, dststride, src, _srcstride, \ + height, mx, my, width); \ + ff_hevc_put_uni_##name##width3##_10_##opt2(dst+width4, dststride, src+width4, _srcstride, \ + height, mx, my, width); \ } #define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4) \ @@ -311,34 +311,34 @@ mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) \ mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4) #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ -void ff_hevc_put_hevc_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \ - int height, intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_##name##width1##_8_##opt1(int16_t *dst, const uint8_t *src, ptrdiff_t _srcstride, \ + int height, intptr_t mx, intptr_t my, int width) \ \ { \ - ff_hevc_put_hevc_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \ - ff_hevc_put_hevc_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \ + ff_hevc_put_##name##width2##_8_##opt1(dst, src, _srcstride, height, mx, my, width); \ + ff_hevc_put_##name##width3##_8_##opt2(dst+ width2, src+ width2, _srcstride, height, mx, my, width); \ } #define mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ -void ff_hevc_put_hevc_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ - ptrdiff_t _srcstride, const int16_t *src2, \ - int height, intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_bi_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ + ptrdiff_t _srcstride, const int16_t *src2, \ + int height, intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_hevc_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ - src2, height, mx, my, width); \ - ff_hevc_put_hevc_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ - src2+width2, height, mx, my, width); \ + ff_hevc_put_bi_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ + src2, height, mx, my, width); \ + ff_hevc_put_bi_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ + src2+width2, height, mx, my, width); \ } #define mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2) \ -void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \ - const uint8_t *src, ptrdiff_t _srcstride, int height, \ - intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dststride, \ + const uint8_t *src, ptrdiff_t _srcstride, int height, \ + intptr_t mx, intptr_t my, int width) \ { \ - ff_hevc_put_hevc_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ - height, mx, my, width); \ - ff_hevc_put_hevc_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ - height, mx, my, width); \ + ff_hevc_put_uni_##name##width2##_8_##opt1(dst, dststride, src, _srcstride, \ + height, mx, my, width); \ + ff_hevc_put_uni_##name##width3##_8_##opt2(dst+width2, dststride, src+width2, _srcstride, \ + height, mx, my, width); \ } #define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2) \ @@ -536,16 +536,16 @@ mc_rep_funcs(qpel_hv,12, 8, 16, sse4) mc_rep_funcs(qpel_hv,12, 4, 12, sse4) #define mc_rep_uni_w(bitd, step, W, opt) \ -void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ - int height, int denom, int _wx, int _ox) \ +void ff_hevc_put_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ + int height, int denom, int _wx, int _ox) \ { \ int i; \ uint8_t *dst; \ for (i = 0; i < W; i += step) { \ const int16_t *src = _src + i; \ dst= _dst + (i * ((bitd + 7) / 8)); \ - ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, \ - height, denom, _wx, _ox); \ + ff_hevc_put_uni_w##step##_##bitd##_##opt(dst, dststride, src, \ + height, denom, _wx, _ox); \ } \ } @@ -571,9 +571,9 @@ mc_rep_uni_w(12, 8, 48, sse4) mc_rep_uni_w(12, 8, 64, sse4) #define mc_rep_bi_w(bitd, step, W, opt) \ -void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ - const int16_t *_src2, int height, \ - int denom, int _wx0, int _wx1, int _ox0, int _ox1) \ +void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ + const int16_t *_src2, int height, \ + int denom, int _wx0, int _wx1, int _ox0, int _ox1) \ { \ int i; \ uint8_t *dst; \ @@ -581,8 +581,8 @@ void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststrid const int16_t *src = _src + i; \ const int16_t *src2 = _src2 + i; \ dst = _dst + (i * ((bitd + 7) / 8)); \ - ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \ - height, denom, _wx0, _wx1, _ox0, _ox1); \ + ff_hevc_put_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \ + height, denom, _wx0, _wx1, _ox0, _ox1); \ } \ } @@ -608,15 +608,15 @@ mc_rep_bi_w(12, 8, 48, sse4) mc_rep_bi_w(12, 8, 64, sse4) #define mc_uni_w_func(name, bitd, W, opt) \ -void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ - const uint8_t *_src, ptrdiff_t _srcstride, \ - int height, int denom, \ - int _wx, int _ox, \ - intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ + const uint8_t *_src, ptrdiff_t _srcstride, \ + int height, int denom, \ + int _wx, int _ox, \ + intptr_t mx, intptr_t my, int width) \ { \ LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ - ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ - ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\ + ff_hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ + ff_hevc_put_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox); \ } #define mc_uni_w_funcs(name, bitd, opt) \ @@ -666,17 +666,17 @@ mc_uni_w_funcs(qpel_v, 12, sse4) mc_uni_w_funcs(qpel_hv, 12, sse4) #define mc_bi_w_func(name, bitd, W, opt) \ -void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ - const uint8_t *_src, ptrdiff_t _srcstride, \ - const int16_t *_src2, \ - int height, int denom, \ - int _wx0, int _wx1, int _ox0, int _ox1, \ - intptr_t mx, intptr_t my, int width) \ +void ff_hevc_put_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride, \ + const uint8_t *_src, ptrdiff_t _srcstride, \ + const int16_t *_src2, \ + int height, int denom, \ + int _wx0, int _wx1, int _ox0, int _ox1, \ + intptr_t mx, intptr_t my, int width) \ { \ LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ - ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ - ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \ - height, denom, _wx0, _wx1, _ox0, _ox1); \ + ff_hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ + ff_hevc_put_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \ + height, denom, _wx0, _wx1, _ox0, _ox1); \ } #define mc_bi_w_funcs(name, bitd, opt) \ @@ -882,89 +882,89 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2; c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2; if (ARCH_X86_64) { - c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; - c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; - c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; + c->put_hevc_epel[7][0][0] = ff_hevc_put_pel_pixels32_8_avx2; + c->put_hevc_epel[8][0][0] = ff_hevc_put_pel_pixels48_8_avx2; + c->put_hevc_epel[9][0][0] = ff_hevc_put_pel_pixels64_8_avx2; - c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_8_avx2; - c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_8_avx2; - c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_8_avx2; + c->put_hevc_qpel[7][0][0] = ff_hevc_put_pel_pixels32_8_avx2; + c->put_hevc_qpel[8][0][0] = ff_hevc_put_pel_pixels48_8_avx2; + c->put_hevc_qpel[9][0][0] = ff_hevc_put_pel_pixels64_8_avx2; - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_uni_pel_pixels32_8_avx2; + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_uni_pel_pixels48_8_avx2; + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_uni_pel_pixels64_8_avx2; - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_uni_pel_pixels32_8_avx2; + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_uni_pel_pixels48_8_avx2; + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_uni_pel_pixels64_8_avx2; - c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; - c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; - c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2; + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2; + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2; - c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_8_avx2; - c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_8_avx2; - c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_8_avx2; + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_8_avx2; + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_8_avx2; + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_8_avx2; - c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_8_avx2; - c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_8_avx2; - c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_8_avx2; + c->put_hevc_epel[7][0][1] = ff_hevc_put_epel_h32_8_avx2; + c->put_hevc_epel[8][0][1] = ff_hevc_put_epel_h48_8_avx2; + c->put_hevc_epel[9][0][1] = ff_hevc_put_epel_h64_8_avx2; - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_8_avx2; - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_8_avx2; - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_8_avx2; + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_uni_epel_h32_8_avx2; + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_uni_epel_h48_8_avx2; + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_uni_epel_h64_8_avx2; - c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_8_avx2; - c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_8_avx2; - c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_8_avx2; + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_8_avx2; + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_8_avx2; + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_8_avx2; - c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_8_avx2; - c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_8_avx2; - c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_8_avx2; + c->put_hevc_epel[7][1][0] = ff_hevc_put_epel_v32_8_avx2; + c->put_hevc_epel[8][1][0] = ff_hevc_put_epel_v48_8_avx2; + c->put_hevc_epel[9][1][0] = ff_hevc_put_epel_v64_8_avx2; - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_8_avx2; - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_8_avx2; - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_8_avx2; + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_uni_epel_v32_8_avx2; + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_uni_epel_v48_8_avx2; + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_uni_epel_v64_8_avx2; - c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_8_avx2; - c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_8_avx2; - c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_8_avx2; + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_8_avx2; + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_8_avx2; + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_8_avx2; - c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_8_avx2; - c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_8_avx2; - c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_8_avx2; + c->put_hevc_epel[7][1][1] = ff_hevc_put_epel_hv32_8_avx2; + c->put_hevc_epel[8][1][1] = ff_hevc_put_epel_hv48_8_avx2; + c->put_hevc_epel[9][1][1] = ff_hevc_put_epel_hv64_8_avx2; - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_8_avx2; - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_8_avx2; - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_8_avx2; + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_uni_epel_hv32_8_avx2; + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_uni_epel_hv48_8_avx2; + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_uni_epel_hv64_8_avx2; - c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_8_avx2; - c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_8_avx2; - c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_8_avx2; + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_8_avx2; + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_8_avx2; + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_8_avx2; - c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx2; - c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_8_avx2; - c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx2; + c->put_hevc_qpel[7][0][1] = ff_hevc_put_qpel_h32_8_avx2; + c->put_hevc_qpel[8][0][1] = ff_hevc_put_qpel_h48_8_avx2; + c->put_hevc_qpel[9][0][1] = ff_hevc_put_qpel_h64_8_avx2; - c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_8_avx2; - c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_8_avx2; - c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_8_avx2; + c->put_hevc_qpel[7][1][0] = ff_hevc_put_qpel_v32_8_avx2; + c->put_hevc_qpel[8][1][0] = ff_hevc_put_qpel_v48_8_avx2; + c->put_hevc_qpel[9][1][0] = ff_hevc_put_qpel_v64_8_avx2; - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_8_avx2; - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_8_avx2; - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_8_avx2; + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_uni_qpel_h32_8_avx2; + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_uni_qpel_h48_8_avx2; + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_uni_qpel_h64_8_avx2; - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_8_avx2; - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_8_avx2; - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_8_avx2; + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_uni_qpel_v32_8_avx2; + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_uni_qpel_v48_8_avx2; + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_uni_qpel_v64_8_avx2; - c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_8_avx2; - c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_8_avx2; - c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_8_avx2; + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_8_avx2; + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_8_avx2; + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_8_avx2; - c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_8_avx2; - c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_8_avx2; - c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_8_avx2; + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_8_avx2; + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_8_avx2; + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_8_avx2; } SAO_BAND_INIT(8, avx2); @@ -975,12 +975,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->add_residual[3] = ff_hevc_add_residual_32_8_avx2; } if (EXTERNAL_AVX512ICL(cpu_flags) && ARCH_X86_64) { - c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_avx512icl; - c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_avx512icl; - c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_avx512icl; - c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_8_avx512icl; - c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_8_avx512icl; - c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_avx512icl; + c->put_hevc_qpel[1][0][1] = ff_hevc_put_qpel_h4_8_avx512icl; + c->put_hevc_qpel[3][0][1] = ff_hevc_put_qpel_h8_8_avx512icl; + c->put_hevc_qpel[5][0][1] = ff_hevc_put_qpel_h16_8_avx512icl; + c->put_hevc_qpel[7][0][1] = ff_hevc_put_qpel_h32_8_avx512icl; + c->put_hevc_qpel[9][0][1] = ff_hevc_put_qpel_h64_8_avx512icl; + c->put_hevc_qpel[3][1][1] = ff_hevc_put_qpel_hv8_8_avx512icl; } } else if (bit_depth == 10) { if (EXTERNAL_MMXEXT(cpu_flags)) { @@ -1049,148 +1049,148 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2; c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2; if (ARCH_X86_64) { - c->put_hevc_epel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; - c->put_hevc_epel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; - c->put_hevc_epel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; - c->put_hevc_epel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; - c->put_hevc_epel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; - - c->put_hevc_qpel[5][0][0] = ff_hevc_put_hevc_pel_pixels16_10_avx2; - c->put_hevc_qpel[6][0][0] = ff_hevc_put_hevc_pel_pixels24_10_avx2; - c->put_hevc_qpel[7][0][0] = ff_hevc_put_hevc_pel_pixels32_10_avx2; - c->put_hevc_qpel[8][0][0] = ff_hevc_put_hevc_pel_pixels48_10_avx2; - c->put_hevc_qpel[9][0][0] = ff_hevc_put_hevc_pel_pixels64_10_avx2; - - c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; - c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; - c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; - c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; - c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; - - c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_hevc_uni_pel_pixels32_8_avx2; - c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_hevc_uni_pel_pixels48_8_avx2; - c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_hevc_uni_pel_pixels64_8_avx2; - c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_hevc_uni_pel_pixels96_8_avx2; - c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_hevc_uni_pel_pixels128_8_avx2; - - c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; - c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; - c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; - c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; - c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; - c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_bi_pel_pixels16_10_avx2; - c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_bi_pel_pixels24_10_avx2; - c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_hevc_bi_pel_pixels32_10_avx2; - c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_hevc_bi_pel_pixels48_10_avx2; - c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_hevc_bi_pel_pixels64_10_avx2; - - c->put_hevc_epel[5][0][1] = ff_hevc_put_hevc_epel_h16_10_avx2; - c->put_hevc_epel[6][0][1] = ff_hevc_put_hevc_epel_h24_10_avx2; - c->put_hevc_epel[7][0][1] = ff_hevc_put_hevc_epel_h32_10_avx2; - c->put_hevc_epel[8][0][1] = ff_hevc_put_hevc_epel_h48_10_avx2; - c->put_hevc_epel[9][0][1] = ff_hevc_put_hevc_epel_h64_10_avx2; - - c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_hevc_uni_epel_h16_10_avx2; - c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_hevc_uni_epel_h24_10_avx2; - c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_hevc_uni_epel_h32_10_avx2; - c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_hevc_uni_epel_h48_10_avx2; - c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_hevc_uni_epel_h64_10_avx2; - - c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_hevc_bi_epel_h16_10_avx2; - c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_hevc_bi_epel_h24_10_avx2; - c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_hevc_bi_epel_h32_10_avx2; - c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_hevc_bi_epel_h48_10_avx2; - c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_hevc_bi_epel_h64_10_avx2; - - c->put_hevc_epel[5][1][0] = ff_hevc_put_hevc_epel_v16_10_avx2; - c->put_hevc_epel[6][1][0] = ff_hevc_put_hevc_epel_v24_10_avx2; - c->put_hevc_epel[7][1][0] = ff_hevc_put_hevc_epel_v32_10_avx2; - c->put_hevc_epel[8][1][0] = ff_hevc_put_hevc_epel_v48_10_avx2; - c->put_hevc_epel[9][1][0] = ff_hevc_put_hevc_epel_v64_10_avx2; - - c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_hevc_uni_epel_v16_10_avx2; - c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_hevc_uni_epel_v24_10_avx2; - c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_hevc_uni_epel_v32_10_avx2; - c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_hevc_uni_epel_v48_10_avx2; - c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_hevc_uni_epel_v64_10_avx2; - - c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_hevc_bi_epel_v16_10_avx2; - c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_hevc_bi_epel_v24_10_avx2; - c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_hevc_bi_epel_v32_10_avx2; - c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_hevc_bi_epel_v48_10_avx2; - c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_hevc_bi_epel_v64_10_avx2; - - c->put_hevc_epel[5][1][1] = ff_hevc_put_hevc_epel_hv16_10_avx2; - c->put_hevc_epel[6][1][1] = ff_hevc_put_hevc_epel_hv24_10_avx2; - c->put_hevc_epel[7][1][1] = ff_hevc_put_hevc_epel_hv32_10_avx2; - c->put_hevc_epel[8][1][1] = ff_hevc_put_hevc_epel_hv48_10_avx2; - c->put_hevc_epel[9][1][1] = ff_hevc_put_hevc_epel_hv64_10_avx2; - - c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_hevc_uni_epel_hv16_10_avx2; - c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_hevc_uni_epel_hv24_10_avx2; - c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_hevc_uni_epel_hv32_10_avx2; - c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_hevc_uni_epel_hv48_10_avx2; - c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_hevc_uni_epel_hv64_10_avx2; - - c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_hevc_bi_epel_hv16_10_avx2; - c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_hevc_bi_epel_hv24_10_avx2; - c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_hevc_bi_epel_hv32_10_avx2; - c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_hevc_bi_epel_hv48_10_avx2; - c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_hevc_bi_epel_hv64_10_avx2; - - c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_10_avx2; - c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h24_10_avx2; - c->put_hevc_qpel[7][0][1] = ff_hevc_put_hevc_qpel_h32_10_avx2; - c->put_hevc_qpel[8][0][1] = ff_hevc_put_hevc_qpel_h48_10_avx2; - c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h64_10_avx2; - - c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_uni_qpel_h16_10_avx2; - c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_uni_qpel_h24_10_avx2; - c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_hevc_uni_qpel_h32_10_avx2; - c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_hevc_uni_qpel_h48_10_avx2; - c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_uni_qpel_h64_10_avx2; - - c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_bi_qpel_h16_10_avx2; - c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_bi_qpel_h24_10_avx2; - c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_hevc_bi_qpel_h32_10_avx2; - c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_hevc_bi_qpel_h48_10_avx2; - c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_bi_qpel_h64_10_avx2; - - c->put_hevc_qpel[5][1][0] = ff_hevc_put_hevc_qpel_v16_10_avx2; - c->put_hevc_qpel[6][1][0] = ff_hevc_put_hevc_qpel_v24_10_avx2; - c->put_hevc_qpel[7][1][0] = ff_hevc_put_hevc_qpel_v32_10_avx2; - c->put_hevc_qpel[8][1][0] = ff_hevc_put_hevc_qpel_v48_10_avx2; - c->put_hevc_qpel[9][1][0] = ff_hevc_put_hevc_qpel_v64_10_avx2; - - c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_hevc_uni_qpel_v16_10_avx2; - c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_hevc_uni_qpel_v24_10_avx2; - c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_hevc_uni_qpel_v32_10_avx2; - c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_hevc_uni_qpel_v48_10_avx2; - c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_hevc_uni_qpel_v64_10_avx2; - - c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_hevc_bi_qpel_v16_10_avx2; - c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_hevc_bi_qpel_v24_10_avx2; - c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_hevc_bi_qpel_v32_10_avx2; - c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_hevc_bi_qpel_v48_10_avx2; - c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_hevc_bi_qpel_v64_10_avx2; - - c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_10_avx2; - c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_10_avx2; - c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_10_avx2; - c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_10_avx2; - c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_10_avx2; - - c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_hevc_uni_qpel_hv16_10_avx2; - c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_hevc_uni_qpel_hv24_10_avx2; - c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_hevc_uni_qpel_hv32_10_avx2; - c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_hevc_uni_qpel_hv48_10_avx2; - c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_hevc_uni_qpel_hv64_10_avx2; - - c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_hevc_bi_qpel_hv16_10_avx2; - c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_hevc_bi_qpel_hv24_10_avx2; - c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_hevc_bi_qpel_hv32_10_avx2; - c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_hevc_bi_qpel_hv48_10_avx2; - c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2; + c->put_hevc_epel[5][0][0] = ff_hevc_put_pel_pixels16_10_avx2; + c->put_hevc_epel[6][0][0] = ff_hevc_put_pel_pixels24_10_avx2; + c->put_hevc_epel[7][0][0] = ff_hevc_put_pel_pixels32_10_avx2; + c->put_hevc_epel[8][0][0] = ff_hevc_put_pel_pixels48_10_avx2; + c->put_hevc_epel[9][0][0] = ff_hevc_put_pel_pixels64_10_avx2; + + c->put_hevc_qpel[5][0][0] = ff_hevc_put_pel_pixels16_10_avx2; + c->put_hevc_qpel[6][0][0] = ff_hevc_put_pel_pixels24_10_avx2; + c->put_hevc_qpel[7][0][0] = ff_hevc_put_pel_pixels32_10_avx2; + c->put_hevc_qpel[8][0][0] = ff_hevc_put_pel_pixels48_10_avx2; + c->put_hevc_qpel[9][0][0] = ff_hevc_put_pel_pixels64_10_avx2; + + c->put_hevc_epel_uni[5][0][0] = ff_hevc_put_uni_pel_pixels32_8_avx2; + c->put_hevc_epel_uni[6][0][0] = ff_hevc_put_uni_pel_pixels48_8_avx2; + c->put_hevc_epel_uni[7][0][0] = ff_hevc_put_uni_pel_pixels64_8_avx2; + c->put_hevc_epel_uni[8][0][0] = ff_hevc_put_uni_pel_pixels96_8_avx2; + c->put_hevc_epel_uni[9][0][0] = ff_hevc_put_uni_pel_pixels128_8_avx2; + + c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_uni_pel_pixels32_8_avx2; + c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_uni_pel_pixels48_8_avx2; + c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_uni_pel_pixels64_8_avx2; + c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_uni_pel_pixels96_8_avx2; + c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_uni_pel_pixels128_8_avx2; + + c->put_hevc_epel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2; + c->put_hevc_epel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2; + c->put_hevc_epel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2; + c->put_hevc_epel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2; + c->put_hevc_epel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2; + c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_bi_pel_pixels16_10_avx2; + c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_bi_pel_pixels24_10_avx2; + c->put_hevc_qpel_bi[7][0][0] = ff_hevc_put_bi_pel_pixels32_10_avx2; + c->put_hevc_qpel_bi[8][0][0] = ff_hevc_put_bi_pel_pixels48_10_avx2; + c->put_hevc_qpel_bi[9][0][0] = ff_hevc_put_bi_pel_pixels64_10_avx2; + + c->put_hevc_epel[5][0][1] = ff_hevc_put_epel_h16_10_avx2; + c->put_hevc_epel[6][0][1] = ff_hevc_put_epel_h24_10_avx2; + c->put_hevc_epel[7][0][1] = ff_hevc_put_epel_h32_10_avx2; + c->put_hevc_epel[8][0][1] = ff_hevc_put_epel_h48_10_avx2; + c->put_hevc_epel[9][0][1] = ff_hevc_put_epel_h64_10_avx2; + + c->put_hevc_epel_uni[5][0][1] = ff_hevc_put_uni_epel_h16_10_avx2; + c->put_hevc_epel_uni[6][0][1] = ff_hevc_put_uni_epel_h24_10_avx2; + c->put_hevc_epel_uni[7][0][1] = ff_hevc_put_uni_epel_h32_10_avx2; + c->put_hevc_epel_uni[8][0][1] = ff_hevc_put_uni_epel_h48_10_avx2; + c->put_hevc_epel_uni[9][0][1] = ff_hevc_put_uni_epel_h64_10_avx2; + + c->put_hevc_epel_bi[5][0][1] = ff_hevc_put_bi_epel_h16_10_avx2; + c->put_hevc_epel_bi[6][0][1] = ff_hevc_put_bi_epel_h24_10_avx2; + c->put_hevc_epel_bi[7][0][1] = ff_hevc_put_bi_epel_h32_10_avx2; + c->put_hevc_epel_bi[8][0][1] = ff_hevc_put_bi_epel_h48_10_avx2; + c->put_hevc_epel_bi[9][0][1] = ff_hevc_put_bi_epel_h64_10_avx2; + + c->put_hevc_epel[5][1][0] = ff_hevc_put_epel_v16_10_avx2; + c->put_hevc_epel[6][1][0] = ff_hevc_put_epel_v24_10_avx2; + c->put_hevc_epel[7][1][0] = ff_hevc_put_epel_v32_10_avx2; + c->put_hevc_epel[8][1][0] = ff_hevc_put_epel_v48_10_avx2; + c->put_hevc_epel[9][1][0] = ff_hevc_put_epel_v64_10_avx2; + + c->put_hevc_epel_uni[5][1][0] = ff_hevc_put_uni_epel_v16_10_avx2; + c->put_hevc_epel_uni[6][1][0] = ff_hevc_put_uni_epel_v24_10_avx2; + c->put_hevc_epel_uni[7][1][0] = ff_hevc_put_uni_epel_v32_10_avx2; + c->put_hevc_epel_uni[8][1][0] = ff_hevc_put_uni_epel_v48_10_avx2; + c->put_hevc_epel_uni[9][1][0] = ff_hevc_put_uni_epel_v64_10_avx2; + + c->put_hevc_epel_bi[5][1][0] = ff_hevc_put_bi_epel_v16_10_avx2; + c->put_hevc_epel_bi[6][1][0] = ff_hevc_put_bi_epel_v24_10_avx2; + c->put_hevc_epel_bi[7][1][0] = ff_hevc_put_bi_epel_v32_10_avx2; + c->put_hevc_epel_bi[8][1][0] = ff_hevc_put_bi_epel_v48_10_avx2; + c->put_hevc_epel_bi[9][1][0] = ff_hevc_put_bi_epel_v64_10_avx2; + + c->put_hevc_epel[5][1][1] = ff_hevc_put_epel_hv16_10_avx2; + c->put_hevc_epel[6][1][1] = ff_hevc_put_epel_hv24_10_avx2; + c->put_hevc_epel[7][1][1] = ff_hevc_put_epel_hv32_10_avx2; + c->put_hevc_epel[8][1][1] = ff_hevc_put_epel_hv48_10_avx2; + c->put_hevc_epel[9][1][1] = ff_hevc_put_epel_hv64_10_avx2; + + c->put_hevc_epel_uni[5][1][1] = ff_hevc_put_uni_epel_hv16_10_avx2; + c->put_hevc_epel_uni[6][1][1] = ff_hevc_put_uni_epel_hv24_10_avx2; + c->put_hevc_epel_uni[7][1][1] = ff_hevc_put_uni_epel_hv32_10_avx2; + c->put_hevc_epel_uni[8][1][1] = ff_hevc_put_uni_epel_hv48_10_avx2; + c->put_hevc_epel_uni[9][1][1] = ff_hevc_put_uni_epel_hv64_10_avx2; + + c->put_hevc_epel_bi[5][1][1] = ff_hevc_put_bi_epel_hv16_10_avx2; + c->put_hevc_epel_bi[6][1][1] = ff_hevc_put_bi_epel_hv24_10_avx2; + c->put_hevc_epel_bi[7][1][1] = ff_hevc_put_bi_epel_hv32_10_avx2; + c->put_hevc_epel_bi[8][1][1] = ff_hevc_put_bi_epel_hv48_10_avx2; + c->put_hevc_epel_bi[9][1][1] = ff_hevc_put_bi_epel_hv64_10_avx2; + + c->put_hevc_qpel[5][0][1] = ff_hevc_put_qpel_h16_10_avx2; + c->put_hevc_qpel[6][0][1] = ff_hevc_put_qpel_h24_10_avx2; + c->put_hevc_qpel[7][0][1] = ff_hevc_put_qpel_h32_10_avx2; + c->put_hevc_qpel[8][0][1] = ff_hevc_put_qpel_h48_10_avx2; + c->put_hevc_qpel[9][0][1] = ff_hevc_put_qpel_h64_10_avx2; + + c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_uni_qpel_h16_10_avx2; + c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_uni_qpel_h24_10_avx2; + c->put_hevc_qpel_uni[7][0][1] = ff_hevc_put_uni_qpel_h32_10_avx2; + c->put_hevc_qpel_uni[8][0][1] = ff_hevc_put_uni_qpel_h48_10_avx2; + c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_uni_qpel_h64_10_avx2; + + c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_bi_qpel_h16_10_avx2; + c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_bi_qpel_h24_10_avx2; + c->put_hevc_qpel_bi[7][0][1] = ff_hevc_put_bi_qpel_h32_10_avx2; + c->put_hevc_qpel_bi[8][0][1] = ff_hevc_put_bi_qpel_h48_10_avx2; + c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_bi_qpel_h64_10_avx2; + + c->put_hevc_qpel[5][1][0] = ff_hevc_put_qpel_v16_10_avx2; + c->put_hevc_qpel[6][1][0] = ff_hevc_put_qpel_v24_10_avx2; + c->put_hevc_qpel[7][1][0] = ff_hevc_put_qpel_v32_10_avx2; + c->put_hevc_qpel[8][1][0] = ff_hevc_put_qpel_v48_10_avx2; + c->put_hevc_qpel[9][1][0] = ff_hevc_put_qpel_v64_10_avx2; + + c->put_hevc_qpel_uni[5][1][0] = ff_hevc_put_uni_qpel_v16_10_avx2; + c->put_hevc_qpel_uni[6][1][0] = ff_hevc_put_uni_qpel_v24_10_avx2; + c->put_hevc_qpel_uni[7][1][0] = ff_hevc_put_uni_qpel_v32_10_avx2; + c->put_hevc_qpel_uni[8][1][0] = ff_hevc_put_uni_qpel_v48_10_avx2; + c->put_hevc_qpel_uni[9][1][0] = ff_hevc_put_uni_qpel_v64_10_avx2; + + c->put_hevc_qpel_bi[5][1][0] = ff_hevc_put_bi_qpel_v16_10_avx2; + c->put_hevc_qpel_bi[6][1][0] = ff_hevc_put_bi_qpel_v24_10_avx2; + c->put_hevc_qpel_bi[7][1][0] = ff_hevc_put_bi_qpel_v32_10_avx2; + c->put_hevc_qpel_bi[8][1][0] = ff_hevc_put_bi_qpel_v48_10_avx2; + c->put_hevc_qpel_bi[9][1][0] = ff_hevc_put_bi_qpel_v64_10_avx2; + + c->put_hevc_qpel[5][1][1] = ff_hevc_put_qpel_hv16_10_avx2; + c->put_hevc_qpel[6][1][1] = ff_hevc_put_qpel_hv24_10_avx2; + c->put_hevc_qpel[7][1][1] = ff_hevc_put_qpel_hv32_10_avx2; + c->put_hevc_qpel[8][1][1] = ff_hevc_put_qpel_hv48_10_avx2; + c->put_hevc_qpel[9][1][1] = ff_hevc_put_qpel_hv64_10_avx2; + + c->put_hevc_qpel_uni[5][1][1] = ff_hevc_put_uni_qpel_hv16_10_avx2; + c->put_hevc_qpel_uni[6][1][1] = ff_hevc_put_uni_qpel_hv24_10_avx2; + c->put_hevc_qpel_uni[7][1][1] = ff_hevc_put_uni_qpel_hv32_10_avx2; + c->put_hevc_qpel_uni[8][1][1] = ff_hevc_put_uni_qpel_hv48_10_avx2; + c->put_hevc_qpel_uni[9][1][1] = ff_hevc_put_uni_qpel_hv64_10_avx2; + + c->put_hevc_qpel_bi[5][1][1] = ff_hevc_put_bi_qpel_hv16_10_avx2; + c->put_hevc_qpel_bi[6][1][1] = ff_hevc_put_bi_qpel_hv24_10_avx2; + c->put_hevc_qpel_bi[7][1][1] = ff_hevc_put_bi_qpel_hv32_10_avx2; + c->put_hevc_qpel_bi[8][1][1] = ff_hevc_put_bi_qpel_hv48_10_avx2; + c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_bi_qpel_hv64_10_avx2; } SAO_BAND_INIT(10, avx2); SAO_EDGE_INIT(10, avx2); diff --git a/libavcodec/x86/hevc/mc.asm b/libavcodec/x86/hevc/mc.asm index b3b589b271..ad4c4adc2e 100644 --- a/libavcodec/x86/hevc/mc.asm +++ b/libavcodec/x86/hevc/mc.asm @@ -716,7 +716,7 @@ SECTION .text ; ****************************** %macro HEVC_BI_PEL_PIXELS 2 -cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstride, src2, height +cglobal hevc_put_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstride, src2, height pxor m2, m2 movdqa m5, [pw_bi_%2] .loop: @@ -748,7 +748,7 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstrid %define XMM_REGS 8 %endif -cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, XMM_REGS, dst, dststride, src, srcstride, src2, height, mx, rfilter +cglobal hevc_put_bi_epel_h%1_%2, 7, 8, XMM_REGS, dst, dststride, src, srcstride, src2, height, mx, rfilter %assign %%stride ((%2 + 7)/8) movdqa m6, [pw_bi_%2] EPEL_FILTER %2, mx, m4, m5, rfilter @@ -771,7 +771,7 @@ cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, XMM_REGS, dst, dststride, src, srcst ; int height, int mx, int my, int width) ; ****************************** -cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, XMM_REGS, dst, dststride, src, srcstride, src2, height, r3src, my +cglobal hevc_put_bi_epel_v%1_%2, 6, 8, XMM_REGS, dst, dststride, src, srcstride, src2, height, r3src, my movifnidn myd, mym movdqa m6, [pw_bi_%2] sub srcq, srcstrideq @@ -800,7 +800,7 @@ cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, XMM_REGS, dst, dststride, src, srcst %macro HEVC_PUT_HEVC_EPEL_HV 2 -cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src +cglobal hevc_put_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src %assign %%stride ((%2 + 7)/8) sub srcq, srcstrideq EPEL_HV_FILTER %2 @@ -882,7 +882,7 @@ cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride, %macro HEVC_PUT_HEVC_QPEL 2 -cglobal hevc_put_hevc_bi_qpel_h%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, src2, height, mx, rfilter +cglobal hevc_put_bi_qpel_h%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, src2, height, mx, rfilter movdqa m9, [pw_bi_%2] QPEL_FILTER %2, mx .loop: @@ -909,7 +909,7 @@ cglobal hevc_put_hevc_bi_qpel_h%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, ; ****************************** -cglobal hevc_put_hevc_bi_qpel_v%1_%2, 6, 10, 16, dst, dststride, src, srcstride, src2, height, r3src, my, rfilter +cglobal hevc_put_bi_qpel_v%1_%2, 6, 10, 16, dst, dststride, src, srcstride, src2, height, r3src, my, rfilter movifnidn myd, mym movdqa m9, [pw_bi_%2] lea r3srcq, [srcstrideq*3] @@ -939,7 +939,7 @@ cglobal hevc_put_hevc_bi_qpel_v%1_%2, 6, 10, 16, dst, dststride, src, srcstride, ; ****************************** %macro HEVC_PUT_HEVC_QPEL_HV 2 -cglobal hevc_put_hevc_bi_qpel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src, rfilter +cglobal hevc_put_bi_qpel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride, src2, height, mx, my, r3src, rfilter %if cpuflag(avx2) %assign %%shift 4 %else @@ -1025,11 +1025,11 @@ cglobal hevc_put_hevc_bi_qpel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride %macro WEIGHTING_FUNCS 2 %if WIN64 || ARCH_X86_32 -cglobal hevc_put_hevc_uni_w%1_%2, 4, 5, 7, dst, dststride, src, height, denom, wx, ox +cglobal hevc_put_uni_w%1_%2, 4, 5, 7, dst, dststride, src, height, denom, wx, ox mov r4d, denomm %define SHIFT r4d %else -cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, wx, ox +cglobal hevc_put_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, wx, ox %define SHIFT denomd %endif lea SHIFT, [SHIFT+14-%2] ; shift = 14 - bitd + denom @@ -1090,7 +1090,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, w jnz .loop ; height loop RET -cglobal hevc_put_hevc_bi_w%1_%2, 4, 6, 10, dst, dststride, src, src2, height, denom, wx0, wx1, ox0, ox1 +cglobal hevc_put_bi_w%1_%2, 4, 6, 10, dst, dststride, src, src2, height, denom, wx0, wx1, ox0, ox1 movifnidn r5d, denomm %if %1 <= 4 pxor m1, m1 @@ -1329,7 +1329,7 @@ HEVC_PUT_HEVC_QPEL_HV 16, 10 %endmacro %macro HEVC_PUT_HEVC_QPEL_AVX512ICL 2 -cglobal hevc_put_hevc_qpel_h%1_%2, 5, 6, 8, dst, src, srcstride, height, mx, tmp +cglobal hevc_put_qpel_h%1_%2, 5, 6, 8, dst, src, srcstride, height, mx, tmp QPEL_FILTER_H %1, mx, 0, 1, tmp QPEL_LOAD_SHUF 2, 3 .loop: @@ -1355,7 +1355,7 @@ cglobal hevc_put_hevc_qpel_h%1_%2, 5, 6, 8, dst, src, srcstride, height, mx, tmp %endmacro %macro HEVC_PUT_HEVC_QPEL_HV_AVX512ICL 2 -cglobal hevc_put_hevc_qpel_hv%1_%2, 6, 7, 27, dst, src, srcstride, height, mx, my, tmp +cglobal hevc_put_qpel_hv%1_%2, 6, 7, 27, dst, src, srcstride, height, mx, my, tmp %assign %%shift 6 %assign %%extra 7 QPEL_FILTER_H %1, mx, 0, 1, tmp diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h index 037519fad4..d596f9e50c 100644 --- a/libavcodec/x86/hevcdsp.h +++ b/libavcodec/x86/hevcdsp.h @@ -30,19 +30,19 @@ #define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt) \ -dst[idx1][idx2][idx3] = ff_hevc_put_hevc_ ## name ## _ ## D ## _##opt; \ -dst ## _bi[idx1][idx2][idx3] = ff_hevc_put_hevc_bi_ ## name ## _ ## D ## _##opt; \ -dst ## _uni[idx1][idx2][idx3] = ff_hevc_put_hevc_uni_ ## name ## _ ## D ## _##opt; \ -dst ## _uni_w[idx1][idx2][idx3] = ff_hevc_put_hevc_uni_w_ ## name ## _ ## D ## _##opt; \ -dst ## _bi_w[idx1][idx2][idx3] = ff_hevc_put_hevc_bi_w_ ## name ## _ ## D ## _##opt +dst[idx1][idx2][idx3] = ff_hevc_put_ ## name ## _ ## D ## _##opt; \ +dst ## _bi[idx1][idx2][idx3] = ff_hevc_put_bi_ ## name ## _ ## D ## _##opt; \ +dst ## _uni[idx1][idx2][idx3] = ff_hevc_put_uni_ ## name ## _ ## D ## _##opt; \ +dst ## _uni_w[idx1][idx2][idx3] = ff_hevc_put_uni_w_ ## name ## _ ## D ## _##opt; \ +dst ## _bi_w[idx1][idx2][idx3] = ff_hevc_put_bi_w_ ## name ## _ ## D ## _##opt #define PEL_PROTOTYPE(name, D, opt) \ -void ff_hevc_put_hevc_ ## name ## _ ## D ## _##opt(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); \ -void ff_hevc_put_hevc_bi_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); \ -void ff_hevc_put_hevc_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); \ -void ff_hevc_put_hevc_uni_w_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width); \ -void ff_hevc_put_hevc_bi_w_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width) +void ff_hevc_put_ ## name ## _ ## D ## _##opt(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); \ +void ff_hevc_put_bi_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); \ +void ff_hevc_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); \ +void ff_hevc_put_uni_w_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width); \ +void ff_hevc_put_bi_w_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width) /////////////////////////////////////////////////////////////////////////////// @@ -71,8 +71,8 @@ void ff_hevc_put_hevc_bi_w_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t PEL_PROTOTYPE(fname##64, bitd, opt) #define WEIGHTING_PROTOTYPE(width, bitd, opt) \ -void ff_hevc_put_hevc_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, int height, int denom, int _wx, int _ox); \ -void ff_hevc_put_hevc_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, const int16_t *_src2, int height, int denom, int _wx0, int _wx1, int _ox0, int _ox1) +void ff_hevc_put_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, int height, int denom, int _wx, int _ox); \ +void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, const int16_t *_src2, int height, int denom, int _wx0, int _wx1, int _ox0, int _ox1) #define WEIGHTING_PROTOTYPES(bitd, opt) \ WEIGHTING_PROTOTYPE(2, bitd, opt); \ @@ -94,38 +94,38 @@ EPEL_PROTOTYPES(pel_pixels , 8, sse4); EPEL_PROTOTYPES(pel_pixels , 10, sse4); EPEL_PROTOTYPES(pel_pixels , 12, sse4); -void ff_hevc_put_hevc_pel_pixels16_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_pel_pixels24_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_pel_pixels32_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_pel_pixels48_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_pel_pixels64_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels16_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels24_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels32_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels48_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels64_8_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_pel_pixels16_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_pel_pixels24_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_pel_pixels32_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_pel_pixels48_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_pel_pixels64_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels16_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels24_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels32_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels48_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_pel_pixels64_10_avx2(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_uni_pel_pixels32_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_uni_pel_pixels48_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_uni_pel_pixels64_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); -void ff_hevc_put_hevc_uni_pel_pixels96_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); //used for 10bit -void ff_hevc_put_hevc_uni_pel_pixels128_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width);//used for 10bit +void ff_hevc_put_uni_pel_pixels32_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_uni_pel_pixels48_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_uni_pel_pixels64_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); +void ff_hevc_put_uni_pel_pixels96_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); //used for 10bit +void ff_hevc_put_uni_pel_pixels128_8_avx2(uint8_t *dst, ptrdiff_t dststride,const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width);//used for 10bit -void ff_hevc_put_hevc_bi_pel_pixels16_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_bi_pel_pixels24_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels16_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels24_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels32_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels48_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels64_8_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels16_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels24_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels32_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels48_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_bi_pel_pixels64_10_avx2(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width); /////////////////////////////////////////////////////////////////////////////// // EPEL @@ -233,12 +233,12 @@ WEIGHTING_PROTOTYPES(8, sse4); WEIGHTING_PROTOTYPES(10, sse4); WEIGHTING_PROTOTYPES(12, sse4); -void ff_hevc_put_hevc_qpel_h4_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_h8_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_h16_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_h32_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_h64_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); -void ff_hevc_put_hevc_qpel_hv8_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_qpel_h4_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_qpel_h8_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_qpel_h16_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_qpel_h32_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_qpel_h64_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); +void ff_hevc_put_qpel_hv8_8_avx512icl(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); /////////////////////////////////////////////////////////////////////////////// // TRANSFORM_ADD -- 2.45.2 [-- Attachment #3: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
prev parent reply other threads:[~2025-03-12 3:52 UTC|newest] Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top 2025-03-12 3:50 Andreas Rheinhardt 2025-03-12 3:52 ` Andreas Rheinhardt [this message]
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=GV1P250MB0737A5A3C9610524F6B2BA5E8FD02@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM \ --to=andreas.rheinhardt@outlook.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git