From: toqsxw@outlook.com To: ffmpeg-devel@ffmpeg.org Cc: Wu Jianhua <toqsxw@outlook.com> Subject: [FFmpeg-devel] [PATCH v4 4/8] avcodec/x86/h26x/h2656_inter: add dststride to put Date: Wed, 24 Jan 2024 02:17:07 +0800 Message-ID: <TYWP286MB2172B45C237A2232BD601FD7CA742@TYWP286MB2172.JPNP286.PROD.OUTLOOK.COM> (raw) In-Reply-To: <20240123181711.402946-1-toqsxw@outlook.com> From: Wu Jianhua <toqsxw@outlook.com> Signed-off-by: Wu Jianhua <toqsxw@outlook.com> --- libavcodec/x86/h26x/h2656_inter.asm | 32 ++++++++++++++--------------- libavcodec/x86/h26x/h2656dsp.c | 4 ++-- libavcodec/x86/h26x/h2656dsp.h | 2 +- libavcodec/x86/hevcdsp_init.c | 2 +- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/libavcodec/x86/h26x/h2656_inter.asm b/libavcodec/x86/h26x/h2656_inter.asm index aa296d549c..cbba0c1ea5 100644 --- a/libavcodec/x86/h26x/h2656_inter.asm +++ b/libavcodec/x86/h26x/h2656_inter.asm @@ -22,8 +22,6 @@ ; */ %include "libavutil/x86/x86util.asm" -%define MAX_PB_SIZE 64 - SECTION_RODATA 32 cextern pw_255 cextern pw_512 @@ -342,7 +340,7 @@ SECTION .text %endmacro %macro LOOP_END 3 - add %1q, 2*MAX_PB_SIZE ; dst += dststride + add %1q, dststrideq ; dst += dststride add %2q, %3q ; src += srcstride dec heightd ; cmp height jnz .loop ; height loop @@ -539,7 +537,7 @@ SECTION .text ; ****************************** -; void %1_put_pixels(int16_t *dst, const uint8_t *_src, ptrdiff_t srcstride, +; void %1_put_pixels(int16_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t srcstride, ; int height, const int8_t *hf, const int8_t *vf, int width) ; ****************************** @@ -549,7 +547,7 @@ SECTION .text %endmacro %macro MC_PIXELS 3 -cglobal %1_put_pixels%2_%3, 4, 4, 3, dst, src, srcstride, height +cglobal %1_put_pixels%2_%3, 5, 5, 3, dst, dststride, src, srcstride, height pxor m2, m2 .loop: SIMPLE_LOAD %2, %3, srcq, m0 @@ -579,10 +577,10 @@ cglobal %1_put_uni_pixels%2_%3, 5, 5, 2, dst, dststride, src, srcstride, height %endif ; 
****************************** -; void %1_put_4tap_hX(int16_t *dst, +; void %1_put_4tap_hX(int16_t *dst, ptrdiff_t dststride, ; const uint8_t *_src, ptrdiff_t _srcstride, int height, int8_t *hf, int8_t *vf, int width); ; ****************************** -cglobal %1_put_4tap_h%2_%3, 5, 5, XMM_REGS, dst, src, srcstride, height, hf +cglobal %1_put_4tap_h%2_%3, 6, 6, XMM_REGS, dst, dststride, src, srcstride, height, hf %assign %%stride ((%3 + 7)/8) MC_4TAP_FILTER %3, hf, m4, m5 .loop: @@ -612,10 +610,10 @@ cglobal %1_put_uni_4tap_h%2_%3, 6, 7, XMM_REGS, dst, dststride, src, srcstride, RET ; ****************************** -; void %1_put_4tap_v(int16_t *dst, +; void %1_put_4tap_v(int16_t *dst, ptrdiff_t dststride, ; const uint8_t *_src, ptrdiff_t _srcstride, int height, int8_t *hf, int8_t *vf, int width) ; ****************************** -cglobal %1_put_4tap_v%2_%3, 6, 6, XMM_REGS, dst, src, srcstride, height, r3src, vf +cglobal %1_put_4tap_v%2_%3, 7, 7, XMM_REGS, dst, dststride, src, srcstride, height, r3src, vf sub srcq, srcstrideq MC_4TAP_FILTER %3, vf, m4, m5 lea r3srcq, [srcstrideq*3] @@ -649,10 +647,10 @@ cglobal %1_put_uni_4tap_v%2_%3, 7, 7, XMM_REGS, dst, dststride, src, srcstride, %macro PUT_4TAP_HV 3 ; ****************************** -; void put_4tap_hv(int16_t *dst, +; void put_4tap_hv(int16_t *dst, ptrdiff_t dststride, ; const uint8_t *_src, ptrdiff_t _srcstride, int height, int8_t *hf, int8_t *vf, int width) ; ****************************** -cglobal %1_put_4tap_hv%2_%3, 6, 7, 16 , dst, src, srcstride, height, hf, vf, r3src +cglobal %1_put_4tap_hv%2_%3, 7, 8, 16 , dst, dststride, src, srcstride, height, hf, vf, r3src %assign %%stride ((%3 + 7)/8) sub srcq, srcstrideq MC_4TAP_HV_FILTER %3 @@ -784,12 +782,12 @@ cglobal %1_put_uni_4tap_hv%2_%3, 7, 8, 16 , dst, dststride, src, srcstride, heig %endmacro ; ****************************** -; void put_8tap_hX_X_X(int16_t *dst, const uint8_t *_src, ptrdiff_t srcstride, +; void put_8tap_hX_X_X(int16_t *dst, ptrdiff_t 
dststride, const uint8_t *_src, ptrdiff_t srcstride, ; int height, const int8_t *hf, const int8_t *vf, int width) ; ****************************** %macro PUT_8TAP 3 -cglobal %1_put_8tap_h%2_%3, 5, 5, 16, dst, src, srcstride, height, hf +cglobal %1_put_8tap_h%2_%3, 6, 6, 16, dst, dststride, src, srcstride, height, hf MC_8TAP_FILTER %3, hf .loop: MC_8TAP_H_LOAD %3, srcq, %2, 10 @@ -824,10 +822,10 @@ cglobal %1_put_uni_8tap_h%2_%3, 6, 7, 16 , dst, dststride, src, srcstride, heigh ; ****************************** -; void put_8tap_vX_X_X(int16_t *dst, const uint8_t *_src, ptrdiff_t srcstride, +; void put_8tap_vX_X_X(int16_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t srcstride, ; int height, const int8_t *hf, const int8_t *vf, int width) ; ****************************** -cglobal %1_put_8tap_v%2_%3, 6, 8, 16, dst, src, srcstride, height, r3src, vf +cglobal %1_put_8tap_v%2_%3, 7, 8, 16, dst, dststride, src, srcstride, height, r3src, vf MC_8TAP_FILTER %3, vf lea r3srcq, [srcstrideq*3] .loop: @@ -866,11 +864,11 @@ cglobal %1_put_uni_8tap_v%2_%3, 7, 9, 16, dst, dststride, src, srcstride, height ; ****************************** -; void put_8tap_hvX_X(int16_t *dst, const uint8_t *_src, ptrdiff_t srcstride, +; void put_8tap_hvX_X(int16_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t srcstride, ; int height, const int8_t *hf, const int8_t *vf, int width) ; ****************************** %macro PUT_8TAP_HV 3 -cglobal %1_put_8tap_hv%2_%3, 6, 7, 16, 0 - mmsize*16, dst, src, srcstride, height, hf, vf, r3src +cglobal %1_put_8tap_hv%2_%3, 7, 8, 16, 0 - mmsize*16, dst, dststride, src, srcstride, height, hf, vf, r3src MC_8TAP_FILTER %3, hf, 0 lea hfq, [rsp] MC_8TAP_FILTER %3, vf, 8*mmsize diff --git a/libavcodec/x86/h26x/h2656dsp.c b/libavcodec/x86/h26x/h2656dsp.c index 27769f9c55..7ef1234936 100644 --- a/libavcodec/x86/h26x/h2656dsp.c +++ b/libavcodec/x86/h26x/h2656dsp.c @@ -24,7 +24,7 @@ #include "h2656dsp.h" #define mc_rep_func(name, bitd, step, W, opt) \ 
-void ff_h2656_put_##name##W##_##bitd##_##opt(int16_t *_dst, \ +void ff_h2656_put_##name##W##_##bitd##_##opt(int16_t *_dst, ptrdiff_t dststride, \ const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width) \ { \ int i; \ @@ -32,7 +32,7 @@ void ff_h2656_put_##name##W##_##bitd##_##opt(int16_t *_dst, for (i = 0; i < W; i += step) { \ const uint8_t *src = _src + (i * ((bitd + 7) / 8)); \ dst = _dst + i; \ - ff_h2656_put_##name##step##_##bitd##_##opt(dst, src, _srcstride, height, hf, vf, width); \ + ff_h2656_put_##name##step##_##bitd##_##opt(dst, dststride, src, _srcstride, height, hf, vf, width); \ } \ } diff --git a/libavcodec/x86/h26x/h2656dsp.h b/libavcodec/x86/h26x/h2656dsp.h index 8a2ab13607..e31aae6b0d 100644 --- a/libavcodec/x86/h26x/h2656dsp.h +++ b/libavcodec/x86/h26x/h2656dsp.h @@ -30,7 +30,7 @@ #include <stdlib.h> #define H2656_PEL_PROTOTYPE(name, D, opt) \ -void ff_h2656_put_ ## name ## _ ## D ## _##opt(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width); \ +void ff_h2656_put_ ## name ## _ ## D ## _##opt(int16_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width); \ void ff_h2656_put_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width); \ #define H2656_MC_8TAP_PROTOTYPES(fname, bitd, opt) \ diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index 5c19330e19..e0dc82eef0 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -96,7 +96,7 @@ void ff_hevc_put_hevc_ ## a ## _ ## depth ## _##opt(int16_t *dst, const uint8_t int height, intptr_t mx, intptr_t my,int width) \ { \ DECL_HV_FILTER(p) \ - ff_h2656_put_ ## b ## _ ## depth ## _##opt(dst, src, srcstride, height, hf, vf, width); \ + ff_h2656_put_ ## b ## _ ## 
depth ## _##opt(dst, 2 * MAX_PB_SIZE, src, srcstride, height, hf, vf, width); \ } #define FW_PUT_UNI(p, a, b, depth, opt) \ -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-01-23 18:17 UTC|newest] Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top [not found] <20240123181711.402946-1-toqsxw@outlook.com> 2024-01-23 18:17 ` [FFmpeg-devel] [PATCH v4 2/8] avcodec/hevcdsp_template: reuse put/put_luma/put_chroma from h2656_inter_template toqsxw 2024-01-23 18:17 ` [FFmpeg-devel] [PATCH v4 3/8] avcodec/x86/hevc_mc: move put/put_uni to h26x/h2656_inter.asm toqsxw 2024-01-23 18:17 ` toqsxw [this message] 2024-01-23 18:17 ` [FFmpeg-devel] [PATCH v4 5/8] avcodec/vvcdec: reuse h26x/2656_inter.asm to enable x86 optimizations toqsxw 2024-01-23 18:17 ` [FFmpeg-devel] [PATCH v4 6/8] tests/checkasm: add checkasm_check_vvc_mc toqsxw 2024-01-23 18:17 ` [FFmpeg-devel] [PATCH v4 7/8] avcodec/x86/vvc: add avg and avg_w AVX2 optimizations toqsxw 2024-01-23 18:17 ` [FFmpeg-devel] [PATCH v4 8/8] tests/checkasm/vvc_mc: add check_avg toqsxw
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=TYWP286MB2172B45C237A2232BD601FD7CA742@TYWP286MB2172.JPNP286.PROD.OUTLOOK.COM \
    --to=toqsxw@outlook.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git