From: "Logan.Lyu" <Logan.Lyu@myais.com.cn>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH 4/5] lavc/aarch64: new optimization for 8-bit hevc_epel_h
Date: Sun, 18 Jun 2023 16:26:48 +0800
Message-ID: <aa8a0543-3787-4610-e262-b8189fd381e3@myais.com.cn> (raw)
In-Reply-To: <d924c6e8-37ac-01a2-8c14-4fdc4b55ed4d@myais.com.cn>
[-- Attachment #1: Type: text/plain, Size: 3838 bytes --]
Add missing patch attachment...
在 2023/6/18 16:23, Logan.Lyu 写道:
> Hi, Martin,
>
> I modified it according to your comments. Please review again.
>
> And here are the checkasm benchmark results of the related functions:
>
> put_hevc_epel_h4_8_c: 67.1
> put_hevc_epel_h4_8_i8mm: 21.1
> put_hevc_epel_h6_8_c: 147.1
> put_hevc_epel_h6_8_i8mm: 45.1
> put_hevc_epel_h8_8_c: 237.4
> put_hevc_epel_h8_8_i8mm: 72.1
> put_hevc_epel_h12_8_c: 527.4
> put_hevc_epel_h12_8_i8mm: 115.4
> put_hevc_epel_h16_8_c: 943.6
> put_hevc_epel_h16_8_i8mm: 153.9
> put_hevc_epel_h24_8_c: 2105.4
> put_hevc_epel_h24_8_i8mm: 384.4
> put_hevc_epel_h32_8_c: 3631.4
> put_hevc_epel_h32_8_i8mm: 519.9
> put_hevc_epel_h48_8_c: 8082.1
> put_hevc_epel_h48_8_i8mm: 1110.4
> put_hevc_epel_h64_8_c: 14400.6
> put_hevc_epel_h64_8_i8mm: 2057.1
>
> put_hevc_qpel_h4_8_c: 124.9
> put_hevc_qpel_h4_8_neon: 43.1
> put_hevc_qpel_h4_8_i8mm: 33.1
> put_hevc_qpel_h6_8_c: 269.4
> put_hevc_qpel_h6_8_neon: 90.6
> put_hevc_qpel_h6_8_i8mm: 61.4
> put_hevc_qpel_h8_8_c: 477.6
> put_hevc_qpel_h8_8_neon: 82.1
> put_hevc_qpel_h8_8_i8mm: 99.9
> put_hevc_qpel_h12_8_c: 1062.4
> put_hevc_qpel_h12_8_neon: 226.9
> put_hevc_qpel_h12_8_i8mm: 170.9
> put_hevc_qpel_h16_8_c: 1880.6
> put_hevc_qpel_h16_8_neon: 302.9
> put_hevc_qpel_h16_8_i8mm: 251.4
> put_hevc_qpel_h24_8_c: 4221.9
> put_hevc_qpel_h24_8_neon: 893.9
> put_hevc_qpel_h24_8_i8mm: 626.1
> put_hevc_qpel_h32_8_c: 7437.6
> put_hevc_qpel_h32_8_neon: 1189.9
> put_hevc_qpel_h32_8_i8mm: 959.1
> put_hevc_qpel_h48_8_c: 16838.4
> put_hevc_qpel_h48_8_neon: 2727.9
> put_hevc_qpel_h48_8_i8mm: 2163.9
> put_hevc_qpel_h64_8_c: 29982.1
> put_hevc_qpel_h64_8_neon: 4777.6
>
>
> 在 2023/6/12 16:12, Martin Storsjö 写道:
>> On Sun, 4 Jun 2023, Logan.Lyu@myais.com.cn wrote:
>>
>>> From: Logan Lyu <Logan.Lyu@myais.com.cn>
>>>
>>> Signed-off-by: Logan Lyu <Logan.Lyu@myais.com.cn>
>>> ---
>>> libavcodec/aarch64/hevcdsp_epel_neon.S | 343 ++++++++++++++++++++++
>>> libavcodec/aarch64/hevcdsp_init_aarch64.c | 7 +-
>>> 2 files changed, 349 insertions(+), 1 deletion(-)
>>
>>
>>> + st2 {v20.8h, v21.8h}, [x7]
>>> + subs w3, w3, #1 // height
>>> + b.ne 1b
>>> + ret
>>
>> In general, place the loop counter decrement somewhere else than
>> exactly before the branch that depends on the result. E.g. after the
>> initial loads is usually a good place, or between the st1/2
>> instructions and the instructions that calculate the final output
>> values.
>>
>> The same goes probably for all places in all these patches.
>>
>>> @@ -283,13 +287,14 @@ av_cold void
>>> ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
>>> NEON8_FNASSIGN_PARTIAL_4(c->put_hevc_qpel_uni_w, 1, 0,
>>> qpel_uni_w_v,);
>>>
>>> if (have_i8mm(cpu_flags)) {
>>> + NEON8_FNASSIGN(c->put_hevc_epel, 0, 1, epel_h, _i8mm);
>>> NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 1,
>>> epel_uni_w_h ,_i8mm);
>>> NEON8_FNASSIGN(c->put_hevc_qpel, 0, 1, qpel_h, _i8mm);
>>> NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 1,
>>> qpel_uni_w_h, _i8mm);
>>> NEON8_FNASSIGN_PARTIAL_5(c->put_hevc_qpel_uni_w, 1, 1,
>>> qpel_uni_w_hv, _i8mm);
>>> }
>>> -
>>> }
>>> +
>>> if (bit_depth == 10) {
>>
>> Here are some stray unrelated whitespace changes.
>>
>> Other than that, this patch looks mostly reasonable.
>>
>> // Martin
>>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
[-- Attachment #2: 0004-lavc-aarch64-new-optimization-for-8-bit-hevc_epel_h.patch --]
[-- Type: text/plain, Size: 16093 bytes --]
From e5432a25ce05cb9c47e8bcd345d1ab0c1133c82b Mon Sep 17 00:00:00 2001
From: Logan Lyu <Logan.Lyu@myais.com.cn>
Date: Sun, 28 May 2023 10:30:28 +0800
Subject: [PATCH 4/5] lavc/aarch64: new optimization for 8-bit hevc_epel_h
Signed-off-by: Logan Lyu <Logan.Lyu@myais.com.cn>
---
libavcodec/aarch64/hevcdsp_epel_neon.S | 343 ++++++++++++++++++++++
libavcodec/aarch64/hevcdsp_init_aarch64.c | 5 +
2 files changed, 348 insertions(+)
diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S b/libavcodec/aarch64/hevcdsp_epel_neon.S
index ca37ce1786..8b6f396a0b 100644
--- a/libavcodec/aarch64/hevcdsp_epel_neon.S
+++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
@@ -33,6 +33,349 @@ const epel_filters, align=4
endconst
#if HAVE_I8MM
+
+.macro EPEL_H_HEADER // shared prologue of the epel_h functions: load the filter, bias src, set the dst row step
+ movrel x5, epel_filters // x5 = address of the epel_filters table
+ add x5, x5, x4, lsl #2 // pick the entry at table + x4 * 4 (x4 is the 5th argument, mx in the C prototype)
+ ld1r {v30.4s}, [x5] // replicate that 4-byte entry into every 32-bit lane of v30
+ sub x1, x1, #1 // start reading one byte before the src pointer
+ mov x10, #(MAX_PB_SIZE * 2) // x10 = byte step between dst rows used by the stores
+.endm
+
+function ff_hevc_put_hevc_epel_h4_8_neon_i8mm, export=1 // 4-tap filter along the row, 4 16-bit outputs per row
+ EPEL_H_HEADER
+1: ld1 {v4.8b}, [x1], x2 // load 8 source bytes, then advance src by the stride in x2
+ subs w3, w3, #1 // height: count rows down early; the flags are consumed by b.ne below
+ ext v5.8b, v4.8b, v4.8b, #1 // source bytes shifted by 1
+ ext v6.8b, v4.8b, v4.8b, #2 // source bytes shifted by 2
+ ext v7.8b, v4.8b, v4.8b, #3 // source bytes shifted by 3
+ trn1 v4.2s, v4.2s, v5.2s // 4-byte windows for outputs 0 and 1
+ trn1 v6.2s, v6.2s, v7.2s // 4-byte windows for outputs 2 and 3
+ trn1 v4.2d, v4.2d, v6.2d // v4 = windows for outputs 0..3, one per 32-bit lane
+ movi v16.2d, #0 // clear the accumulator, usdot adds into it
+ usdot v16.4s, v4.16b, v30.16b // per lane: dot product of a 4-byte window with the filter
+ xtn v16.4h, v16.4s // narrow the 32-bit sums to 16 bits
+ st1 {v16.4h}, [x0], x10 // store 4 results, advance dst by x10
+ b.ne 1b // repeat while rows remain
+ ret
+endfunc
+
+
+function ff_hevc_put_hevc_epel_h6_8_neon_i8mm, export=1 // 4-tap filter along the row, 6 16-bit outputs per row
+ EPEL_H_HEADER
+1: ld1 {v4.16b}, [x1], x2 // load 16 source bytes, then advance src by x2
+ subs w3, w3, #1 // height: count rows down early; the flags are consumed by b.ne below
+ ext v5.16b, v4.16b, v4.16b, #1 // source bytes shifted by 1
+ ext v6.8b, v4.8b, v4.8b, #2 // low half shifted by 2
+ ext v7.8b, v4.8b, v4.8b, #3 // low half shifted by 3
+ trn1 v16.2s, v4.2s, v5.2s // windows for outputs 0 and 1
+ trn2 v17.2s, v4.2s, v5.2s // windows for outputs 4 and 5
+ trn1 v6.2s, v6.2s, v7.2s // windows for outputs 2 and 3
+ trn1 v16.2d, v16.2d, v6.2d // v16 = windows for outputs 0..3
+ movi v18.2d, #0 // clear the accumulators, usdot adds into them
+ movi v19.2d, #0
+ usdot v18.4s, v16.16b, v30.16b // outputs 0..3
+ usdot v19.2s, v17.8b, v30.8b // outputs 4..5
+ xtn v18.4h, v18.4s // narrow to 16 bits
+ xtn v19.4h, v19.4s
+ str d18, [x0] // store outputs 0..3 (8 bytes)
+ str s19, [x0, #8] // store outputs 4..5 (4 bytes)
+ add x0, x0, x10 // advance dst to the next row
+ b.ne 1b // repeat while rows remain
+ ret
+endfunc
+
+function ff_hevc_put_hevc_epel_h8_8_neon_i8mm, export=1 // 4-tap filter along the row, 8 16-bit outputs per row
+ EPEL_H_HEADER
+1: ld1 {v4.16b}, [x1], x2 // load 16 source bytes, then advance src by x2
+ subs w3, w3, #1 // height: count rows down early; the flags are consumed by b.ne below
+ ext v5.16b, v4.16b, v4.16b, #1 // source bytes shifted by 1
+ ext v6.16b, v4.16b, v4.16b, #2 // source bytes shifted by 2
+ ext v7.16b, v4.16b, v4.16b, #3 // source bytes shifted by 3
+ zip1 v20.4s, v4.4s, v6.4s // windows for the even outputs 0,2,4,6
+ zip1 v21.4s, v5.4s, v7.4s // windows for the odd outputs 1,3,5,7
+ movi v16.2d, #0 // clear the accumulators, usdot adds into them
+ movi v17.2d, #0
+ usdot v16.4s, v20.16b, v30.16b // even outputs
+ usdot v17.4s, v21.16b, v30.16b // odd outputs
+ xtn v16.4h, v16.4s // narrow to 16 bits
+ xtn v17.4h, v17.4s
+ st2 {v16.4h, v17.4h}, [x0], x10 // interleave even/odd back into order 0..7, advance dst by x10
+ b.ne 1b // repeat while rows remain
+ ret
+endfunc
+
+function ff_hevc_put_hevc_epel_h12_8_neon_i8mm, export=1 // 4-tap filter along the row, 12 16-bit outputs per row
+ EPEL_H_HEADER
+1: ld1 {v4.16b}, [x1], x2 // load 16 source bytes, then advance src by x2
+ subs w3, w3, #1 // height: count rows down early; the flags are consumed by b.ne below
+ ext v5.16b, v4.16b, v4.16b, #1 // source bytes shifted by 1
+ ext v6.16b, v4.16b, v4.16b, #2 // source bytes shifted by 2
+ ext v7.16b, v4.16b, v4.16b, #3 // source bytes shifted by 3
+ trn1 v20.2d, v4.2d, v6.2d // low halves of shift 0 and shift 2
+ trn2 v22.2d, v4.2d, v6.2d // high halves of shift 0 and shift 2
+ trn1 v21.2d, v5.2d, v7.2d // low halves of shift 1 and shift 3
+ trn2 v23.2d, v5.2d, v7.2d // high halves of shift 1 and shift 3
+ trn1 v4.4s, v20.4s, v21.4s // windows for outputs 0..3
+ trn2 v5.4s, v20.4s, v21.4s // windows for outputs 4..7
+ trn1 v6.4s, v22.4s, v23.4s // windows for outputs 8..11
+ movi v16.2d, #0 // clear the accumulators, usdot adds into them
+ movi v17.2d, #0
+ movi v18.2d, #0
+ usdot v16.4s, v4.16b, v30.16b // outputs 0..3
+ usdot v17.4s, v5.16b, v30.16b // outputs 4..7
+ usdot v18.4s, v6.16b, v30.16b // outputs 8..11
+ xtn v16.4h, v16.4s // v16 low half = outputs 0..3
+ xtn2 v16.8h, v17.4s // v16 high half = outputs 4..7
+ xtn v18.4h, v18.4s // v18 low half = outputs 8..11
+ str q16, [x0] // store outputs 0..7 (16 bytes)
+ str d18, [x0, #16] // store outputs 8..11 (8 bytes)
+ add x0, x0, x10 // advance dst to the next row
+ b.ne 1b // repeat while rows remain
+ ret
+endfunc
+
+function ff_hevc_put_hevc_epel_h16_8_neon_i8mm, export=1 // 4-tap filter along the row, 16 16-bit outputs per row
+ EPEL_H_HEADER
+1: ld1 {v0.16b, v1.16b}, [x1], x2 // load 32 source bytes, then advance src by x2
+ subs w3, w3, #1 // height: count rows down early; the flags are consumed by b.ne below
+ ext v5.16b, v0.16b, v1.16b, #1 // source bytes shifted by 1 (tail taken from v1)
+ ext v6.16b, v0.16b, v1.16b, #2 // source bytes shifted by 2
+ ext v7.16b, v0.16b, v1.16b, #3 // source bytes shifted by 3
+ zip1 v20.4s, v0.4s, v6.4s // windows for outputs 0,2,4,6
+ zip2 v22.4s, v0.4s, v6.4s // windows for outputs 8,10,12,14
+ zip1 v21.4s, v5.4s, v7.4s // windows for outputs 1,3,5,7
+ zip2 v23.4s, v5.4s, v7.4s // windows for outputs 9,11,13,15
+ movi v16.2d, #0 // clear the accumulators, usdot adds into them
+ movi v17.2d, #0
+ movi v18.2d, #0
+ movi v19.2d, #0
+ usdot v16.4s, v20.16b, v30.16b
+ usdot v17.4s, v21.16b, v30.16b
+ usdot v18.4s, v22.16b, v30.16b
+ usdot v19.4s, v23.16b, v30.16b
+ xtn v16.4h, v16.4s // v16 = even outputs 0..14 after the xtn2 below
+ xtn2 v16.8h, v18.4s
+ xtn v17.4h, v17.4s // v17 = odd outputs 1..15 after the xtn2 below
+ xtn2 v17.8h, v19.4s
+ st2 {v16.8h, v17.8h}, [x0], x10 // interleave even/odd back into order 0..15, advance dst by x10
+ b.ne 1b // repeat while rows remain
+ ret
+endfunc
+
+function ff_hevc_put_hevc_epel_h24_8_neon_i8mm, export=1 // 4-tap filter along the row, 24 16-bit outputs per row
+ EPEL_H_HEADER
+1: ld1 {v0.16b, v1.16b}, [x1], x2 // load 32 source bytes, then advance src by x2
+ subs w3, w3, #1 // height: count rows down early; the flags are consumed by b.ne below
+ ext v5.16b, v0.16b, v1.16b, #1 // first 16 bytes shifted by 1 (tail taken from v1)
+ ext v6.16b, v0.16b, v1.16b, #2 // first 16 bytes shifted by 2
+ ext v7.16b, v0.16b, v1.16b, #3 // first 16 bytes shifted by 3
+ ext v26.16b, v1.16b, v1.16b, #1 // second 16 bytes rotated by 1; only the low lanes are used for stored outputs
+ ext v27.16b, v1.16b, v1.16b, #2 // second 16 bytes rotated by 2
+ ext v28.16b, v1.16b, v1.16b, #3 // second 16 bytes rotated by 3
+ movi v16.2d, #0 // clear the accumulators, usdot adds into them
+ movi v17.2d, #0
+ movi v18.2d, #0
+ movi v19.2d, #0
+ movi v20.2d, #0
+ movi v21.2d, #0
+ movi v22.2d, #0
+ movi v23.2d, #0
+ usdot v16.4s, v0.16b, v30.16b // outputs 0,4,8,12
+ usdot v17.4s, v5.16b, v30.16b // outputs 1,5,9,13
+ usdot v18.4s, v6.16b, v30.16b // outputs 2,6,10,14
+ usdot v19.4s, v7.16b, v30.16b // outputs 3,7,11,15
+ usdot v20.4s, v1.16b, v30.16b // lanes 0,1 = outputs 16,20
+ usdot v21.4s, v26.16b, v30.16b // lanes 0,1 = outputs 17,21
+ usdot v22.4s, v27.16b, v30.16b // lanes 0,1 = outputs 18,22
+ usdot v23.4s, v28.16b, v30.16b // lanes 0,1 = outputs 19,23
+ xtn v16.4h, v16.4s // narrow to 16 bits; high halves carry the second 16-byte group
+ xtn2 v16.8h, v20.4s
+ xtn v17.4h, v17.4s
+ xtn2 v17.8h, v21.4s
+ xtn v18.4h, v18.4s
+ xtn2 v18.8h, v22.4s
+ xtn v19.4h, v19.4s
+ xtn2 v19.8h, v23.4s
+ zip1 v20.8h, v16.8h, v18.8h // even outputs 0,2,...,14
+ zip1 v21.8h, v17.8h, v19.8h // odd outputs 1,3,...,15
+ zip2 v22.8h, v16.8h, v18.8h // low lanes = outputs 16,18,20,22
+ zip2 v23.8h, v17.8h, v19.8h // low lanes = outputs 17,19,21,23
+ zip1 v22.8h, v22.8h, v23.8h // v22 = outputs 16..23 in order
+ add x7, x0, #32 // address of outputs 16..23, taken before x0 is post-incremented
+ st2 {v20.8h, v21.8h}, [x0], x10 // store outputs 0..15 in order, advance dst by x10
+ st1 {v22.8h}, [x7] // store outputs 16..23
+ b.ne 1b // repeat while rows remain
+ ret
+endfunc
+
+function ff_hevc_put_hevc_epel_h32_8_neon_i8mm, export=1 // 4-tap filter along the row, 32 16-bit outputs per row
+ EPEL_H_HEADER
+1: ld1 {v0.16b, v1.16b, v2.16b}, [x1], x2 // load 48 source bytes, then advance src by x2
+ subs w3, w3, #1 // height: count rows down early; the flags are consumed by b.ne below
+ ext v5.16b, v0.16b, v1.16b, #1 // bytes 0..15 shifted by 1
+ ext v6.16b, v0.16b, v1.16b, #2 // bytes 0..15 shifted by 2
+ ext v7.16b, v0.16b, v1.16b, #3 // bytes 0..15 shifted by 3
+ ext v26.16b, v1.16b, v2.16b, #1 // bytes 16..31 shifted by 1
+ ext v27.16b, v1.16b, v2.16b, #2 // bytes 16..31 shifted by 2
+ ext v28.16b, v1.16b, v2.16b, #3 // bytes 16..31 shifted by 3
+ movi v16.2d, #0 // clear the accumulators, usdot adds into them
+ movi v17.2d, #0
+ movi v18.2d, #0
+ movi v19.2d, #0
+ movi v20.2d, #0
+ movi v21.2d, #0
+ movi v22.2d, #0
+ movi v23.2d, #0
+ usdot v16.4s, v0.16b, v30.16b // outputs 0,4,8,12
+ usdot v17.4s, v5.16b, v30.16b // outputs 1,5,9,13
+ usdot v18.4s, v6.16b, v30.16b // outputs 2,6,10,14
+ usdot v19.4s, v7.16b, v30.16b // outputs 3,7,11,15
+ usdot v20.4s, v1.16b, v30.16b // outputs 16,20,24,28
+ usdot v21.4s, v26.16b, v30.16b // outputs 17,21,25,29
+ usdot v22.4s, v27.16b, v30.16b // outputs 18,22,26,30
+ usdot v23.4s, v28.16b, v30.16b // outputs 19,23,27,31
+ xtn v16.4h, v16.4s // v16 = outputs 0,4,...,28 after the xtn2 below
+ xtn2 v16.8h, v20.4s
+ xtn v17.4h, v17.4s // v17 = outputs 1,5,...,29
+ xtn2 v17.8h, v21.4s
+ xtn v18.4h, v18.4s // v18 = outputs 2,6,...,30
+ xtn2 v18.8h, v22.4s
+ xtn v19.4h, v19.4s // v19 = outputs 3,7,...,31
+ xtn2 v19.8h, v23.4s
+ st4 {v16.8h, v17.8h, v18.8h, v19.8h}, [x0], x10 // 4-way interleave restores order 0..31, advance dst by x10
+ b.ne 1b // repeat while rows remain
+ ret
+endfunc
+
+function ff_hevc_put_hevc_epel_h48_8_neon_i8mm, export=1 // 4-tap filter along the row, 48 16-bit outputs per row
+ EPEL_H_HEADER
+1: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x1], x2 // load 64 source bytes, then advance src by x2
+ subs w3, w3, #1 // height: count rows down early; the flags are consumed by b.ne below
+ ext v4.16b, v0.16b, v1.16b, #1 // bytes 0..15 shifted by 1
+ ext v5.16b, v0.16b, v1.16b, #2 // bytes 0..15 shifted by 2
+ ext v6.16b, v0.16b, v1.16b, #3 // bytes 0..15 shifted by 3
+ ext v16.16b, v1.16b, v2.16b, #1 // bytes 16..31 shifted by 1
+ ext v17.16b, v1.16b, v2.16b, #2 // bytes 16..31 shifted by 2
+ ext v18.16b, v1.16b, v2.16b, #3 // bytes 16..31 shifted by 3
+ movi v20.2d, #0 // clear the accumulators, usdot adds into them
+ movi v21.2d, #0
+ movi v22.2d, #0
+ movi v23.2d, #0
+ usdot v20.4s, v0.16b, v30.16b // outputs 0,4,8,12
+ usdot v21.4s, v4.16b, v30.16b // outputs 1,5,9,13
+ usdot v22.4s, v5.16b, v30.16b // outputs 2,6,10,14
+ usdot v23.4s, v6.16b, v30.16b // outputs 3,7,11,15
+ movi v24.2d, #0
+ movi v25.2d, #0
+ movi v26.2d, #0
+ movi v27.2d, #0
+ usdot v24.4s, v1.16b, v30.16b // outputs 16,20,24,28
+ usdot v25.4s, v16.16b, v30.16b // outputs 17,21,25,29
+ usdot v26.4s, v17.16b, v30.16b // outputs 18,22,26,30
+ usdot v27.4s, v18.16b, v30.16b // outputs 19,23,27,31
+ xtn v20.4h, v20.4s // v20 = outputs 0,4,...,28 after the xtn2 below
+ xtn2 v20.8h, v24.4s
+ xtn v21.4h, v21.4s // v21 = outputs 1,5,...,29
+ xtn2 v21.8h, v25.4s
+ xtn v22.4h, v22.4s // v22 = outputs 2,6,...,30
+ xtn2 v22.8h, v26.4s
+ xtn v23.4h, v23.4s // v23 = outputs 3,7,...,31
+ xtn2 v23.8h, v27.4s
+ add x7, x0, #64 // address of outputs 32..47 in THIS row; must be taken before x0 is post-incremented
+ st4 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], x10 // 4-way interleave restores order 0..31, advance dst by x10
+ ext v4.16b, v2.16b, v3.16b, #1 // bytes 32..47 shifted by 1
+ ext v5.16b, v2.16b, v3.16b, #2 // bytes 32..47 shifted by 2
+ ext v6.16b, v2.16b, v3.16b, #3 // bytes 32..47 shifted by 3
+ movi v20.2d, #0
+ movi v21.2d, #0
+ movi v22.2d, #0
+ movi v23.2d, #0
+ usdot v20.4s, v2.16b, v30.16b // outputs 32,36,40,44
+ usdot v21.4s, v4.16b, v30.16b // outputs 33,37,41,45
+ usdot v22.4s, v5.16b, v30.16b // outputs 34,38,42,46
+ usdot v23.4s, v6.16b, v30.16b // outputs 35,39,43,47
+ xtn v20.4h, v20.4s // narrow each phase separately; packing pairs with xtn2 and using st2 would scramble the order
+ xtn v21.4h, v21.4s
+ xtn v22.4h, v22.4s
+ xtn v23.4h, v23.4s
+ st4 {v20.4h, v21.4h, v22.4h, v23.4h}, [x7] // 4-way interleave restores order 32..47
+ b.ne 1b // repeat while rows remain
+ ret
+endfunc
+
+function ff_hevc_put_hevc_epel_h64_8_neon_i8mm, export=1 // 4-tap filter along the row, 64 16-bit outputs per row
+ EPEL_H_HEADER
+ sub x2, x2, #64 // the first load below already advances src by 64, so keep only the remainder of the stride
+1: ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x1], #64 // load 64 source bytes, advance src by 64
+ subs w3, w3, #1 // height: count rows down early; the flags are consumed by b.ne below
+ ext v4.16b, v0.16b, v1.16b, #1 // bytes 0..15 shifted by 1
+ ext v5.16b, v0.16b, v1.16b, #2 // bytes 0..15 shifted by 2
+ ext v6.16b, v0.16b, v1.16b, #3 // bytes 0..15 shifted by 3
+ ext v16.16b, v1.16b, v2.16b, #1 // bytes 16..31 shifted by 1
+ ext v17.16b, v1.16b, v2.16b, #2 // bytes 16..31 shifted by 2
+ ext v18.16b, v1.16b, v2.16b, #3 // bytes 16..31 shifted by 3
+ movi v20.2d, #0 // clear the accumulators, usdot adds into them
+ movi v21.2d, #0
+ movi v22.2d, #0
+ movi v23.2d, #0
+ usdot v20.4s, v0.16b, v30.16b // outputs 0,4,8,12
+ usdot v21.4s, v4.16b, v30.16b // outputs 1,5,9,13
+ usdot v22.4s, v5.16b, v30.16b // outputs 2,6,10,14
+ usdot v23.4s, v6.16b, v30.16b // outputs 3,7,11,15
+ movi v24.2d, #0
+ movi v25.2d, #0
+ movi v26.2d, #0
+ movi v27.2d, #0
+ usdot v24.4s, v1.16b, v30.16b // outputs 16,20,24,28
+ usdot v25.4s, v16.16b, v30.16b // outputs 17,21,25,29
+ usdot v26.4s, v17.16b, v30.16b // outputs 18,22,26,30
+ usdot v27.4s, v18.16b, v30.16b // outputs 19,23,27,31
+ xtn v20.4h, v20.4s // v20 = outputs 0,4,...,28 after the xtn2 below
+ xtn2 v20.8h, v24.4s
+ xtn v21.4h, v21.4s // v21 = outputs 1,5,...,29
+ xtn2 v21.8h, v25.4s
+ xtn v22.4h, v22.4s // v22 = outputs 2,6,...,30
+ xtn2 v22.8h, v26.4s
+ xtn v23.4h, v23.4s // v23 = outputs 3,7,...,31
+ xtn2 v23.8h, v27.4s
+ st4 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 // store outputs 0..31 in order, advance dst by 64 bytes
+ ld1 {v7.8b}, [x1], x2 // 8 more source bytes for the last windows, then step src to the next row
+ ext v4.16b, v2.16b, v3.16b, #1 // bytes 32..47 shifted by 1
+ ext v5.16b, v2.16b, v3.16b, #2 // bytes 32..47 shifted by 2
+ ext v6.16b, v2.16b, v3.16b, #3 // bytes 32..47 shifted by 3
+ ext v16.16b, v3.16b, v7.16b, #1 // bytes 48..63 shifted by 1 (tail taken from v7)
+ ext v17.16b, v3.16b, v7.16b, #2 // bytes 48..63 shifted by 2
+ ext v18.16b, v3.16b, v7.16b, #3 // bytes 48..63 shifted by 3
+ movi v20.2d, #0
+ movi v21.2d, #0
+ movi v22.2d, #0
+ movi v23.2d, #0
+ usdot v20.4s, v2.16b, v30.16b // outputs 32,36,40,44
+ usdot v21.4s, v4.16b, v30.16b // outputs 33,37,41,45
+ usdot v22.4s, v5.16b, v30.16b // outputs 34,38,42,46
+ usdot v23.4s, v6.16b, v30.16b // outputs 35,39,43,47
+ movi v24.2d, #0
+ movi v25.2d, #0
+ movi v26.2d, #0
+ movi v27.2d, #0
+ usdot v24.4s, v3.16b, v30.16b // outputs 48,52,56,60
+ usdot v25.4s, v16.16b, v30.16b // outputs 49,53,57,61
+ usdot v26.4s, v17.16b, v30.16b // outputs 50,54,58,62
+ usdot v27.4s, v18.16b, v30.16b // outputs 51,55,59,63
+ xtn v20.4h, v20.4s // v20 = outputs 32,36,...,60 after the xtn2 below
+ xtn2 v20.8h, v24.4s
+ xtn v21.4h, v21.4s // v21 = outputs 33,37,...,61
+ xtn2 v21.8h, v25.4s
+ xtn v22.4h, v22.4s // v22 = outputs 34,38,...,62
+ xtn2 v22.8h, v26.4s
+ xtn v23.4h, v23.4s // v23 = outputs 35,39,...,63
+ xtn2 v23.8h, v27.4s
+ st4 {v20.8h, v21.8h, v22.8h, v23.8h}, [x0], #64 // store outputs 32..63; NOTE(review): the two #64 steps advance dst by 128 bytes per row, presumably equal to MAX_PB_SIZE * 2 - confirm
+ b.ne 1b // repeat while rows remain
+ ret
+endfunc
+
.macro EPEL_UNI_W_H_HEADER
ldr x12, [sp]
sub x2, x2, #1
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 4a260e1d9a..b448d755b9 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -171,6 +171,10 @@ NEON8_FNPROTO_PARTIAL_4(qpel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
int height, int denom, int wx, int ox,
intptr_t mx, intptr_t my, int width),);
+NEON8_FNPROTO(epel_h, (int16_t *dst,
+ const uint8_t *_src, ptrdiff_t _srcstride,
+ int height, intptr_t mx, intptr_t my, int width), _i8mm);
+
NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
const uint8_t *_src, ptrdiff_t _srcstride,
int height, int denom, int wx, int ox,
@@ -283,6 +287,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
NEON8_FNASSIGN_PARTIAL_4(c->put_hevc_qpel_uni_w, 1, 0, qpel_uni_w_v,);
if (have_i8mm(cpu_flags)) {
+ NEON8_FNASSIGN(c->put_hevc_epel, 0, 1, epel_h, _i8mm);
NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 1, epel_uni_w_h ,_i8mm);
NEON8_FNASSIGN(c->put_hevc_qpel, 0, 1, qpel_h, _i8mm);
NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 1, qpel_uni_w_h, _i8mm);
--
2.38.0.windows.1
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2023-06-18 8:27 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-04 4:17 [FFmpeg-devel] [PATCH 1/5] lavc/aarch64: new optimization for 8-bit hevc_pel_uni_pixels Logan.Lyu
2023-06-04 4:17 ` [FFmpeg-devel] [PATCH 2/5] lavc/aarch64: new optimization for 8-bit hevc_epel_uni_w_h Logan.Lyu
2023-06-12 7:59 ` Martin Storsjö
2023-06-18 8:21 ` Logan.Lyu
2023-06-04 4:17 ` [FFmpeg-devel] [PATCH 3/5] lavc/aarch64: new optimization for 8-bit hevc_epel_uni_w_v Logan.Lyu
2023-06-12 8:09 ` Martin Storsjö
2023-06-12 9:08 ` Martin Storsjö
2023-06-18 8:22 ` Logan.Lyu
2023-07-01 21:21 ` Martin Storsjö
2023-06-04 4:17 ` [FFmpeg-devel] [PATCH 4/5] lavc/aarch64: new optimization for 8-bit hevc_epel_h Logan.Lyu
2023-06-12 8:12 ` Martin Storsjö
2023-06-18 8:23 ` Logan.Lyu
2023-06-18 8:26 ` Logan.Lyu [this message]
2023-06-04 4:17 ` [FFmpeg-devel] [PATCH 5/5] lavc/aarch64: new optimization for 8-bit hevc_epel_uni_w_hv Logan.Lyu
2023-06-12 8:19 ` Martin Storsjö
2023-06-18 8:25 ` Logan.Lyu
2023-07-01 21:28 ` Martin Storsjö
2023-07-13 14:54 ` Logan.Lyu
2023-07-14 9:28 ` Martin Storsjö
2023-06-12 7:47 ` [FFmpeg-devel] [PATCH 1/5] lavc/aarch64: new optimization for 8-bit hevc_pel_uni_pixels Martin Storsjö
2023-06-18 8:29 ` Logan.Lyu
2023-07-01 21:16 ` Martin Storsjö
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aa8a0543-3787-4610-e262-b8189fd381e3@myais.com.cn \
--to=logan.lyu@myais.com.cn \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git