Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
@ 2023-11-18  2:06 Logan.Lyu
  2023-11-19 20:42 ` Michael Niedermayer
  2023-12-01 18:09 ` Martin Storsjö
  0 siblings, 2 replies; 8+ messages in thread
From: Logan.Lyu @ 2023-11-18  2:06 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: jdek

[-- Attachment #1: Type: text/plain, Size: 10265 bytes --]

put_hevc_pel_bi_pixels4_8_c: 54.7
put_hevc_pel_bi_pixels4_8_neon: 43.0
put_hevc_pel_bi_pixels6_8_c: 94.7
put_hevc_pel_bi_pixels6_8_neon: 37.0
put_hevc_pel_bi_pixels8_8_c: 171.0
put_hevc_pel_bi_pixels8_8_neon: 24.0
put_hevc_pel_bi_pixels12_8_c: 354.0
put_hevc_pel_bi_pixels12_8_neon: 68.7
put_hevc_pel_bi_pixels16_8_c: 588.2
put_hevc_pel_bi_pixels16_8_neon: 77.5
put_hevc_pel_bi_pixels24_8_c: 1670.7
put_hevc_pel_bi_pixels24_8_neon: 173.0
put_hevc_pel_bi_pixels32_8_c: 2267.7
put_hevc_pel_bi_pixels32_8_neon: 281.2
put_hevc_pel_bi_pixels48_8_c: 5787.5
put_hevc_pel_bi_pixels48_8_neon: 673.5
put_hevc_pel_bi_pixels64_8_c: 9897.0
put_hevc_pel_bi_pixels64_8_neon: 1159.5

Co-Authored-By: J. Dekker <jdek@itanimul.li>
Signed-off-by: Logan Lyu <Logan.Lyu@myais.com.cn>
---
  libavcodec/aarch64/hevcdsp_epel_neon.S    | 179 ++++++++++++++++++++++
  libavcodec/aarch64/hevcdsp_init_aarch64.c |  10 +-
  2 files changed, 187 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S b/libavcodec/aarch64/hevcdsp_epel_neon.S
index 708b903b00..74165273d7 100644
--- a/libavcodec/aarch64/hevcdsp_epel_neon.S
+++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
@@ -244,6 +244,185 @@ function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
  endfunc
 
 
+function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1
+        mov             x10, #(MAX_PB_SIZE * 2)
+1:      ld1             {v0.s}[0], [x2], x3 // src
+        ushll           v16.8h, v0.8b, #6
+        ld1             {v20.4h}, [x4], x10 // src2
+        sqadd           v16.8h, v16.8h, v20.8h
+        sqrshrun        v0.8b,  v16.8h, #7
+        st1             {v0.s}[0], [x0], x1
+        subs            w5, w5, #1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1
+        mov             x10, #(MAX_PB_SIZE * 2)
+        sub             x1, x1, #4
+1:      ld1             {v0.8b}, [x2], x3
+        ushll           v16.8h, v0.8b, #6
+        ld1             {v20.8h}, [x4], x10
+        sqadd           v16.8h, v16.8h, v20.8h
+        sqrshrun        v0.8b,  v16.8h, #7
+        st1             {v0.s}[0], [x0], #4
+        st1             {v0.h}[2], [x0], x1
+        subs            w5, w5, #1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1
+        mov             x10, #(MAX_PB_SIZE * 2)
+1:      ld1             {v0.8b}, [x2], x3    // src
+        ushll           v16.8h, v0.8b, #6
+        ld1             {v20.8h}, [x4], x10  // src2
+        sqadd           v16.8h, v16.8h, v20.8h
+        sqrshrun        v0.8b,  v16.8h, #7
+        subs            w5, w5, #1
+        st1             {v0.8b}, [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1
+        mov             x10, #(MAX_PB_SIZE * 2)
+        sub             x1, x1, #8
+1:      ld1             {v0.16b}, [x2], x3
+        ushll           v16.8h, v0.8b, #6
+        ushll2          v17.8h, v0.16b, #6
+        ld1             {v20.8h, v21.8h}, [x4], x10
+        sqadd           v16.8h, v16.8h, v20.8h
+        sqadd           v17.8h, v17.8h, v21.8h
+        sqrshrun        v0.8b,  v16.8h, #7
+        sqrshrun2       v0.16b, v17.8h, #7
+        st1             {v0.8b}, [x0], #8
+        subs            w5, w5, #1
+        st1             {v0.s}[2], [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1
+        mov             x10, #(MAX_PB_SIZE * 2)
+1:      ld1             {v0.16b}, [x2], x3 // src
+        ushll           v16.8h, v0.8b, #6
+        ushll2          v17.8h, v0.16b, #6
+        ld1             {v20.8h, v21.8h}, [x4], x10  // src2
+        sqadd           v16.8h, v16.8h, v20.8h
+        sqadd           v17.8h, v17.8h, v21.8h
+        sqrshrun        v0.8b,  v16.8h, #7
+        sqrshrun2       v0.16b, v17.8h, #7
+        subs            w5, w5, #1
+        st1             {v0.16b}, [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1
+        mov             x10, #(MAX_PB_SIZE * 2)
+1:      ld1             {v0.8b-v2.8b}, [x2], x3 // src
+        ushll           v16.8h, v0.8b, #6
+        ushll           v17.8h, v1.8b, #6
+        ushll           v18.8h, v2.8b, #6
+        ld1             {v20.8h-v22.8h}, [x4], x10  // src2
+        sqadd           v16.8h, v16.8h, v20.8h
+        sqadd           v17.8h, v17.8h, v21.8h
+        sqadd           v18.8h, v18.8h, v22.8h
+        sqrshrun        v0.8b, v16.8h, #7
+        sqrshrun        v1.8b, v17.8h, #7
+        sqrshrun        v2.8b, v18.8h, #7
+        subs            w5, w5, #1
+        st1             {v0.8b-v2.8b}, [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels32_8_neon, export=1
+        mov             x10, #(MAX_PB_SIZE * 2)
+1:      ld1             {v0.16b-v1.16b}, [x2], x3 // src
+        ushll           v16.8h, v0.8b, #6
+        ushll2          v17.8h, v0.16b, #6
+        ushll           v18.8h, v1.8b, #6
+        ushll2          v19.8h, v1.16b, #6
+        ld1             {v20.8h-v23.8h}, [x4], x10  // src2
+        sqadd           v16.8h, v16.8h, v20.8h
+        sqadd           v17.8h, v17.8h, v21.8h
+        sqadd           v18.8h, v18.8h, v22.8h
+        sqadd           v19.8h, v19.8h, v23.8h
+        sqrshrun        v0.8b,  v16.8h, #7
+        sqrshrun2       v0.16b, v17.8h, #7
+        sqrshrun        v1.8b,  v18.8h, #7
+        sqrshrun2       v1.16b, v19.8h, #7
+        st1             {v0.16b-v1.16b}, [x0], x1
+        subs            w5, w5, #1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels48_8_neon, export=1
+        mov             x10, #(MAX_PB_SIZE)
+1:      ld1             {v0.16b-v2.16b}, [x2], x3 // src
+        ushll           v16.8h, v0.8b, #6
+        ushll2          v17.8h, v0.16b, #6
+        ushll           v18.8h, v1.8b, #6
+        ushll2          v19.8h, v1.16b, #6
+        ushll           v20.8h, v2.8b, #6
+        ushll2          v21.8h, v2.16b, #6
+        ld1             {v24.8h-v27.8h}, [x4], #(MAX_PB_SIZE) // src2
+        sqadd           v16.8h, v16.8h, v24.8h
+        sqadd           v17.8h, v17.8h, v25.8h
+        sqadd           v18.8h, v18.8h, v26.8h
+        sqadd           v19.8h, v19.8h, v27.8h
+        ld1             {v24.8h-v25.8h}, [x4], x10
+        sqadd           v20.8h, v20.8h, v24.8h
+        sqadd           v21.8h, v21.8h, v25.8h
+        sqrshrun        v0.8b, v16.8h, #7
+        sqrshrun2       v0.16b, v17.8h, #7
+        sqrshrun        v1.8b, v18.8h, #7
+        sqrshrun2       v1.16b, v19.8h, #7
+        sqrshrun        v2.8b, v20.8h, #7
+        sqrshrun2       v2.16b, v21.8h, #7
+        subs            w5, w5, #1
+        st1             {v0.16b-v2.16b}, [x0], x1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels64_8_neon, export=1
+1:      ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], x3 // src
+        ushll           v16.8h, v0.8b, #6
+        ushll2          v17.8h, v0.16b, #6
+        ushll           v18.8h, v1.8b, #6
+        ushll2          v19.8h, v1.16b, #6
+        ushll           v20.8h, v2.8b, #6
+        ushll2          v21.8h, v2.16b, #6
+        ushll           v22.8h, v3.8b, #6
+        ushll2          v23.8h, v3.16b, #6
+        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE) // src2
+        sqadd           v16.8h, v16.8h, v24.8h
+        sqadd           v17.8h, v17.8h, v25.8h
+        sqadd           v18.8h, v18.8h, v26.8h
+        sqadd           v19.8h, v19.8h, v27.8h
+        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE)
+        sqadd           v20.8h, v20.8h, v24.8h
+        sqadd           v21.8h, v21.8h, v25.8h
+        sqadd           v22.8h, v22.8h, v26.8h
+        sqadd           v23.8h, v23.8h, v27.8h
+        sqrshrun        v0.8b, v16.8h, #7
+        sqrshrun2       v0.16b, v17.8h, #7
+        sqrshrun        v1.8b, v18.8h, #7
+        sqrshrun2       v1.16b, v19.8h, #7
+        sqrshrun        v2.8b, v20.8h, #7
+        sqrshrun2       v2.16b, v21.8h, #7
+        sqrshrun        v3.8b, v22.8h, #7
+        sqrshrun2       v3.16b, v23.8h, #7
+        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1
+        subs            w5, w5, #1
+        b.ne            1b
+        ret
+endfunc
+
  function ff_hevc_put_hevc_epel_v4_8_neon, export=1
          load_epel_filterb x5, x4
          sub             x1, x1, x2
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index c51488275c..cf171023e7 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -156,8 +156,12 @@ NEON8_FNPROTO(pel_pixels, (int16_t *dst,
         const uint8_t *src, ptrdiff_t srcstride,
         int height, intptr_t mx, intptr_t my, int width),);
 
+NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride,
+        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+        int height, intptr_t mx, intptr_t my, int width),);
+
 NEON8_FNPROTO(epel_v, (int16_t *dst,
         const uint8_t *src, ptrdiff_t srcstride,
         int height, intptr_t mx, intptr_t my, int width),);
 
 NEON8_FNPROTO(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
@@ -324,6 +328,8 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
          NEON8_FNASSIGN(c->put_hevc_epel, 1, 0, epel_v,);
          NEON8_FNASSIGN(c->put_hevc_qpel, 0, 0, pel_pixels,);
          NEON8_FNASSIGN(c->put_hevc_qpel, 1, 0, qpel_v,);
+        NEON8_FNASSIGN(c->put_hevc_epel_bi, 0, 0, pel_bi_pixels,);
+        NEON8_FNASSIGN(c->put_hevc_qpel_bi, 0, 0, pel_bi_pixels,);
          NEON8_FNASSIGN(c->put_hevc_epel_uni, 0, 0, pel_uni_pixels,);
          NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 0, epel_uni_v,);
          NEON8_FNASSIGN(c->put_hevc_qpel_uni, 0, 0, pel_uni_pixels,);
-- 
2.38.0.windows.1

[-- Attachment #2: 0001-lavc-aarch64-new-optimization-for-8-bit-hevc_pel_bi_pixels.patch --]
[-- Type: text/plain, Size: 10427 bytes --]

From f91409b1e35d955b639160942765455cd6c49ab3 Mon Sep 17 00:00:00 2001
From: Logan Lyu <Logan.Lyu@myais.com.cn>
Date: Sun, 5 Nov 2023 16:33:17 +0800
Subject: [PATCH 1/6] lavc/aarch64: new optimization for 8-bit
 hevc_pel_bi_pixels

put_hevc_pel_bi_pixels4_8_c: 54.7
put_hevc_pel_bi_pixels4_8_neon: 43.0
put_hevc_pel_bi_pixels6_8_c: 94.7
put_hevc_pel_bi_pixels6_8_neon: 37.0
put_hevc_pel_bi_pixels8_8_c: 171.0
put_hevc_pel_bi_pixels8_8_neon: 24.0
put_hevc_pel_bi_pixels12_8_c: 354.0
put_hevc_pel_bi_pixels12_8_neon: 68.7
put_hevc_pel_bi_pixels16_8_c: 588.2
put_hevc_pel_bi_pixels16_8_neon: 77.5
put_hevc_pel_bi_pixels24_8_c: 1670.7
put_hevc_pel_bi_pixels24_8_neon: 173.0
put_hevc_pel_bi_pixels32_8_c: 2267.7
put_hevc_pel_bi_pixels32_8_neon: 281.2
put_hevc_pel_bi_pixels48_8_c: 5787.5
put_hevc_pel_bi_pixels48_8_neon: 673.5
put_hevc_pel_bi_pixels64_8_c: 9897.0
put_hevc_pel_bi_pixels64_8_neon: 1159.5

Co-Authored-By: J. Dekker <jdek@itanimul.li>
---
 libavcodec/aarch64/hevcdsp_epel_neon.S    | 179 ++++++++++++++++++++++
 libavcodec/aarch64/hevcdsp_init_aarch64.c |  10 +-
 2 files changed, 187 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S b/libavcodec/aarch64/hevcdsp_epel_neon.S
index 708b903b00..74165273d7 100644
--- a/libavcodec/aarch64/hevcdsp_epel_neon.S
+++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
@@ -244,6 +244,185 @@ function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
 endfunc
 
 
+function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1 // width 4: dst = sat_u8(((src << 6) + src2 + 64) >> 7)
+        mov             x10, #(MAX_PB_SIZE * 2) // byte step applied to the src2 pointer after each row
+1:      ld1             {v0.s}[0], [x2], x3 // src: 4 pixels into lane 0, then x2 += srcstride (x3)
+        ushll           v16.8h, v0.8b, #6 // widen u8 -> 16 bit and multiply by 64; only lanes 0-3 are stored later
+        ld1             {v20.4h}, [x4], x10 // src2: 4 int16 values, then x4 += x10
+        sqadd           v16.8h, v16.8h, v20.8h // signed saturating sum of the two predictions
+        sqrshrun        v0.8b,  v16.8h, #7 // rounding shift right by 7, saturate to unsigned 8 bit
+        st1             {v0.s}[0], [x0], x1 // write 4 pixels, then x0 += dststride (x1)
+        subs            w5, w5, #1 // w5 = remaining rows (height argument)
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels6_8_neon, export=1 // width 6: dst = sat_u8(((src << 6) + src2 + 64) >> 7)
+        mov             x10, #(MAX_PB_SIZE * 2) // byte step applied to the src2 pointer after each row
+        sub             x1, x1, #4 // the first store below already advances x0 by 4
+1:      ld1             {v0.8b}, [x2], x3 // src: loads 8 bytes although only 6 results are stored
+        ushll           v16.8h, v0.8b, #6 // widen u8 -> 16 bit and multiply by 64
+        ld1             {v20.8h}, [x4], x10 // src2: 8 int16 values, then x4 += x10
+        sqadd           v16.8h, v16.8h, v20.8h // signed saturating sum of the two predictions
+        sqrshrun        v0.8b,  v16.8h, #7 // rounding shift right by 7, saturate to unsigned 8 bit
+        st1             {v0.s}[0], [x0], #4 // pixels 0-3
+        st1             {v0.h}[2], [x0], x1 // pixels 4-5, then move x0 to the start of the next row
+        subs            w5, w5, #1 // w5 = remaining rows (height argument)
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels8_8_neon, export=1 // width 8: dst = sat_u8(((src << 6) + src2 + 64) >> 7)
+        mov             x10, #(MAX_PB_SIZE * 2) // byte step applied to the src2 pointer after each row
+1:      ld1             {v0.8b}, [x2], x3    // src: 8 pixels, then x2 += srcstride (x3)
+        ushll           v16.8h, v0.8b, #6 // widen u8 -> 16 bit and multiply by 64
+        ld1             {v20.8h}, [x4], x10  // src2: 8 int16 values, then x4 += x10
+        sqadd           v16.8h, v16.8h, v20.8h // signed saturating sum of the two predictions
+        sqrshrun        v0.8b,  v16.8h, #7 // rounding shift right by 7, saturate to unsigned 8 bit
+        subs            w5, w5, #1 // w5 = remaining rows; the store below leaves the flags untouched
+        st1             {v0.8b}, [x0], x1 // write 8 pixels, then x0 += dststride (x1)
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels12_8_neon, export=1 // width 12: dst = sat_u8(((src << 6) + src2 + 64) >> 7)
+        mov             x10, #(MAX_PB_SIZE * 2) // byte step applied to the src2 pointer after each row
+        sub             x1, x1, #8 // the first store below already advances x0 by 8
+1:      ld1             {v0.16b}, [x2], x3 // src: loads 16 bytes although only 12 results are stored
+        ushll           v16.8h, v0.8b, #6 // pixels 0-7: widen and multiply by 64
+        ushll2          v17.8h, v0.16b, #6 // pixels 8-15: widen and multiply by 64
+        ld1             {v20.8h, v21.8h}, [x4], x10 // src2: 16 int16 values, then x4 += x10
+        sqadd           v16.8h, v16.8h, v20.8h // signed saturating sum of the two predictions
+        sqadd           v17.8h, v17.8h, v21.8h
+        sqrshrun        v0.8b,  v16.8h, #7 // rounding shift right by 7, saturate to u8 (low half of v0)
+        sqrshrun2       v0.16b, v17.8h, #7 // same for the high half of v0
+        st1             {v0.8b}, [x0], #8 // pixels 0-7
+        subs            w5, w5, #1 // w5 = remaining rows; the store below leaves the flags untouched
+        st1             {v0.s}[2], [x0], x1 // pixels 8-11, then move x0 to the start of the next row
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels16_8_neon, export=1 // width 16: dst = sat_u8(((src << 6) + src2 + 64) >> 7)
+        mov             x10, #(MAX_PB_SIZE * 2) // byte step applied to the src2 pointer after each row
+1:      ld1             {v0.16b}, [x2], x3 // src: 16 pixels, then x2 += srcstride (x3)
+        ushll           v16.8h, v0.8b, #6 // pixels 0-7: widen and multiply by 64
+        ushll2          v17.8h, v0.16b, #6 // pixels 8-15: widen and multiply by 64
+        ld1             {v20.8h, v21.8h}, [x4], x10  // src2: 16 int16 values, then x4 += x10
+        sqadd           v16.8h, v16.8h, v20.8h // signed saturating sum of the two predictions
+        sqadd           v17.8h, v17.8h, v21.8h
+        sqrshrun        v0.8b,  v16.8h, #7 // rounding shift right by 7, saturate to u8 (low half of v0)
+        sqrshrun2       v0.16b, v17.8h, #7 // same for the high half of v0
+        subs            w5, w5, #1 // w5 = remaining rows; the store below leaves the flags untouched
+        st1             {v0.16b}, [x0], x1 // write 16 pixels, then x0 += dststride (x1)
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels24_8_neon, export=1 // width 24: dst = sat_u8(((src << 6) + src2 + 64) >> 7)
+        mov             x10, #(MAX_PB_SIZE * 2) // byte step applied to the src2 pointer after each row
+1:      ld1             {v0.8b-v2.8b}, [x2], x3 // src: 3 x 8 pixels, then x2 += srcstride (x3)
+        ushll           v16.8h, v0.8b, #6 // widen each group of 8 pixels and multiply by 64
+        ushll           v17.8h, v1.8b, #6
+        ushll           v18.8h, v2.8b, #6
+        ld1             {v20.8h-v22.8h}, [x4], x10  // src2: 24 int16 values, then x4 += x10
+        sqadd           v16.8h, v16.8h, v20.8h // signed saturating sum of the two predictions
+        sqadd           v17.8h, v17.8h, v21.8h
+        sqadd           v18.8h, v18.8h, v22.8h
+        sqrshrun        v0.8b, v16.8h, #7 // rounding shift right by 7, saturate to unsigned 8 bit
+        sqrshrun        v1.8b, v17.8h, #7
+        sqrshrun        v2.8b, v18.8h, #7
+        subs            w5, w5, #1 // w5 = remaining rows; the store below leaves the flags untouched
+        st1             {v0.8b-v2.8b}, [x0], x1 // write 24 pixels, then x0 += dststride (x1)
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels32_8_neon, export=1 // width 32: dst = sat_u8(((src << 6) + src2 + 64) >> 7)
+        mov             x10, #(MAX_PB_SIZE * 2) // byte step applied to the src2 pointer after each row
+1:      ld1             {v0.16b-v1.16b}, [x2], x3 // src: 32 pixels, then x2 += srcstride (x3)
+        ushll           v16.8h, v0.8b, #6 // widen each group of 8 pixels and multiply by 64
+        ushll2          v17.8h, v0.16b, #6
+        ushll           v18.8h, v1.8b, #6
+        ushll2          v19.8h, v1.16b, #6
+        ld1             {v20.8h-v23.8h}, [x4], x10  // src2: 32 int16 values, then x4 += x10
+        sqadd           v16.8h, v16.8h, v20.8h // signed saturating sum of the two predictions
+        sqadd           v17.8h, v17.8h, v21.8h
+        sqadd           v18.8h, v18.8h, v22.8h
+        sqadd           v19.8h, v19.8h, v23.8h
+        sqrshrun        v0.8b,  v16.8h, #7 // rounding shift right by 7, saturate to u8; "2" forms fill the high halves
+        sqrshrun2       v0.16b, v17.8h, #7
+        sqrshrun        v1.8b,  v18.8h, #7
+        sqrshrun2       v1.16b, v19.8h, #7
+        st1             {v0.16b-v1.16b}, [x0], x1 // write 32 pixels, then x0 += dststride (x1)
+        subs            w5, w5, #1 // w5 = remaining rows (height argument)
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels48_8_neon, export=1 // width 48: dst = sat_u8(((src << 6) + src2 + 64) >> 7)
+        mov             x10, #(MAX_PB_SIZE) // step for the second src2 load; both loads together advance x4 by MAX_PB_SIZE * 2
+1:      ld1             {v0.16b-v2.16b}, [x2], x3 // src: 48 pixels, then x2 += srcstride (x3)
+        ushll           v16.8h, v0.8b, #6 // widen each group of 8 pixels and multiply by 64
+        ushll2          v17.8h, v0.16b, #6
+        ushll           v18.8h, v1.8b, #6
+        ushll2          v19.8h, v1.16b, #6
+        ushll           v20.8h, v2.8b, #6
+        ushll2          v21.8h, v2.16b, #6
+        ld1             {v24.8h-v27.8h}, [x4], #(MAX_PB_SIZE) // src2 values 0-31; x4 += MAX_PB_SIZE (presumably 64 = bytes loaded)
+        sqadd           v16.8h, v16.8h, v24.8h // signed saturating sum of the two predictions
+        sqadd           v17.8h, v17.8h, v25.8h
+        sqadd           v18.8h, v18.8h, v26.8h
+        sqadd           v19.8h, v19.8h, v27.8h
+        ld1             {v24.8h-v25.8h}, [x4], x10 // src2 values 32-47 (v24/v25 reused), then x4 += x10
+        sqadd           v20.8h, v20.8h, v24.8h
+        sqadd           v21.8h, v21.8h, v25.8h
+        sqrshrun        v0.8b, v16.8h, #7 // rounding shift right by 7, saturate to u8; "2" forms fill the high halves
+        sqrshrun2       v0.16b, v17.8h, #7
+        sqrshrun        v1.8b, v18.8h, #7
+        sqrshrun2       v1.16b, v19.8h, #7
+        sqrshrun        v2.8b, v20.8h, #7
+        sqrshrun2       v2.16b, v21.8h, #7
+        subs            w5, w5, #1 // w5 = remaining rows; the store below leaves the flags untouched
+        st1             {v0.16b-v2.16b}, [x0], x1 // write 48 pixels, then x0 += dststride (x1)
+        b.ne            1b
+        ret
+endfunc
+
+function ff_hevc_put_hevc_pel_bi_pixels64_8_neon, export=1 // width 64: dst = sat_u8(((src << 6) + src2 + 64) >> 7)
+1:      ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x2], x3 // src: 64 pixels, then x2 += srcstride (x3)
+        ushll           v16.8h, v0.8b, #6 // widen each group of 8 pixels and multiply by 64
+        ushll2          v17.8h, v0.16b, #6
+        ushll           v18.8h, v1.8b, #6
+        ushll2          v19.8h, v1.16b, #6
+        ushll           v20.8h, v2.8b, #6
+        ushll2          v21.8h, v2.16b, #6
+        ushll           v22.8h, v3.8b, #6
+        ushll2          v23.8h, v3.16b, #6
+        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE) // src2 values 0-31; x4 += MAX_PB_SIZE
+        sqadd           v16.8h, v16.8h, v24.8h // signed saturating sum of the two predictions
+        sqadd           v17.8h, v17.8h, v25.8h
+        sqadd           v18.8h, v18.8h, v26.8h
+        sqadd           v19.8h, v19.8h, v27.8h
+        ld1             {v24.8h, v25.8h, v26.8h, v27.8h}, [x4], #(MAX_PB_SIZE) // src2 values 32-63 (v24-v27 reused); row total is MAX_PB_SIZE * 2
+        sqadd           v20.8h, v20.8h, v24.8h
+        sqadd           v21.8h, v21.8h, v25.8h
+        sqadd           v22.8h, v22.8h, v26.8h
+        sqadd           v23.8h, v23.8h, v27.8h
+        sqrshrun        v0.8b, v16.8h, #7 // rounding shift right by 7, saturate to u8; "2" forms fill the high halves
+        sqrshrun2       v0.16b, v17.8h, #7
+        sqrshrun        v1.8b, v18.8h, #7
+        sqrshrun2       v1.16b, v19.8h, #7
+        sqrshrun        v2.8b, v20.8h, #7
+        sqrshrun2       v2.16b, v21.8h, #7
+        sqrshrun        v3.8b, v22.8h, #7
+        sqrshrun2       v3.16b, v23.8h, #7
+        st1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], x1 // write 64 pixels, then x0 += dststride (x1)
+        subs            w5, w5, #1 // w5 = remaining rows (height argument)
+        b.ne            1b
+        ret
+endfunc
+
 function ff_hevc_put_hevc_epel_v4_8_neon, export=1
         load_epel_filterb x5, x4
         sub             x1, x1, x2
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index c51488275c..cf171023e7 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -156,8 +156,12 @@ NEON8_FNPROTO(pel_pixels, (int16_t *dst,
         const uint8_t *src, ptrdiff_t srcstride,
         int height, intptr_t mx, intptr_t my, int width),);
 
+NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride,
+        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
+        int height, intptr_t mx, intptr_t my, int width),);
+
 NEON8_FNPROTO(epel_v, (int16_t *dst,
         const uint8_t *src, ptrdiff_t srcstride,
         int height, intptr_t mx, intptr_t my, int width),);
 
 NEON8_FNPROTO(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
@@ -324,6 +328,8 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
         NEON8_FNASSIGN(c->put_hevc_epel, 1, 0, epel_v,);
         NEON8_FNASSIGN(c->put_hevc_qpel, 0, 0, pel_pixels,);
         NEON8_FNASSIGN(c->put_hevc_qpel, 1, 0, qpel_v,);
+        NEON8_FNASSIGN(c->put_hevc_epel_bi, 0, 0, pel_bi_pixels,);
+        NEON8_FNASSIGN(c->put_hevc_qpel_bi, 0, 0, pel_bi_pixels,);
         NEON8_FNASSIGN(c->put_hevc_epel_uni, 0, 0, pel_uni_pixels,);
         NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 0, epel_uni_v,);
         NEON8_FNASSIGN(c->put_hevc_qpel_uni, 0, 0, pel_uni_pixels,);
-- 
2.38.0.windows.1


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
  2023-11-18  2:06 [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels Logan.Lyu
@ 2023-11-19 20:42 ` Michael Niedermayer
  2023-11-22 12:30   ` Logan.Lyu
  2023-12-01 18:09 ` Martin Storsjö
  1 sibling, 1 reply; 8+ messages in thread
From: Michael Niedermayer @ 2023-11-19 20:42 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 1650 bytes --]

On Sat, Nov 18, 2023 at 10:06:37AM +0800, Logan.Lyu wrote:
> put_hevc_pel_bi_pixels4_8_c: 54.7
> put_hevc_pel_bi_pixels4_8_neon: 43.0
> put_hevc_pel_bi_pixels6_8_c: 94.7
> put_hevc_pel_bi_pixels6_8_neon: 37.0
> put_hevc_pel_bi_pixels8_8_c: 171.0
> put_hevc_pel_bi_pixels8_8_neon: 24.0
> put_hevc_pel_bi_pixels12_8_c: 354.0
> put_hevc_pel_bi_pixels12_8_neon: 68.7
> put_hevc_pel_bi_pixels16_8_c: 588.2
> put_hevc_pel_bi_pixels16_8_neon: 77.5
> put_hevc_pel_bi_pixels24_8_c: 1670.7
> put_hevc_pel_bi_pixels24_8_neon: 173.0
> put_hevc_pel_bi_pixels32_8_c: 2267.7
> put_hevc_pel_bi_pixels32_8_neon: 281.2
> put_hevc_pel_bi_pixels48_8_c: 5787.5
> put_hevc_pel_bi_pixels48_8_neon: 673.5
> put_hevc_pel_bi_pixels64_8_c: 9897.0
> put_hevc_pel_bi_pixels64_8_neon: 1159.5
> 
> Co-Authored-By: J. Dekker <jdek@itanimul.li>
> Signed-off-by: Logan Lyu <Logan.Lyu@myais.com.cn>
> ---
>  libavcodec/aarch64/hevcdsp_epel_neon.S    | 179 ++++++++++++++++++++++
>  libavcodec/aarch64/hevcdsp_init_aarch64.c |  10 +-
>  2 files changed, 187 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S
> b/libavcodec/aarch64/hevcdsp_epel_neon.S
> index 708b903b00..74165273d7 100644
> --- a/libavcodec/aarch64/hevcdsp_epel_neon.S
> +++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
> @@ -244,6 +244,185 @@ function ff_hevc_put_hevc_pel_pixels64_8_neon,
> export=1

error: corrupt patch at line 194

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Good people do not need laws to tell them to act responsibly, while bad
people will find a way around the laws. -- Plato

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
  2023-11-19 20:42 ` Michael Niedermayer
@ 2023-11-22 12:30   ` Logan.Lyu
  2023-11-22 12:36     ` Martin Storsjö via ffmpeg-devel
  0 siblings, 1 reply; 8+ messages in thread
From: Logan.Lyu @ 2023-11-22 12:30 UTC (permalink / raw)
  To: FFmpeg development discussions and patches, Michael Niedermayer

Hi Michael,

Thank you for your reply.

I can't reproduce the error you mentioned...

I can apply patches to the ffmpeg master branch normally using 'git 
apply xxx.patch/.eml' or 'git am xxx.patch/.eml'.

By the way, I create these patches with 'git format-patch -s -o "../"
--add-header "X-Unsent: 1" --suffix .eml --to ffmpeg-devel@ffmpeg.org -6
--filename-max-length=100' to generate the .eml files, and with 'git
format-patch  -4 --filename-max-length=100' to generate the .patch files
that I attach, in order to avoid encoding problems in the email body.
These commands should be OK, right?

Can you try these patches again? If the error still occurs, please tell
me how it occurred and I will fix it.


在 2023/11/20 4:42, Michael Niedermayer 写道:
> On Sat, Nov 18, 2023 at 10:06:37AM +0800, Logan.Lyu wrote:
>> put_hevc_pel_bi_pixels4_8_c: 54.7
>> put_hevc_pel_bi_pixels4_8_neon: 43.0
>> put_hevc_pel_bi_pixels6_8_c: 94.7
>> put_hevc_pel_bi_pixels6_8_neon: 37.0
>> put_hevc_pel_bi_pixels8_8_c: 171.0
>> put_hevc_pel_bi_pixels8_8_neon: 24.0
>> put_hevc_pel_bi_pixels12_8_c: 354.0
>> put_hevc_pel_bi_pixels12_8_neon: 68.7
>> put_hevc_pel_bi_pixels16_8_c: 588.2
>> put_hevc_pel_bi_pixels16_8_neon: 77.5
>> put_hevc_pel_bi_pixels24_8_c: 1670.7
>> put_hevc_pel_bi_pixels24_8_neon: 173.0
>> put_hevc_pel_bi_pixels32_8_c: 2267.7
>> put_hevc_pel_bi_pixels32_8_neon: 281.2
>> put_hevc_pel_bi_pixels48_8_c: 5787.5
>> put_hevc_pel_bi_pixels48_8_neon: 673.5
>> put_hevc_pel_bi_pixels64_8_c: 9897.0
>> put_hevc_pel_bi_pixels64_8_neon: 1159.5
>>
>> Co-Authored-By: J. Dekker<jdek@itanimul.li>
>> Signed-off-by: Logan Lyu<Logan.Lyu@myais.com.cn>
>> ---
>>   libavcodec/aarch64/hevcdsp_epel_neon.S    | 179 ++++++++++++++++++++++
>>   libavcodec/aarch64/hevcdsp_init_aarch64.c |  10 +-
>>   2 files changed, 187 insertions(+), 2 deletions(-)
>>
>> diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S
>> b/libavcodec/aarch64/hevcdsp_epel_neon.S
>> index 708b903b00..74165273d7 100644
>> --- a/libavcodec/aarch64/hevcdsp_epel_neon.S
>> +++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
>> @@ -244,6 +244,185 @@ function ff_hevc_put_hevc_pel_pixels64_8_neon,
>> export=1
> error: corrupt patch at line 194
>
> [...]
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org  with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
  2023-11-22 12:30   ` Logan.Lyu
@ 2023-11-22 12:36     ` Martin Storsjö via ffmpeg-devel
  2023-11-24 12:19       ` Logan.Lyu
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Storsjö via ffmpeg-devel @ 2023-11-22 12:36 UTC (permalink / raw)
  To: FFmpeg development discussions and patches
  Cc: Martin Storsjö, Michael Niedermayer

On Wed, 22 Nov 2023, Logan.Lyu wrote:

> I can't reproduce the error you mentioned...
>
> I can apply patches to the ffmpeg master branch normally using 'git 
> apply xxx.patch/.eml' or 'git am xxx.patch/.eml'.

I guess you have emails that haven't been through the complete delivery 
chain, while some later party might have rewritten things.

> By the way, I create these patches by /'git format-patch -s -o "../" 
> --add-header "X-Unsent: 1" --suffix .eml --to ffmpeg-devel@ffmpeg.org -6 
> --filename-max-length=100' /to generate .eml file.  And /'git 
> format-patch  -4 --filename-max-length=100' /to generate .patch file as 
> attachment to prevent encoding format problems in email files. These 
> command should be OK, right?

That's right for generating the patch files. I guess the problem here 
isn't about the patch files themselves, but the email delivery of them.

> Can you try these patches again? If the error still occurs, please tell
> me how it occurred and I will fix it.

I also tried applying them now with "git am", and I got this:

Applying: lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
error: corrupt patch at line 194
Patch failed at 0001 lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
hint: Use 'git am --show-current-patch=diff' to see the failed patch

Thus, same issue here.

In earlier iterations, the patches have arrived correctly when you have 
sent them as attachments, instead of when sending them as plain inline 
patches with git send-email.

// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
  2023-11-22 12:36     ` Martin Storsjö via ffmpeg-devel
@ 2023-11-24 12:19       ` Logan.Lyu
  2023-11-24 12:30         ` Martin Storsjö
  0 siblings, 1 reply; 8+ messages in thread
From: Logan.Lyu @ 2023-11-24 12:19 UTC (permalink / raw)
  To: FFmpeg development discussions and patches
  Cc: Michael Niedermayer, Martin Storsjö

Hi, Martin,

This is indeed a weird mistake... I don't know how to fix it...

Could you try the .patch files I attached to each email?  I
downloaded the attachments from the ffmpeg-devel mailing list I am
subscribed to and applied them, and that seems to work.

If the method mentioned above still doesn't work, should I re-send the 
.eml files one by one?  Please tell me how to deal with it, I will be 
grateful.

Thanks


在 2023/11/22 20:36, Martin Storsjö via ffmpeg-devel 写道:
> On Wed, 22 Nov 2023, Logan.Lyu wrote:
>
>> I can't reproduce the error you mentioned...
>>
>> I can apply patches to the ffmpeg master branch normally using 'git 
>> apply xxx.patch/.eml' or 'git am xxx.patch/.eml'.
>
> I guess you have emails that haven't been through the complete 
> delivery chain, while some later party might have rewritten things.
>
>> By the way, I create these patches by /'git format-patch -s -o "../" 
>> --add-header "X-Unsent: 1" --suffix .eml --to ffmpeg-devel@ffmpeg.org 
>> -6 --filename-max-length=100' /to generate .eml file.  And /'git 
>> format-patch  -4 --filename-max-length=100' /to generate .patch file 
>> as attachment to prevent encoding format problems in email files. 
>> These command should be OK, right?
>
> That's right for generating the patch files. I guess the problem here 
> isn't about the patch files themselves, but the email delivery of them.
>
>> Can you try these patches again? If the error still occurs, please
>> tell me how it occurred and I will fix it.
>
> I also tried applying them now with "git am", and I got this:
>
> Applying: lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
> error: corrupt patch at line 194
> Patch failed at 0001 lavc/aarch64: new optimization for 8-bit 
> hevc_pel_bi_pixels
> hint: Use 'git am --show-current-patch=diff' to see the failed patch
>
> Thus, same issue here.
>
> In earlier iterations, the patches have arrived correctly when you 
> have sent them as attachments, instead of when sending them as plain 
> inline patches with git send-email.
>
> // Martin
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
  2023-11-24 12:19       ` Logan.Lyu
@ 2023-11-24 12:30         ` Martin Storsjö
  2023-12-01 19:29           ` Martin Storsjö
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Storsjö @ 2023-11-24 12:30 UTC (permalink / raw)
  To: Logan.Lyu; +Cc: Michael Niedermayer, FFmpeg development discussions and patches

Hi Logan,

On Fri, 24 Nov 2023, Logan.Lyu wrote:

> And can you try those .patch files I attached in every email?  I downloaded 
> the attachment from the ffmpeg-devel mailing list I subscribed to and applied 
> it, and it seems to work.

Oh, I see - I didn't notice the attached patch. As the patch also was sent 
inline, I only tried to apply the full mail as a patch (which had 
corrupted whitespace somewhere), but the attached patch files did work. 
Now I've successfully applied your patchset, so I can start testing and 
reviewing it when I get time for it. Thanks!

// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
  2023-11-18  2:06 [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels Logan.Lyu
  2023-11-19 20:42 ` Michael Niedermayer
@ 2023-12-01 18:09 ` Martin Storsjö
  1 sibling, 0 replies; 8+ messages in thread
From: Martin Storsjö @ 2023-12-01 18:09 UTC (permalink / raw)
  To: FFmpeg development discussions and patches; +Cc: jdek

On Sat, 18 Nov 2023, Logan.Lyu wrote:

> diff --git a/libavcodec/aarch64/hevcdsp_epel_neon.S 
> b/libavcodec/aarch64/hevcdsp_epel_neon.S
> index 708b903b00..74165273d7 100644
> --- a/libavcodec/aarch64/hevcdsp_epel_neon.S
> +++ b/libavcodec/aarch64/hevcdsp_epel_neon.S
> @@ -244,6 +244,185 @@ function ff_hevc_put_hevc_pel_pixels64_8_neon, export=1
> endfunc
>  +function ff_hevc_put_hevc_pel_bi_pixels4_8_neon, export=1
> +        mov             x10, #(MAX_PB_SIZE * 2)
> +1:      ld1             {v0.s}[0], [x2], x3 // src
> +        ushll           v16.8h, v0.8b, #6
> +        ld1             {v20.4h}, [x4], x10 // src2
> +        sqadd           v16.8h, v16.8h, v20.8h
> +        sqrshrun        v0.8b,  v16.8h, #7
> +        st1             {v0.s}[0], [x0], x1
> +        subs            w5, w5, #1
> +        b.ne            1b

In many of these functions, the "subs" instruction could be scheduled 
better, either after the ld1, or between sqrshrun and st1. It probably 
doesn't matter much, but if you have access to an in-order core, you might 
gain a cycle per iteration here.

> diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c 
> b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> index c51488275c..cf171023e7 100644
> --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
> +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> @@ -156,8 +156,12 @@ NEON8_FNPROTO(pel_pixels, (int16_t *dst,
>         const uint8_t *src, ptrdiff_t srcstride,
>         int height, intptr_t mx, intptr_t my, int width),);
> -NEON8_FNPROTO(epel_v, (int16_t *dst,
> -        const uint8_t *src, ptrdiff_t srcstride,
> +NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride,
> +        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
> +        int height, intptr_t mx, intptr_t my, int width),);
> +
> +NEON8_FNPROTO(epel_v, (uint8_t *dst, ptrdiff_t dststride,
> +        const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,

Here, you're breaking the interface of the existing prototypes for epel_v. 
Depending on compiler, this either causes warnings, or with modern Clang, 
errors. Please pay attention to potential warnings in the file you edit, 
when authoring a new patch.

// Martin

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels
  2023-11-24 12:30         ` Martin Storsjö
@ 2023-12-01 19:29           ` Martin Storsjö
  0 siblings, 0 replies; 8+ messages in thread
From: Martin Storsjö @ 2023-12-01 19:29 UTC (permalink / raw)
  To: Logan.Lyu; +Cc: Michael Niedermayer, FFmpeg development discussions and patches

On Fri, 24 Nov 2023, Martin Storsjö wrote:

> Hi Logan,
>
> On Fri, 24 Nov 2023, Logan.Lyu wrote:
>
>> And can you try those .patch files I attached in every email?  I 
>> downloaded the attachment from the ffmpeg-devel mailing list I subscribed 
>> to and applied it, and it seems to work.
>
> Oh, I see - I didn't notice the attached patch. As the patch also was sent 
> inline, I only tried to apply the full mail as a patch (which had corrupted 
> whitespace somewhere), but the attached patch files did work. Now I've 
> successfully applied your patchset, so I can start testing and reviewing it 
> when I get time for it. Thanks!

I had a look at the patchset now, it looked mostly good.

I fixed most of the trivial issues I noticed, and pushed with that.

Please have a look at the comments I made, in particular, the weird

+        mov             x8, #32
+        str             x8, [sp, #-80]!

in patch 5/6 is left untouched as is. Feel free to propose follow-up 
patches to fix that. I also didn't try to tweak the scheduling of e.g. the 
subs instructions.

// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-12-01 19:30 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-18  2:06 [FFmpeg-devel] [PATCH 1/6] lavc/aarch64: new optimization for 8-bit hevc_pel_bi_pixels Logan.Lyu
2023-11-19 20:42 ` Michael Niedermayer
2023-11-22 12:30   ` Logan.Lyu
2023-11-22 12:36     ` Martin Storsjö via ffmpeg-devel
2023-11-24 12:19       ` Logan.Lyu
2023-11-24 12:30         ` Martin Storsjö
2023-12-01 19:29           ` Martin Storsjö
2023-12-01 18:09 ` Martin Storsjö

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git