Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] avcodec/aarch64: add NEON optimization for draw_edges
@ 2026-01-22  8:50 chenmingbiao via ffmpeg-devel
  2026-01-22  9:11 ` [FFmpeg-devel] " Martin Storsjö via ffmpeg-devel
  0 siblings, 1 reply; 2+ messages in thread
From: chenmingbiao via ffmpeg-devel @ 2026-01-22  8:50 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: chenmingbiao

From: mbillchan <mbillchan@gmail.com>

Performance improvement for draw_edges function using NEON instructions.
Optimized for edge widths 4, 8, and 16.

Benchmark results (AArch64):
draw_edges_8_1724_4_c:       851.4
draw_edges_8_1724_4_neon:    824.7 (1.03x)
draw_edges_8_1724_16_c:     1620.9
draw_edges_8_1724_16_neon:  1049.9 (1.54x)
draw_edges_1920_4_16_c:      849.0
draw_edges_1920_4_16_neon:   571.1 (1.49x)
---
 libavcodec/aarch64/mpegvideoencdsp_init.c |   3 +
 libavcodec/aarch64/mpegvideoencdsp_neon.S | 157 ++++++++++++++++++++++
 tests/checkasm/mpegvideoencdsp.c          |   5 +
 3 files changed, 165 insertions(+)

diff --git a/libavcodec/aarch64/mpegvideoencdsp_init.c b/libavcodec/aarch64/mpegvideoencdsp_init.c
index 0402ac9e82..8363b18f48 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_init.c
+++ b/libavcodec/aarch64/mpegvideoencdsp_init.c
@@ -26,6 +26,8 @@
 
 int ff_pix_sum16_neon(const uint8_t *pix, ptrdiff_t line_size);
 int ff_pix_norm1_neon(const uint8_t *pix, ptrdiff_t line_size);
+void ff_draw_edges_neon(uint8_t *buf, ptrdiff_t wrap, int width, int height,
+                        int w, int h, int sides);
 
 #if HAVE_DOTPROD
 int ff_pix_norm1_neon_dotprod(const uint8_t *pix, ptrdiff_t line_size);
@@ -39,6 +41,7 @@ av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
     if (have_neon(cpu_flags)) {
         c->pix_sum   = ff_pix_sum16_neon;
         c->pix_norm1 = ff_pix_norm1_neon;
+        c->draw_edges = ff_draw_edges_neon;
     }
 
 #if HAVE_DOTPROD
diff --git a/libavcodec/aarch64/mpegvideoencdsp_neon.S b/libavcodec/aarch64/mpegvideoencdsp_neon.S
index 8175d8975e..6b9cfc62ea 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_neon.S
+++ b/libavcodec/aarch64/mpegvideoencdsp_neon.S
@@ -91,3 +91,160 @@ endfunc
 
 DISABLE_DOTPROD
 #endif
+
+function ff_draw_edges_neon, export=1 // (x0 buf, x1 wrap, w2 width, w3 height, w4 w, w5 h, w6 sides): extend each row by w bytes left/right, then copy padded rows h times above/below per sides
+    mov     x9, x0                  // x9  = buf (kept for the top/bottom passes)
+    mov     x10, x1                 // x10 = wrap, the byte stride between rows
+    uxtw    x11, w2                 // x11 = width, zero-extended to 64 bits
+    uxtw    x12, w3                 // x12 = height
+    uxtw    x13, w4                 // x13 = w (horizontal edge size in bytes)
+    uxtw    x14, w5                 // x14 = h (number of rows to replicate)
+    uxtw    x15, w6                 // x15 = sides bit mask
+    mov     x0, x9                  // x0 = current row pointer; NOTE(review): x0 still equals buf here, so this move is redundant
+    mov     x1, x12                 // x1 = remaining row count for the left/right pass
+
+    cmp     w13, #16                // pick the left/right loop by edge size
+    b.eq    1f                      // w == 16
+    cmp     w13, #8
+    b.eq    2f                      // w == 8
+    b       3f                      // NOTE(review): every other w (not only 4) takes the 4-byte path
+
+1:                                  // left/right pass, 16-byte edges; NOTE(review): counter is tested after the decrement, so height == 0 would not exit here
+    ldrb    w2, [x0]                // first byte of the row
+    dup     v0.16b, w2              // broadcast it to 16 lanes
+    sub     x3, x0, #16             // x3 = row - 16
+    st1     {v0.16b}, [x3]          // fill the 16 bytes left of the row
+
+    add     x3, x0, x11             // x3 = row + width (first byte past the row)
+    sub     x4, x3, #1              // x4 = address of the last byte of the row
+    ldrb    w2, [x4]                // last byte of the row
+    dup     v1.16b, w2              // broadcast it to 16 lanes
+    st1     {v1.16b}, [x3]          // fill the 16 bytes right of the row
+
+    add     x0, x0, x10             // advance to the next row
+    subs    x1, x1, #1              // one row done
+    b.ne    1b                      // repeat until the counter reaches zero
+    b       4f                      // continue with the top/bottom passes
+
+2:                                  // left/right pass, 8-byte edges (same scheme as 1:)
+    ldrb    w2, [x0]                // first byte of the row
+    dup     v0.8b, w2               // broadcast it to 8 lanes
+    sub     x3, x0, #8              // x3 = row - 8
+    st1     {v0.8b}, [x3]           // fill the 8 bytes left of the row
+
+    add     x3, x0, x11             // x3 = row + width
+    sub     x4, x3, #1              // x4 = address of the last byte of the row
+    ldrb    w2, [x4]                // last byte of the row
+    dup     v1.8b, w2               // broadcast it to 8 lanes
+    st1     {v1.8b}, [x3]           // fill the 8 bytes right of the row
+
+    add     x0, x0, x10             // advance to the next row
+    subs    x1, x1, #1              // one row done
+    b.ne    2b                      // repeat until the counter reaches zero
+    b       4f                      // continue with the top/bottom passes
+
+3:                                  // left/right pass, 4-byte edges (default path)
+    ldrb    w2, [x0]                // first byte of the row
+    dup     v0.8b, w2               // broadcast it to 8 lanes; only 4 are stored
+    sub     x3, x0, #4              // x3 = row - 4
+    st1     {v0.s}[0], [x3]         // store one 32-bit lane: 4 bytes left of the row
+
+    add     x3, x0, x11             // x3 = row + width
+    sub     x4, x3, #1              // x4 = address of the last byte of the row
+    ldrb    w2, [x4]                // last byte of the row
+    dup     v1.8b, w2               // broadcast it to 8 lanes; only 4 are stored
+    st1     {v1.s}[0], [x3]         // store one 32-bit lane: 4 bytes right of the row
+
+    add     x0, x0, x10             // advance to the next row
+    subs    x1, x1, #1              // one row done
+    b.ne    3b                      // falls through to 4: when the counter reaches zero
+
+4:                                  // left/right edges done
+    sub     x9, x9, x13             // x9 = buf - w, start of the padded first row
+    tbnz    w15, #0, 5f             // sides bit 0 set: replicate the first row upwards
+    b       6f                      // otherwise skip to the bit 1 check
+
+5:                                  // setup for copying the first row to the h rows above it
+    add     x2, x11, x13, lsl #1    // x2 = width + 2 * w, bytes per padded row
+    mov     x3, x14                 // x3 = rows left to write; NOTE(review): tested after the decrement, so h == 0 would not exit here
+    mov     x4, x9                  // x4 = source: padded first row
+    sub     x5, x9, x10             // x5 = destination: one row above the source
+
+7:                                  // start one destination row
+    mov     x6, x5                  // x6 = write cursor
+    mov     x7, x4                  // x7 = read cursor (source row is re-read for every copy)
+    mov     x8, x2                  // x8 = bytes left in this row
+
+8:                                  // copy in 64-byte chunks
+    cmp     x8, #64
+    b.lt    17f                     // fewer than 64 bytes left
+    ld1     {v0.16b, v1.16b, v2.16b, v3.16b}, [x7], #64
+    st1     {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], #64
+    sub     x8, x8, #64
+    b       8b
+
+17:                                 // copy in 16-byte chunks
+    cmp     x8, #16
+    b.lt    9f                      // fewer than 16 bytes left
+    ld1     {v0.16b}, [x7], #16
+    st1     {v0.16b}, [x6], #16
+    sub     x8, x8, #16
+    b       17b
+9:                                  // copy the remaining bytes one at a time
+    cbz     x8, 10f                 // row finished
+    ldrb    w16, [x7], #1
+    strb    w16, [x6], #1
+    sub     x8, x8, #1
+    b       9b
+10:                                 // row written
+    sub     x5, x5, x10             // move the destination one row further up
+    subs    x3, x3, #1              // one row done
+    b.ne    7b                      // repeat until the counter reaches zero
+
+6:                                  // top pass done or skipped
+    tbnz    w15, #1, 11f            // sides bit 1 set: replicate the last row downwards
+    b       12f                     // otherwise return
+
+11:                                 // setup for copying the last row to the h rows below it
+    sub     x3, x12, #1             // x3 = height - 1
+    mul     x3, x3, x10             // x3 = byte offset of the last row
+    add     x4, x9, x3              // x4 = source: padded last row
+    add     x5, x4, x10             // x5 = destination: one row below the source
+
+    add     x2, x11, x13, lsl #1    // x2 = width + 2 * w, bytes per padded row
+    mov     x3, x14                 // x3 = rows left to write (x3 reused; same h == 0 caveat as at 5:)
+
+13:                                 // start one destination row
+    mov     x6, x5                  // x6 = write cursor
+    mov     x7, x4                  // x7 = read cursor
+    mov     x8, x2                  // x8 = bytes left in this row
+
+14:                                 // copy in 64-byte chunks
+    cmp     x8, #64
+    b.lt    18f                     // fewer than 64 bytes left
+    ld1     {v0.16b, v1.16b, v2.16b, v3.16b}, [x7], #64
+    st1     {v0.16b, v1.16b, v2.16b, v3.16b}, [x6], #64
+    sub     x8, x8, #64
+    b       14b
+
+18:                                 // copy in 16-byte chunks
+    cmp     x8, #16
+    b.lt    15f                     // fewer than 16 bytes left
+    ld1     {v0.16b}, [x7], #16
+    st1     {v0.16b}, [x6], #16
+    sub     x8, x8, #16
+    b       18b
+15:                                 // copy the remaining bytes one at a time
+    cbz     x8, 16f                 // row finished
+    ldrb    w16, [x7], #1
+    strb    w16, [x6], #1
+    sub     x8, x8, #1
+    b       15b
+16:                                 // row written
+    add     x5, x5, x10             // move the destination one row further down
+    subs    x3, x3, #1              // one row done
+    b.ne    13b                     // repeat until the counter reaches zero
+
+12:                                 // all requested edges drawn
+    ret
+endfunc
diff --git a/tests/checkasm/mpegvideoencdsp.c b/tests/checkasm/mpegvideoencdsp.c
index 955cd9f5b7..fdaed08e86 100644
--- a/tests/checkasm/mpegvideoencdsp.c
+++ b/tests/checkasm/mpegvideoencdsp.c
@@ -147,8 +147,13 @@ static void check_draw_edges(MpegvideoEncDSPContext *c)
     LOCAL_ALIGNED_16(uint8_t, buf0, [BUFSIZE]);
     LOCAL_ALIGNED_16(uint8_t, buf1, [BUFSIZE]);
 
+#if ARCH_X86
     declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *buf, ptrdiff_t wrap, int width, int height,
                                              int w, int h, int sides);
+#else
+    declare_func(void, uint8_t *buf, ptrdiff_t wrap, int width, int height,
+                                             int w, int h, int sides);
+#endif
 
     for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
         int input_size = input_sizes[isi];
-- 
2.48.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [FFmpeg-devel] Re: [PATCH] avcodec/aarch64: add NEON optimization for draw_edges
  2026-01-22  8:50 [FFmpeg-devel] [PATCH] avcodec/aarch64: add NEON optimization for draw_edges chenmingbiao via ffmpeg-devel
@ 2026-01-22  9:11 ` Martin Storsjö via ffmpeg-devel
  0 siblings, 0 replies; 2+ messages in thread
From: Martin Storsjö via ffmpeg-devel @ 2026-01-22  9:11 UTC (permalink / raw)
  To: chenmingbiao via ffmpeg-devel; +Cc: Martin Storsjö

On Thu, 22 Jan 2026, chenmingbiao via ffmpeg-devel wrote:

> From: mbillchan <mbillchan@gmail.com>
>
> Performance improvement for draw_edges function using NEON instructions.
> Optimized for edge widths 4, 8, and 16.
>
> Benchmark results (AArch64):
> draw_edges_8_1724_4_c:       851.4
> draw_edges_8_1724_4_neon:    824.7 (1.03x)
> draw_edges_8_1724_16_c:     1620.9
> draw_edges_8_1724_16_neon:  1049.9 (1.54x)
> draw_edges_1920_4_16_c:      849.0
> draw_edges_1920_4_16_neon:   571.1 (1.49x)
> ---

Please submit this patch at https://code.ffmpeg.org/FFmpeg/FFmpeg. This 
includes CI checking of the code, and will indicate to you that your 
assembly is inconsistently indented compared with all the existing 
assembly. (You can also check this locally by running 
./tools/check_arm_indent.sh.)

// Martin

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-01-22  9:12 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-22  8:50 [FFmpeg-devel] [PATCH] avcodec/aarch64: add NEON optimization for draw_edges chenmingbiao via ffmpeg-devel
2026-01-22  9:11 ` [FFmpeg-devel] " Martin Storsjö via ffmpeg-devel

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git