[FFmpeg-devel] [PR] avcodec/aarch64: add pngdsp (PR #21394)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

* [FFmpeg-devel] [PR] avcodec/aarch64: add pngdsp (PR #21394)
@ 2026-01-06 14:39 Zhao Zhili via ffmpeg-devel
  0 siblings, 0 replies; only message in thread
From: Zhao Zhili via ffmpeg-devel @ 2026-01-06 14:39 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Zhao Zhili

PR #21394 opened by Zhao Zhili (quink)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21394
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21394.patch

```
                                Apple M4 (clang)    Rpi5 A76 (gcc-12)
-----------------------------------------------------------------------
add_bytes_l2_4096_c:         |    87.9 ( 1.00x)   |   2059.9 ( 1.00x)
add_bytes_l2_4096_neon:      |    47.1 ( 1.87x)   |    452.5 ( 4.55x)
add_paeth_prediction_3_c:    |  7904.5 ( 1.00x)   |  43817.5 ( 1.00x)
add_paeth_prediction_3_neon: |  4901.8 ( 1.61x)   |  23283.7 ( 1.88x)
add_paeth_prediction_4_c:    |  7072.8 ( 1.00x)   |  43624.6 ( 1.00x)
add_paeth_prediction_4_neon: |  3788.3 ( 1.87x)   |  17681.6 ( 2.47x)
add_paeth_prediction_6_c:    |  5592.2 ( 1.00x)   |  41674.0 ( 1.00x)
add_paeth_prediction_6_neon: |  2901.2 ( 1.93x)   |  11750.3 ( 3.55x)
add_paeth_prediction_8_c:    |  4064.6 ( 1.00x)   |  42175.8 ( 1.00x)
add_paeth_prediction_8_neon: |  2147.7 ( 1.89x)   |   8827.5 ( 4.78x)
```


>From 34c22baf4caa689a3b6372ddaa86337857c48649 Mon Sep 17 00:00:00 2001
From: Zhao Zhili <zhilizhao@tencent.com>
Date: Sun, 4 Jan 2026 17:22:56 +0800
Subject: [PATCH 1/2] tests/checkasm: add test for png

---
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 ++
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/png.c      | 103 ++++++++++++++++++++++++++++++++++++++
 tests/fate/checkasm.mak   |   1 +
 5 files changed, 109 insertions(+)
 create mode 100644 tests/checkasm/png.c

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 48f358d40d..0f630e2804 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -42,6 +42,7 @@ AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER)   += huffyuvdsp.o
 AVCODECOBJS-$(CONFIG_JPEG2000_DECODER)  += jpeg2000dsp.o
 AVCODECOBJS-$(CONFIG_OPUS_DECODER)      += opusdsp.o
 AVCODECOBJS-$(CONFIG_PIXBLOCKDSP)       += pixblockdsp.o
+AVCODECOBJS-$(CONFIG_PNG_DECODER)       += png.o
 AVCODECOBJS-$(CONFIG_HEVC_DECODER)      += hevc_add_res.o hevc_deblock.o hevc_idct.o hevc_sao.o hevc_pel.o
 AVCODECOBJS-$(CONFIG_RV34DSP)           += rv34dsp.o
 AVCODECOBJS-$(CONFIG_RV40_DECODER)      += rv40dsp.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 7dcdaeb2a4..bfe0bd31be 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -230,6 +230,9 @@ static const struct {
     #if CONFIG_PIXBLOCKDSP
         { "pixblockdsp", checkasm_check_pixblockdsp },
     #endif
+    #if CONFIG_PNG_DECODER
+        { "png", checkasm_check_png },
+    #endif
     #if CONFIG_QPELDSP
         { "qpeldsp", checkasm_check_qpeldsp },
     #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index f288320069..cc45087e41 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -129,6 +129,7 @@ void checkasm_check_mpegvideoencdsp(void);
 void checkasm_check_nlmeans(void);
 void checkasm_check_opusdsp(void);
 void checkasm_check_pixblockdsp(void);
+void checkasm_check_png(void);
 void checkasm_check_qpeldsp(void);
 void checkasm_check_sbrdsp(void);
 void checkasm_check_rv34dsp(void);
diff --git a/tests/checkasm/png.c b/tests/checkasm/png.c
new file mode 100644
index 0000000000..d2070afe42
--- /dev/null
+++ b/tests/checkasm/png.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2026 Zhao Zhili <quinkblack@foxmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "libavutil/mem_internal.h"
+#include "libavcodec/pngdsp.h"
+
+#include "checkasm.h"
+
+#define BUF_SIZE 4096
+
+#define randomize_buf(buf, size)            /* fill buf[0 .. size) with bytes taken from rnd() */ \
+    do {                                    \
+        for (int i = 0; i < (size); i++)    /* arguments are parenthesized so expressions expand safely */ \
+            (buf)[i] = (uint8_t)rnd();      \
+    } while (0)
+
+static void check_add_bytes_l2(const PNGDSPContext *c) // checks c->add_bytes_l2 against the reference at several widths
+{
+    LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE]); // destination handed to call_ref
+    LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]); // destination handed to call_new
+    LOCAL_ALIGNED_16(uint8_t, src, [2], [BUF_SIZE]); // the two source rows
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src1, uint8_t *src2, int w);
+
+    randomize_buf(dst0, BUF_SIZE);
+    memcpy(dst1, dst0, BUF_SIZE); // both destinations start with identical contents
+    randomize_buf(src[0], BUF_SIZE);
+    randomize_buf(src[1], BUF_SIZE);
+
+    const int size[] = {15, 2043, 4096}; // widths under test
+    for (int i = 0; i < FF_ARRAY_ELEMS(size); i++) {
+        if (check_func(c->add_bytes_l2, "add_bytes_l2_%d", size[i])) {
+            call_ref(dst0, src[0], src[1], size[i]);
+            call_new(dst1, src[0], src[1], size[i]);
+            checkasm_check(uint8_t, dst0, BUF_SIZE, dst1, BUF_SIZE, BUF_SIZE, 1, "dst"); // the full BUF_SIZE buffers are passed, not only the first size[i] bytes
+            if (size[i] == BUF_SIZE) // benchmark only the full-buffer width
+                bench_new(dst1, src[0], src[1], BUF_SIZE);
+        }
+    }
+}
+
+static void check_add_paeth_prediction(const PNGDSPContext *c) // checks c->add_paeth_prediction against the reference for bpp 3, 4, 6 and 8
+{
+    LOCAL_ALIGNED_16(uint8_t, dst0_buf, [BUF_SIZE]); // backing buffer for the call_ref destination
+    LOCAL_ALIGNED_16(uint8_t, dst1_buf, [BUF_SIZE]); // backing buffer for the call_new destination
+    LOCAL_ALIGNED_16(uint8_t, src, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, top_buf, [BUF_SIZE]);
+
+    randomize_buf(dst0_buf, BUF_SIZE);
+    randomize_buf(src, BUF_SIZE);
+    randomize_buf(top_buf, BUF_SIZE);
+
+    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
+
+    const int bpps[] = {3, 4, 6, 8}; // bytes-per-pixel values under test
+    for (int i = 0; i < FF_ARRAY_ELEMS(bpps); i++) {
+        int bpp = bpps[i];
+        if (check_func(c->add_paeth_prediction, "add_paeth_prediction_%d", bpp)) {
+            uint8_t *dst0 = &dst0_buf[bpp]; // start bpp bytes in, leaving valid bytes at dst[-bpp .. -1]
+            uint8_t *dst1 = &dst1_buf[bpp];
+            uint8_t *top = &top_buf[bpp]; // likewise leaves valid bytes at top[-bpp .. -1]
+            int w = BUF_SIZE - bpp; // bytes remaining after the bpp-byte offset
+
+            // The dst buffer is both read and written, so dst0 and dst1 must hold the same data before each test
+            memcpy(dst1_buf, dst0_buf, BUF_SIZE);
+
+            call_ref(dst0, src, top, w, bpp);
+            call_new(dst1, src, top, w, bpp);
+            checkasm_check(uint8_t, dst0_buf, BUF_SIZE, dst1_buf, BUF_SIZE, BUF_SIZE, 1, "dst"); // the whole backing buffers are passed, including the leading bpp bytes
+            bench_new(dst1, src, top, w, bpp);
+        }
+    }
+}
+
+void checkasm_check_png(void) // checkasm entry point for the PNG DSP functions
+{
+    PNGDSPContext c;
+
+    ff_pngdsp_init(&c); // fills in the function pointers exercised below
+
+    check_add_bytes_l2(&c);
+    report("add_bytes_l2");
+    check_add_paeth_prediction(&c);
+    report("add_paeth_prediction");
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index b08c1947cd..89e319c9b9 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -44,6 +44,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp                                 \
                 fate-checkasm-mpegvideoencdsp                           \
                 fate-checkasm-opusdsp                                   \
                 fate-checkasm-pixblockdsp                               \
+                fate-checkasm-png                                       \
                 fate-checkasm-qpeldsp                                   \
                 fate-checkasm-sbrdsp                                    \
                 fate-checkasm-rv34dsp                                   \
-- 
2.49.1


>From 16eecc10ccce1a6ad43bade9975ef686d8008f3c Mon Sep 17 00:00:00 2001
From: Zhao Zhili <zhilizhao@tencent.com>
Date: Mon, 5 Jan 2026 12:24:45 +0800
Subject: [PATCH 2/2] avcodec/aarch64: add pngdsp

                                Apple M4 (clang)    Rpi5 A76 (gcc-12)
-----------------------------------------------------------------------
add_bytes_l2_4096_c:         |    87.9 ( 1.00x)   |   2059.9 ( 1.00x)
add_bytes_l2_4096_neon:      |    47.1 ( 1.87x)   |    452.5 ( 4.55x)
add_paeth_prediction_3_c:    |  7904.5 ( 1.00x)   |  43817.5 ( 1.00x)
add_paeth_prediction_3_neon: |  4901.8 ( 1.61x)   |  23283.7 ( 1.88x)
add_paeth_prediction_4_c:    |  7072.8 ( 1.00x)   |  43624.6 ( 1.00x)
add_paeth_prediction_4_neon: |  3788.3 ( 1.87x)   |  17681.6 ( 2.47x)
add_paeth_prediction_6_c:    |  5592.2 ( 1.00x)   |  41674.0 ( 1.00x)
add_paeth_prediction_6_neon: |  2901.2 ( 1.93x)   |  11750.3 ( 3.55x)
add_paeth_prediction_8_c:    |  4064.6 ( 1.00x)   |  42175.8 ( 1.00x)
add_paeth_prediction_8_neon: |  2147.7 ( 1.89x)   |   8827.5 ( 4.78x)
---
 libavcodec/aarch64/Makefile      |   2 +
 libavcodec/aarch64/pngdsp_init.c |  40 ++++++
 libavcodec/aarch64/pngdsp_neon.S | 206 +++++++++++++++++++++++++++++++
 libavcodec/pngdsp.c              |   4 +-
 libavcodec/pngdsp.h              |   1 +
 5 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/aarch64/pngdsp_init.c
 create mode 100644 libavcodec/aarch64/pngdsp_neon.S

diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 2bf48dfa28..ce98a4a9c5 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -13,6 +13,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP)             += aarch64/mpegaudiodsp_init.o
 OBJS-$(CONFIG_MPEGVIDEOENCDSP)          += aarch64/mpegvideoencdsp_init.o
 OBJS-$(CONFIG_NEON_CLOBBER_TEST)        += aarch64/neontest.o
 OBJS-$(CONFIG_PIXBLOCKDSP)              += aarch64/pixblockdsp_init_aarch64.o
+OBJS-$(CONFIG_PNG_DECODER)              += aarch64/pngdsp_init.o
 OBJS-$(CONFIG_VIDEODSP)                 += aarch64/videodsp_init.o
 OBJS-$(CONFIG_VP8DSP)                   += aarch64/vp8dsp_init_aarch64.o
 
@@ -56,6 +57,7 @@ NEON-OBJS-$(CONFIG_ME_CMP)              += aarch64/me_cmp_neon.o
 NEON-OBJS-$(CONFIG_MPEGAUDIODSP)        += aarch64/mpegaudiodsp_neon.o
 NEON-OBJS-$(CONFIG_MPEGVIDEOENCDSP)     += aarch64/mpegvideoencdsp_neon.o
 NEON-OBJS-$(CONFIG_PIXBLOCKDSP)         += aarch64/pixblockdsp_neon.o
+NEON-OBJS-$(CONFIG_PNG_DECODER)         += aarch64/pngdsp_neon.o
 NEON-OBJS-$(CONFIG_VC1DSP)              += aarch64/vc1dsp_neon.o
 NEON-OBJS-$(CONFIG_VP8DSP)              += aarch64/vp8dsp_neon.o
 
diff --git a/libavcodec/aarch64/pngdsp_init.c b/libavcodec/aarch64/pngdsp_init.c
new file mode 100644
index 0000000000..5850b09b03
--- /dev/null
+++ b/libavcodec/aarch64/pngdsp_init.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2026 Zhao Zhili <zhilizhao@tencent.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/pngdsp.h"
+
+void ff_add_bytes_l2_neon(uint8_t *dst, uint8_t *src1,
+                          uint8_t *src2, int w);
+void ff_add_png_paeth_prediction_neon(uint8_t *dst, uint8_t *src,
+                                      uint8_t *top, int w, int bpp);
+
+av_cold void ff_pngdsp_init_aarch64(PNGDSPContext *dsp) // installs the NEON implementations into dsp when NEON is available
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) { // otherwise the pointers already stored in dsp are left untouched
+        dsp->add_bytes_l2         = ff_add_bytes_l2_neon;
+        dsp->add_paeth_prediction = ff_add_png_paeth_prediction_neon;
+    }
+}
diff --git a/libavcodec/aarch64/pngdsp_neon.S b/libavcodec/aarch64/pngdsp_neon.S
new file mode 100644
index 0000000000..afccb92ffe
--- /dev/null
+++ b/libavcodec/aarch64/pngdsp_neon.S
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2026 Zhao Zhili <zhilizhao@tencent.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+function ff_add_bytes_l2_neon, export=1
+        bic             w4, w3, #63                 // w4 = w3 rounded down to a multiple of 64
+        and             w3, w3, #63                 // w3 = remainder left for the smaller loops
+        cbz             w4, 2f
+1:
+        // 64 bytes per loop iteration
+        ld1             {v0.16b - v3.16b}, [x1], #64
+        ld1             {v4.16b - v7.16b}, [x2], #64
+        subs            w4, w4, #64
+        add             v0.16b, v0.16b, v4.16b      // byte-wise sum of the two inputs
+        add             v1.16b, v1.16b, v5.16b
+        add             v2.16b, v2.16b, v6.16b
+        add             v3.16b, v3.16b, v7.16b
+        st1             {v0.16b - v3.16b}, [x0], #64
+        b.ne            1b
+2:
+        bic             w4, w3, #15                 // w4 = remainder rounded down to a multiple of 16
+        and             w3, w3, #15                 // w3 = bytes left for the byte loop
+        cbz             w4, 4f
+3:
+        // 16 bytes per loop iteration
+        ld1             {v0.16b}, [x1], #16
+        ld1             {v4.16b}, [x2], #16
+        subs            w4, w4, #16
+        add             v0.16b, v0.16b, v4.16b
+        st1             {v0.16b}, [x0], #16
+        b.ne            3b
+4:
+        cbz             w3, 6f
+5:
+        ldrb            w5, [x1], #1                // one byte per loop iteration
+        ldrb            w6, [x2], #1
+        subs            w3, w3, #1
+        add             w5, w5, w6
+        strb            w5, [x0], #1                // strb keeps only the low 8 bits of the sum
+        b.ne            5b
+6:
+        ret
+endfunc
+
+.macro add_png_paeth_prediction, bpp, load=0            // one Paeth step; load=1 fetches fresh vectors from memory
+.if \load
+        ldr             q0, [x0, #(-\bpp)]          // a = dst[i - bpp]
+        ldr             q1, [x2, #(-\bpp)]          // c = top[i - bpp]
+        ld1             {v2.16b}, [x2]              // b = top[i]
+        ld1             {v18.16b}, [x1]             // src[i]
+.else
+        mov             v1.16b, v2.16b              // c = b of the previous step
+        ext             v2.16b, v2.16b, v2.16b, #\bpp       // rotate b by bpp bytes to reach the next group
+        ext             v18.16b, v18.16b, v18.16b, #\bpp    // rotate src by bpp bytes likewise
+.endif
+
+        uabd            v4.8b, v2.8b, v1.8b         // pa = abs(b - c)
+        uaddl           v7.8h, v1.8b, v1.8b         // 2 * c
+        uabd            v3.8b, v0.8b, v1.8b         // pb = abs(a - c)
+        uaddl           v5.8h, v0.8b, v2.8b         // a + b
+
+        cmhs            v16.8b, v3.8b, v4.8b        // pb >= pa
+        uabd            v5.8h, v5.8h, v7.8h         // pc = abs(a + b - 2 * c), 16-bit lanes
+        umin            v6.8b, v4.8b, v3.8b         // min(pa, pb)
+        uqxtn           v5.8b, v5.8h                // narrow pc to 8 bits with saturation
+
+        bsl             v16.8b, v0.8b, v2.8b        // pb >= pa ? a : b
+        cmhs            v6.8b, v5.8b, v6.8b         // pc >= min(pa, pb)
+        bsl             v6.8b, v16.8b, v1.8b        // pc >= min ? (a or b) : c
+
+        add             v0.8b, v6.8b, v18.8b        // predictor + src; v0 is not reloaded when load=0, so it is the next a
+.if \bpp == 3 || \bpp == 4
+        str             s0, [x0], #\bpp             // stores 4 bytes, advances x0 by bpp
+.else
+        str             d0, [x0], #\bpp             // stores 8 bytes, advances x0 by bpp
+.endif
+.endm
+
+function ff_add_png_paeth_prediction_neon, export=1
+        cmp             w4, #3                      // w4 = bpp selects the vector loop; w3 = w
+        b.gt            40f
+30:     // bpp = 3
+        // 15 bytes per loop
+        // each pass over-reads and over-writes one byte, so start from (w3 - 1)
+        sub             w5, w3, #1
+        mov             w7, #15
+        sdiv            w5, w5, w7                  // signed: a negative (w3 - 1) gives 0, not a huge count
+        mul             w5, w5, w7                  // w5 = bytes handled by the vector loop
+        sub             w3, w3, w5                  // w3 = bytes left for the scalar tail
+        cbz             w5, 2f
+31:
+        add_png_paeth_prediction 3, load=1
+        subs            w5, w5, w7
+        add             x1, x1, x7
+        add             x2, x2, x7
+.rept   4
+        add_png_paeth_prediction 3
+.endr
+        b.ne            31b
+        b               2f
+
+40:     // bpp = 4
+        cmp             w4, #4
+        b.gt            60f
+        // 16 bytes per loop
+        bic             w5, w3, #15
+        and             w3, w3, #15
+        cbz             w5, 2f
+41:
+        add_png_paeth_prediction 4, load=1
+        subs            w5, w5, #16
+        add             x1, x1, #16
+        add             x2, x2, #16
+.rept   3
+        add_png_paeth_prediction 4
+.endr
+        b.ne            41b
+        b               2f
+
+60:     // bpp = 6
+        cmp             w4, #6
+        b.gt            80f
+        // 12 bytes per loop
+        // each pass over-reads 4 bytes and over-writes 2 bytes, so start from (w3 - 4)
+        sub             w5, w3, #4
+        mov             w7, #12
+        sdiv            w5, w5, w7                  // signed: w3 < 4 gives 0, so only the scalar tail runs
+        mul             w5, w5, w7                  // w5 = bytes handled by the vector loop
+        sub             w3, w3, w5                  // w3 = bytes left for the scalar tail
+        cbz             w5, 2f
+61:
+        add_png_paeth_prediction 6, load=1
+        add_png_paeth_prediction 6
+        subs            w5, w5, w7
+        add             x1, x1, x7
+        add             x2, x2, x7
+        b.ne            61b
+        b               2f
+
+80:     // bpp > 6 (handled as 8): 16 bytes per loop
+        bic             w5, w3, #15
+        and             w3, w3, #15
+        cbz             w5, 2f
+81:
+        add_png_paeth_prediction 8, load=1
+        add_png_paeth_prediction 8
+        subs            w5, w5, #16
+        add             x1, x1, #16
+        add             x2, x2, #16
+        b.ne            81b
+2:
+        neg             w6, w4                      // -bpp
+        cbz             w3, 8f
+3:
+        ldrb            w7, [x0, w6, sxtw]          // a = dst[i - bpp]
+        ldrb            w9, [x2, w6, sxtw]          // c = top[i - bpp]
+        ldrb            w8, [x2], #1                // b = top[i]
+
+        sub             w10, w8, w9                 // p = b - c
+        sub             w11, w7, w9                 // a - c
+
+        cmp             w10, #0
+        cneg            w12, w10, lt                // pa = abs(b - c)
+        cmp             w11, #0
+        add             w14, w10, w11
+        cneg            w13, w11, lt                // pb = abs(a - c)
+        cmp             w14, #0
+        cneg            w14, w14, lt                // pc = abs(a + b - 2*c)
+
+        ldrb            w16, [x1], #1               // src[i]
+
+        cmp             w12, w13
+        b.gt            4f                          // pa > pb: a is not the predictor
+        cmp             w12, w14
+        b.gt            4f                          // pa > pc: a is not the predictor
+        mov             w15, w7                     // pa <= pb && pa <= pc: use a
+        b               5f
+4:
+        cmp             w13, w14
+        csel            w15, w8, w9, le             // pb <= pc ? b : c
+5:
+        subs            w3, w3, #1
+        add             w15, w15, w16               // predictor + src[i]
+        strb            w15, [x0], #1               // strb keeps only the low 8 bits
+        b.ne            3b
+8:
+        ret
+endfunc
diff --git a/libavcodec/pngdsp.c b/libavcodec/pngdsp.c
index ae40113a51..5c96b9a32c 100644
--- a/libavcodec/pngdsp.c
+++ b/libavcodec/pngdsp.c
@@ -58,7 +58,9 @@ av_cold void ff_pngdsp_init(PNGDSPContext *dsp)
     dsp->add_bytes_l2         = add_bytes_l2_c;
     dsp->add_paeth_prediction = ff_add_png_paeth_prediction;
 
-#if ARCH_X86 && HAVE_X86ASM
+#if ARCH_AARCH64
+    ff_pngdsp_init_aarch64(dsp);
+#elif ARCH_X86 && HAVE_X86ASM
     ff_pngdsp_init_x86(dsp);
 #endif
 }
diff --git a/libavcodec/pngdsp.h b/libavcodec/pngdsp.h
index 5475d0d943..29cf96f22c 100644
--- a/libavcodec/pngdsp.h
+++ b/libavcodec/pngdsp.h
@@ -35,6 +35,7 @@ typedef struct PNGDSPContext {
 } PNGDSPContext;
 
 void ff_pngdsp_init(PNGDSPContext *dsp);
+void ff_pngdsp_init_aarch64(PNGDSPContext *dsp);
 void ff_pngdsp_init_x86(PNGDSPContext *dsp);
 
 #endif /* AVCODEC_PNGDSP_H */
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2026-01-06 14:40 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-06 14:39 [FFmpeg-devel] [PR] avcodec/aarch64: add pngdsp (PR #21394) Zhao Zhili via ffmpeg-devel

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git