Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: "Martin Storsjö" <martin@martin.st>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: thomas.mundt@hr.de, John Cox <jc@kynesim.co.uk>
Subject: Re: [FFmpeg-devel] [PATCH 11/15] avfilter/vf_bwdif: Add neon for filter_line
Date: Sun, 2 Jul 2023 00:44:10 +0300 (EEST)
Message-ID: <e6ab477b-222d-c044-bdb-3e2ea6fe565@martin.st> (raw)
In-Reply-To: <20230629175729.224383-12-jc@kynesim.co.uk>

On Thu, 29 Jun 2023, John Cox wrote:

> Signed-off-by: John Cox <jc@kynesim.co.uk>
> ---
> libavfilter/aarch64/vf_bwdif_init_aarch64.c |  21 ++
> libavfilter/aarch64/vf_bwdif_neon.S         | 215 ++++++++++++++++++++
> 2 files changed, 236 insertions(+)
>
> diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
> index e75cf2f204..21e67884ab 100644
> --- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c
> +++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
> @@ -31,6 +31,26 @@ void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1,
> void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs,
>                                 int prefs3, int mrefs3, int parity, int clip_max);
>
> +void ff_bwdif_filter_line_neon(void *dst1, void *prev1, void *cur1, void *next1,
> +                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
> +                               int prefs3, int mrefs3, int prefs4, int mrefs4,
> +                               int parity, int clip_max);
> +
> +
> +static void filter_line_helper(void *dst1, void *prev1, void *cur1, void *next1,
> +                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
> +                               int prefs3, int mrefs3, int prefs4, int mrefs4,
> +                               int parity, int clip_max)
> +{
> +    const int w0 = clip_max != 255 ? 0 : w & ~15;
> +
> +    ff_bwdif_filter_line_neon(dst1, prev1, cur1, next1,
> +                              w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max);
> +
> +    if (w0 < w)
> +        ff_bwdif_filter_line_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0,
> +                               w - w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max);
> +}
>
> static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1,
>                                int w, int prefs, int mrefs, int prefs2, int mrefs2,
> @@ -71,6 +91,7 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
>         return;
>
>     s->filter_intra = filter_intra_helper;
> +    s->filter_line  = filter_line_helper;
>     s->filter_edge  = filter_edge_helper;
> }
>
> diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S
> index a33b235882..675e97d966 100644
> --- a/libavfilter/aarch64/vf_bwdif_neon.S
> +++ b/libavfilter/aarch64/vf_bwdif_neon.S
> @@ -128,6 +128,221 @@ coeffs:
>         .hword          5570, 3801, 1016, -3801         // hf[0] = v0.h[2], -hf[1] = v0.h[5]
>         .hword          5077, 981                       // sp[0] = v0.h[6]
>
> +// ===========================================================================
> +//
> +// void filter_line(
> +//      void *dst1,     // x0
> +//      void *prev1,    // x1
> +//      void *cur1,     // x2
> +//      void *next1,    // x3
> +//      int w,          // w4
> +//      int prefs,      // w5
> +//      int mrefs,      // w6
> +//      int prefs2,     // w7
> +//      int mrefs2,     // [sp, #0]
> +//      int prefs3,     // [sp, #8]
> +//      int mrefs3,     // [sp, #16]
> +//      int prefs4,     // [sp, #24]
> +//      int mrefs4,     // [sp, #32]
> +//      int parity,     // [sp, #40]
> +//      int clip_max)   // [sp, #48]
> +
> +function ff_bwdif_filter_line_neon, export=1
> +        // Sanity check w
> +        cmp             w4, #0
> +        ble             99f
> +
> +        // Rearrange regs to be the same as line3 for ease of debug!
> +        mov             w10, w4                         // w10 = loop count
> +        mov             w9,  w6                         // w9  = mref
> +        mov             w12, w7                         // w12 = pref2
> +        mov             w11, w5                         // w11 = pref
> +        ldr             w8,  [sp, #0]                   // w8 =  mref2
> +        ldr             w7,  [sp, #16]                  // w7  = mref3
> +        ldr             w6,  [sp, #32]                  // w6  = mref4
> +        ldr             w13, [sp, #8]                   // w13 = pref3
> +        ldr             w14, [sp, #24]                  // w14 = pref4

Btw, remember that you can load two arguments from the stack at once with
ldp, e.g. "ldp x8, x13, [sp, #0]". If the parameters are declared as
intptr_t/ptrdiff_t instead of int, you won't have an issue with garbage in
the upper 32 bits either.



> +
> +        mov             x4,  x3
> +        mov             x3,  x2
> +        mov             x2,  x1
> +
> +// #define prev2 cur
> +//        const uint8_t * restrict next2 = parity ? prev : next;
> +        ldr             w17, [sp, #40]                  // parity
> +        cmp             w17, #0
> +        csel            x17, x2, x4, ne
> +
> +        // We want all the V registers - save all the ones we must
> +        stp             d14, d15, [sp, #-64]!
> +        stp             d8,  d9,  [sp, #48]
> +        stp             d10, d11, [sp, #32]
> +        stp             d12, d13, [sp, #16]

The order looks a bit weird here, even if the registers end up sequential
on the stack. If you'd fill it from the bottom up, e.g.

stp d8, d9, [sp, #-64]!
stp d10, d11, [sp, #16]
stp d12, d13, [sp, #32]
stp d14, d15, [sp, #48]

they're sequential both in code and on the stack.

// Martin

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  reply	other threads:[~2023-07-01 21:44 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-29 17:57 [FFmpeg-devel] [PATCH 00/15] avfilter/vf_bwdif: Add aarch64 neon functions John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 01/15] avfilter/vf_bwdif: Add outline for aarch " John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 02/15] avfilter/vf_bwdif: Add common macros and consts for aarch64 neon John Cox
2023-07-01 21:35   ` Martin Storsjö
2023-07-02 10:27     ` John Cox
2023-07-02 20:07       ` Martin Storsjö
2023-07-02 21:02         ` Martin Storsjö
2023-07-03  8:31           ` John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 03/15] avfilter/vf_bwdif: Export C filter_intra John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 04/15] avfilter/vf_bwdif: Add neon for filter_intra John Cox
2023-07-01 21:37   ` Martin Storsjö
2023-07-02 10:43     ` John Cox
2023-07-02 20:18       ` Martin Storsjö
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 05/15] tests/checkasm: Add test for vf_bwdif filter_intra John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 06/15] avfilter/vf_bwdif: Add clip and spatial macros for aarch64 neon John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 07/15] avfilter/vf_bwdif: Export C filter_edge John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 08/15] avfilter/vf_bwdif: Add neon for filter_edge John Cox
2023-07-01 21:40   ` Martin Storsjö
2023-07-02 10:50     ` John Cox
2023-07-02 20:36       ` Martin Storsjö
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 09/15] tests/checkasm: Add test for vf_bwdif filter_edge John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 10/15] avfilter/vf_bwdif: Export C filter_line John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 11/15] avfilter/vf_bwdif: Add neon for filter_line John Cox
2023-07-01 21:44   ` Martin Storsjö [this message]
2023-07-02 10:57     ` John Cox
2023-07-02 20:40       ` Martin Storsjö
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 12/15] avfilter/vf_bwdif: Add a filter_line3 method for optimisation John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 13/15] avfilter/vf_bwdif: Add neon for filter_line3 John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 14/15] tests/checkasm: Add test for vf_bwdif filter_line3 John Cox
2023-06-29 17:57 ` [FFmpeg-devel] [PATCH 15/15] avfilter/vf_bwdif: Block filter slices into a multiple of 4 lines John Cox
2023-07-01 21:33 ` [FFmpeg-devel] [PATCH 00/15] avfilter/vf_bwdif: Add aarch64 neon functions Martin Storsjö
2023-07-02 10:18   ` John Cox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e6ab477b-222d-c044-bdb-3e2ea6fe565@martin.st \
    --to=martin@martin.st \
    --cc=ffmpeg-devel@ffmpeg.org \
    --cc=jc@kynesim.co.uk \
    --cc=thomas.mundt@hr.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git