Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: "Rémi Denis-Courmont" <remi@remlab.net>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH v2 3/9] lavc/vp9dsp: R-V V ipred hor
Date: Tue, 07 May 2024 19:08:56 +0300
Message-ID: <4581169.LvFx2qVVIh@basile.remlab.net> (raw)
In-Reply-To: <tencent_50E8C2D878CE548B2D1886C08ADBE680B009@qq.com>

Le tiistaina 7. toukokuuta 2024, 10.36.07 EEST uk7b@foxmail.com a écrit :
> From: sunyuechi <sunyuechi@iscas.ac.cn>
> 
> C908:
> vp9_hor_8x8_8bpp_c: 74.7
> vp9_hor_8x8_8bpp_rvv_i32: 35.7
> vp9_hor_16x16_8bpp_c: 175.5
> vp9_hor_16x16_8bpp_rvv_i32: 80.2
> vp9_hor_32x32_8bpp_c: 510.2
> vp9_hor_32x32_8bpp_rvv_i32: 264.0
> ---
>  libavcodec/riscv/vp9_intra_rvv.S | 56 ++++++++++++++++++++++++++++++++
>  libavcodec/riscv/vp9dsp.h        |  6 ++++
>  libavcodec/riscv/vp9dsp_init.c   |  3 ++
>  3 files changed, 65 insertions(+)
> 
> diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
> index db9774c263..dd9bc036e7 100644
> --- a/libavcodec/riscv/vp9_intra_rvv.S
> +++ b/libavcodec/riscv/vp9_intra_rvv.S
> @@ -113,3 +113,59 @@ func_dc dc_left  8   left 3  0  zve64x
>  func_dc dc_top   32  top  5  1  zve32x
>  func_dc dc_top   16  top  4  1  zve32x
>  func_dc dc_top   8   top  3  0  zve64x
> +
> +func ff_h_32x32_rvv, zve32x
> +        li           t0, 32
> +        addi         a2, a2, 31
> +        vsetvli      zero, t0, e8, m2, ta, ma
> +
> +        .rept 2
> +        .irp n 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
> +        lbu          t1, (a2)
> +        addi         a2, a2, -1
> +        vmv.v.x      v\n, t1
> +        .endr
> +        .irp n 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
> +        vse8.v       v\n, (a0)
> +        add          a0, a0, a1
> +        .endr
> +        .endr

Do you gain much by unrolling all the way to 16x? Given that you have the 
counter value already in t0, it should not make much difference to just unroll 
2x or maybe 4x and then loop.

It might also be faster to use lhu or lwu and shift to reduce scalar loads, at 
least if the vector is suitably aligned.

> +
> +        ret
> +endfunc
> +
> +func ff_h_16x16_rvv, zve32x
> +        addi         a2, a2, 15
> +        vsetivli     zero, 16, e8, m1, ta, ma
> +
> +        .irp n 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
> +        lbu          t1, (a2)
> +        addi         a2, a2, -1
> +        vmv.v.x      v\n, t1
> +        .endr
> +        .irp n 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
> +        vse8.v       v\n, (a0)
> +        add          a0, a0, a1
> +        .endr
> +        vse8.v       v23, (a0)
> +
> +        ret
> +endfunc
> +
> +func ff_h_8x8_rvv, zve32x
> +        addi         a2, a2, 7
> +        vsetivli     zero, 8, e8, mf2, ta, ma
> +
> +        .irp n 8, 9, 10, 11, 12, 13, 14, 15
> +        lbu          t1, (a2)
> +        addi         a2, a2, -1
> +        vmv.v.x      v\n, t1
> +        .endr
> +        .irp n 8, 9, 10, 11, 12, 13, 14
> +        vse8.v       v\n, (a0)
> +        add          a0, a0, a1
> +        .endr
> +        vse8.v       v15, (a0)
> +
> +        ret
> +endfunc
> diff --git a/libavcodec/riscv/vp9dsp.h b/libavcodec/riscv/vp9dsp.h
> index b8ff282f8a..0ad961c7e0 100644
> --- a/libavcodec/riscv/vp9dsp.h
> +++ b/libavcodec/riscv/vp9dsp.h
> @@ -66,6 +66,12 @@ void ff_v_16x16_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
>                      const uint8_t *a);
>  void ff_v_8x8_rvi(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
>                    const uint8_t *a);
> +void ff_h_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_h_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                    const uint8_t *a);
> +void ff_h_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
> +                  const uint8_t *a);
> 
>  #define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx)                         \
>  void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride,   \
> diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
> index c10f8bbe41..7816b13fe0 100644
> --- a/libavcodec/riscv/vp9dsp_init.c
> +++ b/libavcodec/riscv/vp9dsp_init.c
> @@ -59,6 +59,9 @@ static av_cold void vp9dsp_intrapred_init_riscv(VP9DSPContext *dsp, int bpp)
>              dsp->intra_pred[TX_16X16][DC_129_PRED] = ff_dc_129_16x16_rvv;
>              dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_dc_top_32x32_rvv;
>              dsp->intra_pred[TX_16X16][TOP_DC_PRED] = ff_dc_top_16x16_rvv;
> +            dsp->intra_pred[TX_32X32][HOR_PRED] = ff_h_32x32_rvv;
> +            dsp->intra_pred[TX_16X16][HOR_PRED] = ff_h_16x16_rvv;
> +            dsp->intra_pred[TX_8X8][HOR_PRED] = ff_h_8x8_rvv;
>          }
>      #endif
>      #endif


-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  reply	other threads:[~2024-05-07 16:10 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20240507073613.2871668-1-uk7b@foxmail.com>
2024-05-07  7:36 ` [FFmpeg-devel] [PATCH v2 2/9] lavc/vp9dsp: R-V mc copy uk7b
2024-05-07  7:36 ` [FFmpeg-devel] [PATCH v2 3/9] lavc/vp9dsp: R-V V ipred hor uk7b
2024-05-07 16:08   ` Rémi Denis-Courmont [this message]
2024-05-07 18:42     ` flow gg
2024-05-07  7:36 ` [FFmpeg-devel] [PATCH v2 4/9] lavc/vp9dsp: R-V V ipred tm uk7b
2024-05-07  7:36 ` [FFmpeg-devel] [PATCH v2 5/9] lavc/vp9dsp: R-V V mc avg uk7b
2024-05-07  7:36 ` [FFmpeg-devel] [PATCH v2 6/9] lavc/vp9dsp: R-V V mc bilin h v uk7b
2024-05-07  7:36 ` [FFmpeg-devel] [PATCH v2 7/9] lavc/vp9dsp: R-V V mc tap " uk7b
2024-05-07  7:36 ` [FFmpeg-devel] [PATCH v2 8/9] lavc/vp9dsp: R-V V mc bilin hv uk7b
2024-05-07  7:36 ` [FFmpeg-devel] [PATCH v2 9/9] lavc/vp9dsp: R-V V mc tap hv uk7b

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4581169.LvFx2qVVIh@basile.remlab.net \
    --to=remi@remlab.net \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git