From: "Rémi Denis-Courmont" <remi@remlab.net>
To: ffmpeg-devel@ffmpeg.org
Cc: Arnie Chang <arnie.chang@sifive.com>
Subject: Re: [FFmpeg-devel] [PATCH] lavc/h264chroma: RISC-V V add motion compensation for 4xH and 2xH chroma blocks
Date: Wed, 14 Jun 2023 18:57:50 +0300
Message-ID: <3559820.ilfcA4O0Wq@basile.remlab.net> (raw)
In-Reply-To: <20230609071727.524-1-arnie.chang@sifive.com>
Le perjantaina 9. kesäkuuta 2023, 10.17.27 EEST Arnie Chang a écrit :
> Optimize the put and avg filtering for 4xH and 2xH blocks
>
> Signed-off-by: Arnie Chang <arnie.chang@sifive.com>
> ---
> checkasm: using random seed 3475799765
> RVVi32:
> - h264chroma.chroma_mc [OK]
> checkasm: all 6 tests passed
> avg_h264_chroma_mc1_8_c: 1821.5
> avg_h264_chroma_mc1_8_rvv_i32: 466.5
> avg_h264_chroma_mc2_8_c: 939.2
> avg_h264_chroma_mc2_8_rvv_i32: 466.5
> avg_h264_chroma_mc4_8_c: 502.2
> avg_h264_chroma_mc4_8_rvv_i32: 466.5
> put_h264_chroma_mc1_8_c: 1436.5
> put_h264_chroma_mc1_8_rvv_i32: 382.5
> put_h264_chroma_mc2_8_c: 824.2
> put_h264_chroma_mc2_8_rvv_i32: 382.5
> put_h264_chroma_mc4_8_c: 431.2
> put_h264_chroma_mc4_8_rvv_i32: 382.5
>
> libavcodec/riscv/h264_chroma_init_riscv.c | 8 +
> libavcodec/riscv/h264_mc_chroma.S | 216 ++++++++++++++--------
> 2 files changed, 144 insertions(+), 80 deletions(-)
>
> diff --git a/libavcodec/riscv/h264_chroma_init_riscv.c
> b/libavcodec/riscv/h264_chroma_init_riscv.c index 7c905edfcd..9f95150ea3
> 100644
> --- a/libavcodec/riscv/h264_chroma_init_riscv.c
> +++ b/libavcodec/riscv/h264_chroma_init_riscv.c
> @@ -27,6 +27,10 @@
>
> void h264_put_chroma_mc8_rvv(uint8_t *p_dst, const uint8_t *p_src,
> ptrdiff_t stride, int h, int x, int y); void
> h264_avg_chroma_mc8_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t
> stride, int h, int x, int y); +void h264_put_chroma_mc4_rvv(uint8_t *p_dst,
> const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y); +void
> h264_avg_chroma_mc4_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t
> stride, int h, int x, int y); +void h264_put_chroma_mc2_rvv(uint8_t *p_dst,
> const uint8_t *p_src, ptrdiff_t stride, int h, int x, int y); +void
> h264_avg_chroma_mc2_rvv(uint8_t *p_dst, const uint8_t *p_src, ptrdiff_t
> stride, int h, int x, int y);
>
> av_cold void ff_h264chroma_init_riscv(H264ChromaContext *c, int bit_depth)
> {
> @@ -36,6 +40,10 @@ av_cold void ff_h264chroma_init_riscv(H264ChromaContext
> *c, int bit_depth) if (bit_depth == 8 && (flags & AV_CPU_FLAG_RVV_I32) &&
> ff_get_rv_vlenb() >= 16) { c->put_h264_chroma_pixels_tab[0] =
> h264_put_chroma_mc8_rvv; c->avg_h264_chroma_pixels_tab[0] =
> h264_avg_chroma_mc8_rvv; + c->put_h264_chroma_pixels_tab[1] =
> h264_put_chroma_mc4_rvv; + c->avg_h264_chroma_pixels_tab[1] =
> h264_avg_chroma_mc4_rvv; + c->put_h264_chroma_pixels_tab[2] =
> h264_put_chroma_mc2_rvv; + c->avg_h264_chroma_pixels_tab[2] =
> h264_avg_chroma_mc2_rvv; }
> #endif
> }
> diff --git a/libavcodec/riscv/h264_mc_chroma.S
> b/libavcodec/riscv/h264_mc_chroma.S index 364bc3156e..c97cdbad86 100644
> --- a/libavcodec/riscv/h264_mc_chroma.S
> +++ b/libavcodec/riscv/h264_mc_chroma.S
> @@ -19,8 +19,7 @@
> */
> #include "libavutil/riscv/asm.S"
>
> -.macro h264_chroma_mc8 type
> -func h264_\type\()_chroma_mc8_rvv, zve32x
> +.macro do_chroma_mc type width unroll
It looks like \width is only ever used as the AVL (application vector length)
operand of vsetivli. You could advantageously pass it as a run-time argument
to an internal function, and spare the instruction cache, instead of
instantiating otherwise identical code three times.
> csrw vxrm, zero
> slli t2, a5, 3
> mul t1, a5, a4
> @@ -30,94 +29,104 @@ func h264_\type\()_chroma_mc8_rvv, zve32x
> sub a7, a4, t1
> addi a6, a5, 64
> sub t0, t2, t1
> - vsetivli t3, 8, e8, m1, ta, mu
> + vsetivli t3, \width, e8, m1, ta, mu
> beqz t1, 2f
> blez a3, 8f
> li t4, 0
> li t2, 0
> li t5, 1
> addi a5, t3, 1
> + .ifc \unroll,1
> slli t3, a2, 2
> + .else
> + slli t3, a2, 1
> + .endif
Note that all those 5-line conditional shift blocks could be simplified by
folding the condition into the shift amount, e.g.:

    slli t3, a2, (1 + \unroll)

Though I wonder if we could leverage the SH*ADD instructions (sh1add, sh2add,
sh3add from the Zba extension) in some cases instead of SLLI?
(..)
> +.endm
> +
> +.macro h264_chroma_mc type width
> +func h264_\type\()_chroma_mc\width\()_rvv, zve32x
> + .ifc \width,8
> + do_chroma_mc \type 8 1
> + .else
> + li a7, 3
> + blt a3, a7, 11f
> + do_chroma_mc \type \width 1
> +11:
> + do_chroma_mc \type \width 0
> + .endif
--
Rémi Denis-Courmont
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2023-06-14 15:58 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-09 7:17 Arnie Chang
2023-06-10 14:55 ` Lynne
2023-06-10 16:43 ` Arnie Chang
2023-06-12 14:59 ` Rémi Denis-Courmont
2023-06-12 15:28 ` Arnie Chang
2023-06-12 15:29 ` Rémi Denis-Courmont
2023-06-14 15:57 ` Rémi Denis-Courmont [this message]
2023-06-15 14:58 ` Arnie Chang
2023-06-15 18:48 ` Rémi Denis-Courmont
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3559820.ilfcA4O0Wq@basile.remlab.net \
--to=remi@remlab.net \
--cc=arnie.chang@sifive.com \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git