From: "Rémi Denis-Courmont" <remi@remlab.net>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH 2/5] lavc/h264dsp: move R-V V idct_dc_add
Date: Thu, 18 Jul 2024 22:35:43 +0300
Message-ID: <20240718193546.18939-2-remi@remlab.net> (raw)
In-Reply-To: <20240718193546.18939-1-remi@remlab.net>
From: "J. Dekker" <jdek@itanimul.li>
No functional changes. This just moves the assembly code so that it can be
referenced by other functions in h264idct_rvv.S with local jumps.
Edited-by: Rémi Denis-Courmont <remi@remlab.net>
---
libavcodec/riscv/h264dsp_rvv.S | 103 -------------------------------
libavcodec/riscv/h264idct_rvv.S | 105 ++++++++++++++++++++++++++++++++
2 files changed, 105 insertions(+), 103 deletions(-)
diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S
index 5c70709cf2..ed6a16a9c4 100644
--- a/libavcodec/riscv/h264dsp_rvv.S
+++ b/libavcodec/riscv/h264dsp_rvv.S
@@ -1,7 +1,6 @@
/*
* SPDX-License-Identifier: BSD-2-Clause
*
- * Copyright (c) 2024 J. Dekker <jdek@itanimul.li>
* Copyright © 2024 Rémi Denis-Courmont.
*
* Redistribution and use in source and binary forms, with or without
@@ -326,105 +325,3 @@ func ff_h264_h_loop_filter_luma_mbaff_8_rvv, zve32x
vssseg6e8.v v8, (a0), a1
ret
endfunc
-
-.macro idct_dc_add8 width
-func ff_h264_idct\width\()_dc_add_8_rvv, zve64x, zba
-.if \width == 8
- vsetivli zero, \width, e16, m1, ta, ma
-.else
- vsetivli zero, \width, e16, mf2, ta, ma
-.endif
- lh a3, 0(a1)
- addi a3, a3, 32
- srai a3, a3, 6
- sh zero, 0(a1)
-.if \width == 8
- vlse64.v v24, (a0), a2
- vsetvli t0, zero, e16, m8, ta, ma
-.else
- vlse32.v v24, (a0), a2
- vsetvli t0, zero, e16, m4, ta, ma
-.endif
- vzext.vf2 v0, v24
- vadd.vx v0, v0, a3
- vmax.vx v0, v0, zero
-.if \width == 8
- vsetvli zero, zero, e8, m4, ta, ma
-.else
- vsetvli zero, zero, e8, m2, ta, ma
-.endif
- vnclipu.wi v24, v0, 0
- vsetivli zero, \width, e8, m1, ta, ma
-.if \width == 8
- vsse64.v v24, (a0), a2
-.else
- vsse32.v v24, (a0), a2
-.endif
- ret
-endfunc
-.endm
-
-idct_dc_add8 4
-idct_dc_add8 8
-
-.macro idct_dc_add width
-func ff_h264_idct\width\()_dc_add_16_rvv, zve64x, zba
- vsetivli zero, \width, e16, m1, ta, ma
- lw a3, 0(a1)
- addi a3, a3, 32
- srai a3, a3, 6
- sw zero, 0(a1)
- add t4, a0, a2
- sh1add t5, a2, a0
- sh1add t6, a2, t4
-.if \width == 8
- sh2add t0, a2, a0
- sh2add t1, a2, t4
- sh2add t2, a2, t5
- sh2add t3, a2, t6
-.endif
- vle16.v v0, (a0)
- vle16.v v1, (t4)
- vle16.v v2, (t5)
- vle16.v v3, (t6)
-.if \width == 8
- vle16.v v4, (t0)
- vle16.v v5, (t1)
- vle16.v v6, (t2)
- vle16.v v7, (t3)
- vsetvli a6, zero, e16, m8, ta, ma
-.else
- vsetvli a6, zero, e16, m4, ta, ma
-.endif
- vadd.vx v0, v0, a3
- vmax.vx v0, v0, zero
- vmin.vx v0, v0, a5
- vsetivli zero, \width, e16, m1, ta, ma
- vse16.v v0, (a0)
- vse16.v v1, (t4)
- vse16.v v2, (t5)
- vse16.v v3, (t6)
-.if \width == 8
- vse16.v v4, (t0)
- vse16.v v5, (t1)
- vse16.v v6, (t2)
- vse16.v v7, (t3)
-.endif
- ret
-endfunc
-.endm
-
-idct_dc_add 4
-idct_dc_add 8
-
-.irp depth,9,10,12,14
-func ff_h264_idct4_dc_add_\depth\()_rvv, zve64x
- li a5, (1 << \depth) - 1
- j ff_h264_idct4_dc_add_16_rvv
-endfunc
-
-func ff_h264_idct8_dc_add_\depth\()_rvv, zve64x
- li a5, (1 << \depth) - 1
- j ff_h264_idct8_dc_add_16_rvv
-endfunc
-.endr
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index 505f491308..37b27fc92a 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -1,4 +1,7 @@
/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 J. Dekker <jdek@itanimul.li>
* Copyright © 2024 Rémi Denis-Courmont.
*
* Redistribution and use in source and binary forms, with or without
@@ -412,6 +415,108 @@ func ff_h264_idct8_add_\depth\()_rvv, zve32x
endfunc
.endr
+.macro idct_dc_add8 width
+func ff_h264_idct\width\()_dc_add_8_rvv, zve64x, zba
+.if \width == 8
+ vsetivli zero, \width, e16, m1, ta, ma
+.else
+ vsetivli zero, \width, e16, mf2, ta, ma
+.endif
+ lh a3, 0(a1)
+ addi a3, a3, 32
+ srai a3, a3, 6
+ sh zero, 0(a1)
+.if \width == 8
+ vlse64.v v24, (a0), a2
+ vsetvli t0, zero, e16, m8, ta, ma
+.else
+ vlse32.v v24, (a0), a2
+ vsetvli t0, zero, e16, m4, ta, ma
+.endif
+ vzext.vf2 v0, v24
+ vadd.vx v0, v0, a3
+ vmax.vx v0, v0, zero
+.if \width == 8
+ vsetvli zero, zero, e8, m4, ta, ma
+.else
+ vsetvli zero, zero, e8, m2, ta, ma
+.endif
+ vnclipu.wi v24, v0, 0
+ vsetivli zero, \width, e8, m1, ta, ma
+.if \width == 8
+ vsse64.v v24, (a0), a2
+.else
+ vsse32.v v24, (a0), a2
+.endif
+ ret
+endfunc
+.endm
+
+idct_dc_add8 4
+idct_dc_add8 8
+
+.macro idct_dc_add width
+func ff_h264_idct\width\()_dc_add_16_rvv, zve64x, zba
+ vsetivli zero, \width, e16, m1, ta, ma
+ lw a3, 0(a1)
+ addi a3, a3, 32
+ srai a3, a3, 6
+ sw zero, 0(a1)
+ add t4, a0, a2
+ sh1add t5, a2, a0
+ sh1add t6, a2, t4
+.if \width == 8
+ sh2add t0, a2, a0
+ sh2add t1, a2, t4
+ sh2add t2, a2, t5
+ sh2add t3, a2, t6
+.endif
+ vle16.v v0, (a0)
+ vle16.v v1, (t4)
+ vle16.v v2, (t5)
+ vle16.v v3, (t6)
+.if \width == 8
+ vle16.v v4, (t0)
+ vle16.v v5, (t1)
+ vle16.v v6, (t2)
+ vle16.v v7, (t3)
+ vsetvli a6, zero, e16, m8, ta, ma
+.else
+ vsetvli a6, zero, e16, m4, ta, ma
+.endif
+ vadd.vx v0, v0, a3
+ vmax.vx v0, v0, zero
+ vmin.vx v0, v0, a5
+ vsetivli zero, \width, e16, m1, ta, ma
+ vse16.v v0, (a0)
+ vse16.v v1, (t4)
+ vse16.v v2, (t5)
+ vse16.v v3, (t6)
+.if \width == 8
+ vse16.v v4, (t0)
+ vse16.v v5, (t1)
+ vse16.v v6, (t2)
+ vse16.v v7, (t3)
+.endif
+ ret
+endfunc
+.endm
+
+idct_dc_add 4
+idct_dc_add 8
+
+.irp depth,9,10,12,14
+func ff_h264_idct4_dc_add_\depth\()_rvv, zve64x
+ li a5, (1 << \depth) - 1
+ j ff_h264_idct4_dc_add_16_rvv
+endfunc
+
+func ff_h264_idct8_dc_add_\depth\()_rvv, zve64x
+ li a5, (1 << \depth) - 1
+ j ff_h264_idct8_dc_add_16_rvv
+endfunc
+.endr
+
const ff_h264_scan8
.byte 014, 015, 024, 025, 016, 017, 026, 027
.byte 034, 035, 044, 045, 036, 037, 046, 047
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-07-18 19:36 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-18 19:35 [FFmpeg-devel] [PATCH 1/5] lavc/h264dsp: factor some mostly identical R-V V code Rémi Denis-Courmont
2024-07-18 19:35 ` Rémi Denis-Courmont [this message]
2024-07-18 19:35 ` [FFmpeg-devel] [PATCH 3/5] lavc/h264dsp: correct VL and LMUL in idct_dc_add Rémi Denis-Courmont
2024-07-18 19:35 ` [FFmpeg-devel] [PATCH 4/5] lavc/h264dsp: reuse the R-V V IDCT DC add functions Rémi Denis-Courmont
2024-07-18 19:35 ` [FFmpeg-devel] [PATCH 5/5] lavc/h264dsp: reduce spills in R-V V idct_add16 Rémi Denis-Courmont
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240718193546.18939-2-remi@remlab.net \
--to=remi@remlab.net \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git