From: "Rémi Denis-Courmont" <remi@remlab.net>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH] lavc/h264dsp: R-V V 8-bit luma_dc_dequant_idct
Date: Sun, 7 Jul 2024 21:49:26 +0300
Message-ID: <20240707184926.139234-1-remi@remlab.net> (raw)
The performance gain is around 0.5% according to the K230 cycle counter, and
not measurable with the architecture timer, so this is probably not worth merging.
---
libavcodec/riscv/h264dsp_init.c | 4 +++
libavcodec/riscv/h264idct_rvv.S | 57 +++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 88afec8df0..8a83e2dee2 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -51,6 +51,7 @@ void ff_h264_idct_add16intra_8_rvv(uint8_t *dst, const int *blockoffset,
void ff_h264_idct8_add4_8_rvv(uint8_t *dst, const int *blockoffset,
int16_t *block, int stride,
const uint8_t nnzc[5 * 8]);
+void ff_h264_luma_dc_dequant_idct_8_rvv(int16_t *out, int16_t *in, int qmul);
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@@ -84,6 +85,9 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv;
dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv;
dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv;
+ if (flags & AV_CPU_FLAG_RVV_I64)
+ dsp->h264_luma_dc_dequant_idct =
+ ff_h264_luma_dc_dequant_idct_8_rvv;
# endif
}
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index 370d162bf2..eeeba0c7eb 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -452,4 +452,61 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x
ret
endfunc
.endr
+
+func ff_h264_luma_dc_dequant_idct_8_rvv, zve64x # 4x4 DC transform + dequantisation; a0 = out, a1 = in, a2 = qmul
+ csrwi vxrm, 0 # fixed-point rounding mode 0 (round-to-nearest-up), used by vnclip below
+ vsetivli zero, 4, e16, mf2, ta, ma # vl = 4 elements of 16 bits
+ vlseg4e16.v v0, (a1) # de-interleave: v0..v3 = in[4i+0] .. in[4i+3] for i = 0..3
+ addi sp, sp, -16 * 2 # reserve 32 bytes of stack as scratch for 16 intermediate values
+ vadd.vv v8, v0, v1 # z0 = in[4i+0] + in[4i+1]
+ vsub.vv v10, v0, v1 # z1 = in[4i+0] - in[4i+1]
+ addi t1, sp, 1 * 4 * 2 # t1 = scratch + 8 bytes (second group of four values)
+ vsub.vv v12, v2, v3 # z2 = in[4i+2] - in[4i+3]
+ addi t2, sp, 2 * 4 * 2 # t2 = scratch + 16 bytes (third group of four values)
+ vadd.vv v14, v2, v3 # z3 = in[4i+2] + in[4i+3]
+ vadd.vv v0, v8, v14 # field 0 = z0 + z3
+ addi t3, sp, 3 * 4 * 2 # t3 = scratch + 24 bytes (fourth group of four values)
+ vsub.vv v1, v8, v14 # field 1 = z0 - z3
+ vsub.vv v2, v10, v12 # field 2 = z1 - z2
+ vadd.vv v3, v10, v12 # field 3 = z1 + z2
+ vsseg4e16.v v0, (sp) # interleaved store: scratch[4i+f] = field f of element i
+ vle16.v v0, (sp) # unit-stride reloads yield the transposed block: v0 = scratch[0..3]
+ vle16.v v2, (t2) # v2 = scratch[8..11]
+ vle16.v v1, (t1) # v1 = scratch[4..7]
+ # same as 4-point iDCT but without right shifts
+ vadd.vv v8, v0, v2 # z0 = scratch[i] + scratch[8+i]
+ vle16.v v3, (t3) # v3 = scratch[12..15]
+ vsub.vv v10, v0, v2 # z1 = scratch[i] - scratch[8+i]
+ addi sp, sp, 16 * 2 # release the scratch area; all four reloads have been issued
+ vsub.vv v12, v1, v3 # z2 = scratch[4+i] - scratch[12+i]
+ lui t0, 0x50004 # 0x5000_4000
+ vadd.vv v14, v1, v3 # z3 = scratch[4+i] + scratch[12+i]
+ vwadd.vv v0, v8, v14 # widen to 32 bits: z0 + z3
+ slli t0, t0, 16 - 12 # 0x5_0004_0000
+ vwadd.vv v2, v10, v12 # widen to 32 bits: z1 + z2
+ vwsub.vv v4, v10, v12 # widen to 32 bits: z1 - z2
+ ori t0, t0, 1 # 0x5_0004_0001
+ vwsub.vv v6, v8, v14 # widen to 32 bits: z0 - z3
+ vsetvli zero, zero, e32, m1, ta, ma # same vl, now operating on the 32-bit sums
+ vmul.vx v0, v0, a2 # (z0 + z3) * qmul
+ slli t0, t0, 1 + 16 + 4 + 1 # 0x0140_0100_0040_0000: 16-bit lanes hold byte offsets 0, 64, 256, 320
+ vmul.vx v2, v2, a2 # (z1 + z2) * qmul
+ vmul.vx v4, v4, a2 # (z1 - z2) * qmul
+ addi t1, a0, 1 * 16 * 2 # t1 = out + 32 bytes
+ vmul.vx v6, v6, a2 # (z0 - z3) * qmul
+ vsetvli zero, zero, e64, m2, ta, ma # 64-bit elements so that t0 can be written in one move
+ vmv.s.x v12, t0 # element 0 of v12 = t0, read below as four 16-bit indices
+ vsetvli zero, zero, e16, mf2, ta, ma # back to 16-bit elements for narrowing and stores
+ vnclip.wi v8, v0, 8 # rounding shift right by 8, then signed saturation to 16 bits
+ addi t2, a0, 4 * 16 * 2 # t2 = out + 128 bytes
+ vnclip.wi v9, v2, 8 # same narrowing for the (z1 + z2) products
+ vnclip.wi v10, v4, 8 # same narrowing for the (z1 - z2) products
+ addi t3, a0, 5 * 16 * 2 # t3 = out + 160 bytes
+ vnclip.wi v11, v6, 8 # same narrowing for the (z0 - z3) products
+ vsuxei16.v v8, (a0), v12 # scatter 4 results to out + {0, 64, 256, 320} bytes
+ vsuxei16.v v9, (t1), v12 # scatter 4 results to out + 32 + {0, 64, 256, 320} bytes
+ vsuxei16.v v10, (t2), v12 # scatter 4 results to out + 128 + {0, 64, 256, 320} bytes
+ vsuxei16.v v11, (t3), v12 # scatter 4 results to out + 160 + {0, 64, 256, 320} bytes
+ ret
+endfunc
#endif
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
reply other threads:[~2024-07-07 18:49 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240707184926.139234-1-remi@remlab.net \
--to=remi@remlab.net \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git