* [FFmpeg-devel] [PATCH 1/6] lavu/riscv: assembly for zicfilp LPAD
@ 2024-07-22 19:38 Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 2/6] lavu/riscv: add forward-edge CFI landing pads Rémi Denis-Courmont
` (6 more replies)
0 siblings, 7 replies; 8+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 19:38 UTC (permalink / raw)
To: ffmpeg-devel
The LPAD instruction, if aligned on a 4-byte boundary, defines a valid target
("landing pad") for an indirect call or jump. Since this instruction is a
HINT, it is safe to assemble even if Zicfilp is not included in the target
instruction set architecture.
The necessary alignment is already provided by the `func` macro. However,
this still lacks the ELF attribute to indicate that Zicfilp is supported
in simple mode. This is left for future work, as the ELF specification is not
ratified as of yet.
Less obviously, this will also require the assembler to support Zicfilp,
because the `tail` pseudo-instruction must then clobber T2 (instead of T1) as
its temporary register.
---
libavutil/riscv/asm.S | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
index 2cf4f7b7ab..37fd7d3b03 100644
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@@ -77,6 +77,12 @@
.endm
.endm
+#if !defined (__riscv_zicfilp)
+ .macro lpad lpl
+ auipc zero, \lpl
+ .endm
+#endif
+
#if !defined (__riscv_zba)
/* SH{1,2,3}ADD definitions for pre-Zba assemblers */
.macro shnadd n, rd, rs1, rs2
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* [FFmpeg-devel] [PATCH 2/6] lavu/riscv: add forward-edge CFI landing pads
2024-07-22 19:38 [FFmpeg-devel] [PATCH 1/6] lavu/riscv: assembly for zicfilp LPAD Rémi Denis-Courmont
@ 2024-07-22 19:38 ` Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 3/6] lavc/riscv: " Rémi Denis-Courmont
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 19:38 UTC (permalink / raw)
To: ffmpeg-devel
---
libavutil/riscv/fixed_dsp_rvv.S | 6 ++++++
libavutil/riscv/float_dsp_rvv.S | 12 ++++++++++++
libavutil/riscv/lls_rvv.S | 1 +
3 files changed, 19 insertions(+)
diff --git a/libavutil/riscv/fixed_dsp_rvv.S b/libavutil/riscv/fixed_dsp_rvv.S
index 6bac5813b8..7a872f7763 100644
--- a/libavutil/riscv/fixed_dsp_rvv.S
+++ b/libavutil/riscv/fixed_dsp_rvv.S
@@ -21,6 +21,7 @@
#include "asm.S"
func ff_vector_fmul_window_scaled_rvv, zve64x
+ lpad 0
csrwi vxrm, 0
vsetvli t0, zero, e16, m1, ta, ma
sh2add a2, a4, a2
@@ -69,6 +70,7 @@ func ff_vector_fmul_window_scaled_rvv, zve64x
endfunc
func ff_vector_fmul_window_fixed_rvv, zve64x
+ lpad 0
csrwi vxrm, 0
vsetvli t0, zero, e16, m1, ta, ma
sh2add a2, a4, a2
@@ -113,6 +115,7 @@ func ff_vector_fmul_window_fixed_rvv, zve64x
endfunc
func ff_vector_fmul_fixed_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
1:
vsetvli t0, a3, e32, m4, ta, ma
@@ -156,6 +159,7 @@ func ff_vector_fmul_reverse_fixed_rvv, zve32x
endfunc
func ff_vector_fmul_add_fixed_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
1:
vsetvli t0, a4, e32, m8, ta, ma
@@ -176,6 +180,7 @@ func ff_vector_fmul_add_fixed_rvv, zve32x
endfunc
func ff_scalarproduct_fixed_rvv, zve64x
+ lpad 0
li t1, 1 << 30
vsetvli t0, zero, e64, m8, ta, ma
vmv.v.x v8, zero
@@ -199,6 +204,7 @@ endfunc
// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
func ff_butterflies_fixed_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a2, e32, m4, ta, ma
vle32.v v16, (a0)
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index 2f0ade6db6..e738268549 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -22,6 +22,7 @@
// (a0) = (a1) * (a2) [0..a3-1]
func ff_vector_fmul_rvv, zve32f
+ lpad 0
1:
vsetvli t0, a3, e32, m8, ta, ma
vle32.v v16, (a1)
@@ -39,6 +40,7 @@ endfunc
// (a0) += (a1) * fa0 [0..a2-1]
func ff_vector_fmac_scalar_rvv, zve32f
+ lpad 0
NOHWF fmv.w.x fa0, a2
NOHWF mv a2, a3
1:
@@ -58,6 +60,7 @@ endfunc
// (a0) = (a1) * fa0 [0..a2-1]
func ff_vector_fmul_scalar_rvv, zve32f
+ lpad 0
NOHWF fmv.w.x fa0, a2
NOHWF mv a2, a3
1:
@@ -74,6 +77,7 @@ NOHWF mv a2, a3
endfunc
func ff_vector_fmul_window_rvv, zve32f
+ lpad 0
// a0: dst, a1: src0, a2: src1, a3: window, a4: length
// e16/m2 and e32/m4 are possible but slower due to gather.
vsetvli t0, zero, e16, m1, ta, ma
@@ -114,6 +118,7 @@ endfunc
// (a0) = (a1) * (a2) + (a3) [0..a4-1]
func ff_vector_fmul_add_rvv, zve32f
+ lpad 0
1:
vsetvli t0, a4, e32, m8, ta, ma
vle32.v v8, (a1)
@@ -134,6 +139,7 @@ endfunc
// TODO factor vrsub, separate last iteration?
// (a0) = (a1) * reverse(a2) [0..a3-1]
func ff_vector_fmul_reverse_rvv, zve32f
+ lpad 0
// e16/m4 and e32/m8 are possible but slower due to gather.
vsetvli t0, zero, e16, m1, ta, ma
sh2add a2, a3, a2
@@ -160,6 +166,7 @@ endfunc
// (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1]
func ff_butterflies_float_rvv, zve32f
+ lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v16, (a0)
@@ -178,6 +185,7 @@ endfunc
// a0 = (a0).(a1) [0..a2-1]
func ff_scalarproduct_float_rvv, zve32f
+ lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v8, zero
vmv.s.x v0, zero
@@ -200,6 +208,7 @@ endfunc
// (a0) = (a1) * (a2) [0..a3-1]
func ff_vector_dmul_rvv, zve64d
+ lpad 0
1:
vsetvli t0, a3, e64, m8, ta, ma
vle64.v v16, (a1)
@@ -217,6 +226,7 @@ endfunc
// (a0) += (a1) * fa0 [0..a2-1]
func ff_vector_dmac_scalar_rvv, zve64d
+ lpad 0
NOHWD fmv.d.x fa0, a2
NOHWD mv a2, a3
1:
@@ -235,6 +245,7 @@ endfunc
// (a0) = (a1) * fa0 [0..a2-1]
func ff_vector_dmul_scalar_rvv, zve64d
+ lpad 0
NOHWD fmv.d.x fa0, a2
NOHWD mv a2, a3
1:
@@ -251,6 +262,7 @@ NOHWD mv a2, a3
endfunc
func ff_scalarproduct_double_rvv, zve64f
+ lpad 0
vsetvli t0, zero, e64, m8, ta, ma
vmv.v.x v8, zero
vmv.s.x v0, zero
diff --git a/libavutil/riscv/lls_rvv.S b/libavutil/riscv/lls_rvv.S
index a36055bd7a..bd9f74ee5f 100644
--- a/libavutil/riscv/lls_rvv.S
+++ b/libavutil/riscv/lls_rvv.S
@@ -21,6 +21,7 @@
#include "asm.S"
func ff_lls_update_covariance_rvv, zve64d, zbb
+ lpad 0
vtype_vli t0, a2, t1, e64, ta, ma
vsetvl zero, a2, t0
vle64.v v8, (a1)
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* [FFmpeg-devel] [PATCH 3/6] lavc/riscv: add forward-edge CFI landing pads
2024-07-22 19:38 [FFmpeg-devel] [PATCH 1/6] lavu/riscv: assembly for zicfilp LPAD Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 2/6] lavu/riscv: add forward-edge CFI landing pads Rémi Denis-Courmont
@ 2024-07-22 19:38 ` Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 4/6] lavfi/riscv: " Rémi Denis-Courmont
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 19:38 UTC (permalink / raw)
To: ffmpeg-devel
---
libavcodec/riscv/aacencdsp_rvv.S | 2 ++
libavcodec/riscv/aacpsdsp_rvv.S | 5 +++++
libavcodec/riscv/ac3dsp_rvb.S | 2 ++
libavcodec/riscv/ac3dsp_rvv.S | 4 ++++
libavcodec/riscv/ac3dsp_rvvb.S | 1 +
libavcodec/riscv/alacdsp_rvv.S | 3 +++
libavcodec/riscv/audiodsp_rvf.S | 1 +
libavcodec/riscv/audiodsp_rvv.S | 2 ++
libavcodec/riscv/blockdsp_rvv.S | 4 ++++
libavcodec/riscv/bswapdsp_rvb.S | 1 +
libavcodec/riscv/bswapdsp_rvv.S | 1 +
libavcodec/riscv/exrdsp_rvv.S | 1 +
libavcodec/riscv/flacdsp_rvv.S | 22 ++++++++++++++++++++--
libavcodec/riscv/fmtconvert_rvv.S | 2 ++
libavcodec/riscv/g722dsp_rvv.S | 1 +
libavcodec/riscv/h263dsp_rvv.S | 2 ++
libavcodec/riscv/h264_mc_chroma.S | 8 ++++++++
libavcodec/riscv/h264addpx_rvv.S | 4 ++++
libavcodec/riscv/h264dsp_rvv.S | 5 +++++
libavcodec/riscv/h264idct_rvv.S | 16 ++++++++++++++++
libavcodec/riscv/huffyuvdsp_rvv.S | 2 ++
libavcodec/riscv/idctdsp_rvv.S | 3 +++
libavcodec/riscv/jpeg2000dsp_rvv.S | 2 ++
libavcodec/riscv/llauddsp_rvv.S | 2 ++
libavcodec/riscv/llviddsp_rvv.S | 1 +
libavcodec/riscv/llvidencdsp_rvv.S | 1 +
libavcodec/riscv/lpc_rvv.S | 2 ++
libavcodec/riscv/me_cmp_rvv.S | 17 +++++++++++++++++
libavcodec/riscv/opusdsp_rvv.S | 1 +
libavcodec/riscv/pixblockdsp_rvi.S | 2 ++
libavcodec/riscv/pixblockdsp_rvv.S | 4 ++++
libavcodec/riscv/rv34dsp_rvv.S | 2 ++
libavcodec/riscv/rv40dsp_rvv.S | 4 ++++
libavcodec/riscv/sbrdsp_rvv.S | 13 +++++++++++--
libavcodec/riscv/startcode_rvb.S | 1 +
libavcodec/riscv/startcode_rvv.S | 1 +
libavcodec/riscv/svqenc_rvv.S | 1 +
libavcodec/riscv/takdsp_rvv.S | 4 ++++
libavcodec/riscv/utvideodsp_rvv.S | 2 ++
libavcodec/riscv/vc1dsp_rvi.S | 2 ++
libavcodec/riscv/vc1dsp_rvv.S | 11 +++++++++++
libavcodec/riscv/vorbisdsp_rvv.S | 1 +
libavcodec/riscv/vp7dsp_rvv.S | 3 +++
libavcodec/riscv/vp8dsp_rvi.S | 3 +++
libavcodec/riscv/vp8dsp_rvv.S | 12 ++++++++++++
libavcodec/riscv/vp9_intra_rvi.S | 3 +++
libavcodec/riscv/vp9_intra_rvv.S | 7 +++++++
libavcodec/riscv/vp9_mc_rvi.S | 5 +++++
libavcodec/riscv/vp9_mc_rvv.S | 1 +
49 files changed, 196 insertions(+), 4 deletions(-)
diff --git a/libavcodec/riscv/aacencdsp_rvv.S b/libavcodec/riscv/aacencdsp_rvv.S
index 21e66a77ae..e9e776dc9b 100644
--- a/libavcodec/riscv/aacencdsp_rvv.S
+++ b/libavcodec/riscv/aacencdsp_rvv.S
@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_abs_pow34_rvv, zve32f
+ lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
sub a2, a2, t0
@@ -39,6 +40,7 @@ func ff_abs_pow34_rvv, zve32f
endfunc
func ff_aac_quant_bands_rvv, zve32f
+ lpad 0
NOHWF fmv.w.x fa0, a6
NOHWF fmv.w.x fa1, a7
fcvt.s.w ft0, a5
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index 2d6858688a..6d01bfb734 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_ps_add_squares_rvv, zve64f
+ lpad 0
li t1, 32
1:
vsetvli t0, a2, e32, m4, ta, ma
@@ -40,6 +41,7 @@ func ff_ps_add_squares_rvv, zve64f
endfunc
func ff_ps_mul_pair_single_rvv, zve32f
+ lpad 0
1:
vsetvli t0, a3, e32, m4, ta, ma
vlseg2e32.v v24, (a1)
@@ -57,6 +59,7 @@ func ff_ps_mul_pair_single_rvv, zve32f
endfunc
func ff_ps_hybrid_analysis_rvv, zve32f
+ lpad 0
/* We need 26 FP registers, for 20 scratch ones. Spill fs0-fs5. */
addi sp, sp, -48
.irp n, 0, 1, 2, 3, 4, 5
@@ -135,6 +138,7 @@ NOHWD flw fs\n, (4 * \n)(sp)
endfunc
func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */
+ lpad 0
slli t0, a2, 5 + 1 + 2 // ctz(32 * 2 * 4)
sh2add a1, a2, a1
add a0, a0, t0
@@ -208,6 +212,7 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve64x
endfunc
func ff_ps_stereo_interpolate_rvv, zve32f, zbb
+ lpad 0
vsetvli t0, zero, e32, m2, ta, ma
vid.v v24
flw ft0, (a2)
diff --git a/libavcodec/riscv/ac3dsp_rvb.S b/libavcodec/riscv/ac3dsp_rvb.S
index 0ca56466e1..a3c5187cfe 100644
--- a/libavcodec/riscv/ac3dsp_rvb.S
+++ b/libavcodec/riscv/ac3dsp_rvb.S
@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_ac3_exponent_min_rvb, zbb
+ lpad 0
beqz a1, 3f
1:
addi a2, a2, -1
@@ -43,6 +44,7 @@ func ff_ac3_exponent_min_rvb, zbb
endfunc
func ff_extract_exponents_rvb, zbb
+ lpad 0
1:
lw t0, (a1)
addi a0, a0, 1
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
index 1b5f67a9ec..0ca1332bf1 100644
--- a/libavcodec/riscv/ac3dsp_rvv.S
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_ac3_exponent_min_rvv, zve32x
+ lpad 0
beqz a1, 3f
1:
vsetvli t2, a2, e8, m8, ta, ma
@@ -44,6 +45,7 @@ func ff_ac3_exponent_min_rvv, zve32x
endfunc
func ff_float_to_fixed24_rvv, zve32f
+ lpad 0
li t1, 1 << 24
fcvt.s.w f0, t1
1:
@@ -62,6 +64,7 @@ endfunc
#if __riscv_xlen >= 64
func ff_sum_square_butterfly_int32_rvv, zve64x
+ lpad 0
vsetvli t0, zero, e64, m8, ta, ma
vmv.v.x v0, zero
vmv.v.x v8, zero
@@ -102,6 +105,7 @@ endfunc
#endif
func ff_sum_square_butterfly_float_rvv, zve32f
+ lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
vmv.v.x v8, zero
diff --git a/libavcodec/riscv/ac3dsp_rvvb.S b/libavcodec/riscv/ac3dsp_rvvb.S
index 64766b56be..031e38affd 100644
--- a/libavcodec/riscv/ac3dsp_rvvb.S
+++ b/libavcodec/riscv/ac3dsp_rvvb.S
@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_extract_exponents_rvvb, zve32x, zvbb
+ lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a1)
diff --git a/libavcodec/riscv/alacdsp_rvv.S b/libavcodec/riscv/alacdsp_rvv.S
index 8efb04e0c8..2a942bad8c 100644
--- a/libavcodec/riscv/alacdsp_rvv.S
+++ b/libavcodec/riscv/alacdsp_rvv.S
@@ -22,6 +22,7 @@
#if (__riscv_xlen == 64)
func ff_alac_decorrelate_stereo_rvv, zve32x
+ lpad 0
ld a4, 8(a0)
ld a0, 0(a0)
1:
@@ -44,6 +45,7 @@ func ff_alac_decorrelate_stereo_rvv, zve32x
endfunc
func ff_alac_append_extra_bits_mono_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a1, (a1)
1:
@@ -62,6 +64,7 @@ func ff_alac_append_extra_bits_mono_rvv, zve32x
endfunc
func ff_alac_append_extra_bits_stereo_rvv, zve32x
+ lpad 0
ld a6, 8(a0)
ld a0, (a0)
ld a7, 8(a1)
diff --git a/libavcodec/riscv/audiodsp_rvf.S b/libavcodec/riscv/audiodsp_rvf.S
index 2ec8a11691..97aa930ab5 100644
--- a/libavcodec/riscv/audiodsp_rvf.S
+++ b/libavcodec/riscv/audiodsp_rvf.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_vector_clipf_rvf, f
+ lpad 0
NOHWF fmv.w.x fa0, a3
NOHWF fmv.w.x fa1, a4
1:
diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S
index f0b23bab5e..b749e9ed4a 100644
--- a/libavcodec/riscv/audiodsp_rvv.S
+++ b/libavcodec/riscv/audiodsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_scalarproduct_int16_rvv, zve32x
+ lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v8, zero
vmv.s.x v0, zero
@@ -56,6 +57,7 @@ func ff_vector_clip_int32_rvv, zve32x
endfunc
func ff_vector_clipf_rvv, zve32f
+ lpad 0
NOHWF fmv.w.x fa0, a3
NOHWF fmv.w.x fa1, a4
1:
diff --git a/libavcodec/riscv/blockdsp_rvv.S b/libavcodec/riscv/blockdsp_rvv.S
index 18ab17da00..04da265417 100644
--- a/libavcodec/riscv/blockdsp_rvv.S
+++ b/libavcodec/riscv/blockdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_clear_block_rvv, zve64x
+ lpad 0
vsetivli zero, 16, e64, m8, ta, ma
vmv.v.i v0, 0
vse64.v v0, (a0)
@@ -29,6 +30,7 @@ func ff_clear_block_rvv, zve64x
endfunc
func ff_clear_blocks_rvv, zve64x
+ lpad 0
vsetivli zero, 16, e64, m8, ta, ma
vmv.v.i v0, 0
@@ -42,6 +44,7 @@ func ff_clear_blocks_rvv, zve64x
endfunc
func ff_fill_block16_rvv, zve32x
+ lpad 0
vsetivli t0, 16, e8, m1, ta, ma
vmv.v.x v8, a1
1:
@@ -54,6 +57,7 @@ func ff_fill_block16_rvv, zve32x
endfunc
func ff_fill_block8_rvv, zve64x
+ lpad 0
vsetvli t0, zero, e8, m4, ta, ma
vmv.v.x v8, a1
vsetvli t0, a3, e64, m4, ta, ma
diff --git a/libavcodec/riscv/bswapdsp_rvb.S b/libavcodec/riscv/bswapdsp_rvb.S
index 0786bd3f36..82e44ec84c 100644
--- a/libavcodec/riscv/bswapdsp_rvb.S
+++ b/libavcodec/riscv/bswapdsp_rvb.S
@@ -24,6 +24,7 @@
#if (__riscv_xlen >= 64)
func ff_bswap32_buf_rvb, zbb
+ lpad 0
bswap32_rvb a0, a1, a2
endfunc
#endif
diff --git a/libavcodec/riscv/bswapdsp_rvv.S b/libavcodec/riscv/bswapdsp_rvv.S
index b37fe26255..af55820fe4 100644
--- a/libavcodec/riscv/bswapdsp_rvv.S
+++ b/libavcodec/riscv/bswapdsp_rvv.S
@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_bswap16_buf_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a2, e16, m8, ta, ma
vle16.v v8, (a1)
diff --git a/libavcodec/riscv/exrdsp_rvv.S b/libavcodec/riscv/exrdsp_rvv.S
index f4a35f58ff..f087fce0db 100644
--- a/libavcodec/riscv/exrdsp_rvv.S
+++ b/libavcodec/riscv/exrdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_reorder_pixels_rvv, zve32x
+ lpad 0
srai a2, a2, 1
add t1, a1, a2
1:
diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
index 1724aee9d7..0829042bfa 100644
--- a/libavcodec/riscv/flacdsp_rvv.S
+++ b/libavcodec/riscv/flacdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_flac_lpc16_rvv, zve32x, zbb
+ lpad 0
vtype_vli t0, a2, t2, e32, ta, ma
vsetvl zero, a2, t0
vle32.v v8, (a1)
@@ -46,6 +47,7 @@ endfunc
#if (__riscv_xlen == 64)
func ff_flac_lpc32_rvv, zve64x
+ lpad 0
addi t2, a2, -16
ble t2, zero, ff_flac_lpc32_rvv_simple
vsetivli zero, 1, e64, m1, ta, ma
@@ -77,6 +79,7 @@ func ff_flac_lpc32_rvv, zve64x
endfunc
func ff_flac_lpc32_rvv_simple, zve64x, zbb
+ lpad 0
vtype_vli t3, a2, t1, e64, ta, ma
vntypei t2, t3
vsetvl zero, a2, t3 // e64
@@ -105,6 +108,7 @@ func ff_flac_lpc32_rvv_simple, zve64x, zbb
endfunc
func ff_flac_lpc33_rvv, zve64x, zbb
+ lpad 0
vtype_vli t0, a3, t1, e64, ta, ma
vsetvl zero, a3, t0
vmv.s.x v0, zero
@@ -133,6 +137,7 @@ endfunc
#endif
func ff_flac_wasted32_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a0)
@@ -146,6 +151,7 @@ func ff_flac_wasted32_rvv, zve32x
endfunc
func ff_flac_wasted33_rvv, zve64x
+ lpad 0
srli t0, a2, 5
li t1, 1
bnez t0, 2f
@@ -178,6 +184,7 @@ endfunc
#if (__riscv_xlen == 64)
func ff_flac_decorrelate_indep2_16_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -201,6 +208,7 @@ func ff_flac_decorrelate_indep2_16_rvv, zve32x
endfunc
func ff_flac_decorrelate_indep4_16_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -234,6 +242,7 @@ func ff_flac_decorrelate_indep4_16_rvv, zve32x
endfunc
func ff_flac_decorrelate_indep6_16_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -279,6 +288,7 @@ func ff_flac_decorrelate_indep6_16_rvv, zve32x
endfunc
func ff_flac_decorrelate_indep8_16_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -332,9 +342,8 @@ func ff_flac_decorrelate_indep8_16_rvv, zve32x
ret
endfunc
-
-
func ff_flac_decorrelate_ls_16_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -359,6 +368,7 @@ func ff_flac_decorrelate_ls_16_rvv, zve32x
endfunc
func ff_flac_decorrelate_rs_16_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -383,6 +393,7 @@ func ff_flac_decorrelate_rs_16_rvv, zve32x
endfunc
func ff_flac_decorrelate_ms_16_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -409,6 +420,7 @@ func ff_flac_decorrelate_ms_16_rvv, zve32x
endfunc
func ff_flac_decorrelate_indep2_32_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -429,6 +441,7 @@ func ff_flac_decorrelate_indep2_32_rvv, zve32x
endfunc
func ff_flac_decorrelate_indep4_32_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -458,6 +471,7 @@ func ff_flac_decorrelate_indep4_32_rvv, zve32x
endfunc
func ff_flac_decorrelate_indep6_32_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -496,6 +510,7 @@ func ff_flac_decorrelate_indep6_32_rvv, zve32x
endfunc
func ff_flac_decorrelate_indep8_32_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -541,6 +556,7 @@ func ff_flac_decorrelate_indep8_32_rvv, zve32x
endfunc
func ff_flac_decorrelate_ls_32_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -562,6 +578,7 @@ func ff_flac_decorrelate_ls_32_rvv, zve32x
endfunc
func ff_flac_decorrelate_rs_32_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -583,6 +600,7 @@ func ff_flac_decorrelate_rs_32_rvv, zve32x
endfunc
func ff_flac_decorrelate_ms_32_rvv, zve32x
+ lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
diff --git a/libavcodec/riscv/fmtconvert_rvv.S b/libavcodec/riscv/fmtconvert_rvv.S
index d0e2f106d5..c356196c97 100644
--- a/libavcodec/riscv/fmtconvert_rvv.S
+++ b/libavcodec/riscv/fmtconvert_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_int32_to_float_fmul_scalar_rvv, zve32f
+ lpad 0
NOHWF fmv.w.x fa0, a2
NOHWF mv a2, a3
1:
@@ -38,6 +39,7 @@ NOHWF mv a2, a3
endfunc
func ff_int32_to_float_fmul_array8_rvv, zve32f
+ lpad 0
srai a4, a4, 3
1: vsetvli t0, a4, e32, m1, ta, ma
diff --git a/libavcodec/riscv/g722dsp_rvv.S b/libavcodec/riscv/g722dsp_rvv.S
index 981d5cecd8..6ceb70fde1 100644
--- a/libavcodec/riscv/g722dsp_rvv.S
+++ b/libavcodec/riscv/g722dsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_g722_apply_qmf_rvv, zve32x
+ lpad 0
lla t0, qmf_coeffs
vsetivli zero, 12, e16, m2, ta, ma
vlseg2e16.v v28, (a0)
diff --git a/libavcodec/riscv/h263dsp_rvv.S b/libavcodec/riscv/h263dsp_rvv.S
index 97503d527c..c78483926a 100644
--- a/libavcodec/riscv/h263dsp_rvv.S
+++ b/libavcodec/riscv/h263dsp_rvv.S
@@ -23,6 +23,7 @@
.option push
.option norelax
func ff_h263_h_loop_filter_rvv, zve32x
+ lpad 0
addi a0, a0, -2
vsetivli zero, 8, e8, mf2, ta, ma
vlsseg4e8.v v8, (a0), a1
@@ -83,6 +84,7 @@ endfunc
.option pop
func ff_h263_v_loop_filter_rvv, zve32x
+ lpad 0
sub a4, a0, a1
vsetivli zero, 8, e8, mf2, ta, ma
vle8.v v10, (a0)
diff --git a/libavcodec/riscv/h264_mc_chroma.S b/libavcodec/riscv/h264_mc_chroma.S
index ce99bda44d..22ac4ef20b 100644
--- a/libavcodec/riscv/h264_mc_chroma.S
+++ b/libavcodec/riscv/h264_mc_chroma.S
@@ -325,6 +325,7 @@
ret
.endm
+ .variant_cc h264_put_chroma_mc_rvv
func h264_put_chroma_mc_rvv, zve32x
11:
li a7, 3
@@ -334,6 +335,7 @@ func h264_put_chroma_mc_rvv, zve32x
do_chroma_mc put 0
endfunc
+ .variant_cc h264_avg_chroma_mc_rvv
func h264_avg_chroma_mc_rvv, zve32x
21:
li a7, 3
@@ -344,31 +346,37 @@ func h264_avg_chroma_mc_rvv, zve32x
endfunc
func h264_put_chroma_mc8_rvv, zve32x
+ lpad 0
li t6, 8
j 11b
endfunc
func h264_put_chroma_mc4_rvv, zve32x
+ lpad 0
li t6, 4
j 11b
endfunc
func h264_put_chroma_mc2_rvv, zve32x
+ lpad 0
li t6, 2
j 11b
endfunc
func h264_avg_chroma_mc8_rvv, zve32x
+ lpad 0
li t6, 8
j 21b
endfunc
func h264_avg_chroma_mc4_rvv, zve32x
+ lpad 0
li t6, 4
j 21b
endfunc
func h264_avg_chroma_mc2_rvv, zve32x
+ lpad 0
li t6, 2
j 21b
endfunc
diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
index 3c0700d1d9..82739881d9 100644
--- a/libavcodec/riscv/h264addpx_rvv.S
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -37,6 +37,7 @@
.endm
func ff_h264_add_pixels4_8_rvv, zve32x
+ lpad 0
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v8, (a0), a2
vsetivli zero, 4 * 4, e8, m1, ta, ma
@@ -54,6 +55,7 @@ func ff_h264_add_pixels4_8_rvv, zve32x
endfunc
func ff_h264_add_pixels4_16_rvv, zve64x
+ lpad 0
vsetivli zero, 4, e16, mf2, ta, ma
vlse64.v v8, (a0), a2
vsetivli zero, 4 * 4, e16, m2, ta, ma
@@ -71,6 +73,7 @@ func ff_h264_add_pixels4_16_rvv, zve64x
endfunc
func ff_h264_add_pixels8_8_rvv, zve64x
+ lpad 0
li t0, 8 * 8
vsetivli zero, 8, e8, mf2, ta, ma
vlse64.v v8, (a0), a2
@@ -89,6 +92,7 @@ func ff_h264_add_pixels8_8_rvv, zve64x
endfunc
func ff_h264_add_pixels8_16_rvv, zve32x
+ lpad 0
li t0, 8
vsetivli zero, 8, e16, m1, ta, ma
1:
diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S
index ed6a16a9c4..a38bf7ef1d 100644
--- a/libavcodec/riscv/h264dsp_rvv.S
+++ b/libavcodec/riscv/h264dsp_rvv.S
@@ -164,6 +164,7 @@ endfunc
.irp w, 16, 8, 4, 2
func ff_h264_weight_pixels\w\()_8_rvv, zve32x
+ lpad 0
li a6, \w
.if \w == 16
j ff_h264_weight_pixels_simple_8_rvv
@@ -173,6 +174,7 @@ func ff_h264_weight_pixels\w\()_8_rvv, zve32x
endfunc
func ff_h264_biweight_pixels\w\()_8_rvv, zve32x
+ lpad 0
li t6, \w
.if \w == 16
j ff_h264_biweight_pixels_simple_8_rvv
@@ -272,6 +274,7 @@ func ff_h264_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_v_loop_filter_luma_8_rvv, zve32x
+ lpad 0
vsetivli zero, 4, e32, m1, ta, ma
vle8.v v4, (a4)
li t0, 0x01010101
@@ -299,6 +302,7 @@ func ff_h264_v_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_h_loop_filter_luma_8_rvv, zve32x
+ lpad 0
vsetivli zero, 4, e32, m1, ta, ma
vle8.v v4, (a4)
li t0, 0x01010101
@@ -313,6 +317,7 @@ func ff_h264_h_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_h_loop_filter_luma_mbaff_8_rvv, zve32x
+ lpad 0
vsetivli zero, 4, e16, mf2, ta, ma
vle8.v v4, (a4)
li t0, 0x0101
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index c74ea18c19..f01f7c2a90 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -55,6 +55,7 @@ func ff_h264_idct4_rvv, zve32x
endfunc
func ff_h264_idct_add_8_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
.Lidct_add4_8_rvv:
vsetivli zero, 4, e16, mf2, ta, ma
@@ -213,6 +214,7 @@ func ff_h264_idct8_rvv, zve32x
endfunc
func ff_h264_idct8_add_8_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
.Lidct8_add_8_rvv:
vsetivli zero, 8, e16, m1, ta, ma
@@ -405,11 +407,13 @@ endfunc
.irp depth, 9, 10, 12, 14
func ff_h264_idct_add_\depth\()_rvv, zve32x
+ lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct_add_16_rvv
endfunc
func ff_h264_idct8_add_\depth\()_rvv, zve32x
+ lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct8_add_16_rvv
endfunc
@@ -417,6 +421,7 @@ endfunc
.macro idct_dc_add8 width
func ff_h264_idct\width\()_dc_add_8_rvv, zve64x
+ lpad 0
.if \width == 8
vsetivli zero, \width, e8, mf2, ta, ma
.else
@@ -517,11 +522,13 @@ idct_dc_add 8
.irp depth,9,10,12,14
func ff_h264_idct4_dc_add_\depth\()_rvv, zve64x
+ lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct4_dc_add_16_rvv
endfunc
func ff_h264_idct8_dc_add_\depth\()_rvv, zve64x
+ lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct8_dc_add_16_rvv
endfunc
@@ -535,6 +542,9 @@ endconst
#if (__riscv_xlen == 64)
.macro idct4_adds type, depth
func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
+.if \depth == 8
+ lpad 0
+.endif
csrwi vxrm, 0
addi sp, sp, -16
lla t0, ff_h264_scan8
@@ -612,6 +622,9 @@ idct4_adds 16, \depth
idct4_adds 16intra, \depth
func ff_h264_idct8_add4_\depth\()_rvv, zve32x
+.if \depth == 8
+ lpad 0
+.endif
csrwi vxrm, 0
addi sp, sp, -64
lla t0, ff_h264_scan8
@@ -688,16 +701,19 @@ endfunc
.irp depth, 9, 10, 12, 14
func ff_h264_idct_add16_\depth\()_rvv, zve32x
+ lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct_add16_16_rvv
endfunc
func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
+ lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct_add16intra_16_rvv
endfunc
func ff_h264_idct8_add4_\depth\()_rvv, zve32x
+ lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct8_add4_16_rvv
endfunc
diff --git a/libavcodec/riscv/huffyuvdsp_rvv.S b/libavcodec/riscv/huffyuvdsp_rvv.S
index d334f5c6d0..5b8c2473b9 100644
--- a/libavcodec/riscv/huffyuvdsp_rvv.S
+++ b/libavcodec/riscv/huffyuvdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_add_int16_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a3, e16, m8, ta, ma
vle16.v v16, (a0)
@@ -37,6 +38,7 @@ func ff_add_int16_rvv, zve32x
endfunc
func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, zbb
+ lpad 0
vtype_ivli t1, 4, e8, ta, ma
li t0, 4
vsetvl zero, t0, t1
diff --git a/libavcodec/riscv/idctdsp_rvv.S b/libavcodec/riscv/idctdsp_rvv.S
index e93e6b5e7a..de229a9ae7 100644
--- a/libavcodec/riscv/idctdsp_rvv.S
+++ b/libavcodec/riscv/idctdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_put_pixels_clamped_rvv, zve64x
+ lpad 0
li t0, 8 * 8
vsetvli zero, t0, e16, m8, ta, ma
vle16.v v24, (a0)
@@ -35,6 +36,7 @@ func ff_put_pixels_clamped_rvv, zve64x
endfunc
func ff_put_signed_pixels_clamped_rvv, zve64x
+ lpad 0
li t0, 8 * 8
vsetvli zero, t0, e8, m4, ta, ma
vle16.v v24, (a0)
@@ -47,6 +49,7 @@ func ff_put_signed_pixels_clamped_rvv, zve64x
endfunc
func ff_add_pixels_clamped_rvv, zve64x
+ lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
vlse64.v v16, (a1), a2
diff --git a/libavcodec/riscv/jpeg2000dsp_rvv.S b/libavcodec/riscv/jpeg2000dsp_rvv.S
index 10efe6b0db..73ed78239e 100644
--- a/libavcodec/riscv/jpeg2000dsp_rvv.S
+++ b/libavcodec/riscv/jpeg2000dsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_ict_float_rvv, zve32f
+ lpad 0
lla t0, ff_jpeg2000_f_ict_params
flw ft0, 0(t0)
flw ft1, 4(t0)
@@ -49,6 +50,7 @@ func ff_ict_float_rvv, zve32f
endfunc
func ff_rct_int_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a3, e32, m8, ta, ma
vle32.v v16, (a1)
diff --git a/libavcodec/riscv/llauddsp_rvv.S b/libavcodec/riscv/llauddsp_rvv.S
index 5569864832..54ffbeb666 100644
--- a/libavcodec/riscv/llauddsp_rvv.S
+++ b/libavcodec/riscv/llauddsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_scalarproduct_and_madd_int16_rvv, zve32x
+ lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
1:
@@ -45,6 +46,7 @@ func ff_scalarproduct_and_madd_int16_rvv, zve32x
endfunc
func ff_scalarproduct_and_madd_int32_rvv, zve32x
+ lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
1:
diff --git a/libavcodec/riscv/llviddsp_rvv.S b/libavcodec/riscv/llviddsp_rvv.S
index a4814837b9..9572e92dce 100644
--- a/libavcodec/riscv/llviddsp_rvv.S
+++ b/libavcodec/riscv/llviddsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_llvid_add_bytes_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a2, e8, m8, ta, ma
vle8.v v0, (a1)
diff --git a/libavcodec/riscv/llvidencdsp_rvv.S b/libavcodec/riscv/llvidencdsp_rvv.S
index 0342165127..44bf3ac7e5 100644
--- a/libavcodec/riscv/llvidencdsp_rvv.S
+++ b/libavcodec/riscv/llvidencdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_llvidenc_diff_bytes_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a3, e8, m8, ta, ma
vle8.v v0, (a1)
diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S
index fe80305d9a..e70c5f3121 100644
--- a/libavcodec/riscv/lpc_rvv.S
+++ b/libavcodec/riscv/lpc_rvv.S
@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_lpc_apply_welch_window_rvv, zve64d
+ lpad 0
vsetvli t0, zero, e64, m8, ta, ma
vid.v v0
addi t2, a1, -1
@@ -87,6 +88,7 @@ func ff_lpc_apply_welch_window_rvv, zve64d
endfunc
func ff_lpc_compute_autocorr_rvv, zve64d, zbb
+ lpad 0
vtype_vli t1, a2, t2, e64, ta, ma, 1
addi a2, a2, 1
li t0, 1
diff --git a/libavcodec/riscv/me_cmp_rvv.S b/libavcodec/riscv/me_cmp_rvv.S
index c9ae5bb6fc..8989c91dde 100644
--- a/libavcodec/riscv/me_cmp_rvv.S
+++ b/libavcodec/riscv/me_cmp_rvv.S
@@ -27,6 +27,7 @@
.endm
func ff_pix_abs16_rvv, zve32x
+ lpad 0
vsetivli zero, 1, e32, m1, ta, ma
vmv.s.x v0, zero
1:
@@ -47,6 +48,7 @@ func ff_pix_abs16_rvv, zve32x
endfunc
func ff_pix_abs8_rvv, zve32x
+ lpad 0
vsetivli zero, 1, e32, m1, ta, ma
vmv.s.x v0, zero
1:
@@ -67,6 +69,7 @@ func ff_pix_abs8_rvv, zve32x
endfunc
func ff_pix_abs16_x2_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
li t5, 1
@@ -92,6 +95,7 @@ func ff_pix_abs16_x2_rvv, zve32x
endfunc
func ff_pix_abs8_x2_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
li t5, 1
@@ -117,6 +121,7 @@ func ff_pix_abs8_x2_rvv, zve32x
endfunc
func ff_pix_abs16_y2_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
add t1, a2, a3
@@ -142,6 +147,7 @@ func ff_pix_abs16_y2_rvv, zve32x
endfunc
func ff_pix_abs8_y2_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
add t1, a2, a3
@@ -167,6 +173,7 @@ func ff_pix_abs8_y2_rvv, zve32x
endfunc
func ff_sse16_rvv, zve32x
+ lpad 0
vsetivli t0, 16, e32, m4, ta, ma
vmv.v.x v24, zero
vmv.s.x v0, zero
@@ -189,6 +196,7 @@ func ff_sse16_rvv, zve32x
endfunc
func ff_sse8_rvv, zve32x
+ lpad 0
vsetivli t0, 8, e32, m2, ta, ma
vmv.v.x v24, zero
vmv.s.x v0, zero
@@ -211,6 +219,7 @@ func ff_sse8_rvv, zve32x
endfunc
func ff_sse4_rvv, zve32x
+ lpad 0
vsetivli t0, 4, e32, m1, ta, ma
vmv.v.x v24, zero
vmv.s.x v0, zero
@@ -239,6 +248,7 @@ endfunc
.endm
.macro vsad_vsse16 type
+ lpad 0
vsetivli t0, 16, e32, m4, ta, ma
addi a4, a4, -1
add t1, a1, a3
@@ -277,6 +287,7 @@ endfunc
.endm
.macro vsad_vsse8 type
+ lpad 0
vsetivli t0, 8, e32, m2, ta, ma
addi a4, a4, -1
add t1, a1, a3
@@ -315,6 +326,7 @@ endfunc
.endm
.macro vsad_vsse_intra16 type
+ lpad 0
vsetivli t0, 16, e32, m4, ta, ma
addi a4, a4, -1
add t1, a1, a3
@@ -346,6 +358,7 @@ endfunc
.endm
.macro vsad_vsse_intra8 type
+ lpad 0
vsetivli t0, 8, e32, m2, ta, ma
addi a4, a4, -1
add t1, a1, a3
@@ -409,6 +422,8 @@ func ff_vsad_intra8_rvv, zve32x
endfunc
func ff_nsse16_rvv, zve32x
+ lpad 0
+
.macro squarediff16
vsetivli zero, 16, e8, m1, tu, ma
vle8.v v4, (a1)
@@ -468,6 +483,8 @@ func ff_nsse16_rvv, zve32x
endfunc
func ff_nsse8_rvv, zve32x
+ lpad 0
+
.macro squarediff8
vsetivli zero, 8, e8, mf2, tu, ma
vle8.v v4, (a1)
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
index 42d845a370..bf4b2319e1 100644
--- a/libavcodec/riscv/opusdsp_rvv.S
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_opus_postfilter_rvv, zve32f, zbb
+ lpad 0
flw fa0, 0(a2) // g0
slli t1, a1, 2
flw fa1, 4(a2) // g1
diff --git a/libavcodec/riscv/pixblockdsp_rvi.S b/libavcodec/riscv/pixblockdsp_rvi.S
index efdd422228..ed1af70251 100644
--- a/libavcodec/riscv/pixblockdsp_rvi.S
+++ b/libavcodec/riscv/pixblockdsp_rvi.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_get_pixels_8_rvi
+ lpad 0
.irp row, 0, 1, 2, 3, 4, 5, 6, 7
ld t0, (a1)
add a1, a1, a2
@@ -47,6 +48,7 @@ func ff_get_pixels_8_rvi
endfunc
func ff_get_pixels_16_rvi
+ lpad 0
.irp row, 0, 1, 2, 3, 4, 5, 6, 7
ld t0, 0(a1)
ld t1, 8(a1)
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S
index 4213cd1b85..85233470cf 100644
--- a/libavcodec/riscv/pixblockdsp_rvv.S
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_get_pixels_8_rvv, zve64x
+ lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
1:
@@ -32,6 +33,7 @@ func ff_get_pixels_8_rvv, zve64x
endfunc
func ff_get_pixels_unaligned_8_rvv, zve64x
+ lpad 0
andi t1, a1, 7
vsetivli zero, 8, e64, m4, ta, ma
li t0, 8 * 8
@@ -52,6 +54,7 @@ func ff_get_pixels_unaligned_8_rvv, zve64x
endfunc
func ff_diff_pixels_rvv, zve64x
+ lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
vlse64.v v16, (a1), a3
@@ -63,6 +66,7 @@ func ff_diff_pixels_rvv, zve64x
endfunc
func ff_diff_pixels_unaligned_rvv, zve32x
+ lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
vlsseg8e8.v v16, (a1), a3
vlsseg8e8.v v24, (a2), a3
diff --git a/libavcodec/riscv/rv34dsp_rvv.S b/libavcodec/riscv/rv34dsp_rvv.S
index 8eda01665d..478bc0a860 100644
--- a/libavcodec/riscv/rv34dsp_rvv.S
+++ b/libavcodec/riscv/rv34dsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_rv34_inv_transform_dc_rvv, zve32x
+ lpad 0
lh t1, 0(a0)
li t0, 13 * 13 * 3
mul t2, t0, t1
@@ -33,6 +34,7 @@ func ff_rv34_inv_transform_dc_rvv, zve32x
endfunc
func ff_rv34_idct_dc_add_rvv, zve32x
+ lpad 0
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v0, (a0), a1
li t1, 169
diff --git a/libavcodec/riscv/rv40dsp_rvv.S b/libavcodec/riscv/rv40dsp_rvv.S
index e49345ef70..edb979ffb0 100644
--- a/libavcodec/riscv/rv40dsp_rvv.S
+++ b/libavcodec/riscv/rv40dsp_rvv.S
@@ -351,21 +351,25 @@ func ff_avg_rv40_chroma_mc_rvv, zve32x
endfunc
func ff_put_rv40_chroma_mc8_rvv, zve32x
+ lpad 0
li t6, 8
j 11b
endfunc
func ff_put_rv40_chroma_mc4_rvv, zve32x
+ lpad 0
li t6, 4
j 11b
endfunc
func ff_avg_rv40_chroma_mc8_rvv, zve32x
+ lpad 0
li t6, 8
j 21b
endfunc
func ff_avg_rv40_chroma_mc4_rvv, zve32x
+ lpad 0
li t6, 4
j 21b
endfunc
diff --git a/libavcodec/riscv/sbrdsp_rvv.S b/libavcodec/riscv/sbrdsp_rvv.S
index 331b88022c..7c6103d081 100644
--- a/libavcodec/riscv/sbrdsp_rvv.S
+++ b/libavcodec/riscv/sbrdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_sbr_sum64x5_rvv, zve32f
+ lpad 0
li a5, 64
addi a1, a0, 64 * 4
addi a2, a0, 128 * 4
@@ -50,6 +51,7 @@ func ff_sbr_sum64x5_rvv, zve32f
endfunc
func ff_sbr_sum_square_rvv, zve32f
+ lpad 0
vsetvli t0, zero, e32, m8, ta, ma
slli a1, a1, 1
vmv.v.x v8, zero
@@ -69,6 +71,7 @@ NOHWF fmv.x.w a0, fa0
endfunc
func ff_sbr_autocorrelate_rvv, zve32f
+ lpad 0
vsetvli t0, zero, e32, m4, ta, ma
vmv.v.x v0, zero
flw fa0, (a0)
@@ -158,6 +161,7 @@ func ff_sbr_autocorrelate_rvv, zve32f
endfunc
func ff_sbr_hf_gen_rvv, zve32f
+ lpad 0
NOHWF fmv.w.x fa0, a4
NOHWF mv a4, a5
NOHWF mv a5, a6
@@ -208,6 +212,7 @@ NOHWF mv a5, a6
endfunc
func ff_sbr_hf_g_filt_rvv, zve32f
+ lpad 0
li t1, 40 * 2 * 4
sh3add a1, a4, a1
1:
@@ -273,15 +278,18 @@ endfunc
.endm
func ff_sbr_hf_apply_noise_0_rvv, zve32f, zbb
+ lpad 0
hf_apply_noise 0
endfunc
func ff_sbr_hf_apply_noise_3_rvv, zve32f, zbb
- not a4, a4 // invert parity of kx
- // fall through
+ lpad 0
+ not a4, a4 // invert parity of kx
+ // fall through
endfunc
func ff_sbr_hf_apply_noise_1_rvv, zve32f, zbb
+ lpad 0
vsetvli t0, zero, e32, m4, ta, ma
vid.v v4
vxor.vx v4, v4, a4
@@ -290,5 +298,6 @@ func ff_sbr_hf_apply_noise_1_rvv, zve32f, zbb
endfunc
func ff_sbr_hf_apply_noise_2_rvv, zve32f, zbb
+ lpad 0
hf_apply_noise 2
endfunc
diff --git a/libavcodec/riscv/startcode_rvb.S b/libavcodec/riscv/startcode_rvb.S
index c043d59809..eec92d3340 100644
--- a/libavcodec/riscv/startcode_rvb.S
+++ b/libavcodec/riscv/startcode_rvb.S
@@ -37,6 +37,7 @@
.endm
func ff_startcode_find_candidate_rvb, zbb
+ lpad 0
add a1, a0, a1
// Potentially unaligned head
diff --git a/libavcodec/riscv/startcode_rvv.S b/libavcodec/riscv/startcode_rvv.S
index 36a3369431..f4d0a0f087 100644
--- a/libavcodec/riscv/startcode_rvv.S
+++ b/libavcodec/riscv/startcode_rvv.S
@@ -27,6 +27,7 @@
#include "libavutil/riscv/asm.S"
func ff_startcode_find_candidate_rvv, zve32x
+ lpad 0
mv t0, a0
1:
vsetvli t1, a1, e8, m8, ta, ma
diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S
index cfc27154dd..55bec57a7b 100644
--- a/libavcodec/riscv/svqenc_rvv.S
+++ b/libavcodec/riscv/svqenc_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_ssd_int8_vs_int16_rvv, zve32x
+ lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v24, zero
1:
diff --git a/libavcodec/riscv/takdsp_rvv.S b/libavcodec/riscv/takdsp_rvv.S
index fa942a3be6..3128d00a7c 100644
--- a/libavcodec/riscv/takdsp_rvv.S
+++ b/libavcodec/riscv/takdsp_rvv.S
@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_decorrelate_ls_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
sub a2, a2, t0
@@ -36,6 +37,7 @@ func ff_decorrelate_ls_rvv, zve32x
endfunc
func ff_decorrelate_sr_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v0, (a0)
@@ -50,6 +52,7 @@ func ff_decorrelate_sr_rvv, zve32x
endfunc
func ff_decorrelate_sm_rvv, zve32x
+ lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a1)
@@ -68,6 +71,7 @@ func ff_decorrelate_sm_rvv, zve32x
endfunc
func ff_decorrelate_sf_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
1:
vsetvli t0, a2, e32, m8, ta, ma
diff --git a/libavcodec/riscv/utvideodsp_rvv.S b/libavcodec/riscv/utvideodsp_rvv.S
index fa70d0eb34..4c0c177d30 100644
--- a/libavcodec/riscv/utvideodsp_rvv.S
+++ b/libavcodec/riscv/utvideodsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_restore_rgb_planes_rvv, zve32x
+ lpad 0
li t1, -0x80
sub a3, a3, a6
sub a4, a4, a6
@@ -53,6 +54,7 @@ func ff_restore_rgb_planes_rvv, zve32x
endfunc
func ff_restore_rgb_planes10_rvv, zve32x
+ lpad 0
li t1, -0x200
li t2, 0x3FF
sub a3, a3, a6
diff --git a/libavcodec/riscv/vc1dsp_rvi.S b/libavcodec/riscv/vc1dsp_rvi.S
index d4a1b5bf49..7725bfb628 100644
--- a/libavcodec/riscv/vc1dsp_rvi.S
+++ b/libavcodec/riscv/vc1dsp_rvi.S
@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_put_pixels8x8_rvi
+ lpad 0
.rept 8
ld t0, (a1)
sd t0, (a0)
@@ -33,6 +34,7 @@ func ff_put_pixels8x8_rvi
endfunc
func ff_put_pixels16x16_rvi
+ lpad 0
.rept 16
ld t0, (a1)
ld t1, 8(a1)
diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index d8b62579aa..2fcd125f55 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_vc1_inv_trans_8x8_dc_rvv, zve64x
+ lpad 0
lh t2, (a2)
vsetivli zero, 8, e8, mf2, ta, ma
vlse64.v v0, (a0), a1
@@ -44,6 +45,7 @@ func ff_vc1_inv_trans_8x8_dc_rvv, zve64x
endfunc
func ff_vc1_inv_trans_4x8_dc_rvv, zve32x
+ lpad 0
lh t2, (a2)
vsetivli zero, 8, e8, mf2, ta, ma
vlse32.v v0, (a0), a1
@@ -68,6 +70,7 @@ func ff_vc1_inv_trans_4x8_dc_rvv, zve32x
endfunc
func ff_vc1_inv_trans_8x4_dc_rvv, zve64x
+ lpad 0
lh t2, (a2)
vsetivli zero, 4, e8, mf4, ta, ma
vlse64.v v0, (a0), a1
@@ -91,6 +94,7 @@ func ff_vc1_inv_trans_8x4_dc_rvv, zve64x
endfunc
func ff_vc1_inv_trans_4x4_dc_rvv, zve32x
+ lpad 0
lh t2, (a2)
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v0, (a0), a1
@@ -203,6 +207,7 @@ func ff_vc1_inv_trans_4_rvv, zve32x
endfunc
func ff_vc1_inv_trans_8x8_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
vsetivli zero, 8, e16, m1, ta, ma
addi a1, a0, 1 * 8 * 2
@@ -240,6 +245,7 @@ func ff_vc1_inv_trans_8x8_rvv, zve32x
endfunc
func ff_vc1_inv_trans_8x4_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
vsetivli zero, 4, e16, mf2, ta, ma
vlseg8e16.v v0, (a2)
@@ -285,6 +291,7 @@ func ff_vc1_inv_trans_8x4_rvv, zve32x
endfunc
func ff_vc1_inv_trans_4x8_rvv, zve32x
+ lpad 0
li a3, 8 * 2
csrwi vxrm, 0
vsetivli zero, 8, e16, m1, ta, ma
@@ -359,6 +366,7 @@ func ff_vc1_inv_trans_4x8_rvv, zve32x
endfunc
func ff_vc1_inv_trans_4x4_rvv, zve32x
+ lpad 0
li a3, 8 * 2
csrwi vxrm, 0
vsetivli zero, 4, e16, mf2, ta, ma
@@ -422,12 +430,14 @@ endfunc
.endm
func ff_avg_pixels16x16_rvv, zve32x
+ lpad 0
li t0, 16
vsetivli zero, 16, e8, m1, ta, ma
j 1f
endfunc
func ff_avg_pixels8x8_rvv, zve32x
+ lpad 0
li t0, 8
vsetivli zero, 8, e8, mf2, ta, ma
1:
@@ -446,6 +456,7 @@ func ff_avg_pixels8x8_rvv, zve32x
endfunc
func ff_vc1_unescape_buffer_rvv, zve32x
+ lpad 0
vsetivli zero, 2, e8, m1, ta, ma
vmv.v.i v8, -1
li t4, 1
diff --git a/libavcodec/riscv/vorbisdsp_rvv.S b/libavcodec/riscv/vorbisdsp_rvv.S
index 81a6c62a65..14abebb20d 100644
--- a/libavcodec/riscv/vorbisdsp_rvv.S
+++ b/libavcodec/riscv/vorbisdsp_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_vorbis_inverse_coupling_rvv, zve32f
+ lpad 0
fmv.w.x ft0, zero
1:
vsetvli t0, a2, e32, m4, ta, ma
diff --git a/libavcodec/riscv/vp7dsp_rvv.S b/libavcodec/riscv/vp7dsp_rvv.S
index 856b0e8c96..a0d257e5df 100644
--- a/libavcodec/riscv/vp7dsp_rvv.S
+++ b/libavcodec/riscv/vp7dsp_rvv.S
@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_vp7_luma_dc_wht_rvv, zve32x
+ lpad 0
li a2, 4 * 16 * 2
li a7, 16 * 2
jal t0, 1f
@@ -99,6 +100,7 @@ func ff_vp7_luma_dc_wht_rvv, zve32x
endfunc
func ff_vp7_idct_add_rvv, zve32x
+ lpad 0
jal t0, 1b
csrwi vxrm, 2
vsetvli zero, zero, e8, mf4, ta, ma
@@ -130,6 +132,7 @@ endfunc
.irp type, y, uv
func ff_vp7_idct_dc_add4\type\()_rvv, zve32x
+ lpad 0
li t0, 32
vsetivli zero, 4, e16, mf2, ta, ma
li t1, 23170
diff --git a/libavcodec/riscv/vp8dsp_rvi.S b/libavcodec/riscv/vp8dsp_rvi.S
index 50ba4f293f..07d5c85032 100644
--- a/libavcodec/riscv/vp8dsp_rvi.S
+++ b/libavcodec/riscv/vp8dsp_rvi.S
@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_put_vp8_pixels16_rvi
+ lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -36,6 +37,7 @@ func ff_put_vp8_pixels16_rvi
endfunc
func ff_put_vp8_pixels8_rvi
+ lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -49,6 +51,7 @@ endfunc
#endif
func ff_put_vp8_pixels4_rvi
+ lpad 0
1:
addi a4, a4, -1
lw t0, (a2)
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index d366748a0a..e5d5a80bf8 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -45,6 +45,7 @@
#if __riscv_xlen >= 64
func ff_vp8_luma_dc_wht_rvv, zve64x
+ lpad 0
vsetivli zero, 1, e64, m1, ta, ma
vlseg4e64.v v4, (a1)
vsetivli zero, 4, e16, mf2, ta, ma
@@ -99,6 +100,7 @@ endfunc
#endif
func ff_vp8_idct_add_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
vsetivli zero, 4, e16, mf2, ta, ma
addi a3, a1, 1 * 4 * 2
@@ -158,6 +160,7 @@ func ff_vp8_idct_add_rvv, zve32x
endfunc
func ff_vp8_idct_dc_add_rvv, zve32x
+ lpad 0
lh a3, (a1)
addi a3, a3, 4
srai a3, a3, 3
@@ -182,6 +185,7 @@ func ff_vp78_idct_dc_add_rvv, zve32x
endfunc
func ff_vp8_idct_dc_add4y_rvv, zve32x
+ lpad 0
li t0, 32
vsetivli zero, 4, e16, mf2, ta, ma
li t1, 4 - (128 << 3)
@@ -217,6 +221,7 @@ func ff_vp78_idct_dc_add4y_rvv, zve32x
endfunc
func ff_vp8_idct_dc_add4uv_rvv, zve32x
+ lpad 0
li t0, 32
vsetivli zero, 4, e16, mf2, ta, ma
li t1, 4 - (128 << 3)
@@ -265,6 +270,7 @@ endfunc
.macro put_vp8_bilin_h_v type mn
func ff_put_vp8_bilin4_\type\()_rvv, zve32x
+ lpad 0
vsetvlstatic8 4
.Lbilin_\type:
li t1, 8
@@ -310,6 +316,7 @@ put_vp8_bilin_h_v h a5
put_vp8_bilin_h_v v a6
func ff_put_vp8_bilin4_hv_rvv, zve32x
+ lpad 0
vsetvlstatic8 4
.Lbilin_hv:
li t3, 8
@@ -335,16 +342,19 @@ endfunc
.irp len,16,8
func ff_put_vp8_bilin\len\()_h_rvv, zve32x
+ lpad 0
vsetvlstatic8 \len
j .Lbilin_h
endfunc
func ff_put_vp8_bilin\len\()_v_rvv, zve32x
+ lpad 0
vsetvlstatic8 \len
j .Lbilin_v
endfunc
func ff_put_vp8_bilin\len\()_hv_rvv, zve32x
+ lpad 0
vsetvlstatic8 \len
j .Lbilin_hv
endfunc
@@ -441,6 +451,7 @@ endconst
.macro epel len size type
func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
+ lpad 0
epel_filter \size \type t
vsetvlstatic8 \len
1:
@@ -456,6 +467,7 @@ endfunc
.macro epel_hv len hsize vsize
func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x
+ lpad 0
#if __riscv_xlen == 64
addi sp, sp, -48
.irp n,0,1,2,3,4,5
diff --git a/libavcodec/riscv/vp9_intra_rvi.S b/libavcodec/riscv/vp9_intra_rvi.S
index 16b6bdb25a..d14008f849 100644
--- a/libavcodec/riscv/vp9_intra_rvi.S
+++ b/libavcodec/riscv/vp9_intra_rvi.S
@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_v_32x32_rvi
+ lpad 0
ld t0, (a3)
ld t1, 8(a3)
ld t2, 16(a3)
@@ -43,6 +44,7 @@ func ff_v_32x32_rvi
endfunc
func ff_v_16x16_rvi
+ lpad 0
ld t0, (a3)
ld t1, 8(a3)
.rept 8
@@ -58,6 +60,7 @@ func ff_v_16x16_rvi
endfunc
func ff_v_8x8_rvi
+ lpad 0
ld t0, (a3)
.rept 4
add a7, a0, a1
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
index beeb1ff88c..13d695c831 100644
--- a/libavcodec/riscv/vp9_intra_rvv.S
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -91,6 +91,7 @@
.macro func_dc name size type n restore ext
func ff_\()\name\()_\()\size\()x\size\()_rvv, \ext
+ lpad 0
.if \size == 8
dc_e64 \type \size \n \restore
.else
@@ -119,6 +120,7 @@ func_dc dc_top 16 top 4 1 zve32x
func_dc dc_top 8 top 3 0 zve64x
func ff_h_32x32_rvv, zve32x
+ lpad 0
li t0, 32
addi a2, a2, 31
vsetvli zero, t0, e8, m2, ta, ma
@@ -139,6 +141,7 @@ func ff_h_32x32_rvv, zve32x
endfunc
func ff_h_16x16_rvv, zve32x
+ lpad 0
addi a2, a2, 15
vsetivli zero, 16, e8, m1, ta, ma
@@ -157,6 +160,7 @@ func ff_h_16x16_rvv, zve32x
endfunc
func ff_h_8x8_rvv, zve32x
+ lpad 0
addi a2, a2, 7
vsetivli zero, 8, e8, mf2, ta, ma
@@ -190,6 +194,7 @@ endfunc
.endm
func ff_tm_32x32_rvv, zve32x
+ lpad 0
lbu a4, -1(a3)
li t5, 32
@@ -244,6 +249,7 @@ func ff_tm_16x16_rvv, zve32x
endfunc
func ff_tm_8x8_rvv, zve32x
+ lpad 0
vsetivli zero, 8, e16, m1, ta, ma
vle8.v v8, (a3)
vzext.vf2 v28, v8
@@ -269,6 +275,7 @@ func ff_tm_8x8_rvv, zve32x
endfunc
func ff_tm_4x4_rvv, zve32x
+ lpad 0
vsetivli zero, 4, e16, mf2, ta, ma
vle8.v v8, (a3)
vzext.vf2 v28, v8
diff --git a/libavcodec/riscv/vp9_mc_rvi.S b/libavcodec/riscv/vp9_mc_rvi.S
index 0db14e83c7..4a8371b232 100644
--- a/libavcodec/riscv/vp9_mc_rvi.S
+++ b/libavcodec/riscv/vp9_mc_rvi.S
@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_copy64_rvi
+ lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -48,6 +49,7 @@ func ff_copy64_rvi
endfunc
func ff_copy32_rvi
+ lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -66,6 +68,7 @@ func ff_copy32_rvi
endfunc
func ff_copy16_rvi
+ lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -80,6 +83,7 @@ func ff_copy16_rvi
endfunc
func ff_copy8_rvi
+ lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -93,6 +97,7 @@ endfunc
#endif
func ff_copy4_rvi
+ lpad 0
1:
addi a4, a4, -1
lw t0, (a2)
diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index 7cb38ec94a..8d776661d9 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -38,6 +38,7 @@
.macro copy_avg len
func ff_vp9_avg\len\()_rvv, zve32x
+ lpad 0
csrwi vxrm, 0
vsetvlstatic8 \len, t0, 64
1:
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* [FFmpeg-devel] [PATCH 4/6] lavfi/riscv: add forward-edge CFI landing pads
2024-07-22 19:38 [FFmpeg-devel] [PATCH 1/6] lavu/riscv: assembly for zicfilp LPAD Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 2/6] lavu/riscv: add forward-edge CFI landing pads Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 3/6] lavc/riscv: " Rémi Denis-Courmont
@ 2024-07-22 19:38 ` Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 5/6] sws/riscv: " Rémi Denis-Courmont
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 19:38 UTC (permalink / raw)
To: ffmpeg-devel
---
libavfilter/riscv/af_afir_rvv.S | 1 +
1 file changed, 1 insertion(+)
diff --git a/libavfilter/riscv/af_afir_rvv.S b/libavfilter/riscv/af_afir_rvv.S
index 04ec2e50d8..2d2b8b1ed3 100644
--- a/libavfilter/riscv/af_afir_rvv.S
+++ b/libavfilter/riscv/af_afir_rvv.S
@@ -22,6 +22,7 @@
// void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
func ff_fcmul_add_rvv, zve64f
+ lpad 0
li t1, 32
1:
vsetvli t0, a3, e32, m4, ta, ma
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* [FFmpeg-devel] [PATCH 5/6] sws/riscv: add forward-edge CFI landing pads
2024-07-22 19:38 [FFmpeg-devel] [PATCH 1/6] lavu/riscv: assembly for zicfilp LPAD Rémi Denis-Courmont
` (2 preceding siblings ...)
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 4/6] lavfi/riscv: " Rémi Denis-Courmont
@ 2024-07-22 19:38 ` Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 6/6] checkasm/riscv: " Rémi Denis-Courmont
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 19:38 UTC (permalink / raw)
To: ffmpeg-devel
---
libswscale/riscv/input_rvv.S | 12 ++++++++++++
libswscale/riscv/range_rvv.S | 4 ++++
libswscale/riscv/rgb2rgb_rvb.S | 1 +
libswscale/riscv/rgb2rgb_rvv.S | 7 +++++++
4 files changed, 24 insertions(+)
diff --git a/libswscale/riscv/input_rvv.S b/libswscale/riscv/input_rvv.S
index 1d7de59c66..0562e55921 100644
--- a/libswscale/riscv/input_rvv.S
+++ b/libswscale/riscv/input_rvv.S
@@ -21,12 +21,14 @@
#include "libavutil/riscv/asm.S"
func ff_bgr24ToY_rvv, zve32x
+ lpad 0
lw t1, 8(a5) # BY
lw t3, 0(a5) # RY
j 1f
endfunc
func ff_rgb24ToY_rvv, zve32x
+ lpad 0
lw t1, 0(a5) # RY
lw t3, 8(a5) # BY
1:
@@ -55,6 +57,7 @@ func ff_rgb24ToY_rvv, zve32x
endfunc
func ff_bgr24ToUV_rvv, zve32x
+ lpad 0
lw t1, 20(a6) # BU
lw t4, 32(a6) # BV
lw t3, 12(a6) # RU
@@ -63,6 +66,7 @@ func ff_bgr24ToUV_rvv, zve32x
endfunc
func ff_rgb24ToUV_rvv, zve32x
+ lpad 0
lw t1, 12(a6) # RU
lw t4, 24(a6) # RV
lw t3, 20(a6) # BU
@@ -101,6 +105,7 @@ func ff_rgb24ToUV_rvv, zve32x
endfunc
func ff_bgr24ToUV_half_rvv, zve32x
+ lpad 0
lw t1, 20(a6) # BU
lw t4, 32(a6) # BV
lw t3, 12(a6) # RU
@@ -109,6 +114,7 @@ func ff_bgr24ToUV_half_rvv, zve32x
endfunc
func ff_rgb24ToUV_half_rvv, zve32x
+ lpad 0
lw t1, 12(a6) # RU
lw t4, 24(a6) # RV
lw t3, 20(a6) # BU
@@ -152,12 +158,14 @@ endfunc
.macro rgba_input chr0, chr1, high
func ff_\chr1\()ToY_rvv, zve32x
+ lpad 0
lw t1, 8(a5) # BY
lw t3, 0(a5) # RY
j 1f
endfunc
func ff_\chr0\()ToY_rvv, zve32x
+ lpad 0
lw t1, 0(a5) # RY
lw t3, 8(a5) # BY
1:
@@ -192,6 +200,7 @@ func ff_\chr0\()ToY_rvv, zve32x
endfunc
func ff_\chr1\()ToUV_rvv, zve32x
+ lpad 0
lw t1, 20(a6) # BU
lw t4, 32(a6) # BV
lw t3, 12(a6) # RU
@@ -200,6 +209,7 @@ func ff_\chr1\()ToUV_rvv, zve32x
endfunc
func ff_\chr0\()ToUV_rvv, zve32x
+ lpad 0
lw t1, 12(a6) # RU
lw t4, 24(a6) # RV
lw t3, 20(a6) # BU
@@ -244,6 +254,7 @@ func ff_\chr0\()ToUV_rvv, zve32x
endfunc
func ff_\chr1\()ToUV_half_rvv, zve32x
+ lpad 0
lw t1, 20(a6) # BU
lw t4, 32(a6) # BV
lw t3, 12(a6) # RU
@@ -252,6 +263,7 @@ func ff_\chr1\()ToUV_half_rvv, zve32x
endfunc
func ff_\chr0\()ToUV_half_rvv, zve32x
+ lpad 0
lw t1, 12(a6) # RU
lw t4, 24(a6) # RV
lw t3, 20(a6) # BU
diff --git a/libswscale/riscv/range_rvv.S b/libswscale/riscv/range_rvv.S
index 9da80e6199..1d71ef29f6 100644
--- a/libswscale/riscv/range_rvv.S
+++ b/libswscale/riscv/range_rvv.S
@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_range_lum_to_jpeg_16_rvv, zve32x
+ lpad 0
li t1, 30189
li t2, 19077
li t3, -39057361
@@ -42,6 +43,7 @@ func ff_range_lum_to_jpeg_16_rvv, zve32x
endfunc
func ff_range_lum_from_jpeg_16_rvv, zve32x
+ lpad 0
li t1, 14071
li t2, 33561947
1:
@@ -61,6 +63,7 @@ func ff_range_lum_from_jpeg_16_rvv, zve32x
endfunc
func ff_range_chr_to_jpeg_16_rvv, zve32x
+ lpad 0
li t1, 30775
li t2, 4663
li t3, -9289992
@@ -89,6 +92,7 @@ func ff_range_chr_to_jpeg_16_rvv, zve32x
endfunc
func ff_range_chr_from_jpeg_16_rvv, zve32x
+ lpad 0
li t1, 1799
li t2, 4081085
1:
diff --git a/libswscale/riscv/rgb2rgb_rvb.S b/libswscale/riscv/rgb2rgb_rvb.S
index af127b32ed..d9e56d77be 100644
--- a/libswscale/riscv/rgb2rgb_rvb.S
+++ b/libswscale/riscv/rgb2rgb_rvb.S
@@ -24,6 +24,7 @@
#if (__riscv_xlen >= 64)
func ff_shuffle_bytes_3210_rvb, zbb
+ lpad 0
srli a2, a2, 2
bswap32_rvb a1, a0, a2
endfunc
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 19f7aaf67d..8ca1ad94b2 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -21,11 +21,13 @@
#include "libavutil/riscv/asm.S"
func ff_shuffle_bytes_0321_rvv, zve32x
+ lpad 0
li t1, 0x00ff00ff
j 1f
endfunc
func ff_shuffle_bytes_2103_rvv, zve32x
+ lpad 0
li t1, ~0x00ff00ff
1:
not t2, t1
@@ -49,12 +51,14 @@ func ff_shuffle_bytes_2103_rvv, zve32x
endfunc
func ff_shuffle_bytes_1230_rvv, zve32x
+ lpad 0
li t1, 24
li t2, 8
j 3f
endfunc
func ff_shuffle_bytes_3012_rvv, zve32x
+ lpad 0
li t1, 8
li t2, 24
3:
@@ -75,6 +79,7 @@ func ff_shuffle_bytes_3012_rvv, zve32x
endfunc
func ff_interleave_bytes_rvv, zve32x
+ lpad 0
1:
mv t0, a0
mv t1, a1
@@ -101,6 +106,7 @@ func ff_interleave_bytes_rvv, zve32x
endfunc
func ff_deinterleave_bytes_rvv, zve32x
+ lpad 0
1:
mv t0, a0
mv t1, a1
@@ -127,6 +133,7 @@ func ff_deinterleave_bytes_rvv, zve32x
endfunc
.macro yuy2_to_i422p luma, chroma
+ lpad 0
srai t4, a4, 1 // pixel width -> chroma width
lw t6, (sp)
slli t5, a4, 1 // pixel width -> (source) byte width
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* [FFmpeg-devel] [PATCH 6/6] checkasm/riscv: add forward-edge CFI landing pads
2024-07-22 19:38 [FFmpeg-devel] [PATCH 1/6] lavu/riscv: assembly for zicfilp LPAD Rémi Denis-Courmont
` (3 preceding siblings ...)
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 5/6] sws/riscv: " Rémi Denis-Courmont
@ 2024-07-22 19:38 ` Rémi Denis-Courmont
2024-07-22 20:13 ` [FFmpeg-devel] [PATCH 7/8] lavu/riscv: align functions to 4 bytes Rémi Denis-Courmont
2024-07-22 20:13 ` [FFmpeg-devel] [PATCH 8/8] checkasm/riscv: align the landing pads Rémi Denis-Courmont
6 siblings, 0 replies; 8+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 19:38 UTC (permalink / raw)
To: ffmpeg-devel
---
tests/checkasm/riscv/checkasm.S | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S
index 73ca85f344..835cc7d315 100644
--- a/tests/checkasm/riscv/checkasm.S
+++ b/tests/checkasm/riscv/checkasm.S
@@ -49,6 +49,7 @@ saved_regs:
.endr
func checkasm_set_function
+ lpad 0
la.tls.ie t0, checked_func
add t0, tp, t0
sd a0, (t0)
@@ -56,6 +57,7 @@ func checkasm_set_function
endfunc
func checkasm_get_wrapper, v
+ lpad 0
addi sp, sp, -16
sd fp, (sp)
sd ra, 8(sp)
@@ -74,6 +76,7 @@ func checkasm_get_wrapper, v
ret
2: /* <-- Entry point with the Vector extension --> */
+ lpad 0
/* Clobber the vectors */
vsetvli t0, zero, e32, m8, ta, ma
li t0, 0xdeadbeef
@@ -90,6 +93,7 @@ func checkasm_get_wrapper, v
csrwi vxsat, 1 /* Saturation: encountered */
3: /* <-- Entry point without the Vector extension --> */
+ lpad 0
/* Save RA, unallocatable and callee-saved registers */
la.tls.ie t0, saved_regs
add t0, tp, t0
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* [FFmpeg-devel] [PATCH 7/8] lavu/riscv: align functions to 4 bytes
2024-07-22 19:38 [FFmpeg-devel] [PATCH 1/6] lavu/riscv: assembly for zicfilp LPAD Rémi Denis-Courmont
` (4 preceding siblings ...)
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 6/6] checkasm/riscv: " Rémi Denis-Courmont
@ 2024-07-22 20:13 ` Rémi Denis-Courmont
2024-07-22 20:13 ` [FFmpeg-devel] [PATCH 8/8] checkasm/riscv: align the landing pads Rémi Denis-Courmont
6 siblings, 0 replies; 8+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 20:13 UTC (permalink / raw)
To: ffmpeg-devel
Currently the start of the byte range for each function is aligned to
4 bytes. But this can lead to situations where the function is preceded
by a 2-byte C.NOP at the aligned 4-byte boundary. Then the first actual
instruction and the function symbol are only aligned on 2 bytes.
This forcibly disables compression for the alignment and the symbol,
thus ensuring that no 2-byte padding is left between the aligned boundary
and the function symbol.
---
libavutil/riscv/asm.S | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S
index 37fd7d3b03..633c93d5fd 100644
--- a/libavutil/riscv/asm.S
+++ b/libavutil/riscv/asm.S
@@ -38,7 +38,6 @@
.macro func sym, ext1=, ext2=
.text
- .align 2
.option push
.ifnb \ext1
@@ -51,7 +50,11 @@
.global \sym
.hidden \sym
.type \sym, %function
+ .option push
+ .option norvc
+ .align 2
\sym:
+ .option pop
.macro endfunc
.size \sym, . - \sym
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* [FFmpeg-devel] [PATCH 8/8] checkasm/riscv: align the landing pads
2024-07-22 19:38 [FFmpeg-devel] [PATCH 1/6] lavu/riscv: assembly for zicfilp LPAD Rémi Denis-Courmont
` (5 preceding siblings ...)
2024-07-22 20:13 ` [FFmpeg-devel] [PATCH 7/8] lavu/riscv: align functions to 4 bytes Rémi Denis-Courmont
@ 2024-07-22 20:13 ` Rémi Denis-Courmont
6 siblings, 0 replies; 8+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 20:13 UTC (permalink / raw)
To: ffmpeg-devel
---
tests/checkasm/riscv/checkasm.S | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tests/checkasm/riscv/checkasm.S b/tests/checkasm/riscv/checkasm.S
index 835cc7d315..e8bcbb271e 100644
--- a/tests/checkasm/riscv/checkasm.S
+++ b/tests/checkasm/riscv/checkasm.S
@@ -75,6 +75,8 @@ func checkasm_get_wrapper, v
addi sp, sp, 16
ret
+ .option norvc
+ .align 2
2: /* <-- Entry point with the Vector extension --> */
lpad 0
/* Clobber the vectors */
@@ -92,6 +94,7 @@ func checkasm_get_wrapper, v
csrwi vxrm, 3 /* Rounding mode: round-to-odd */
csrwi vxsat, 1 /* Saturation: encountered */
+ .align 2
3: /* <-- Entry point without the Vector extension --> */
lpad 0
/* Save RA, unallocatable and callee-saved registers */
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-07-22 20:14 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-22 19:38 [FFmpeg-devel] [PATCH 1/6] lavu/riscv: assembly for zicfilp LPAD Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 2/6] lavu/riscv: add forward-edge CFI landing pads Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 3/6] lavc/riscv: " Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 4/6] lavfi/riscv: " Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 5/6] sws/riscv: " Rémi Denis-Courmont
2024-07-22 19:38 ` [FFmpeg-devel] [PATCH 6/6] checkasm/riscv: " Rémi Denis-Courmont
2024-07-22 20:13 ` [FFmpeg-devel] [PATCH 7/8] lavu/riscv: align functions to 4 bytes Rémi Denis-Courmont
2024-07-22 20:13 ` [FFmpeg-devel] [PATCH 8/8] checkasm/riscv: align the landing pads Rémi Denis-Courmont
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git