* [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: restrict vertical intra pointers
@ 2024-07-22 18:11 Rémi Denis-Courmont
2024-07-22 18:11 ` [FFmpeg-devel] [PATCH 2/4] lavc/vp9dsp: use restrict qualifier for copy/avg MC Rémi Denis-Courmont
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 18:11 UTC (permalink / raw)
To: ffmpeg-devel
This lets the compiler unroll ever so slightly better (at least in the
16x16 case for RISC-V GCC).
---
libavcodec/vp9dsp_template.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/libavcodec/vp9dsp_template.c b/libavcodec/vp9dsp_template.c
index 9b11661704..5c4fb5d6e2 100644
--- a/libavcodec/vp9dsp_template.c
+++ b/libavcodec/vp9dsp_template.c
@@ -30,7 +30,7 @@
// FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8)
// back with h264pred.[ch]
-static void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+static void vert_4x4_c(uint8_t *restrict _dst, ptrdiff_t stride,
const uint8_t *left, const uint8_t *_top)
{
pixel *dst = (pixel *) _dst;
@@ -44,7 +44,7 @@ static void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride,
AV_WN4PA(dst + stride * 3, p4);
}
-static void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+static void vert_8x8_c(uint8_t *restrict _dst, ptrdiff_t stride,
const uint8_t *left, const uint8_t *_top)
{
pixel *dst = (pixel *) _dst;
@@ -61,7 +61,7 @@ static void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride,
}
}
-static void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+static void vert_16x16_c(uint8_t *restrict _dst, ptrdiff_t stride,
const uint8_t *left, const uint8_t *_top)
{
pixel *dst = (pixel *) _dst;
@@ -82,7 +82,7 @@ static void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride,
}
}
-static void vert_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+static void vert_32x32_c(uint8_t *restrict _dst, ptrdiff_t stride,
const uint8_t *left, const uint8_t *_top)
{
pixel *dst = (pixel *) _dst;
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* [FFmpeg-devel] [PATCH 2/4] lavc/vp9dsp: use restrict qualifier for copy/avg MC
2024-07-22 18:11 [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: restrict vertical intra pointers Rémi Denis-Courmont
@ 2024-07-22 18:11 ` Rémi Denis-Courmont
2024-07-22 18:12 ` [FFmpeg-devel] [PATCH 3/4] lavc/vp9dsp: copy 8 pixels at once Rémi Denis-Courmont
2024-07-22 18:12 ` [FFmpeg-devel] [PATCH 4/4] lavc/vp9dsp: remove R-V I intra functions Rémi Denis-Courmont
2 siblings, 0 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 18:11 UTC (permalink / raw)
To: ffmpeg-devel
Same as previous commit.
---
libavcodec/vp9dsp_template.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/libavcodec/vp9dsp_template.c b/libavcodec/vp9dsp_template.c
index 5c4fb5d6e2..da3cc28e5e 100644
--- a/libavcodec/vp9dsp_template.c
+++ b/libavcodec/vp9dsp_template.c
@@ -1936,9 +1936,9 @@ static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp)
#if BIT_DEPTH != 12
-static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
- const uint8_t *src, ptrdiff_t src_stride,
- int w, int h)
+static av_always_inline void copy_c(uint8_t *restrict dst, ptrdiff_t dst_stride,
+ const uint8_t *restrict src,
+ ptrdiff_t src_stride, int w, int h)
{
do {
memcpy(dst, src, w * sizeof(pixel));
@@ -1948,9 +1948,9 @@ static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
} while (--h);
}
-static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride,
- const uint8_t *_src, ptrdiff_t src_stride,
- int w, int h)
+static av_always_inline void avg_c(uint8_t *restrict _dst, ptrdiff_t dst_stride,
+ const uint8_t *restrict _src,
+ ptrdiff_t src_stride, int w, int h)
{
pixel *dst = (pixel *) _dst;
const pixel *src = (const pixel *) _src;
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* [FFmpeg-devel] [PATCH 3/4] lavc/vp9dsp: copy 8 pixels at once
2024-07-22 18:11 [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: restrict vertical intra pointers Rémi Denis-Courmont
2024-07-22 18:11 ` [FFmpeg-devel] [PATCH 2/4] lavc/vp9dsp: use restrict qualifier for copy/avg MC Rémi Denis-Courmont
@ 2024-07-22 18:12 ` Rémi Denis-Courmont
2024-07-22 18:12 ` [FFmpeg-devel] [PATCH 4/4] lavc/vp9dsp: remove R-V I intra functions Rémi Denis-Courmont
2 siblings, 0 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 18:12 UTC (permalink / raw)
To: ffmpeg-devel
In the 8-bit case, we can actually read/write 8 aligned pixel values per
load/store, which unsurprisingly tends to be faster on 64-bit systems (and
makes no difference on 32-bit systems). This requires ifdef'ing though.
---
libavcodec/vp9dsp_template.c | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/libavcodec/vp9dsp_template.c b/libavcodec/vp9dsp_template.c
index da3cc28e5e..9e5b25142d 100644
--- a/libavcodec/vp9dsp_template.c
+++ b/libavcodec/vp9dsp_template.c
@@ -49,14 +49,22 @@ static void vert_8x8_c(uint8_t *restrict _dst, ptrdiff_t stride,
{
pixel *dst = (pixel *) _dst;
const pixel *top = (const pixel *) _top;
+#if BIT_DEPTH == 8
+ uint64_t p8 = AV_RN64A(top);
+#else
pixel4 p4a = AV_RN4PA(top + 0);
pixel4 p4b = AV_RN4PA(top + 4);
+#endif
int y;
stride /= sizeof(pixel);
for (y = 0; y < 8; y++) {
+#if BIT_DEPTH == 8
+ AV_WN64A(dst, p8);
+#else
AV_WN4PA(dst + 0, p4a);
AV_WN4PA(dst + 4, p4b);
+#endif
dst += stride;
}
}
@@ -66,18 +74,28 @@ static void vert_16x16_c(uint8_t *restrict _dst, ptrdiff_t stride,
{
pixel *dst = (pixel *) _dst;
const pixel *top = (const pixel *) _top;
+#if BIT_DEPTH == 8
+ uint64_t p8a = AV_RN64A(top);
+ uint64_t p8b = AV_RN64A(top + 8);
+#else
pixel4 p4a = AV_RN4PA(top + 0);
pixel4 p4b = AV_RN4PA(top + 4);
pixel4 p4c = AV_RN4PA(top + 8);
pixel4 p4d = AV_RN4PA(top + 12);
+#endif
int y;
stride /= sizeof(pixel);
for (y = 0; y < 16; y++) {
+#if BIT_DEPTH == 8
+ AV_WN64A(dst + 0, p8a);
+ AV_WN64A(dst + 8, p8b);
+#else
AV_WN4PA(dst + 0, p4a);
AV_WN4PA(dst + 4, p4b);
AV_WN4PA(dst + 8, p4c);
AV_WN4PA(dst + 12, p4d);
+#endif
dst += stride;
}
}
@@ -87,6 +105,12 @@ static void vert_32x32_c(uint8_t *restrict _dst, ptrdiff_t stride,
{
pixel *dst = (pixel *) _dst;
const pixel *top = (const pixel *) _top;
+#if BIT_DEPTH == 8
+ uint64_t p8a = AV_RN64A(top);
+ uint64_t p8b = AV_RN64A(top + 8);
+ uint64_t p8c = AV_RN64A(top + 16);
+ uint64_t p8d = AV_RN64A(top + 24);
+#else
pixel4 p4a = AV_RN4PA(top + 0);
pixel4 p4b = AV_RN4PA(top + 4);
pixel4 p4c = AV_RN4PA(top + 8);
@@ -95,10 +119,17 @@ static void vert_32x32_c(uint8_t *restrict _dst, ptrdiff_t stride,
pixel4 p4f = AV_RN4PA(top + 20);
pixel4 p4g = AV_RN4PA(top + 24);
pixel4 p4h = AV_RN4PA(top + 28);
+#endif
int y;
stride /= sizeof(pixel);
for (y = 0; y < 32; y++) {
+#if BIT_DEPTH == 8
+ AV_WN64A(dst + 0, p8a);
+ AV_WN64A(dst + 8, p8b);
+ AV_WN64A(dst + 16, p8c);
+ AV_WN64A(dst + 24, p8d);
+#else
AV_WN4PA(dst + 0, p4a);
AV_WN4PA(dst + 4, p4b);
AV_WN4PA(dst + 8, p4c);
@@ -107,6 +138,7 @@ static void vert_32x32_c(uint8_t *restrict _dst, ptrdiff_t stride,
AV_WN4PA(dst + 20, p4f);
AV_WN4PA(dst + 24, p4g);
AV_WN4PA(dst + 28, p4h);
+#endif
dst += stride;
}
}
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
* [FFmpeg-devel] [PATCH 4/4] lavc/vp9dsp: remove R-V I intra functions
2024-07-22 18:11 [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: restrict vertical intra pointers Rémi Denis-Courmont
2024-07-22 18:11 ` [FFmpeg-devel] [PATCH 2/4] lavc/vp9dsp: use restrict qualifier for copy/avg MC Rémi Denis-Courmont
2024-07-22 18:12 ` [FFmpeg-devel] [PATCH 3/4] lavc/vp9dsp: copy 8 pixels at once Rémi Denis-Courmont
@ 2024-07-22 18:12 ` Rémi Denis-Courmont
2 siblings, 0 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-22 18:12 UTC (permalink / raw)
To: ffmpeg-devel
At this point, they are identical to the C code, except for instruction
ordering. In fact, they are typically slower or no faster than the C code.
(Also FWIW, they were incorrectly flagged as requiring fast unaligned memory
accesses.)
---
libavcodec/riscv/Makefile | 3 +-
libavcodec/riscv/vp9_intra_rvi.S | 71 --------------------------------
libavcodec/riscv/vp9dsp_init.c | 7 ----
3 files changed, 1 insertion(+), 80 deletions(-)
delete mode 100644 libavcodec/riscv/vp9_intra_rvi.S
diff --git a/libavcodec/riscv/Makefile b/libavcodec/riscv/Makefile
index 0bbdd38116..a6cdcb71e9 100644
--- a/libavcodec/riscv/Makefile
+++ b/libavcodec/riscv/Makefile
@@ -73,8 +73,7 @@ OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
RV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvi.o
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
-RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvi.o \
- riscv/vp9_mc_rvi.o
+RV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_mc_rvi.o
RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o \
riscv/vp9_mc_rvv.o
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
diff --git a/libavcodec/riscv/vp9_intra_rvi.S b/libavcodec/riscv/vp9_intra_rvi.S
deleted file mode 100644
index 16b6bdb25a..0000000000
--- a/libavcodec/riscv/vp9_intra_rvi.S
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS).
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/riscv/asm.S"
-
-#if __riscv_xlen >= 64
-func ff_v_32x32_rvi
- ld t0, (a3)
- ld t1, 8(a3)
- ld t2, 16(a3)
- ld t3, 24(a3)
- .rept 16
- add a7, a0, a1
- sd t0, (a0)
- sd t1, 8(a0)
- sd t2, 16(a0)
- sd t3, 24(a0)
- sh1add a0, a1, a0
- sd t0, (a7)
- sd t1, 8(a7)
- sd t2, 16(a7)
- sd t3, 24(a7)
- .endr
-
- ret
-endfunc
-
-func ff_v_16x16_rvi
- ld t0, (a3)
- ld t1, 8(a3)
- .rept 8
- add a7, a0, a1
- sd t0, (a0)
- sd t1, 8(a0)
- sh1add a0, a1, a0
- sd t0, (a7)
- sd t1, 8(a7)
- .endr
-
- ret
-endfunc
-
-func ff_v_8x8_rvi
- ld t0, (a3)
- .rept 4
- add a7, a0, a1
- sd t0, (a0)
- sh1add a0, a1, a0
- sd t0, (a7)
- .endr
-
- ret
-endfunc
-#endif
diff --git a/libavcodec/riscv/vp9dsp_init.c b/libavcodec/riscv/vp9dsp_init.c
index 454dcd963f..2034e1c976 100644
--- a/libavcodec/riscv/vp9dsp_init.c
+++ b/libavcodec/riscv/vp9dsp_init.c
@@ -74,13 +74,6 @@ static av_cold void vp9dsp_intrapred_init_riscv(VP9DSPContext *dsp, int bpp)
#if HAVE_RV
int flags = av_get_cpu_flags();
-# if __riscv_xlen >= 64
- if (bpp == 8 && (flags & AV_CPU_FLAG_RVB_ADDR)) {
- dsp->intra_pred[TX_32X32][VERT_PRED] = ff_v_32x32_rvi;
- dsp->intra_pred[TX_16X16][VERT_PRED] = ff_v_16x16_rvi;
- dsp->intra_pred[TX_8X8][VERT_PRED] = ff_v_8x8_rvi;
- }
-# endif
#if HAVE_RVV
if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_rv_vlen_least(128)) {
dsp->intra_pred[TX_8X8][DC_PRED] = ff_dc_8x8_rvv;
--
2.45.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-07-22 18:12 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-22 18:11 [FFmpeg-devel] [PATCH 1/4] lavc/vp9dsp: restrict vertical intra pointers Rémi Denis-Courmont
2024-07-22 18:11 ` [FFmpeg-devel] [PATCH 2/4] lavc/vp9dsp: use restrict qualifier for copy/avg MC Rémi Denis-Courmont
2024-07-22 18:12 ` [FFmpeg-devel] [PATCH 3/4] lavc/vp9dsp: copy 8 pixels at once Rémi Denis-Courmont
2024-07-22 18:12 ` [FFmpeg-devel] [PATCH 4/4] lavc/vp9dsp: remove R-V I intra functions Rémi Denis-Courmont
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git