Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [RFC] [PATCH 1/4] lavc/h264_loopfilter: expose tc0_table (for checkasm)
@ 2024-07-01 17:08 Rémi Denis-Courmont
  2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 2/4] lavc/h264_loopfilter: align TC and bS tables Rémi Denis-Courmont
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 17:08 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavcodec/h264_loopfilter.c | 50 ++++++++++++++++++------------------
 libavcodec/h264dsp.h         |  2 ++
 2 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index c164a289b7..9481882dd0 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -66,7 +66,7 @@ static const uint8_t beta_table[52*3] = {
     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
 };
-static const uint8_t tc0_table[52*3][4] = {
+const int8_t ff_h264_tc0_table[52*3][4] = {
     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
@@ -109,10 +109,10 @@ static av_always_inline void filter_mb_edgev(uint8_t *pix, int stride,
 
     if( bS[0] < 4 || !intra ) {
         int8_t tc[4];
-        tc[0] = tc0_table[index_a][bS[0]];
-        tc[1] = tc0_table[index_a][bS[1]];
-        tc[2] = tc0_table[index_a][bS[2]];
-        tc[3] = tc0_table[index_a][bS[3]];
+        tc[0] = ff_h264_tc0_table[index_a][bS[0]];
+        tc[1] = ff_h264_tc0_table[index_a][bS[1]];
+        tc[2] = ff_h264_tc0_table[index_a][bS[2]];
+        tc[3] = ff_h264_tc0_table[index_a][bS[3]];
         h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
     } else {
         h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
@@ -131,10 +131,10 @@ static av_always_inline void filter_mb_edgecv(uint8_t *pix, int stride,
 
     if( bS[0] < 4 || !intra ) {
         int8_t tc[4];
-        tc[0] = tc0_table[index_a][bS[0]]+1;
-        tc[1] = tc0_table[index_a][bS[1]]+1;
-        tc[2] = tc0_table[index_a][bS[2]]+1;
-        tc[3] = tc0_table[index_a][bS[3]]+1;
+        tc[0] = ff_h264_tc0_table[index_a][bS[0]]+1;
+        tc[1] = ff_h264_tc0_table[index_a][bS[1]]+1;
+        tc[2] = ff_h264_tc0_table[index_a][bS[2]]+1;
+        tc[3] = ff_h264_tc0_table[index_a][bS[3]]+1;
         h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
     } else {
         h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
@@ -154,10 +154,10 @@ static av_always_inline void filter_mb_mbaff_edgev(const H264Context *h, uint8_t
 
     if( bS[0] < 4 || !intra ) {
         int8_t tc[4];
-        tc[0] = tc0_table[index_a][bS[0*bsi]];
-        tc[1] = tc0_table[index_a][bS[1*bsi]];
-        tc[2] = tc0_table[index_a][bS[2*bsi]];
-        tc[3] = tc0_table[index_a][bS[3*bsi]];
+        tc[0] = ff_h264_tc0_table[index_a][bS[0*bsi]];
+        tc[1] = ff_h264_tc0_table[index_a][bS[1*bsi]];
+        tc[2] = ff_h264_tc0_table[index_a][bS[2*bsi]];
+        tc[3] = ff_h264_tc0_table[index_a][bS[3*bsi]];
         h->h264dsp.h264_h_loop_filter_luma_mbaff(pix, stride, alpha, beta, tc);
     } else {
         h->h264dsp.h264_h_loop_filter_luma_mbaff_intra(pix, stride, alpha, beta);
@@ -177,10 +177,10 @@ static av_always_inline void filter_mb_mbaff_edgecv(const H264Context *h,
 
     if( bS[0] < 4 || !intra ) {
         int8_t tc[4];
-        tc[0] = tc0_table[index_a][bS[0*bsi]] + 1;
-        tc[1] = tc0_table[index_a][bS[1*bsi]] + 1;
-        tc[2] = tc0_table[index_a][bS[2*bsi]] + 1;
-        tc[3] = tc0_table[index_a][bS[3*bsi]] + 1;
+        tc[0] = ff_h264_tc0_table[index_a][bS[0*bsi]] + 1;
+        tc[1] = ff_h264_tc0_table[index_a][bS[1*bsi]] + 1;
+        tc[2] = ff_h264_tc0_table[index_a][bS[2*bsi]] + 1;
+        tc[3] = ff_h264_tc0_table[index_a][bS[3*bsi]] + 1;
         h->h264dsp.h264_h_loop_filter_chroma_mbaff(pix, stride, alpha, beta, tc);
     } else {
         h->h264dsp.h264_h_loop_filter_chroma_mbaff_intra(pix, stride, alpha, beta);
@@ -199,10 +199,10 @@ static av_always_inline void filter_mb_edgeh(uint8_t *pix, int stride,
 
     if( bS[0] < 4 || !intra ) {
         int8_t tc[4];
-        tc[0] = tc0_table[index_a][bS[0]];
-        tc[1] = tc0_table[index_a][bS[1]];
-        tc[2] = tc0_table[index_a][bS[2]];
-        tc[3] = tc0_table[index_a][bS[3]];
+        tc[0] = ff_h264_tc0_table[index_a][bS[0]];
+        tc[1] = ff_h264_tc0_table[index_a][bS[1]];
+        tc[2] = ff_h264_tc0_table[index_a][bS[2]];
+        tc[3] = ff_h264_tc0_table[index_a][bS[3]];
         h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
     } else {
         h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
@@ -221,10 +221,10 @@ static av_always_inline void filter_mb_edgech(uint8_t *pix, int stride,
 
     if( bS[0] < 4 || !intra ) {
         int8_t tc[4];
-        tc[0] = tc0_table[index_a][bS[0]]+1;
-        tc[1] = tc0_table[index_a][bS[1]]+1;
-        tc[2] = tc0_table[index_a][bS[2]]+1;
-        tc[3] = tc0_table[index_a][bS[3]]+1;
+        tc[0] = ff_h264_tc0_table[index_a][bS[0]]+1;
+        tc[1] = ff_h264_tc0_table[index_a][bS[1]]+1;
+        tc[2] = ff_h264_tc0_table[index_a][bS[2]]+1;
+        tc[3] = ff_h264_tc0_table[index_a][bS[3]]+1;
         h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
     } else {
         h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 4a9cb1568d..13371c59ea 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -117,6 +117,8 @@ typedef struct H264DSPContext {
     int (*startcode_find_candidate)(const uint8_t *buf, int size);
 } H264DSPContext;
 
+extern const int8_t ff_h264_tc0_table[][4];
+
 void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
                      const int chroma_format_idc);
 void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [FFmpeg-devel] [PATCH 2/4] lavc/h264_loopfilter: align TC and bS tables
  2024-07-01 17:08 [FFmpeg-devel] [RFC] [PATCH 1/4] lavc/h264_loopfilter: expose tc0_table (for checkasm) Rémi Denis-Courmont
@ 2024-07-01 17:08 ` Rémi Denis-Courmont
  2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 3/4] WIP: lavc/h264dsp: take over looking up TC values Rémi Denis-Courmont
  2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 4/4] lavc/h264dsp: update R-V V intra luma loop filter Rémi Denis-Courmont
  2 siblings, 0 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 17:08 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavcodec/h264_loopfilter.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 9481882dd0..96f572c1d2 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -66,7 +66,7 @@ static const uint8_t beta_table[52*3] = {
     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
 };
-const int8_t ff_h264_tc0_table[52*3][4] = {
+const DECLARE_ALIGNED_4(int8_t, ff_h264_tc0_table)[52*3][4] = {
     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
@@ -266,8 +266,8 @@ static av_always_inline void h264_filter_mb_fast_internal(const H264Context *h,
     qpc1 = (qpc + qpc1 + 1) >> 1;
 
     if( IS_INTRA(mb_type) ) {
-        static const int16_t bS4[4] = {4,4,4,4};
-        static const int16_t bS3[4] = {3,3,3,3};
+        static const DECLARE_ALIGNED_8(int16_t, bS4)[4] = {4,4,4,4};
+        static const DECLARE_ALIGNED_8(int16_t, bS3)[4] = {3,3,3,3};
         const int16_t *bSH = FIELD_PICTURE(h) ? bS3 : bS4;
         if(left_type)
             filter_mb_edgev( &img_y[4*0<<pixel_shift], linesize, bS4, qp0, a, b, h, 1);
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] WIP: lavc/h264dsp: take over looking up TC values
  2024-07-01 17:08 [FFmpeg-devel] [RFC] [PATCH 1/4] lavc/h264_loopfilter: expose tc0_table (for checkasm) Rémi Denis-Courmont
  2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 2/4] lavc/h264_loopfilter: align TC and bS tables Rémi Denis-Courmont
@ 2024-07-01 17:08 ` Rémi Denis-Courmont
  2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 4/4] lavc/h264dsp: update R-V V intra luma loop filter Rémi Denis-Courmont
  2 siblings, 0 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 17:08 UTC (permalink / raw)
  To: ffmpeg-devel

This moves the look-up of TC values (indexed by bS) out of the generic
C loop filter code and into the DSP functions. This (potentially)
eliminates a round-trip to the stack for the looked-up values.

This is a work in progress: 8 functions need to be updated, and this
patch only updates one of them. Updating the platform-specific
optimisations is left as an exercise for a future version, as is
updating checkasm.
---
 libavcodec/h264_loopfilter.c  | 8 ++------
 libavcodec/h264dsp.h          | 4 +++-
 libavcodec/h264dsp_template.c | 9 +++++++--
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 96f572c1d2..8fca08811c 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -108,12 +108,8 @@ static av_always_inline void filter_mb_edgev(uint8_t *pix, int stride,
     if (alpha ==0 || beta == 0) return;
 
     if( bS[0] < 4 || !intra ) {
-        int8_t tc[4];
-        tc[0] = ff_h264_tc0_table[index_a][bS[0]];
-        tc[1] = ff_h264_tc0_table[index_a][bS[1]];
-        tc[2] = ff_h264_tc0_table[index_a][bS[2]];
-        tc[3] = ff_h264_tc0_table[index_a][bS[3]];
-        h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
+        const int8_t *tc = ff_h264_tc0_table[index_a];
+        h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc, bS);
     } else {
         h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
     }
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 13371c59ea..f37ff5414c 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -48,7 +48,9 @@ typedef struct H264DSPContext {
     void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
                                     int alpha, int beta, int8_t *tc0);
     void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, ptrdiff_t stride,
-                                    int alpha, int beta, int8_t *tc0);
+                                    int alpha, int beta,
+                                    const int8_t tc0[4] /*align 4*/,
+                                    const int16_t bs[4] /*align 8*/);
     void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, ptrdiff_t stride,
                                           int alpha, int beta, int8_t *tc0);
     /* v/h_loop_filter_luma_intra: align 16 */
diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
index fe23a2cff1..4d4e34cf81 100644
--- a/libavcodec/h264dsp_template.c
+++ b/libavcodec/h264dsp_template.c
@@ -153,9 +153,14 @@ static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int a
 {
     FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0);
 }
-static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
+static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, const int8_t tc0[4], const int16_t bS[4])
 {
-    FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0);
+    int8_t tc[4];
+
+    for (size_t i = 0; i < 4; i++)
+        tc[i] = tc0[bS[i]];
+
+    FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc);
 }
 static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
 {
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [FFmpeg-devel] [PATCH 4/4] lavc/h264dsp: update R-V V intra luma loop filter
  2024-07-01 17:08 [FFmpeg-devel] [RFC] [PATCH 1/4] lavc/h264_loopfilter: expose tc0_table (for checkasm) Rémi Denis-Courmont
  2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 2/4] lavc/h264_loopfilter: align TC and bS tables Rémi Denis-Courmont
  2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 3/4] WIP: lavc/h264dsp: take over looking up TC values Rémi Denis-Courmont
@ 2024-07-01 17:08 ` Rémi Denis-Courmont
  2 siblings, 0 replies; 4+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 17:08 UTC (permalink / raw)
  To: ffmpeg-devel

Note that the performance reported by checkasm is slightly worse.
This is expected since the assembly code is now doing more work.
---
 libavcodec/riscv/h264dsp_init.c | 3 ++-
 libavcodec/riscv/h264dsp_rvv.S  | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index ab412a9924..9650cae66b 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -30,7 +30,8 @@
 void ff_h264_v_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
                                       int alpha, int beta, int8_t *tc0);
 void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
-                                      int alpha, int beta, int8_t *tc0);
+                                      int alpha, int beta, const int8_t *tc0,
+                                      const int16_t *bS);
 void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
                                             int alpha, int beta, int8_t *tc0);
 
diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S
index 96a8a0a8a3..6bc5406ba3 100644
--- a/libavcodec/riscv/h264dsp_rvv.S
+++ b/libavcodec/riscv/h264dsp_rvv.S
@@ -126,9 +126,11 @@ func ff_h264_v_loop_filter_luma_8_rvv, zve32x
 endfunc
 
 func ff_h264_h_loop_filter_luma_8_rvv, zve32x
-        vsetivli    zero, 4, e32, m1, ta, ma
-        vle8.v      v4, (a4)
+        vsetivli    zero, 4, e8, mf4, ta, ma
+        vle16.v     v8, (a5)
         li          t0, 0x01010101
+        vluxei16.v  v4, (a4), v8
+        vsetivli    zero, 4, e32, m1, ta, ma
         vzext.vf4   v6, v4
         addi        a0, a0, -3
         vmul.vx     v6, v6, t0
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-07-01 17:08 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-01 17:08 [FFmpeg-devel] [RFC] [PATCH 1/4] lavc/h264_loopfilter: expose tc0_table (for checkasm) Rémi Denis-Courmont
2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 2/4] lavc/h264_loopfilter: align TC and bS tables Rémi Denis-Courmont
2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 3/4] WIP: lavc/h264dsp: take over looking up TC values Rémi Denis-Courmont
2024-07-01 17:08 ` [FFmpeg-devel] [PATCH 4/4] lavc/h264dsp: update R-V V intra luma loop filter Rémi Denis-Courmont

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git