[FFmpeg-devel] [PR] avcodec/vvc/inter: Combine offsets early (PR #22246)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

* [FFmpeg-devel] [PR] avcodec/vvc/inter: Combine offsets early (PR #22246)
@ 2026-02-22  2:06 mkver via ffmpeg-devel
  0 siblings, 0 replies; only message in thread
From: mkver via ffmpeg-devel @ 2026-02-22  2:06 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: mkver

PR #22246 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22246
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22246.patch

For bi-predicted weighted averages, only the sum of the two offsets
is ever used, so add them once in the caller and pass a single
combined offset to the w_avg functions.


From 4e16436743c12edaa1658f9626a62cefa682e3f2 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 20 Feb 2026 18:24:24 +0100
Subject: [PATCH] avcodec/vvc/inter: Combine offsets early

For bi-predicted weighted averages, only the sum
of the two offsets is ever used, so add the two early.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/aarch64/vvc/dsp_init.c |  4 ++--
 libavcodec/riscv/vvc/dsp_init.c   |  2 +-
 libavcodec/riscv/vvc/mc_rvv.S     |  2 --
 libavcodec/vvc/dsp.h              |  2 +-
 libavcodec/vvc/inter.c            | 10 +++++-----
 libavcodec/vvc/inter_template.c   |  4 ++--
 libavcodec/x86/vvc/dsp_init.c     |  2 +-
 libavcodec/x86/vvc/mc.asm         |  5 ++---
 tests/checkasm/vvc_mc.c           |  8 ++++----
 9 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c
index bc2677945e..570070a28c 100644
--- a/libavcodec/aarch64/vvc/dsp_init.c
+++ b/libavcodec/aarch64/vvc/dsp_init.c
@@ -107,10 +107,10 @@ void ff_vvc_w_avg_12_neon(uint8_t *_dst, ptrdiff_t _dst_stride,
 #define W_AVG_FUN(bit_depth) \
 static void vvc_w_avg_ ## bit_depth(uint8_t *dst, ptrdiff_t dst_stride, \
     const int16_t *src0, const int16_t *src1, int width, int height, \
-    int denom, int w0, int w1, int o0, int o1) \
+    int denom, int w0, int w1, int o) \
 { \
     int shift = denom + FFMAX(3, 15 - bit_depth); \
-    int offset = ((o0 + o1) * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \
+    int offset = (o * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \
     uintptr_t w0_w1 = ((uintptr_t)w0 << 32) | (uint32_t)w1; \
     uintptr_t offset_shift = ((uintptr_t)offset << 32) | (uint32_t)shift; \
     ff_vvc_w_avg_ ## bit_depth ## _neon(dst, dst_stride, src0, src1, width, height, w0_w1, offset_shift); \
diff --git a/libavcodec/riscv/vvc/dsp_init.c b/libavcodec/riscv/vvc/dsp_init.c
index d7a89f4779..f8fde41529 100644
--- a/libavcodec/riscv/vvc/dsp_init.c
+++ b/libavcodec/riscv/vvc/dsp_init.c
@@ -34,7 +34,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,
     const int16_t *src0, const int16_t *src1, int width, int height);                                \
 void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,                                   \
     const int16_t *src0, const int16_t *src1, int width, int height,                                 \
-    int denom, int w0, int w1, int o0, int o1);
+    int denom, int w0, int w1, int o);
 
 AVG_PROTOTYPES(8, rvv_128)
 AVG_PROTOTYPES(8, rvv_256)
diff --git a/libavcodec/riscv/vvc/mc_rvv.S b/libavcodec/riscv/vvc/mc_rvv.S
index e6b2aadafe..a612290e3a 100644
--- a/libavcodec/riscv/vvc/mc_rvv.S
+++ b/libavcodec/riscv/vvc/mc_rvv.S
@@ -163,9 +163,7 @@ func ff_vvc_w_avg_8_rvv_\vlen\(), zve32x, zbb, zba
         addi              t6, a6, 7
         ld                t3, (sp)
         ld                t4, 8(sp)
-        ld                t5, 16(sp)
         addi              t4, t4, 1       // o0 + o1 + 1
-        add               t4, t4, t5
         addi              t5, t6, -1      // shift - 1
         sll               t4, t4, t5
         POW2_J            \vlen, 2, a4
diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
index ae22900931..29cdd7e8f4 100644
--- a/libavcodec/vvc/dsp.h
+++ b/libavcodec/vvc/dsp.h
@@ -75,7 +75,7 @@ typedef struct VVCInterDSPContext {
 
     void (*w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
         const int16_t *src0, const int16_t *src1, int width, int height,
-        int denom, int w0, int w1, int o0, int o1);
+        int denom, int w0, int w1, int o);
 
     void (*put_ciip)(uint8_t *dst, ptrdiff_t dst_stride, int width, int height,
         const uint8_t *inter, ptrdiff_t inter_stride, int inter_weight);
diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
index 85e0665a75..7d6e79e49b 100644
--- a/libavcodec/vvc/inter.c
+++ b/libavcodec/vvc/inter.c
@@ -232,22 +232,22 @@ static void apply_averaging(uint8_t *dst, const ptrdiff_t dst_stride,
         return;
     }
 
-    int denom, w0, w1, o1, o2;
+    int denom, w0, w1, o;
     if (bcw_idx) {
         denom = 2;
         w1 = bcw_w_lut[bcw_idx];
         w0 = 8 - w1;
-        o1 = o2 = 0;
+        o  = 0;
     } else {
         const PredWeightTable *w = pps->r->pps_wp_info_in_ph_flag ? &fc->ps.ph.pwt : &sh->pwt;
 
         denom = w->log2_denom[c_idx > 0];
         w0 = w->weight[L0][c_idx][mvf->ref_idx[L0]];
         w1 = w->weight[L1][c_idx][mvf->ref_idx[L1]];
-        o1 = w->offset[L0][c_idx][mvf->ref_idx[L0]];
-        o2 = w->offset[L1][c_idx][mvf->ref_idx[L1]];
+        o  = w->offset[L0][c_idx][mvf->ref_idx[L0]]
+           + w->offset[L1][c_idx][mvf->ref_idx[L1]];
     }
-    fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o1, o2);
+    fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o);
 }
 
 #define INTER_FILTER(t, frac)  (is_chroma ? ff_vvc_inter_chroma_filters[t][frac] : ff_vvc_inter_luma_filters[t][frac])
diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index aee4994c17..efa432d1fd 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -201,12 +201,12 @@ static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
 
 static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
     const int16_t *src0, const int16_t *src1, const int width, const int height,
-    const int denom, const int w0, const int w1, const int o0, const int o1)
+    const int denom, const int w0, const int w1, const int o)
 {
     pixel *dst                  = (pixel*)_dst;
     const ptrdiff_t dst_stride  = _dst_stride / sizeof(pixel);
     const int shift             = denom + FFMAX(3, 15 - BIT_DEPTH);
-    const int offset            = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
+    const int offset            = (o * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
 
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++)
diff --git a/libavcodec/x86/vvc/dsp_init.c b/libavcodec/x86/vvc/dsp_init.c
index cd3d02c0fb..37ddbcb73b 100644
--- a/libavcodec/x86/vvc/dsp_init.c
+++ b/libavcodec/x86/vvc/dsp_init.c
@@ -231,7 +231,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,     \
     const int16_t *src0, const int16_t *src1, int width, int height);\
 void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,   \
     const int16_t *src0, const int16_t *src1, int width, int height, \
-    int denom, int w0, int w1,  int o0, int o1);                     \
+    int denom, int w0, int w1, int o);                               \
     c->inter.avg    = bf(ff_vvc_avg, bd, opt);                       \
     c->inter.w_avg  = bf(ff_vvc_w_avg, bd, opt);                     \
 } while (0)
diff --git a/libavcodec/x86/vvc/mc.asm b/libavcodec/x86/vvc/mc.asm
index 8ba493aebd..4f078ea8d0 100644
--- a/libavcodec/x86/vvc/mc.asm
+++ b/libavcodec/x86/vvc/mc.asm
@@ -244,7 +244,7 @@ cglobal vvc_avg_%2, 4, 7, 5, dst, stride, src0, src1, w, h
 
 ;void ff_vvc_w_avg_%2_avx(uint8_t *dst, ptrdiff_t dst_stride,
 ;                         const int16_t *src0, const int16_t *src1, int width, int height,
-;                         int denom, intptr_t w0, int w1, int o0, int o1);
+;                         int denom, intptr_t w0, int w1, int o);
 %macro VVC_W_AVG_AVX2 3
 cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w, h
 %if UNIX64
@@ -256,8 +256,7 @@ cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w,
 %endif
 
     mov                 t1d, r6m                ; denom
-    mov                 t0d, r9m                ; o0
-    add                 t0d, r10m               ; o1
+    mov                 t0d, r9m                ; o0 + o1
     movifnidn           t2d, r8m                ; w1
     add                 t1d, 15-%2
 %if %2 != 8
diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
index 754cf19065..623b6142f1 100644
--- a/tests/checkasm/vvc_mc.c
+++ b/tests/checkasm/vvc_mc.c
@@ -306,7 +306,7 @@ static void check_avg(void)
                 {
                     declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *src0, const int16_t *src1, int width, int height,
-                        int denom, int w0, int w1, int o0, int o1);
+                        int denom, int w0, int w1, int o);
                     {
                         const int denom = rnd() % 8;
                         const int w0    = rnd() % 256 - 128;
@@ -317,12 +317,12 @@ static void check_avg(void)
                             memset(dst0, 0, AVG_DST_BUF_SIZE);
                             memset(dst1, 0, AVG_DST_BUF_SIZE);
 
-                            call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
-                            call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0, o1);
+                            call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1);
+                            call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0 + o1);
                             if (memcmp(dst0, dst1, DST_BUF_SIZE))
                                 fail();
                             if (w == h)
-                                bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
+                                bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1);
                         }
                     }
                 }
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2026-02-22  6:23 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-22  2:06 [FFmpeg-devel] [PR] avcodec/vvc/inter: Combine offsets early (PR #22246) mkver via ffmpeg-devel

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git