* [FFmpeg-devel] [PR] avcodec/vvc/inter: Combine offsets early (PR #22246)
@ 2026-02-22 2:06 mkver via ffmpeg-devel
0 siblings, 0 replies; only message in thread
From: mkver via ffmpeg-devel @ 2026-02-22 2:06 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: mkver
PR #22246 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22246
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22246.patch
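For bi-predicted weighted averages, only the sum
of the two offsets is ever used, so add the two early:
the caller (apply_averaging) now sums them and the w_avg DSP
functions take a single combined offset instead of o0 and o1.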
From 4e16436743c12edaa1658f9626a62cefa682e3f2 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Fri, 20 Feb 2026 18:24:24 +0100
Subject: [PATCH] avcodec/vvc/inter: Combine offsets early
For bi-predicted weighted averages, only the sum
of the two offsets is ever used, so add the two early.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
libavcodec/aarch64/vvc/dsp_init.c | 4 ++--
libavcodec/riscv/vvc/dsp_init.c | 2 +-
libavcodec/riscv/vvc/mc_rvv.S | 2 --
libavcodec/vvc/dsp.h | 2 +-
libavcodec/vvc/inter.c | 10 +++++-----
libavcodec/vvc/inter_template.c | 4 ++--
libavcodec/x86/vvc/dsp_init.c | 2 +-
libavcodec/x86/vvc/mc.asm | 5 ++---
tests/checkasm/vvc_mc.c | 8 ++++----
9 files changed, 18 insertions(+), 21 deletions(-)
diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c
index bc2677945e..570070a28c 100644
--- a/libavcodec/aarch64/vvc/dsp_init.c
+++ b/libavcodec/aarch64/vvc/dsp_init.c
@@ -107,10 +107,10 @@ void ff_vvc_w_avg_12_neon(uint8_t *_dst, ptrdiff_t _dst_stride,
#define W_AVG_FUN(bit_depth) \
static void vvc_w_avg_ ## bit_depth(uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *src0, const int16_t *src1, int width, int height, \
- int denom, int w0, int w1, int o0, int o1) \
+ int denom, int w0, int w1, int o) \
{ \
int shift = denom + FFMAX(3, 15 - bit_depth); \
- int offset = ((o0 + o1) * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \
+ int offset = (o * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \
uintptr_t w0_w1 = ((uintptr_t)w0 << 32) | (uint32_t)w1; \
uintptr_t offset_shift = ((uintptr_t)offset << 32) | (uint32_t)shift; \
ff_vvc_w_avg_ ## bit_depth ## _neon(dst, dst_stride, src0, src1, width, height, w0_w1, offset_shift); \
diff --git a/libavcodec/riscv/vvc/dsp_init.c b/libavcodec/riscv/vvc/dsp_init.c
index d7a89f4779..f8fde41529 100644
--- a/libavcodec/riscv/vvc/dsp_init.c
+++ b/libavcodec/riscv/vvc/dsp_init.c
@@ -34,7 +34,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *src0, const int16_t *src1, int width, int height); \
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *src0, const int16_t *src1, int width, int height, \
- int denom, int w0, int w1, int o0, int o1);
+ int denom, int w0, int w1, int o);
AVG_PROTOTYPES(8, rvv_128)
AVG_PROTOTYPES(8, rvv_256)
diff --git a/libavcodec/riscv/vvc/mc_rvv.S b/libavcodec/riscv/vvc/mc_rvv.S
index e6b2aadafe..a612290e3a 100644
--- a/libavcodec/riscv/vvc/mc_rvv.S
+++ b/libavcodec/riscv/vvc/mc_rvv.S
@@ -163,9 +163,7 @@ func ff_vvc_w_avg_8_rvv_\vlen\(), zve32x, zbb, zba
addi t6, a6, 7
ld t3, (sp)
ld t4, 8(sp)
- ld t5, 16(sp)
addi t4, t4, 1 // o0 + o1 + 1
- add t4, t4, t5
addi t5, t6, -1 // shift - 1
sll t4, t4, t5
POW2_J \vlen, 2, a4
diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h
index ae22900931..29cdd7e8f4 100644
--- a/libavcodec/vvc/dsp.h
+++ b/libavcodec/vvc/dsp.h
@@ -75,7 +75,7 @@ typedef struct VVCInterDSPContext {
void (*w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
const int16_t *src0, const int16_t *src1, int width, int height,
- int denom, int w0, int w1, int o0, int o1);
+ int denom, int w0, int w1, int o);
void (*put_ciip)(uint8_t *dst, ptrdiff_t dst_stride, int width, int height,
const uint8_t *inter, ptrdiff_t inter_stride, int inter_weight);
diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
index 85e0665a75..7d6e79e49b 100644
--- a/libavcodec/vvc/inter.c
+++ b/libavcodec/vvc/inter.c
@@ -232,22 +232,22 @@ static void apply_averaging(uint8_t *dst, const ptrdiff_t dst_stride,
return;
}
- int denom, w0, w1, o1, o2;
+ int denom, w0, w1, o;
if (bcw_idx) {
denom = 2;
w1 = bcw_w_lut[bcw_idx];
w0 = 8 - w1;
- o1 = o2 = 0;
+ o = 0;
} else {
const PredWeightTable *w = pps->r->pps_wp_info_in_ph_flag ? &fc->ps.ph.pwt : &sh->pwt;
denom = w->log2_denom[c_idx > 0];
w0 = w->weight[L0][c_idx][mvf->ref_idx[L0]];
w1 = w->weight[L1][c_idx][mvf->ref_idx[L1]];
- o1 = w->offset[L0][c_idx][mvf->ref_idx[L0]];
- o2 = w->offset[L1][c_idx][mvf->ref_idx[L1]];
+ o = w->offset[L0][c_idx][mvf->ref_idx[L0]]
+ + w->offset[L1][c_idx][mvf->ref_idx[L1]];
}
- fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o1, o2);
+ fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o);
}
#define INTER_FILTER(t, frac) (is_chroma ? ff_vvc_inter_chroma_filters[t][frac] : ff_vvc_inter_luma_filters[t][frac])
diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c
index aee4994c17..efa432d1fd 100644
--- a/libavcodec/vvc/inter_template.c
+++ b/libavcodec/vvc/inter_template.c
@@ -201,12 +201,12 @@ static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
const int16_t *src0, const int16_t *src1, const int width, const int height,
- const int denom, const int w0, const int w1, const int o0, const int o1)
+ const int denom, const int w0, const int w1, const int o)
{
pixel *dst = (pixel*)_dst;
const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
const int shift = denom + FFMAX(3, 15 - BIT_DEPTH);
- const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
+ const int offset = (o * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++)
diff --git a/libavcodec/x86/vvc/dsp_init.c b/libavcodec/x86/vvc/dsp_init.c
index cd3d02c0fb..37ddbcb73b 100644
--- a/libavcodec/x86/vvc/dsp_init.c
+++ b/libavcodec/x86/vvc/dsp_init.c
@@ -231,7 +231,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *src0, const int16_t *src1, int width, int height);\
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *src0, const int16_t *src1, int width, int height, \
- int denom, int w0, int w1, int o0, int o1); \
+ int denom, int w0, int w1, int o); \
c->inter.avg = bf(ff_vvc_avg, bd, opt); \
c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \
} while (0)
diff --git a/libavcodec/x86/vvc/mc.asm b/libavcodec/x86/vvc/mc.asm
index 8ba493aebd..4f078ea8d0 100644
--- a/libavcodec/x86/vvc/mc.asm
+++ b/libavcodec/x86/vvc/mc.asm
@@ -244,7 +244,7 @@ cglobal vvc_avg_%2, 4, 7, 5, dst, stride, src0, src1, w, h
;void ff_vvc_w_avg_%2_avx(uint8_t *dst, ptrdiff_t dst_stride,
; const int16_t *src0, const int16_t *src1, int width, int height,
-; int denom, intptr_t w0, int w1, int o0, int o1);
+; int denom, intptr_t w0, int w1, int o);
%macro VVC_W_AVG_AVX2 3
cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w, h
%if UNIX64
@@ -256,8 +256,7 @@ cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w,
%endif
mov t1d, r6m ; denom
- mov t0d, r9m ; o0
- add t0d, r10m ; o1
+ mov t0d, r9m ; o0 + o1
movifnidn t2d, r8m ; w1
add t1d, 15-%2
%if %2 != 8
diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c
index 754cf19065..623b6142f1 100644
--- a/tests/checkasm/vvc_mc.c
+++ b/tests/checkasm/vvc_mc.c
@@ -306,7 +306,7 @@ static void check_avg(void)
{
declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *src0, const int16_t *src1, int width, int height,
- int denom, int w0, int w1, int o0, int o1);
+ int denom, int w0, int w1, int o);
{
const int denom = rnd() % 8;
const int w0 = rnd() % 256 - 128;
@@ -317,12 +317,12 @@ static void check_avg(void)
memset(dst0, 0, AVG_DST_BUF_SIZE);
memset(dst1, 0, AVG_DST_BUF_SIZE);
- call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
- call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0, o1);
+ call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1);
+ call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0 + o1);
if (memcmp(dst0, dst1, DST_BUF_SIZE))
fail();
if (w == h)
- bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
+ bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1);
}
}
}
--
2.52.0
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2026-02-22 6:23 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-02-22 2:06 [FFmpeg-devel] [PR] avcodec/vvc/inter: Combine offsets early (PR #22246) mkver via ffmpeg-devel
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git