* [FFmpeg-devel] [PATCH 02/11] avcodec/vvcdec: refact, combine bs tab with tu tab
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
@ 2024-07-28 3:17 ` Nuo Mi
2024-07-28 3:17 ` [FFmpeg-devel] [PATCH 03/11] avcodec/vvcdec: remove unnecessary perframe initializations Nuo Mi
` (8 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:17 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
---
libavcodec/vvc/dec.c | 23 ++++-------------------
libavcodec/vvc/dec.h | 2 --
libavcodec/vvc/filter.c | 4 ++--
3 files changed, 6 insertions(+), 23 deletions(-)
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index d04f68e4cf..ee009d4181 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -178,24 +178,12 @@ static void min_tu_tl_init(TabList *l, VVCFrameContext *fc)
TL_ADD(tu_coded_flag[i], pic_size_in_min_tu);
TL_ADD(qp[i], pic_size_in_min_tu);
}
-}
-
-static void bs_tl_init(TabList *l, VVCFrameContext *fc)
-{
- const VVCPPS *pps = fc->ps.pps;
- const int bs_width = pps ? (pps->width >> 2) + 1 : 0;
- const int bs_height = pps ? (pps->height >> 2) + 1 : 0;
- const int bs_count = bs_width * bs_height;
- const int changed = fc->tab.sz.bs_width != bs_width ||
- fc->tab.sz.bs_height != bs_height;
-
- tl_init(l, 1, changed);
- for (int i = 0; i < 2; i++) {
+ for (int vertical = 0; vertical < 2; vertical++) {
for (int j = 0; j < VVC_MAX_SAMPLE_ARRAYS; j++)
- TL_ADD(bs[i][j], bs_count);
- TL_ADD(max_len_p[i], bs_count);
- TL_ADD(max_len_q[i], bs_count);
+ TL_ADD(bs[vertical][j], pic_size_in_min_tu);
+ TL_ADD(max_len_p[vertical], pic_size_in_min_tu);
+ TL_ADD(max_len_q[vertical], pic_size_in_min_tu);
}
}
@@ -297,7 +285,6 @@ static int frame_context_for_each_tl(VVCFrameContext *fc, int (*unary_fn)(TabLis
min_cb_tl_init,
min_pu_tl_init,
min_tu_tl_init,
- bs_tl_init,
pixel_buffer_nz_tl_init,
msm_tl_init,
ispmf_tl_init,
@@ -376,8 +363,6 @@ static int pic_arrays_init(VVCContext *s, VVCFrameContext *fc)
fc->tab.sz.ctu_height = pps->ctb_height;
fc->tab.sz.chroma_format_idc = sps->r->sps_chroma_format_idc;
fc->tab.sz.pixel_shift = sps->pixel_shift;
- fc->tab.sz.bs_width = (fc->ps.pps->width >> 2) + 1;
- fc->tab.sz.bs_height = (fc->ps.pps->height >> 2) + 1;
return 0;
}
diff --git a/libavcodec/vvc/dec.h b/libavcodec/vvc/dec.h
index a8492f1398..eb8d0bad6b 100644
--- a/libavcodec/vvc/dec.h
+++ b/libavcodec/vvc/dec.h
@@ -205,8 +205,6 @@ typedef struct VVCFrameContext {
int height;
int chroma_format_idc;
int pixel_shift;
- int bs_width;
- int bs_height;
int ibc_buffer_width; ///< IbcBufWidth
} sz;
} tab;
diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 7ffcb29f47..c96e59df89 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -372,8 +372,8 @@ void ff_vvc_sao_filter(VVCLocalContext *lc, int x0, int y0)
}
}
-#define TAB_BS(t, x, y) (t)[((y) >> 2) * (fc->tab.sz.bs_width) + ((x) >> 2)]
-#define TAB_MAX_LEN(t, x, y) (t)[((y) >> 2) * (fc->tab.sz.bs_width) + ((x) >> 2)]
+#define TAB_BS(t, x, y) (t)[((y) >> MIN_TU_LOG2) * (fc->ps.pps->min_tu_width) + ((x) >> MIN_TU_LOG2)]
+#define TAB_MAX_LEN(t, x, y) (t)[((y) >> MIN_TU_LOG2) * (fc->ps.pps->min_tu_width) + ((x) >> MIN_TU_LOG2)]
//8 samples a time
#define DEBLOCK_STEP 8
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 03/11] avcodec/vvcdec: remove unnecessary perframe initializations
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
2024-07-28 3:17 ` [FFmpeg-devel] [PATCH 02/11] avcodec/vvcdec: refact, combine bs tab with tu tab Nuo Mi
@ 2024-07-28 3:17 ` Nuo Mi
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 04/11] avcodec/vvcdec: split ctu table to zero init and no zero init parts Nuo Mi
` (7 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:17 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
Stop zero-initializing the following tables on every frame:
deblock, sao, alf
skip, imtf, ipm, cqt_depth, cb_pos_x, cb_pos_y, cb_height, cp_mv,
tb_pos_x0, tb_pos_y0, tb_width, tb_height
---
libavcodec/vvc/dec.c | 63 ++++++++++++++++++++++++++++++++------------
1 file changed, 46 insertions(+), 17 deletions(-)
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index ee009d4181..d609fc5184 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -99,9 +99,6 @@ static void ctu_tl_init(TabList *l, VVCFrameContext *fc)
tl_init(l, 1, changed);
- TL_ADD(deblock, ctu_count);
- TL_ADD(sao, ctu_count);
- TL_ADD(alf, ctu_count);
TL_ADD(ctus, ctu_count);
}
@@ -114,6 +111,10 @@ static void ctu_nz_tl_init(TabList *l, VVCFrameContext *fc)
const int changed = fc->tab.sz.ctu_count != ctu_count || fc->tab.sz.ctu_size != ctu_size;
tl_init(l, 0, changed);
+
+ TL_ADD(deblock, ctu_count);
+ TL_ADD(sao, ctu_count);
+ TL_ADD(alf, ctu_count);
TL_ADD(slice_idx, ctu_count);
TL_ADD(coeffs, ctu_count * ctu_size * VVC_MAX_SAMPLE_ARRAYS);
}
@@ -126,21 +127,34 @@ static void min_cb_tl_init(TabList *l, VVCFrameContext *fc)
tl_init(l, 1, changed);
- TL_ADD(skip, pic_size_in_min_cb);
TL_ADD(imf, pic_size_in_min_cb);
- TL_ADD(imtf, pic_size_in_min_cb);
TL_ADD(imm, pic_size_in_min_cb);
+
+ for (int i = LUMA; i <= CHROMA; i++) {
+ TL_ADD(cb_width[i], pic_size_in_min_cb); //is_a0_available requires this
+ TL_ADD(cpm[i], pic_size_in_min_cb);
+ };
+}
+
+static void min_cb_nz_tl_init(TabList *l, VVCFrameContext *fc)
+{
+ const VVCPPS *pps = fc->ps.pps;
+ const int pic_size_in_min_cb = pps ? pps->min_cb_width * pps->min_cb_height : 0;
+ const int changed = fc->tab.sz.pic_size_in_min_cb != pic_size_in_min_cb;
+
+ tl_init(l, 0, changed);
+
+ TL_ADD(skip, pic_size_in_min_cb);
+ TL_ADD(imtf, pic_size_in_min_cb);
TL_ADD(ipm, pic_size_in_min_cb);
for (int i = LUMA; i <= CHROMA; i++) {
+ TL_ADD(cqt_depth[i], pic_size_in_min_cb);
TL_ADD(cb_pos_x[i], pic_size_in_min_cb);
TL_ADD(cb_pos_y[i], pic_size_in_min_cb);
- TL_ADD(cb_width[i], pic_size_in_min_cb);
TL_ADD(cb_height[i], pic_size_in_min_cb);
- TL_ADD(cqt_depth[i], pic_size_in_min_cb);
- TL_ADD(cpm[i], pic_size_in_min_cb);
TL_ADD(cp_mv[i], pic_size_in_min_cb * MAX_CONTROL_POINTS);
- };
+ }
}
static void min_pu_tl_init(TabList *l, VVCFrameContext *fc)
@@ -166,22 +180,35 @@ static void min_tu_tl_init(TabList *l, VVCFrameContext *fc)
tl_init(l, 1, changed);
TL_ADD(tu_joint_cbcr_residual_flag, pic_size_in_min_tu);
+
+ for (int i = LUMA; i <= CHROMA; i++)
+ TL_ADD(pcmf[i], pic_size_in_min_tu);
+
+ for (int i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++) {
+ TL_ADD(tu_coded_flag[i], pic_size_in_min_tu);
+ TL_ADD(qp[i], pic_size_in_min_tu);
+
+ for (int vertical = 0; vertical < 2; vertical++)
+ TL_ADD(bs[vertical][i], pic_size_in_min_tu);
+ }
+}
+
+static void min_tu_nz_tl_init(TabList *l, VVCFrameContext *fc)
+{
+ const VVCPPS *pps = fc->ps.pps;
+ const int pic_size_in_min_tu = pps ? pps->min_tu_width * pps->min_tu_height : 0;
+ const int changed = fc->tab.sz.pic_size_in_min_tu != pic_size_in_min_tu;
+
+ tl_init(l, 0, changed);
+
for (int i = LUMA; i <= CHROMA; i++) {
TL_ADD(tb_pos_x0[i], pic_size_in_min_tu);
TL_ADD(tb_pos_y0[i], pic_size_in_min_tu);
TL_ADD(tb_width[i], pic_size_in_min_tu);
TL_ADD(tb_height[i], pic_size_in_min_tu);
- TL_ADD(pcmf[i], pic_size_in_min_tu);
- }
-
- for (int i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++) {
- TL_ADD(tu_coded_flag[i], pic_size_in_min_tu);
- TL_ADD(qp[i], pic_size_in_min_tu);
}
for (int vertical = 0; vertical < 2; vertical++) {
- for (int j = 0; j < VVC_MAX_SAMPLE_ARRAYS; j++)
- TL_ADD(bs[vertical][j], pic_size_in_min_tu);
TL_ADD(max_len_p[vertical], pic_size_in_min_tu);
TL_ADD(max_len_q[vertical], pic_size_in_min_tu);
}
@@ -283,8 +310,10 @@ static int frame_context_for_each_tl(VVCFrameContext *fc, int (*unary_fn)(TabLis
ctu_tl_init,
ctu_nz_tl_init,
min_cb_tl_init,
+ min_cb_nz_tl_init,
min_pu_tl_init,
min_tu_tl_init,
+ min_tu_nz_tl_init,
pixel_buffer_nz_tl_init,
msm_tl_init,
ispmf_tl_init,
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 04/11] avcodec/vvcdec: split ctu table to zero init and no zero init parts
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
2024-07-28 3:17 ` [FFmpeg-devel] [PATCH 02/11] avcodec/vvcdec: refact, combine bs tab with tu tab Nuo Mi
2024-07-28 3:17 ` [FFmpeg-devel] [PATCH 03/11] avcodec/vvcdec: remove unnecessary perframe initializations Nuo Mi
@ 2024-07-28 3:18 ` Nuo Mi
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 05/11] avcodec/vvcdec: refact out is_available from is_a0_available Nuo Mi
` (6 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
cus needs to be initialized to zero; the other parts do not.
---
libavcodec/vvc/ctu.c | 11 ++++++-----
libavcodec/vvc/ctu.h | 3 +--
libavcodec/vvc/dec.c | 21 +++++++--------------
libavcodec/vvc/dec.h | 5 +++--
libavcodec/vvc/inter.c | 3 +--
libavcodec/vvc/intra.c | 5 ++---
6 files changed, 20 insertions(+), 28 deletions(-)
diff --git a/libavcodec/vvc/ctu.c b/libavcodec/vvc/ctu.c
index 3f9a75190b..d39dd579ae 100644
--- a/libavcodec/vvc/ctu.c
+++ b/libavcodec/vvc/ctu.c
@@ -1174,7 +1174,7 @@ static CodingUnit* alloc_cu(VVCLocalContext *lc, const int x0, const int y0)
const VVCPPS *pps = fc->ps.pps;
const int rx = x0 >> sps->ctb_log2_size_y;
const int ry = y0 >> sps->ctb_log2_size_y;
- CTU *ctu = fc->tab.ctus + ry * pps->ctb_width + rx;
+ CodingUnit **cus = fc->tab.cus + ry * pps->ctb_width + rx;
CodingUnit *cu = ff_refstruct_pool_get(fc->cu_pool);
if (!cu)
@@ -1184,7 +1184,7 @@ static CodingUnit* alloc_cu(VVCLocalContext *lc, const int x0, const int y0)
if (lc->cu)
lc->cu->next = cu;
else
- ctu->cus = cu;
+ *cus = cu;
lc->cu = cu;
return cu;
@@ -2429,7 +2429,9 @@ static void ctu_get_pred(VVCLocalContext *lc, const int rs)
const VVCFrameContext *fc = lc->fc;
const H266RawSliceHeader *rsh = lc->sc->sh.r;
CTU *ctu = fc->tab.ctus + rs;
- const CodingUnit *cu = ctu->cus;
+ const CodingUnit *cu = fc->tab.cus[rs];
+
+ ctu->has_dmvr = 0;
if (IS_I(rsh))
return;
@@ -2526,9 +2528,8 @@ void ff_vvc_set_neighbour_available(VVCLocalContext *lc,
lc->na.cand_up_right = lc->na.cand_up_right_sap && (x0 + w) < lc->end_of_tiles_x;
}
-void ff_vvc_ctu_free_cus(CTU *ctu)
+void ff_vvc_ctu_free_cus(CodingUnit **cus)
{
- CodingUnit **cus = &ctu->cus;
while (*cus) {
CodingUnit *cu = *cus;
TransformUnit **head = &cu->tus.head;
diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
index d5c3e8d96f..eab4612561 100644
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@@ -329,7 +329,6 @@ typedef struct CodingUnit {
} CodingUnit;
typedef struct CTU {
- CodingUnit *cus;
int max_y[2][VVC_MAX_REF_ENTRIES];
int max_y_idx[2];
int has_dmvr;
@@ -484,7 +483,7 @@ int ff_vvc_coding_tree_unit(VVCLocalContext *lc, int ctu_idx, int rs, int rx, in
//utils
void ff_vvc_set_neighbour_available(VVCLocalContext *lc, int x0, int y0, int w, int h);
void ff_vvc_decode_neighbour(VVCLocalContext *lc, int x_ctb, int y_ctb, int rx, int ry, int rs);
-void ff_vvc_ctu_free_cus(CTU *ctu);
+void ff_vvc_ctu_free_cus(CodingUnit **cus);
int ff_vvc_get_qPy(const VVCFrameContext *fc, int xc, int yc);
void ff_vvc_ep_init_stat_coeff(EntryPoint *ep, int bit_depth, int persistent_rice_adaptation_enabled_flag);
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index d609fc5184..568229d2c3 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -91,17 +91,6 @@ static int tl_create(TabList *l)
return 0;
}
-static void ctu_tl_init(TabList *l, VVCFrameContext *fc)
-{
- const VVCPPS *pps = fc->ps.pps;
- const int ctu_count = pps ? pps->ctb_count : 0;
- const int changed = fc->tab.sz.ctu_count != ctu_count;
-
- tl_init(l, 1, changed);
-
- TL_ADD(ctus, ctu_count);
-}
-
static void ctu_nz_tl_init(TabList *l, VVCFrameContext *fc)
{
const VVCSPS *sps = fc->ps.sps;
@@ -112,6 +101,8 @@ static void ctu_nz_tl_init(TabList *l, VVCFrameContext *fc)
tl_init(l, 0, changed);
+ TL_ADD(cus, ctu_count);
+ TL_ADD(ctus, ctu_count);
TL_ADD(deblock, ctu_count);
TL_ADD(sao, ctu_count);
TL_ADD(alf, ctu_count);
@@ -307,7 +298,6 @@ typedef void (*tl_init_fn)(TabList *l, VVCFrameContext *fc);
static int frame_context_for_each_tl(VVCFrameContext *fc, int (*unary_fn)(TabList *l))
{
const tl_init_fn init[] = {
- ctu_tl_init,
ctu_nz_tl_init,
min_cb_tl_init,
min_cb_nz_tl_init,
@@ -334,9 +324,9 @@ static int frame_context_for_each_tl(VVCFrameContext *fc, int (*unary_fn)(TabLis
static void free_cus(VVCFrameContext *fc)
{
- if (fc->tab.ctus) {
+ if (fc->tab.cus) {
for (int i = 0; i < fc->tab.sz.ctu_count; i++)
- ff_vvc_ctu_free_cus(fc->tab.ctus + i);
+ ff_vvc_ctu_free_cus(fc->tab.cus + i);
}
}
@@ -364,6 +354,9 @@ static int pic_arrays_init(VVCContext *s, VVCFrameContext *fc)
if (ret < 0)
return ret;
+ // for error handling case, we may call free_cus before VVC_TASK_STAGE_INIT, so we need to set cus to 0 here
+ memset(fc->tab.cus, 0, sizeof(*fc->tab.cus) * ctu_count);
+
memset(fc->tab.slice_idx, -1, sizeof(*fc->tab.slice_idx) * ctu_count);
if (fc->tab.sz.ctu_count != ctu_count) {
diff --git a/libavcodec/vvc/dec.h b/libavcodec/vvc/dec.h
index eb8d0bad6b..d27cf52ca2 100644
--- a/libavcodec/vvc/dec.h
+++ b/libavcodec/vvc/dec.h
@@ -187,8 +187,9 @@ typedef struct VVCFrameContext {
uint8_t *alf_pixel_buffer_h[VVC_MAX_SAMPLE_ARRAYS][2];
uint8_t *alf_pixel_buffer_v[VVC_MAX_SAMPLE_ARRAYS][2];
- int *coeffs;
- struct CTU *ctus;
+ int *coeffs;
+ struct CTU *ctus;
+ struct CodingUnit **cus;
uint8_t *ibc_vir_buf[VVC_MAX_SAMPLE_ARRAYS]; ///< IbcVirBuf[]
diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c
index 9578fd8de4..64a9dd1e46 100644
--- a/libavcodec/vvc/inter.c
+++ b/libavcodec/vvc/inter.c
@@ -1003,8 +1003,7 @@ static int has_inter_luma(const CodingUnit *cu)
int ff_vvc_predict_inter(VVCLocalContext *lc, const int rs)
{
const VVCFrameContext *fc = lc->fc;
- const CTU *ctu = fc->tab.ctus + rs;
- CodingUnit *cu = ctu->cus;
+ CodingUnit *cu = fc->tab.cus[rs];
while (cu) {
lc->cu = cu;
diff --git a/libavcodec/vvc/intra.c b/libavcodec/vvc/intra.c
index f77a012f09..e79a83bc30 100644
--- a/libavcodec/vvc/intra.c
+++ b/libavcodec/vvc/intra.c
@@ -664,8 +664,7 @@ int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const in
const VVCSPS *sps = fc->ps.sps;
const int x_ctb = rx << sps->ctb_log2_size_y;
const int y_ctb = ry << sps->ctb_log2_size_y;
- CTU *ctu = fc->tab.ctus + rs;
- CodingUnit *cu = ctu->cus;
+ CodingUnit *cu = fc->tab.cus[rs];
int ret = 0;
lc->num_ras[0] = lc->num_ras[1] = 0;
@@ -691,7 +690,7 @@ int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const in
ibc_fill_vir_buf(lc, cu);
cu = cu->next;
}
- ff_vvc_ctu_free_cus(ctu);
+ ff_vvc_ctu_free_cus(fc->tab.cus + rs);
return ret;
}
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 05/11] avcodec/vvcdec: refact out is_available from is_a0_available
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
` (2 preceding siblings ...)
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 04/11] avcodec/vvcdec: split ctu table to zero init and no zero init parts Nuo Mi
@ 2024-07-28 3:18 ` Nuo Mi
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 06/11] avcodec/vvcdec: do not zero frame mvf table Nuo Mi
` (5 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
---
libavcodec/vvc/mvs.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/libavcodec/vvc/mvs.c b/libavcodec/vvc/mvs.c
index 1788a7150b..905edf1cd0 100644
--- a/libavcodec/vvc/mvs.c
+++ b/libavcodec/vvc/mvs.c
@@ -545,6 +545,16 @@ typedef struct NeighbourContext {
const VVCLocalContext *lc;
} NeighbourContext;
+static int is_available(const VVCFrameContext *fc, const int x0, const int y0)
+{
+ const VVCSPS *sps = fc->ps.sps;
+ const int x = x0 >> sps->min_cb_log2_size_y;
+ const int y = y0 >> sps->min_cb_log2_size_y;
+ const int min_cb_width = fc->ps.pps->min_cb_width;
+
+ return SAMPLE_CTB(fc->tab.cb_width[0], x, y) != 0;
+}
+
static int is_a0_available(const VVCLocalContext *lc, const CodingUnit *cu)
{
const VVCFrameContext *fc = lc->fc;
@@ -555,15 +565,11 @@ static int is_a0_available(const VVCLocalContext *lc, const CodingUnit *cu)
if (!x0b && !lc->ctb_left_flag) {
cand_bottom_left = 0;
} else {
- const int log2_min_cb_size = sps->min_cb_log2_size_y;
- const int min_cb_width = fc->ps.pps->min_cb_width;
- const int x = (cu->x0 - 1) >> log2_min_cb_size;
- const int y = (cu->y0 + cu->cb_height) >> log2_min_cb_size;
- const int max_y = FFMIN(fc->ps.pps->height, ((cu->y0 >> sps->ctb_log2_size_y) + 1) << sps->ctb_log2_size_y);
+ const int max_y = FFMIN(fc->ps.pps->height, ((cu->y0 >> sps->ctb_log2_size_y) + 1) << sps->ctb_log2_size_y);
if (cu->y0 + cu->cb_height >= max_y)
cand_bottom_left = 0;
else
- cand_bottom_left = SAMPLE_CTB(fc->tab.cb_width[0], x, y) != 0;
+ cand_bottom_left = is_available(fc, cu->x0 - 1, cu->y0 + cu->cb_height);
}
return cand_bottom_left;
}
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 06/11] avcodec/vvcdec: do not zero frame mvf table
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
` (3 preceding siblings ...)
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 05/11] avcodec/vvcdec: refact out is_available from is_a0_available Nuo Mi
@ 2024-07-28 3:18 ` Nuo Mi
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 07/11] avcodec/vvcdec: check_available, use && instead of &= for shortcut evaluation Nuo Mi
` (4 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
---
libavcodec/vvc/dec.c | 11 +++++++++++
libavcodec/vvc/mvs.c | 2 +-
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index 568229d2c3..be23f2bd54 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -159,6 +159,16 @@ static void min_pu_tl_init(TabList *l, VVCFrameContext *fc)
TL_ADD(msf, pic_size_in_min_pu);
TL_ADD(iaf, pic_size_in_min_pu);
TL_ADD(mmi, pic_size_in_min_pu);
+}
+
+static void min_pu_nz_tl_init(TabList *l, VVCFrameContext *fc)
+{
+ const VVCPPS *pps = fc->ps.pps;
+ const int pic_size_in_min_pu = pps ? pps->min_pu_width * pps->min_pu_height : 0;
+ const int changed = fc->tab.sz.pic_size_in_min_pu != pic_size_in_min_pu;
+
+ tl_init(l, 0, changed);
+
TL_ADD(mvf, pic_size_in_min_pu);
}
@@ -302,6 +312,7 @@ static int frame_context_for_each_tl(VVCFrameContext *fc, int (*unary_fn)(TabLis
min_cb_tl_init,
min_cb_nz_tl_init,
min_pu_tl_init,
+ min_pu_nz_tl_init,
min_tu_tl_init,
min_tu_nz_tl_init,
pixel_buffer_nz_tl_init,
diff --git a/libavcodec/vvc/mvs.c b/libavcodec/vvc/mvs.c
index 905edf1cd0..fe047cfd5a 100644
--- a/libavcodec/vvc/mvs.c
+++ b/libavcodec/vvc/mvs.c
@@ -614,7 +614,7 @@ static int check_available(Neighbour *n, const VVCLocalContext *lc, const int ch
if (!n->checked) {
n->checked = 1;
n->available = !sps->r->sps_entropy_coding_sync_enabled_flag || ((n->x >> sps->ctb_log2_size_y) <= (cu->x0 >> sps->ctb_log2_size_y));
- n->available &= cu->pred_mode == pred_flag_to_mode(TAB_MVF(n->x, n->y).pred_flag);
+ n->available &= is_available(fc, n->x, n->y) && cu->pred_mode == pred_flag_to_mode(TAB_MVF(n->x, n->y).pred_flag);
if (check_mer)
n->available &= !is_same_mer(fc, n->x, n->y, cu->x0, cu->y0);
}
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 07/11] avcodec/vvcdec: check_available, use && instead of &= for shortcut evaluation
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
` (4 preceding siblings ...)
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 06/11] avcodec/vvcdec: do not zero frame mvf table Nuo Mi
@ 2024-07-28 3:18 ` Nuo Mi
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 08/11] avcodec/vvcdec: do not zero frame cpm table Nuo Mi
` (3 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
---
libavcodec/vvc/mvs.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libavcodec/vvc/mvs.c b/libavcodec/vvc/mvs.c
index fe047cfd5a..e3f18f1861 100644
--- a/libavcodec/vvc/mvs.c
+++ b/libavcodec/vvc/mvs.c
@@ -614,9 +614,9 @@ static int check_available(Neighbour *n, const VVCLocalContext *lc, const int ch
if (!n->checked) {
n->checked = 1;
n->available = !sps->r->sps_entropy_coding_sync_enabled_flag || ((n->x >> sps->ctb_log2_size_y) <= (cu->x0 >> sps->ctb_log2_size_y));
- n->available &= is_available(fc, n->x, n->y) && cu->pred_mode == pred_flag_to_mode(TAB_MVF(n->x, n->y).pred_flag);
+ n->available = n->available && is_available(fc, n->x, n->y) && cu->pred_mode == pred_flag_to_mode(TAB_MVF(n->x, n->y).pred_flag);
if (check_mer)
- n->available &= !is_same_mer(fc, n->x, n->y, cu->x0, cu->y0);
+ n->available = n->available && !is_same_mer(fc, n->x, n->y, cu->x0, cu->y0);
}
return n->available;
}
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 08/11] avcodec/vvcdec: do not zero frame cpm table
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
` (5 preceding siblings ...)
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 07/11] avcodec/vvcdec: check_available, use && instead of &= for shortcut evaluation Nuo Mi
@ 2024-07-28 3:18 ` Nuo Mi
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 09/11] avcodec/vvcdec: do not zero frame msf mmi table Nuo Mi
` (2 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
---
libavcodec/vvc/ctu.c | 15 ++++++++-------
libavcodec/vvc/dec.c | 5 ++---
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/libavcodec/vvc/ctu.c b/libavcodec/vvc/ctu.c
index d39dd579ae..579337759f 100644
--- a/libavcodec/vvc/ctu.c
+++ b/libavcodec/vvc/ctu.c
@@ -1080,10 +1080,15 @@ static PredMode pred_mode_decode(VVCLocalContext *lc,
}
if (pred_mode_ibc_flag)
pred_mode = MODE_IBC;
- return pred_mode;
} else {
- return MODE_INTRA;
+ pred_mode = MODE_INTRA;
}
+
+ set_cb_tab(lc, fc->tab.cpm[cu->ch_type], pred_mode);
+ if (tree_type == SINGLE_TREE)
+ set_cb_tab(lc, fc->tab.cpm[CHROMA], pred_mode);
+
+ return pred_mode;
}
static void sbt_info(VVCLocalContext *lc, const VVCSPS *sps)
@@ -1232,12 +1237,8 @@ static void set_cu_tabs(const VVCLocalContext *lc, const CodingUnit *cu)
const VVCFrameContext *fc = lc->fc;
const TransformUnit *tu = cu->tus.head;
- if (cu->tree_type != DUAL_TREE_CHROMA) {
- set_cb_tab(lc, fc->tab.cpm[LUMA], cu->pred_mode);
+ if (cu->tree_type != DUAL_TREE_CHROMA)
set_cb_tab(lc, fc->tab.skip, cu->skip_flag);
- }
- if (fc->ps.sps->r->sps_chroma_format_idc && cu->tree_type != DUAL_TREE_LUMA)
- set_cb_tab(lc, fc->tab.cpm[CHROMA], cu->pred_mode);
while (tu) {
for (int j = 0; j < tu->nb_tbs; j++) {
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index be23f2bd54..e078f9387b 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -121,10 +121,8 @@ static void min_cb_tl_init(TabList *l, VVCFrameContext *fc)
TL_ADD(imf, pic_size_in_min_cb);
TL_ADD(imm, pic_size_in_min_cb);
- for (int i = LUMA; i <= CHROMA; i++) {
+ for (int i = LUMA; i <= CHROMA; i++)
TL_ADD(cb_width[i], pic_size_in_min_cb); //is_a0_available requires this
- TL_ADD(cpm[i], pic_size_in_min_cb);
- };
}
static void min_cb_nz_tl_init(TabList *l, VVCFrameContext *fc)
@@ -145,6 +143,7 @@ static void min_cb_nz_tl_init(TabList *l, VVCFrameContext *fc)
TL_ADD(cb_pos_y[i], pic_size_in_min_cb);
TL_ADD(cb_height[i], pic_size_in_min_cb);
TL_ADD(cp_mv[i], pic_size_in_min_cb * MAX_CONTROL_POINTS);
+ TL_ADD(cpm[i], pic_size_in_min_cb);
}
}
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 09/11] avcodec/vvcdec: do not zero frame msf mmi table
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
` (6 preceding siblings ...)
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 08/11] avcodec/vvcdec: do not zero frame cpm table Nuo Mi
@ 2024-07-28 3:18 ` Nuo Mi
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 10/11] avcodec/vvcdec: do not zero frame qp table Nuo Mi
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 11/11] avcodec/vvcdec: move frame tab memset from the main thread to worker threads Nuo Mi
9 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
---
libavcodec/vvc/ctu.c | 8 +++++---
libavcodec/vvc/dec.c | 4 ++--
libavcodec/vvc/mvs.c | 1 -
3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/libavcodec/vvc/ctu.c b/libavcodec/vvc/ctu.c
index 579337759f..06b57215a4 100644
--- a/libavcodec/vvc/ctu.c
+++ b/libavcodec/vvc/ctu.c
@@ -1234,9 +1234,12 @@ static CodingUnit* add_cu(VVCLocalContext *lc, const int x0, const int y0,
static void set_cu_tabs(const VVCLocalContext *lc, const CodingUnit *cu)
{
- const VVCFrameContext *fc = lc->fc;
- const TransformUnit *tu = cu->tus.head;
+ const VVCFrameContext *fc = lc->fc;
+ const PredictionUnit *pu = &cu->pu;
+ const TransformUnit *tu = cu->tus.head;
+ set_cb_tab(lc, fc->tab.mmi, pu->mi.motion_model_idc);
+ set_cb_tab(lc, fc->tab.msf, pu->merge_subblock_flag);
if (cu->tree_type != DUAL_TREE_CHROMA)
set_cb_tab(lc, fc->tab.skip, cu->skip_flag);
@@ -1325,7 +1328,6 @@ static void merge_data_subblock(VVCLocalContext *lc)
PredictionUnit *pu = &cu->pu;
int merge_subblock_idx = 0;
- set_cb_tab(lc, fc->tab.msf, pu->merge_subblock_flag);
if (ph->max_num_subblock_merge_cand > 1) {
merge_subblock_idx = ff_vvc_merge_subblock_idx(lc, ph->max_num_subblock_merge_cand);
}
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index e078f9387b..c688f7d70d 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -155,9 +155,7 @@ static void min_pu_tl_init(TabList *l, VVCFrameContext *fc)
tl_init(l, 1, changed);
- TL_ADD(msf, pic_size_in_min_pu);
TL_ADD(iaf, pic_size_in_min_pu);
- TL_ADD(mmi, pic_size_in_min_pu);
}
static void min_pu_nz_tl_init(TabList *l, VVCFrameContext *fc)
@@ -168,6 +166,8 @@ static void min_pu_nz_tl_init(TabList *l, VVCFrameContext *fc)
tl_init(l, 0, changed);
+ TL_ADD(msf, pic_size_in_min_pu);
+ TL_ADD(mmi, pic_size_in_min_pu);
TL_ADD(mvf, pic_size_in_min_pu);
}
diff --git a/libavcodec/vvc/mvs.c b/libavcodec/vvc/mvs.c
index e3f18f1861..86ad310035 100644
--- a/libavcodec/vvc/mvs.c
+++ b/libavcodec/vvc/mvs.c
@@ -399,7 +399,6 @@ static void store_cp_mv(const VVCLocalContext *lc, const MotionInfo *mi, const i
const int offset = (y_cb * min_cb_width + x_cb) * MAX_CONTROL_POINTS;
memcpy(&fc->tab.cp_mv[lx][offset], mi->mv[lx], sizeof(Mv) * num_cp_mv);
- SAMPLE_CTB(fc->tab.mmi, x_cb, y_cb) = mi->motion_model_idc;
}
}
}
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 10/11] avcodec/vvcdec: do not zero frame qp table
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
` (7 preceding siblings ...)
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 09/11] avcodec/vvcdec: do not zero frame msf mmi table Nuo Mi
@ 2024-07-28 3:18 ` Nuo Mi
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 11/11] avcodec/vvcdec: move frame tab memset from the main thread to worker threads Nuo Mi
9 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
For luma, qp can only change at the CU level, so the qp tab size is related to the CU.
For chroma, considering the joint CbCr, the QP tab size is related to the TU.
---
libavcodec/vvc/dec.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index c688f7d70d..575bcfa33d 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -145,6 +145,8 @@ static void min_cb_nz_tl_init(TabList *l, VVCFrameContext *fc)
TL_ADD(cp_mv[i], pic_size_in_min_cb * MAX_CONTROL_POINTS);
TL_ADD(cpm[i], pic_size_in_min_cb);
}
+ // For luma, qp can only change at the CU level, so the qp tab size is related to the CU.
+ TL_ADD(qp[LUMA], pic_size_in_min_cb);
}
static void min_pu_tl_init(TabList *l, VVCFrameContext *fc)
@@ -186,7 +188,6 @@ static void min_tu_tl_init(TabList *l, VVCFrameContext *fc)
for (int i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++) {
TL_ADD(tu_coded_flag[i], pic_size_in_min_tu);
- TL_ADD(qp[i], pic_size_in_min_tu);
for (int vertical = 0; vertical < 2; vertical++)
TL_ADD(bs[vertical][i], pic_size_in_min_tu);
@@ -212,6 +213,10 @@ static void min_tu_nz_tl_init(TabList *l, VVCFrameContext *fc)
TL_ADD(max_len_p[vertical], pic_size_in_min_tu);
TL_ADD(max_len_q[vertical], pic_size_in_min_tu);
}
+
+ // For chroma, considering the joint CbCr, the QP tab size is related to the TU.
+ for (int i = CB; i < VVC_MAX_SAMPLE_ARRAYS; i++)
+ TL_ADD(qp[i], pic_size_in_min_tu);
}
static void pixel_buffer_nz_tl_init(TabList *l, VVCFrameContext *fc)
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 11/11] avcodec/vvcdec: move frame tab memset from the main thread to worker threads
[not found] <20240728031807.462810-1-nuomi2021@gmail.com>
` (8 preceding siblings ...)
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 10/11] avcodec/vvcdec: do not zero frame qp table Nuo Mi
@ 2024-07-28 3:18 ` Nuo Mi
2024-08-11 14:01 ` Nuo Mi
9 siblings, 1 reply; 12+ messages in thread
From: Nuo Mi @ 2024-07-28 3:18 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Nuo Mi
Calling memset on the tables in the main thread can become a bottleneck for the decoder.
For example, if it takes 1% of the processing time for one core, the maximum achievable FPS will be 100.
Moving the memset to worker threads fixes the issue.
---
libavcodec/vvc/dec.c | 13 ++++-
libavcodec/vvc/thread.c | 122 ++++++++++++++++++++++++----------------
libavcodec/vvc/thread.h | 1 +
3 files changed, 85 insertions(+), 51 deletions(-)
diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
index 575bcfa33d..d34713296d 100644
--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@@ -82,7 +82,13 @@ static int tl_create(TabList *l)
if (!*t->tab)
return AVERROR(ENOMEM);
}
- } else if (l->zero) {
+ }
+ return 0;
+}
+
+static int tl_zero(TabList *l)
+{
+ if (l->zero) {
for (int i = 0; i < l->nb_tabs; i++) {
Tab *t = l->tabs + i;
memset(*t->tab, 0, t->size);
@@ -404,6 +410,11 @@ static int pic_arrays_init(VVCContext *s, VVCFrameContext *fc)
return 0;
}
+int ff_vvc_per_frame_init(VVCFrameContext *fc)
+{
+ return frame_context_for_each_tl(fc, tl_zero);
+}
+
static int min_positive(const int idx, const int diff, const int min_diff)
{
return diff > 0 && (idx < 0 || diff < min_diff);
diff --git a/libavcodec/vvc/thread.c b/libavcodec/vvc/thread.c
index 28065d726f..74f8e4e9d0 100644
--- a/libavcodec/vvc/thread.c
+++ b/libavcodec/vvc/thread.c
@@ -40,6 +40,7 @@ typedef struct ProgressListener {
} ProgressListener;
typedef enum VVCTaskStage {
+ VVC_TASK_STAGE_INIT, // for CTU(0, 0) only
VVC_TASK_STAGE_PARSE,
VVC_TASK_STAGE_INTER,
VVC_TASK_STAGE_RECON,
@@ -175,10 +176,14 @@ static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uin
uint8_t target = 0;
VVCFrameContext *fc = t->fc;
+ if (stage == VVC_TASK_STAGE_INIT)
+ return 1;
+
if (stage == VVC_TASK_STAGE_PARSE) {
- const H266RawSPS *rsps = fc->ps.sps->r;
- const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
- target = 2 + wpp - 1; //left parse + colocation + wpp - no previous stage
+ const H266RawSPS *rsps = fc->ps.sps->r;
+ const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
+ const int no_prev_stage = t->rs > 0;
+ target = 2 + wpp - no_prev_stage; //left parse + colocation + wpp - no_prev_stage
} else if (stage == VVC_TASK_STAGE_INTER) {
target = atomic_load(&t->target_inter_score);
} else {
@@ -399,6 +404,55 @@ static int task_priority_higher(const AVTask *_a, const AVTask *_b)
return a->ry < b->ry;
}
+static void check_colocation(VVCContext *s, VVCTask *t)
+{
+ const VVCFrameContext *fc = t->fc;
+
+ if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
+ VVCFrame *col = fc->ref->collocated_ref;
+ const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
+ if (col && first_col) {
+ //we depend on bottom and right boundary, do not - 1 for y
+ const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
+ add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
+ return;
+ }
+ }
+ frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
+}
+
+static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
+{
+ const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
+ VVCTask *t = ft->tasks + rs;
+
+ frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
+}
+
+static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
+{
+ VVCFrameContext *fc = lc->fc;
+ VVCFrameThread *ft = fc->ft;
+ const int ret = ff_vvc_per_frame_init(fc);
+
+ if (ret < 0)
+ return ret;
+
+ for (int i = 0; i < fc->nb_slices; i++) {
+ SliceContext *sc = fc->slices[i];
+ for (int j = 0; j < sc->nb_eps; j++) {
+ EntryPoint *ep = sc->eps + j;
+ for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
+ const int rs = sc->sh.ctb_addr_in_curr_slice[k];
+ VVCTask *t = ft->tasks + rs;
+ check_colocation(s, t);
+ }
+ submit_entry_point(s, ft, sc, ep);
+ }
+ }
+ return 0;
+}
+
static void report_frame_progress(VVCFrameContext *fc,
const int ry, const VVCProgress idx)
{
@@ -547,6 +601,7 @@ static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
#define VVC_THREAD_DEBUG
#ifdef VVC_THREAD_DEBUG
const static char* task_name[] = {
+ "INIT",
"P",
"I",
"R",
@@ -567,6 +622,7 @@ static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
VVCFrameThread *ft = fc->ft;
const VVCTaskStage stage = t->stage;
static const run_func run[] = {
+ run_init,
run_parse,
run_inter,
run_recon,
@@ -726,7 +782,7 @@ int ff_vvc_frame_thread_init(VVCFrameContext *fc)
for (int rs = 0; rs < ft->ctu_count; rs++) {
VVCTask *t = ft->tasks + rs;
- task_init(t, VVC_TASK_STAGE_PARSE, fc, rs % ft->ctu_width, rs / ft->ctu_width);
+ task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc, rs % ft->ctu_width, rs / ft->ctu_width);
}
memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
@@ -745,59 +801,25 @@ fail:
return AVERROR(ENOMEM);
}
-static void check_colocation(VVCContext *s, VVCTask *t)
-{
- const VVCFrameContext *fc = t->fc;
-
- if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
- VVCFrame *col = fc->ref->collocated_ref;
- const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
- if (col && first_col) {
- //we depend on bottom and right boundary, do not - 1 for y
- const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
- add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
- return;
- }
- }
- frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
-}
-
-static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
-{
- const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
- VVCTask *t = ft->tasks + rs;
-
- frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
-}
-
int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
{
VVCFrameThread *ft = fc->ft;
- // We'll handle this in two passes:
- // Pass 0 to initialize tasks with parser, this will help detect bit stream error
- // Pass 1 to shedule location check and submit the entry point
- for (int pass = 0; pass < 2; pass++) {
- for (int i = 0; i < fc->nb_slices; i++) {
- SliceContext *sc = fc->slices[i];
- for (int j = 0; j < sc->nb_eps; j++) {
- EntryPoint *ep = sc->eps + j;
- for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
- const int rs = sc->sh.ctb_addr_in_curr_slice[k];
- VVCTask *t = ft->tasks + rs;
- if (pass) {
- check_colocation(s, t);
- } else {
- const int ret = task_init_parse(t, sc, ep, k);
- if (ret < 0)
- return ret;
- }
- }
- if (pass)
- submit_entry_point(s, ft, sc, ep);
+ for (int i = 0; i < fc->nb_slices; i++) {
+ SliceContext *sc = fc->slices[i];
+ for (int j = 0; j < sc->nb_eps; j++) {
+ EntryPoint *ep = sc->eps + j;
+ for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
+ const int rs = sc->sh.ctb_addr_in_curr_slice[k];
+ VVCTask *t = ft->tasks + rs;
+ const int ret = task_init_parse(t, sc, ep, k);
+ if (ret < 0)
+ return ret;
}
}
}
+ frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT);
+
return 0;
}
diff --git a/libavcodec/vvc/thread.h b/libavcodec/vvc/thread.h
index 8ac59b2ecf..7b15dbee59 100644
--- a/libavcodec/vvc/thread.h
+++ b/libavcodec/vvc/thread.h
@@ -32,5 +32,6 @@ int ff_vvc_frame_thread_init(VVCFrameContext *fc);
void ff_vvc_frame_thread_free(VVCFrameContext *fc);
int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc);
int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc);
+int ff_vvc_per_frame_init(VVCFrameContext *fc);
#endif // AVCODEC_VVC_THREAD_H
--
2.34.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 11/11] avcodec/vvcdec: move frame tab memset from the main thread to worker threads
2024-07-28 3:18 ` [FFmpeg-devel] [PATCH 11/11] avcodec/vvcdec: move frame tab memset from the main thread to worker threads Nuo Mi
@ 2024-08-11 14:01 ` Nuo Mi
2024-08-15 12:45 ` Nuo Mi
0 siblings, 1 reply; 12+ messages in thread
From: Nuo Mi @ 2024-08-11 14:01 UTC (permalink / raw)
To: ffmpeg-devel
On Sun, Jul 28, 2024 at 11:19 AM Nuo Mi <nuomi2021@gmail.com> wrote:
> memset tables in the main thread can become a bottleneck for the decoder.
> For example, if it takes 1% of the processing time for one core, the
> maximum achievable FPS will be 100.
> Moving the memset to worker threads fixes the issue.
>
will apply next week if there are no objections
> ---
> libavcodec/vvc/dec.c | 13 ++++-
> libavcodec/vvc/thread.c | 122 ++++++++++++++++++++++++----------------
> libavcodec/vvc/thread.h | 1 +
> 3 files changed, 85 insertions(+), 51 deletions(-)
>
> diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
> index 575bcfa33d..d34713296d 100644
> --- a/libavcodec/vvc/dec.c
> +++ b/libavcodec/vvc/dec.c
> @@ -82,7 +82,13 @@ static int tl_create(TabList *l)
> if (!*t->tab)
> return AVERROR(ENOMEM);
> }
> - } else if (l->zero) {
> + }
> + return 0;
> +}
> +
> +static int tl_zero(TabList *l)
> +{
> + if (l->zero) {
> for (int i = 0; i < l->nb_tabs; i++) {
> Tab *t = l->tabs + i;
> memset(*t->tab, 0, t->size);
> @@ -404,6 +410,11 @@ static int pic_arrays_init(VVCContext *s,
> VVCFrameContext *fc)
> return 0;
> }
>
> +int ff_vvc_per_frame_init(VVCFrameContext *fc)
> +{
> + return frame_context_for_each_tl(fc, tl_zero);
> +}
> +
> static int min_positive(const int idx, const int diff, const int min_diff)
> {
> return diff > 0 && (idx < 0 || diff < min_diff);
> diff --git a/libavcodec/vvc/thread.c b/libavcodec/vvc/thread.c
> index 28065d726f..74f8e4e9d0 100644
> --- a/libavcodec/vvc/thread.c
> +++ b/libavcodec/vvc/thread.c
> @@ -40,6 +40,7 @@ typedef struct ProgressListener {
> } ProgressListener;
>
> typedef enum VVCTaskStage {
> + VVC_TASK_STAGE_INIT, // for CTU(0, 0) only
> VVC_TASK_STAGE_PARSE,
> VVC_TASK_STAGE_INTER,
> VVC_TASK_STAGE_RECON,
> @@ -175,10 +176,14 @@ static int task_has_target_score(VVCTask *t, const
> VVCTaskStage stage, const uin
> uint8_t target = 0;
> VVCFrameContext *fc = t->fc;
>
> + if (stage == VVC_TASK_STAGE_INIT)
> + return 1;
> +
> if (stage == VVC_TASK_STAGE_PARSE) {
> - const H266RawSPS *rsps = fc->ps.sps->r;
> - const int wpp = rsps->sps_entropy_coding_sync_enabled_flag &&
> !is_first_row(fc, t->rx, t->ry);
> - target = 2 + wpp - 1; //left parse +
> colocation + wpp - no previous stage
> + const H266RawSPS *rsps = fc->ps.sps->r;
> + const int wpp =
> rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx,
> t->ry);
> + const int no_prev_stage = t->rs > 0;
> + target = 2 + wpp - no_prev_stage;
> //left parse + colocation + wpp - no_prev_stage
> } else if (stage == VVC_TASK_STAGE_INTER) {
> target = atomic_load(&t->target_inter_score);
> } else {
> @@ -399,6 +404,55 @@ static int task_priority_higher(const AVTask *_a,
> const AVTask *_b)
> return a->ry < b->ry;
> }
>
> +static void check_colocation(VVCContext *s, VVCTask *t)
> +{
> + const VVCFrameContext *fc = t->fc;
> +
> + if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag ||
> fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
> + VVCFrame *col = fc->ref->collocated_ref;
> + const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
> + if (col && first_col) {
> + //we depend on bottom and right boundary, do not - 1 for y
> + const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
> + add_progress_listener(col, &t->col_listener, t, s,
> VVC_PROGRESS_MV, y);
> + return;
> + }
> + }
> + frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
> +}
> +
> +static void submit_entry_point(VVCContext *s, VVCFrameThread *ft,
> SliceContext *sc, EntryPoint *ep)
> +{
> + const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
> + VVCTask *t = ft->tasks + rs;
> +
> + frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
> +}
> +
> +static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
> +{
> + VVCFrameContext *fc = lc->fc;
> + VVCFrameThread *ft = fc->ft;
> + const int ret = ff_vvc_per_frame_init(fc);
> +
> + if (ret < 0)
> + return ret;
> +
> + for (int i = 0; i < fc->nb_slices; i++) {
> + SliceContext *sc = fc->slices[i];
> + for (int j = 0; j < sc->nb_eps; j++) {
> + EntryPoint *ep = sc->eps + j;
> + for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
> + const int rs = sc->sh.ctb_addr_in_curr_slice[k];
> + VVCTask *t = ft->tasks + rs;
> + check_colocation(s, t);
> + }
> + submit_entry_point(s, ft, sc, ep);
> + }
> + }
> + return 0;
> +}
> +
> static void report_frame_progress(VVCFrameContext *fc,
> const int ry, const VVCProgress idx)
> {
> @@ -547,6 +601,7 @@ static int run_alf(VVCContext *s, VVCLocalContext *lc,
> VVCTask *t)
> #define VVC_THREAD_DEBUG
> #ifdef VVC_THREAD_DEBUG
> const static char* task_name[] = {
> + "INIT",
> "P",
> "I",
> "R",
> @@ -567,6 +622,7 @@ static void task_run_stage(VVCTask *t, VVCContext *s,
> VVCLocalContext *lc)
> VVCFrameThread *ft = fc->ft;
> const VVCTaskStage stage = t->stage;
> static const run_func run[] = {
> + run_init,
> run_parse,
> run_inter,
> run_recon,
> @@ -726,7 +782,7 @@ int ff_vvc_frame_thread_init(VVCFrameContext *fc)
>
> for (int rs = 0; rs < ft->ctu_count; rs++) {
> VVCTask *t = ft->tasks + rs;
> - task_init(t, VVC_TASK_STAGE_PARSE, fc, rs % ft->ctu_width, rs /
> ft->ctu_width);
> + task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc,
> rs % ft->ctu_width, rs / ft->ctu_width);
> }
>
> memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
> @@ -745,59 +801,25 @@ fail:
> return AVERROR(ENOMEM);
> }
>
> -static void check_colocation(VVCContext *s, VVCTask *t)
> -{
> - const VVCFrameContext *fc = t->fc;
> -
> - if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag ||
> fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
> - VVCFrame *col = fc->ref->collocated_ref;
> - const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
> - if (col && first_col) {
> - //we depend on bottom and right boundary, do not - 1 for y
> - const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
> - add_progress_listener(col, &t->col_listener, t, s,
> VVC_PROGRESS_MV, y);
> - return;
> - }
> - }
> - frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
> -}
> -
> -static void submit_entry_point(VVCContext *s, VVCFrameThread *ft,
> SliceContext *sc, EntryPoint *ep)
> -{
> - const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
> - VVCTask *t = ft->tasks + rs;
> -
> - frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
> -}
> -
> int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
> {
> VVCFrameThread *ft = fc->ft;
>
> - // We'll handle this in two passes:
> - // Pass 0 to initialize tasks with parser, this will help detect bit
> stream error
> - // Pass 1 to shedule location check and submit the entry point
> - for (int pass = 0; pass < 2; pass++) {
> - for (int i = 0; i < fc->nb_slices; i++) {
> - SliceContext *sc = fc->slices[i];
> - for (int j = 0; j < sc->nb_eps; j++) {
> - EntryPoint *ep = sc->eps + j;
> - for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
> - const int rs = sc->sh.ctb_addr_in_curr_slice[k];
> - VVCTask *t = ft->tasks + rs;
> - if (pass) {
> - check_colocation(s, t);
> - } else {
> - const int ret = task_init_parse(t, sc, ep, k);
> - if (ret < 0)
> - return ret;
> - }
> - }
> - if (pass)
> - submit_entry_point(s, ft, sc, ep);
> + for (int i = 0; i < fc->nb_slices; i++) {
> + SliceContext *sc = fc->slices[i];
> + for (int j = 0; j < sc->nb_eps; j++) {
> + EntryPoint *ep = sc->eps + j;
> + for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
> + const int rs = sc->sh.ctb_addr_in_curr_slice[k];
> + VVCTask *t = ft->tasks + rs;
> + const int ret = task_init_parse(t, sc, ep, k);
> + if (ret < 0)
> + return ret;
> }
> }
> }
> + frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT);
> +
> return 0;
> }
>
> diff --git a/libavcodec/vvc/thread.h b/libavcodec/vvc/thread.h
> index 8ac59b2ecf..7b15dbee59 100644
> --- a/libavcodec/vvc/thread.h
> +++ b/libavcodec/vvc/thread.h
> @@ -32,5 +32,6 @@ int ff_vvc_frame_thread_init(VVCFrameContext *fc);
> void ff_vvc_frame_thread_free(VVCFrameContext *fc);
> int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc);
> int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc);
> +int ff_vvc_per_frame_init(VVCFrameContext *fc);
>
> #endif // AVCODEC_VVC_THREAD_H
> --
> 2.34.1
>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 11/11] avcodec/vvcdec: move frame tab memset from the main thread to worker threads
2024-08-11 14:01 ` Nuo Mi
@ 2024-08-15 12:45 ` Nuo Mi
0 siblings, 0 replies; 12+ messages in thread
From: Nuo Mi @ 2024-08-15 12:45 UTC (permalink / raw)
To: ffmpeg-devel
On Sun, Aug 11, 2024 at 10:01 PM Nuo Mi <nuomi2021@gmail.com> wrote:
>
>
> On Sun, Jul 28, 2024 at 11:19 AM Nuo Mi <nuomi2021@gmail.com> wrote:
>
>> memset tables in the main thread can become a bottleneck for the decoder.
>> For example, if it takes 1% of the processing time for one core, the
>> maximum achievable FPS will be 100.
>> Moving the memset to worker threads fixes the issue.
>>
> will apply next week if there are no objections
>
Done
> ---
>> libavcodec/vvc/dec.c | 13 ++++-
>> libavcodec/vvc/thread.c | 122 ++++++++++++++++++++++++----------------
>> libavcodec/vvc/thread.h | 1 +
>> 3 files changed, 85 insertions(+), 51 deletions(-)
>>
>> diff --git a/libavcodec/vvc/dec.c b/libavcodec/vvc/dec.c
>> index 575bcfa33d..d34713296d 100644
>> --- a/libavcodec/vvc/dec.c
>> +++ b/libavcodec/vvc/dec.c
>> @@ -82,7 +82,13 @@ static int tl_create(TabList *l)
>> if (!*t->tab)
>> return AVERROR(ENOMEM);
>> }
>> - } else if (l->zero) {
>> + }
>> + return 0;
>> +}
>> +
>> +static int tl_zero(TabList *l)
>> +{
>> + if (l->zero) {
>> for (int i = 0; i < l->nb_tabs; i++) {
>> Tab *t = l->tabs + i;
>> memset(*t->tab, 0, t->size);
>> @@ -404,6 +410,11 @@ static int pic_arrays_init(VVCContext *s,
>> VVCFrameContext *fc)
>> return 0;
>> }
>>
>> +int ff_vvc_per_frame_init(VVCFrameContext *fc)
>> +{
>> + return frame_context_for_each_tl(fc, tl_zero);
>> +}
>> +
>> static int min_positive(const int idx, const int diff, const int
>> min_diff)
>> {
>> return diff > 0 && (idx < 0 || diff < min_diff);
>> diff --git a/libavcodec/vvc/thread.c b/libavcodec/vvc/thread.c
>> index 28065d726f..74f8e4e9d0 100644
>> --- a/libavcodec/vvc/thread.c
>> +++ b/libavcodec/vvc/thread.c
>> @@ -40,6 +40,7 @@ typedef struct ProgressListener {
>> } ProgressListener;
>>
>> typedef enum VVCTaskStage {
>> + VVC_TASK_STAGE_INIT, // for CTU(0, 0) only
>> VVC_TASK_STAGE_PARSE,
>> VVC_TASK_STAGE_INTER,
>> VVC_TASK_STAGE_RECON,
>> @@ -175,10 +176,14 @@ static int task_has_target_score(VVCTask *t, const
>> VVCTaskStage stage, const uin
>> uint8_t target = 0;
>> VVCFrameContext *fc = t->fc;
>>
>> + if (stage == VVC_TASK_STAGE_INIT)
>> + return 1;
>> +
>> if (stage == VVC_TASK_STAGE_PARSE) {
>> - const H266RawSPS *rsps = fc->ps.sps->r;
>> - const int wpp = rsps->sps_entropy_coding_sync_enabled_flag &&
>> !is_first_row(fc, t->rx, t->ry);
>> - target = 2 + wpp - 1; //left parse +
>> colocation + wpp - no previous stage
>> + const H266RawSPS *rsps = fc->ps.sps->r;
>> + const int wpp =
>> rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx,
>> t->ry);
>> + const int no_prev_stage = t->rs > 0;
>> + target = 2 + wpp - no_prev_stage;
>> //left parse + colocation + wpp - no_prev_stage
>> } else if (stage == VVC_TASK_STAGE_INTER) {
>> target = atomic_load(&t->target_inter_score);
>> } else {
>> @@ -399,6 +404,55 @@ static int task_priority_higher(const AVTask *_a,
>> const AVTask *_b)
>> return a->ry < b->ry;
>> }
>>
>> +static void check_colocation(VVCContext *s, VVCTask *t)
>> +{
>> + const VVCFrameContext *fc = t->fc;
>> +
>> + if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag ||
>> fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
>> + VVCFrame *col = fc->ref->collocated_ref;
>> + const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
>> + if (col && first_col) {
>> + //we depend on bottom and right boundary, do not - 1 for y
>> + const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
>> + add_progress_listener(col, &t->col_listener, t, s,
>> VVC_PROGRESS_MV, y);
>> + return;
>> + }
>> + }
>> + frame_thread_add_score(s, fc->ft, t->rx, t->ry,
>> VVC_TASK_STAGE_PARSE);
>> +}
>> +
>> +static void submit_entry_point(VVCContext *s, VVCFrameThread *ft,
>> SliceContext *sc, EntryPoint *ep)
>> +{
>> + const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
>> + VVCTask *t = ft->tasks + rs;
>> +
>> + frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
>> +}
>> +
>> +static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
>> +{
>> + VVCFrameContext *fc = lc->fc;
>> + VVCFrameThread *ft = fc->ft;
>> + const int ret = ff_vvc_per_frame_init(fc);
>> +
>> + if (ret < 0)
>> + return ret;
>> +
>> + for (int i = 0; i < fc->nb_slices; i++) {
>> + SliceContext *sc = fc->slices[i];
>> + for (int j = 0; j < sc->nb_eps; j++) {
>> + EntryPoint *ep = sc->eps + j;
>> + for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
>> + const int rs = sc->sh.ctb_addr_in_curr_slice[k];
>> + VVCTask *t = ft->tasks + rs;
>> + check_colocation(s, t);
>> + }
>> + submit_entry_point(s, ft, sc, ep);
>> + }
>> + }
>> + return 0;
>> +}
>> +
>> static void report_frame_progress(VVCFrameContext *fc,
>> const int ry, const VVCProgress idx)
>> {
>> @@ -547,6 +601,7 @@ static int run_alf(VVCContext *s, VVCLocalContext
>> *lc, VVCTask *t)
>> #define VVC_THREAD_DEBUG
>> #ifdef VVC_THREAD_DEBUG
>> const static char* task_name[] = {
>> + "INIT",
>> "P",
>> "I",
>> "R",
>> @@ -567,6 +622,7 @@ static void task_run_stage(VVCTask *t, VVCContext *s,
>> VVCLocalContext *lc)
>> VVCFrameThread *ft = fc->ft;
>> const VVCTaskStage stage = t->stage;
>> static const run_func run[] = {
>> + run_init,
>> run_parse,
>> run_inter,
>> run_recon,
>> @@ -726,7 +782,7 @@ int ff_vvc_frame_thread_init(VVCFrameContext *fc)
>>
>> for (int rs = 0; rs < ft->ctu_count; rs++) {
>> VVCTask *t = ft->tasks + rs;
>> - task_init(t, VVC_TASK_STAGE_PARSE, fc, rs % ft->ctu_width, rs /
>> ft->ctu_width);
>> + task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT,
>> fc, rs % ft->ctu_width, rs / ft->ctu_width);
>> }
>>
>> memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
>> @@ -745,59 +801,25 @@ fail:
>> return AVERROR(ENOMEM);
>> }
>>
>> -static void check_colocation(VVCContext *s, VVCTask *t)
>> -{
>> - const VVCFrameContext *fc = t->fc;
>> -
>> - if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag ||
>> fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
>> - VVCFrame *col = fc->ref->collocated_ref;
>> - const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
>> - if (col && first_col) {
>> - //we depend on bottom and right boundary, do not - 1 for y
>> - const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
>> - add_progress_listener(col, &t->col_listener, t, s,
>> VVC_PROGRESS_MV, y);
>> - return;
>> - }
>> - }
>> - frame_thread_add_score(s, fc->ft, t->rx, t->ry,
>> VVC_TASK_STAGE_PARSE);
>> -}
>> -
>> -static void submit_entry_point(VVCContext *s, VVCFrameThread *ft,
>> SliceContext *sc, EntryPoint *ep)
>> -{
>> - const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
>> - VVCTask *t = ft->tasks + rs;
>> -
>> - frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
>> -}
>> -
>> int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
>> {
>> VVCFrameThread *ft = fc->ft;
>>
>> - // We'll handle this in two passes:
>> - // Pass 0 to initialize tasks with parser, this will help detect bit
>> stream error
>> - // Pass 1 to shedule location check and submit the entry point
>> - for (int pass = 0; pass < 2; pass++) {
>> - for (int i = 0; i < fc->nb_slices; i++) {
>> - SliceContext *sc = fc->slices[i];
>> - for (int j = 0; j < sc->nb_eps; j++) {
>> - EntryPoint *ep = sc->eps + j;
>> - for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
>> - const int rs = sc->sh.ctb_addr_in_curr_slice[k];
>> - VVCTask *t = ft->tasks + rs;
>> - if (pass) {
>> - check_colocation(s, t);
>> - } else {
>> - const int ret = task_init_parse(t, sc, ep, k);
>> - if (ret < 0)
>> - return ret;
>> - }
>> - }
>> - if (pass)
>> - submit_entry_point(s, ft, sc, ep);
>> + for (int i = 0; i < fc->nb_slices; i++) {
>> + SliceContext *sc = fc->slices[i];
>> + for (int j = 0; j < sc->nb_eps; j++) {
>> + EntryPoint *ep = sc->eps + j;
>> + for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
>> + const int rs = sc->sh.ctb_addr_in_curr_slice[k];
>> + VVCTask *t = ft->tasks + rs;
>> + const int ret = task_init_parse(t, sc, ep, k);
>> + if (ret < 0)
>> + return ret;
>> }
>> }
>> }
>> + frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT);
>> +
>> return 0;
>> }
>>
>> diff --git a/libavcodec/vvc/thread.h b/libavcodec/vvc/thread.h
>> index 8ac59b2ecf..7b15dbee59 100644
>> --- a/libavcodec/vvc/thread.h
>> +++ b/libavcodec/vvc/thread.h
>> @@ -32,5 +32,6 @@ int ff_vvc_frame_thread_init(VVCFrameContext *fc);
>> void ff_vvc_frame_thread_free(VVCFrameContext *fc);
>> int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc);
>> int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc);
>> +int ff_vvc_per_frame_init(VVCFrameContext *fc);
>>
>> #endif // AVCODEC_VVC_THREAD_H
>> --
>> 2.34.1
>>
>>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread