* [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx @ 2024-04-10 13:31 Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 02/10] lavc/hevcdec: track local context count separately from WPP thread count Anton Khirnov ` (8 more replies) 0 siblings, 9 replies; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel It is more consistent with our naming conventions. --- libavcodec/hevcdec.c | 30 +++++++++++++++--------------- libavcodec/hevcdec.h | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index d3b668af00..c70937a756 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -2697,14 +2697,14 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) } for (i = 1; i < s->threads_number; i++) { - if (s->HEVClcList[i]) + if (s->local_ctx[i]) continue; - s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext)); - if (!s->HEVClcList[i]) + s->local_ctx[i] = av_mallocz(sizeof(HEVCLocalContext)); + if (!s->local_ctx[i]) return AVERROR(ENOMEM); - s->HEVClcList[i]->logctx = s->avctx; - s->HEVClcList[i]->parent = s; - s->HEVClcList[i]->common_cabac_state = &s->cabac; + s->local_ctx[i]->logctx = s->avctx; + s->local_ctx[i]->parent = s; + s->local_ctx[i]->common_cabac_state = &s->cabac; } offset = (lc->gb.index >> 3); @@ -2742,8 +2742,8 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) s->data = data; for (i = 1; i < s->threads_number; i++) { - s->HEVClcList[i]->first_qp_group = 1; - s->HEVClcList[i]->qp_y = s->HEVClc->qp_y; + s->local_ctx[i]->first_qp_group = 1; + s->local_ctx[i]->qp_y = s->HEVClc->qp_y; } atomic_store(&s->wpp_err, 0); @@ -2756,7 +2756,7 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) return AVERROR(ENOMEM); if (s->ps.pps->entropy_coding_sync_enabled_flag) - s->avctx->execute2(s->avctx, hls_decode_entry_wpp, s->HEVClcList, ret, 
s->sh.num_entry_point_offsets + 1); + s->avctx->execute2(s->avctx, hls_decode_entry_wpp, s->local_ctx, ret, s->sh.num_entry_point_offsets + 1); for (i = 0; i <= s->sh.num_entry_point_offsets; i++) res += ret[i]; @@ -3474,13 +3474,13 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) av_freep(&s->sh.offset); av_freep(&s->sh.size); - if (s->HEVClcList) { + if (s->local_ctx) { for (i = 1; i < s->threads_number; i++) { - av_freep(&s->HEVClcList[i]); + av_freep(&s->local_ctx[i]); } } av_freep(&s->HEVClc); - av_freep(&s->HEVClcList); + av_freep(&s->local_ctx); ff_h2645_packet_uninit(&s->pkt); @@ -3497,13 +3497,13 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) s->avctx = avctx; s->HEVClc = av_mallocz(sizeof(HEVCLocalContext)); - s->HEVClcList = av_mallocz(sizeof(HEVCLocalContext*) * s->threads_number); - if (!s->HEVClc || !s->HEVClcList) + s->local_ctx = av_mallocz(sizeof(HEVCLocalContext*) * s->threads_number); + if (!s->HEVClc || !s->local_ctx) return AVERROR(ENOMEM); s->HEVClc->parent = s; s->HEVClc->logctx = avctx; s->HEVClc->common_cabac_state = &s->cabac; - s->HEVClcList[0] = s->HEVClc; + s->local_ctx[0] = s->HEVClc; s->output_frame = av_frame_alloc(); if (!s->output_frame) diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h index e65a6180ca..9e3e6a8cd7 100644 --- a/libavcodec/hevcdec.h +++ b/libavcodec/hevcdec.h @@ -441,7 +441,7 @@ typedef struct HEVCContext { const AVClass *c; // needed by private avoptions AVCodecContext *avctx; - HEVCLocalContext **HEVClcList; + HEVCLocalContext **local_ctx; HEVCLocalContext *HEVClc; uint8_t threads_type; -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH 02/10] lavc/hevcdec: track local context count separately from WPP thread count 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov @ 2024-04-10 13:31 ` Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers Anton Khirnov ` (7 subsequent siblings) 8 siblings, 0 replies; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel The latter can be lowered while decoding, which would lead to memleaks. --- libavcodec/hevcdec.c | 7 +++++-- libavcodec/hevcdec.h | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index c70937a756..55f72af972 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -2697,11 +2697,13 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) } for (i = 1; i < s->threads_number; i++) { - if (s->local_ctx[i]) + if (i < s->nb_local_ctx) continue; s->local_ctx[i] = av_mallocz(sizeof(HEVCLocalContext)); if (!s->local_ctx[i]) return AVERROR(ENOMEM); + s->nb_local_ctx++; + s->local_ctx[i]->logctx = s->avctx; s->local_ctx[i]->parent = s; s->local_ctx[i]->common_cabac_state = &s->cabac; @@ -3475,7 +3477,7 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) av_freep(&s->sh.size); if (s->local_ctx) { - for (i = 1; i < s->threads_number; i++) { + for (i = 1; i < s->nb_local_ctx; i++) { av_freep(&s->local_ctx[i]); } } @@ -3504,6 +3506,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) s->HEVClc->logctx = avctx; s->HEVClc->common_cabac_state = &s->cabac; s->local_ctx[0] = s->HEVClc; + s->nb_local_ctx = 1; s->output_frame = av_frame_alloc(); if (!s->output_frame) diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h index 9e3e6a8cd7..a881eb9981 100644 --- a/libavcodec/hevcdec.h +++ b/libavcodec/hevcdec.h @@ -442,6 +442,8 @@ typedef struct 
HEVCContext { AVCodecContext *avctx; HEVCLocalContext **local_ctx; + unsigned nb_local_ctx; + HEVCLocalContext *HEVClc; uint8_t threads_type; -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 02/10] lavc/hevcdec: track local context count separately from WPP thread count Anton Khirnov @ 2024-04-10 13:31 ` Anton Khirnov 2024-04-17 9:29 ` Andreas Rheinhardt 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 04/10] lavc/hevcdec: drop a useless execute() call with 1 job Anton Khirnov ` (6 subsequent siblings) 8 siblings, 1 reply; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel It is more efficient and easier to manage. --- libavcodec/hevcdec.c | 57 +++++++++++++++++++++++++------------------- libavcodec/hevcdec.h | 2 +- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 55f72af972..47226ef0ab 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -2598,7 +2598,7 @@ static int hls_slice_data(HEVCContext *s) static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *hevc_lclist, int job, int self_id) { - HEVCLocalContext *lc = ((HEVCLocalContext**)hevc_lclist)[self_id]; + HEVCLocalContext *lc = &((HEVCLocalContext*)hevc_lclist)[self_id]; const HEVCContext *const s = lc->parent; int ctb_size = 1 << s->ps.sps->log2_ctb_size; int more_data = 1; @@ -2682,7 +2682,7 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) { const uint8_t *data = nal->data; int length = nal->size; - HEVCLocalContext *lc = s->HEVClc; + HEVCLocalContext *lc; int *ret; int64_t offset; int64_t startheader, cmpt = 0; @@ -2696,19 +2696,31 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) return AVERROR_INVALIDDATA; } - for (i = 1; i < s->threads_number; i++) { - if (i < s->nb_local_ctx) - continue; - s->local_ctx[i] = av_mallocz(sizeof(HEVCLocalContext)); - if (!s->local_ctx[i]) - 
return AVERROR(ENOMEM); - s->nb_local_ctx++; + if (s->threads_number > s->nb_local_ctx) { + HEVCLocalContext *tmp = av_malloc_array(s->threads_number, sizeof(*s->local_ctx)); - s->local_ctx[i]->logctx = s->avctx; - s->local_ctx[i]->parent = s; - s->local_ctx[i]->common_cabac_state = &s->cabac; + if (!tmp) + return AVERROR(ENOMEM); + + memcpy(tmp, s->local_ctx, sizeof(*s->local_ctx) * s->nb_local_ctx); + av_free(s->local_ctx); + s->local_ctx = tmp; + s->HEVClc = &s->local_ctx[0]; + + for (unsigned i = s->nb_local_ctx; i < s->threads_number; i++) { + tmp = &s->local_ctx[i]; + + memset(tmp, 0, sizeof(*tmp)); + + tmp->logctx = s->avctx; + tmp->parent = s; + tmp->common_cabac_state = &s->cabac; + } + + s->nb_local_ctx = s->threads_number; } + lc = &s->local_ctx[0]; offset = (lc->gb.index >> 3); for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) { @@ -2744,8 +2756,8 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) s->data = data; for (i = 1; i < s->threads_number; i++) { - s->local_ctx[i]->first_qp_group = 1; - s->local_ctx[i]->qp_y = s->HEVClc->qp_y; + s->local_ctx[i].first_qp_group = 1; + s->local_ctx[i].qp_y = s->HEVClc->qp_y; } atomic_store(&s->wpp_err, 0); @@ -3476,12 +3488,6 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) av_freep(&s->sh.offset); av_freep(&s->sh.size); - if (s->local_ctx) { - for (i = 1; i < s->nb_local_ctx; i++) { - av_freep(&s->local_ctx[i]); - } - } - av_freep(&s->HEVClc); av_freep(&s->local_ctx); ff_h2645_packet_uninit(&s->pkt); @@ -3498,15 +3504,16 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) s->avctx = avctx; - s->HEVClc = av_mallocz(sizeof(HEVCLocalContext)); - s->local_ctx = av_mallocz(sizeof(HEVCLocalContext*) * s->threads_number); - if (!s->HEVClc || !s->local_ctx) + s->local_ctx = av_mallocz(sizeof(*s->local_ctx)); + if (!s->local_ctx) return AVERROR(ENOMEM); + s->nb_local_ctx = 1; + + s->HEVClc = &s->local_ctx[0]; + s->HEVClc->parent 
= s; s->HEVClc->logctx = avctx; s->HEVClc->common_cabac_state = &s->cabac; - s->local_ctx[0] = s->HEVClc; - s->nb_local_ctx = 1; s->output_frame = av_frame_alloc(); if (!s->output_frame) diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h index a881eb9981..5a4ed270e8 100644 --- a/libavcodec/hevcdec.h +++ b/libavcodec/hevcdec.h @@ -441,7 +441,7 @@ typedef struct HEVCContext { const AVClass *c; // needed by private avoptions AVCodecContext *avctx; - HEVCLocalContext **local_ctx; + HEVCLocalContext *local_ctx; unsigned nb_local_ctx; HEVCLocalContext *HEVClc; -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers Anton Khirnov @ 2024-04-17 9:29 ` Andreas Rheinhardt 2024-05-24 9:03 ` Anton Khirnov 0 siblings, 1 reply; 22+ messages in thread From: Andreas Rheinhardt @ 2024-04-17 9:29 UTC (permalink / raw) To: ffmpeg-devel Anton Khirnov: > It is more efficient and easier to manage. > --- Allocating structures used by slice contexts jointly has the potential downside of false sharing if the structures are not sufficiently aligned/padded. - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers 2024-04-17 9:29 ` Andreas Rheinhardt @ 2024-05-24 9:03 ` Anton Khirnov 2024-05-27 13:10 ` Andreas Rheinhardt 0 siblings, 1 reply; 22+ messages in thread From: Anton Khirnov @ 2024-05-24 9:03 UTC (permalink / raw) To: FFmpeg development discussions and patches Quoting Andreas Rheinhardt (2024-04-17 11:29:18) > Anton Khirnov: > > It is more efficient and easier to manage. > > --- > > Allocating structures used by slice contexts jointly has the potential > downside of false sharing if the structures are not sufficiently > aligned/padded. What do you suggest? Align first member to cacheline size? -- Anton Khirnov _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers 2024-05-24 9:03 ` Anton Khirnov @ 2024-05-27 13:10 ` Andreas Rheinhardt 2024-05-28 13:54 ` [FFmpeg-devel] [PATCH v2 " Anton Khirnov 0 siblings, 1 reply; 22+ messages in thread From: Andreas Rheinhardt @ 2024-05-27 13:10 UTC (permalink / raw) To: ffmpeg-devel Anton Khirnov: > Quoting Andreas Rheinhardt (2024-04-17 11:29:18) >> Anton Khirnov: >>> It is more efficient and easier to manage. >>> --- >> >> Allocating structures used by slice contexts jointly has the potential >> downside of false sharing if the structures are not sufficiently >> aligned/padded. > > What do you suggest? Align first member to cacheline size? > This is problematic, because av_malloc is not necessarily aligned to the cacheline size; for the same reason it is not possible to simply DECLARE_ALIGNED_64 for it (see e.g. 7945d30e91b96d2f4f5b612048169087d214d41e). Given that the structure we are talking about is already pretty big, the easiest way is to add explicit padding at the end. Use __GCC_DESTRUCTIVE_SIZE if that is defined or 64 if not (or maybe always use 128B? https://stackoverflow.com/questions/72126606/should-the-cache-padding-size-of-x86-64-be-128-bytes). - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH v2 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers 2024-05-27 13:10 ` Andreas Rheinhardt @ 2024-05-28 13:54 ` Anton Khirnov 0 siblings, 0 replies; 22+ messages in thread From: Anton Khirnov @ 2024-05-28 13:54 UTC (permalink / raw) To: ffmpeg-devel It is more efficient and easier to manage. --- libavcodec/hevcdec.c | 57 +++++++++++++++++++++++++------------------- libavcodec/hevcdec.h | 6 ++++- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index e84f45e3f8..88a481c043 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -2598,7 +2598,7 @@ static int hls_slice_data(HEVCContext *s) static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *hevc_lclist, int job, int self_id) { - HEVCLocalContext *lc = ((HEVCLocalContext**)hevc_lclist)[self_id]; + HEVCLocalContext *lc = &((HEVCLocalContext*)hevc_lclist)[self_id]; const HEVCContext *const s = lc->parent; int ctb_size = 1 << s->ps.sps->log2_ctb_size; int more_data = 1; @@ -2682,7 +2682,7 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) { const uint8_t *data = nal->data; int length = nal->size; - HEVCLocalContext *lc = s->HEVClc; + HEVCLocalContext *lc; int *ret; int64_t offset; int64_t startheader, cmpt = 0; @@ -2696,19 +2696,31 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) return AVERROR_INVALIDDATA; } - for (i = 1; i < s->threads_number; i++) { - if (i < s->nb_local_ctx) - continue; - s->local_ctx[i] = av_mallocz(sizeof(HEVCLocalContext)); - if (!s->local_ctx[i]) - return AVERROR(ENOMEM); - s->nb_local_ctx++; + if (s->threads_number > s->nb_local_ctx) { + HEVCLocalContext *tmp = av_malloc_array(s->threads_number, sizeof(*s->local_ctx)); - s->local_ctx[i]->logctx = s->avctx; - s->local_ctx[i]->parent = s; - s->local_ctx[i]->common_cabac_state = &s->cabac; + if (!tmp) + return AVERROR(ENOMEM); + + memcpy(tmp, s->local_ctx, sizeof(*s->local_ctx) * 
s->nb_local_ctx); + av_free(s->local_ctx); + s->local_ctx = tmp; + s->HEVClc = &s->local_ctx[0]; + + for (unsigned i = s->nb_local_ctx; i < s->threads_number; i++) { + tmp = &s->local_ctx[i]; + + memset(tmp, 0, sizeof(*tmp)); + + tmp->logctx = s->avctx; + tmp->parent = s; + tmp->common_cabac_state = &s->cabac; + } + + s->nb_local_ctx = s->threads_number; } + lc = &s->local_ctx[0]; offset = (lc->gb.index >> 3); for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) { @@ -2744,8 +2756,8 @@ static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal) s->data = data; for (i = 1; i < s->threads_number; i++) { - s->local_ctx[i]->first_qp_group = 1; - s->local_ctx[i]->qp_y = s->HEVClc->qp_y; + s->local_ctx[i].first_qp_group = 1; + s->local_ctx[i].qp_y = s->HEVClc->qp_y; } atomic_store(&s->wpp_err, 0); @@ -3474,12 +3486,6 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) av_freep(&s->sh.offset); av_freep(&s->sh.size); - if (s->local_ctx) { - for (i = 1; i < s->nb_local_ctx; i++) { - av_freep(&s->local_ctx[i]); - } - } - av_freep(&s->HEVClc); av_freep(&s->local_ctx); ff_h2645_packet_uninit(&s->pkt); @@ -3496,15 +3502,16 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) s->avctx = avctx; - s->HEVClc = av_mallocz(sizeof(HEVCLocalContext)); - s->local_ctx = av_mallocz(sizeof(HEVCLocalContext*) * s->threads_number); - if (!s->HEVClc || !s->local_ctx) + s->local_ctx = av_mallocz(sizeof(*s->local_ctx)); + if (!s->local_ctx) return AVERROR(ENOMEM); + s->nb_local_ctx = 1; + + s->HEVClc = &s->local_ctx[0]; + s->HEVClc->parent = s; s->HEVClc->logctx = avctx; s->HEVClc->common_cabac_state = &s->cabac; - s->local_ctx[0] = s->HEVClc; - s->nb_local_ctx = 1; s->output_frame = av_frame_alloc(); if (!s->output_frame) diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h index ca68fb54a7..5aa3d40450 100644 --- a/libavcodec/hevcdec.h +++ b/libavcodec/hevcdec.h @@ -439,13 +439,17 @@ typedef struct 
HEVCLocalContext { /* properties of the boundary of the current CTB for the purposes * of the deblocking filter */ int boundary_flags; + + // an array of these structs is used for per-thread state - pad its size + // to avoid false sharing + char padding[128]; } HEVCLocalContext; typedef struct HEVCContext { const AVClass *c; // needed by private avoptions AVCodecContext *avctx; - HEVCLocalContext **local_ctx; + HEVCLocalContext *local_ctx; unsigned nb_local_ctx; HEVCLocalContext *HEVClc; -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH 04/10] lavc/hevcdec: drop a useless execute() call with 1 job 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 02/10] lavc/hevcdec: track local context count separately from WPP thread count Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers Anton Khirnov @ 2024-04-10 13:31 ` Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 05/10] lavc/hevc_ps: reduce the size of used_by_curr_pic_lt_sps_flag Anton Khirnov ` (5 subsequent siblings) 8 siblings, 0 replies; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel --- libavcodec/hevcdec.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 47226ef0ab..6b2c0b75ee 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -2526,9 +2526,8 @@ static void hls_decode_neighbour(HEVCLocalContext *lc, int x_ctb, int y_ctb, lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]])); } -static int hls_decode_entry(AVCodecContext *avctxt, void *arg) +static int hls_decode_entry(HEVCContext *s) { - HEVCContext *s = avctxt->priv_data; HEVCLocalContext *const lc = s->HEVClc; int ctb_size = 1 << s->ps.sps->log2_ctb_size; int more_data = 1; @@ -2588,13 +2587,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *arg) return ctb_addr_ts; } -static int hls_slice_data(HEVCContext *s) -{ - int ret = 0; - - s->avctx->execute(s->avctx, hls_decode_entry, NULL, &ret , 1, 0); - return ret; -} static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *hevc_lclist, int job, int self_id) { @@ -3133,7 +3125,7 @@ static int 
decode_nal_unit(HEVCContext *s, const H2645NAL *nal) if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0) ctb_addr_ts = hls_slice_data_wpp(s, nal); else - ctb_addr_ts = hls_slice_data(s); + ctb_addr_ts = hls_decode_entry(s); if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) { ret = hevc_frame_end(s); if (ret < 0) -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH 05/10] lavc/hevc_ps: reduce the size of used_by_curr_pic_lt_sps_flag 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov ` (2 preceding siblings ...) 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 04/10] lavc/hevcdec: drop a useless execute() call with 1 job Anton Khirnov @ 2024-04-10 13:31 ` Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 06/10] lavc/hevc_ps/HEVCSPS: change flags into size-1 bitfields Anton Khirnov ` (4 subsequent siblings) 8 siblings, 0 replies; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel It is currently an array of 32 uint8_t, each storing a single flag. A single uint32_t is sufficient. --- libavcodec/hevc_ps.c | 4 +++- libavcodec/hevc_ps.h | 2 +- libavcodec/hevcdec.c | 2 +- libavcodec/vulkan_hevc.c | 3 +-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 25f087ed75..8d5fc0d0ca 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -1094,9 +1094,11 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->num_long_term_ref_pics_sps); return AVERROR_INVALIDDATA; } + + sps->used_by_curr_pic_lt = 0; for (i = 0; i < sps->num_long_term_ref_pics_sps; i++) { sps->lt_ref_pic_poc_lsb_sps[i] = get_bits(gb, sps->log2_max_poc_lsb); - sps->used_by_curr_pic_lt_sps_flag[i] = get_bits1(gb); + sps->used_by_curr_pic_lt |= get_bits1(gb) * (1 << i); } } diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 61a0fe2219..b2f3a8dbd1 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -230,7 +230,7 @@ typedef struct HEVCSPS { uint8_t long_term_ref_pics_present_flag; uint16_t lt_ref_pic_poc_lsb_sps[HEVC_MAX_LONG_TERM_REF_PICS]; - uint8_t used_by_curr_pic_lt_sps_flag[HEVC_MAX_LONG_TERM_REF_PICS]; + uint32_t used_by_curr_pic_lt; uint8_t num_long_term_ref_pics_sps; struct { diff --git a/libavcodec/hevcdec.c 
b/libavcodec/hevcdec.c index 6b2c0b75ee..4a7046bdec 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -295,7 +295,7 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps)); rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps]; - rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps]; + rps->used[i] = !!(sps->used_by_curr_pic_lt & (1 << lt_idx_sps)); } else { rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb); rps->used[i] = get_bits1(gb); diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c index 2705a965b9..9b40f5ad58 100644 --- a/libavcodec/vulkan_hevc.c +++ b/libavcodec/vulkan_hevc.c @@ -386,11 +386,10 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, } *ltr = (StdVideoH265LongTermRefPicsSps) { - .used_by_curr_pic_lt_sps_flag = 0x0, + .used_by_curr_pic_lt_sps_flag = sps->used_by_curr_pic_lt, }; for (int i = 0; i < sps->num_long_term_ref_pics_sps; i++) { - ltr->used_by_curr_pic_lt_sps_flag |= sps->used_by_curr_pic_lt_sps_flag[i] << i; ltr->lt_ref_pic_poc_lsb_sps[i] = sps->lt_ref_pic_poc_lsb_sps[i]; } -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH 06/10] lavc/hevc_ps/HEVCSPS: change flags into size-1 bitfields 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov ` (3 preceding siblings ...) 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 05/10] lavc/hevc_ps: reduce the size of used_by_curr_pic_lt_sps_flag Anton Khirnov @ 2024-04-10 13:31 ` Anton Khirnov 2024-04-11 11:55 ` Andreas Rheinhardt 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 07/10] lavc/hevc_ps: fix variable signedness in ff_hevc_decode_short_term_rps() Anton Khirnov ` (3 subsequent siblings) 8 siblings, 1 reply; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel Reduces sizeof(HEVCSPS) by 96 bytes. Also improve flag names: drop redundant suffixes and prefixes, and consistently use disabled/enabled. --- libavcodec/dxva2_hevc.c | 24 ++++----- libavcodec/hevc_cabac.c | 36 ++++++------- libavcodec/hevc_filter.c | 8 +-- libavcodec/hevc_parser.c | 2 +- libavcodec/hevc_ps.c | 95 +++++++++++++++++----------------- libavcodec/hevc_ps.h | 62 +++++++++++----------- libavcodec/hevcdec.c | 10 ++-- libavcodec/hevcpred_template.c | 4 +- libavcodec/mips/hevcpred_msa.c | 6 +-- libavcodec/nvdec_hevc.c | 42 +++++++-------- libavcodec/qsvenc_hevc.c | 2 +- libavcodec/vaapi_hevc.c | 42 +++++++-------- libavcodec/vdpau_hevc.c | 36 ++++++------- libavcodec/vulkan_hevc.c | 56 ++++++++++---------- 14 files changed, 212 insertions(+), 213 deletions(-) diff --git a/libavcodec/dxva2_hevc.c b/libavcodec/dxva2_hevc.c index 31d74a7164..b500d7917a 100644 --- a/libavcodec/dxva2_hevc.c +++ b/libavcodec/dxva2_hevc.c @@ -72,7 +72,7 @@ void ff_dxva2_hevc_fill_picture_parameters(const AVCodecContext *avctx, AVDXVACo pp->PicHeightInMinCbsY = sps->min_cb_height; pp->wFormatAndSequenceInfoFlags = (sps->chroma_format_idc << 0) | - (sps->separate_colour_plane_flag << 2) | + (sps->separate_colour_plane << 2) | ((sps->bit_depth - 8) << 3) | ((sps->bit_depth - 8) << 6) 
| ((sps->log2_max_poc_lsb - 4) << 9) | @@ -99,18 +99,18 @@ void ff_dxva2_hevc_fill_picture_parameters(const AVCodecContext *avctx, AVDXVACo pp->wNumBitsForShortTermRPSInSlice = h->sh.short_term_ref_pic_set_size; } - pp->dwCodingParamToolFlags = (sps->scaling_list_enable_flag << 0) | - (sps->amp_enabled_flag << 1) | + pp->dwCodingParamToolFlags = (sps->scaling_list_enabled << 0) | + (sps->amp_enabled << 1) | (sps->sao_enabled << 2) | - (sps->pcm_enabled_flag << 3) | - ((sps->pcm_enabled_flag ? (sps->pcm.bit_depth - 1) : 0) << 4) | - ((sps->pcm_enabled_flag ? (sps->pcm.bit_depth_chroma - 1) : 0) << 8) | - ((sps->pcm_enabled_flag ? (sps->pcm.log2_min_pcm_cb_size - 3) : 0) << 12) | - ((sps->pcm_enabled_flag ? (sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size) : 0) << 14) | - (sps->pcm.loop_filter_disable_flag << 16) | - (sps->long_term_ref_pics_present_flag << 17) | - (sps->sps_temporal_mvp_enabled_flag << 18) | - (sps->sps_strong_intra_smoothing_enable_flag << 19) | + (sps->pcm_enabled << 3) | + ((sps->pcm_enabled ? (sps->pcm.bit_depth - 1) : 0) << 4) | + ((sps->pcm_enabled ? (sps->pcm.bit_depth_chroma - 1) : 0) << 8) | + ((sps->pcm_enabled ? (sps->pcm.log2_min_pcm_cb_size - 3) : 0) << 12) | + ((sps->pcm_enabled ? 
(sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size) : 0) << 14) | + (sps->pcm_loop_filter_disabled << 16) | + (sps->long_term_ref_pics_present << 17) | + (sps->temporal_mvp_enabled << 18) | + (sps->strong_intra_smoothing_enabled << 19) | (pps->dependent_slice_segments_enabled_flag << 20) | (pps->output_flag_present_flag << 21) | (pps->num_extra_slice_header_bits << 22) | diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c index 63ffb3d37c..2e639a7e41 100644 --- a/libavcodec/hevc_cabac.c +++ b/libavcodec/hevc_cabac.c @@ -408,7 +408,7 @@ void ff_hevc_save_states(HEVCLocalContext *lc, int ctb_addr_ts) (s->ps.sps->ctb_width == 2 && ctb_addr_ts % s->ps.sps->ctb_width == 0))) { memcpy(lc->common_cabac_state->state, lc->cabac_state, HEVC_CONTEXTS); - if (s->ps.sps->persistent_rice_adaptation_enabled_flag) { + if (s->ps.sps->persistent_rice_adaptation_enabled) { memcpy(lc->common_cabac_state->stat_coeff, lc->stat_coeff, HEVC_STAT_COEFFS); } } @@ -417,7 +417,7 @@ void ff_hevc_save_states(HEVCLocalContext *lc, int ctb_addr_ts) static void load_states(HEVCLocalContext *lc, const HEVCContext *s) { memcpy(lc->cabac_state, lc->common_cabac_state->state, HEVC_CONTEXTS); - if (s->ps.sps->persistent_rice_adaptation_enabled_flag) { + if (s->ps.sps->persistent_rice_adaptation_enabled) { memcpy(lc->stat_coeff, lc->common_cabac_state->stat_coeff, HEVC_STAT_COEFFS); } } @@ -683,7 +683,7 @@ int ff_hevc_part_mode_decode(HEVCLocalContext *lc, int log2_cb_size) return PART_NxN; // 000 } - if (!lc->parent->ps.sps->amp_enabled_flag) { + if (!lc->parent->ps.sps->amp_enabled) { if (GET_CABAC(PART_MODE_OFFSET + 1)) // 01 return PART_2NxN; return PART_Nx2N; @@ -1091,7 +1091,7 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, scale_m = 16; // default when no custom scaling lists. 
dc_scale = 16; - if (s->ps.sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) { + if (s->ps.sps->scaling_list_enabled && !(transform_skip_flag && log2_trafo_size > 2)) { const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ? &s->ps.pps->scaling_list : &s->ps.sps->scaling_list; int matrix_id = lc->cu.pred_mode != MODE_INTRA; @@ -1109,7 +1109,7 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, dc_scale = 0; } - if (lc->cu.pred_mode == MODE_INTER && s->ps.sps->explicit_rdpcm_enabled_flag && + if (lc->cu.pred_mode == MODE_INTER && s->ps.sps->explicit_rdpcm_enabled && (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) { explicit_rdpcm_flag = explicit_rdpcm_flag_decode(lc, c_idx); if (explicit_rdpcm_flag) { @@ -1240,7 +1240,7 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, }; const uint8_t *ctx_idx_map_p; int scf_offset = 0; - if (s->ps.sps->transform_skip_context_enabled_flag && + if (s->ps.sps->transform_skip_context_enabled && (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) { ctx_idx_map_p = &ctx_idx_map[4 * 16]; if (c_idx == 0) { @@ -1281,7 +1281,7 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, } } if (implicit_non_zero_coeff == 0) { - if (s->ps.sps->transform_skip_context_enabled_flag && + if (s->ps.sps->transform_skip_context_enabled && (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) { if (c_idx == 0) { scf_offset = 42; @@ -1326,7 +1326,7 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, // initialize first elem of coeff_bas_level_greater1_flag int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0; - if (s->ps.sps->persistent_rice_adaptation_enabled_flag) { + if (s->ps.sps->persistent_rice_adaptation_enabled) { if (!transform_skip_flag && !lc->cu.cu_transquant_bypass_flag) sb_type = 2 * (c_idx == 0 ? 
1 : 0); else @@ -1355,7 +1355,7 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, if (lc->cu.cu_transquant_bypass_flag || (lc->cu.pred_mode == MODE_INTRA && - s->ps.sps->implicit_rdpcm_enabled_flag && transform_skip_flag && + s->ps.sps->implicit_rdpcm_enabled && transform_skip_flag && (pred_mode_intra == 10 || pred_mode_intra == 26 )) || explicit_rdpcm_flag) sign_hidden = 0; @@ -1381,8 +1381,8 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, trans_coeff_level += last_coeff_abs_level_remaining; if (trans_coeff_level > (3 << c_rice_param)) - c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4); - if (s->ps.sps->persistent_rice_adaptation_enabled_flag && !rice_init) { + c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4); + if (s->ps.sps->persistent_rice_adaptation_enabled && !rice_init) { int c_rice_p_init = lc->stat_coeff[sb_type] / 4; if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init)) lc->stat_coeff[sb_type]++; @@ -1397,8 +1397,8 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, trans_coeff_level = 1 + last_coeff_abs_level_remaining; if (trans_coeff_level > (3 << c_rice_param)) - c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4); - if (s->ps.sps->persistent_rice_adaptation_enabled_flag && !rice_init) { + c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled ? 
c_rice_param + 1 : FFMIN(c_rice_param + 1, 4); + if (s->ps.sps->persistent_rice_adaptation_enabled && !rice_init) { int c_rice_p_init = lc->stat_coeff[sb_type] / 4; if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init)) lc->stat_coeff[sb_type]++; @@ -1417,7 +1417,7 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, trans_coeff_level = -trans_coeff_level; coeff_sign_flag <<= 1; if(!lc->cu.cu_transquant_bypass_flag) { - if (s->ps.sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) { + if (s->ps.sps->scaling_list_enabled && !(transform_skip_flag && log2_trafo_size > 2)) { if(y_c || x_c || log2_trafo_size < 4) { switch(log2_trafo_size) { case 3: pos = (y_c << 3) + x_c; break; @@ -1445,15 +1445,15 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, } if (lc->cu.cu_transquant_bypass_flag) { - if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag && + if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled && (pred_mode_intra == 10 || pred_mode_intra == 26))) { - int mode = s->ps.sps->implicit_rdpcm_enabled_flag ? (pred_mode_intra == 26) : explicit_rdpcm_dir_flag; + int mode = s->ps.sps->implicit_rdpcm_enabled ? (pred_mode_intra == 26) : explicit_rdpcm_dir_flag; s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode); } } else { if (transform_skip_flag) { - int rot = s->ps.sps->transform_skip_rotation_enabled_flag && + int rot = s->ps.sps->transform_skip_rotation_enabled && log2_trafo_size == 2 && lc->cu.pred_mode == MODE_INTRA; if (rot) { @@ -1463,7 +1463,7 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, int x0, int y0, s->hevcdsp.dequant(coeffs, log2_trafo_size); - if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag && + if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled && lc->cu.pred_mode == MODE_INTRA && (pred_mode_intra == 10 || pred_mode_intra == 26))) { int mode = explicit_rdpcm_flag ? 
explicit_rdpcm_dir_flag : (pred_mode_intra == 26); diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c index 0c45310ea6..70d3ca588a 100644 --- a/libavcodec/hevc_filter.c +++ b/libavcodec/hevc_filter.c @@ -224,7 +224,7 @@ static void restore_tqb_pixels(const HEVCContext *s, int x0, int y0, int width, int height, int c_idx) { if ( s->ps.pps->transquant_bypass_enable_flag || - (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) { + (s->ps.sps->pcm_loop_filter_disabled && s->ps.sps->pcm_enabled)) { int x, y; int min_pu_size = 1 << s->ps.sps->log2_min_pu_size; int hshift = s->ps.sps->hshift[c_idx]; @@ -330,7 +330,7 @@ static void sao_filter_CTB(HEVCLocalContext *lc, const HEVCContext *s, int x, in copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx, x_ctb, y_ctb); if (s->ps.pps->transquant_bypass_enable_flag || - (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) { + (s->ps.sps->pcm_loop_filter_disabled && s->ps.sps->pcm_enabled)) { dst = lc->edge_emu_buffer; stride_dst = 2*MAX_PB_SIZE; copy_CTB(dst, src, width << s->ps.sps->pixel_shift, height, stride_dst, stride_src); @@ -500,8 +500,8 @@ static void deblocking_filter_CTB(const HEVCContext *s, int x0, int y0) int cur_beta_offset = s->deblock[ctb].beta_offset; int left_tc_offset, left_beta_offset; int tc_offset, beta_offset; - int pcmf = (s->ps.sps->pcm_enabled_flag && - s->ps.sps->pcm.loop_filter_disable_flag) || + int pcmf = (s->ps.sps->pcm_enabled && + s->ps.sps->pcm_loop_filter_disabled) || s->ps.pps->transquant_bypass_enable_flag; if (x0) { diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c index f174d419e4..73fc5d6372 100644 --- a/libavcodec/hevc_parser.c +++ b/libavcodec/hevc_parser.c @@ -150,7 +150,7 @@ static int hevc_parse_slice_header(AVCodecParserContext *s, H2645NAL *nal, if (ps->pps->output_flag_present_flag) skip_bits1(gb); // pic_output_flag - if (ps->sps->separate_colour_plane_flag) + if (ps->sps->separate_colour_plane) 
skip_bits(gb, 2); // colour_plane_id if (!IS_IDR_NAL(nal)) { diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 8d5fc0d0ca..1af691414e 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -895,7 +895,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, return AVERROR_INVALIDDATA; } - sps->temporal_id_nesting_flag = get_bits(gb, 1); + sps->temporal_id_nesting = get_bits(gb, 1); if ((ret = parse_ptl(gb, avctx, &sps->ptl, sps->max_sub_layers)) < 0) return ret; @@ -913,9 +913,9 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } if (sps->chroma_format_idc == 3) - sps->separate_colour_plane_flag = get_bits1(gb); + sps->separate_colour_plane = get_bits1(gb); - if (sps->separate_colour_plane_flag) + if (sps->separate_colour_plane) sps->chroma_format_idc = 0; sps->width = get_ue_golomb_long(gb); @@ -924,8 +924,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->height, 0, avctx)) < 0) return ret; - sps->conformance_window_flag = get_bits1(gb); - if (sps->conformance_window_flag) { + sps->conformance_window = get_bits1(gb); + if (sps->conformance_window) { int vert_mult = hevc_sub_height_c[sps->chroma_format_idc]; int horiz_mult = hevc_sub_width_c[sps->chroma_format_idc]; sps->pic_conf_win.left_offset = get_ue_golomb_long(gb) * horiz_mult; @@ -982,8 +982,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, return AVERROR_INVALIDDATA; } - sps->sublayer_ordering_info_flag = get_bits1(gb); - start = sps->sublayer_ordering_info_flag ? 0 : sps->max_sub_layers - 1; + sps->sublayer_ordering_info = get_bits1(gb); + start = sps->sublayer_ordering_info ? 
0 : sps->max_sub_layers - 1; for (i = start; i < sps->max_sub_layers; i++) { sps->temporal_layer[i].max_dec_pic_buffering = get_ue_golomb_long(gb) + 1; sps->temporal_layer[i].num_reorder_pics = get_ue_golomb_long(gb); @@ -1004,7 +1004,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } - if (!sps->sublayer_ordering_info_flag) { + if (!sps->sublayer_ordering_info) { for (i = 0; i < start; i++) { sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[start].max_dec_pic_buffering; sps->temporal_layer[i].num_reorder_pics = sps->temporal_layer[start].num_reorder_pics; @@ -1043,8 +1043,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, sps->max_transform_hierarchy_depth_inter = get_ue_golomb_long(gb); sps->max_transform_hierarchy_depth_intra = get_ue_golomb_long(gb); - sps->scaling_list_enable_flag = get_bits1(gb); - if (sps->scaling_list_enable_flag) { + sps->scaling_list_enabled = get_bits1(gb); + if (sps->scaling_list_enabled) { set_default_scaling_list_data(&sps->scaling_list); if (get_bits1(gb)) { @@ -1054,11 +1054,11 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } - sps->amp_enabled_flag = get_bits1(gb); - sps->sao_enabled = get_bits1(gb); + sps->amp_enabled = get_bits1(gb); + sps->sao_enabled = get_bits1(gb); - sps->pcm_enabled_flag = get_bits1(gb); - if (sps->pcm_enabled_flag) { + sps->pcm_enabled = get_bits1(gb); + if (sps->pcm_enabled) { sps->pcm.bit_depth = get_bits(gb, 4) + 1; sps->pcm.bit_depth_chroma = get_bits(gb, 4) + 1; sps->pcm.log2_min_pcm_cb_size = get_ue_golomb_long(gb) + 3; @@ -1071,7 +1071,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, return AVERROR_INVALIDDATA; } - sps->pcm.loop_filter_disable_flag = get_bits1(gb); + sps->pcm_loop_filter_disabled = get_bits1(gb); } sps->nb_st_rps = get_ue_golomb_long(gb); @@ -1086,8 +1086,8 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int 
*sps_id, return ret; } - sps->long_term_ref_pics_present_flag = get_bits1(gb); - if (sps->long_term_ref_pics_present_flag) { + sps->long_term_ref_pics_present = get_bits1(gb); + if (sps->long_term_ref_pics_present) { sps->num_long_term_ref_pics_sps = get_ue_golomb_long(gb); if (sps->num_long_term_ref_pics_sps > HEVC_MAX_LONG_TERM_REF_PICS) { av_log(avctx, AV_LOG_ERROR, "Too many long term ref pics: %d.\n", @@ -1102,54 +1102,53 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } - sps->sps_temporal_mvp_enabled_flag = get_bits1(gb); - sps->sps_strong_intra_smoothing_enable_flag = get_bits1(gb); + sps->temporal_mvp_enabled = get_bits1(gb); + sps->strong_intra_smoothing_enabled = get_bits1(gb); sps->vui.common.sar = (AVRational){0, 1}; sps->vui_present = get_bits1(gb); if (sps->vui_present) decode_vui(gb, avctx, apply_defdispwin, sps); - sps->sps_extension_present_flag = get_bits1(gb); - if (sps->sps_extension_present_flag) { - sps->sps_range_extension_flag = get_bits1(gb); - sps->sps_multilayer_extension_flag = get_bits1(gb); - sps->sps_3d_extension_flag = get_bits1(gb); - sps->sps_scc_extension_flag = get_bits1(gb); + sps->extension_present = get_bits1(gb); + if (sps->extension_present) { + sps->range_extension = get_bits1(gb); + sps->multilayer_extension = get_bits1(gb); + sps->sps_3d_extension = get_bits1(gb); + sps->scc_extension = get_bits1(gb); skip_bits(gb, 4); // sps_extension_4bits - if (sps->sps_range_extension_flag) { - sps->transform_skip_rotation_enabled_flag = get_bits1(gb); - sps->transform_skip_context_enabled_flag = get_bits1(gb); - sps->implicit_rdpcm_enabled_flag = get_bits1(gb); + if (sps->range_extension) { + sps->transform_skip_rotation_enabled = get_bits1(gb); + sps->transform_skip_context_enabled = get_bits1(gb); + sps->implicit_rdpcm_enabled = get_bits1(gb); + sps->explicit_rdpcm_enabled = get_bits1(gb); - sps->explicit_rdpcm_enabled_flag = get_bits1(gb); - - sps->extended_precision_processing_flag = 
get_bits1(gb); - if (sps->extended_precision_processing_flag) + sps->extended_precision_processing = get_bits1(gb); + if (sps->extended_precision_processing) av_log(avctx, AV_LOG_WARNING, "extended_precision_processing_flag not yet implemented\n"); - sps->intra_smoothing_disabled_flag = get_bits1(gb); - sps->high_precision_offsets_enabled_flag = get_bits1(gb); - if (sps->high_precision_offsets_enabled_flag) + sps->intra_smoothing_disabled = get_bits1(gb); + sps->high_precision_offsets_enabled = get_bits1(gb); + if (sps->high_precision_offsets_enabled) av_log(avctx, AV_LOG_WARNING, "high_precision_offsets_enabled_flag not yet implemented\n"); - sps->persistent_rice_adaptation_enabled_flag = get_bits1(gb); + sps->persistent_rice_adaptation_enabled = get_bits1(gb); - sps->cabac_bypass_alignment_enabled_flag = get_bits1(gb); - if (sps->cabac_bypass_alignment_enabled_flag) + sps->cabac_bypass_alignment_enabled = get_bits1(gb); + if (sps->cabac_bypass_alignment_enabled) av_log(avctx, AV_LOG_WARNING, "cabac_bypass_alignment_enabled_flag not yet implemented\n"); } - if (sps->sps_multilayer_extension_flag) { + if (sps->multilayer_extension) { skip_bits1(gb); // inter_view_mv_vert_constraint_flag av_log(avctx, AV_LOG_WARNING, "sps_multilayer_extension_flag not yet implemented\n"); } - if (sps->sps_3d_extension_flag) { + if (sps->sps_3d_extension) { for (i = 0; i <= 1; i++) { skip_bits1(gb); // iv_di_mc_enabled_flag skip_bits1(gb); // iv_mv_scal_enabled_flag @@ -1173,15 +1172,15 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, "sps_3d_extension_flag not yet implemented\n"); } - if (sps->sps_scc_extension_flag) { - sps->sps_curr_pic_ref_enabled_flag = get_bits1(gb); - sps->palette_mode_enabled_flag = get_bits1(gb); - if (sps->palette_mode_enabled_flag) { + if (sps->scc_extension) { + sps->curr_pic_ref_enabled = get_bits1(gb); + sps->palette_mode_enabled = get_bits1(gb); + if (sps->palette_mode_enabled) { sps->palette_max_size = get_ue_golomb(gb); 
sps->delta_palette_max_predictor_size = get_ue_golomb(gb); - sps->sps_palette_predictor_initializers_present_flag = get_bits1(gb); + sps->palette_predictor_initializers_present = get_bits1(gb); - if (sps->sps_palette_predictor_initializers_present_flag) { + if (sps->palette_predictor_initializers_present) { sps->sps_num_palette_predictor_initializers = get_ue_golomb(gb) + 1; if (sps->sps_num_palette_predictor_initializers > HEVC_MAX_PALETTE_PREDICTOR_SIZE) { av_log(avctx, AV_LOG_ERROR, @@ -1198,7 +1197,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id, } } sps->motion_vector_resolution_control_idc = get_bits(gb, 2); - sps->intra_boundary_filtering_disabled_flag = get_bits1(gb); + sps->intra_boundary_filtering_disabled = get_bits1(gb); } } if (apply_defdispwin) { diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index b2f3a8dbd1..9801b4347f 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -188,11 +188,9 @@ typedef struct ScalingList { typedef struct HEVCSPS { unsigned vps_id; int chroma_format_idc; - uint8_t separate_colour_plane_flag; HEVCWindow output_window; - uint8_t conformance_window_flag; HEVCWindow pic_conf_win; HEVCHdrParams hdr; @@ -203,32 +201,23 @@ typedef struct HEVCSPS { enum AVPixelFormat pix_fmt; unsigned int log2_max_poc_lsb; - int pcm_enabled_flag; - uint8_t sublayer_ordering_info_flag; int max_sub_layers; struct { int max_dec_pic_buffering; int num_reorder_pics; int max_latency_increase; } temporal_layer[HEVC_MAX_SUB_LAYERS]; - uint8_t temporal_id_nesting_flag; int vui_present; VUI vui; PTL ptl; - uint8_t sps_extension_present_flag; - uint8_t scaling_list_enable_flag; ScalingList scaling_list; unsigned int nb_st_rps; ShortTermRPS st_rps[HEVC_MAX_SHORT_TERM_REF_PIC_SETS]; - uint8_t amp_enabled_flag; - uint8_t sao_enabled; - - uint8_t long_term_ref_pics_present_flag; uint16_t lt_ref_pic_poc_lsb_sps[HEVC_MAX_LONG_TERM_REF_PICS]; uint32_t used_by_curr_pic_lt; uint8_t num_long_term_ref_pics_sps; @@ 
-238,10 +227,7 @@ typedef struct HEVCSPS { uint8_t bit_depth_chroma; unsigned int log2_min_pcm_cb_size; unsigned int log2_max_pcm_cb_size; - uint8_t loop_filter_disable_flag; } pcm; - uint8_t sps_temporal_mvp_enabled_flag; - uint8_t sps_strong_intra_smoothing_enable_flag; unsigned int log2_min_cb_size; unsigned int log2_diff_max_min_coding_block_size; @@ -254,30 +240,44 @@ typedef struct HEVCSPS { int max_transform_hierarchy_depth_inter; int max_transform_hierarchy_depth_intra; - int sps_range_extension_flag; - int transform_skip_rotation_enabled_flag; - int transform_skip_context_enabled_flag; - int implicit_rdpcm_enabled_flag; - int explicit_rdpcm_enabled_flag; - int extended_precision_processing_flag; - int intra_smoothing_disabled_flag; - int high_precision_offsets_enabled_flag; - int persistent_rice_adaptation_enabled_flag; - int cabac_bypass_alignment_enabled_flag; + unsigned separate_colour_plane:1; + unsigned conformance_window:1; + unsigned pcm_enabled:1; + unsigned pcm_loop_filter_disabled:1; + unsigned sublayer_ordering_info:1; + unsigned temporal_id_nesting:1; + unsigned extension_present:1; + unsigned scaling_list_enabled:1; + unsigned amp_enabled:1; + unsigned sao_enabled:1; + unsigned long_term_ref_pics_present:1; + unsigned temporal_mvp_enabled:1; + unsigned strong_intra_smoothing_enabled:1; + unsigned range_extension:1; + unsigned transform_skip_rotation_enabled:1; + unsigned transform_skip_context_enabled:1; + unsigned implicit_rdpcm_enabled:1; + unsigned explicit_rdpcm_enabled:1; + unsigned extended_precision_processing:1; + unsigned intra_smoothing_disabled:1; + unsigned high_precision_offsets_enabled:1; + unsigned persistent_rice_adaptation_enabled:1; + unsigned cabac_bypass_alignment_enabled:1; - int sps_multilayer_extension_flag; - int sps_3d_extension_flag; + unsigned multilayer_extension:1; + unsigned sps_3d_extension:1; + + unsigned scc_extension:1; + unsigned curr_pic_ref_enabled:1; + unsigned palette_mode_enabled:1; + unsigned 
palette_predictor_initializers_present:1; + unsigned intra_boundary_filtering_disabled:1; - int sps_scc_extension_flag; - int sps_curr_pic_ref_enabled_flag; - int palette_mode_enabled_flag; int palette_max_size; int delta_palette_max_predictor_size; - int sps_palette_predictor_initializers_present_flag; int sps_num_palette_predictor_initializers; int sps_palette_predictor_initializer[3][HEVC_MAX_PALETTE_PREDICTOR_SIZE]; int motion_vector_resolution_control_idc; - int intra_boundary_filtering_disabled_flag; ///< coded frame dimension in various units int width; diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 4a7046bdec..eb03c45e8a 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -272,7 +272,7 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb) int i; rps->nb_refs = 0; - if (!sps->long_term_ref_pics_present_flag) + if (!sps->long_term_ref_pics_present) return 0; if (sps->num_long_term_ref_pics_sps > 0) @@ -705,7 +705,7 @@ static int hls_slice_header(HEVCContext *s) if (s->ps.pps->output_flag_present_flag) sh->pic_output_flag = get_bits1(gb); - if (s->ps.sps->separate_colour_plane_flag) + if (s->ps.sps->separate_colour_plane) sh->colour_plane_id = get_bits(gb, 2); if (!IS_IDR(s)) { @@ -753,7 +753,7 @@ static int hls_slice_header(HEVCContext *s) } sh->long_term_ref_pic_set_size = pos - get_bits_left(gb); - if (s->ps.sps->sps_temporal_mvp_enabled_flag) + if (s->ps.sps->temporal_mvp_enabled) sh->slice_temporal_mvp_enabled_flag = get_bits1(gb); else sh->slice_temporal_mvp_enabled_flag = 0; @@ -2294,7 +2294,7 @@ static int hls_coding_unit(HEVCLocalContext *lc, const HEVCContext *s, int x0, i } if (lc->cu.pred_mode == MODE_INTRA) { - if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag && + if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled && log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size && log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) { pcm_flag = ff_hevc_pcm_flag_decode(lc); @@ 
-2302,7 +2302,7 @@ static int hls_coding_unit(HEVCLocalContext *lc, const HEVCContext *s, int x0, i if (pcm_flag) { intra_prediction_unit_default_value(lc, x0, y0, log2_cb_size); ret = hls_pcm_sample(lc, x0, y0, log2_cb_size); - if (s->ps.sps->pcm.loop_filter_disable_flag) + if (s->ps.sps->pcm_loop_filter_disabled) set_deblocking_bypass(s, x0, y0, log2_cb_size); if (ret < 0) diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c index 46bd806523..9082a816e0 100644 --- a/libavcodec/hevcpred_template.c +++ b/libavcodec/hevcpred_template.c @@ -285,14 +285,14 @@ do { \ top[-1] = left[-1]; // Filtering process - if (!s->ps.sps->intra_smoothing_disabled_flag && (c_idx == 0 || s->ps.sps->chroma_format_idc == 3)) { + if (!s->ps.sps->intra_smoothing_disabled && (c_idx == 0 || s->ps.sps->chroma_format_idc == 3)) { if (mode != INTRA_DC && size != 4){ int intra_hor_ver_dist_thresh[] = { 7, 1, 0 }; int min_dist_vert_hor = FFMIN(FFABS((int)(mode - 26U)), FFABS((int)(mode - 10U))); if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) { int threshold = 1 << (BIT_DEPTH - 5); - if (s->ps.sps->sps_strong_intra_smoothing_enable_flag && c_idx == 0 && + if (s->ps.sps->strong_intra_smoothing_enabled && c_idx == 0 && log2_size == 5 && FFABS(top[-1] + top[63] - 2 * top[31]) < threshold && FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) { diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c index b056ee986b..d9137519ad 100644 --- a/libavcodec/mips/hevcpred_msa.c +++ b/libavcodec/mips/hevcpred_msa.c @@ -2369,7 +2369,7 @@ void ff_intra_pred_8_16x16_msa(HEVCLocalContext *lc, int x0, int y0, int c_idx) top[-1] = left[-1]; - if (!s->ps.sps->intra_smoothing_disabled_flag + if (!s->ps.sps->intra_smoothing_disabled && (c_idx == 0 || s->ps.sps->chroma_format_idc == 3)) { if (mode != INTRA_DC && 16 != 4) { int intra_hor_ver_dist_thresh[] = { 7, 1, 0 }; @@ -2886,7 +2886,7 @@ void ff_intra_pred_8_32x32_msa(HEVCLocalContext *lc, int x0, 
int y0, int c_idx) top[-1] = left[-1]; - if (!s->ps.sps->intra_smoothing_disabled_flag + if (!s->ps.sps->intra_smoothing_disabled && (c_idx == 0 || s->ps.sps->chroma_format_idc == 3)) { if (mode != INTRA_DC && 32 != 4) { int intra_hor_ver_dist_thresh[] = { 7, 1, 0 }; @@ -2901,7 +2901,7 @@ void ff_intra_pred_8_32x32_msa(HEVCLocalContext *lc, int x0, int y0, int c_idx) 0 ? ((int) (mode - 26U)) : (-((int) (mode - 26U)))))); if (min_dist_vert_hor > intra_hor_ver_dist_thresh[5 - 3]) { int threshold = 1 << (8 - 5); - if (s->ps.sps->sps_strong_intra_smoothing_enable_flag + if (s->ps.sps->strong_intra_smoothing_enabled && c_idx == 0 && ((top[-1] + top[63] - 2 * top[31]) >= 0 ? (top[-1] + top[63] - diff --git a/libavcodec/nvdec_hevc.c b/libavcodec/nvdec_hevc.c index f26ced71ef..3d704666d0 100644 --- a/libavcodec/nvdec_hevc.c +++ b/libavcodec/nvdec_hevc.c @@ -105,44 +105,44 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx, .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, .log2_min_transform_block_size_minus2 = sps->log2_min_tb_size - 2, .log2_diff_max_min_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size, - .pcm_enabled_flag = sps->pcm_enabled_flag, - .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm_enabled_flag ? sps->pcm.log2_min_pcm_cb_size - 3 : 0, + .pcm_enabled_flag = sps->pcm_enabled, + .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm_enabled ? sps->pcm.log2_min_pcm_cb_size - 3 : 0, .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, - .pcm_sample_bit_depth_luma_minus1 = sps->pcm_enabled_flag ? sps->pcm.bit_depth - 1 : 0, - .pcm_sample_bit_depth_chroma_minus1 = sps->pcm_enabled_flag ? sps->pcm.bit_depth_chroma - 1 : 0, + .pcm_sample_bit_depth_luma_minus1 = sps->pcm_enabled ? sps->pcm.bit_depth - 1 : 0, + .pcm_sample_bit_depth_chroma_minus1 = sps->pcm_enabled ? 
sps->pcm.bit_depth_chroma - 1 : 0, #if NVDECAPI_CHECK_VERSION(8, 1) .log2_max_transform_skip_block_size_minus2 = pps->log2_max_transform_skip_block_size - 2, .log2_sao_offset_scale_luma = pps->log2_sao_offset_scale_luma, .log2_sao_offset_scale_chroma = pps->log2_sao_offset_scale_chroma, - .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag, + .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled, #endif - .pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag, - .strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag, + .pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled, + .strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled, .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra, .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, - .amp_enabled_flag = sps->amp_enabled_flag, - .separate_colour_plane_flag = sps->separate_colour_plane_flag, + .amp_enabled_flag = sps->amp_enabled, + .separate_colour_plane_flag = sps->separate_colour_plane, .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, .num_short_term_ref_pic_sets = sps->nb_st_rps, - .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag, + .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present, .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, - .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag, + .sps_temporal_mvp_enabled_flag = sps->temporal_mvp_enabled, .sample_adaptive_offset_enabled_flag = sps->sao_enabled, - .scaling_list_enable_flag = sps->scaling_list_enable_flag, + .scaling_list_enable_flag = sps->scaling_list_enabled, .IrapPicFlag = IS_IRAP(s), .IdrPicFlag = IS_IDR(s), .bit_depth_luma_minus8 = sps->bit_depth - 8, .bit_depth_chroma_minus8 = sps->bit_depth - 8, #if NVDECAPI_CHECK_VERSION(9, 0) - .sps_range_extension_flag = sps->sps_range_extension_flag, - 
.transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag, - .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag, - .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag, - .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag, - .extended_precision_processing_flag = sps->extended_precision_processing_flag, - .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag, - .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag, - .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag, + .sps_range_extension_flag = sps->range_extension, + .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled, + .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled, + .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled, + .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled, + .extended_precision_processing_flag = sps->extended_precision_processing, + .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled, + .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled, + .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled, #endif .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag, diff --git a/libavcodec/qsvenc_hevc.c b/libavcodec/qsvenc_hevc.c index 2ccbe0464b..40cff6335a 100644 --- a/libavcodec/qsvenc_hevc.c +++ b/libavcodec/qsvenc_hevc.c @@ -108,7 +108,7 @@ static int generate_fake_vps(QSVEncContext *q, AVCodecContext *avctx) /* generate the VPS */ vps.vps_max_layers = 1; vps.vps_max_sub_layers = sps.max_sub_layers; - vps.vps_temporal_id_nesting_flag = sps.temporal_id_nesting_flag; + vps.vps_temporal_id_nesting_flag = sps.temporal_id_nesting; memcpy(&vps.ptl, &sps.ptl, sizeof(vps.ptl)); vps.vps_sub_layer_ordering_info_present_flag = 1; for (i = 0; i < HEVC_MAX_SUB_LAYERS; i++) { diff --git 
a/libavcodec/vaapi_hevc.c b/libavcodec/vaapi_hevc.c index 3bdd2dd1b8..5af297c2af 100644 --- a/libavcodec/vaapi_hevc.c +++ b/libavcodec/vaapi_hevc.c @@ -171,12 +171,12 @@ static int vaapi_hevc_start_frame(AVCodecContext *avctx, .pic_fields.bits = { .chroma_format_idc = sps->chroma_format_idc, .tiles_enabled_flag = pps->tiles_enabled_flag, - .separate_colour_plane_flag = sps->separate_colour_plane_flag, - .pcm_enabled_flag = sps->pcm_enabled_flag, - .scaling_list_enabled_flag = sps->scaling_list_enable_flag, + .separate_colour_plane_flag = sps->separate_colour_plane, + .pcm_enabled_flag = sps->pcm_enabled, + .scaling_list_enabled_flag = sps->scaling_list_enabled, .transform_skip_enabled_flag = pps->transform_skip_enabled_flag, - .amp_enabled_flag = sps->amp_enabled_flag, - .strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag, + .amp_enabled_flag = sps->amp_enabled, + .strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled, .sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag, .constrained_intra_pred_flag = pps->constrained_intra_pred_flag, .cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag, @@ -186,12 +186,12 @@ static int vaapi_hevc_start_frame(AVCodecContext *avctx, .entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag, .pps_loop_filter_across_slices_enabled_flag = pps->seq_loop_filter_across_slices_enabled_flag, .loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag, - .pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag, + .pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled, }, .slice_parsing_fields.bits = { .lists_modification_present_flag = pps->lists_modification_present_flag, - .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag, - .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag, + .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present, + .sps_temporal_mvp_enabled_flag = 
sps->temporal_mvp_enabled, .cabac_init_present_flag = pps->cabac_init_present_flag, .output_flag_present_flag = pps->output_flag_present_flag, .dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag, @@ -231,15 +231,15 @@ static int vaapi_hevc_start_frame(AVCodecContext *avctx, avctx->profile == AV_PROFILE_HEVC_SCC) { pic->pic_param.rext = (VAPictureParameterBufferHEVCRext) { .range_extension_pic_fields.bits = { - .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag, - .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag, - .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag, - .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag, - .extended_precision_processing_flag = sps->extended_precision_processing_flag, - .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag, - .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag, - .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag, - .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag, + .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled, + .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled, + .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled, + .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled, + .extended_precision_processing_flag = sps->extended_precision_processing, + .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled, + .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled, + .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled, + .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled, .cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag, .chroma_qp_offset_list_enabled_flag = pps->chroma_qp_offset_list_enabled_flag, }, @@ 
-258,7 +258,7 @@ static int vaapi_hevc_start_frame(AVCodecContext *avctx, pre_palette_size = pps->pps_palette_predictor_initializers_present_flag ? pps->pps_num_palette_predictor_initializers : - (sps->sps_palette_predictor_initializers_present_flag ? + (sps->palette_predictor_initializers_present ? sps->sps_num_palette_predictor_initializers : 0); @@ -266,9 +266,9 @@ static int vaapi_hevc_start_frame(AVCodecContext *avctx, pic->pic_param.scc = (VAPictureParameterBufferHEVCScc) { .screen_content_pic_fields.bits = { .pps_curr_pic_ref_enabled_flag = pps->pps_curr_pic_ref_enabled_flag, - .palette_mode_enabled_flag = sps->palette_mode_enabled_flag, + .palette_mode_enabled_flag = sps->palette_mode_enabled, .motion_vector_resolution_control_idc = sps->motion_vector_resolution_control_idc, - .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disabled_flag, + .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disabled, .residual_adaptive_colour_transform_enabled_flag = pps->residual_adaptive_colour_transform_enabled_flag, .pps_slice_act_qp_offsets_present_flag = pps->pps_slice_act_qp_offsets_present_flag, @@ -305,7 +305,7 @@ static int vaapi_hevc_start_frame(AVCodecContext *avctx, if (pps->scaling_list_data_present_flag) scaling_list = &pps->scaling_list; - else if (sps->scaling_list_enable_flag) + else if (sps->scaling_list_enabled) scaling_list = &sps->scaling_list; if (scaling_list) { diff --git a/libavcodec/vdpau_hevc.c b/libavcodec/vdpau_hevc.c index 4cd7ce5621..b029f9c42e 100644 --- a/libavcodec/vdpau_hevc.c +++ b/libavcodec/vdpau_hevc.c @@ -53,7 +53,7 @@ static int vdpau_hevc_start_frame(AVCodecContext *avctx, /* SPS */ info->chroma_format_idc = sps->chroma_format_idc; - info->separate_colour_plane_flag = sps->separate_colour_plane_flag; + info->separate_colour_plane_flag = sps->separate_colour_plane; info->pic_width_in_luma_samples = sps->width; info->pic_height_in_luma_samples = sps->height; info->bit_depth_luma_minus8 = 
sps->bit_depth - 8; @@ -68,7 +68,7 @@ static int vdpau_hevc_start_frame(AVCodecContext *avctx, info->log2_diff_max_min_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size; info->max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter; info->max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra; - info->scaling_list_enabled_flag = sps->scaling_list_enable_flag; + info->scaling_list_enabled_flag = sps->scaling_list_enabled; /* Scaling lists, in diagonal order, to be used for this frame. */ for (size_t i = 0; i < 6; i++) { for (size_t j = 0; j < 16; j++) { @@ -100,9 +100,9 @@ static int vdpau_hevc_start_frame(AVCodecContext *avctx, info->ScalingListDCCoeff32x32[i] = sl->sl_dc[1][i * 3]; } } - info->amp_enabled_flag = sps->amp_enabled_flag; + info->amp_enabled_flag = sps->amp_enabled; info->sample_adaptive_offset_enabled_flag = sps->sao_enabled; - info->pcm_enabled_flag = sps->pcm_enabled_flag; + info->pcm_enabled_flag = sps->pcm_enabled; if (info->pcm_enabled_flag) { /* Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */ info->pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1; @@ -113,17 +113,17 @@ static int vdpau_hevc_start_frame(AVCodecContext *avctx, /* Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */ info->log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size; /* Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */ - info->pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag; + info->pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled; } /* Per spec, when zero, assume short_term_ref_pic_set_sps_flag is also zero. 
*/ info->num_short_term_ref_pic_sets = sps->nb_st_rps; - info->long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag; + info->long_term_ref_pics_present_flag = sps->long_term_ref_pics_present; /* Only needed if long_term_ref_pics_present_flag is set. Ignored otherwise. */ info->num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps; - info->sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag; - info->strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag; + info->sps_temporal_mvp_enabled_flag = sps->temporal_mvp_enabled; + info->strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled; /* Copy the HEVC Picture Parameter Set bitstream fields. */ info->dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag; @@ -360,17 +360,17 @@ static int vdpau_hevc_start_frame(AVCodecContext *avctx, } #ifdef VDP_YCBCR_FORMAT_Y_U_V_444 - if (sps->sps_range_extension_flag) { + if (sps->range_extension) { info2->sps_range_extension_flag = 1; - info2->transformSkipRotationEnableFlag = sps->transform_skip_rotation_enabled_flag; - info2->transformSkipContextEnableFlag = sps->transform_skip_context_enabled_flag; - info2->implicitRdpcmEnableFlag = sps->implicit_rdpcm_enabled_flag; - info2->explicitRdpcmEnableFlag = sps->explicit_rdpcm_enabled_flag; - info2->extendedPrecisionProcessingFlag = sps->extended_precision_processing_flag; - info2->intraSmoothingDisabledFlag = sps->intra_smoothing_disabled_flag; - info2->highPrecisionOffsetsEnableFlag = sps->high_precision_offsets_enabled_flag; - info2->persistentRiceAdaptationEnableFlag = sps->persistent_rice_adaptation_enabled_flag; - info2->cabacBypassAlignmentEnableFlag = sps->cabac_bypass_alignment_enabled_flag; + info2->transformSkipRotationEnableFlag = sps->transform_skip_rotation_enabled; + info2->transformSkipContextEnableFlag = sps->transform_skip_context_enabled; + info2->implicitRdpcmEnableFlag = sps->implicit_rdpcm_enabled; + 
info2->explicitRdpcmEnableFlag = sps->explicit_rdpcm_enabled; + info2->extendedPrecisionProcessingFlag = sps->extended_precision_processing; + info2->intraSmoothingDisabledFlag = sps->intra_smoothing_disabled; + info2->highPrecisionOffsetsEnableFlag = sps->high_precision_offsets_enabled; + info2->persistentRiceAdaptationEnableFlag = sps->persistent_rice_adaptation_enabled; + info2->cabacBypassAlignmentEnableFlag = sps->cabac_bypass_alignment_enabled; } else { info2->sps_range_extension_flag = 0; } diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c index 9b40f5ad58..df86049d22 100644 --- a/libavcodec/vulkan_hevc.c +++ b/libavcodec/vulkan_hevc.c @@ -395,36 +395,36 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, *vksps = (StdVideoH265SequenceParameterSet) { .flags = (StdVideoH265SpsFlags) { - .sps_temporal_id_nesting_flag = sps->temporal_id_nesting_flag, - .separate_colour_plane_flag = sps->separate_colour_plane_flag, - .conformance_window_flag = sps->conformance_window_flag, - .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info_flag, - .scaling_list_enabled_flag = sps->scaling_list_enable_flag, - .sps_scaling_list_data_present_flag = sps->scaling_list_enable_flag, - .amp_enabled_flag = sps->amp_enabled_flag, + .sps_temporal_id_nesting_flag = sps->temporal_id_nesting, + .separate_colour_plane_flag = sps->separate_colour_plane, + .conformance_window_flag = sps->conformance_window, + .sps_sub_layer_ordering_info_present_flag = sps->sublayer_ordering_info, + .scaling_list_enabled_flag = sps->scaling_list_enabled, + .sps_scaling_list_data_present_flag = sps->scaling_list_enabled, + .amp_enabled_flag = sps->amp_enabled, .sample_adaptive_offset_enabled_flag = sps->sao_enabled, - .pcm_enabled_flag = sps->pcm_enabled_flag, - .pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag, - .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag, - .sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag, 
- .strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag, + .pcm_enabled_flag = sps->pcm_enabled, + .pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled, + .long_term_ref_pics_present_flag = sps->long_term_ref_pics_present, + .sps_temporal_mvp_enabled_flag = sps->temporal_mvp_enabled, + .strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled, .vui_parameters_present_flag = sps->vui_present, - .sps_extension_present_flag = sps->sps_extension_present_flag, - .sps_range_extension_flag = sps->sps_range_extension_flag, - .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled_flag, - .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled_flag, - .implicit_rdpcm_enabled_flag = sps->implicit_rdpcm_enabled_flag, - .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled_flag, - .extended_precision_processing_flag = sps->extended_precision_processing_flag, - .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled_flag, - .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled_flag, - .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled_flag, - .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled_flag, - .sps_scc_extension_flag = sps->sps_scc_extension_flag, - .sps_curr_pic_ref_enabled_flag = sps->sps_curr_pic_ref_enabled_flag, - .palette_mode_enabled_flag = sps->palette_mode_enabled_flag, - .sps_palette_predictor_initializers_present_flag = sps->sps_palette_predictor_initializers_present_flag, - .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disabled_flag, + .sps_extension_present_flag = sps->extension_present, + .sps_range_extension_flag = sps->range_extension, + .transform_skip_rotation_enabled_flag = sps->transform_skip_rotation_enabled, + .transform_skip_context_enabled_flag = sps->transform_skip_context_enabled, + .implicit_rdpcm_enabled_flag = 
sps->implicit_rdpcm_enabled, + .explicit_rdpcm_enabled_flag = sps->explicit_rdpcm_enabled, + .extended_precision_processing_flag = sps->extended_precision_processing, + .intra_smoothing_disabled_flag = sps->intra_smoothing_disabled, + .high_precision_offsets_enabled_flag = sps->high_precision_offsets_enabled, + .persistent_rice_adaptation_enabled_flag = sps->persistent_rice_adaptation_enabled, + .cabac_bypass_alignment_enabled_flag = sps->cabac_bypass_alignment_enabled, + .sps_scc_extension_flag = sps->scc_extension, + .sps_curr_pic_ref_enabled_flag = sps->curr_pic_ref_enabled, + .palette_mode_enabled_flag = sps->palette_mode_enabled, + .sps_palette_predictor_initializers_present_flag = sps->palette_predictor_initializers_present, + .intra_boundary_filtering_disabled_flag = sps->intra_boundary_filtering_disabled, }, .chroma_format_idc = sps->chroma_format_idc, .pic_width_in_luma_samples = sps->width, -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 06/10] lavc/hevc_ps/HEVCSPS: change flags into size-1 bitfields 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 06/10] lavc/hevc_ps/HEVCSPS: change flags into size-1 bitfields Anton Khirnov @ 2024-04-11 11:55 ` Andreas Rheinhardt 2024-05-24 9:07 ` Anton Khirnov 0 siblings, 1 reply; 22+ messages in thread From: Andreas Rheinhardt @ 2024-04-11 11:55 UTC (permalink / raw) To: ffmpeg-devel Anton Khirnov: > Reduces sizeof(HEVCSPS) by 96 bytes. > > Also improve flag names: drop redundant suffixes and prefixes, and > consistently use disabled/enabled. > --- > libavcodec/dxva2_hevc.c | 24 ++++----- > libavcodec/hevc_cabac.c | 36 ++++++------- > libavcodec/hevc_filter.c | 8 +-- > libavcodec/hevc_parser.c | 2 +- > libavcodec/hevc_ps.c | 95 +++++++++++++++++----------------- > libavcodec/hevc_ps.h | 62 +++++++++++----------- > libavcodec/hevcdec.c | 10 ++-- > libavcodec/hevcpred_template.c | 4 +- > libavcodec/mips/hevcpred_msa.c | 6 +-- > libavcodec/nvdec_hevc.c | 42 +++++++-------- > libavcodec/qsvenc_hevc.c | 2 +- > libavcodec/vaapi_hevc.c | 42 +++++++-------- > libavcodec/vdpau_hevc.c | 36 ++++++------- > libavcodec/vulkan_hevc.c | 56 ++++++++++---------- > 14 files changed, 212 insertions(+), 213 deletions(-) > Making this a bitfield is not worth it. It will necessitate masking operations on every access to these fields which may increase the size of .text by more than 96B; it may even be that this more than offsets the savings of 96B from using a bitfield. - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 06/10] lavc/hevc_ps/HEVCSPS: change flags into size-1 bitfields 2024-04-11 11:55 ` Andreas Rheinhardt @ 2024-05-24 9:07 ` Anton Khirnov 0 siblings, 0 replies; 22+ messages in thread From: Anton Khirnov @ 2024-05-24 9:07 UTC (permalink / raw) To: FFmpeg development discussions and patches Quoting Andreas Rheinhardt (2024-04-11 13:55:05) > Anton Khirnov: > > Reduces sizeof(HEVCSPS) by 96 bytes. > > > > Also improve flag names: drop redundant suffixes and prefixes, and > > consistently use disabled/enabled. > > --- > > libavcodec/dxva2_hevc.c | 24 ++++----- > > libavcodec/hevc_cabac.c | 36 ++++++------- > > libavcodec/hevc_filter.c | 8 +-- > > libavcodec/hevc_parser.c | 2 +- > > libavcodec/hevc_ps.c | 95 +++++++++++++++++----------------- > > libavcodec/hevc_ps.h | 62 +++++++++++----------- > > libavcodec/hevcdec.c | 10 ++-- > > libavcodec/hevcpred_template.c | 4 +- > > libavcodec/mips/hevcpred_msa.c | 6 +-- > > libavcodec/nvdec_hevc.c | 42 +++++++-------- > > libavcodec/qsvenc_hevc.c | 2 +- > > libavcodec/vaapi_hevc.c | 42 +++++++-------- > > libavcodec/vdpau_hevc.c | 36 ++++++------- > > libavcodec/vulkan_hevc.c | 56 ++++++++++---------- > > 14 files changed, 212 insertions(+), 213 deletions(-) > > > > Making this a bitfield is not worth it. It will necessitate masking > operations on every access to these fields which may increase the size > of .text by more than 96B; it may even be that this more than offsets > the savings of 96B from using a bitfield. Okay, then changed to uint8_t locally. -- Anton Khirnov _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH 07/10] lavc/hevc_ps: fix variable signedness in ff_hevc_decode_short_term_rps() 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov ` (4 preceding siblings ...) 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 06/10] lavc/hevc_ps/HEVCSPS: change flags into size-1 bitfields Anton Khirnov @ 2024-04-10 13:31 ` Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 08/10] lavc/hevc_ps: do not store delta_poc_s[01] in ShortTermRPS Anton Khirnov ` (2 subsequent siblings) 8 siblings, 0 replies; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel The variable prev is actually supposed to go negative in the loop over num_negative_pics, but the unsigned underflow does not break anything as the result is then assigned to a signed int. --- libavcodec/hevc_ps.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 1af691414e..d90f172c46 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -197,7 +197,8 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } } } else { - unsigned int prev, nb_positive_pics; + unsigned int nb_positive_pics; + rps->num_negative_pics = get_ue_golomb_long(gb); nb_positive_pics = get_ue_golomb_long(gb); @@ -209,7 +210,8 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, rps->num_delta_pocs = rps->num_negative_pics + nb_positive_pics; if (rps->num_delta_pocs) { - prev = 0; + int prev = 0; + for (i = 0; i < rps->num_negative_pics; i++) { delta_poc = rps->delta_poc_s0[i] = get_ue_golomb_long(gb) + 1; if (delta_poc < 1 || delta_poc > 32768) { -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH 08/10] lavc/hevc_ps: do not store delta_poc_s[01] in ShortTermRPS 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov ` (5 preceding siblings ...) 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 07/10] lavc/hevc_ps: fix variable signedness in ff_hevc_decode_short_term_rps() Anton Khirnov @ 2024-04-10 13:31 ` Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 09/10] lavc/hevc_ps: reduce the size of ShortTermRPS.used Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 10/10] lavc/hevc_ps: compactify ShortTermRPS Anton Khirnov 8 siblings, 0 replies; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel They are only used in vulkan_hevc and are not actually needed, as they can be computed from delta_poc. Reduces sizeof(HEVCSPS) by 16kB. --- libavcodec/hevc_ps.c | 4 ++-- libavcodec/hevc_ps.h | 2 -- libavcodec/vulkan_hevc.c | 7 +++++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index d90f172c46..a6b0021bc3 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -213,7 +213,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, int prev = 0; for (i = 0; i < rps->num_negative_pics; i++) { - delta_poc = rps->delta_poc_s0[i] = get_ue_golomb_long(gb) + 1; + delta_poc = get_ue_golomb_long(gb) + 1; if (delta_poc < 1 || delta_poc > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_poc: %d\n", @@ -226,7 +226,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } prev = 0; for (i = 0; i < nb_positive_pics; i++) { - delta_poc = rps->delta_poc_s1[i] = get_ue_golomb_long(gb) + 1; + delta_poc = get_ue_golomb_long(gb) + 1; if (delta_poc < 1 || delta_poc > 32768) { av_log(avctx, AV_LOG_ERROR, "Invalid value of delta_poc: %d\n", diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 9801b4347f..6ef29a8ea7 100644 
--- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -78,8 +78,6 @@ typedef struct ShortTermRPS { unsigned int num_negative_pics; int num_delta_pocs; int rps_idx_num_delta_pocs; - int32_t delta_poc_s0[32]; - int32_t delta_poc_s1[32]; int32_t delta_poc[32]; uint8_t used[32]; } ShortTermRPS; diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c index df86049d22..5d7c6b1b64 100644 --- a/libavcodec/vulkan_hevc.c +++ b/libavcodec/vulkan_hevc.c @@ -351,6 +351,8 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, pal->PredictorPaletteEntries[i][j] = sps->sps_palette_predictor_initializer[i][j]; for (int i = 0; i < sps->nb_st_rps; i++) { + const ShortTermRPS *st_rps = &sps->st_rps[i]; + str[i] = (StdVideoH265ShortTermRefPicSet) { .flags = (StdVideoH265ShortTermRefPicSetFlags) { .inter_ref_pic_set_prediction_flag = sps->st_rps[i].rps_predict, @@ -375,12 +377,13 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j; for (int j = 0; j < str[i].num_negative_pics; j++) { - str[i].delta_poc_s0_minus1[j] = sps->st_rps[i].delta_poc_s0[j] - 1; + str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j; } for (int j = 0; j < str[i].num_positive_pics; j++) { - str[i].delta_poc_s1_minus1[j] = sps->st_rps[i].delta_poc_s1[j] - 1; + str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[st_rps->num_negative_pics + j] - + (j ? st_rps->delta_poc[st_rps->num_negative_pics + j - 1] : 0) - 1; str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[str[i].num_negative_pics + j] << j; } } -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH 09/10] lavc/hevc_ps: reduce the size of ShortTermRPS.used 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov ` (6 preceding siblings ...) 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 08/10] lavc/hevc_ps: do not store delta_poc_s[01] in ShortTermRPS Anton Khirnov @ 2024-04-10 13:31 ` Anton Khirnov 2024-04-10 13:42 ` James Almer 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 10/10] lavc/hevc_ps: compactify ShortTermRPS Anton Khirnov 8 siblings, 1 reply; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel It is currently an array of 32 uint8_t, each storing a single flag. A single uint32_t is sufficient. Reduces sizeof(HEVCSPS) by 1792 bytes. --- libavcodec/hevc_ps.c | 33 +++++++++++++++++++-------------- libavcodec/hevc_ps.h | 2 +- libavcodec/hevc_refs.c | 6 +++--- libavcodec/vulkan_hevc.c | 8 ++++---- 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index a6b0021bc3..76fe507e7b 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -107,6 +107,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, int k = 0; int i; + rps->used = 0; rps->rps_predict = 0; if (rps != sps->st_rps && sps->nb_st_rps) @@ -114,6 +115,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, if (rps->rps_predict) { const ShortTermRPS *rps_ridx; + uint8_t used[32] = { 0 }; int delta_rps; if (is_slice_header) { @@ -139,13 +141,13 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps; for (i = 0; i <= rps_ridx->num_delta_pocs; i++) { - int used = rps->used[k] = get_bits1(gb); + used[k] = get_bits1(gb); rps->use_delta_flag = 0; - if (!used) + if (!used[k]) rps->use_delta_flag = get_bits1(gb); - if (used || rps->use_delta_flag) { + if (used[k] || 
rps->use_delta_flag) { if (i < rps_ridx->num_delta_pocs) delta_poc = delta_rps + rps_ridx->delta_poc[i]; else @@ -157,7 +159,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } } - if (k >= FF_ARRAY_ELEMS(rps->used)) { + if (k >= FF_ARRAY_ELEMS(used)) { av_log(avctx, AV_LOG_ERROR, "Invalid num_delta_pocs: %d\n", k); return AVERROR_INVALIDDATA; @@ -167,35 +169,38 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, rps->num_negative_pics = k0; // sort in increasing order (smallest first) if (rps->num_delta_pocs != 0) { - int used, tmp; + int u, tmp; for (i = 1; i < rps->num_delta_pocs; i++) { delta_poc = rps->delta_poc[i]; - used = rps->used[i]; + u = used[i]; for (k = i - 1; k >= 0; k--) { tmp = rps->delta_poc[k]; if (delta_poc < tmp) { rps->delta_poc[k + 1] = tmp; - rps->used[k + 1] = rps->used[k]; + used[k + 1] = used[k]; rps->delta_poc[k] = delta_poc; - rps->used[k] = used; + used[k] = u; } } } } if ((rps->num_negative_pics >> 1) != 0) { - int used; + int u; k = rps->num_negative_pics - 1; // flip the negative values to largest first for (i = 0; i < rps->num_negative_pics >> 1; i++) { delta_poc = rps->delta_poc[i]; - used = rps->used[i]; + u = used[i]; rps->delta_poc[i] = rps->delta_poc[k]; - rps->used[i] = rps->used[k]; + used[i] = used[k]; rps->delta_poc[k] = delta_poc; - rps->used[k] = used; + used[k] = u; k--; } } + + for (unsigned i = 0; i < FF_ARRAY_ELEMS(used); i++) + rps->used |= used[i] * (1 << i); } else { unsigned int nb_positive_pics; @@ -222,7 +227,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } prev -= delta_poc; rps->delta_poc[i] = prev; - rps->used[i] = get_bits1(gb); + rps->used |= get_bits1(gb) * (1 << i); } prev = 0; for (i = 0; i < nb_positive_pics; i++) { @@ -235,7 +240,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } prev += delta_poc; rps->delta_poc[rps->num_negative_pics + i] = prev; - 
rps->used[rps->num_negative_pics + i] = get_bits1(gb); + rps->used |= get_bits1(gb) * (1 << (rps->num_negative_pics + i)); } } } diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 6ef29a8ea7..92b85115f7 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -79,7 +79,7 @@ typedef struct ShortTermRPS { int num_delta_pocs; int rps_idx_num_delta_pocs; int32_t delta_poc[32]; - uint8_t used[32]; + uint32_t used; } ShortTermRPS; typedef struct HEVCWindow { diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c index aed649933d..19f3fa81da 100644 --- a/libavcodec/hevc_refs.c +++ b/libavcodec/hevc_refs.c @@ -501,7 +501,7 @@ int ff_hevc_frame_rps(HEVCContext *s) int poc = s->poc + short_rps->delta_poc[i]; int list; - if (!short_rps->used[i]) + if (!(short_rps->used & (1 << i))) list = ST_FOLL; else if (i < short_rps->num_negative_pics) list = ST_CURR_BEF; @@ -540,9 +540,9 @@ int ff_hevc_frame_nb_refs(const HEVCContext *s) if (rps) { for (i = 0; i < rps->num_negative_pics; i++) - ret += !!rps->used[i]; + ret += !!(rps->used & (1 << i)); for (; i < rps->num_delta_pocs; i++) - ret += !!rps->used[i]; + ret += !!(rps->used & (1 << i)); } if (long_rps) { diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c index 5d7c6b1b64..c2b65fc201 100644 --- a/libavcodec/vulkan_hevc.c +++ b/libavcodec/vulkan_hevc.c @@ -374,17 +374,17 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, /* NOTE: This is the predicted, and *reordered* version. * Probably incorrect, but the spec doesn't say which version to use. */ for (int j = 0; j < sps->st_rps[i].num_delta_pocs; j++) - str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j; + str[i].used_by_curr_pic_flag |= st_rps->used; for (int j = 0; j < str[i].num_negative_pics; j++) { - str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; - str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j; + str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? 
st_rps->delta_poc[j - 1] : 0) - 1; + str[i].used_by_curr_pic_s0_flag |= st_rps->used & ((1 << str[i].num_negative_pics) - 1); } for (int j = 0; j < str[i].num_positive_pics; j++) { str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[st_rps->num_negative_pics + j] - (j ? st_rps->delta_poc[st_rps->num_negative_pics + j - 1] : 0) - 1; - str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[str[i].num_negative_pics + j] << j; + str[i].used_by_curr_pic_s0_flag |= st_rps->used >> str[i].num_negative_pics; } } -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 09/10] lavc/hevc_ps: reduce the size of ShortTermRPS.used 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 09/10] lavc/hevc_ps: reduce the size of ShortTermRPS.used Anton Khirnov @ 2024-04-10 13:42 ` James Almer 2024-05-24 9:11 ` Anton Khirnov 0 siblings, 1 reply; 22+ messages in thread From: James Almer @ 2024-04-10 13:42 UTC (permalink / raw) To: ffmpeg-devel On 4/10/2024 10:31 AM, Anton Khirnov wrote: > It is currently an array of 32 uint8_t, each storing a single flag. A > single uint32_t is sufficient. > > Reduces sizeof(HEVCSPS) by 1792 bytes. > --- > libavcodec/hevc_ps.c | 33 +++++++++++++++++++-------------- > libavcodec/hevc_ps.h | 2 +- > libavcodec/hevc_refs.c | 6 +++--- > libavcodec/vulkan_hevc.c | 8 ++++---- > 4 files changed, 27 insertions(+), 22 deletions(-) > > diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c > index a6b0021bc3..76fe507e7b 100644 > --- a/libavcodec/hevc_ps.c > +++ b/libavcodec/hevc_ps.c > @@ -107,6 +107,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, > int k = 0; > int i; > > + rps->used = 0; > rps->rps_predict = 0; > > if (rps != sps->st_rps && sps->nb_st_rps) > @@ -114,6 +115,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, > > if (rps->rps_predict) { > const ShortTermRPS *rps_ridx; > + uint8_t used[32] = { 0 }; > int delta_rps; > > if (is_slice_header) { > @@ -139,13 +141,13 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, > } > delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps; > for (i = 0; i <= rps_ridx->num_delta_pocs; i++) { > - int used = rps->used[k] = get_bits1(gb); > + used[k] = get_bits1(gb); > > rps->use_delta_flag = 0; > - if (!used) > + if (!used[k]) > rps->use_delta_flag = get_bits1(gb); > > - if (used || rps->use_delta_flag) { > + if (used[k] || rps->use_delta_flag) { > if (i < rps_ridx->num_delta_pocs) > delta_poc = delta_rps + rps_ridx->delta_poc[i]; > else > @@ -157,7 +159,7 
@@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, > } > } > > - if (k >= FF_ARRAY_ELEMS(rps->used)) { > + if (k >= FF_ARRAY_ELEMS(used)) { > av_log(avctx, AV_LOG_ERROR, > "Invalid num_delta_pocs: %d\n", k); > return AVERROR_INVALIDDATA; > @@ -167,35 +169,38 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, > rps->num_negative_pics = k0; > // sort in increasing order (smallest first) > if (rps->num_delta_pocs != 0) { > - int used, tmp; > + int u, tmp; > for (i = 1; i < rps->num_delta_pocs; i++) { > delta_poc = rps->delta_poc[i]; > - used = rps->used[i]; > + u = used[i]; > for (k = i - 1; k >= 0; k--) { > tmp = rps->delta_poc[k]; > if (delta_poc < tmp) { > rps->delta_poc[k + 1] = tmp; > - rps->used[k + 1] = rps->used[k]; > + used[k + 1] = used[k]; > rps->delta_poc[k] = delta_poc; > - rps->used[k] = used; > + used[k] = u; > } > } > } > } > if ((rps->num_negative_pics >> 1) != 0) { > - int used; > + int u; > k = rps->num_negative_pics - 1; > // flip the negative values to largest first > for (i = 0; i < rps->num_negative_pics >> 1; i++) { > delta_poc = rps->delta_poc[i]; > - used = rps->used[i]; > + u = used[i]; > rps->delta_poc[i] = rps->delta_poc[k]; > - rps->used[i] = rps->used[k]; > + used[i] = used[k]; > rps->delta_poc[k] = delta_poc; > - rps->used[k] = used; > + used[k] = u; > k--; > } > } > + > + for (unsigned i = 0; i < FF_ARRAY_ELEMS(used); i++) > + rps->used |= used[i] * (1 << i); > } else { > unsigned int nb_positive_pics; > > @@ -222,7 +227,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, > } > prev -= delta_poc; > rps->delta_poc[i] = prev; > - rps->used[i] = get_bits1(gb); > + rps->used |= get_bits1(gb) * (1 << i); > } > prev = 0; > for (i = 0; i < nb_positive_pics; i++) { > @@ -235,7 +240,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, > } > prev += delta_poc; > rps->delta_poc[rps->num_negative_pics + i] = prev; > - 
rps->used[rps->num_negative_pics + i] = get_bits1(gb); > + rps->used |= get_bits1(gb) * (1 << (rps->num_negative_pics + i)); > } > } > } > diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h > index 6ef29a8ea7..92b85115f7 100644 > --- a/libavcodec/hevc_ps.h > +++ b/libavcodec/hevc_ps.h > @@ -79,7 +79,7 @@ typedef struct ShortTermRPS { > int num_delta_pocs; > int rps_idx_num_delta_pocs; > int32_t delta_poc[32]; > - uint8_t used[32]; > + uint32_t used; > } ShortTermRPS; > > typedef struct HEVCWindow { > diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c > index aed649933d..19f3fa81da 100644 > --- a/libavcodec/hevc_refs.c > +++ b/libavcodec/hevc_refs.c > @@ -501,7 +501,7 @@ int ff_hevc_frame_rps(HEVCContext *s) > int poc = s->poc + short_rps->delta_poc[i]; > int list; > > - if (!short_rps->used[i]) > + if (!(short_rps->used & (1 << i))) > list = ST_FOLL; > else if (i < short_rps->num_negative_pics) > list = ST_CURR_BEF; > @@ -540,9 +540,9 @@ int ff_hevc_frame_nb_refs(const HEVCContext *s) > > if (rps) { > for (i = 0; i < rps->num_negative_pics; i++) > - ret += !!rps->used[i]; > + ret += !!(rps->used & (1 << i)); > for (; i < rps->num_delta_pocs; i++) > - ret += !!rps->used[i]; > + ret += !!(rps->used & (1 << i)); > } > > if (long_rps) { > diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c > index 5d7c6b1b64..c2b65fc201 100644 > --- a/libavcodec/vulkan_hevc.c > +++ b/libavcodec/vulkan_hevc.c > @@ -374,17 +374,17 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, > /* NOTE: This is the predicted, and *reordered* version. > * Probably incorrect, but the spec doesn't say which version to use. */ > for (int j = 0; j < sps->st_rps[i].num_delta_pocs; j++) > - str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j; > + str[i].used_by_curr_pic_flag |= st_rps->used; > > for (int j = 0; j < str[i].num_negative_pics; j++) { > - str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? 
st_rps->delta_poc[j - 1] : 0) - 1; > - str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j; > + str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; > + str[i].used_by_curr_pic_s0_flag |= st_rps->used & ((1 << str[i].num_negative_pics) - 1); av_mod_uintp2(st_rps->used, str[i].num_negative_pics). > } > > for (int j = 0; j < str[i].num_positive_pics; j++) { > str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[st_rps->num_negative_pics + j] - > (j ? st_rps->delta_poc[st_rps->num_negative_pics + j - 1] : 0) - 1; > - str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[str[i].num_negative_pics + j] << j; > + str[i].used_by_curr_pic_s0_flag |= st_rps->used >> str[i].num_negative_pics; > } > } > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 09/10] lavc/hevc_ps: reduce the size of ShortTermRPS.used 2024-04-10 13:42 ` James Almer @ 2024-05-24 9:11 ` Anton Khirnov 2024-05-24 11:54 ` James Almer 0 siblings, 1 reply; 22+ messages in thread From: Anton Khirnov @ 2024-05-24 9:11 UTC (permalink / raw) To: FFmpeg development discussions and patches Quoting James Almer (2024-04-10 15:42:51) > > > On 4/10/2024 10:31 AM, Anton Khirnov wrote: > > diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c > > index 5d7c6b1b64..c2b65fc201 100644 > > --- a/libavcodec/vulkan_hevc.c > > +++ b/libavcodec/vulkan_hevc.c > > @@ -374,17 +374,17 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, > > /* NOTE: This is the predicted, and *reordered* version. > > * Probably incorrect, but the spec doesn't say which version to use. */ > > for (int j = 0; j < sps->st_rps[i].num_delta_pocs; j++) > > - str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j; > > + str[i].used_by_curr_pic_flag |= st_rps->used; > > > > for (int j = 0; j < str[i].num_negative_pics; j++) { > > - str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; > > - str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j; > > + str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; > > + str[i].used_by_curr_pic_s0_flag |= st_rps->used & ((1 << str[i].num_negative_pics) - 1); > > av_mod_uintp2(st_rps->used, str[i].num_negative_pics). unrelated change -- Anton Khirnov _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 09/10] lavc/hevc_ps: reduce the size of ShortTermRPS.used 2024-05-24 9:11 ` Anton Khirnov @ 2024-05-24 11:54 ` James Almer 2024-05-29 8:05 ` [FFmpeg-devel] [PATCH v2 09-10] " Anton Khirnov 0 siblings, 1 reply; 22+ messages in thread From: James Almer @ 2024-05-24 11:54 UTC (permalink / raw) To: ffmpeg-devel On 5/24/2024 6:11 AM, Anton Khirnov wrote: > Quoting James Almer (2024-04-10 15:42:51) >> >> >> On 4/10/2024 10:31 AM, Anton Khirnov wrote: >>> diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c >>> index 5d7c6b1b64..c2b65fc201 100644 >>> --- a/libavcodec/vulkan_hevc.c >>> +++ b/libavcodec/vulkan_hevc.c >>> @@ -374,17 +374,17 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, >>> /* NOTE: This is the predicted, and *reordered* version. >>> * Probably incorrect, but the spec doesn't say which version to use. */ >>> for (int j = 0; j < sps->st_rps[i].num_delta_pocs; j++) >>> - str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j; >>> + str[i].used_by_curr_pic_flag |= st_rps->used; >>> >>> for (int j = 0; j < str[i].num_negative_pics; j++) { >>> - str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; >>> - str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j; >>> + str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; >>> + str[i].used_by_curr_pic_s0_flag |= st_rps->used & ((1 << str[i].num_negative_pics) - 1); >> >> av_mod_uintp2(st_rps->used, str[i].num_negative_pics). > > unrelated change It's a nit, but not unrelated. You're adding a "i & ((1 << j) - 1)" line, which can be simplified (and optimized) by using av_mod_uintp2() instead. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". 
^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH v2 09-10] lavc/hevc_ps: reduce the size of ShortTermRPS.used 2024-05-24 11:54 ` James Almer @ 2024-05-29 8:05 ` Anton Khirnov 0 siblings, 0 replies; 22+ messages in thread From: Anton Khirnov @ 2024-05-29 8:05 UTC (permalink / raw) To: ffmpeg-devel It is currently an array of 32 uint8_t, each storing a single flag. A single uint32_t is sufficient. Reduces sizeof(HEVCSPS) by 1792 bytes. --- libavcodec/hevc_ps.c | 33 +++++++++++++++++++-------------- libavcodec/hevc_ps.h | 2 +- libavcodec/hevc_refs.c | 6 +++--- libavcodec/vulkan_hevc.c | 13 +++++-------- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index a6b0021bc3..76fe507e7b 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -107,6 +107,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, int k = 0; int i; + rps->used = 0; rps->rps_predict = 0; if (rps != sps->st_rps && sps->nb_st_rps) @@ -114,6 +115,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, if (rps->rps_predict) { const ShortTermRPS *rps_ridx; + uint8_t used[32] = { 0 }; int delta_rps; if (is_slice_header) { @@ -139,13 +141,13 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps; for (i = 0; i <= rps_ridx->num_delta_pocs; i++) { - int used = rps->used[k] = get_bits1(gb); + used[k] = get_bits1(gb); rps->use_delta_flag = 0; - if (!used) + if (!used[k]) rps->use_delta_flag = get_bits1(gb); - if (used || rps->use_delta_flag) { + if (used[k] || rps->use_delta_flag) { if (i < rps_ridx->num_delta_pocs) delta_poc = delta_rps + rps_ridx->delta_poc[i]; else @@ -157,7 +159,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } } - if (k >= FF_ARRAY_ELEMS(rps->used)) { + if (k >= FF_ARRAY_ELEMS(used)) { av_log(avctx, AV_LOG_ERROR, "Invalid num_delta_pocs: %d\n", k); return 
AVERROR_INVALIDDATA; @@ -167,35 +169,38 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, rps->num_negative_pics = k0; // sort in increasing order (smallest first) if (rps->num_delta_pocs != 0) { - int used, tmp; + int u, tmp; for (i = 1; i < rps->num_delta_pocs; i++) { delta_poc = rps->delta_poc[i]; - used = rps->used[i]; + u = used[i]; for (k = i - 1; k >= 0; k--) { tmp = rps->delta_poc[k]; if (delta_poc < tmp) { rps->delta_poc[k + 1] = tmp; - rps->used[k + 1] = rps->used[k]; + used[k + 1] = used[k]; rps->delta_poc[k] = delta_poc; - rps->used[k] = used; + used[k] = u; } } } } if ((rps->num_negative_pics >> 1) != 0) { - int used; + int u; k = rps->num_negative_pics - 1; // flip the negative values to largest first for (i = 0; i < rps->num_negative_pics >> 1; i++) { delta_poc = rps->delta_poc[i]; - used = rps->used[i]; + u = used[i]; rps->delta_poc[i] = rps->delta_poc[k]; - rps->used[i] = rps->used[k]; + used[i] = used[k]; rps->delta_poc[k] = delta_poc; - rps->used[k] = used; + used[k] = u; k--; } } + + for (unsigned i = 0; i < FF_ARRAY_ELEMS(used); i++) + rps->used |= used[i] * (1 << i); } else { unsigned int nb_positive_pics; @@ -222,7 +227,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } prev -= delta_poc; rps->delta_poc[i] = prev; - rps->used[i] = get_bits1(gb); + rps->used |= get_bits1(gb) * (1 << i); } prev = 0; for (i = 0; i < nb_positive_pics; i++) { @@ -235,7 +240,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, } prev += delta_poc; rps->delta_poc[rps->num_negative_pics + i] = prev; - rps->used[rps->num_negative_pics + i] = get_bits1(gb); + rps->used |= get_bits1(gb) * (1 << (rps->num_negative_pics + i)); } } } diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 1d3bdca4c6..ed6372c747 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -79,7 +79,7 @@ typedef struct ShortTermRPS { int num_delta_pocs; int rps_idx_num_delta_pocs; int32_t 
delta_poc[32]; - uint8_t used[32]; + uint32_t used; } ShortTermRPS; typedef struct HEVCWindow { diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c index 8da9ec982a..d6dc2f9e0a 100644 --- a/libavcodec/hevc_refs.c +++ b/libavcodec/hevc_refs.c @@ -497,7 +497,7 @@ int ff_hevc_frame_rps(HEVCContext *s) int poc = s->poc + short_rps->delta_poc[i]; int list; - if (!short_rps->used[i]) + if (!(short_rps->used & (1 << i))) list = ST_FOLL; else if (i < short_rps->num_negative_pics) list = ST_CURR_BEF; @@ -536,9 +536,9 @@ int ff_hevc_frame_nb_refs(const HEVCContext *s) if (rps) { for (i = 0; i < rps->num_negative_pics; i++) - ret += !!rps->used[i]; + ret += !!(rps->used & (1 << i)); for (; i < rps->num_delta_pocs; i++) - ret += !!rps->used[i]; + ret += !!(rps->used & (1 << i)); } if (long_rps) { diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c index 21cf49c0ec..a35f3d992d 100644 --- a/libavcodec/vulkan_hevc.c +++ b/libavcodec/vulkan_hevc.c @@ -373,19 +373,16 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, /* NOTE: This is the predicted, and *reordered* version. * Probably incorrect, but the spec doesn't say which version to use. */ - for (int j = 0; j < sps->st_rps[i].num_delta_pocs; j++) - str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j; + str[i].used_by_curr_pic_flag = st_rps->used; + str[i].used_by_curr_pic_s0_flag = av_mod_uintp2(st_rps->used, str[i].num_negative_pics); + str[i].used_by_curr_pic_s1_flag = st_rps->used >> str[i].num_negative_pics; - for (int j = 0; j < str[i].num_negative_pics; j++) { + for (int j = 0; j < str[i].num_negative_pics; j++) str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1; - str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j; - } - for (int j = 0; j < str[i].num_positive_pics; j++) { + for (int j = 0; j < str[i].num_positive_pics; j++) str[i].delta_poc_s1_minus1[j] = st_rps->delta_poc[st_rps->num_negative_pics + j] - (j ? 
st_rps->delta_poc[st_rps->num_negative_pics + j - 1] : 0) - 1; - str[i].used_by_curr_pic_s1_flag |= sps->st_rps[i].used[str[i].num_negative_pics + j] << j; - } } *ltr = (StdVideoH265LongTermRefPicsSps) { -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* [FFmpeg-devel] [PATCH 10/10] lavc/hevc_ps: compactify ShortTermRPS 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov ` (7 preceding siblings ...) 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 09/10] lavc/hevc_ps: reduce the size of ShortTermRPS.used Anton Khirnov @ 2024-04-10 13:31 ` Anton Khirnov 2024-04-11 12:35 ` James Almer 8 siblings, 1 reply; 22+ messages in thread From: Anton Khirnov @ 2024-04-10 13:31 UTC (permalink / raw) To: ffmpeg-devel Do not use larger fields than needed, use size-1 bitfields for flags. Reduces sizeof(HEVCSPS) by 1280 bytes. --- libavcodec/hevc_ps.c | 6 +++--- libavcodec/hevc_ps.h | 19 +++++++++++-------- libavcodec/vulkan_hevc.c | 2 +- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c index 76fe507e7b..7b486ce0af 100644 --- a/libavcodec/hevc_ps.c +++ b/libavcodec/hevc_ps.c @@ -143,11 +143,11 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, for (i = 0; i <= rps_ridx->num_delta_pocs; i++) { used[k] = get_bits1(gb); - rps->use_delta_flag = 0; + rps->use_delta = 0; if (!used[k]) - rps->use_delta_flag = get_bits1(gb); + rps->use_delta = get_bits1(gb); - if (used[k] || rps->use_delta_flag) { + if (used[k] || rps->use_delta) { if (i < rps_ridx->num_delta_pocs) delta_poc = delta_rps + rps_ridx->delta_poc[i]; else diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h index 92b85115f7..a8d07aa1b2 100644 --- a/libavcodec/hevc_ps.h +++ b/libavcodec/hevc_ps.h @@ -70,16 +70,19 @@ typedef struct HEVCHdrParams { } HEVCHdrParams; typedef struct ShortTermRPS { - uint8_t rps_predict; - unsigned int delta_idx; - uint8_t use_delta_flag; - uint8_t delta_rps_sign; - unsigned int abs_delta_rps; - unsigned int num_negative_pics; - int num_delta_pocs; - int rps_idx_num_delta_pocs; int32_t delta_poc[32]; uint32_t used; + + uint8_t delta_idx; + uint8_t num_negative_pics; + uint8_t num_delta_pocs; + uint8_t 
rps_idx_num_delta_pocs; + + uint16_t abs_delta_rps; + unsigned delta_rps_sign:1; + + unsigned rps_predict:1; + unsigned use_delta:1; } ShortTermRPS; typedef struct HEVCWindow { diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c index c2b65fc201..b109475194 100644 --- a/libavcodec/vulkan_hevc.c +++ b/libavcodec/vulkan_hevc.c @@ -359,7 +359,7 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, .delta_rps_sign = sps->st_rps[i].delta_rps_sign, }, .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1, - .use_delta_flag = sps->st_rps[i].use_delta_flag, + .use_delta_flag = sps->st_rps[i].use_delta, .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1, .used_by_curr_pic_flag = 0x0, .used_by_curr_pic_s0_flag = 0x0, -- 2.43.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 10/10] lavc/hevc_ps: compactify ShortTermRPS 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 10/10] lavc/hevc_ps: compactify ShortTermRPS Anton Khirnov @ 2024-04-11 12:35 ` James Almer 2024-05-24 9:19 ` Anton Khirnov 0 siblings, 1 reply; 22+ messages in thread From: James Almer @ 2024-04-11 12:35 UTC (permalink / raw) To: ffmpeg-devel On 4/10/2024 10:31 AM, Anton Khirnov wrote: > Do not use larger fields than needed, use size-1 bitfields for flags. > > Reduces sizeof(HEVCSPS) by 1280 bytes. > --- > libavcodec/hevc_ps.c | 6 +++--- > libavcodec/hevc_ps.h | 19 +++++++++++-------- > libavcodec/vulkan_hevc.c | 2 +- > 3 files changed, 15 insertions(+), 12 deletions(-) > > diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c > index 76fe507e7b..7b486ce0af 100644 > --- a/libavcodec/hevc_ps.c > +++ b/libavcodec/hevc_ps.c > @@ -143,11 +143,11 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, > for (i = 0; i <= rps_ridx->num_delta_pocs; i++) { > used[k] = get_bits1(gb); > > - rps->use_delta_flag = 0; > + rps->use_delta = 0; > if (!used[k]) > - rps->use_delta_flag = get_bits1(gb); > + rps->use_delta = get_bits1(gb); > > - if (used[k] || rps->use_delta_flag) { > + if (used[k] || rps->use_delta) { > if (i < rps_ridx->num_delta_pocs) > delta_poc = delta_rps + rps_ridx->delta_poc[i]; > else > diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h > index 92b85115f7..a8d07aa1b2 100644 > --- a/libavcodec/hevc_ps.h > +++ b/libavcodec/hevc_ps.h > @@ -70,16 +70,19 @@ typedef struct HEVCHdrParams { > } HEVCHdrParams; > > typedef struct ShortTermRPS { > - uint8_t rps_predict; > - unsigned int delta_idx; > - uint8_t use_delta_flag; > - uint8_t delta_rps_sign; > - unsigned int abs_delta_rps; > - unsigned int num_negative_pics; > - int num_delta_pocs; > - int rps_idx_num_delta_pocs; > int32_t delta_poc[32]; > uint32_t used; > + > + uint8_t delta_idx; > + uint8_t num_negative_pics; > + uint8_t num_delta_pocs; > + uint8_t 
rps_idx_num_delta_pocs; > + > + uint16_t abs_delta_rps; > + unsigned delta_rps_sign:1; Won't the compiler add two bytes of padding if you put this here? > + > + unsigned rps_predict:1; > + unsigned use_delta:1; > } ShortTermRPS; > > typedef struct HEVCWindow { > diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c > index c2b65fc201..b109475194 100644 > --- a/libavcodec/vulkan_hevc.c > +++ b/libavcodec/vulkan_hevc.c > @@ -359,7 +359,7 @@ static void set_sps(const HEVCSPS *sps, int sps_idx, > .delta_rps_sign = sps->st_rps[i].delta_rps_sign, > }, > .delta_idx_minus1 = sps->st_rps[i].delta_idx - 1, > - .use_delta_flag = sps->st_rps[i].use_delta_flag, > + .use_delta_flag = sps->st_rps[i].use_delta, > .abs_delta_rps_minus1 = sps->st_rps[i].abs_delta_rps - 1, > .used_by_curr_pic_flag = 0x0, > .used_by_curr_pic_s0_flag = 0x0, _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [FFmpeg-devel] [PATCH 10/10] lavc/hevc_ps: compactify ShortTermRPS 2024-04-11 12:35 ` James Almer @ 2024-05-24 9:19 ` Anton Khirnov 0 siblings, 0 replies; 22+ messages in thread From: Anton Khirnov @ 2024-05-24 9:19 UTC (permalink / raw) To: FFmpeg development discussions and patches Quoting James Almer (2024-04-11 14:35:37) > > diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h > > index 92b85115f7..a8d07aa1b2 100644 > > --- a/libavcodec/hevc_ps.h > > +++ b/libavcodec/hevc_ps.h > > @@ -70,16 +70,19 @@ typedef struct HEVCHdrParams { > > } HEVCHdrParams; > > > > typedef struct ShortTermRPS { > > - uint8_t rps_predict; > > - unsigned int delta_idx; > > - uint8_t use_delta_flag; > > - uint8_t delta_rps_sign; > > - unsigned int abs_delta_rps; > > - unsigned int num_negative_pics; > > - int num_delta_pocs; > > - int rps_idx_num_delta_pocs; > > int32_t delta_poc[32]; > > uint32_t used; > > + > > + uint8_t delta_idx; > > + uint8_t num_negative_pics; > > + uint8_t num_delta_pocs; > > + uint8_t rps_idx_num_delta_pocs; > > + > > + uint16_t abs_delta_rps; > > + unsigned delta_rps_sign:1; > > Won't the compiler add two bytes of padding if you put this here? No.
Pahole says struct ShortTermRPS { int32_t delta_poc[32]; /* 0 128 */ /* --- cacheline 2 boundary (128 bytes) --- */ uint32_t used; /* 128 4 */ uint8_t delta_idx; /* 132 1 */ uint8_t num_negative_pics; /* 133 1 */ uint8_t num_delta_pocs; /* 134 1 */ uint8_t rps_idx_num_delta_pocs; /* 135 1 */ uint16_t abs_delta_rps; /* 136 2 */ /* Bitfield combined with previous fields */ unsigned int delta_rps_sign:1; /* 136:16 4 */ unsigned int rps_predict:1; /* 136:17 4 */ unsigned int use_delta:1; /* 136:18 4 */ /* size: 140, cachelines: 3, members: 10 */ /* bit_padding: 13 bits */ /* last cacheline: 12 bytes */ }; -- Anton Khirnov _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2024-05-29 8:06 UTC | newest] Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2024-04-10 13:31 [FFmpeg-devel] [PATCH 01/10] lavc/hevcdec: rename HEVCContext.HEVClcList to local_ctx Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 02/10] lavc/hevcdec: track local context count separately from WPP thread count Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers Anton Khirnov 2024-04-17 9:29 ` Andreas Rheinhardt 2024-05-24 9:03 ` Anton Khirnov 2024-05-27 13:10 ` Andreas Rheinhardt 2024-05-28 13:54 ` [FFmpeg-devel] [PATCH v2 " Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 04/10] lavc/hevcdec: drop a useless execute() call with 1 job Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 05/10] lavc/hevc_ps: reduce the size of used_by_curr_pic_lt_sps_flag Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 06/10] lavc/hevc_ps/HEVCSPS: change flags into size-1 bitfields Anton Khirnov 2024-04-11 11:55 ` Andreas Rheinhardt 2024-05-24 9:07 ` Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 07/10] lavc/hevc_ps: fix variable signedness in ff_hevc_decode_short_term_rps() Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 08/10] lavc/hevc_ps: do not store delta_poc_s[01] in ShortTermRPS Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 09/10] lavc/hevc_ps: reduce the size of ShortTermRPS.used Anton Khirnov 2024-04-10 13:42 ` James Almer 2024-05-24 9:11 ` Anton Khirnov 2024-05-24 11:54 ` James Almer 2024-05-29 8:05 ` [FFmpeg-devel] [PATCH v2 09-10] " Anton Khirnov 2024-04-10 13:31 ` [FFmpeg-devel] [PATCH 10/10] lavc/hevc_ps: compactify ShortTermRPS Anton Khirnov 2024-04-11 12:35 ` James Almer 2024-05-24 9:19 ` Anton Khirnov
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git