On 2/11/2025 1:11 PM, Zhao Zhili wrote: > > >> On Feb 12, 2025, at 00:02, James Almer wrote: >> >> On 2/11/2025 12:57 PM, Zhao Zhili wrote: >>>> On Feb 11, 2025, at 23:37, Andreas Rheinhardt wrote: >>>> >>>> Zhao Zhili: >>>>> From: Zhao Zhili >>>>> >>>>> Signed-off-by: Zhao Zhili >>>>> --- >>>>> libavcodec/hevc/hevcdec.c | 73 ++++++++++++++++++++++++++++++++++++++- >>>>> libavcodec/hevc/hevcdec.h | 2 ++ >>>>> libavcodec/hevc/refs.c | 35 ++++++++++++++++++- >>>>> 3 files changed, 108 insertions(+), 2 deletions(-) >>>>> >>>>> diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c >>>>> index e9c045f7a1..f71edf213b 100644 >>>>> --- a/libavcodec/hevc/hevcdec.c >>>>> +++ b/libavcodec/hevc/hevcdec.c >>>>> @@ -466,6 +466,25 @@ static int export_multilayer(HEVCContext *s, const HEVCVPS *vps) >>>>> return 0; >>>>> } >>>>> >>>>> +int ff_hevc_is_alpha_video(const HEVCContext *s) >>>>> +{ >>>>> + const HEVCVPS *vps = s->vps; >>>>> + int ret = 0; >>>>> + >>>>> + if (vps->nb_layers != 2 || !vps->layer_id_in_nuh[1]) >>>>> + return 0; >>>>> + >>>>> + /* decode_vps_ext() guarantees that SCALABILITY_AUXILIARY with AuxId other >>>>> + * than alpha cannot reach here. >>>>> + */ >>>>> + ret = (s->vps->scalability_mask_flag & HEVC_SCALABILITY_AUXILIARY); >>>>> + >>>>> + av_log(s->avctx, AV_LOG_DEBUG, "Multi layer video, %s alpha video\n", >>>>> + ret ? 
"is" : "not"); >>>>> + >>>>> + return ret; >>>>> +} >>>>> + >>>>> static int setup_multilayer(HEVCContext *s, const HEVCVPS *vps) >>>>> { >>>>> unsigned layers_active_output = 0, highest_layer; >>>>> @@ -473,6 +492,18 @@ static int setup_multilayer(HEVCContext *s, const HEVCVPS *vps) >>>>> s->layers_active_output = 1; >>>>> s->layers_active_decode = 1; >>>>> >>>>> + if (ff_hevc_is_alpha_video(s)) { >>>>> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt); >>>>> + >>>>> + if (!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) >>>>> + return 0; >>>>> + >>>>> + s->layers_active_decode = (1 << vps->nb_layers) - 1; >>>>> + s->layers_active_output = 1; >>>>> + >>>>> + return 0; >>>>> + } >>>>> + >>>>> // nothing requested - decode base layer only >>>>> if (!s->nb_view_ids) >>>>> return 0; >>>>> @@ -530,6 +561,34 @@ static int setup_multilayer(HEVCContext *s, const HEVCVPS *vps) >>>>> return 0; >>>>> } >>>>> >>>>> +static enum AVPixelFormat map_to_alpha_format(HEVCContext *s, >>>>> + enum AVPixelFormat pix_fmt) >>>>> +{ >>>>> + switch (pix_fmt) { >>>>> + case AV_PIX_FMT_YUV420P: >>>>> + case AV_PIX_FMT_YUVJ420P: >>>>> + return AV_PIX_FMT_YUVA420P; >>>>> + case AV_PIX_FMT_YUV420P10: >>>>> + return AV_PIX_FMT_YUVA420P10; >>>>> + case AV_PIX_FMT_YUV444P: >>>>> + return AV_PIX_FMT_YUVA444P; >>>>> + case AV_PIX_FMT_YUV422P: >>>>> + return AV_PIX_FMT_YUVA422P; >>>>> + case AV_PIX_FMT_YUV422P10LE: >>>>> + return AV_PIX_FMT_YUVA422P10LE; >>>>> + case AV_PIX_FMT_YUV444P10: >>>>> + return AV_PIX_FMT_YUVA444P10; >>>>> + case AV_PIX_FMT_YUV444P12: >>>>> + return AV_PIX_FMT_YUVA444P12; >>>>> + case AV_PIX_FMT_YUV422P12: >>>>> + return AV_PIX_FMT_YUVA422P12; >>>>> + default: >>>>> + av_log(s->avctx, AV_LOG_WARNING, "No alpha pixel format map for %s\n", >>>>> + av_get_pix_fmt_name(pix_fmt)); >>>>> + return AV_PIX_FMT_NONE; >>>>> + } >>>>> +} >>>>> + >>>>> static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) >>>>> { >>>>> #define HWACCEL_MAX 
(CONFIG_HEVC_DXVA2_HWACCEL + \ >>>>> @@ -540,9 +599,13 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) >>>>> CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ >>>>> CONFIG_HEVC_VDPAU_HWACCEL + \ >>>>> CONFIG_HEVC_VULKAN_HWACCEL) >>>>> - enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; >>>>> + enum AVPixelFormat pix_fmts[HWACCEL_MAX + 3], *fmt = pix_fmts; >>>>> + enum AVPixelFormat alpha_fmt = AV_PIX_FMT_NONE; >>>>> int ret; >>>>> >>>>> + if (ff_hevc_is_alpha_video(s)) >>>>> + alpha_fmt = map_to_alpha_format(s, sps->pix_fmt); >>>>> + >>>>> switch (sps->pix_fmt) { >>>>> case AV_PIX_FMT_YUV420P: >>>>> case AV_PIX_FMT_YUVJ420P: >>>>> @@ -664,6 +727,8 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) >>>>> break; >>>>> } >>>>> >>>>> + if (alpha_fmt != AV_PIX_FMT_NONE) >>>>> + *fmt++ = alpha_fmt; >>>>> *fmt++ = sps->pix_fmt; >>>>> *fmt = AV_PIX_FMT_NONE; >>>>> >>>>> @@ -3194,6 +3259,12 @@ static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l, >>>>> !sps->vui.common.video_signal_type_present_flag) >>>>> pix_fmt = sps_base->pix_fmt; >>>>> >>>>> + // Ignore range mismatch between base layer and alpha layer >>>>> + if (ff_hevc_is_alpha_video(s) && >>>>> + sps_base->pix_fmt == AV_PIX_FMT_YUV420P && >>>>> + pix_fmt == AV_PIX_FMT_YUVJ420P) >>>>> + pix_fmt = sps_base->pix_fmt; >>>>> + >>>>> if (pix_fmt != sps_base->pix_fmt || >>>>> sps->width != sps_base->width || >>>>> sps->height != sps_base->height) { >>>>> diff --git a/libavcodec/hevc/hevcdec.h b/libavcodec/hevc/hevcdec.h >>>>> index 4e95035688..b2b725b5cd 100644 >>>>> --- a/libavcodec/hevc/hevcdec.h >>>>> +++ b/libavcodec/hevc/hevcdec.h >>>>> @@ -714,6 +714,8 @@ void ff_hevc_hls_residual_coding(HEVCLocalContext *lc, const HEVCPPS *pps, >>>>> >>>>> void ff_hevc_hls_mvd_coding(HEVCLocalContext *lc, int x0, int y0, int log2_cb_size); >>>>> >>>>> +int ff_hevc_is_alpha_video(const HEVCContext *s); >>>>> + >>>>> extern const uint8_t ff_hevc_qpel_extra_before[4]; 
>>>>> extern const uint8_t ff_hevc_qpel_extra_after[4]; >>>>> extern const uint8_t ff_hevc_qpel_extra[4]; >>>>> diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c >>>>> index dd7f7f95a8..6f10efd0ac 100644 >>>>> --- a/libavcodec/hevc/refs.c >>>>> +++ b/libavcodec/hevc/refs.c >>>>> @@ -79,6 +79,31 @@ void ff_hevc_flush_dpb(HEVCContext *s) >>>>> } >>>>> } >>>>> >>>>> +static int replace_alpha_plane(AVFrame *alpha, AVFrame *base) >>>>> +{ >>>>> + AVBufferRef *base_a = av_frame_get_plane_buffer(base, 3); >>>>> + uintptr_t data = (uintptr_t)alpha->data[0]; >>>>> + int ret; >>>>> + >>>>> + for (int i = 0; i < FF_ARRAY_ELEMS(alpha->buf) && alpha->buf[i]; i++) { >>>>> + AVBufferRef *buf = alpha->buf[i]; >>>>> + uintptr_t buf_begin = (uintptr_t)buf->data; >>>>> + >>>>> + if (data >= buf_begin && data < buf_begin + buf->size) { >>>>> + ret = av_buffer_replace(&alpha->buf[i], base_a); >>>>> + if (ret < 0) >>>>> + return ret; >>>>> + >>>>> + alpha->linesize[0] = base->linesize[3]; >>>>> + alpha->data[0] = base->data[3]; >>>>> + >>>>> + return 0; >>>>> + } >>>>> + } >>>> >>>> Why does the decoding process actually need multiple references to the >>>> buffer of the alpha plane? >>> I’m not sure if I understand your question correctly. >>> 1. Both the base layer and the alpha layer are decoded as yuv420p. >>> 2. The y plane of alpha layer is actually alpha. >>> 3. The decoder finally outputs yuva, not two yuv420p. >>> So here we make the y plane of the alpha frame reference the alpha plane of the base frame. >>> When output, the base frame will contain YUV and alpha from two layers. >> >> The loop makes it look like you're potentially creating references to base_a in all buf[] entries from alpha, when all you need is one in alpha->buf[0], right? > > That’s what I did before patch v6. Andreas pointed out that alpha->buf[0] doesn’t mean the first plane. > frame->data[0] can point to frame->buf[2]. So I need a loop to find out which buf frame->data[0] points to. 
Oh, you're right, I now see the return 0 that breaks the loop.