Re: [FFmpeg-devel] [PATCH 1/7 v2] avcodec: add an Immersive Audio Model and Formats frame split bsf

From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH 1/7 v2] avcodec: add an Immersive Audio Model and Formats frame split bsf
Date: Tue, 30 Jan 2024 22:47:41 +0100
Message-ID: <AS8P250MB0744EEFCF888F5F84C29062D8F7D2@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM> (raw)
In-Reply-To: <20240130173218.63297-1-jamrial@gmail.com>

James Almer:
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavcodec/bitstream_filters.c         |   1 +
>  libavcodec/bsf/Makefile                |   1 +
>  libavcodec/bsf/iamf_stream_split_bsf.c | 813 +++++++++++++++++++++++++
>  3 files changed, 815 insertions(+)
>  create mode 100644 libavcodec/bsf/iamf_stream_split_bsf.c
> 
> diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
> index 1e9a676a3d..640b821413 100644
> --- a/libavcodec/bitstream_filters.c
> +++ b/libavcodec/bitstream_filters.c
> @@ -42,6 +42,7 @@ extern const FFBitStreamFilter ff_h264_redundant_pps_bsf;
>  extern const FFBitStreamFilter ff_hapqa_extract_bsf;
>  extern const FFBitStreamFilter ff_hevc_metadata_bsf;
>  extern const FFBitStreamFilter ff_hevc_mp4toannexb_bsf;
> +extern const FFBitStreamFilter ff_iamf_stream_split_bsf;
>  extern const FFBitStreamFilter ff_imx_dump_header_bsf;
>  extern const FFBitStreamFilter ff_media100_to_mjpegb_bsf;
>  extern const FFBitStreamFilter ff_mjpeg2jpeg_bsf;
> diff --git a/libavcodec/bsf/Makefile b/libavcodec/bsf/Makefile
> index 7831b0f2aa..80dcdf94fb 100644
> --- a/libavcodec/bsf/Makefile
> +++ b/libavcodec/bsf/Makefile
> @@ -20,6 +20,7 @@ OBJS-$(CONFIG_H264_REDUNDANT_PPS_BSF)     += bsf/h264_redundant_pps.o
>  OBJS-$(CONFIG_HAPQA_EXTRACT_BSF)          += bsf/hapqa_extract.o
>  OBJS-$(CONFIG_HEVC_METADATA_BSF)          += bsf/h265_metadata.o
>  OBJS-$(CONFIG_HEVC_MP4TOANNEXB_BSF)       += bsf/hevc_mp4toannexb.o
> +OBJS-$(CONFIG_IAMF_STREAM_MERGE_BSF)      += bsf/iamf_stream_merge_bsf.o
>  OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF)        += bsf/imx_dump_header.o
>  OBJS-$(CONFIG_MEDIA100_TO_MJPEGB_BSF)     += bsf/media100_to_mjpegb.o
>  OBJS-$(CONFIG_MJPEG2JPEG_BSF)             += bsf/mjpeg2jpeg.o
> diff --git a/libavcodec/bsf/iamf_stream_split_bsf.c b/libavcodec/bsf/iamf_stream_split_bsf.c
> new file mode 100644
> index 0000000000..01193801d7
> --- /dev/null
> +++ b/libavcodec/bsf/iamf_stream_split_bsf.c
> @@ -0,0 +1,813 @@
> +/*
> + * Copyright (c) 2023 James Almer <jamrial@gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <stdint.h>
> +#include <stddef.h>
> +
> +#include "libavutil/dict.h"
> +#include "libavutil/opt.h"
> +#include "libavformat/iamf.h"
> +#include "bsf.h"
> +#include "bsf_internal.h"
> +#include "get_bits.h"
> +
> +typedef struct ParamDefinition {
> +    AVIAMFParamDefinition *param;
> +    size_t param_size;
> +    int mode;
> +    int recon_gain_present_bitmask;
> +} ParamDefinition;
> +
> +typedef struct IAMFSplitContext {
> +    AVClass *class;
> +    AVPacket *buffer_pkt;
> +
> +    ParamDefinition *param_definitions;
> +    unsigned int nb_param_definitions;
> +
> +    unsigned int *ids;
> +    int nb_ids;
> +
> +    // AVOptions
> +    int first_index;
> +
> +    // Packet side data
> +    AVIAMFParamDefinition *mix;
> +    size_t mix_size;
> +    AVIAMFParamDefinition *demix;
> +    size_t demix_size;
> +    AVIAMFParamDefinition *recon;
> +    size_t recon_size;
> +} IAMFSplitContext;
> +
> +static int param_parse(AVBSFContext *ctx, GetBitContext *gb,
> +                       unsigned int type,
> +                       ParamDefinition **out)
> +{
> +    IAMFSplitContext *const c = ctx->priv_data;
> +    ParamDefinition *param_definition = NULL;
> +    AVIAMFParamDefinition *param;
> +    unsigned int parameter_id, parameter_rate, mode;
> +    unsigned int duration = 0, constant_subblock_duration = 0, nb_subblocks = 0;
> +    size_t param_size;
> +
> +    parameter_id = get_leb(gb);
> +
> +    for (int i = 0; i < c->nb_param_definitions; i++)
> +        if (c->param_definitions[i].param->parameter_id == parameter_id) {
> +            param_definition = &c->param_definitions[i];
> +            break;
> +        }
> +
> +    parameter_rate = get_leb(gb);
> +    mode = get_bits(gb, 8) >> 7;
> +
> +    if (mode == 0) {
> +        duration = get_leb(gb);
> +        constant_subblock_duration = get_leb(gb);
> +        if (constant_subblock_duration == 0) {
> +            nb_subblocks = get_leb(gb);
> +        } else
> +            nb_subblocks = duration / constant_subblock_duration;
> +    }
> +
> +    param = av_iamf_param_definition_alloc(type, nb_subblocks, &param_size);
> +    if (!param)
> +        return AVERROR(ENOMEM);
> +
> +    for (int i = 0; i < nb_subblocks; i++) {
> +        if (constant_subblock_duration == 0)
> +            get_leb(gb); // subblock_duration
> +
> +        switch (type) {
> +        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
> +            break;
> +        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
> +            skip_bits(gb, 8); // dmixp_mode
> +            break;
> +        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
> +            break;
> +        default:
> +            av_free(param);
> +            return AVERROR_INVALIDDATA;
> +        }
> +    }
> +
> +    param->parameter_id = parameter_id;
> +    param->parameter_rate = parameter_rate;
> +    param->duration = duration;
> +    param->constant_subblock_duration = constant_subblock_duration;
> +    param->nb_subblocks = nb_subblocks;
> +
> +    if (param_definition) {
> +        if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) {
> +            av_log(ctx, AV_LOG_ERROR, "Incosistent parameters for parameter_id %u\n", parameter_id);
> +            av_free(param);
> +            return AVERROR_INVALIDDATA;
> +        }
> +        av_freep(&param);
> +    } else {
> +        ParamDefinition *tmp = av_realloc_array(c->param_definitions, c->nb_param_definitions + 1,
> +                                                sizeof(*c->param_definitions));
> +        if (!tmp) {
> +            av_free(param);
> +            return AVERROR(ENOMEM);
> +        }
> +        c->param_definitions = tmp;
> +
> +        param_definition = &c->param_definitions[c->nb_param_definitions++];
> +        param_definition->param = param;
> +        param_definition->mode = !mode;
> +        param_definition->param_size = param_size;
> +    }
> +    if (out)
> +        *out = param_definition;
> +
> +    return 0;
> +}
> +
> +static int scalable_channel_layout_config(AVBSFContext *ctx, GetBitContext *gb,
> +                                          ParamDefinition *recon_gain)
> +{
> +    int nb_layers;
> +
> +    nb_layers = get_bits(gb, 3);
> +    skip_bits(gb, 5); //reserved
> +
> +    if (nb_layers > 6)
> +        return AVERROR_INVALIDDATA;
> +
> +    for (int i = 0; i < nb_layers; i++) {
> +        int output_gain_is_present_flag, recon_gain_is_present;
> +
> +        skip_bits(gb, 4); // loudspeaker_layout
> +        output_gain_is_present_flag = get_bits1(gb);
> +        recon_gain_is_present = get_bits1(gb);
> +        if (recon_gain)
> +            recon_gain->recon_gain_present_bitmask |= recon_gain_is_present << i;
> +        skip_bits(gb, 2); // reserved
> +        skip_bits(gb, 8); // substream_count
> +        skip_bits(gb, 8); // coupled_substream_count
> +        if (output_gain_is_present_flag) {
> +            skip_bits(gb, 8); // output_gain_flags & reserved
> +            skip_bits(gb, 16); // output_gain
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int audio_element_obu(AVBSFContext *ctx, uint8_t *buf, unsigned size)
> +{
> +    IAMFSplitContext *const c = ctx->priv_data;
> +    GetBitContext gb;
> +    ParamDefinition *recon_gain = NULL;
> +    unsigned audio_element_type;
> +    unsigned num_substreams, num_parameters;
> +    int ret;
> +
> +    ret = init_get_bits8(&gb, buf, size);
> +    if (ret < 0)
> +        return ret;
> +
> +    get_leb(&gb); // audio_element_id
> +    audio_element_type = get_bits(&gb, 3);
> +    skip_bits(&gb, 5); // reserved
> +
> +    get_leb(&gb); // codec_config_id
> +    num_substreams = get_leb(&gb);
> +    for (unsigned i = 0; i < num_substreams; i++) {
> +        unsigned *audio_substream_id = av_dynarray2_add((void **)&c->ids, &c->nb_ids,
> +                                                        sizeof(*c->ids), NULL);
> +        if (!audio_substream_id)
> +            return AVERROR(ENOMEM);
> +
> +        *audio_substream_id = get_leb(&gb);
> +    }
> +
> +    num_parameters = get_leb(&gb);
> +    if (num_parameters && audio_element_type != 0) {
> +        av_log(ctx, AV_LOG_ERROR, "Audio Element parameter count %u is invalid"
> +                                " for Scene representations\n", num_parameters);
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    for (int i = 0; i < num_parameters; i++) {
> +        unsigned type = get_leb(&gb);
> +
> +        if (type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN)
> +            return AVERROR_INVALIDDATA;
> +        else if (type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING) {
> +            ret = param_parse(ctx, &gb, type, NULL);
> +            if (ret < 0)
> +                return ret;
> +            skip_bits(&gb, 8); // default_w
> +        } else if (type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) {
> +            ret = param_parse(ctx, &gb, type, &recon_gain);
> +            if (ret < 0)
> +                return ret;
> +        } else {
> +            unsigned param_definition_size = get_leb(&gb);
> +            skip_bits_long(&gb, param_definition_size * 8);
> +        }
> +    }
> +
> +    if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) {
> +        ret = scalable_channel_layout_config(ctx, &gb, recon_gain);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    return 0;
> +}
> +
> +static int label_string(GetBitContext *gb)
> +{
> +    int byte;
> +
> +    do {
> +        byte = get_bits(gb, 8);
> +    } while (byte);
> +
> +    return 0;
> +}
> +
> +static int mix_presentation_obu(AVBSFContext *ctx, uint8_t *buf, unsigned size)
> +{
> +    GetBitContext gb;
> +    unsigned mix_presentation_id, count_label;
> +    unsigned nb_submixes, nb_elements;
> +    int ret;
> +
> +    ret = init_get_bits8(&gb, buf, size);
> +    if (ret < 0)
> +        return ret;
> +
> +    mix_presentation_id = get_leb(&gb);
> +    count_label = get_leb(&gb);
> +
> +    for (int i = 0; i < count_label; i++) {
> +        ret = label_string(&gb);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    for (int i = 0; i < count_label; i++) {
> +        ret = label_string(&gb);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    nb_submixes = get_leb(&gb);
> +    for (int i = 0; i < nb_submixes; i++) {
> +        unsigned nb_layouts;
> +
> +        nb_elements = get_leb(&gb);
> +
> +        for (int j = 0; j < nb_elements; j++) {
> +            unsigned rendering_config_extension_size;
> +
> +            get_leb(&gb); // audio_element_id
> +            for (int k = 0; k < count_label; k++) {
> +                ret = label_string(&gb);
> +                if (ret < 0)
> +                    return ret;
> +            }
> +
> +            skip_bits(&gb, 8); // headphones_rendering_mode & reserved
> +            rendering_config_extension_size = get_leb(&gb);
> +            skip_bits_long(&gb, rendering_config_extension_size * 8);
> +
> +            ret = param_parse(ctx, &gb, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL);
> +            if (ret < 0)
> +                return ret;
> +            skip_bits(&gb, 16); // default_mix_gain
> +        }
> +
> +        ret = param_parse(ctx, &gb, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL);
> +        if (ret < 0)
> +            return ret;
> +        get_bits(&gb, 16); // default_mix_gain
> +
> +        nb_layouts = get_leb(&gb);
> +        for (int j = 0; j < nb_layouts; j++) {
> +            int info_type, layout_type;
> +            int byte = get_bits(&gb, 8);
> +
> +            layout_type = byte >> 6;
> +            if (layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS &&
> +                layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) {
> +                av_log(ctx, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n",
> +                       layout_type, mix_presentation_id);
> +                return AVERROR_INVALIDDATA;
> +            }
> +
> +            info_type = get_bits(&gb, 8);
> +            get_bits(&gb, 16); // integrated_loudness
> +            get_bits(&gb, 16); // digital_peak
> +
> +            if (info_type & 1)
> +                get_bits(&gb, 16); // true_peak
> +
> +            if (info_type & 2) {
> +                unsigned int num_anchored_loudness = get_bits(&gb, 8);
> +
> +                for (int k = 0; k < num_anchored_loudness; k++) {
> +                    get_bits(&gb, 8); // anchor_element
> +                    get_bits(&gb, 16); // anchored_loudness
> +                }
> +            }
> +
> +            if (info_type & 0xFC) {
> +                unsigned int info_type_size = get_leb(&gb);
> +                skip_bits_long(&gb, info_type_size * 8);
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int find_idx_by_id(AVBSFContext *ctx, unsigned id)
> +{
> +    IAMFSplitContext *const c = ctx->priv_data;
> +
> +    for (int i = 0; i < c->nb_ids; i++) {
> +        unsigned audio_substream_id = c->ids[i];
> +
> +        if (audio_substream_id == id)
> +            return i;
> +    }
> +
> +    av_log(ctx, AV_LOG_ERROR, "Invalid id %d\n", id);
> +    return AVERROR_INVALIDDATA;
> +}
> +
> +static int audio_frame_obu(AVBSFContext *ctx, enum IAMF_OBU_Type type, int *idx,
> +                           uint8_t *buf, int *start_pos, unsigned *size,
> +                           int id_in_bitstream)
> +{
> +    GetBitContext gb;
> +    unsigned audio_substream_id;
> +    int ret;
> +
> +    ret = init_get_bits8(&gb, buf + *start_pos, *size);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (id_in_bitstream) {
> +        int pos;
> +        audio_substream_id = get_leb(&gb);
> +        pos = get_bits_count(&gb) / 8;
> +        *start_pos += pos;
> +        *size -= pos;
> +    } else
> +        audio_substream_id = type - IAMF_OBU_IA_AUDIO_FRAME_ID0;
> +
> +    ret = find_idx_by_id(ctx, audio_substream_id);
> +    if (ret < 0)
> +        return ret;
> +
> +    *idx = ret;
> +
> +    return 0;
> +}
> +
> +static const ParamDefinition *get_param_definition(AVBSFContext *ctx, unsigned int parameter_id)
> +{
> +    const IAMFSplitContext *const c = ctx->priv_data;
> +    const ParamDefinition *param_definition = NULL;
> +
> +    for (int i = 0; i < c->nb_param_definitions; i++)
> +        if (c->param_definitions[i].param->parameter_id == parameter_id) {
> +            param_definition = &c->param_definitions[i];
> +            break;
> +        }
> +
> +    return param_definition;
> +}
> +
> +static int parameter_block_obu(AVBSFContext *ctx, uint8_t *buf, unsigned size)
> +{
> +    IAMFSplitContext *const c = ctx->priv_data;
> +    GetBitContext gb;
> +    const ParamDefinition *param_definition;
> +    const AVIAMFParamDefinition *param;
> +    AVIAMFParamDefinition *out_param = NULL;
> +    unsigned int duration, constant_subblock_duration;
> +    unsigned int nb_subblocks;
> +    unsigned int parameter_id;
> +    size_t out_param_size;
> +    int ret;
> +
> +    ret = init_get_bits8(&gb, buf, size);
> +    if (ret < 0)
> +        return ret;
> +
> +    parameter_id = get_leb(&gb);
> +
> +    param_definition = get_param_definition(ctx, parameter_id);
> +    if (!param_definition) {
> +        ret = 0;
> +        goto fail;
> +    }
> +
> +    param = param_definition->param;
> +    if (!param_definition->mode) {
> +        duration = get_leb(&gb);
> +        constant_subblock_duration = get_leb(&gb);
> +        if (constant_subblock_duration == 0)
> +            nb_subblocks = get_leb(&gb);
> +        else
> +            nb_subblocks = duration / constant_subblock_duration;
> +    } else {
> +        duration = param->duration;
> +        constant_subblock_duration = param->constant_subblock_duration;
> +        nb_subblocks = param->nb_subblocks;
> +        if (!nb_subblocks)
> +            nb_subblocks = duration / constant_subblock_duration;
> +    }
> +
> +    out_param = av_iamf_param_definition_alloc(param->type, nb_subblocks, &out_param_size);
> +    if (!out_param) {
> +        ret = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    out_param->parameter_id = param->parameter_id;
> +    out_param->type = param->type;
> +    out_param->parameter_rate = param->parameter_rate;
> +    out_param->duration = duration;
> +    out_param->constant_subblock_duration = constant_subblock_duration;
> +    out_param->nb_subblocks = nb_subblocks;
> +
> +    for (int i = 0; i < nb_subblocks; i++) {
> +        void *subblock = av_iamf_param_definition_get_subblock(out_param, i);
> +        unsigned int subblock_duration = constant_subblock_duration;
> +
> +        if (!param_definition->mode && !constant_subblock_duration)
> +            subblock_duration = get_leb(&gb);
> +
> +        switch (param->type) {
> +        case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: {
> +            AVIAMFMixGain *mix = subblock;
> +
> +            mix->animation_type = get_leb(&gb);
> +            if (mix->animation_type > AV_IAMF_ANIMATION_TYPE_BEZIER) {
> +                ret = 0;
> +                av_free(out_param);
> +                goto fail;
> +            }
> +
> +            mix->start_point_value = av_make_q(sign_extend(get_bits(&gb, 16), 16), 1 << 8);
> +            if (mix->animation_type >= AV_IAMF_ANIMATION_TYPE_LINEAR)
> +                mix->end_point_value = av_make_q(sign_extend(get_bits(&gb, 16), 16), 1 << 8);
> +            if (mix->animation_type == AV_IAMF_ANIMATION_TYPE_BEZIER) {
> +                mix->control_point_value = av_make_q(sign_extend(get_bits(&gb, 16), 16), 1 << 8);
> +                mix->control_point_relative_time = av_make_q(get_bits(&gb, 8), 1 << 8);
> +            }
> +            mix->subblock_duration = subblock_duration;
> +            break;
> +        }
> +        case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: {
> +            AVIAMFDemixingInfo *demix = subblock;
> +
> +            demix->dmixp_mode = get_bits(&gb, 3);
> +            skip_bits(&gb, 5); // reserved
> +            demix->subblock_duration = subblock_duration;
> +            break;
> +        }
> +        case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: {
> +            AVIAMFReconGain *recon = subblock;
> +
> +            for (int i = 0; i < 6; i++) {
> +                if (param_definition->recon_gain_present_bitmask & (1 << i)) {
> +                    unsigned int recon_gain_flags = get_leb(&gb);
> +                    unsigned int bitcount = 7 + 5 * !!(recon_gain_flags & 0x80);
> +                    recon_gain_flags = (recon_gain_flags & 0x7F) | ((recon_gain_flags & 0xFF00) >> 1);
> +                    for (int j = 0; j < bitcount; j++) {
> +                        if (recon_gain_flags & (1 << j))
> +                            recon->recon_gain[i][j] = get_bits(&gb, 8);
> +                    }
> +                }
> +            }
> +            recon->subblock_duration = subblock_duration;
> +            break;
> +        }
> +        default:
> +            av_assert0(0);
> +        }
> +    }
> +
> +    switch (param->type) {
> +    case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN:
> +        av_free(c->mix);
> +        c->mix = out_param;
> +        c->mix_size = out_param_size;
> +        break;
> +    case AV_IAMF_PARAMETER_DEFINITION_DEMIXING:
> +        av_free(c->demix);
> +        c->demix = out_param;
> +        c->demix_size = out_param_size;
> +        break;
> +    case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN:
> +        av_free(c->recon);
> +        c->recon = out_param;
> +        c->recon_size = out_param_size;
> +        break;
> +    default:
> +        av_assert0(0);
> +    }
> +
> +    ret = 0;
> +fail:
> +    if (ret < 0)
> +        av_free(out_param);
> +
> +    return ret;
> +}
> +
> +static int iamf_parse_obu_header(const uint8_t *buf, int buf_size,
> +                                 unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type,
> +                                 unsigned *skip_samples, unsigned *discard_padding)
> +{
> +    GetBitContext gb;
> +    int ret, extension_flag, trimming, start;
> +    unsigned size;
> +
> +    ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE));
> +    if (ret < 0)
> +        return ret;
> +
> +    *type          = get_bits(&gb, 5);
> +    /*redundant      =*/ get_bits1(&gb);
> +    trimming       = get_bits1(&gb);
> +    extension_flag = get_bits1(&gb);
> +
> +    *obu_size = get_leb(&gb);

This stuff here should not use a GetBitContext at all, as basically
everything is byte-aligned (and the flags above are in known bits).

> +    if (*obu_size > INT_MAX)
> +        return AVERROR_INVALIDDATA;
> +
> +    start = get_bits_count(&gb) / 8;
> +
> +    if (trimming) {
> +        *skip_samples = get_leb(&gb); // num_samples_to_trim_at_end
> +        *discard_padding = get_leb(&gb); // num_samples_to_trim_at_start
> +    }
> +
> +    if (extension_flag) {
> +        unsigned extension_bytes = get_leb(&gb);
> +        if (extension_bytes > INT_MAX / 8)
> +            return AVERROR_INVALIDDATA;
> +        skip_bits_long(&gb, extension_bytes * 8);
> +    }
> +
> +    if (get_bits_left(&gb) < 0)
> +        return AVERROR_INVALIDDATA;
> +
> +    size = *obu_size + start;
> +    if (size > INT_MAX)
> +        return AVERROR_INVALIDDATA;
> +
> +    *obu_size -= get_bits_count(&gb) / 8 - start;
> +    *start_pos = size - *obu_size;
> +
> +    return size;
> +}
> +
> +static int iamf_stream_split_filter(AVBSFContext *ctx, AVPacket *out)
> +{
> +    IAMFSplitContext *const c = ctx->priv_data;
> +    int ret = 0;
> +
> +    if (!c->buffer_pkt->data) {
> +        ret = ff_bsf_get_packet_ref(ctx, c->buffer_pkt);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    while (1) {
> +        enum IAMF_OBU_Type type;
> +        unsigned skip_samples = 0, discard_padding = 0, obu_size;
> +        int len, start_pos, idx;
> +
> +        len = iamf_parse_obu_header(c->buffer_pkt->data,
> +                                    c->buffer_pkt->size,
> +                                    &obu_size, &start_pos, &type,
> +                                    &skip_samples, &discard_padding);
> +        if (len < 0) {
> +            av_log(ctx, AV_LOG_ERROR, "Failed to read obu\n");
> +            ret = len;
> +            goto fail;
> +        }
> +
> +        if (type >= IAMF_OBU_IA_AUDIO_FRAME && type <= IAMF_OBU_IA_AUDIO_FRAME_ID17) {
> +            ret = audio_frame_obu(ctx, type, &idx,
> +                                  c->buffer_pkt->data, &start_pos,
> +                                  &obu_size,
> +                                  type == IAMF_OBU_IA_AUDIO_FRAME);
> +            if (ret < 0)
> +                goto fail;
> +        } else {
> +            switch (type) {
> +            case IAMF_OBU_IA_AUDIO_ELEMENT:
> +                ret = audio_element_obu(ctx, c->buffer_pkt->data + start_pos, obu_size);
> +                if (ret < 0)
> +                    goto fail;
> +                break;
> +            case IAMF_OBU_IA_MIX_PRESENTATION:
> +                ret = mix_presentation_obu(ctx, c->buffer_pkt->data + start_pos, obu_size);
> +                if (ret < 0)
> +                    goto fail;
> +                break;
> +            case IAMF_OBU_IA_PARAMETER_BLOCK:
> +                ret = parameter_block_obu(ctx, c->buffer_pkt->data + start_pos, obu_size);
> +                if (ret < 0)
> +                    goto fail;
> +                break;
> +            case IAMF_OBU_IA_SEQUENCE_HEADER:
> +                for (int i = 0; c->param_definitions && i < c->nb_param_definitions; i++)
> +                    av_free(c->param_definitions[i].param);
> +                av_freep(&c->param_definitions);
> +                av_freep(&c->ids);
> +                c->nb_param_definitions = 0;
> +                c->nb_ids = 0;
> +                // fall-through
> +            case IAMF_OBU_IA_TEMPORAL_DELIMITER:
> +                av_freep(&c->mix);
> +                av_freep(&c->demix);
> +                av_freep(&c->recon);
> +                c->mix_size = 0;
> +                c->demix_size = 0;
> +                c->recon_size = 0;
> +                break;
> +            }
> +
> +            c->buffer_pkt->data += len;
> +            c->buffer_pkt->size -= len;
> +
> +            if (!c->buffer_pkt->size) {
> +                av_packet_unref(c->buffer_pkt);
> +                ret = ff_bsf_get_packet_ref(ctx, c->buffer_pkt);
> +                if (ret < 0)
> +                    return ret;
> +            } else if (c->buffer_pkt->size < 0) {
> +                ret = AVERROR_INVALIDDATA;
> +                goto fail;
> +            }
> +            continue;
> +        }
> +
> +        if (c->buffer_pkt->size > INT_MAX - len) {
> +            ret = AVERROR_INVALIDDATA;
> +            goto fail;
> +        }
> +
> +        ret = av_packet_ref(out, c->buffer_pkt);
> +        if (ret < 0)
> +            goto fail;
> +
> +        if (skip_samples || discard_padding) {
> +            uint8_t *side_data = av_packet_new_side_data(out, AV_PKT_DATA_SKIP_SAMPLES, 10);
> +            if (!side_data)
> +                return AVERROR(ENOMEM);
> +            AV_WL32(side_data, skip_samples);
> +            AV_WL32(side_data + 4, discard_padding);
> +        }
> +        if (c->mix) {
> +            uint8_t *side_data = av_packet_new_side_data(out, AV_PKT_DATA_IAMF_MIX_GAIN_PARAM, c->mix_size);
> +            if (!side_data)
> +                return AVERROR(ENOMEM);
> +            memcpy(side_data, c->mix, c->mix_size);
> +        }
> +        if (c->demix) {
> +            uint8_t *side_data = av_packet_new_side_data(out, AV_PKT_DATA_IAMF_DEMIXING_INFO_PARAM, c->demix_size);
> +            if (!side_data)
> +                return AVERROR(ENOMEM);
> +            memcpy(side_data, c->demix, c->demix_size);
> +        }
> +        if (c->recon) {
> +            uint8_t *side_data = av_packet_new_side_data(out, AV_PKT_DATA_IAMF_RECON_GAIN_INFO_PARAM, c->recon_size);
> +            if (!side_data)
> +                return AVERROR(ENOMEM);
> +            memcpy(side_data, c->recon, c->recon_size);
> +        }
> +
> +        out->data += start_pos;
> +        out->size = obu_size;
> +        out->stream_index = idx + c->first_index;
> +
> +        c->buffer_pkt->data += len;
> +        c->buffer_pkt->size -= len;
> +
> +        if (!c->buffer_pkt->size)
> +            av_packet_unref(c->buffer_pkt);
> +        else if (c->buffer_pkt->size < 0) {
> +            ret = AVERROR_INVALIDDATA;
> +            goto fail;
> +        }
> +
> +        return 0;
> +    }
> +
> +fail:
> +    if (ret < 0) {
> +        av_packet_unref(out);
> +        av_packet_unref(c->buffer_pkt);
> +    }
> +
> +    return ret;
> +}
> +
> +static int iamf_stream_split_init(AVBSFContext *ctx)
> +{
> +    IAMFSplitContext *const c = ctx->priv_data;
> +
> +    c->buffer_pkt = av_packet_alloc();
> +    if (!c->buffer_pkt)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
> +static void iamf_stream_split_flush(AVBSFContext *ctx)
> +{
> +    IAMFSplitContext *const c = ctx->priv_data;
> +
> +    if (c->buffer_pkt)
> +        av_packet_unref(c->buffer_pkt);
> +
> +    av_freep(&c->mix);
> +    av_freep(&c->demix);
> +    av_freep(&c->recon);
> +    c->mix_size = 0;
> +    c->demix_size = 0;
> +    c->recon_size = 0;
> +}
> +
> +static void iamf_stream_split_close(AVBSFContext *ctx)
> +{
> +    IAMFSplitContext *const c = ctx->priv_data;
> +
> +    iamf_stream_split_flush(ctx);
> +    av_packet_free(&c->buffer_pkt);
> +
> +    for (int i = 0; c->param_definitions && i < c->nb_param_definitions; i++)
> +        av_free(c->param_definitions[i].param);
> +    av_freep(&c->param_definitions);
> +    c->nb_param_definitions = 0;
> +
> +    av_freep(&c->ids);
> +    c->nb_ids = 0;
> +}
> +
> +#define OFFSET(x) offsetof(IAMFSplitContext, x)
> +#define FLAGS (AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_BSF_PARAM)
> +static const AVOption iamf_stream_split_options[] = {
> +    { "first_index", "First index to set stream index in output packets",
> +        OFFSET(first_index), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, FLAGS },
> +    { NULL }
> +};
> +
> +static const AVClass iamf_stream_split_class = {
> +    .class_name = "iamf_stream_split_bsf",
> +    .item_name  = av_default_item_name,
> +    .option     = iamf_stream_split_options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +static const enum AVCodecID iamf_stream_split_codec_ids[] = {
> +    AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE,
> +    AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S24BE,
> +    AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE,
> +    AV_CODEC_ID_OPUS,      AV_CODEC_ID_AAC,
> +    AV_CODEC_ID_FLAC,      AV_CODEC_ID_NONE,
> +};
> +
> +const FFBitStreamFilter ff_iamf_stream_split_bsf = {
> +    .p.name         = "iamf_stream_split",
> +    .p.codec_ids    = iamf_stream_split_codec_ids,
> +    .p.priv_class   = &iamf_stream_split_class,
> +    .priv_data_size = sizeof(IAMFSplitContext),
> +    .init           = iamf_stream_split_init,
> +    .flush          = iamf_stream_split_flush,
> +    .close          = iamf_stream_split_close,
> +    .filter         = iamf_stream_split_filter,
> +};

This needs to add documentation for what this BSF is actually supposed
to do. Right now it seems crazy: It parses the packet's data and expects
to find OBU headers, although the input data is supposed to be PCM,
Opus, AAC or Flac.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".