From: "Marvin Scholz" <epirat07@gmail.com> To: "FFmpeg development discussions and patches" <ffmpeg-devel@ffmpeg.org> Cc: Thilo Borgmann <thilo.borgmann@mail.de>, Aman Karmani <ffmpeg@tmm1.net> Subject: Re: [FFmpeg-devel] [PATCH v8 1/3] libavdevice/avfoundation.m: use AudioConvert, extend supported formats Date: Wed, 05 Jan 2022 15:51:54 +0100 Message-ID: <4F2D812A-796E-45F3-A587-962A5C6D55D3@gmail.com> (raw) In-Reply-To: <44D555B2-4959-47EA-8210-917ACB4652EE@rastageeks.org> On 31 Dec 2021, at 18:42, Romain Beauxis wrote: > * Implement support for AudioConverter > * Switch to AudioConverter's API to convert unsupported PCM > formats (non-interleaved, non-packed) to supported formats > * Minimize data copy. > > This fixes: https://trac.ffmpeg.org/ticket/9502 > > API ref: > https://developer.apple.com/documentation/audiotoolbox/audio_converter_services > > Signed-off-by: Romain Beauxis <toots@rastageeks.org> > — > [Sorry for the noise but an issue came up with the previous set] > > This is the first patch of a series of 3 that fix, cleanup and enhance > the > avfoundation implementation for libavdevice. > > These patches come from an actual user-facing application relying on > libavdevice’s implementation of avfoundation audio input. Without > them, > Avfoundation is practically unusable as it will: > * Refuse to process certain specific audio input format that are > actually > returned by the OS for some users (packed PCM audio) > * Drop audio frames, resulting in corrupted audio input. This might > have been > unnoticed with video frames but this makes avfoundation essentially > unusable > for audio. > > The patches are now being included in our production build so they are > tested > and usable in production. > > Changelog for this patch: > * v2: None > * v3: None > * v4: None > * v5: Fix indentation/wrapping > * v6: None > * v7: Removed use of kAudioConverterPropertyCalculateOutputBufferSize > to calculate output buffer size. 
>   The calculation is trivial and this call was randomly failing for no
>   reason.
> * v8: None

The patchset fails to apply for me:

Applying: libavdevice/avfoundation.m: use AudioConvert, extend supported formats
error: corrupt patch at line 191
Patch failed at 0001 libavdevice/avfoundation.m: use AudioConvert, extend supported formats

>
>  libavdevice/avfoundation.m | 255 +++++++++++++++++++++----------------
>  1 file changed, 145 insertions(+), 110 deletions(-)
>
> diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
> index 0cd6e646d5..738cd93375 100644
> --- a/libavdevice/avfoundation.m
> +++ b/libavdevice/avfoundation.m
> @@ -111,16 +111,11 @@
>
>      int num_video_devices;
>
> -    int audio_channels;
> -    int audio_bits_per_sample;
> -    int audio_float;
> -    int audio_be;
> -    int audio_signed_integer;
> -    int audio_packed;
> -    int audio_non_interleaved;
> -
> -    int32_t *audio_buffer;
> -    int audio_buffer_size;
> +    UInt32 audio_buffers;
> +    UInt32 audio_channels;
> +    UInt32 input_bytes_per_sample;
> +    UInt32 output_bytes_per_sample;
> +    AudioConverterRef audio_converter;
>
>      enum AVPixelFormat pixel_format;
>
> @@ -299,7 +294,10 @@ static void destroy_context(AVFContext* ctx)
>      ctx->avf_delegate = NULL;
>      ctx->avf_audio_delegate = NULL;
>
> -    av_freep(&ctx->audio_buffer);
> +    if (ctx->audio_converter) {
> +        AudioConverterDispose(ctx->audio_converter);
> +        ctx->audio_converter = NULL;
> +    }
>
>      pthread_mutex_destroy(&ctx->frame_lock);
>
> @@ -673,6 +671,10 @@ static int get_audio_config(AVFormatContext *s)
>      AVFContext *ctx = (AVFContext*)s->priv_data;
>      CMFormatDescriptionRef format_desc;
>      AVStream* stream = avformat_new_stream(s, NULL);
> +    AudioStreamBasicDescription output_format = {0};
> +    int audio_bits_per_sample, audio_float, audio_be;
> +    int audio_signed_integer, audio_packed, audio_non_interleaved;
> +    int must_convert = 0;
>
>      if (!stream) {
>          return 1;
> @@ -690,60 +692,97 @@ static int get_audio_config(AVFormatContext *s)
>      avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>
>      format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
> -    const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
> +    const AudioStreamBasicDescription *input_format = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>
> -    if (!basic_desc) {
> +    if (!input_format) {
>          unlock_frames(ctx);
>          av_log(s, AV_LOG_ERROR, "audio format not available\n");
>          return 1;
>      }
>
> +    if (input_format->mFormatID != kAudioFormatLinearPCM) {
> +        unlock_frames(ctx);
> +        av_log(s, AV_LOG_ERROR, "only PCM audio format are supported at the moment\n");
> +        return 1;
> +    }
> +
>      stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> -    stream->codecpar->sample_rate = basic_desc->mSampleRate;
> -    stream->codecpar->channels = basic_desc->mChannelsPerFrame;
> +    stream->codecpar->sample_rate = input_format->mSampleRate;
> +    stream->codecpar->channels = input_format->mChannelsPerFrame;
>      stream->codecpar->channel_layout = av_get_default_channel_layout(stream->codecpar->channels);
>
> -    ctx->audio_channels = basic_desc->mChannelsPerFrame;
> -    ctx->audio_bits_per_sample = basic_desc->mBitsPerChannel;
> -    ctx->audio_float = basic_desc->mFormatFlags & kAudioFormatFlagIsFloat;
> -    ctx->audio_be = basic_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
> -    ctx->audio_signed_integer = basic_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
> -    ctx->audio_packed = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked;
> -    ctx->audio_non_interleaved = basic_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
> -
> -    if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> -        ctx->audio_float &&
> -        ctx->audio_bits_per_sample == 32 &&
> -        ctx->audio_packed) {
> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
> -    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> -        ctx->audio_signed_integer &&
> -        ctx->audio_bits_per_sample == 16 &&
> -        ctx->audio_packed) {
> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
> -    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> -        ctx->audio_signed_integer &&
> -        ctx->audio_bits_per_sample == 24 &&
> -        ctx->audio_packed) {
> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
> -    } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
> -        ctx->audio_signed_integer &&
> -        ctx->audio_bits_per_sample == 32 &&
> -        ctx->audio_packed) {
> -        stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
> +    audio_bits_per_sample = input_format->mBitsPerChannel;
> +    audio_float = input_format->mFormatFlags & kAudioFormatFlagIsFloat;
> +    audio_be = input_format->mFormatFlags & kAudioFormatFlagIsBigEndian;
> +    audio_signed_integer = input_format->mFormatFlags & kAudioFormatFlagIsSignedInteger;
> +    audio_packed = input_format->mFormatFlags & kAudioFormatFlagIsPacked;
> +    audio_non_interleaved = input_format->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
> +
> +    ctx->input_bytes_per_sample = input_format->mBitsPerChannel >> 3;
> +    ctx->output_bytes_per_sample = ctx->input_bytes_per_sample;
> +    ctx->audio_channels = input_format->mChannelsPerFrame;
> +
> +    if (audio_non_interleaved) {
> +        ctx->audio_buffers = input_format->mChannelsPerFrame;
>      } else {
> -        unlock_frames(ctx);
> -        av_log(s, AV_LOG_ERROR, "audio format is not supported\n");
> -        return 1;
> +        ctx->audio_buffers = 1;
> +    }
> +
> +    if (audio_non_interleaved || !audio_packed) {
> +        must_convert = 1;
> +    }
> +
> +    output_format.mBitsPerChannel = input_format->mBitsPerChannel;
> +    output_format.mChannelsPerFrame = ctx->audio_channels;
> +    output_format.mFramesPerPacket = 1;
> +    output_format.mBytesPerFrame = output_format.mChannelsPerFrame * ctx->input_bytes_per_sample;
> +    output_format.mBytesPerPacket = output_format.mFramesPerPacket * output_format.mBytesPerFrame;
> +    output_format.mFormatFlags = kAudioFormatFlagIsPacked | audio_be;
> +    output_format.mFormatID = kAudioFormatLinearPCM;
> +    output_format.mReserved = 0;
> +    output_format.mSampleRate = input_format->mSampleRate;
> +
> +    if (audio_float &&
> +        audio_bits_per_sample == 32) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
> +    } else if (audio_float &&
> +               audio_bits_per_sample == 64) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_F64BE : AV_CODEC_ID_PCM_F64LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsFloat;
> +    } else if (audio_signed_integer &&
> +               audio_bits_per_sample == 8) {
> +        stream->codecpar->codec_id = AV_CODEC_ID_PCM_S8;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else if (audio_signed_integer &&
> +               audio_bits_per_sample == 16) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else if (audio_signed_integer &&
> +               audio_bits_per_sample == 24) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else if (audio_signed_integer &&
> +               audio_bits_per_sample == 32) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else if (audio_signed_integer &&
> +               audio_bits_per_sample == 64) {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S64BE : AV_CODEC_ID_PCM_S64LE;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +    } else {
> +        stream->codecpar->codec_id = audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
> +        ctx->output_bytes_per_sample = 4;
> +        output_format.mBitsPerChannel = 32;
> +        output_format.mFormatFlags |= kAudioFormatFlagIsSignedInteger;
> +        must_convert = 1;
>      }
>
> -    if (ctx->audio_non_interleaved) {
> -        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
> -        ctx->audio_buffer_size = CMBlockBufferGetDataLength(block_buffer);
> -        ctx->audio_buffer = av_malloc(ctx->audio_buffer_size);
> -        if (!ctx->audio_buffer) {
> +    if (must_convert) {
> +        OSStatus ret = AudioConverterNew(input_format, &output_format, &ctx->audio_converter);
> +        if (ret != noErr) {
>              unlock_frames(ctx);
> -            av_log(s, AV_LOG_ERROR, "error allocating audio buffer\n");
> +            av_log(s, AV_LOG_ERROR, "Error while allocating audio converter\n");
>              return 1;
>          }
>      }
> @@ -1048,6 +1087,7 @@ static int copy_cvpixelbuffer(AVFormatContext *s,
>
>  static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>  {
> +    OSStatus ret;
>      AVFContext* ctx = (AVFContext*)s->priv_data;
>
>      do {
> @@ -1091,7 +1131,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>              status = copy_cvpixelbuffer(s, image_buffer, pkt);
>          } else {
>              status = 0;
> -            OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
> +            ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>              if (ret != kCMBlockBufferNoErr) {
>                  status = AVERROR(EIO);
>              }
> @@ -1105,21 +1145,60 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>              }
>          } else if (ctx->current_audio_frame != nil) {
>              CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
> -            int block_buffer_size = CMBlockBufferGetDataLength(block_buffer);
>
> -            if (!block_buffer || !block_buffer_size) {
> -                unlock_frames(ctx);
> -                return AVERROR(EIO);
> -            }
> +            size_t input_size = CMBlockBufferGetDataLength(block_buffer);
> +            int buffer_size = input_size / ctx->audio_buffers;
> +            int nb_samples = input_size / (ctx->audio_channels * ctx->input_bytes_per_sample);
> +            int output_size = nb_samples * ctx->output_bytes_per_sample * ctx->audio_channels;
>
> -            if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
> -                unlock_frames(ctx);
> -                return AVERROR_BUFFER_TOO_SMALL;
> +            status = av_new_packet(pkt, output_size);
> +            if (status < 0) {
> +                CFRelease(audio_frame);
> +                return status;
>              }
>
> -            if (av_new_packet(pkt, block_buffer_size) < 0) {
> -                unlock_frames(ctx);
> -                return AVERROR(EIO);
> +            if (ctx->audio_converter) {
> +                size_t input_buffer_size = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * ctx->audio_buffers);
> +                AudioBufferList *input_buffer = av_malloc(input_buffer_size);
> +
> +                input_buffer->mNumberBuffers = ctx->audio_buffers;
> +
> +                for (int c = 0; c < ctx->audio_buffers; c++) {
> +                    input_buffer->mBuffers[c].mNumberChannels = 1;
> +
> +                    ret = CMBlockBufferGetDataPointer(block_buffer, c * buffer_size, (size_t *)&input_buffer->mBuffers[c].mDataByteSize, NULL, (void *)&input_buffer->mBuffers[c].mData);
> +
> +                    if (ret != kCMBlockBufferNoErr) {
> +                        av_free(input_buffer);
> +                        unlock_frames(ctx);
> +                        return AVERROR(EIO);
> +                    }
> +                }
> +
> +                AudioBufferList output_buffer = {
> +                    .mNumberBuffers = 1,
> +                    .mBuffers[0] = {
> +                        .mNumberChannels = ctx->audio_channels,
> +                        .mDataByteSize = pkt->size,
> +                        .mData = pkt->data
> +                    }
> +                };
> +
> +                ret = AudioConverterConvertComplexBuffer(ctx->audio_converter, nb_samples, input_buffer, &output_buffer);
> +                av_free(input_buffer);
> +
> +                if (ret != noErr) {
> +                    unlock_frames(ctx);
> +                    return AVERROR(EIO);
> +                }
> +
> +                pkt->size = output_buffer.mBuffers[0].mDataByteSize;
> +            } else {
> +                ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
> +                if (ret != kCMBlockBufferNoErr) {
> +                    unlock_frames(ctx);
> +                    return AVERROR(EIO);
> +                }
>              }
>
>              CMItemCount count;
> @@ -1133,54 +1212,10 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>              pkt->stream_index = ctx->audio_stream_index;
>              pkt->flags |= AV_PKT_FLAG_KEY;
>
> -            if (ctx->audio_non_interleaved) {
> -                int sample, c, shift, num_samples;
> -
> -                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
> -                if (ret != kCMBlockBufferNoErr) {
> -                    unlock_frames(ctx);
> -                    return AVERROR(EIO);
> -                }
> -
> -                num_samples = pkt->size / (ctx->audio_channels * (ctx->audio_bits_per_sample >> 3));
> -
> -                // transform decoded frame into output format
> -                #define INTERLEAVE_OUTPUT(bps)                                         \
> -                {                                                                      \
> -                    int##bps##_t **src;                                                \
> -                    int##bps##_t *dest;                                                \
> -                    src = av_malloc(ctx->audio_channels * sizeof(int##bps##_t*));      \
> -                    if (!src) {                                                        \
> -                        unlock_frames(ctx);                                            \
> -                        return AVERROR(EIO);                                           \
> -                    }                                                                  \
> -                                                                                       \
> -                    for (c = 0; c < ctx->audio_channels; c++) {                        \
> -                        src[c] = ((int##bps##_t*)ctx->audio_buffer) + c * num_samples; \
> -                    }                                                                  \
> -                    dest = (int##bps##_t*)pkt->data;                                   \
> -                    shift = bps - ctx->audio_bits_per_sample;                          \
> -                    for (sample = 0; sample < num_samples; sample++)                   \
> -                        for (c = 0; c < ctx->audio_channels; c++)                      \
> -                            *dest++ = src[c][sample] << shift;                         \
> -                    av_freep(&src);                                                    \
> -                }
> -
> -                if (ctx->audio_bits_per_sample <= 16) {
> -                    INTERLEAVE_OUTPUT(16)
> -                } else {
> -                    INTERLEAVE_OUTPUT(32)
> -                }
> -            } else {
> -                OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
> -                if (ret != kCMBlockBufferNoErr) {
> -                    unlock_frames(ctx);
> -                    return AVERROR(EIO);
> -                }
> -            }
> -
>              CFRelease(ctx->current_audio_frame);
>              ctx->current_audio_frame = nil;
> +
> +            unlock_frames(ctx);
>          } else {
>              pkt->data = NULL;
>              unlock_frames(ctx);
> --
> 2.32.0 (Apple Git-132)
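
For anyone reviewing the approach rather than the individual hunks: the conversion path described in the cover letter (and the v7 note about computing the output buffer size directly) boils down to roughly the sketch below. This is a minimal, illustrative C sketch only, not code from the patch; the helper name convert_to_packed() and its parameters are invented for this example, error handling is reduced to the essentials, and it assumes an AudioConverterRef that was created earlier with AudioConverterNew() from the device's (possibly non-interleaved, non-packed) LPCM description to a packed, interleaved output description, which is what the patch sets up in get_audio_config().

    /*
     * Illustrative sketch only (not code from the patch): convert one captured
     * LPCM buffer, possibly non-interleaved and/or non-packed, into packed
     * interleaved samples. convert_to_packed() and its parameters are made-up
     * names for this example.
     */
    #include <stddef.h>
    #include <stdlib.h>
    #include <AudioToolbox/AudioToolbox.h>

    static OSStatus convert_to_packed(AudioConverterRef converter,
                                      const AudioBuffer *in_bufs,  /* nb_buffers entries: one per channel
                                                                      if non-interleaved, one otherwise */
                                      UInt32 nb_buffers,
                                      UInt32 channels,
                                      UInt32 nb_frames,
                                      UInt32 out_bytes_per_sample,
                                      void *out_data)
    {
        /* The v7 point: the output size is trivial to compute directly, no need
         * to query kAudioConverterPropertyCalculateOutputBufferSize. */
        UInt32 out_size = nb_frames * channels * out_bytes_per_sample;

        /* Variable-length AudioBufferList carrying the input buffers. */
        size_t list_size = offsetof(AudioBufferList, mBuffers) +
                           nb_buffers * sizeof(AudioBuffer);
        AudioBufferList *in_list = malloc(list_size);
        if (!in_list)
            return kAudio_MemFullError;

        in_list->mNumberBuffers = nb_buffers;
        for (UInt32 c = 0; c < nb_buffers; c++)
            in_list->mBuffers[c] = in_bufs[c];

        /* Single packed, interleaved output buffer. */
        AudioBufferList out_list = {
            .mNumberBuffers = 1,
            .mBuffers[0] = {
                .mNumberChannels = channels,
                .mDataByteSize   = out_size,
                .mData           = out_data,
            },
        };

        OSStatus err = AudioConverterConvertComplexBuffer(converter, nb_frames,
                                                          in_list, &out_list);
        free(in_list);
        return err;
    }

As far as I can tell from the AudioConverter documentation, AudioConverterConvertComplexBuffer() is only intended for conversions where the input and output sample rates match, which holds here because the patch copies mSampleRate from the input format into output_format.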