From 8a4e942a001ae49dc052899f331ed43abf954dda Mon Sep 17 00:00:00 2001 From: paolo Date: Sat, 18 Jun 2022 13:53:55 +0200 Subject: [PATCH] doc/examples/muxing: code rewrite with improved readability and fixed issues Improved readability with functions that have clearer prototypes and that don't mix logically unrelated blocks of code Fixed issues in case of unsupported extensions Fixed memory leaks on errors, which are now properly propagated to the main() function Fixed issue on raw images output fprintf() replaced with av_log() Input A/V parameters exposed in the main() function and easier to customize --- doc/examples/muxing.c | 905 +++++++++++++++++++----------------------- 1 file changed, 406 insertions(+), 499 deletions(-) diff --git a/doc/examples/muxing.c b/doc/examples/muxing.c index 3acb778322..04739995d8 100644 --- a/doc/examples/muxing.c +++ b/doc/examples/muxing.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2022 Paolo Prete (paolopr976 at gmail.com) after Fabrice Bellard * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -24,625 +24,532 @@ * @file * libavformat API example. * - * Output a media file in any supported libavformat format. The default + * Output a media file in a set of supported libavformat formats. The default * codecs are used. 
* @example muxing.c */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include #include #include -#include +#include #include +#include -#define STREAM_DURATION 10.0 -#define STREAM_FRAME_RATE 25 /* 25 images/s */ -#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */ - -#define SCALE_FLAGS SWS_BICUBIC +#define VIDEO_FRAME_RATE 25 /* 25 images/s */ +#define VIDEO_SCALE_FLAGS SWS_BICUBIC +#define STREAM_DURATION 10.0 /* 10 seconds */ -// a wrapper around a single output AVStream -typedef struct OutputStream { - AVStream *st; - AVCodecContext *enc; +static void log_error(const char *s, int *num) +{ + if (num) + av_log(NULL, AV_LOG_ERROR, "%s (error '%s')\n", s, av_err2str(*num)); + else + av_log(NULL, AV_LOG_ERROR, "%s\n", s); +} - /* pts of the next frame that will be generated */ - int64_t next_pts; - int samples_count; +static int mux_encoded_pkt(AVPacket *out_pkt, AVFormatContext *out_fmt_ctx, + enum AVMediaType type) +{ + int ret; + AVRational enc_time_base, str_time_base; - AVFrame *frame; - AVFrame *tmp_frame; + if (out_fmt_ctx->streams[0]->codecpar->codec_type == type) + out_pkt->stream_index = 0; + else if ((out_fmt_ctx->nb_streams > 1) && (type == AVMEDIA_TYPE_VIDEO)) + out_pkt->stream_index = 1; + str_time_base = out_fmt_ctx->streams[out_pkt->stream_index]->time_base; - AVPacket *tmp_pkt; + if (type == AVMEDIA_TYPE_AUDIO) + enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[0]; + else + enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[1]; - float t, tincr, tincr2; + av_packet_rescale_ts(out_pkt, enc_time_base, str_time_base); - struct SwsContext *sws_ctx; - struct SwrContext *swr_ctx; -} OutputStream; + av_log(NULL, AV_LOG_INFO, "stream_index=%d, size=%d, pts_time=%s\n", + out_pkt->stream_index, + out_pkt->size, av_ts2timestr(out_pkt->pts, &str_time_base)); -static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt) -{ - AVRational *time_base = 
&fmt_ctx->streams[pkt->stream_index]->time_base; + if ((ret = av_interleaved_write_frame(out_fmt_ctx, out_pkt)) < 0) + log_error("Error calling av_interleaved_write_frame()", &ret); - printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n", - av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base), - av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base), - av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base), - pkt->stream_index); + return ret; } -static int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c, - AVStream *st, AVFrame *frame, AVPacket *pkt) +static int is_extension_supported(const char *filename) { - int ret; + const char *extensions[] = {".aac", ".avi", ".bmp", ".jpeg", ".mka", + ".mkv", ".mov", ".mp4", ".flv", ".ts"}; + int i, size = sizeof(extensions) / sizeof(extensions[0]); + char *dot = strrchr(filename, '.'); - // send the frame to the encoder - ret = avcodec_send_frame(c, frame); - if (ret < 0) { - fprintf(stderr, "Error sending a frame to the encoder: %s\n", - av_err2str(ret)); - exit(1); + for (i = 0; i < size; i++) { + if (dot && !strcmp(dot, extensions[i])) + return 1; } - while (ret >= 0) { - ret = avcodec_receive_packet(c, pkt); - if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) - break; - else if (ret < 0) { - fprintf(stderr, "Error encoding a frame: %s\n", av_err2str(ret)); - exit(1); - } + log_error("File extension not supported", NULL); + av_log(NULL, AV_LOG_WARNING, "Please choose one of the following extensions: "); + for (i = 0; i < size - 1; i++) + av_log(NULL, AV_LOG_WARNING, "%s, ", extensions[i]); + av_log(NULL, AV_LOG_WARNING, "%s\n", extensions[size-1]); - /* rescale output packet timestamp values from codec to stream timebase */ - av_packet_rescale_ts(pkt, c->time_base, st->time_base); - pkt->stream_index = st->index; - - /* Write the compressed frame to the media file. 
*/ - log_packet(fmt_ctx, pkt); - ret = av_interleaved_write_frame(fmt_ctx, pkt); - /* pkt is now blank (av_interleaved_write_frame() takes ownership of - * its contents and resets pkt), so that no unreferencing is necessary. - * This would be different if one used av_write_frame(). */ - if (ret < 0) { - fprintf(stderr, "Error while writing output packet: %s\n", av_err2str(ret)); - exit(1); - } - } - - return ret == AVERROR_EOF ? 1 : 0; + return 0; } -/* Add an output stream. */ -static void add_stream(OutputStream *ost, AVFormatContext *oc, - const AVCodec **codec, - enum AVCodecID codec_id) +static int get_default_enc_params(AVCodecParameters *params, + const char *fname, enum AVMediaType type) { - AVCodecContext *c; - int i; - - /* find the encoder */ - *codec = avcodec_find_encoder(codec_id); - if (!(*codec)) { - fprintf(stderr, "Could not find encoder for '%s'\n", - avcodec_get_name(codec_id)); - exit(1); + AVFormatContext *tmp_fctx; + enum AVCodecID id; + const AVCodec *c; + int ret = 0; + + if ((ret = avformat_alloc_output_context2(&tmp_fctx, NULL, NULL, fname)) < 0) { + log_error("Could not get default encoder", &ret); + return AVERROR_EXIT; } - ost->tmp_pkt = av_packet_alloc(); - if (!ost->tmp_pkt) { - fprintf(stderr, "Could not allocate AVPacket\n"); - exit(1); - } + id = (type == AVMEDIA_TYPE_AUDIO) ? tmp_fctx->oformat->audio_codec : + tmp_fctx->oformat->video_codec; - ost->st = avformat_new_stream(oc, NULL); - if (!ost->st) { - fprintf(stderr, "Could not allocate stream\n"); - exit(1); - } - ost->st->id = oc->nb_streams-1; - c = avcodec_alloc_context3(*codec); - if (!c) { - fprintf(stderr, "Could not alloc an encoding context\n"); - exit(1); + if (!(c = avcodec_find_encoder(id))) { + avformat_free_context(tmp_fctx); + return ret; } - ost->enc = c; - - switch ((*codec)->type) { - case AVMEDIA_TYPE_AUDIO: - c->sample_fmt = (*codec)->sample_fmts ? 
- (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP; - c->bit_rate = 64000; - c->sample_rate = 44100; - if ((*codec)->supported_samplerates) { - c->sample_rate = (*codec)->supported_samplerates[0]; - for (i = 0; (*codec)->supported_samplerates[i]; i++) { - if ((*codec)->supported_samplerates[i] == 44100) - c->sample_rate = 44100; - } - } - av_channel_layout_copy(&c->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO); - ost->st->time_base = (AVRational){ 1, c->sample_rate }; - break; - - case AVMEDIA_TYPE_VIDEO: - c->codec_id = codec_id; - - c->bit_rate = 400000; - /* Resolution must be a multiple of two. */ - c->width = 352; - c->height = 288; - /* timebase: This is the fundamental unit of time (in seconds) in terms - * of which frame timestamps are represented. For fixed-fps content, - * timebase should be 1/framerate and timestamp increments should be - * identical to 1. */ - ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE }; - c->time_base = ost->st->time_base; - - c->gop_size = 12; /* emit one intra frame every twelve frames at most */ - c->pix_fmt = STREAM_PIX_FMT; - if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) { - /* just for testing, we also add B-frames */ - c->max_b_frames = 2; - } - if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) { - /* Needed to avoid using macroblocks in which some coeffs overflow. - * This does not happen with normal video, it just happens here as - * the motion of the chroma plane does not match the luma plane. */ - c->mb_decision = 2; - } - break; - default: - break; + params->codec_type = c->type; + params->codec_id = c-> id; + if (c->type == AVMEDIA_TYPE_AUDIO) { + params->format = c->sample_fmts ? + c->sample_fmts[0] : AV_SAMPLE_FMT_FLTP; + params->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO; + params->sample_rate = c->supported_samplerates ? + c->supported_samplerates[0] : 44100; + } else if (c->type == AVMEDIA_TYPE_VIDEO) { + params->format = c->pix_fmts ? 
c->pix_fmts[0] : AV_PIX_FMT_YUV420P; } + avformat_free_context(tmp_fctx); - /* Some formats want stream headers to be separate. */ - if (oc->oformat->flags & AVFMT_GLOBALHEADER) - c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + return ret; } -/**************************************************************/ -/* audio output */ - -static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt, - const AVChannelLayout *channel_layout, - int sample_rate, int nb_samples) +static int init_encoder(AVCodecContext **enc_ctx, AVCodecParameters *params) { - AVFrame *frame = av_frame_alloc(); + const AVCodec *codec = NULL; int ret; - if (!frame) { - fprintf(stderr, "Error allocating an audio frame\n"); - exit(1); + codec = avcodec_find_encoder(params->codec_id); + *enc_ctx = avcodec_alloc_context3(codec); + if (!codec || !*enc_ctx) { /* no encoder for this codec id, or the context allocation failed */ + log_error("Could not allocate the encoding context", NULL); + return AVERROR_EXIT; } - frame->format = sample_fmt; - av_channel_layout_copy(&frame->ch_layout, channel_layout); - frame->sample_rate = sample_rate; - frame->nb_samples = nb_samples; - - if (nb_samples) { - ret = av_frame_get_buffer(frame, 0); - if (ret < 0) { - fprintf(stderr, "Error allocating an audio buffer\n"); - exit(1); - } + (*enc_ctx)->codec_id = params->codec_id; + (*enc_ctx)->codec_type = params->codec_type; + if (params->codec_type == AVMEDIA_TYPE_AUDIO) { + (*enc_ctx)->sample_fmt = params->format; + (*enc_ctx)->sample_rate = params->sample_rate; + (*enc_ctx)->time_base = (AVRational){1, params->sample_rate}; + (*enc_ctx)->ch_layout = params->ch_layout; + } else if (params->codec_type == AVMEDIA_TYPE_VIDEO) { + (*enc_ctx)->width = params->width; + (*enc_ctx)->height = params->height; + (*enc_ctx)->time_base = (AVRational){ 1, VIDEO_FRAME_RATE }; + (*enc_ctx)->gop_size = 12; + (*enc_ctx)->pix_fmt = params->format; } - return frame; + if ((ret = avcodec_open2(*enc_ctx, codec, NULL)) < 0) { + log_error("Could not open input codec", &ret); + return ret; + } else + return 0; } -static void
open_audio(AVFormatContext *oc, const AVCodec *codec, - OutputStream *ost, AVDictionary *opt_arg) +static int init_avframe(AVFrame **frame, AVCodecParameters *params) { - AVCodecContext *c; - int nb_samples; int ret; - AVDictionary *opt = NULL; - c = ost->enc; + if (!(*frame = av_frame_alloc())) { + log_error("Could not allocate AVFrame", NULL); + return AVERROR(ENOMEM); + } - /* open it */ - av_dict_copy(&opt, opt_arg, 0); - ret = avcodec_open2(c, codec, &opt); - av_dict_free(&opt); - if (ret < 0) { - fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret)); - exit(1); + (*frame)->opaque = &params->codec_type; /* media type tag, read back later through frame->opaque */ + if (params->codec_type == AVMEDIA_TYPE_AUDIO) { + (*frame)->nb_samples = params->frame_size; + (*frame)->sample_rate = params->sample_rate; + (*frame)->format = params->format; + (*frame)->ch_layout = params->ch_layout; + } else { + (*frame)->width = params->width; + (*frame)->height = params->height; + (*frame)->format = params->format; } - /* init signal generator */ - ost->t = 0; - ost->tincr = 2 * M_PI * 110.0 / c->sample_rate; - /* increment frequency by 110 Hz per second */ - ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate; + /* Allocate the frame's data buffer */ + if ((ret = av_frame_get_buffer(*frame, 0)) < 0) { + log_error("Could not allocate buffer for AVFrame", &ret); + return ret; /* propagate the actual failure code that was just logged */ + } else + return 0; +} - if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) - nb_samples = 10000; - else - nb_samples = c->frame_size; +static int init_audio_convert(struct SwrContext **ctx, AVCodecParameters *in_params, + AVCodecParameters *out_params) +{ + swr_alloc_set_opts2(ctx, + &(out_params->ch_layout), + out_params->format, out_params->sample_rate, + &(in_params->ch_layout), + in_params->format, in_params->sample_rate, + 0, NULL); + if (!*ctx) { + log_error("Could not allocate resample context", NULL); + return AVERROR(ENOMEM); + } else + return 0; +} - ost->frame = alloc_audio_frame(c->sample_fmt,
&c->ch_layout, - c->sample_rate, nb_samples); - ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, &c->ch_layout, - c->sample_rate, nb_samples); +static int init_video_convert(struct SwsContext **ctx, AVCodecParameters *in_params, + AVCodecParameters *out_params) +{ + *ctx = sws_getContext(in_params->width, in_params->height, + in_params->format, + out_params->width, out_params->height, + out_params->format, /* destination pixel format */ + VIDEO_SCALE_FLAGS, NULL, NULL, NULL); + if (!*ctx) { + log_error("Could not allocate scale context", NULL); + return AVERROR(ENOMEM); + } else + return 0; +} - /* copy the stream parameters to the muxer */ - ret = avcodec_parameters_from_context(ost->st->codecpar, c); - if (ret < 0) { - fprintf(stderr, "Could not copy the stream parameters\n"); - exit(1); - } +static int init_muxer(AVFormatContext **out_fmt_ctx, AVCodecContext *audio_enc_ctx, + AVCodecContext *video_enc_ctx, const char *filename) +{ + int ret; + AVStream *out_audio_str, *out_video_str; - /* create resampler context */ - ost->swr_ctx = swr_alloc(); - if (!ost->swr_ctx) { - fprintf(stderr, "Could not allocate resampler context\n"); - exit(1); + if ((ret = avformat_alloc_output_context2(out_fmt_ctx, NULL, NULL, filename)) < 0) { + log_error("Could not create output context", &ret); + return ret; } - /* set options */ - av_opt_set_chlayout (ost->swr_ctx, "in_chlayout", &c->ch_layout, 0); - av_opt_set_int (ost->swr_ctx, "in_sample_rate", c->sample_rate, 0); - av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0); - av_opt_set_chlayout (ost->swr_ctx, "out_chlayout", &c->ch_layout, 0); - av_opt_set_int (ost->swr_ctx, "out_sample_rate", c->sample_rate, 0); - av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt", c->sample_fmt, 0); - - /* initialize the resampling context */ - if ((ret = swr_init(ost->swr_ctx)) < 0) { - fprintf(stderr, "Failed to initialize the resampling context\n"); - exit(1); + /* open the output file, if needed */ + if
(!((*out_fmt_ctx)->oformat->flags & AVFMT_NOFILE)) { + if ((ret = avio_open(&(*out_fmt_ctx)->pb, filename, AVIO_FLAG_WRITE)) < 0) { + log_error("Could not open output file", &ret); + return ret; + } } -} -/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and - * 'nb_channels' channels. */ -static AVFrame *get_audio_frame(OutputStream *ost) -{ - AVFrame *frame = ost->tmp_frame; - int j, i, v; - int16_t *q = (int16_t*)frame->data[0]; - - /* check if we want to generate more frames */ - if (av_compare_ts(ost->next_pts, ost->enc->time_base, - STREAM_DURATION, (AVRational){ 1, 1 }) > 0) - return NULL; + if (audio_enc_ctx) { + if (!(out_audio_str = avformat_new_stream(*out_fmt_ctx, NULL))) { + log_error("Could not create new stream", NULL); + return AVERROR(ENOMEM); + } + out_audio_str->id = (*out_fmt_ctx)->nb_streams - 1; + avcodec_parameters_from_context(out_audio_str->codecpar, audio_enc_ctx); + } - for (j = 0; j nb_samples; j++) { - v = (int)(sin(ost->t) * 10000); - for (i = 0; i < ost->enc->ch_layout.nb_channels; i++) - *q++ = v; - ost->t += ost->tincr; - ost->tincr += ost->tincr2; + if (video_enc_ctx) { + if (!(out_video_str = avformat_new_stream(*out_fmt_ctx, NULL))) { + log_error("Could not create new stream", NULL); + return AVERROR(ENOMEM); + } + out_video_str->id = (*out_fmt_ctx)->nb_streams - 1; + avcodec_parameters_from_context(out_video_str->codecpar, video_enc_ctx); } - frame->pts = ost->next_pts; - ost->next_pts += frame->nb_samples; + av_dump_format(*out_fmt_ctx, 0, filename, 1); - return frame; + /* Write the stream header, if any. 
*/ + if (avformat_write_header(*out_fmt_ctx, NULL) < 0) { + log_error("avformat_write_header() error", NULL); + return AVERROR_EXIT; + } else + return 0; } -/* - * encode one audio frame and send it to the muxer - * return 1 when encoding is finished, 0 otherwise - */ -static int write_audio_frame(AVFormatContext *oc, OutputStream *ost) +static void fill_dummy_s16_frame(AVFrame *frame) { - AVCodecContext *c; - AVFrame *frame; - int ret; - int dst_nb_samples; - - c = ost->enc; - - frame = get_audio_frame(ost); - - if (frame) { - /* convert samples from native format to destination codec format, using the resampler */ - /* compute destination number of samples */ - dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples, - c->sample_rate, c->sample_rate, AV_ROUND_UP); - av_assert0(dst_nb_samples == frame->nb_samples); - - /* when we pass a frame to the encoder, it may keep a reference to it - * internally; - * make sure we do not overwrite it here - */ - ret = av_frame_make_writable(ost->frame); - if (ret < 0) - exit(1); - - /* convert to destination format */ - ret = swr_convert(ost->swr_ctx, - ost->frame->data, dst_nb_samples, - (const uint8_t **)frame->data, frame->nb_samples); - if (ret < 0) { - fprintf(stderr, "Error while converting\n"); - exit(1); - } - frame = ost->frame; - - frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base); - ost->samples_count += dst_nb_samples; + int j, i, v; + static float t, tincr, tincr2; + int16_t *data = (int16_t*)frame->data[0]; + static int frame_ctr; + + if (!tincr) { + t = 0; + tincr = 2 * M_PI * 110.0 / frame->sample_rate; + /* increment frequency by 110 Hz per second */ + tincr2 = tincr / frame->sample_rate; } - - return write_frame(oc, c, ost->st, frame, ost->tmp_pkt); + for (j = 0; j < frame->nb_samples; j++) { + v = (int)(sin(t) * 10000); + for (i = 0; i < frame->ch_layout.nb_channels; i++) + *data++ = v; + t += tincr; + tincr += tincr2; + } +
frame->pts = frame->nb_samples*(++frame_ctr); } -/**************************************************************/ -/* video output */ - -static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height) +static void fill_dummy_yuv420p_frame(AVFrame *frame) { - AVFrame *picture; - int ret; - - picture = av_frame_alloc(); - if (!picture) - return NULL; + int x, y; + static int idx; - picture->format = pix_fmt; - picture->width = width; - picture->height = height; + /* Y */ + for (y = 0; y < frame->height; y++) /* one pass per luma row: bounded by height, not width */ + for (x = 0; x < frame->width; x++) + frame->data[0][y * frame->linesize[0] + x] = x + y + idx * 3; - /* allocate the buffers for the frame data */ - ret = av_frame_get_buffer(picture, 0); - if (ret < 0) { - fprintf(stderr, "Could not allocate frame data.\n"); - exit(1); + /* Cb and Cr */ + for (y = 0; y < frame->height / 2; y++) { + for (x = 0; x < frame->width / 2; x++) { + frame->data[1][y * frame->linesize[1] + x] = 128 + y + idx * 2; + frame->data[2][y * frame->linesize[2] + x] = 64 + x + idx * 5; + } } - return picture; + frame->pts = idx++; } -static void open_video(AVFormatContext *oc, const AVCodec *codec, - OutputStream *ost, AVDictionary *opt_arg) +static int convert_frame(void *convert_ctx, AVFrame *in_frame, AVFrame *out_frame) { int ret; - AVCodecContext *c = ost->enc; - AVDictionary *opt = NULL; - - av_dict_copy(&opt, opt_arg, 0); + enum AVMediaType *type = (enum AVMediaType *)(in_frame->opaque); - /* open the codec */ - ret = avcodec_open2(c, codec, &opt); - av_dict_free(&opt); - if (ret < 0) { - fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret)); - exit(1); - } - - /* allocate and init a re-usable frame */ - ost->frame = alloc_picture(c->pix_fmt, c->width, c->height); - if (!ost->frame) { - fprintf(stderr, "Could not allocate video frame\n"); - exit(1); + if (av_frame_make_writable(out_frame) < 0) { + log_error("av_frame_make_writable() error", NULL); + return AVERROR_EXIT; } - /* If the output format is not
YUV420P, then a temporary YUV420P - * picture is needed too. It is then converted to the required - * output format. */ - ost->tmp_frame = NULL; - if (c->pix_fmt != AV_PIX_FMT_YUV420P) { - ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height); - if (!ost->tmp_frame) { - fprintf(stderr, "Could not allocate temporary picture\n"); - exit(1); + if (*type == AVMEDIA_TYPE_AUDIO) { + if ((ret = swr_convert_frame((struct SwrContext *)convert_ctx, out_frame, + (const AVFrame *)in_frame)) != 0) { + log_error("Error converting AVFrame", &ret); + return ret; } + } else { + sws_scale((struct SwsContext *)convert_ctx, (const uint8_t * const *)in_frame->data, + in_frame->linesize, 0, in_frame->height, out_frame->data, + out_frame->linesize); } - /* copy the stream parameters to the muxer */ - ret = avcodec_parameters_from_context(ost->st->codecpar, c); - if (ret < 0) { - fprintf(stderr, "Could not copy the stream parameters\n"); - exit(1); - } + out_frame->pts = in_frame->pts; + return 0; } -/* Prepare a dummy image. 
*/ -static void fill_yuv_image(AVFrame *pict, int frame_index, - int width, int height) +static int encode_frame(AVCodecContext *ctx, AVFrame *in_frame, AVPacket *out_pkt) { - int x, y, i; + static int is_flushing_audio = 0, is_flushing_video = 0; + int ret = 0; + int is_audio = ctx->codec->type == AVMEDIA_TYPE_AUDIO; - i = frame_index; - - /* Y */ - for (y = 0; y < height; y++) - for (x = 0; x < width; x++) - pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3; - - /* Cb and Cr */ - for (y = 0; y < height / 2; y++) { - for (x = 0; x < width / 2; x++) { - pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2; - pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5; - } + if ((is_audio && !is_flushing_audio) || (!is_audio && !is_flushing_video)) { + ret = avcodec_send_frame(ctx, in_frame); } -} - -static AVFrame *get_video_frame(OutputStream *ost) -{ - AVCodecContext *c = ost->enc; - - /* check if we want to generate more frames */ - if (av_compare_ts(ost->next_pts, c->time_base, - STREAM_DURATION, (AVRational){ 1, 1 }) > 0) - return NULL; - - /* when we pass a frame to the encoder, it may keep a reference to it - * internally; make sure we do not overwrite it here */ - if (av_frame_make_writable(ost->frame) < 0) - exit(1); - - if (c->pix_fmt != AV_PIX_FMT_YUV420P) { - /* as we only generate a YUV420P picture, we must convert it - * to the codec pixel format if needed */ - if (!ost->sws_ctx) { - ost->sws_ctx = sws_getContext(c->width, c->height, - AV_PIX_FMT_YUV420P, - c->width, c->height, - c->pix_fmt, - SCALE_FLAGS, NULL, NULL, NULL); - if (!ost->sws_ctx) { - fprintf(stderr, - "Could not initialize the conversion context\n"); - exit(1); - } + if (ret < 0) { + av_log(NULL, AV_LOG_ERROR, + "Error sending frame to the encoder (error '%s')\n", av_err2str(ret)); + return ret; + } else if (ret == 0) { + ret = avcodec_receive_packet(ctx, out_pkt); + if ((ret < 0) && (ret != AVERROR(EAGAIN)) && (ret != AVERROR_EOF)) { + av_log(NULL, AV_LOG_ERROR, + "Error 
receiving encoded packet (error '%s')\n", av_err2str(ret)); + return ret; } - fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height); - sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data, - ost->tmp_frame->linesize, 0, c->height, ost->frame->data, - ost->frame->linesize); - } else { - fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height); } - ost->frame->pts = ost->next_pts++; + if (is_audio) + is_flushing_audio = (in_frame == NULL); + else + is_flushing_video = (in_frame == NULL); - return ost->frame; + return ret; } -/* - * encode one video frame and send it to the muxer - * return 1 when encoding is finished, 0 otherwise - */ -static int write_video_frame(AVFormatContext *oc, OutputStream *ost) +static int frame_exceeds_stream_duration(AVFrame *fr) { - return write_frame(oc, ost->enc, ost->st, get_video_frame(ost), ost->tmp_pkt); + enum AVMediaType *type = (enum AVMediaType *)(fr->opaque); + AVRational tb = (*type == AVMEDIA_TYPE_AUDIO) ? (AVRational){ 1, fr->sample_rate} : + (AVRational){ 1, VIDEO_FRAME_RATE}; + + return av_compare_ts(fr->pts, tb ,STREAM_DURATION, (AVRational){ 1, 1 }) > 0; } -static void close_stream(AVFormatContext *oc, OutputStream *ost) +static enum AVMediaType media_type_of_earlier_frame(AVFrame *audio_fr, + AVFrame *video_fr) { - avcodec_free_context(&ost->enc); - av_frame_free(&ost->frame); - av_frame_free(&ost->tmp_frame); - av_packet_free(&ost->tmp_pkt); - sws_freeContext(ost->sws_ctx); - swr_free(&ost->swr_ctx); + if (!audio_fr) + return AVMEDIA_TYPE_VIDEO; + if (!video_fr) + return AVMEDIA_TYPE_AUDIO; + + if (av_compare_ts(audio_fr->pts, (AVRational){ 1, audio_fr->sample_rate}, + video_fr->pts, (AVRational){ 1, VIDEO_FRAME_RATE}) < 0) + return AVMEDIA_TYPE_AUDIO; + else + return AVMEDIA_TYPE_VIDEO; } -/**************************************************************/ -/* media file output */ - int main(int argc, char **argv) { - OutputStream video_st = { 0 }, audio_st = { 0 }; - const 
AVOutputFormat *fmt; - const char *filename; - AVFormatContext *oc; - const AVCodec *audio_codec, *video_codec; - int ret; - int have_video = 0, have_audio = 0; - int encode_video = 0, encode_audio = 0; - AVDictionary *opt = NULL; - int i; - - if (argc < 2) { + const char *fname; + AVCodecContext *audio_enc_ctx = NULL, *video_enc_ctx = NULL, *enc_ctx = NULL; + + /* NOTE: if you want to modify the audio/video input ".format" parameter, + * you need to modify the corresponding fill_dummy_XXX_frame() function(s) too */ + AVCodecParameters audio_in_params = { + .codec_type = AVMEDIA_TYPE_AUDIO, + .format = AV_SAMPLE_FMT_S16, + .sample_rate = 44100, + .ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO + }, + video_in_params = { + .codec_type = AVMEDIA_TYPE_VIDEO, + .width = 352, + .height = 288, + .format = AV_PIX_FMT_YUV420P + }, + video_enc_params = { 0 }, audio_enc_params = { 0 }; + struct AVRational enc_timebases[2]; + AVFrame *in_audio_frame = NULL, *converted_audio_frame = NULL, + *in_video_frame = NULL, *converted_video_frame = NULL, + *frame_to_encode = NULL; + struct SwrContext *audio_convert_ctx = NULL; + struct SwsContext *video_convert_ctx = NULL; + enum AVMediaType media_type; + AVFormatContext *out_fmt_ctx = NULL; + AVPacket *out_pkt = av_packet_alloc(); + int ret = 0, process_audio = 0, process_video = 0; + + if (argc != 2) { printf("usage: %s output_file\n" "API example program to output a media file with libavformat.\n" - "This program generates a synthetic audio and video stream, encodes and\n" + "This program generates a synthetic audio and/or video stream, encodes and\n" "muxes them into a file named output_file.\n" "The output format is automatically guessed according to the file extension.\n" - "Raw images can also be output by using '%%d' in the filename.\n" + "BMP or JPEG images can also be output by using '%%d' in the filename.\n" "\n", argv[0]); - return 1; + return AVERROR_EXIT; } - filename = argv[1]; - for (i = 2; i+1 < argc; i+=2) { - 
if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags")) - av_dict_set(&opt, argv[i]+1, argv[i+1], 0); + fname = argv[1]; + if (!is_extension_supported(fname)) { + ret = AVERROR_EXIT; + goto end; } - /* allocate the output media context */ - avformat_alloc_output_context2(&oc, NULL, NULL, filename); - if (!oc) { - printf("Could not deduce output format from file extension: using MPEG.\n"); - avformat_alloc_output_context2(&oc, NULL, "mpeg", filename); - } - if (!oc) - return 1; - - fmt = oc->oformat; - - /* Add the audio and video streams using the default format codecs - * and initialize the codecs. */ - if (fmt->video_codec != AV_CODEC_ID_NONE) { - add_stream(&video_st, oc, &video_codec, fmt->video_codec); - have_video = 1; - encode_video = 1; + /* Deduce the default codecs and their default parameters from the filename */ + if ((ret = get_default_enc_params(&audio_enc_params, fname, AVMEDIA_TYPE_AUDIO)) < 0) + goto end; + if ((ret = get_default_enc_params(&video_enc_params, fname, AVMEDIA_TYPE_VIDEO)) < 0) + goto end; + process_audio = audio_enc_params.codec_id != AV_CODEC_ID_NONE; + process_video = video_enc_params.codec_id != AV_CODEC_ID_NONE; + if (!process_audio && !process_video) { + log_error("Could not get default encoder(s)", NULL); + ret = AVERROR_EXIT; + goto end; } - if (fmt->audio_codec != AV_CODEC_ID_NONE) { - add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec); - have_audio = 1; - encode_audio = 1; - } - - /* Now that all the parameters are set, we can open the audio and - * video codecs and allocate the necessary encode buffers.
*/ - if (have_video) - open_video(oc, video_codec, &video_st, opt); - - if (have_audio) - open_audio(oc, audio_codec, &audio_st, opt); - - av_dump_format(oc, 0, filename, 1); - /* open the output file, if needed */ - if (!(fmt->flags & AVFMT_NOFILE)) { - ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE); - if (ret < 0) { - fprintf(stderr, "Could not open '%s': %s\n", filename, - av_err2str(ret)); - return 1; - } + if (process_audio) { + /* Prepare the audio encoder*/ + if ((ret = init_encoder(&audio_enc_ctx, &audio_enc_params)) < 0) + goto end; + enc_timebases[0] = audio_enc_ctx->time_base; + audio_in_params.frame_size = audio_enc_params.frame_size = audio_enc_ctx->frame_size; + + /* Allocate an audio resampler and its input and output AVFrames */ + if ((ret = init_audio_convert(&audio_convert_ctx, &audio_in_params, + &audio_enc_params)) < 0) + goto end; + if ((ret = init_avframe(&in_audio_frame, &audio_in_params)) < 0) + goto end; + if ((ret = init_avframe(&converted_audio_frame, &audio_enc_params)) < 0) + goto end; } - /* Write the stream header, if any. 
*/ - ret = avformat_write_header(oc, &opt); - if (ret < 0) { - fprintf(stderr, "Error occurred when opening output file: %s\n", - av_err2str(ret)); - return 1; + if (process_video) { + video_enc_params.width = video_in_params.width; + video_enc_params.height = video_in_params.height; + if ((ret = init_encoder(&video_enc_ctx, &video_enc_params)) < 0) + goto end; + enc_timebases[1] = video_enc_ctx->time_base; + if ((ret = init_video_convert(&video_convert_ctx,&video_in_params, + &video_enc_params)) < 0) + goto end; + if ((ret = init_avframe(&in_video_frame, &video_in_params)) < 0) + goto end; + if ((ret = init_avframe(&converted_video_frame, &video_enc_params)) < 0) + goto end; } - while (encode_video || encode_audio) { - /* select the stream to encode */ - if (encode_video && - (!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base, - audio_st.next_pts, audio_st.enc->time_base) <= 0)) { - encode_video = !write_video_frame(oc, &video_st); + /* Create the output container for the encoded frames */ + if ((ret = init_muxer(&out_fmt_ctx, audio_enc_ctx, video_enc_ctx, fname)) < 0) + goto end; + out_fmt_ctx->opaque = &enc_timebases; + + while (process_audio || process_video) { + + frame_to_encode = NULL; + media_type = media_type_of_earlier_frame(in_audio_frame, in_video_frame); + + /* fill and convert the input frames */ + if (media_type == AVMEDIA_TYPE_AUDIO) { + enc_ctx = audio_enc_ctx; + fill_dummy_s16_frame(in_audio_frame); + if ((ret = convert_frame(audio_convert_ctx, in_audio_frame, + converted_audio_frame)) != 0) + goto end; + if (!frame_exceeds_stream_duration(converted_audio_frame)) + frame_to_encode = converted_audio_frame; } else { - encode_audio = !write_audio_frame(oc, &audio_st); + enc_ctx = video_enc_ctx; + fill_dummy_yuv420p_frame(in_video_frame); + if ((ret = convert_frame(video_convert_ctx, in_video_frame, + converted_video_frame)) != 0) + goto end; + if (!frame_exceeds_stream_duration(in_video_frame)) + frame_to_encode = 
converted_video_frame; } - } - - av_write_trailer(oc); - /* Close each codec. */ - if (have_video) - close_stream(oc, &video_st); - if (have_audio) - close_stream(oc, &audio_st); + /* encode the converted frames and mux the encoded packets */ + if ((ret = encode_frame(enc_ctx, frame_to_encode, out_pkt)) == 0) { + if ((ret = mux_encoded_pkt(out_pkt, out_fmt_ctx, media_type)) < 0) + goto end; + } - if (!(fmt->flags & AVFMT_NOFILE)) - /* Close the output file. */ - avio_closep(&oc->pb); + /* check if the encoders have been fully flushed */ + process_audio &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_AUDIO)); + process_video &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_VIDEO)); - /* free the stream */ - avformat_free_context(oc); + } - return 0; + av_write_trailer(out_fmt_ctx); + ret = 0; + +end: + + avcodec_free_context(&audio_enc_ctx); + avcodec_free_context(&video_enc_ctx); + av_frame_free(&in_audio_frame); + av_frame_free(&in_video_frame); + av_frame_free(&converted_audio_frame); + av_frame_free(&converted_video_frame); + swr_free(&audio_convert_ctx); + sws_freeContext(video_convert_ctx); + if (out_fmt_ctx) + avio_closep(&out_fmt_ctx->pb); + avformat_free_context(out_fmt_ctx); + av_packet_free(&out_pkt); + + return ret; } -- 2.32.0