Hi! I have made a small application to extract audio from an mp4 file, or simply convert an existing audio file to AAC/mp4 format (either raw AAC, or inside an mp4 container). I have run this application with existing mp4 files as input, and it properly extracts the audio and encodes it to mp4 (audio only: AAC), or even directly to AAC format (i.e. test.aac also works). But when I tried running it on mp3 files, the output clip plays faster than it should (a clip of 1:12 plays back for only 1:05, and is also noisy).
Here is the code I have written to achieve this: //////////////////////////////////////////////// #include "stdafx.h" #include <iostream> #include <fstream> #include <string> #include <vector> #include <map> #include <deque> #include <queue> #include <math.h> #include <stdlib.h> #include <stdio.h> #include <conio.h> extern "C" { #include "libavcodec/avcodec.h" #include "libavformat/avformat.h" #include "libavdevice/avdevice.h" #include "libswscale/swscale.h" #include "libavutil/dict.h" #include "libavutil/error.h" #include "libavutil/opt.h" #include <libavutil/fifo.h> #include <libavutil/imgutils.h> #include <libavutil/samplefmt.h> } AVFormatContext* fmt_ctx= NULL; int audio_stream_index = -1; AVCodecContext *codec_ctx_audio = NULL; AVCodec* codec_audio = NULL; AVFrame* decoded_frame = NULL; uint8_t** audio_dst_data = NULL; int got_frame = 0; int audiobufsize = 0; AVPacket input_packet; int audio_dst_linesize = 0; int audio_dst_bufsize = 0; AVOutputFormat *output_format = NULL ; AVFormatContext *output_fmt_ctx = NULL; AVStream *audio_st = NULL; AVCodec *audio_codec = NULL; double audio_pts = 0.0; int audio_input_frame_size = 0; uint8_t *audio_data_buf = NULL; uint8_t *audio_out = NULL; int audio_bit_rate; int audio_sample_rate; int audio_channels; int decode_packet(); int open_audio_input(char* src_filename); int decode_frame(); int open_encoder(char* output_filename); AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id); int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st); void close_audio(AVFormatContext *oc, AVStream *st); void write_audio_frame(uint8_t ** audio_src_data, int audio_src_bufsize); int open_audio_input(char* src_filename) { int i =0; /* open input file, and allocate format context */ if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) { fprintf(stderr, "Could not open source file %s\n", src_filename); exit(1); } // Retrieve stream information if(avformat_find_stream_info(fmt_ctx, 
NULL)<0) return -1; // Couldn't find stream information // Dump information about file onto standard error av_dump_format(fmt_ctx, 0, src_filename, 0); // Find the first video stream for(i=0; i<fmt_ctx->nb_streams; i++) { if(fmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO) { audio_stream_index=i; break; } } if ( audio_stream_index != -1 ) { // Get a pointer to the codec context for the audio stream codec_ctx_audio=fmt_ctx->streams[audio_stream_index]->codec; // Find the decoder for the video stream codec_audio=avcodec_find_decoder(codec_ctx_audio->codec_id); if(codec_audio==NULL) { fprintf(stderr, "Unsupported audio codec!\n"); return -1; // Codec not found } // Open codec AVDictionary *codecDictOptions = NULL; if(avcodec_open2(codec_ctx_audio, codec_audio, &codecDictOptions)<0) return -1; // Could not open codec // Allocate audio frame if ( decoded_frame == NULL ) decoded_frame = avcodec_alloc_frame(); int nb_planes = 0; AVStream* audio_stream = fmt_ctx->streams[audio_stream_index]; nb_planes = av_sample_fmt_is_planar(codec_ctx_audio->sample_fmt) ? codec_ctx_audio->channels : 1; int tempSize = sizeof(uint8_t *) * nb_planes; audio_dst_data = (uint8_t**)av_mallocz(tempSize); if (!audio_dst_data) { fprintf(stderr, "Could not allocate audio data buffers\n"); } else { for ( int i = 0 ; i < nb_planes ; i ++ ) { audio_dst_data[i] = NULL; } } } } int decode_frame() { int rv = 0; got_frame = 0; if ( fmt_ctx == NULL ) { return rv; } int ret = 0; audiobufsize = 0; rv = av_read_frame(fmt_ctx, &input_packet); if ( rv < 0 ) { return rv; } rv = decode_packet(); // Free the input_packet that was allocated by av_read_frame av_free_packet(&input_packet); return rv; } int decode_packet() { int rv = 0; int ret = 0; //audio stream? 
if(input_packet.stream_index == audio_stream_index) { /* decode audio frame */ rv = avcodec_decode_audio4(codec_ctx_audio, decoded_frame, &got_frame, &input_packet); if (rv < 0) { fprintf(stderr, "Error decoding audio frame\n"); //return ret; } else { if (got_frame) { if ( audio_dst_data[0] == NULL ) { ret = av_samples_alloc(audio_dst_data, &audio_dst_linesize, decoded_frame->channels, decoded_frame->nb_samples, (AVSampleFormat)decoded_frame->format, 1); if (ret < 0) { fprintf(stderr, "Could not allocate audio buffer\n"); return AVERROR(ENOMEM); } /* TODO: extend return code of the av_samples_* functions so that this call is not needed */ audio_dst_bufsize = av_samples_get_buffer_size(NULL, decoded_frame->channels, decoded_frame->nb_samples, (AVSampleFormat)decoded_frame->format, 1); } /* copy audio data to destination buffer: * this is required since rawaudio expects non aligned data */ av_samples_copy(audio_dst_data, decoded_frame->data, 0, 0, decoded_frame->nb_samples, decoded_frame->channels, (AVSampleFormat)decoded_frame->format); } } } return rv; } int open_encoder(char* output_filename ) { int rv = 0; /* allocate the output media context */ AVOutputFormat *opfmt = NULL; avformat_alloc_output_context2(&output_fmt_ctx, opfmt, NULL, output_filename); if (!output_fmt_ctx) { printf("Could not deduce output format from file extension: using MPEG.\n"); avformat_alloc_output_context2(&output_fmt_ctx, NULL, "mpeg", output_filename); } if (!output_fmt_ctx) { rv = -1; } else { output_format = output_fmt_ctx->oformat; } /* Add the audio stream using the default format codecs * and initialize the codecs. */ audio_st = NULL; if ( output_fmt_ctx ) { if (output_format->audio_codec != AV_CODEC_ID_NONE) { audio_st = add_audio_stream(output_fmt_ctx, &audio_codec, output_format->audio_codec); } /* Now that all the parameters are set, we can open the audio and * video codecs and allocate the necessary encode buffers. 
*/ if (audio_st) { rv = open_audio(output_fmt_ctx, audio_codec, audio_st); if ( rv < 0 ) return rv; } av_dump_format(output_fmt_ctx, 0, output_filename, 1); /* open the output file, if needed */ if (!(output_format->flags & AVFMT_NOFILE)) { if (avio_open(&output_fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE) < 0) { fprintf(stderr, "Could not open '%s'\n", output_filename); rv = -1; } else { /* Write the stream header, if any. */ if (avformat_write_header(output_fmt_ctx, NULL) < 0) { fprintf(stderr, "Error occurred when opening output file\n"); rv = -1; } } } } return rv; } AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec, enum AVCodecID codec_id) { AVCodecContext *c; AVStream *st; /* find the audio encoder */ *codec = avcodec_find_encoder(codec_id); if (!(*codec)) { fprintf(stderr, "Could not find codec\n"); exit(1); } st = avformat_new_stream(oc, *codec); if (!st) { fprintf(stderr, "Could not allocate stream\n"); exit(1); } st->id = 1; c = st->codec; /* put sample parameters */ c->sample_fmt = AV_SAMPLE_FMT_S16; c->bit_rate = audio_bit_rate; c->sample_rate = audio_sample_rate; c->channels = audio_channels; // some formats want stream headers to be separate if (oc->oformat->flags & AVFMT_GLOBALHEADER) c->flags |= CODEC_FLAG_GLOBAL_HEADER; return st; } int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st) { int ret=0; AVCodecContext *c; c = st->codec; /* open it */ if (avcodec_open2(c, codec, NULL) < 0) { fprintf(stderr, "could not open codec\n"); return -1; //exit(1); } if (c->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE) audio_input_frame_size = 10000; else audio_input_frame_size = c->frame_size; int tempSize = audio_input_frame_size * av_get_bytes_per_sample(c->sample_fmt) * c->channels; return ret; } void close_audio(AVFormatContext *oc, AVStream *st) { avcodec_close(st->codec); } void write_audio_frame(uint8_t ** audio_src_data, int audio_src_bufsize) { AVFormatContext *oc = output_fmt_ctx; AVStream *st = audio_st; if ( oc == 
NULL || st == NULL ) return; AVCodecContext *c; AVPacket pkt = { 0 }; // data and size must be 0; AVFrame *frame = avcodec_alloc_frame(); int got_packet; av_init_packet(&pkt); c = st->codec; frame->nb_samples = audio_input_frame_size; int buf_size = audio_src_bufsize * av_get_bytes_per_sample(c->sample_fmt) * c->channels; avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt, (uint8_t *) *audio_src_data, buf_size, 1); avcodec_encode_audio2(c, &pkt, frame, &got_packet); if (!got_packet) { avcodec_free_frame(&frame); } else { pkt.stream_index = st->index; /* Write the compressed frame to the media file. */ if (av_interleaved_write_frame(oc, &pkt) != 0) { fprintf(stderr, "Error while writing audio frame\n"); exit(1); } avcodec_free_frame(&frame); } av_free_packet(&pkt); } void write_delayed_frames(AVFormatContext *oc, AVStream *st) { AVCodecContext *c = st->codec; int got_output = 0; int ret = 0; AVPacket pkt; pkt.data = NULL; pkt.size = 0; av_init_packet(&pkt); int i = 0; //int got_packet; for (got_output = 1; got_output; i++) { ret = avcodec_encode_audio2(c, &pkt, NULL, &got_output); if (ret < 0) { fprintf(stderr, "error encoding frame\n"); exit(1); } static int64_t tempPts = 0; static int64_t tempDts = 0; /* If size is zero, it means the image was buffered. */ if (got_output) { if (pkt.pts != AV_NOPTS_VALUE) pkt.pts = av_rescale_q(pkt.pts, st->codec->time_base, st->time_base); if (pkt.dts != AV_NOPTS_VALUE) pkt.dts = av_rescale_q(pkt.dts, st->codec->time_base, st->time_base); if (c->coded_frame->key_frame) pkt.flags |= AV_PKT_FLAG_KEY; pkt.stream_index = st->index; /* Write the compressed frame to the media file. 
*/ ret = av_interleaved_write_frame(oc, &pkt); } else { ret = 0; } av_free_packet(&pkt); } } int main (int argc, char **argv) { /* register all formats and codecs */ av_register_all(); int i =0; char src_filename[90] = "test.mp3"; char dst_filename[90] = "test.mp4"; open_audio_input(src_filename); audio_bit_rate = codec_ctx_audio->bit_rate; audio_sample_rate = codec_ctx_audio->sample_rate; audio_channels = codec_ctx_audio->channels; open_encoder( dst_filename ); while(1) { int rv = decode_frame(); if ( rv < 0 ) { break; } if (audio_st) { audio_pts = (double)audio_st->pts.val * audio_st->time_base.num / audio_st->time_base.den; } else { audio_pts = 0.0; } printf("\naudio_pts: %.3f",audio_pts); if ( codec_ctx_audio ) { if ( got_frame) { write_audio_frame( audio_dst_data, audio_dst_bufsize ); } } if ( audio_dst_data[0] ) { av_freep(&audio_dst_data[0]); audio_dst_data[0] = NULL; } } write_delayed_frames( output_fmt_ctx, audio_st ); av_write_trailer(output_fmt_ctx); close_audio( output_fmt_ctx, audio_st); return 0; } /////////////////////////////////////////////// I have been looking at this problem from many angles since about two days now, but cant seem to figure out what I'm doing wrong. Note also: the printf() statement I've inserted shows audio_pts up to 64.551 (that's about 1:05 seconds that also proves encoder is not going to full duration of input file: 1:12 secs): ....... ....... ....... 
audio_pts: 63.808 audio_pts: 63.832 audio_pts: 63.855 audio_pts: 63.878 audio_pts: 63.901 audio_pts: 63.925 audio_pts: 63.948 audio_pts: 63.971 audio_pts: 63.994 audio_pts: 64.017 audio_pts: 64.041 audio_pts: 64.064 audio_pts: 64.087 audio_pts: 64.110 audio_pts: 64.134 audio_pts: 64.157 audio_pts: 64.180 audio_pts: 64.203 audio_pts: 64.226 audio_pts: 64.250 audio_pts: 64.273 audio_pts: 64.296 audio_pts: 64.319 audio_pts: 64.342 audio_pts: 64.366 audio_pts: 64.389 audio_pts: 64.412 audio_pts: 64.435 audio_pts: 64.459 audio_pts: 64.482 audio_pts: 64.505 audio_pts: 64.528 audio_pts: 64.551 Can anyone please guide me what I may be doing wrong? Thanks in advance for any guidance! p.s. when run through command line like: ffmpeg -i test.mp3 test.mp4, it converts the file just fine.
_______________________________________________ Libav-user mailing list Libav-user@ffmpeg.org http://ffmpeg.org/mailman/listinfo/libav-user