Hi Christian and Nicolas, Thanks for the input. I managed to get video encoding working and everything looks good there. The solution was to use picture.best_effort_timestamp as the pts of the encoded frame instead of pkt.pts (since I have now learned that a packet can contain zero or more pictures, not necessarily exactly one picture per packet).
However I'm still having trouble with the audio, specifically resampling and encoding. If I simply pass the source audio through, it works ok. The call to avresample_convert() seems to work ok (returns a valid length). The first call to avcodec_encode_audio2() will succeed, but all subsequent calls after the first will return -22 (Illegal argument error). I'm not sure what argument is illegal especially since the first call succeeds. Does anyone have any ideas where I went wrong on the audio encoding part? I put the whole source code below. I'm looking forward to any suggestions. :) Thanks everyone! #include <stdio.h> #include <assert.h> #include "libavformat/avformat.h" #include "libavcodec/avcodec.h" #include "libswscale/swscale.h" #include "libavresample/avresample.h" #include "libavutil/opt.h" int main (int argc, const char * argv[]) { const char *infile = argv[1]; const char *outfile = argv[2]; int r; int video_index = -1, audio_index = -1; // stream index AVPacket pkt; AVFrame *srcpic = NULL, *srcaudio = NULL; AVFormatContext *in = NULL, *out = NULL; AVCodecContext *in_vcodec, *in_acodec, *out_vcodec, *out_acodec; AVStream *in_vstream, *in_astream, *out_vstream, *out_astream; AVAudioResampleContext *avr; AVCodec *h264, *aac; int got_picture, got_audio; int video_frames = 0, audio_samples = 0; char errbuf[128]; struct SwsContext *img_convert_ctx; double framerate, samplerate; int got_packet_ptr = 0; int audio_bufsize = AVCODEC_MAX_AUDIO_FRAME_SIZE + FF_INPUT_BUFFER_PADDING_SIZE; uint8_t *video_outbuf; int video_outbuf_size = 2000000; // TODO: is there a defined max size for encoded video buffer? 
uint8_t *picbuf; int picbuf_size; int64_t firstpts = -1; // init LAVF av_register_all(); avformat_network_init(); av_log_set_level(AV_LOG_VERBOSE); // Open input file printf("Open input file: %s\n", infile); r = avformat_open_input(&in, infile, NULL, NULL); if (r) { printf("err %x\n", r); return r; } r = avformat_find_stream_info(in, NULL); if (r) { printf("err %x\n", r); return r; } // iterate over input streams for (int i = 0; i < in->nb_streams; i++) { AVStream *inputStream = in->streams[i]; if (inputStream->codec->codec_type == AVMEDIA_TYPE_VIDEO) { inputStream->discard = AVDISCARD_NONE; video_index = i; in_vstream = inputStream; in_vcodec = inputStream->codec; if (!inputStream->codec->codec) { avcodec_open2(inputStream->codec, avcodec_find_decoder(inputStream->codec->codec_id), NULL); } printf("Input video %s rate %d/%d width %d height %d\n", in_vcodec->codec->name, inputStream->r_frame_rate.num, inputStream->r_frame_rate.den, in_vcodec->width, in_vcodec->height); } else if (inputStream->codec->codec_type == AVMEDIA_TYPE_AUDIO) { inputStream->discard = AVDISCARD_NONE; audio_index = i; in_astream = inputStream; in_acodec = inputStream->codec; if (!inputStream->codec->codec) { avcodec_open2(inputStream->codec, avcodec_find_decoder(inputStream->codec->codec_id), NULL); } printf("Input audio %s rate %d channels %d sample_format %d\n", in_acodec->codec->name, in_acodec->sample_rate, in_acodec->channels, in_acodec->sample_fmt); } else { inputStream->discard = AVDISCARD_ALL; } } assert(in_vcodec && in_acodec); // Open output file for writing out = avformat_alloc_context(); assert(out); out->oformat = av_guess_format(NULL, outfile, NULL); // Guess output container format based on file extension assert(out->oformat); // Output parameters // Video codec h264 = avcodec_find_encoder(CODEC_ID_H264); assert(h264); out_vcodec = avcodec_alloc_context3(h264); assert(out_vcodec); avcodec_get_context_defaults3(out_vcodec, h264); out_vcodec->bit_rate = 500000; out_vcodec->width = 
640; out_vcodec->height = 360; out_vcodec->time_base.num = in_vstream->r_frame_rate.den; // time_base is 1/framerate out_vcodec->time_base.den = in_vstream->r_frame_rate.num; out_vcodec->gop_size = (int)round(av_q2d(in_vstream->r_frame_rate) / 2); // GOP size is framerate/2 out_vcodec->max_b_frames = 0; out_vcodec->coder_type = FF_CODER_TYPE_VLC; out_vcodec->pix_fmt = PIX_FMT_YUV420P; out_vcodec->profile = FF_PROFILE_H264_BASELINE; // TODO: set other codec parameters r = avcodec_open2(out_vcodec, h264, NULL); assert(!r); // Video stream out_vstream = avformat_new_stream(out, out_vcodec->codec); assert(out_vstream); out_vstream->codec = out_vcodec; out_vstream->r_frame_rate = in_vstream->r_frame_rate; framerate = av_q2d(out_vstream->r_frame_rate); // Audio codec aac = avcodec_find_encoder(CODEC_ID_MP2); assert(aac); out_acodec = avcodec_alloc_context3(aac); avcodec_get_context_defaults3(out_acodec, aac); assert(out_acodec); out_acodec->codec_id = aac->id; out_acodec->codec_type = AVMEDIA_TYPE_AUDIO; out_acodec->bit_rate = 128000; out_acodec->channels = 2; out_acodec->sample_rate = 48000; samplerate = out_acodec->sample_rate; out_acodec->sample_fmt = AV_SAMPLE_FMT_S16; out_acodec->channel_layout = av_get_channel_layout("stereo"); out_acodec->time_base = in_acodec->time_base; // TODO: set other codec parameters r = avcodec_open2(out_acodec, aac, NULL); assert(!r); // Audio stream out_astream = avformat_new_stream(out, out_acodec->codec); assert(out_astream); out_astream->codec = out_acodec; // Begin writing output file printf("Open output file: %s\nOutput container: %s\n", outfile, out->oformat->long_name); r = avio_open2(&out->pb, outfile, AVIO_FLAG_WRITE, NULL, NULL); if (r) { printf("err %x\n", r); return r; } printf("write out header\n"); r = avformat_write_header(out, NULL); if (r) { printf("err %x\n", r); return r; } // show output streams for (int i = 0; i < out->nb_streams; i++) { AVStream *outputStream = out->streams[i]; if (outputStream->codec && 
outputStream->codec->codec) { printf("Output stream %d: %s %d/%d ", i, outputStream->codec->codec->name, outputStream->time_base.num, outputStream->time_base.den); if (outputStream->codec->codec_type == AVMEDIA_TYPE_VIDEO) printf("width %d height %d bitrate %d\n", outputStream->codec->width, outputStream->codec->height, outputStream->codec->bit_rate); if (outputStream->codec->codec_type == AVMEDIA_TYPE_AUDIO) printf("channels %d sample_rate %d bitrate %d\n", outputStream->codec->channels, outputStream->codec->sample_rate, outputStream->codec->bit_rate); } else printf("Output stream %d: %d/%d\n", i, outputStream->time_base.num, outputStream->time_base.den); } // buffer for encoded video data video_outbuf = (uint8_t*)av_malloc(video_outbuf_size); assert(video_outbuf); img_convert_ctx = sws_getContext(in_vcodec->width, in_vcodec->height, in_vcodec->pix_fmt, out_vcodec->width, out_vcodec->height, out_vcodec->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL); assert(img_convert_ctx); // buffer for picture data picbuf_size = avpicture_get_size(out_vcodec->pix_fmt, out_vcodec->width, out_vcodec->height); picbuf = (uint8_t*)av_malloc(picbuf_size); assert(picbuf); // setup resample context avr = avresample_alloc_context(); av_opt_set_int(avr, "in_channel_layout", in_acodec ->channel_layout, 0); av_opt_set_int(avr, "out_channel_layout", out_acodec->channel_layout, 0); av_opt_set_int(avr, "in_sample_fmt", in_acodec ->sample_fmt, 0); av_opt_set_int(avr, "out_sample_fmt", out_acodec->sample_fmt, 0); av_opt_set_int(avr, "in_sample_rate", in_acodec ->sample_rate, 0); av_opt_set_int(avr, "out_sample_rate", out_acodec->sample_rate, 0); av_opt_set_int(avr, "in_channels", in_acodec ->channels, 0); av_opt_set_int(avr, "out_channels", out_acodec->channels, 0); r = avresample_open(avr); assert(!r); printf("begin input loop\n"); while (1) { av_init_packet(&pkt); r = av_read_frame(in, &pkt); if (r) { if (r == AVERROR_EOF) printf("EOF\n"); else printf("read error %x\n", r); break; } printf("src pkt 
stream %d, pts %"PRId64", dts %"PRId64"\n", pkt.stream_index, pkt.pts, pkt.dts); if (firstpts == -1 && pkt.pts != AV_NOPTS_VALUE) firstpts = pkt.pts; if (pkt.stream_index == in_vstream->index) { srcpic = avcodec_alloc_frame(); assert(srcpic); avcodec_get_frame_defaults(srcpic); got_picture = 0; r = avcodec_decode_video2(in_vcodec, srcpic, &got_picture, &pkt); if (r < 0) { av_strerror(r, errbuf, 128); printf("video decode error %d %s\n", r, errbuf); break; } else if (got_picture) { AVPacket newpkt; AVFrame *destpic; av_init_packet(&newpkt); destpic = avcodec_alloc_frame(); got_packet_ptr = 0; printf("got picture: best_effort_timestamp %"PRId64"\n", srcpic->best_effort_timestamp); // convert picture to dest format avpicture_fill((AVPicture*)destpic, picbuf, out_vcodec->pix_fmt, out_vcodec->width, out_vcodec->height); sws_scale(img_convert_ctx, (const uint8_t* const*)srcpic->data, srcpic->linesize, 0, in_vcodec->height, destpic->data, destpic->linesize); // set destpic PTS if (srcpic->best_effort_timestamp != AV_NOPTS_VALUE) destpic->pts = av_rescale_q(srcpic->best_effort_timestamp, in_vstream->time_base, out_vstream->time_base); else destpic->pts = (int)((double)video_frames * (90000.0/framerate)); // TODO: not always 90k // encode picture r = avcodec_encode_video2(out_vcodec, &newpkt, destpic, &got_packet_ptr); if (r < 0) { av_strerror(r, errbuf, 128); printf("video encode error %d %s\n", r, errbuf); } else if (got_packet_ptr) { // write packet newpkt.stream_index = out_vstream->index; printf("write video pkt: stream %d, pts %"PRId64", dts %"PRId64"\n", newpkt.stream_index, newpkt.pts, newpkt.dts); r = av_interleaved_write_frame(out, &newpkt); if (r && (r != AVERROR(EINVAL))) { printf("video write error %x\n", r); } assert(!r); } av_free_packet(&newpkt); av_free(destpic); video_frames++; } av_free(srcpic); } else if (pkt.stream_index == in_astream->index) { // decode audio srcaudio = avcodec_alloc_frame(); avcodec_get_frame_defaults(srcaudio); got_audio = 0; r = 
avcodec_decode_audio4(in_acodec, srcaudio, &got_audio, &pkt); if (r < 0) { av_strerror(r, errbuf, 128); printf("audio decode error %d %s\n", r, errbuf); break; } else if (got_audio) { // convert audio AVPacket newpkt; AVFrame *destaudio; // frame for resampled audio int nb_samples; av_init_packet(&newpkt); destaudio = avcodec_alloc_frame(); avcodec_get_frame_defaults(destaudio); destaudio->extended_data = av_malloc(sizeof(uint8_t*)); destaudio->extended_data[0] = av_malloc(audio_bufsize); got_packet_ptr = 0; printf("srcaudio linesize[0]=%d nb_samples=%d\n", srcaudio->linesize[0], srcaudio->nb_samples); // resample to dest format nb_samples = avresample_convert(avr, (void**)destaudio->extended_data, destaudio->linesize[0], audio_bufsize, (void**)srcaudio->extended_data, srcaudio->linesize[0], srcaudio->nb_samples); if (nb_samples < 0) { av_strerror(nb_samples, errbuf, 128); printf("avr error %d %s\n", nb_samples, errbuf); } printf("avr ret len %d\n", nb_samples); if (srcaudio->best_effort_timestamp != AV_NOPTS_VALUE) destaudio->pts = av_rescale_q(srcaudio->best_effort_timestamp, in_astream->time_base, out_astream->time_base); else destaudio->pts = firstpts + (int)((double)audio_samples * (90000.0/samplerate)); printf("destaudio pts %"PRId64"\n", destaudio->pts); // why does this return -22 after the first successfull call? 
r = avcodec_encode_audio2(out_acodec, &newpkt, destaudio, &got_packet_ptr); if (r < 0) { av_strerror(r, errbuf, 128); printf("audio encode error %d %s\n", r, errbuf); } else if (got_packet_ptr) { // write frame newpkt.stream_index = out_astream->index; newpkt.flags |= AV_PKT_FLAG_KEY; printf("write audio pkt: stream %d, pts %"PRId64", dts %"PRId64"\n", newpkt.stream_index, newpkt.pts, newpkt.dts); r = av_interleaved_write_frame(out, &newpkt); if (r && (r != AVERROR(EINVAL))) { printf("audio write error %x\n", r); } } av_free(destaudio->extended_data[0]); av_free(destaudio->extended_data); av_free(destaudio); av_free_packet(&newpkt); audio_samples += nb_samples; } av_free(srcaudio); } av_free_packet(&pkt); } // Flush any remaining encoded data // encode picture av_free_packet(&pkt); printf("Flush video packets\n"); while (1) { av_init_packet(&pkt); got_packet_ptr = 0; r = avcodec_encode_video2(out_vcodec, &pkt, NULL, &got_packet_ptr); if (r < 0) { av_strerror(r, errbuf, 128); printf("video encode error %d %s\n", r, errbuf); break; } else if (got_packet_ptr) { // write packet pkt.stream_index = out_vstream->index; printf("write video pkt: stream %d, pts %"PRId64", dts %"PRId64"\n", pkt.stream_index, pkt.pts, pkt.dts); r = av_interleaved_write_frame(out, &pkt); if (r && (r != AVERROR(EINVAL))) { printf("video write error %x\n", r); } assert(!r); } else if (r == 0) { break; } av_free_packet(&pkt); } av_free_packet(&pkt); // flush audio printf("Flush audio packets\n"); while (1) { av_init_packet(&pkt); got_packet_ptr = 0; r = avcodec_encode_audio2(out_acodec, &pkt, NULL, &got_packet_ptr); if (r < 0) { av_strerror(r, errbuf, 128); printf("audio encode error %d %s\n", r, errbuf); break; } else if (got_packet_ptr) { // write packet pkt.stream_index = out_astream->index; printf("write audio pkt: stream %d, pts %"PRId64", dts %"PRId64"\n", pkt.stream_index, pkt.pts, pkt.dts); r = av_interleaved_write_frame(out, &pkt); if (r && (r != AVERROR(EINVAL))) { printf("audio write 
error %x\n", r); } assert(!r); } else if (r == 0) { break; } av_free_packet(&pkt); } av_free_packet(&pkt); av_free(picbuf); av_free(video_outbuf); avcodec_close(in_vcodec); avcodec_close(in_acodec); avcodec_close(out_vcodec); avcodec_close(out_acodec); // TODO: anything else to free/close? r = av_write_trailer(out); if (r) { printf("error closing output %x\n", r); } avformat_close_input(&in); printf("Wrote output file: %s\n", outfile); return 0; } -- View this message in context: http://libav-users.943685.n4.nabble.com/Example-for-recompressing-a-video-tp4655098p4655124.html Sent from the libav-users mailing list archive at Nabble.com. _______________________________________________ Libav-user mailing list Libav-user@ffmpeg.org http://ffmpeg.org/mailman/listinfo/libav-user