[Libav-user] Last audio frame missing when transcoding to H264

M N Fri, 07 Oct 2016 16:48:51 -0700

Hi,

I am writing a program to transcode .mp4 files to H.264, but I am running into
a problem: the last audio frame is not written to the output stream, and
MediaInfo reports "Duration_LastFrame: -20 ms". I don't know whether it is
related, but Windows Media Player also doesn't show the video of the generated
mp4 file; it plays the sound over a black screen. (I checked the color space
and chroma subsampling: it's yuv420p.)


Here is my code:

#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libavutil/avutil.h"
#include "libavutil/rational.h"
#include "libavutil/timestamp.h"

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt, 
const char *tag)
{
    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
    printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s 
duration_time:%s stream_index:%d\n\n",
           tag,
           av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
           av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
           av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
           pkt->stream_index);
}

int main()
{
    av_register_all();

    av_log_set_level(AV_LOG_FATAL);


    AVFormatContext *ps = avformat_alloc_context();

    AVFormatContext *ps2 = NULL;
    AVOutputFormat *oF = av_guess_format("mp4", NULL, "video/mp4");

    FILE *gSize = fopen("vid.mp4", "rb");
    fseek(gSize, 0, SEEK_END);
    size_t iSize = ftell(gSize);
    fclose(gSize);




    if(avformat_open_input(&ps, "vid.mp4", NULL, NULL) != 0)
    {
        printf("Failed to open input file.\n");
        return -1;
    }

    avformat_alloc_output_context2(&ps2, oF, NULL, "vid2.mp4");

    avformat_find_stream_info(ps, NULL);

    AVCodecContext **pC = (AVCodecContext**)malloc(ps->nb_streams), **p2C = 
(AVCodecContext**)malloc(ps->nb_streams);

    AVStream *oStream = NULL;
    AVStream *iStream = NULL;

    AVCodec *encoder = NULL;
    AVCodec *decoder = NULL;
    AVCodecContext *strCtx = NULL;

    unsigned int i;

    avio_open(&ps2->pb, "vid2.mp4", AVIO_FLAG_WRITE);

    for(i = 0; i < ps->nb_streams; i++)
    {
        printf("%d\n", i);

        iStream = ps->streams[i];

        pC[i] = iStream->codec;


        if(pC[i]->codec_type == AVMEDIA_TYPE_UNKNOWN)
        {
            printf("Skipping bad stream\n");
            continue;
        }

        if(pC[i]->codec_type == AVMEDIA_TYPE_VIDEO || pC[i]->codec_type == 
AVMEDIA_TYPE_AUDIO)
        {
            encoder = avcodec_find_encoder(pC[i]->codec_id);
            if (!encoder)
            {
                av_log(NULL, AV_LOG_FATAL, "Necessary encoder not found\n");
                return AVERROR_INVALIDDATA;
            }

            oStream = avformat_new_stream(ps2, encoder);

            //av_dict_copy(&oStream->metadata, iStream->metadata, 0);

            strCtx = oStream->codec; //We have to set oStream->codec parameters 
for write_header to work,
                                    //since write_header only relies on the 
stream parameters.

            //avcodec_parameters_copy(oStream->codecpar, iStream->codecpar);
            //p2C[i] = oStream->codec;
            p2C[i] = avcodec_alloc_context3(encoder); //H264 codec context must 
be set using alloc_context

            //AVCodecParameters *pars = avcodec_parameters_alloc();
            //avcodec_parameters_from_context(pars, pC[i]);
            //avcodec_parameters_to_context(p2C[i], pars);

            AVDictionary *param = NULL;


            if (pC[i]->codec_type == AVMEDIA_TYPE_VIDEO)
            {
                p2C[i]->width = pC[i]->width;
                p2C[i]->height = pC[i]->height;

                if (encoder->pix_fmts)
                    p2C[i]->pix_fmt = encoder->pix_fmts[0];
                else
                    p2C[i]->pix_fmt = pC[i]->pix_fmt;

                p2C[i]->sample_rate = pC[i]->sample_rate;
                p2C[i]->sample_aspect_ratio = pC[i]->sample_aspect_ratio;
                //p2C[i]->bits_per_coded_sample = pC[i]->bits_per_coded_sample;
                //p2C[i]->bits_per_raw_sample = pC[i]->bits_per_raw_sample;
                //p2C[i]->flags = pC[i]->flags;
                //p2C[i]->flags2 = pC[i]->flags2;
                p2C[i]->time_base = pC[i]->time_base;
                //p2C[i]->bit_rate = pC[i]->bit_rate;
                //p2C[i]->bit_rate_tolerance = pC[i]->bit_rate_tolerance;
                free(p2C[i]->extradata);
                p2C[i]->extradata = (uint8_t*)malloc(pC[i]->extradata_size);
                p2C[i]->extradata = pC[i]->extradata;
                p2C[i]->extradata_size = pC[i]->extradata_size;
                p2C[i]->gop_size = pC[i]->gop_size;

                strCtx->width = pC[i]->width;
                strCtx->height = pC[i]->height;

                /*if (encoder->pix_fmts)
                    strCtx->pix_fmt = encoder->pix_fmts[0];
                else
                    strCtx->pix_fmt = pC[i]->pix_fmt;*/
                //strCtx->sample_rate = pC[i]->sample_rate;
                //strCtx->sample_aspect_ratio = pC[i]->sample_aspect_ratio;
                strCtx->time_base = pC[i]->time_base;
                free(strCtx->extradata);
                strCtx->extradata = (uint8_t*)malloc(pC[i]->extradata_size);
                strCtx->extradata = pC[i]->extradata;
                strCtx->extradata_size = pC[i]->extradata_size;

                //av_dict_set(&param, "qp", "23", 0);
                //av_opt_set(p2C[i]->priv_data, "profile", "high", (1 << 0));
                //av_opt_set(strCtx->priv_data, "profile", "high", (1 << 0));
                /*
                Change options to trade off compression efficiency against 
encoding speed. If you specify a preset, the changes it makes will be applied 
before all other parameters are applied.
                You should generally set this option to the slowest you can 
bear.
                Values available: ultrafast, superfast, veryfast, faster, fast, 
medium, slow, slower, veryslow, placebo.
                */
                //av_dict_set(&param, "preset", "placebo", 0);
                /*
                Tune options to further optimize them for your input content. 
If you specify a tuning, the changes will be applied after --preset but before 
all other parameters.
                If your source content matches one of the available tunings you 
can use this, otherwise leave unset.
                Values available: film, animation, grain, stillimage, psnr, 
ssim, fastdecode, zerolatency.
                */
                //av_dict_set(&param, "crf", "23", 0);
                //av_dict_set(&param, "coder", "1", 0);
                //av_dict_set(&param, "vprofile", "film", 0);
                //av_dict_set(&param, "tune", "zerolatency", 0);
                //av_dict_set(&param, "no-cabac", "0", 0);
                //av_dict_set(&param, "preset", "medium", 0);
            }
            else
            {
                //av_opt_set(p2C[i]->priv_data, "profile", "high", (1 << 0));
                p2C[i]->sample_rate = pC[i]->sample_rate;
                p2C[i]->sample_aspect_ratio = pC[i]->sample_aspect_ratio;
                p2C[i]->channel_layout = pC[i]->channel_layout;
                p2C[i]->channels = 
av_get_channel_layout_nb_channels(p2C[i]->channel_layout);
                // take first format from list of supported formats
                p2C[i]->sample_fmt = encoder->sample_fmts[0];
                p2C[i]->time_base = (AVRational){1, p2C[i]->sample_rate};
                p2C[i]->frame_size = pC[i]->frame_size;
                free(p2C[i]->extradata);
                p2C[i]->extradata = (uint8_t*)malloc(pC[i]->extradata_size);
                p2C[i]->extradata = pC[i]->extradata;
                p2C[i]->extradata_size = pC[i]->extradata_size;
                //p2C[i]->gop_size = pC[i]->gop_size;

                strCtx->sample_rate = pC[i]->sample_rate;
                strCtx->sample_aspect_ratio = pC[i]->sample_aspect_ratio;
                //strCtx->channel_layout = pC[i]->channel_layout;
                //strCtx->channels = 
av_get_channel_layout_nb_channels(strCtx->channel_layout);
                // take first format from list of supported formats
                //strCtx->sample_fmt = encoder->sample_fmts[0];
                strCtx->time_base = (AVRational){1, strCtx->sample_rate};
                strCtx->frame_size = pC[i]->frame_size;
                free(strCtx->extradata);
                strCtx->extradata = (uint8_t*)malloc(pC[i]->extradata_size);
                strCtx->extradata = pC[i]->extradata;
                strCtx->extradata_size = pC[i]->extradata_size;
            }

            //AVCodecParameters *par = avcodec_parameters_alloc();
            //avcodec_parameters_from_context(par, pC[i]);
            //avcodec_parameters_to_context(p2C[i], par);

            decoder = avcodec_find_decoder(pC[i]->codec_id);
            if(decoder == NULL) printf("Couldn't find decoder\n");

            int ret1 = avcodec_open2(pC[i], decoder, NULL);
            int ret2 = avcodec_open2(p2C[i], encoder, NULL);
            printf("Ret1: %d | Ret2: %d\n", ret1, ret2);

        }
        else if (pC[i]->codec_type == AVMEDIA_TYPE_UNKNOWN) {
            av_log(NULL, AV_LOG_FATAL, "Elementary stream #%d is of unknown 
type, cannot proceed\n", i);

        }
        else
        {
            //avcodec_copy_context(oStream->codec, iStream->codec);
            //printf("BUG\n");
        }
    }
    printf("done\n");

    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(ps2->metadata, "", tag, AV_DICT_IGNORE_SUFFIX)))
        printf("%s=%s\n", tag->key, tag->value);

    int ret = avformat_write_header(ps2, NULL);
    char err[200];
    av_make_error_string(err, 200, ret);
    printf("Write header %d: %s\n", ret, err);
    printf("Frames in 0: %d\n", ps->streams[0]->nb_frames);
    printf("Frames in 1: %d\n", ps->streams[1]->nb_frames);
    int decoded_af = 0;
    int audio_frames = 0;
    int encoded_af = 0, encoded2_af = 0;

    int state = 0;
    int prevStream = 0;


    unsigned long long j = 0;
    for(;; ++j)
    {
        AVPacket *pkts = av_packet_alloc();
        av_init_packet(pkts);
        pkts->data = NULL;
        pkts->size = 0;
        AVPacket *pktr = av_packet_alloc();
        av_init_packet(pktr);
        pktr->data = NULL;
        pktr->size = 0;
        AVFrame *rawFrame = av_frame_alloc();




        if(av_read_frame(ps, pkts) == AVERROR_EOF)
        {
            //printf("END\n");

            if(state == 0)
            {
                state++;
                printf("Changed to state %d\n", state);
            }


        }


        int stream_index = pkts->stream_index;
        //if(prevStream != stream_index)
        prevStream = stream_index;

        if(!(ps2->flags & AVFMT_NOTIMESTAMPS))
        {
            pkts->dts = av_rescale_q(pkts->dts, 
ps->streams[stream_index]->time_base, ps2->streams[stream_index]->time_base);
            pkts->pts = av_rescale_q(pkts->pts, 
ps->streams[stream_index]->time_base, ps2->streams[stream_index]->time_base);
            pkts->duration = av_rescale_q(pkts->duration, 
ps->streams[stream_index]->time_base, ps2->streams[stream_index]->time_base);
            //pkts->pos = -1;
            //log_packet(ps2, pkts, "out");
        }
        else
        {
            pkts->dts = AV_NOPTS_VALUE;
            pkts->pts = AV_NOPTS_VALUE;
            printf("NO TIME STAMPS!\n");
        }


        //decoding
        int dret = 0, eret = 0;

        if(state == 0) avcodec_send_packet(pC[stream_index], pkts);
        else if(state == 1)
        {
            avcodec_send_packet(pC[pkts->stream_index], NULL);
            state++;
        }

        dret = avcodec_receive_frame(pC[stream_index], rawFrame);
        if(dret == 0 || state >= 3)
        {
            if(stream_index == 1) decoded_af++;
            //encoding
            if(state < 3)
            {
                rawFrame->pts = av_frame_get_best_effort_timestamp(rawFrame);
                int rets = avcodec_send_frame(p2C[stream_index], rawFrame);
                if(rets == 0 && stream_index == 1) encoded_af++;
                //if(stream_index == 1) printf("Frame: %d\n", 
p2C[stream_index]->frame_number);
            }
            else if (state == 3)
            {
                avcodec_send_frame(p2C[stream_index], NULL);
                state++;
            }

            eret = avcodec_receive_packet(p2C[stream_index], pktr);
            if(eret == 0)
            {
                if(stream_index == 1) encoded2_af++;

                while(eret == 0)
                {
                    pktr->stream_index = stream_index;
                    int retW = av_interleaved_write_frame(ps2, pktr);

                    if(retW != 0)
                    {
                        printf("Failed to write packet\n");
                        break;
                    }
                    else if(retW == 0 && stream_index == 1) audio_frames++;
                    eret = avcodec_receive_packet(p2C[stream_index], pktr);
                }
                //avcodec_flush_buffers(pC[stream_index]);
            }
            else if(eret == AVERROR_EOF)
            {
                if(stream_index == 1) printf("Audio frame failure at EOF\n");
                avcodec_flush_buffers(pC[stream_index]);
                printf("Finished\n");
                break;
            }
            else if(eret == AVERROR(EAGAIN))
            {
                if(stream_index == 1) printf("Audio frame failure at 
AVERROR(EAGAIN)\n");
                else printf("AVERROR(EAGAIN)\n");
                //continue;
                goto clean;
            }
            else
            {
                if(stream_index == 1) printf("Audio frame failure at other 
error.\n");
                printf("other error\n");
            }
        }
        else if(dret == AVERROR_EOF && state == 2)
        {
            state++;

            printf("Changed to state %d\n", state);
        }

clean:


        av_packet_free(&pkts);
        av_packet_free(&pktr);
        av_frame_free(&rawFrame);
        av_frame_unref(rawFrame);
    }

    printf("Written AF: %d\nDecoded AF: %d\nEncoded AF: %d\nEncoded2_AF: %d\n", 
audio_frames, decoded_af, encoded_af, encoded2_af);

    if(av_write_trailer(ps2) == 0) printf("Wrote trailer\n");

}
********************************************

Thanks!
_______________________________________________
Libav-user mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/libav-user

[Libav-user] Last audio frame missing when transcoding to H264

Reply via email to