[Libav-user] Remuxing sync problem: video is late of some frames.

Thu Jun 11 15:52:39 CEST 2015

Following up on a previous message: I now get correct image quality, but
in the remuxed MPEG4 movie the video lags behind the audio by roughly 0.3 seconds.

The main process is:
1) read packet from source video
2) If it's an AUDIO packet, just write it as it is
3) if it's a VIDEO packet:
	decode it,
	scale to RGB,
	draw a watermark on the image,
	scale back to YUV,
	encode and write;

Any help to solve this shift would make my day. Thank you !

Below you can find the codec initialization and the code that stylizes the video (applies the watermark).

I initialize the MPEG4 codec this way:
_________________________________
    // Encoder found; configure the output MPEG-4 codec context, copying most
    // parameters from the input video stream's codec context.
    // NOTE(review): i_codec_ctx and in_stream are not defined in this snippet;
    // presumably they are the input video codec context and input video
    // stream -- confirm against the enclosing function.
    o_codec_ctx->codec_id=AV_CODEC_ID_MPEG4;
    o_codec_ctx->bit_rate=in_ctx->format_ctx->streams[in_ctx->video_stream_idx]->codec->bit_rate;
    o_codec_ctx->width=in_ctx->format_ctx->streams[in_ctx->video_stream_idx]->codec->width;
    o_codec_ctx->height=in_ctx->format_ctx->streams[in_ctx->video_stream_idx]->codec->height;
    // NOTE(review): this time_base value is replaced by both branches of the
    // if/else below, so this assignment appears to have no effect.
    o_codec_ctx->time_base = in_ctx->format_ctx->streams[in_ctx->video_stream_idx]->codec->time_base;
    o_codec_ctx->ticks_per_frame = in_ctx->format_ctx->streams[in_ctx->video_stream_idx]->codec->ticks_per_frame;
    o_codec_ctx->sample_aspect_ratio = in_ctx->format_ctx->streams[in_ctx->video_stream_idx]->codec->sample_aspect_ratio;
    // NOTE(review): pix_fmt is assigned again two statements below; the later
    // AV_PIX_FMT_YUV420P value is the one that sticks.
    o_codec_ctx->pix_fmt = in_ctx->format_ctx->streams[in_ctx->video_stream_idx]->codec->pix_fmt;
    o_codec_ctx->gop_size=in_ctx->format_ctx->streams[in_ctx->video_stream_idx]->codec->gop_size;
    o_codec_ctx->pix_fmt=AV_PIX_FMT_YUV420P;
    // Some codecs put the wrong ticks_per_frame in (in the thousands instead of tens)
    // Choose the encoder time base: when the input codec's time_base multiplied
    // by ticks_per_frame is larger than the stream time base AND the stream
    // time base is finer than 1/1000 s, use the codec time base with its
    // numerator scaled by ticks_per_frame; otherwise use the stream time base.
    if(av_q2d(i_codec_ctx->time_base) * i_codec_ctx->ticks_per_frame > av_q2d(in_stream->time_base) && av_q2d(in_stream->time_base) < 1.0/1000) {
        o_codec_ctx->time_base = i_codec_ctx->time_base;
        o_codec_ctx->time_base.num *= i_codec_ctx->ticks_per_frame;
    }
    else {
        o_codec_ctx->time_base = in_stream->time_base;
    }

and the code for producing the video is:
____________________________________
void vs_stylize(VS_VideoContext *in_video_ctx, VS_VideoContext *out_video_ctx, int styleID)
{
    AVStream *in_stream, *out_stream;
    AVPacket orig_pkt, styl_pkt;

    VS_Picture *yuv_pix=vs_alloc_picture(in_video_ctx, VS_PIX_FMT_YUV420P);
    VS_Picture *rgb_pix=vs_alloc_picture(in_video_ctx, VS_PIX_FMT_RGB24);

    int ret, got_something;
    int idx=0;//saved frame index in name

    av_init_packet(&orig_pkt);
    av_init_packet(&styl_pkt);

    while(1)
    {
        ret=av_read_frame(in_video_ctx->format_ctx, &orig_pkt);
        if(ret<0)
        break;

        in_stream=in_video_ctx->format_ctx->streams[orig_pkt.stream_index];
        out_stream=out_video_ctx->format_ctx->streams[orig_pkt.stream_index];

        log_packet(in_video_ctx->format_ctx, &orig_pkt, "in");

        if(in_stream->codec->codec->type==AVMEDIA_TYPE_AUDIO)
        {
            // simply copy audio packet
            orig_pkt.pts = av_rescale_q_rnd(orig_pkt.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
            orig_pkt.dts = av_rescale_q_rnd(orig_pkt.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
            orig_pkt.duration = av_rescale_q(orig_pkt.duration, in_stream->time_base, out_stream->time_base);
            orig_pkt.pos = -1;
            log_packet(in_video_ctx->format_ctx, &orig_pkt, "ORIG");
            log_packet(out_video_ctx->format_ctx, &styl_pkt, "STYL");
            //avio_flush(out_video_ctx->format_ctx->pb);
            ret = av_interleaved_write_frame(out_video_ctx->format_ctx, &orig_pkt);
            if (ret < 0) {
                fprintf(stderr, "Error muxing packet\n");
                break;
            }

        }
        else
        if(in_stream->codec->codec->type==AVMEDIA_TYPE_VIDEO)
        {
            //Decode packet (orig_packet),
            //Scale to RGB24,
            //Put a watermark on the frame,
            //Scale back to YUV420P,
            //Copy dts and pts time from original packet
            //Encode yuv frame in a new packet (styl_packet);
            ret=avcodec_decode_video2(in_stream->codec, yuv_pix->av_frame, &got_something, &orig_pkt);
            if(got_something!=0)
            {
                rgb_pix->av_frame=YUVtoRGB(yuv_pix->av_frame, in_stream->codec);
                waterMark(rgb_pix->av_frame, in_stream->codec);
                yuv_pix->av_frame=RGBtoYUV(rgb_pix->av_frame, in_stream->codec);
                avcodec_encode_video2(out_stream->codec, &styl_pkt, yuv_pix->av_frame, &got_something);
                if(!got_something)
                {
                    INFO(stderr, ":-( Unable to encode yuv frame.\n");
                    exit(0);
                }      
            }

            //copy timestamps
            //Note: dans transcodeing .c they use av_packet_rescale_ts()
            styl_pkt.pts = av_rescale_q_rnd(orig_pkt.pts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
            styl_pkt.dts = av_rescale_q_rnd(orig_pkt.dts, in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
            styl_pkt.duration = av_rescale_q(orig_pkt.duration, in_stream->time_base, out_stream->time_base);
            styl_pkt.pos = -1;
            log_packet(in_video_ctx->format_ctx, &orig_pkt, "ORIG");
            log_packet(out_video_ctx->format_ctx, &styl_pkt, "STYL");

            ret = av_interleaved_write_frame(out_video_ctx->format_ctx, &styl_pkt);
            if (ret < 0) {
                fprintf(stderr, "Error muxing packet\n");
                break;
            }
        }
        av_free_packet(&orig_pkt);
        av_free_packet(&styl_pkt);
    }
    av_write_trailer(out_video_ctx->format_ctx);

    vs_free(in_video_ctx);//free and close everything
    vs_free(out_video_ctx);
}