<div dir="ltr"><div>







<p class="">Hello,<br><br>I'm pulling my hair out trying to get H.264 video and AAC audio (raw, with no ADTS headers) into a QuickTime container (or MP4 - same results).  The video works fine, but the audio does not play at all in QuickTime.  In fact, when I view the media information it only shows H.264 and there is no entry for AAC at all.<br><br>The same video works fine in VLC and my timestamps are all correct.  I have already tried rescaling the timestamps or increasing them in even 1/fps steps (as a test, of course), and QuickTime still doesn't see the audio.</p>
<p class="">See code below:</p></div><div><br></div><div>// Video has a uin64_t ntp timestamp, so the numerator would usually be 1 and the denominator would usually be 2^32</div><div>// Because ffmpeg uses an int64 / int32 we need to divide the 64bit ntp ts by at least 2 to handle the sign bit.</div><div>// I'll divide it by 65536 (and lose some precision) so we have enough space in the numerator to store a long video</div><div>double iCMTimeNumeratorDivider = 0x10000;</div><div>double iCMTimeDenomentator     = 4294967296/iCMTimeNumeratorDivider;</div><div><br></div><div>struct connection {<span class="" style="white-space:pre">                                          </span>// Convenient way to pass all the connection vars between functions</div><div><span class="" style="white-space:pre">        </span>AVFormatContext *oc;<span class="" style="white-space:pre">                              </span>// AV Output context</div><div><span class="" style="white-space:pre">       </span>int FFaudioStreamIndex;</div><div><span class="" style="white-space:pre">    </span>int FFvideoStreamIndex;</div><div><span class="" style="white-space:pre">    </span></div><div><span class="" style="white-space:pre">   </span>// Stream</div><div><span class="" style="white-space:pre">  </span>struct timeval <span class="" style="white-space:pre">   </span>streamStartTime;<span class="" style="white-space:pre">          </span>// The time we received the first packet</div><div><span class="" style="white-space:pre">   </span></div><div><span class="" style="white-space:pre">   </span>// Video</div><div><span class="" style="white-space:pre">   </span>uint64_t <span class="" style="white-space:pre">         </span>firstVideoTimeStamp;<span class="" style="white-space:pre">      </span>// Initial timestamp to calculate offsets</div><div><br></div><div><span class="" style="white-space:pre"> </span>// Audio</div><div><span class="" style="white-space:pre">   </span>bool<span class="" 
style="white-space:pre">                      </span>firstAudioPacket;<span class="" style="white-space:pre">         </span>// Signal this is the first audio packet (new connection)</div><div><span class="" style="white-space:pre">  </span>uint16_t <span class="" style="white-space:pre">         </span>prevAudioSequence;<span class="" style="white-space:pre">                </span>// Previous Audio sequence number</div><div><span class="" style="white-space:pre">  </span>uint64_t<span class="" style="white-space:pre">          </span>longAudioTimeStamp;<span class="" style="white-space:pre">               </span>// 64 bit timestamp</div><div><span class="" style="white-space:pre">        </span></div><div>};</div><div><br></div><div>// When the data arrives on a new connection i create the file and add the streams</div><div>void createFile(struct connection *c, unsigned char *configData, int configLen) {</div><div>//av_log_set_level(AV_LOG_DEBUG);</div><div>av_log_set_level(AV_LOG_TRACE);</div><div> </div><div><span class="" style="white-space:pre"> </span>char <span class="" style="white-space:pre">     </span>filename[255];<span class="" style="white-space:pre">                            </span>// filename</div><div><span class="" style="white-space:pre">        </span>strcpy(filename, "test.mov");</div><div><span class="" style="white-space:pre">    </span></div><div><span class="" style="white-space:pre">   </span>avformat_alloc_output_context2(&c->oc, NULL, NULL, filename);</div><div><span class="" style="white-space:pre">       </span>if (!c->oc)</div><div>        terminateGracefully(c, "Could not create output context (libavformat).");</div><div><br></div><div><span class="" style="white-space:pre">      </span>// Setup the video stream<span class="" style="white-space:pre">         </span></div><div><span class="" style="white-space:pre">   </span>uint32_t streamWidth = 0;</div><div><span class="" style="white-space:pre">  </span>uint32_t streamHeight = 
0;</div><div><span class="" style="white-space:pre"> </span>decodeSPS(c, configData, &streamWidth, &streamHeight);<span class="" style="white-space:pre">            </span>// decode the SPS and extract the stream width and height</div><div><span class="" style="white-space:pre">  </span></div><div><span class="" style="white-space:pre">   </span>AVStream *videoStream = avformat_new_stream(c->oc, NULL);</div><div><span class="" style="white-space:pre">       </span>if (!videoStream)</div><div>        terminateGracefully(c, "ERROR creating video stream.");</div><div><br></div><div><span class="" style="white-space:pre">     </span>c->FFvideoStreamIndex = c->oc->nb_streams-1;</div><div><span class="" style="white-space:pre">      </span>videoStream->id = c->FFvideoStreamIndex;</div><div>    videoStream->time_base.num = 1;</div><div>    videoStream->time_base.den = iCMTimeDenomentator;<span class="" style="white-space:pre">                    </span>// Denominator is now 2^32 divided by 65536, i'll divide pts by 65536 too</div><div><br></div><div>    videoStream->codec->codec_type = AVMEDIA_TYPE_VIDEO;</div><div>    videoStream->codec->codec_id = AV_CODEC_ID_H264;</div><div>    videoStream->codec->width = streamWidth;</div><div>    videoStream->codec->height = streamHeight;</div><div><span class="" style="white-space:pre">   </span>//videoStream->codec->pix_fmt = AV_PIX_FMT_YUV420P16;</div><div><br></div><div><span class="" style="white-space:pre">       </span>// Copy extra data</div><div><span class="" style="white-space:pre"> </span>videoStream->codec->extradata = (uint8_t*)av_mallocz(configLen);</div><div><span class="" style="white-space:pre">     </span>videoStream->codec->extradata_size = configLen;</div><div><span class="" style="white-space:pre">      </span>memcpy(videoStream->codec->extradata, configData, configLen);</div><div><br></div><div><span class="" style="white-space:pre">       </span>// Setup the audio stream</div><div><span class="" 
style="white-space:pre">  </span>AVStream *audioStream = avformat_new_stream(c->oc, NULL);</div><div><span class="" style="white-space:pre">       </span>if (!audioStream)</div><div>        terminateGracefully(c, "ERROR creating audio stream.");</div><div><br></div><div><span class="" style="white-space:pre">     </span>c->FFaudioStreamIndex = c->oc->nb_streams-1;<span class="" style="white-space:pre">                             </span>// 16 bit 44,100Hz AAC</div><div><span class="" style="white-space:pre">     </span>audioStream->id = c->FFaudioStreamIndex;</div><div><span class="" style="white-space:pre">     </span>audioStream->time_base.num = 1;</div><div><span class="" style="white-space:pre"> </span>audioStream->time_base.den = 44100;</div><div><br></div><div><span class="" style="white-space:pre">    </span>audioStream->codec->codec_type = AVMEDIA_TYPE_AUDIO;</div><div><span class="" style="white-space:pre"> </span>audioStream->codec->codec_id = AV_CODEC_ID_AAC;</div><div><span class="" style="white-space:pre">      </span>audioStream->codec->sample_rate = 44100;</div><div><br></div><div><span class="" style="white-space:pre">    </span>audioStream->codec->channel_layout = AV_CH_LAYOUT_STEREO;</div><div><span class="" style="white-space:pre">    </span>audioStream->codec->sample_fmt = AV_SAMPLE_FMT_S16;</div><div><span class="" style="white-space:pre">  </span>audioStream->codec->channels = 2;</div><div><span class="" style="white-space:pre">    </span>audioStream->codec->frame_size = 1024;</div><div><span class="" style="white-space:pre">       </span>audioStream->codec->profile = FF_PROFILE_AAC_LOW;</div><div>//<span class="" style="white-space:pre">  </span>audioStream->codec->profile = FF_PROFILE_AAC_MAIN;</div><div><br></div><div><span class="" style="white-space:pre">  </span>// We need global flags for qt</div><div>    if (c->oc->oformat->flags & AVFMT_GLOBALHEADER) {</div><div>        videoStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;</div><div> 
       audioStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;</div><div>    }</div><div><br></div><div><span class="" style="white-space:pre">     </span>// Create the file</div><div>    if (!(c->oc->oformat->flags & AVFMT_NOFILE))</div><div>        if (avio_open(&c->oc->pb, c->oc->filename, AVIO_FLAG_WRITE) < 0)</div><div><span class="" style="white-space:pre">                     </span>terminateGracefully(c, "Error opening file");</div><div><br></div><div><span class="" style="white-space:pre">   </span>// Write the headers</div><div><span class="" style="white-space:pre">       </span>AVDictionary * av_opts = NULL;</div><div><span class="" style="white-space:pre">     </span>av_dict_set(&av_opts, "movflags", "faststart+disable_chpl", 0);</div><div><span class="" style="white-space:pre">    </span>avformat_write_header(c->oc, &av_opts); //, NULL);</div><div><br></div><div><span class="" style="white-space:pre"> </span>av_dump_format(c->oc, 0, filename, 1);</div><div><span class="" style="white-space:pre">  </span>printf("Video stream index %d, id = %d\n", videoStream->index, videoStream->id);</div><div><span class="" style="white-space:pre">   </span>printf("Audio stream index %d, id = %d\n", audioStream->index, audioStream->id);</div><div>}</div><div><br></div><div>void processAudio(struct connection *c, uint32_t len) {</div><div><span class="" style="white-space:pre">    </span>if (c->streamStartTime.tv_sec == 0) gettimeofday(&c->streamStartTime, NULL);<span class="" style="white-space:pre">            </span>// Setup the stream start time to 'NOW' in system time if it's not already set</div><div><br></div><div><span class="" style="white-space:pre">        </span>// Audio data has a rtp header</div><div><span class="" style="white-space:pre">     </span>// 80 96 = Version(2) | Padding (1) | Extension (1) | CC/CSRC count (4) | Marker (1) | Payload Type (7)</div><div><span class="" style="white-space:pre">    </span>// b191 = sequence number</div><div><span 
class="" style="white-space:pre">  </span>// f7 79 16 c2 = Timestamp</div><div><span class="" style="white-space:pre"> </span>// e8 bb 6b 2c = Sync source identifer</div><div><span class="" style="white-space:pre">     </span>// 00 00 00 00 = SSRC</div><div><span class="" style="white-space:pre">      </span>// 00 00 00 00 = CSRC</div><div><span class="" style="white-space:pre">      </span>uint32_t seq = (c->audioBuffer[2]<<8 | c->audioBuffer[3]);<span class="" style="white-space:pre">    </span>// Get reversed uint16 seq</div><div><br></div><div><span class="" style="white-space:pre">        </span>// If this is the </div><div><span class="" style="white-space:pre">        </span>// Audio has an rtp timestamp. So i'll use NOW (in system time) minus the video start time (in system time) as an index of how much time has passed till now</div><div><span class="" style="white-space:pre">   </span>// Then store that as increments of timebase to setup the initial timestamp offset for the audio</div><div><br></div><div><span class="" style="white-space:pre">  </span>if (c->firstAudioPacket) {<span class="" style="white-space:pre">                             </span>// Init previous sequence on first packet</div><div><span class="" style="white-space:pre">          </span>c->firstAudioPacket = false;</div><div><span class="" style="white-space:pre">            </span>c->prevAudioSequence = seq;</div><div><span class="" style="white-space:pre">             </span></div><div><span class="" style="white-space:pre">           </span>// Init the audio timestamp to 'NOW'</div><div><span class="" style="white-space:pre">               </span>struct timeval timeNow;</div><div><span class="" style="white-space:pre">            </span>gettimeofday(&timeNow,NULL);<span class="" style="white-space:pre">          </span></div><div><span class="" style="white-space:pre">           </span>c->longAudioTimeStamp = (timeNow.tv_sec - c->streamStartTime.tv_sec)*44100 + (timeNow.tv_usec - 
c->streamStartTime.tv_usec)*0.044100;</div><div><span class="" style="white-space:pre">     </span>}</div><div><span class="" style="white-space:pre">  </span>c->longAudioTimeStamp += ( (seq - c->prevAudioSequence)*1024 );<span class="" style="white-space:pre">             </span>// Increment long ts in sample size increments</div><div><span class="" style="white-space:pre">     </span>c->prevAudioSequence = seq;<span class="" style="white-space:pre">                                                                                    </span>// Store this as the previous ts</div><div><br></div><div><span class="" style="white-space:pre">  </span>len -= 12;<span class="" style="white-space:pre">                                                                                                        </span>// Strip header</div><div><span class="" style="white-space:pre">    </span>AVPacket packet;</div><div><span class="" style="white-space:pre">   </span>av_init_packet(&packet);<span class="" style="white-space:pre">                                      </span>// Init the packet with defaults</div><div><span class="" style="white-space:pre">   </span>packet.pts = c->longAudioTimeStamp;<span class="" style="white-space:pre">                    </span>// Calculated offset pts</div><div><span class="" style="white-space:pre">   </span>packet.dts = AV_NOPTS_VALUE;</div><div><br></div><div>//<span class="" style="white-space:pre">    </span>packet.duration = 1024;</div><div><span class="" style="white-space:pre">    </span>packet.stream_index = c->FFaudioStreamIndex;<span class="" style="white-space:pre">   </span>// Audio stream index</div><div><span class="" style="white-space:pre">      </span>packet.data =  c->daudioBuffer;<span class="" style="white-space:pre">                                       </span>// Set the data pointer</div><div><span class="" style="white-space:pre">    </span>packet.size =  len;<span class="" style="white-space:pre">                          
                                    </span>// Set the length</div><div><br></div><div><span class="" style="white-space:pre"> </span>printf("----------> Audio: %f\n", (double)packet.pts/(double)c->oc->streams[c->FFaudioStreamIndex]->time_base.den);</div><div><span class="" style="white-space:pre">       </span>av_interleaved_write_frame(c->oc, &packet);<span class="" style="white-space:pre">                                                                </span>// Write it interleaved</div><div><span class="" style="white-space:pre">    </span>avio_flush(c->oc->pb);</div><div>}</div><div><br></div><div>void processVideo(struct connection *c, uint32_t len) {</div><div><br></div><div>...</div><div><br></div><div><span class="" style="white-space:pre">            </span>if (c->streamStartTime.tv_sec == 0) gettimeofday(&c->streamStartTime, NULL);<span class="" style="white-space:pre">            </span>// Setup the stream start time if it's not already set</div><div><br></div><div><span class="" style="white-space:pre">                </span>uint64_t ts = *(uint64_t *)&c->videoBuffer[o + 8];</div><div><br></div><div><span class="" style="white-space:pre">         </span>// Setup initial ts to calculate offsets</div><div><span class="" style="white-space:pre">           </span>if (c->firstVideoTimeStamp == 0) {</div><div><span class="" style="white-space:pre">                      </span>struct timeval timeNow;</div><div><span class="" style="white-space:pre">                    </span>gettimeofday(&timeNow, NULL);</div><div><span class="" style="white-space:pre">                  </span></div><div><span class="" style="white-space:pre">                   </span>c->firstVideoTimeStamp = ts - (timeNow.tv_sec - c->streamStartTime.tv_sec)*4294967296 - (timeNow.tv_usec - c->streamStartTime.tv_usec)*4294.967296;</div><div><span class="" style="white-space:pre">               </span>}</div><div><br></div><div><span class="" style="white-space:pre">         </span>AVPacket 
packet;</div><div><span class="" style="white-space:pre">           </span>av_init_packet(&packet);<span class="" style="white-space:pre">                                                                                              </span>// Init the packet with defaults</div><div><span class="" style="white-space:pre">           </span>packet.pts = (ts - c->firstVideoTimeStamp)/iCMTimeNumeratorDivider;<span class="" style="white-space:pre">            </span>// Calculate offset pts</div><div><span class="" style="white-space:pre">            </span>packet.dts = AV_NOPTS_VALUE;</div><div><span class="" style="white-space:pre">               </span>packet.stream_index = c->FFvideoStreamIndex;<span class="" style="white-space:pre">                                                   </span>// Video stream index</div><div><span class="" style="white-space:pre">              </span>packet.data =  &c->videoBuffer[o + NALU_HEADER_LEN];<span class="" style="white-space:pre">                                                                              </span>// Set the data pointer</div><div><span class="" style="white-space:pre">            </span>packet.size =  len;<span class="" style="white-space:pre">                                                                                                              </span>// Set the length</div><div><br></div><div>printf("Video: %lu, %f\n", ts, (double)packet.pts/(double)c->oc->streams[c->FFvideoStreamIndex]->time_base.den);</div><div><br></div><div><span class="" style="white-space:pre">           </span>av_interleaved_write_frame(c->oc, &packet);<span class="" style="white-space:pre">                                                                </span>// Write it interleaved</div><div><span class="" style="white-space:pre">            </span>avio_flush(c->oc->pb);<span class="" style="white-space:pre">                                                                                              </span>// Flush to disk (for 
testing)</div><div><br></div><div>...</div><div>}</div></div>