[FFmpeg-devel] [PATCH 2/2 v2] avformat/movenc: parse h264 packets to build Sync Sample and Recovery Point tables
James Almer
jamrial at gmail.com
Sat Jul 31 06:01:14 EEST 2021
On 7/29/2021 9:49 PM, Andreas Rheinhardt wrote:
> James Almer:
>> Since we can't blindly trust the keyframe flag in packets and assume its
>> contents are a valid Sync Sample, do some basic bitstream parsing to build the
>> Sync Sample table in addition to a Random Access Recovery Point table.
>>
>> Suggested-by: ffmpeg at fb.com
>> Signed-off-by: James Almer <jamrial at gmail.com>
>> ---
>> libavformat/movenc.c | 142 +++++++++++++++++++++++++++++++++--
>> libavformat/movenc.h | 1 +
>> tests/ref/lavf-fate/h264.mp4 | 6 +-
>> 3 files changed, 139 insertions(+), 10 deletions(-)
>>
>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>> index fedc9c0e18..01ab2f43a6 100644
>> --- a/libavformat/movenc.c
>> +++ b/libavformat/movenc.c
>> @@ -34,13 +34,17 @@
>> #include "avc.h"
>> #include "libavcodec/ac3_parser_internal.h"
>> #include "libavcodec/dnxhddata.h"
>> +#include "libavcodec/h264.h"
>> +#include "libavcodec/h2645_parse.h"
>> #include "libavcodec/flac.h"
>> #include "libavcodec/get_bits.h"
>> +#include "libavcodec/golomb.h"
>>
>> #include "libavcodec/internal.h"
>> #include "libavcodec/put_bits.h"
>> #include "libavcodec/vc1_common.h"
>> #include "libavcodec/raw.h"
>> +#include "libavcodec/sei.h"
>> #include "internal.h"
>> #include "libavutil/avstring.h"
>> #include "libavutil/channel_layout.h"
>> @@ -2537,7 +2541,9 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
>> if (!sgpd_entries)
>> return AVERROR(ENOMEM);
>>
>> - av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC);
>> + av_assert0(track->par->codec_id == AV_CODEC_ID_OPUS ||
>> + track->par->codec_id == AV_CODEC_ID_AAC ||
>> + track->par->codec_id == AV_CODEC_ID_H264);
>>
>> if (track->par->codec_id == AV_CODEC_ID_OPUS) {
>> for (i = 0; i < track->entry; i++) {
>> @@ -2566,6 +2572,18 @@ static int mov_preroll_write_stbl_atoms(AVIOContext *pb, MOVTrack *track)
>> sgpd_entries[entries].group_description_index = distance ? ++group : 0;
>> }
>> }
>> + } else if (track->par->codec_id == AV_CODEC_ID_H264) {
>> + for (i = 0; i < track->entry; i++) {
>> + int distance = track->cluster[i].roll_distance;
>> + if (i && distance == sgpd_entries[entries].roll_distance) {
>> + sgpd_entries[entries].count++;
>> + } else {
>> + entries++;
>> + sgpd_entries[entries].count = 1;
>> + sgpd_entries[entries].roll_distance = distance;
>> + sgpd_entries[entries].group_description_index = distance ? ++group : 0;
>> + }
>> + }
>> } else {
>> entries++;
>> sgpd_entries[entries].count = track->sample_count;
>> @@ -2639,7 +2657,9 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVMuxContext
>> if (track->cenc.aes_ctr) {
>> ff_mov_cenc_write_stbl_atoms(&track->cenc, pb);
>> }
>> - if (track->par->codec_id == AV_CODEC_ID_OPUS || track->par->codec_id == AV_CODEC_ID_AAC) {
>> + if (track->par->codec_id == AV_CODEC_ID_OPUS ||
>> + track->par->codec_id == AV_CODEC_ID_AAC ||
>> + track->par->codec_id == AV_CODEC_ID_H264) {
>> mov_preroll_write_stbl_atoms(pb, track);
>> }
>> return update_size(pb, pos);
>> @@ -5150,6 +5170,107 @@ static int mov_parse_mpeg2_frame(AVPacket *pkt, uint32_t *flags)
>> return 0;
>> }
>>
>> +static int mov_parse_h264_frame(AVPacket *pkt, MOVTrack *trk, uint8_t *reformatted_data,
>> + int size)
>> +{
>> + const uint8_t *buf, *buf_end, *sei_buf = NULL;
>> + uint32_t state = -1;
>> + unsigned roll_distance = 0;
>> + int nal_length_size = 0, nalsize;
>> + int ret, idr = 0;
>> +
>> + if (!size)
>> + return 0;
>> + if (trk->vos_data && trk->vos_data[0] == 1) {
>> + if (trk->vos_len < 5)
>> + return 0;
>> + nal_length_size = (trk->vos_data[4] & 0x03) + 1;
>> + buf = pkt->data;
>> + buf_end = pkt->data + size;
>> + } else {
>> + nal_length_size = 4;
>> + buf = reformatted_data;
>> + buf_end = reformatted_data + size;
>> + }
>> +
>> + while (buf_end - buf >= 4) {
>> + int i = 0;
>
> This variable can use case H264_NAL_SEI scope.
It's used by get_nalsize() in the following line.
>
>> + nalsize = get_nalsize(nal_length_size, buf, buf_end - buf, &i, NULL);
>> + if (nalsize < 0)
>> + break;
>> + state = buf[nal_length_size];
>> + buf += nal_length_size + 1;
>> + nalsize--;
>> +
>> + switch (state & 0x1f) {
>> + case H264_NAL_IDR_SLICE:
>> + idr = 1;
>> + // fall-through
>> + case H264_NAL_SLICE:
>> + goto end;
>
> Given that you are now only parsing until you have the first VLC slice I
> wonder whether it is really advantageous to reuse the already
> transformed packet; after all, ff_avc_parse_nal_units_buf() has an
> allocation and a memcpy more than ff_avc_parse_nal_units() which
> probably outweighs any savings in this function given that SEIs are so tiny.
> Maybe the helper function from my old patchset
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/20200709191805.21648-1-andreas.rheinhardt@gmail.com/
> (which I never pushed because I feared that I was too strict) could be
> helpful? (But one would probably have to add a new parameter to allow to
> peek into the future (i.e. see what the next NALU will be); otherwise
> one would needlessly try to find the end of the first VLC NALU.
Feel free to optimize it if you commit that function or a similar one.
>
>> + case H264_NAL_SEI: {
>> + int sei_size;
>> +
>> + sei_buf = ff_nal_unit_extract_rbsp(buf, nalsize, &sei_size, 0);
>> + if (!sei_buf)
>> + goto end;
>> +
>> + i = 0;
>> + do {
>> + GetBitContext gb;
>> + int payload_type = 0, payload_size = 0;
>> +
>> + do {
>> + if (sei_size - i < 1)
>
> sei_size <= i seems more natural.
Will change.
>
>> + goto end;
>> + payload_type += sei_buf[i];
>> + } while (sei_buf[i++] == 255);
>> + do {
>> + if (sei_size - i < 1)
>> + goto end;
>> + payload_size += sei_buf[i];
>> + } while (sei_buf[i++] == 255);
>> +
>> + if (payload_size > sei_size - i)
>> + goto end;
>> +
>> + ret = init_get_bits8(&gb, sei_buf + i, payload_size);
>> + if (ret < 0)
>> + goto end;
>> +
>> + switch (payload_type) {
>> + case SEI_TYPE_RECOVERY_POINT:
>> + roll_distance = get_ue_golomb_long(&gb);
>> + break;
>> + default:
>> + break;
>> + }
>> + i += payload_size;
>> + } while (sei_size - i > 0 && sei_buf[i] != 0x80);
>> + av_freep(&sei_buf);
>> + break;
>> + }
>> + default:
>> + break;
>> + }
>> + buf += nalsize;
>> + }
>> +
>> +end:
>> + if (roll_distance != (int16_t)roll_distance)
>> + roll_distance = 0;
>> + trk->cluster[trk->entry].roll_distance = roll_distance;
>> +
>> + if (idr) {
>> + trk->cluster[trk->entry].flags |= MOV_SYNC_SAMPLE;
>> + trk->has_keyframes++;
>> + }
>> +
>> + av_freep(&sei_buf);
>> +
>> + return 0;
>> +}
>> +
>> static void mov_parse_vc1_frame(AVPacket *pkt, MOVTrack *trk)
>> {
>> const uint8_t *start, *next, *end = pkt->data + pkt->size;
>> @@ -5638,13 +5759,17 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>> avio_write(pb, reformatted_data, size);
>> } else {
>> if (trk->cenc.aes_ctr) {
>> - size = ff_mov_cenc_avc_parse_nal_units(&trk->cenc, pb, pkt->data, size);
>> - if (size < 0) {
>> - ret = size;
>> + ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size);
>> + if (ret < 0)
>> + return ret;
>> + ret = ff_mov_cenc_avc_write_nal_units(s, &trk->cenc, 4, pb, reformatted_data, size);
>> + if (ret < 0)
>> goto err;
>> - }
>> } else {
>> - size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size);
>> + ret = ff_avc_parse_nal_units_buf(pkt->data, &reformatted_data, &size);
>> + if (ret < 0)
>> + return ret;
>
> You can factor the call to ff_avc_parse_nal_units_buf() out; in fact,
> not only the two branches seen here do this, but the code immediately
> above this does it as well: It even coincides with what is here.
Ah, good catch, will merge them.
>
>> + avio_write(pb, reformatted_data, size);
>> }
>> }
>> } else if (par->codec_id == AV_CODEC_ID_HEVC && trk->vos_len > 6 &&
>> @@ -5740,6 +5865,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>> trk->cluster[trk->entry].entries = samples_in_chunk;
>> trk->cluster[trk->entry].dts = pkt->dts;
>> trk->cluster[trk->entry].pts = pkt->pts;
>> + trk->cluster[trk->entry].roll_distance = 0;
>> if (!trk->entry && trk->start_dts != AV_NOPTS_VALUE) {
>> if (!trk->frag_discont) {
>> /* First packet of a new fragment. We already wrote the duration
>> @@ -5821,6 +5947,8 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
>>
>> if (par->codec_id == AV_CODEC_ID_VC1) {
>> mov_parse_vc1_frame(pkt, trk);
>> + } else if (par->codec_id == AV_CODEC_ID_H264 && !TAG_IS_AVCI(trk->tag)) {
>> + mov_parse_h264_frame(pkt, trk, reformatted_data, size);
>> } else if (par->codec_id == AV_CODEC_ID_TRUEHD) {
>> mov_parse_truehd_frame(pkt, trk);
>> } else if (pkt->flags & AV_PKT_FLAG_KEY) {
>> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
>> index af1ea0bce6..73bf73ce8f 100644
>> --- a/libavformat/movenc.h
>> +++ b/libavformat/movenc.h
>> @@ -56,6 +56,7 @@ typedef struct MOVIentry {
>> #define MOV_PARTIAL_SYNC_SAMPLE 0x0002
>> #define MOV_DISPOSABLE_SAMPLE 0x0004
>> uint32_t flags;
>> + int16_t roll_distance;
>> AVProducerReferenceTime prft;
>> } MOVIentry;
>>
>> diff --git a/tests/ref/lavf-fate/h264.mp4 b/tests/ref/lavf-fate/h264.mp4
>> index a9c3823c2c..c08ee4c7ae 100644
>> --- a/tests/ref/lavf-fate/h264.mp4
>> +++ b/tests/ref/lavf-fate/h264.mp4
>> @@ -1,3 +1,3 @@
>> -fe299ea5205b71a48281f917b1256a5d *tests/data/lavf-fate/lavf.h264.mp4
>> -547928 tests/data/lavf-fate/lavf.h264.mp4
>> -tests/data/lavf-fate/lavf.h264.mp4 CRC=0x9da2c999
>> +2812f617314d23474fcb23898b8a56ab *tests/data/lavf-fate/lavf.h264.mp4
>> +548084 tests/data/lavf-fate/lavf.h264.mp4
>> +tests/data/lavf-fate/lavf.h264.mp4 CRC=0x396f0829
>>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request at ffmpeg.org with subject "unsubscribe".
>
More information about the ffmpeg-devel
mailing list