[FFmpeg-devel] [PATCH 2/2] avformat/movenc: add support for fragmented TTML muxing

Fri Dec 23 14:51:57 EET 2022

From: Jan Ekström <jan.ekstrom at 24i.com>

Attempts to base the timing of each TTML fragment on the other streams
being muxed, as most receivers expect the media fragments of all tracks
to be more or less aligned in time.

Fragmentation is not yet supported when the subtitle track is the only
track, as that would require checking the subtitle packet queue timings
in addition to the current fragmentation timing logic.

Signed-off-by: Jan Ekström <jan.ekstrom at 24i.com>
---
 libavformat/movenc.c      |   9 ---
 libavformat/movenc_ttml.c | 163 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 159 insertions(+), 13 deletions(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 7d49892283..e9a7984f8a 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -7188,15 +7188,6 @@ static int mov_init(AVFormatContext *s)
                 track->squash_fragment_samples_to_one =
                     ff_is_ttml_stream_paragraph_based(track->par);
 
-                if (mov->flags & FF_MOV_FLAG_FRAGMENT &&
-                    track->squash_fragment_samples_to_one) {
-                    av_log(s, AV_LOG_ERROR,
-                           "Fragmentation is not currently supported for "
-                           "TTML in MP4/ISMV (track synchronization between "
-                           "subtitles and other media is not yet implemented)!\n");
-                    return AVERROR_PATCHWELCOME;
-                }
-
                 if (track->mode != MODE_ISM &&
                     track->par->codec_tag == MOV_ISMV_TTML_TAG &&
                     s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
diff --git a/libavformat/movenc_ttml.c b/libavformat/movenc_ttml.c
index 6deae49657..27ec7d9487 100644
--- a/libavformat/movenc_ttml.c
+++ b/libavformat/movenc_ttml.c
@@ -54,6 +54,50 @@ static int mov_init_ttml_writer(MOVTrack *track, AVFormatContext **out_ctx)
     return 0;
 }
 
+static void mov_calculate_start_and_end_of_other_tracks(
+    AVFormatContext *s, MOVTrack *track, int64_t *start_pts, int64_t *end_pts)
+{
+    MOVMuxContext *mov = s->priv_data;
+    // Outputs, in track's time base, the range spanned by the other tracks.
+    // Both bounds start at the end of the previous document/fragment, which
+    // is NOPTS until the first fragment is created.
+    int64_t max_track_end_dts = *start_pts = track->end_pts;
+
+    for (unsigned int i = 0; i < s->nb_streams; i++) {
+        MOVTrack *other_track = &mov->tracks[i];
+
+        // Skip our own track, any other track that also gets squashed,
+        // any track whose start_dts is still NOPTS, and any track that
+        // currently has no entries (cluster[0] is read below).
+        if (track == other_track ||
+            other_track->squash_fragment_samples_to_one ||
+            other_track->start_dts == AV_NOPTS_VALUE ||
+            !other_track->entry) {
+            continue;
+        }
+        // Rescale the candidate bounds into this track's time base.
+        {
+            int64_t picked_start = av_rescale_q_rnd(other_track->cluster[0].dts + other_track->cluster[0].cts,
+                                                    other_track->st->time_base,
+                                                    track->st->time_base,
+                                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
+            int64_t picked_end   = av_rescale_q_rnd(other_track->end_pts,
+                                                    other_track->st->time_base,
+                                                    track->st->time_base,
+                                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
+            // Adopt the first start; later ones apply only if >= track->end_pts.
+            if (*start_pts == AV_NOPTS_VALUE)
+                *start_pts = picked_start;
+            else if (picked_start >= track->end_pts)
+                *start_pts = FFMIN(*start_pts, picked_start);
+            // NOTE(review): "_dts" name aside, this accumulates end_pts values.
+            max_track_end_dts = FFMAX(max_track_end_dts, picked_end);
+        }
+    }
+    // If every track was skipped above, this is still track->end_pts.
+    *end_pts = max_track_end_dts;
+}
+
 static int mov_write_ttml_document_from_queue(AVFormatContext *s,
                                               AVFormatContext *ttml_ctx,
                                               MOVTrack *track,
@@ -65,13 +109,87 @@ static int mov_write_ttml_document_from_queue(AVFormatContext *s,
     int64_t start_ts = track->start_dts == AV_NOPTS_VALUE ?
                        0 : (track->start_dts + track->track_duration);
     int64_t end_ts   = start_ts;
+    unsigned int time_limited = 0;
+    PacketList back_to_queue_list = { 0 };
+
+    if (*out_start_ts != AV_NOPTS_VALUE) {
+        // we have non-nopts values here, thus we have been given a time range
+        time_limited = 1;
+        start_ts = *out_start_ts;
+        end_ts   = *out_start_ts + *out_duration;
+    }
 
     if ((ret = avformat_write_header(ttml_ctx, NULL)) < 0) {
         return ret;
     }
 
     while (!avpriv_packet_list_get(&track->squashed_packet_queue, pkt)) {
-        end_ts = FFMAX(end_ts, pkt->pts + pkt->duration);
+        unsigned int stop_at_current_packet = 0;
+        int64_t pts_before      = pkt->pts;
+        int64_t duration_before = pkt->duration;
+
+        if (time_limited) {
+            // special cases first:
+            if (pkt->pts + pkt->duration < start_ts) {
+                // too late for our fragment, unfortunately
+                // unref and proceed to next packet in queue.
+                av_log(s, AV_LOG_WARNING,
+                       "Very late TTML packet in queue, dropping packet with "
+                       "pts: %"PRId64", duration: %"PRId64"\n",
+                       pkt->pts, pkt->duration);
+                av_packet_unref(pkt);
+                goto next_iteration;
+            } else if (pkt->pts >= end_ts) {
+                // starts after this fragment, put back to original queue
+                ret = avpriv_packet_list_put(&track->squashed_packet_queue,
+                                             pkt, av_packet_ref,
+                                             FF_PACKETLIST_FLAG_PREPEND);
+                if (ret < 0)
+                    goto cleanup;
+
+                stop_at_current_packet = 1;
+                goto next_iteration;
+            }
+
+            // limit packet pts to start_ts
+            if (pkt->pts < start_ts) {
+                pkt->duration -= start_ts - pkt->pts;
+                pkt->pts = start_ts;
+            }
+
+            if (pkt->pts + pkt->duration > end_ts) {
+                // goes over our current fragment, create duplicate and
+                // put it back to list after iteration has finished in
+                // order to handle multiple subtitles at the same time.
+                int64_t offset = end_ts - pkt->pts;
+
+                ret = avpriv_packet_list_put(&back_to_queue_list,
+                                             pkt, av_packet_ref,
+                                             FF_PACKETLIST_FLAG_PREPEND);
+                if (ret < 0)
+                    goto cleanup;
+
+                back_to_queue_list.head->pkt.pts =
+                back_to_queue_list.head->pkt.dts =
+                back_to_queue_list.head->pkt.pts + offset;
+                back_to_queue_list.head->pkt.duration -= offset;
+
+                // and for our normal packet we just set duration to offset
+                pkt->duration = offset;
+            }
+        } else {
+            end_ts = FFMAX(end_ts, pkt->pts + pkt->duration);
+        }
+
+        av_log(s, AV_LOG_TRACE,
+               "TTML packet writeout: pts: %"PRId64" (%"PRId64"), "
+               "duration: %"PRId64"\n",
+               pkt->pts, pkt->pts - start_ts, pkt->duration);
+        if (pkt->pts != pts_before || pkt->duration != duration_before) {
+            av_log(s, AV_LOG_TRACE,
+                   "Adjustments: pts: %"PRId64", duration: %"PRId64"\n",
+                   pkt->pts - pts_before, pkt->duration - duration_before);
+        }
 
         // in case of the 'dfxp' muxing mode, each written document is offset
         // to its containing sample's beginning.
@@ -89,6 +207,10 @@ static int mov_write_ttml_document_from_queue(AVFormatContext *s,
         }
 
         av_packet_unref(pkt);
+
+next_iteration:
+        if (stop_at_current_packet)
+            break;
     }
 
     if ((ret = av_write_trailer(ttml_ctx)) < 0)
@@ -100,15 +222,30 @@ static int mov_write_ttml_document_from_queue(AVFormatContext *s,
     ret = 0;
 
 cleanup:
+    while (!avpriv_packet_list_get(&back_to_queue_list, pkt)) {
+        ret = avpriv_packet_list_put(&track->squashed_packet_queue,
+                                     pkt, av_packet_ref,
+                                     FF_PACKETLIST_FLAG_PREPEND);
+
+        // unrelated to whether we succeed or not, we unref the packet
+        // received from the temporary list.
+        av_packet_unref(pkt);
+
+        if (ret < 0) {
+            avpriv_packet_list_free(&back_to_queue_list);
+            break;
+        }
+    }
     return ret;
 }
 
 int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
                                          MOVTrack *track, AVPacket *pkt)
 {
+    MOVMuxContext *mov = s->priv_data;
     AVFormatContext *ttml_ctx = NULL;
     // values for the generated AVPacket
-    int64_t start_ts = 0;
+    int64_t start_ts = AV_NOPTS_VALUE;
     int64_t duration = 0;
 
     int ret = AVERROR_BUG;
@@ -119,12 +256,30 @@ int ff_mov_generate_squashed_ttml_packet(AVFormatContext *s,
         goto cleanup;
     }
 
+    if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
+        int64_t calculated_start = AV_NOPTS_VALUE;
+        int64_t calculated_end = AV_NOPTS_VALUE;
+
+        mov_calculate_start_and_end_of_other_tracks(s, track, &calculated_start, &calculated_end);
+
+        if (calculated_start != AV_NOPTS_VALUE) {
+            start_ts = calculated_start;
+            duration = calculated_end - calculated_start;
+            av_log(s, AV_LOG_VERBOSE,
+                   "Calculated subtitle fragment start: %"PRId64", "
+                   "duration: %"PRId64"\n",
+                   start_ts, duration);
+        }
+    }
+
     if (!track->squashed_packet_queue.head) {
         // empty queue, write minimal empty document with zero duration
         avio_write(ttml_ctx->pb, empty_ttml_document,
                    sizeof(empty_ttml_document) - 1);
-        start_ts = 0;
-        duration = 0;
+        if (start_ts == AV_NOPTS_VALUE) {
+            start_ts = 0;
+            duration = 0;
+        }
         goto generate_packet;
     }
 
-- 
2.38.1