[FFmpeg-devel] [PATCH] RFC: Set reasonable subtitle dimensions for timed-text in mov/mp4.
Philip Langdale
philipl at overt.org
Mon Mar 11 00:23:52 CET 2013
See https://ffmpeg.org/trac/ffmpeg/ticket/1845.
It's crazy, but the full spec for timed-text subtitles requires
specifying dimensions and positioning for the subtitle rendering
area, and doing so in pixels, which pretty much means you have to
know the size of the video stream being shown, so that the subtitles
can be meaningfully placed over it.
It's an excellent abstraction breaker, as you might imagine.
This diff shows one possible way of implementing a reasonable
heuristic for the subtitle area: Find the first video stream and
set the subtitle area to the same width and 10% of the height, and
place it 90% of the way down the video, so that it occupies the
bottom 10% of the video area.
This seems reasonable in terms of the height of the subtitles, and
the single video stream is an excellent assumption - and I can't
think of anything sane to do in the face of multiple streams.
The most ridiculous part of this exercise is that the muxer has to
reach into the subtitle track's header and *rewrite* the dimensions
of the track's text box (which is another box inside the main
subtitle area - don't ask) to match the external dimensions.
If we ever support arbitrary styling, it's possible to change the
size and location of the text box in mid stream, and I have no idea
what you'd do there.
Note that, as far as I know, only the Apple QT Player on OSX or
Windows respects this sizing information. The iOS player does not,
most notably. Also note that even the QT Player doesn't appear to
respect the font selection specified in the subtitle stream.
Signed-off-by: Philip Langdale <philipl at overt.org>
---
libavformat/movenc.c | 40 +++++++++++++++++++++++++++++++++++-----
libavformat/movenc.h | 4 ++++
2 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 42496b5..50648cf 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1049,8 +1049,20 @@ static int mov_write_subtitle_tag(AVIOContext *pb, MOVTrack *track)
avio_wb16(pb, 0); /* Reserved */
avio_wb16(pb, 1); /* Data-reference index */
- if (track->enc->extradata_size)
+ if (track->enc->extradata_size) {
+ if (track->enc->extradata_size >= 18) {
+ // Rewrite text box dimensions to match video stream.
+ uint8_t *ed = track->enc->extradata;
+ uint16_t width = track->video_width;
+ uint16_t height = track->video_height;
+ height /= 10;
+ ed[14] = height >> 8;
+ ed[15] = height & 0xFF;
+ ed[16] = width >> 8;
+ ed[17] = width & 0xFF;
+ }
avio_write(pb, track->enc->extradata, track->enc->extradata_size);
+ }
return update_size(pb, pos);
}
@@ -1633,7 +1645,9 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVTrack *track, AVStream *st)
AVDictionaryEntry *rot = av_dict_get(st->metadata, "rotate", NULL, 0);
rotation = (rot && rot->value) ? atoi(rot->value) : 0;
}
- if (rotation == 90) {
+ if (track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE) {
+ write_matrix(pb, 1, 0, 0, 1, 0, (track->video_height * 9) / 10);
+ } else if (rotation == 90) {
write_matrix(pb, 0, 1, -1, 0, track->enc->height, 0);
} else if (rotation == 180) {
write_matrix(pb, -1, 0, 0, -1, track->enc->width, track->enc->height);
@@ -1643,8 +1657,7 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVTrack *track, AVStream *st)
write_matrix(pb, 1, 0, 0, 1, 0, 0);
}
/* Track width and height, for visual only */
- if(st && (track->enc->codec_type == AVMEDIA_TYPE_VIDEO ||
- track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE)) {
+ if(st && (track->enc->codec_type == AVMEDIA_TYPE_VIDEO)) {
if(track->mode == MODE_MOV) {
avio_wb32(pb, track->enc->width << 16);
avio_wb32(pb, track->height << 16);
@@ -1655,6 +1668,9 @@ static int mov_write_tkhd_tag(AVIOContext *pb, MOVTrack *track, AVStream *st)
avio_wb32(pb, sample_aspect_ratio * track->enc->width*0x10000);
avio_wb32(pb, track->height*0x10000);
}
+ } else if (track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE) {
+ avio_wb32(pb, track->video_width * 0x10000);
+ avio_wb32(pb, (track->video_height * 0x10000) / 10);
}
else {
avio_wb32(pb, 0);
@@ -1786,7 +1802,6 @@ static int mov_write_udta_sdp(AVIOContext *pb, MOVTrack *track)
NULL, NULL, 0, 0, ctx);
av_strlcatf(buf, sizeof(buf), "a=control:streamid=%d\r\n", track->track_id);
len = strlen(buf);
-
avio_wb32(pb, len + 24);
ffio_wfourcc(pb, "udta");
avio_wb32(pb, len + 16);
@@ -1803,6 +1818,7 @@ static int mov_write_trak_tag(AVIOContext *pb, MOVMuxContext *mov,
int64_t pos = avio_tell(pb);
avio_wb32(pb, 0); /* size */
ffio_wfourcc(pb, "trak");
+
mov_write_tkhd_tag(pb, track, st);
if (supports_edts(mov))
mov_write_edts_tag(pb, track); // PSP Movies and several other cases require edts box
@@ -3672,6 +3688,20 @@ static int mov_write_header(AVFormatContext *s)
}
}
+ for (i = 0; i < mov->nb_streams; i++) {
+ MOVTrack *track = &mov->tracks[i];
+ if (track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE) {
+ int j;
+ for (j = 0; j < mov->nb_streams; j++) {
+ if (mov->tracks[j].enc->codec_type == AVMEDIA_TYPE_VIDEO) {
+ track->video_width = mov->tracks[j].enc->width;
+ track->video_height = mov->tracks[j].enc->height;
+ break;
+ }
+ }
+ }
+ }
+
if (mov->mode == MODE_ISM) {
/* If no fragmentation options have been set, set a default. */
if (!(mov->flags & (FF_MOV_FLAG_FRAG_KEYFRAME |
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index a5db895..42f120d 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -138,6 +138,10 @@ typedef struct MOVIndex {
int packet_entry;
int slices;
} vc1_info;
+
+ // For subtitle tracks.
+ uint16_t video_width;
+ uint16_t video_height;
} MOVTrack;
typedef struct MOVMuxContext {
--
1.7.10.4
More information about the ffmpeg-devel
mailing list