[FFmpeg-devel] [PATCH 3/3] lavf/movenc: support iTunes cover art

Sun Jun 30 16:15:46 CEST 2013

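Cover art muxing is enabled through the new -cover_stream_index option,
which takes an output stream index as its argument. The selected stream
must be MJPEG, PNG or BMP video; its first picture is written to the
iTunes "covr" metadata atom, and the stream itself is not muxed as a
track in the resulting file.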
---
 libavformat/movenc.c     | 157 +++++++++++++++++++++++++++++++++++++++++++----
 libavformat/movenc.h     |   5 ++
 libavformat/movenchint.c |   1 +
 3 files changed, 152 insertions(+), 11 deletions(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 5577530..f411493 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -63,6 +63,7 @@ static const AVOption options[] = {
     { "ism_lookahead", "Number of lookahead entries for ISM files", offsetof(MOVMuxContext, ism_lookahead), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
     { "use_editlist", "use edit list", offsetof(MOVMuxContext, use_editlist), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { "video_track_timescale", "set timescale of all video tracks", offsetof(MOVMuxContext, video_track_timescale), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
+    { "cover_stream_index", "video stream index to use for cover art", offsetof(MOVMuxContext, cover_stream_index), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
     { NULL },
 };
 
@@ -2067,6 +2068,46 @@ static int mov_write_int8_metadata(AVFormatContext *s, AVIOContext *pb,
     return size;
 }
 
+/* Write the iTunes "covr" atom with one "data" child per supported packet in
+ * covers; packets of any other codec are logged and skipped. */
+static int mov_write_covr(AVFormatContext *s, AVIOContext *pb, AVPacketList *covers)
+{
+    int64_t pos = avio_tell(pb);
+
+    avio_wb32(pb, 0); /* size placeholder; presumably fixed up by update_size() */
+    ffio_wfourcc(pb, "covr");
+
+    for (; covers; covers = covers->next) {
+        int type;
+        AVPacket *pkt = &covers->pkt;
+        enum AVCodecID codec_id = s->streams[pkt->stream_index]->codec->codec_id;
+
+        switch (codec_id) {
+        case AV_CODEC_ID_MJPEG:
+            type = 0xD;  /* data type written for JPEG covers */
+            break;
+        case AV_CODEC_ID_PNG:
+            type = 0xE;  /* data type written for PNG covers */
+            break;
+        case AV_CODEC_ID_BMP:
+            type = 0x1B; /* data type written for BMP covers */
+            break;
+        default:
+            av_log(s, AV_LOG_ERROR, "unsupported codec %s for cover, skipping\n",
+                   avcodec_get_name(codec_id));
+            continue;
+        }
+
+        avio_wb32(pb, 16 + pkt->size); /* 16 header bytes: size, tag, type, zero word */
+        ffio_wfourcc(pb, "data");
+        avio_wb32(pb, type);
+        avio_wb32(pb, 0);
+        avio_write(pb, pkt->data, pkt->size);
+    }
+
+    return update_size(pb, pos);
+}
+
 /* iTunes meta data list */
 static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
                               AVFormatContext *s)
@@ -2096,6 +2137,10 @@ static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
     mov_write_int8_metadata  (s, pb, "stik",    "media_type",1);
     mov_write_int8_metadata  (s, pb, "hdvd",    "hd_video",  1);
     mov_write_int8_metadata  (s, pb, "pgap",    "gapless_playback",1);
+
+    if (mov->covers)
+        mov_write_covr(s, pb, mov->covers);
+
     mov_write_trkn_tag(pb, mov, s);
     mov_write_tmpo_tag(pb, s);
     return update_size(pb, pos);
@@ -2198,7 +2243,7 @@ static int mov_write_udta_tag(AVIOContext *pb, MOVMuxContext *mov,
     int i, ret, size;
     uint8_t *buf;
 
-    for (i = 0; i < s->nb_streams; i++)
+    for (i = 0; i < mov->nb_streams; i++)
         if (mov->tracks[i].enc->flags & CODEC_FLAG_BITEXACT) {
             return 0;
         }
@@ -2801,7 +2846,7 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
 
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st = s->streams[i];
-        if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO)
+        if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO && i != mov->cover_stream_index)
             has_video = 1;
         if (st->codec->codec_id == AV_CODEC_ID_H264)
             has_h264 = 1;
@@ -3008,6 +3053,18 @@ static int get_moov_size(AVFormatContext *s)
     return ret;
 }
 
+/* Look up the first track whose stream_index equals the given one.
+ * Returns NULL when no track in mov->tracks matches. */
+static MOVTrack *mov_get_track(MOVMuxContext *mov, int stream_index)
+{
+    int n = 0;
+
+    while (n < mov->nb_streams && mov->tracks[n].stream_index != stream_index)
+        n++;
+
+    return n < mov->nb_streams ? &mov->tracks[n] : NULL;
+}
+
 static int mov_flush_fragment(AVFormatContext *s)
 {
     MOVMuxContext *mov = s->priv_data;
@@ -3137,12 +3194,15 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     MOVMuxContext *mov = s->priv_data;
     AVIOContext *pb = s->pb;
-    MOVTrack *trk = &mov->tracks[pkt->stream_index];
-    AVCodecContext *enc = trk->enc;
+    MOVTrack *trk = mov_get_track(mov, pkt->stream_index);
+    AVCodecContext *enc;
     unsigned int samples_in_chunk = 0;
     int size= pkt->size;
     uint8_t *reformatted_data = NULL;
 
+    if (!trk) return 0;
+    enc = trk->enc;
+
     if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
         int ret;
         if (mov->fragments > 0) {
@@ -3290,11 +3350,14 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
 static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
 {
         MOVMuxContext *mov = s->priv_data;
-        MOVTrack *trk = &mov->tracks[pkt->stream_index];
-        AVCodecContext *enc = trk->enc;
+        MOVTrack *trk = mov_get_track(mov, pkt->stream_index);
+        AVCodecContext *enc;
         int64_t frag_duration = 0;
         int size = pkt->size;
 
+        if (!trk) return 0;
+        enc = trk->enc;
+
         if (!pkt->size) return 0; /* Discard 0 sized packets */
 
         if (trk->entry && pkt->stream_index < s->nb_streams)
@@ -3378,6 +3441,37 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
             }
         }
 
+        if (pkt->stream_index == mov->cover_stream_index) {
+            AVStream *st = s->streams[pkt->stream_index];
+
+            /* warn only once */
+            if (st->nb_frames == 1) {
+                av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                    " ignoring.\n", pkt->stream_index);
+            }
+
+            if (st->nb_frames == 0) {
+                int ret;
+                AVPacketList *last, *covers = av_mallocz(sizeof(AVPacketList));
+                if (!covers)
+                    return AVERROR(ENOMEM);
+
+                if ((ret = av_copy_packet(&covers->pkt, pkt)) < 0) {
+                    av_freep(&covers);
+                    return ret;
+                }
+
+                if (!mov->covers)
+                    mov->covers = covers;
+                else {
+                    last = mov->covers;
+                    while (last->next)
+                        last = last->next;
+                    last->next = covers;
+                }
+            }
+        }
+
         return mov_write_single_packet(s, pkt);
     }
 }
@@ -3489,6 +3583,7 @@ static int mov_create_timecode_track(AVFormatContext *s, int index, int src_inde
     track->mode      = mov->mode;
     track->tag       = MKTAG('t','m','c','d');
     track->src_track = src_index;
+    track->stream_index = index;
     track->timescale = mov->tracks[src_index].timescale;
     if (tc.flags & AV_TIMECODE_FLAG_DROPFRAME)
         track->timecode_flags |= MOV_TIMECODE_FLAG_DROPFRAME;
@@ -3512,7 +3607,7 @@ static int mov_write_header(AVFormatContext *s)
     AVIOContext *pb = s->pb;
     MOVMuxContext *mov = s->priv_data;
     AVDictionaryEntry *t, *global_tcr = av_dict_get(s->metadata, "timecode", NULL, 0);
-    int i, hint_track = 0, tmcd_track = 0;
+    int i, hint_track = 0, tmcd_track = 0, stream_index = -1;
 
     /* Set the FRAGMENT flag if any of the fragmentation methods are
      * enabled. */
@@ -3567,6 +3662,30 @@ static int mov_write_header(AVFormatContext *s)
     }
 
     mov->nb_streams = s->nb_streams;
+
+    if (mov->cover_stream_index >= 0) {
+        AVStream *st;
+        if (mov->cover_stream_index >= s->nb_streams) {
+            av_log(s, AV_LOG_ERROR, "cover stream #%d does not exist\n", mov->cover_stream_index);
+            return -1;
+        }
+
+        st = s->streams[mov->cover_stream_index];
+        if (st->codec->codec_type != AVMEDIA_TYPE_VIDEO) {
+            av_log(s, AV_LOG_ERROR, "cover stream #%d is not a video stream\n", mov->cover_stream_index);
+            return -1;
+        }
+
+        if (st->codec->codec_id != CODEC_ID_MJPEG &&
+            st->codec->codec_id != CODEC_ID_PNG &&
+            st->codec->codec_id != CODEC_ID_BMP) {
+            av_log(s, AV_LOG_ERROR, "cover stream #%d has wrong codec type, must be either png, mjpeg or bmp\n", mov->cover_stream_index);
+            return -1;
+        }
+
+        mov->nb_streams--;
+    }
+
     if (mov->mode & (MODE_MOV|MODE_IPOD) && s->nb_chapters)
         mov->chapter_track = mov->nb_streams++;
 
@@ -3575,7 +3694,7 @@ static int mov_write_header(AVFormatContext *s)
         hint_track = mov->nb_streams;
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
-            if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO ||
+            if ((st->codec->codec_type == AVMEDIA_TYPE_VIDEO && i != mov->cover_stream_index) ||
                 st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
                 mov->nb_streams++;
             }
@@ -3588,7 +3707,7 @@ static int mov_write_header(AVFormatContext *s)
         /* +1 tmcd track for each video stream with a timecode */
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
-            if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+            if ((st->codec->codec_type == AVMEDIA_TYPE_VIDEO && i != mov->cover_stream_index) &&
                 (global_tcr || av_dict_get(st->metadata, "timecode", NULL, 0)))
                 mov->nb_meta_tmcd++;
         }
@@ -3614,7 +3733,15 @@ static int mov_write_header(AVFormatContext *s)
 
     for(i=0; i<s->nb_streams; i++){
         AVStream *st= s->streams[i];
-        MOVTrack *track= &mov->tracks[i];
+        MOVTrack *track;
+
+        /* skip cover art stream */
+        if (i == mov->cover_stream_index)
+            continue;
+
+        stream_index++;
+        track = &mov->tracks[stream_index];
+        track->stream_index = i;
         AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
 
         track->enc = st->codec;
@@ -3731,7 +3858,7 @@ static int mov_write_header(AVFormatContext *s)
         /* Initialize the hint tracks for each audio and video stream */
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
-            if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO ||
+            if ((st->codec->codec_type == AVMEDIA_TYPE_VIDEO && i != mov->cover_stream_index) ||
                 st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
                 ff_mov_init_hinting(s, hint_track, i);
                 hint_track++;
@@ -3871,6 +3998,7 @@ static int mov_write_trailer(AVFormatContext *s)
 {
     MOVMuxContext *mov = s->priv_data;
     AVIOContext *pb = s->pb;
+    AVPacketList *covers = mov->covers;
     int64_t moov_pos;
     int res = 0;
     int i;
@@ -3962,6 +4090,13 @@ static int mov_write_trailer(AVFormatContext *s)
 
     av_freep(&mov->tracks);
 
+    while (covers) {
+        AVPacketList *next = covers->next;
+        av_free_packet(&covers->pkt);
+        av_freep(&covers);
+        covers = next;
+    }
+
     return res;
 }
 
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index a6571d5..dbdba3d 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -141,6 +141,8 @@ typedef struct MOVTrack {
         int     packet_entry;
         int     slices;
     } vc1_info;
+
+    int stream_index;
 } MOVTrack;
 
 typedef struct MOVMuxContext {
@@ -172,6 +174,9 @@ typedef struct MOVMuxContext {
 
     int use_editlist;
     int video_track_timescale;
+
+    AVPacketList *covers;
+    int cover_stream_index;
 } MOVMuxContext;
 
 #define FF_MOV_FLAG_RTP_HINT 1
diff --git a/libavformat/movenchint.c b/libavformat/movenchint.c
index cc90f0b..82c19dc 100644
--- a/libavformat/movenchint.c
+++ b/libavformat/movenchint.c
@@ -36,6 +36,7 @@ int ff_mov_init_hinting(AVFormatContext *s, int index, int src_index)
 
     track->tag = MKTAG('r','t','p',' ');
     track->src_track = src_index;
+    track->stream_index = index;
 
     track->enc = avcodec_alloc_context3(NULL);
     if (!track->enc)
-- 
1.8.3.1