[FFmpeg-devel] [PATCH 3/3] lavf/movenc: support iTunes cover art

Wed Jul 3 12:08:38 CEST 2013

On Tue, Jul 02, 2013 at 10:43:39PM +0200, Michael Niedermayer wrote:
> On Tue, Jul 02, 2013 at 09:33:04PM +0200, Matthieu Bouron wrote:
> > On Tue, Jul 2, 2013 at 7:46 PM, Michael Niedermayer <michaelni at gmx.at>wrote:
> > 
> > > On Sun, Jun 30, 2013 at 04:15:46PM +0200, Matthieu Bouron wrote:
> > > > Cover art muxing is done by introducing the -cover_stream_index option
> > > > which takes an output stream index as argument.
> > > > The stream used for the cover art is not muxed as a track in the
> > > > resulting file.
> > > > ---
> > > >  libavformat/movenc.c     | 157
> > > +++++++++++++++++++++++++++++++++++++++++++----
> > > >  libavformat/movenc.h     |   5 ++
> > > >  libavformat/movenchint.c |   1 +
> > > >  3 files changed, 152 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> > > > index 5577530..f411493 100644
> > > > --- a/libavformat/movenc.c
> > > > +++ b/libavformat/movenc.c
> > > > @@ -63,6 +63,7 @@ static const AVOption options[] = {
> > > >      { "ism_lookahead", "Number of lookahead entries for ISM files",
> > > offsetof(MOVMuxContext, ism_lookahead), AV_OPT_TYPE_INT, {.i64 = 0}, 0,
> > > INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
> > > >      { "use_editlist", "use edit list", offsetof(MOVMuxContext,
> > > use_editlist), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1,
> > > AV_OPT_FLAG_ENCODING_PARAM},
> > > >      { "video_track_timescale", "set timescale of all video tracks",
> > > offsetof(MOVMuxContext, video_track_timescale), AV_OPT_TYPE_INT, {.i64 =
> > > 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
> > > > +    { "cover_stream_index", "video stream index to use for cover art",
> > > offsetof(MOVMuxContext, cover_stream_index), AV_OPT_TYPE_INT, {.i64 = -1},
> > > -1, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
> > > >      { NULL },
> > >
> > > isnt AV_DISPOSITION_ATTACHED_PIC enough ?
> > > why is this option needed ?
> > >
> > 
> > I guess AV_DISPOSITION_ATTACHED_PIC could be enough.
> > My idea here was to let the user choose the cover art from any output
> > streams he likes with -cover_stream_index. If the input does not have any
> > cover arts, can a custom stream be flagged as an attached pic ?
> 
> if it cannot then such feature should be added but certainly doesnt
> belong in a single individual muxer

You're right, using AV_DISPOSITION_ATTACHED_PIC seems a better solution
than introducing the cover_stream_index option.

Here is a new patch which uses all streams with AV_DISPOSITION_ATTACHED_PIC
as cover art. It works for all modes which use the iTunes metadata (that is,
every mode except 3gp and mov).

Currently the cover art codec is only checked at the end (when writing the
metadata). If it is not BMP, MJPEG or PNG, the user is warned that the cover
art is ignored.

Do you think this check should be included in the new mov_stream_is_apic
function, so that apic streams which do not have the right codec are no
longer treated as cover art (and are used as normal streams instead)?
IMHO it does not feel right to use them as normal streams.

Matthieu
-------------- next part --------------
>From a2ea6cfb8b10f553d4a6e131b2cf20d054bb284b Mon Sep 17 00:00:00 2001
From: Matthieu Bouron <matthieu.bouron at gmail.com>
Date: Thu, 27 Jun 2013 18:12:50 +0200
Subject: [PATCH 2/2] lavf/movenc: support iTunes cover art

Video streams with AV_DISPOSITION_ATTACHED_PIC will be used as cover art
and won't be muxed as normal tracks in the resulting file.
---
 libavformat/movenc.c     | 152 +++++++++++++++++++++++++++++++++++++++++++----
 libavformat/movenc.h     |   4 ++
 libavformat/movenchint.c |   1 +
 3 files changed, 146 insertions(+), 11 deletions(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 5d224bc..170a4a2 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -104,6 +104,15 @@ static int is_co64_required(const MOVTrack *track)
     return 0;
 }
 
+static int mov_stream_is_apic(MOVMuxContext *mov, AVStream *st)
+{
+    if ((mov->mode & MODE_3GP) || (mov->mode & MODE_MOV))
+        return 0;
+    if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
+        return 1;
+    return 0;
+}
+
 /* Chunk offset atom */
 static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
 {
@@ -2067,6 +2076,46 @@ static int mov_write_int8_metadata(AVFormatContext *s, AVIOContext *pb,
     return size;
 }
 
+static int mov_write_covr(AVFormatContext *s, AVIOContext *pb, AVPacketList *covers)
+{
+    int64_t pos = avio_tell(pb);
+
+    avio_wb32(pb, 0);
+    ffio_wfourcc(pb, "covr");
+
+    while (covers) {
+        int type;
+        AVPacket *pkt = &covers->pkt;
+        enum AVCodecID codec_id = s->streams[pkt->stream_index]->codec->codec_id;
+
+        switch(codec_id) {
+        case CODEC_ID_MJPEG:
+            type = 0xD;
+            break;
+        case CODEC_ID_PNG:
+            type = 0xE;
+            break;
+        case CODEC_ID_BMP:
+            type = 0x1B;
+            break;
+        default:
+            covers = covers->next;
+            av_log(s, AV_LOG_ERROR, "unsupported codec %s for cover, skipping", s->streams[pkt->stream_index]->codec->codec_name);
+            continue;
+        }
+
+        avio_wb32(pb, 16 + pkt->size);
+        ffio_wfourcc(pb, "data");
+        avio_wb32(pb, type);
+        avio_wb32(pb , 0);
+        avio_write(pb, pkt->data, pkt->size);
+
+        covers = covers->next;
+    }
+
+    return update_size(pb, pos);
+}
+
 /* iTunes meta data list */
 static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
                               AVFormatContext *s)
@@ -2096,6 +2145,10 @@ static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
     mov_write_int8_metadata  (s, pb, "stik",    "media_type",1);
     mov_write_int8_metadata  (s, pb, "hdvd",    "hd_video",  1);
     mov_write_int8_metadata  (s, pb, "pgap",    "gapless_playback",1);
+
+    if (mov->covers)
+        mov_write_covr(s, pb, mov->covers);
+
     mov_write_trkn_tag(pb, mov, s);
     mov_write_tmpo_tag(pb, s);
     return update_size(pb, pos);
@@ -2198,7 +2251,7 @@ static int mov_write_udta_tag(AVIOContext *pb, MOVMuxContext *mov,
     int i, ret, size;
     uint8_t *buf;
 
-    for (i = 0; i < s->nb_streams; i++)
+    for (i = 0; i < mov->nb_streams; i++)
         if (mov->tracks[i].enc->flags & CODEC_FLAG_BITEXACT) {
             return 0;
         }
@@ -2801,7 +2854,8 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
 
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st = s->streams[i];
-        if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO)
+        if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+            !mov_stream_is_apic(mov, st))
             has_video = 1;
         if (st->codec->codec_id == AV_CODEC_ID_H264)
             has_h264 = 1;
@@ -3008,6 +3062,18 @@ static int get_moov_size(AVFormatContext *s)
     return ret;
 }
 
+static MOVTrack *mov_get_track(MOVMuxContext *mov, int stream_index)
+{
+    int i;
+
+    for (i = 0; i < mov->nb_streams; i++) {
+        MOVTrack *track = &mov->tracks[i];
+        if (track->stream_index == stream_index)
+            return track;
+    }
+    return NULL;
+}
+
 static int mov_flush_fragment(AVFormatContext *s)
 {
     MOVMuxContext *mov = s->priv_data;
@@ -3137,12 +3203,15 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     MOVMuxContext *mov = s->priv_data;
     AVIOContext *pb = s->pb;
-    MOVTrack *trk = &mov->tracks[pkt->stream_index];
-    AVCodecContext *enc = trk->enc;
+    MOVTrack *trk = mov_get_track(mov, pkt->stream_index);
+    AVCodecContext *enc;
     unsigned int samples_in_chunk = 0;
     int size= pkt->size;
     uint8_t *reformatted_data = NULL;
 
+    if (!trk) return 0;
+    enc = trk->enc;
+
     if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
         int ret;
         if (mov->fragments > 0) {
@@ -3290,11 +3359,14 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
 static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
 {
         MOVMuxContext *mov = s->priv_data;
-        MOVTrack *trk = &mov->tracks[pkt->stream_index];
-        AVCodecContext *enc = trk->enc;
+        MOVTrack *trk = mov_get_track(mov, pkt->stream_index);
+        AVCodecContext *enc;
         int64_t frag_duration = 0;
         int size = pkt->size;
 
+        if (!trk) return 0;
+        enc = trk->enc;
+
         if (!pkt->size) return 0; /* Discard 0 sized packets */
 
         if (trk->entry && pkt->stream_index < s->nb_streams)
@@ -3343,6 +3415,7 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
     } else {
         int i;
         MOVMuxContext *mov = s->priv_data;
+        AVStream *st = s->streams[pkt->stream_index];
 
         if (!pkt->size) return 0; /* Discard 0 sized packets */
 
@@ -3378,6 +3451,36 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
             }
         }
 
+        if (mov_stream_is_apic(mov, st)) {
+
+            /* warn only once */
+            if (st->nb_frames == 1) {
+                av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+                    " ignoring.\n", pkt->stream_index);
+            }
+
+            if (st->nb_frames == 0) {
+                int ret;
+                AVPacketList *last, *covers = av_mallocz(sizeof(AVPacketList));
+                if (!covers)
+                    return AVERROR(ENOMEM);
+
+                if ((ret = av_copy_packet(&covers->pkt, pkt)) < 0) {
+                    av_freep(&covers);
+                    return ret;
+                }
+
+                if (!mov->covers)
+                    mov->covers = covers;
+                else {
+                    last = mov->covers;
+                    while (last->next)
+                        last = last->next;
+                    last->next = covers;
+                }
+            }
+        }
+
         return mov_write_single_packet(s, pkt);
     }
 }
@@ -3489,6 +3592,7 @@ static int mov_create_timecode_track(AVFormatContext *s, int index, int src_inde
     track->mode      = mov->mode;
     track->tag       = MKTAG('t','m','c','d');
     track->src_track = src_index;
+    track->stream_index = index;
     track->timescale = mov->tracks[src_index].timescale;
     if (tc.flags & AV_TIMECODE_FLAG_DROPFRAME)
         track->timecode_flags |= MOV_TIMECODE_FLAG_DROPFRAME;
@@ -3512,7 +3616,7 @@ static int mov_write_header(AVFormatContext *s)
     AVIOContext *pb = s->pb;
     MOVMuxContext *mov = s->priv_data;
     AVDictionaryEntry *t, *global_tcr = av_dict_get(s->metadata, "timecode", NULL, 0);
-    int i, hint_track = 0, tmcd_track = 0;
+    int i, hint_track = 0, tmcd_track = 0, stream_index = -1;
 
     /* Set the FRAGMENT flag if any of the fragmentation methods are
      * enabled. */
@@ -3567,6 +3671,13 @@ static int mov_write_header(AVFormatContext *s)
     }
 
     mov->nb_streams = s->nb_streams;
+
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st = s->streams[i];
+        if (mov_stream_is_apic(mov, st))
+            mov->nb_streams--;
+    }
+
     if (mov->mode & (MODE_MOV|MODE_IPOD) && s->nb_chapters)
         mov->chapter_track = mov->nb_streams++;
 
@@ -3575,7 +3686,8 @@ static int mov_write_header(AVFormatContext *s)
         hint_track = mov->nb_streams;
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
-            if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO ||
+            if ((st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+                 !mov_stream_is_apic(mov, st)) ||
                 st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
                 mov->nb_streams++;
             }
@@ -3588,7 +3700,8 @@ static int mov_write_header(AVFormatContext *s)
         /* +1 tmcd track for each video stream with a timecode */
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
-            if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+            if ((st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+                !mov_stream_is_apic(mov, st)) &&
                 (global_tcr || av_dict_get(st->metadata, "timecode", NULL, 0)))
                 mov->nb_meta_tmcd++;
         }
@@ -3614,7 +3727,15 @@ static int mov_write_header(AVFormatContext *s)
 
     for(i=0; i<s->nb_streams; i++){
         AVStream *st= s->streams[i];
-        MOVTrack *track= &mov->tracks[i];
+        MOVTrack *track;
+
+        /* skip cover art stream */
+        if (mov_stream_is_apic(mov, st))
+            continue;
+
+        stream_index++;
+        track = &mov->tracks[stream_index];
+        track->stream_index = i;
         AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
 
         track->enc = st->codec;
@@ -3731,7 +3852,8 @@ static int mov_write_header(AVFormatContext *s)
         /* Initialize the hint tracks for each audio and video stream */
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
-            if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO ||
+            if ((st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+                 !mov_stream_is_apic(mov, st)) ||
                 st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
                 ff_mov_init_hinting(s, hint_track, i);
                 hint_track++;
@@ -3871,6 +3993,7 @@ static int mov_write_trailer(AVFormatContext *s)
 {
     MOVMuxContext *mov = s->priv_data;
     AVIOContext *pb = s->pb;
+    AVPacketList *covers = mov->covers;
     int64_t moov_pos;
     int res = 0;
     int i;
@@ -3962,6 +4085,13 @@ static int mov_write_trailer(AVFormatContext *s)
 
     av_freep(&mov->tracks);
 
+    while (covers) {
+        AVPacketList *next = covers->next;
+        av_free_packet(&covers->pkt);
+        av_freep(&covers);
+        covers = next;
+    }
+
     return res;
 }
 
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index a6571d5..38dfb13 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -141,6 +141,8 @@ typedef struct MOVTrack {
         int     packet_entry;
         int     slices;
     } vc1_info;
+
+    int stream_index;
 } MOVTrack;
 
 typedef struct MOVMuxContext {
@@ -172,6 +174,8 @@ typedef struct MOVMuxContext {
 
     int use_editlist;
     int video_track_timescale;
+
+    AVPacketList *covers;
 } MOVMuxContext;
 
 #define FF_MOV_FLAG_RTP_HINT 1
diff --git a/libavformat/movenchint.c b/libavformat/movenchint.c
index cc90f0b..82c19dc 100644
--- a/libavformat/movenchint.c
+++ b/libavformat/movenchint.c
@@ -36,6 +36,7 @@ int ff_mov_init_hinting(AVFormatContext *s, int index, int src_index)
 
     track->tag = MKTAG('r','t','p',' ');
     track->src_track = src_index;
+    track->stream_index = index;
 
     track->enc = avcodec_alloc_context3(NULL);
     if (!track->enc)
-- 
1.8.3.1