[FFmpeg-devel] [PATCH 3/3] lavf/movenc: support iTunes cover art
Matthieu Bouron
matthieu.bouron at gmail.com
Sun Jul 28 17:28:30 CEST 2013
On Sat, Jul 27, 2013 at 09:37:25PM +0200, Matthieu Bouron wrote:
>
> BTW, updated patch attached (fixing an indentation mistake + return
> immediately after storing attached pic in mov_write_packet function).
Updated and rebased patch attached (take psp check into account + fix a
warning).
[...]
-------------- next part --------------
>From 5180daf02567e91f47190e3ef33a5b792db68f62 Mon Sep 17 00:00:00 2001
From: Matthieu Bouron <matthieu.bouron at gmail.com>
Date: Thu, 27 Jun 2013 18:12:50 +0200
Subject: [PATCH] lavf/movenc: support iTunes cover art
Video streams with AV_DISPOSITION_ATTACHED_PIC will be used as cover art
and won't be muxed as normal tracks in the resulting file.
---
libavformat/movenc.c | 200 +++++++++++++++++++++++++++++++++++++++++------
libavformat/movenc.h | 10 ++-
libavformat/movenchint.c | 14 +++-
3 files changed, 195 insertions(+), 29 deletions(-)
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 42ec3f2..08bdbc5 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -104,6 +104,15 @@ static int is_co64_required(const MOVTrack *track)
return 0;
}
+/**
+ * Tell whether a stream is to be handled as an attached picture (cover art).
+ *
+ * The answer is always "no" when the muxer mode has the MODE_3GP or MODE_MOV
+ * bit set; otherwise it depends solely on the stream carrying the
+ * AV_DISPOSITION_ATTACHED_PIC disposition flag.
+ *
+ * @param mov muxer private context (only its mode is inspected)
+ * @param st  stream to test (only its disposition is inspected)
+ * @return 1 if the stream is an attached picture for this mode, 0 otherwise
+ */
+static int mov_stream_is_apic(MOVMuxContext *mov, AVStream *st)
+{
+    if (mov->mode & (MODE_3GP | MODE_MOV))
+        return 0;
+
+    return (st->disposition & AV_DISPOSITION_ATTACHED_PIC) ? 1 : 0;
+}
+
/* Chunk offset atom */
static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
{
@@ -2064,6 +2073,46 @@ static int mov_write_int8_metadata(AVFormatContext *s, AVIOContext *pb,
return size;
}
+/**
+ * Write the "covr" atom containing the stored cover art pictures.
+ *
+ * A zero size field and the "covr" fourcc are written first. Every packet of
+ * the list whose stream codec is MJPEG, PNG or BMP is then written as a
+ * "data" child: 32-bit size (16 + payload size), "data" fourcc, 32-bit type
+ * code, a 32-bit zero field and the raw packet bytes. Packets using any
+ * other codec are reported with an error message and skipped.
+ *
+ * @param s      muxer context; used to look up the codec of each packet's
+ *               stream and as logging context
+ * @param pb     I/O context the atom is written to
+ * @param covers head of the list of cover art packets
+ * @return the result of update_size() for the atom started at the position
+ *         pb had on entry
+ */
+static int mov_write_covr(AVFormatContext *s, AVIOContext *pb, AVPacketList *covers)
+{
+    int64_t pos = avio_tell(pb);
+
+    avio_wb32(pb, 0); /* size field; pos is handed to update_size() below */
+    ffio_wfourcc(pb, "covr");
+
+    /* Advancing in the loop header keeps the "skip" path from having to
+     * step the list by hand before continuing. */
+    for (; covers; covers = covers->next) {
+        int type;
+        AVPacket *pkt = &covers->pkt;
+        AVCodecContext *codec = s->streams[pkt->stream_index]->codec;
+
+        /* Type code stored in the "data" atom for each picture format. */
+        switch (codec->codec_id) {
+        case AV_CODEC_ID_MJPEG:
+            type = 0xD;
+            break;
+        case AV_CODEC_ID_PNG:
+            type = 0xE;
+            break;
+        case AV_CODEC_ID_BMP:
+            type = 0x1B;
+            break;
+        default:
+            av_log(s, AV_LOG_ERROR, "unsupported codec %s for cover, skipping\n",
+                   codec->codec_name);
+            continue;
+        }
+
+        avio_wb32(pb, 16 + pkt->size); /* 16 = size + fourcc + type + zero field */
+        ffio_wfourcc(pb, "data");
+        avio_wb32(pb, type);
+        avio_wb32(pb, 0);
+        avio_write(pb, pkt->data, pkt->size);
+    }
+
+    return update_size(pb, pos);
+}
+
/* iTunes meta data list */
static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
AVFormatContext *s)
@@ -2093,6 +2142,10 @@ static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
mov_write_int8_metadata (s, pb, "stik", "media_type",1);
mov_write_int8_metadata (s, pb, "hdvd", "hd_video", 1);
mov_write_int8_metadata (s, pb, "pgap", "gapless_playback",1);
+
+ if (mov->covers)
+ mov_write_covr(s, pb, mov->covers);
+
mov_write_trkn_tag(pb, mov, s);
mov_write_tmpo_tag(pb, s);
return update_size(pb, pos);
@@ -2197,7 +2250,7 @@ static int mov_write_udta_tag(AVIOContext *pb, MOVMuxContext *mov,
int i, ret, size;
uint8_t *buf;
- for (i = 0; i < s->nb_streams; i++)
+ for (i = 0; i < mov->nb_input_streams; i++)
if (mov->tracks[i].enc->flags & CODEC_FLAG_BITEXACT) {
return 0;
}
@@ -2340,7 +2393,7 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
}
if (mov->chapter_track)
- for (i = 0; i < s->nb_streams; i++) {
+ for (i = 0; i < mov->nb_input_streams; i++) {
mov->tracks[i].tref_tag = MKTAG('c','h','a','p');
mov->tracks[i].tref_id = mov->tracks[mov->chapter_track].track_id;
}
@@ -2365,14 +2418,14 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
mov_write_iods_tag(pb, mov);
for (i = 0; i < mov->nb_streams; i++) {
if (mov->tracks[i].entry > 0 || mov->flags & FF_MOV_FLAG_FRAGMENT) {
- if (i < s->nb_streams){
+ if (i < mov->nb_input_streams) {
int codec_type= s->streams[i]->codec->codec_type;
if (codec_type==AVMEDIA_TYPE_AUDIO || codec_type==AVMEDIA_TYPE_SUBTITLE){
mov->tracks[i].secondary= not_first[codec_type];
not_first[codec_type]= 1;
}
}
- mov_write_trak_tag(pb, mov, &(mov->tracks[i]), i < s->nb_streams ? s->streams[i] : NULL);
+ mov_write_trak_tag(pb, mov, &(mov->tracks[i]), i < mov->nb_input_streams ? s->streams[i] : NULL);
}
}
if (mov->flags & FF_MOV_FLAG_FRAGMENT)
@@ -2800,7 +2853,8 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO)
+ if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+ !mov_stream_is_apic(mov, st))
has_video = 1;
if (st->codec->codec_id == AV_CODEC_ID_H264)
has_h264 = 1;
@@ -3007,6 +3061,18 @@ static int get_moov_size(AVFormatContext *s)
return ret;
}
+/**
+ * Find the track associated with a given stream index.
+ *
+ * Walks mov->tracks from entry 0 up to mov->nb_streams - 1 and returns the
+ * position of the first track whose stream_index field equals the requested
+ * value.
+ *
+ * @param mov          muxer private context holding the track array
+ * @param stream_index stream index to look for
+ * @return index into mov->tracks, or -1 if no track matches
+ */
+int ff_mov_get_track_index(MOVMuxContext *mov, int stream_index)
+{
+    int idx = 0;
+
+    while (idx < mov->nb_streams) {
+        if (mov->tracks[idx].stream_index == stream_index)
+            return idx;
+        idx++;
+    }
+
+    return -1;
+}
+
static int mov_flush_fragment(AVFormatContext *s)
{
MOVMuxContext *mov = s->priv_data;
@@ -3136,12 +3202,19 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
{
MOVMuxContext *mov = s->priv_data;
AVIOContext *pb = s->pb;
- MOVTrack *trk = &mov->tracks[pkt->stream_index];
- AVCodecContext *enc = trk->enc;
+ int trk_index = ff_mov_get_track_index(mov, pkt->stream_index);
+ MOVTrack *trk;
+ AVCodecContext *enc;
unsigned int samples_in_chunk = 0;
int size = pkt->size;
uint8_t *reformatted_data = NULL;
+ if (trk_index < 0)
+ return 0;
+
+ trk = &mov->tracks[trk_index];
+ enc = trk->enc;
+
if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
int ret;
if (mov->fragments > 0) {
@@ -3289,14 +3362,21 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
{
MOVMuxContext *mov = s->priv_data;
- MOVTrack *trk = &mov->tracks[pkt->stream_index];
- AVCodecContext *enc = trk->enc;
+ int trk_index = ff_mov_get_track_index(mov, pkt->stream_index);
+ MOVTrack *trk;
+ AVCodecContext *enc;
int64_t frag_duration = 0;
int size = pkt->size;
if (!pkt->size)
return 0; /* Discard 0 sized packets */
+ if (trk_index < 0)
+ return 0;
+
+ trk = &mov->tracks[trk_index];
+ enc = trk->enc;
+
if (trk->entry && pkt->stream_index < s->nb_streams)
frag_duration = av_rescale_q(pkt->dts - trk->cluster[0].dts,
s->streams[pkt->stream_index]->time_base,
@@ -3343,6 +3423,7 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
} else {
int i;
MOVMuxContext *mov = s->priv_data;
+ AVStream *st = s->streams[pkt->stream_index];
if (!pkt->size) return 0; /* Discard 0 sized packets */
@@ -3378,24 +3459,56 @@ static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
}
}
+ if (mov_stream_is_apic(mov, st)) {
+
+ /* warn only once */
+ if (st->nb_frames == 1) {
+ av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
+ " ignoring.\n", pkt->stream_index);
+ }
+
+ if (st->nb_frames == 0) {
+ int ret;
+ AVPacketList *last, *covers = av_mallocz(sizeof(AVPacketList));
+ if (!covers)
+ return AVERROR(ENOMEM);
+
+ if ((ret = av_copy_packet(&covers->pkt, pkt)) < 0) {
+ av_freep(&covers);
+ return ret;
+ }
+
+ if (!mov->covers)
+ mov->covers = covers;
+ else {
+ last = mov->covers;
+ while (last->next)
+ last = last->next;
+ last->next = covers;
+ }
+ }
+ return 0;
+ }
+
return mov_write_single_packet(s, pkt);
}
}
// QuickTime chapters involve an additional text track with the chapter names
// as samples, and a tref pointing from the other tracks to the chapter one.
-static void mov_create_chapter_track(AVFormatContext *s, int tracknum)
+static void mov_create_chapter_track(AVFormatContext *s, int tracknum, int stream_index)
{
AVIOContext *pb;
MOVMuxContext *mov = s->priv_data;
MOVTrack *track = &mov->tracks[tracknum];
- AVPacket pkt = { .stream_index = tracknum, .flags = AV_PKT_FLAG_KEY };
+ AVPacket pkt = { .stream_index = stream_index, .flags = AV_PKT_FLAG_KEY };
int i, len;
track->mode = mov->mode;
track->tag = MKTAG('t','e','x','t');
track->timescale = MOV_TIMESCALE;
+ track->stream_index = stream_index;
track->enc = avcodec_alloc_context3(NULL);
track->enc->codec_type = AVMEDIA_TYPE_SUBTITLE;
@@ -3463,14 +3576,14 @@ static void mov_create_chapter_track(AVFormatContext *s, int tracknum)
}
}
-static int mov_create_timecode_track(AVFormatContext *s, int index, int src_index, const char *tcstr)
+static int mov_create_timecode_track(AVFormatContext *s, int index, int stream_index, int src_index, const char *tcstr)
{
int ret;
MOVMuxContext *mov = s->priv_data;
MOVTrack *track = &mov->tracks[index];
AVStream *src_st = s->streams[src_index];
AVTimecode tc;
- AVPacket pkt = {.stream_index = index, .flags = AV_PKT_FLAG_KEY, .size = 4};
+ AVPacket pkt = {.stream_index = stream_index, .flags = AV_PKT_FLAG_KEY, .size = 4};
AVRational rate = {src_st->codec->time_base.den, src_st->codec->time_base.num};
/* if the codec time base makes no sense, try to fallback on stream frame rate */
@@ -3489,6 +3602,7 @@ static int mov_create_timecode_track(AVFormatContext *s, int index, int src_inde
track->mode = mov->mode;
track->tag = MKTAG('t','m','c','d');
track->src_track = src_index;
+ track->stream_index = stream_index;
track->timescale = mov->tracks[src_index].timescale;
if (tc.flags & AV_TIMECODE_FLAG_DROPFRAME)
track->timecode_flags |= MOV_TIMECODE_FLAG_DROPFRAME;
@@ -3512,7 +3626,8 @@ static int mov_write_header(AVFormatContext *s)
AVIOContext *pb = s->pb;
MOVMuxContext *mov = s->priv_data;
AVDictionaryEntry *t, *global_tcr = av_dict_get(s->metadata, "timecode", NULL, 0);
- int i, hint_track = 0, tmcd_track = 0;
+ int i, stream_index = -1, hint_track = 0, hint_stream_index = 0;
+ int tmcd_track = 0, tmcd_stream_index = 0;
/* Set the FRAGMENT flag if any of the fragmentation methods are
* enabled. */
@@ -3560,7 +3675,8 @@ static int mov_write_header(AVFormatContext *s)
int video_streams_nb = 0, audio_streams_nb = 0, other_streams_nb = 0;
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO)
+ if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+ !mov_stream_is_apic(mov, st))
video_streams_nb++;
else if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO)
audio_streams_nb++;
@@ -3575,29 +3691,45 @@ static int mov_write_header(AVFormatContext *s)
mov_write_uuidprof_tag(pb, s);
}
- mov->nb_streams = s->nb_streams;
- if (mov->mode & (MODE_MOV|MODE_IPOD) && s->nb_chapters)
+ mov->next_stream_index = mov->nb_streams = s->nb_streams;
+
+ for (i = 0; i < s->nb_streams; i++) {
+ AVStream *st = s->streams[i];
+ if (mov_stream_is_apic(mov, st))
+ mov->nb_streams--;
+ }
+
+ mov->nb_input_streams = mov->nb_streams;
+
+ if (mov->mode & (MODE_MOV|MODE_IPOD) && s->nb_chapters) {
mov->chapter_track = mov->nb_streams++;
+ mov->chapter_stream_index = mov->next_stream_index++;
+ }
if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
/* Add hint tracks for each audio and video stream */
hint_track = mov->nb_streams;
+ hint_stream_index = mov->next_stream_index++;
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO ||
+ if ((st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+ !mov_stream_is_apic(mov, st)) ||
st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
mov->nb_streams++;
+ mov->next_stream_index++;
}
}
}
if (mov->mode == MODE_MOV) {
tmcd_track = mov->nb_streams;
+ tmcd_stream_index = mov->next_stream_index;
/* +1 tmcd track for each video stream with a timecode */
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+ if ((st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+ !mov_stream_is_apic(mov, st)) &&
(global_tcr || av_dict_get(st->metadata, "timecode", NULL, 0)))
mov->nb_meta_tmcd++;
}
@@ -3615,6 +3747,7 @@ static int mov_write_header(AVFormatContext *s)
}
mov->nb_streams += mov->nb_meta_tmcd;
+ mov->next_stream_index += mov->nb_meta_tmcd;
}
mov->tracks = av_mallocz(mov->nb_streams * sizeof(*mov->tracks));
@@ -3623,9 +3756,17 @@ static int mov_write_header(AVFormatContext *s)
for (i = 0; i < s->nb_streams; i++) {
AVStream *st= s->streams[i];
- MOVTrack *track= &mov->tracks[i];
+ MOVTrack *track;
AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL,0);
+ /* skip cover art streams */
+ if (mov_stream_is_apic(mov, st))
+ continue;
+
+ stream_index++;
+ track = &mov->tracks[stream_index];
+ track->stream_index = i;
+
track->enc = st->codec;
track->language = ff_mov_iso639_to_lang(lang?lang->value:"und", mov->mode!=MODE_MOV);
if (track->language < 0)
@@ -3734,17 +3875,19 @@ static int mov_write_header(AVFormatContext *s)
mov->time += 0x7C25B080; // 1970 based -> 1904 based
if (mov->chapter_track)
- mov_create_chapter_track(s, mov->chapter_track);
+ mov_create_chapter_track(s, mov->chapter_track, mov->chapter_stream_index);
if (mov->flags & FF_MOV_FLAG_RTP_HINT) {
/* Initialize the hint tracks for each audio and video stream */
for (i = 0; i < s->nb_streams; i++) {
AVStream *st = s->streams[i];
- if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO ||
+ if ((st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+ !mov_stream_is_apic(mov, st)) ||
st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
- if (ff_mov_init_hinting(s, hint_track, i) < 0)
+ if (ff_mov_init_hinting(s, hint_track, hint_stream_index, i) < 0)
goto error;
hint_track++;
+ hint_stream_index++;
}
}
}
@@ -3760,9 +3903,10 @@ static int mov_write_header(AVFormatContext *s)
t = av_dict_get(st->metadata, "timecode", NULL, 0);
if (!t)
continue;
- if (mov_create_timecode_track(s, tmcd_track, i, t->value) < 0)
+ if (mov_create_timecode_track(s, tmcd_track, tmcd_stream_index, i, t->value) < 0)
goto error;
tmcd_track++;
+ tmcd_stream_index++;
}
}
}
@@ -3881,6 +4025,7 @@ static int mov_write_trailer(AVFormatContext *s)
{
MOVMuxContext *mov = s->priv_data;
AVIOContext *pb = s->pb;
+ AVPacketList *covers = mov->covers;
int64_t moov_pos;
int res = 0;
int i;
@@ -3971,6 +4116,13 @@ static int mov_write_trailer(AVFormatContext *s)
av_freep(&mov->tracks);
+ while (covers) {
+ AVPacketList *next = covers->next;
+ av_free_packet(&covers->pkt);
+ av_freep(&covers);
+ covers = next;
+ }
+
return res;
}
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index a6571d5..56709fd 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -141,6 +141,8 @@ typedef struct MOVTrack {
int packet_entry;
int slices;
} vc1_info;
+
+ int stream_index;
} MOVTrack;
typedef struct MOVMuxContext {
@@ -172,6 +174,11 @@ typedef struct MOVMuxContext {
int use_editlist;
int video_track_timescale;
+
+ AVPacketList *covers;
+ int nb_input_streams; ///< number of input stream used
+ int next_stream_index; ///< next stream index available
+ int chapter_stream_index; ///< chapter stream index
} MOVMuxContext;
#define FF_MOV_FLAG_RTP_HINT 1
@@ -185,10 +192,11 @@ typedef struct MOVMuxContext {
int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt);
-int ff_mov_init_hinting(AVFormatContext *s, int index, int src_index);
+int ff_mov_init_hinting(AVFormatContext *s, int index, int stream_index, int src_index);
int ff_mov_add_hinted_packet(AVFormatContext *s, AVPacket *pkt,
int track_index, int sample,
uint8_t *sample_data, int sample_size);
void ff_mov_close_hinting(MOVTrack *track);
+int ff_mov_get_track_index(MOVMuxContext *mov, int stream_index);
#endif /* AVFORMAT_MOVENC_H */
diff --git a/libavformat/movenchint.c b/libavformat/movenchint.c
index 943680e..bb5fd38 100644
--- a/libavformat/movenchint.c
+++ b/libavformat/movenchint.c
@@ -20,22 +20,28 @@
*/
#include "movenc.h"
+#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "internal.h"
#include "rtpenc_chain.h"
#include "avio_internal.h"
#include "rtp.h"
-int ff_mov_init_hinting(AVFormatContext *s, int index, int src_index)
+int ff_mov_init_hinting(AVFormatContext *s, int index, int stream_index, int src_index)
{
MOVMuxContext *mov = s->priv_data;
MOVTrack *track = &mov->tracks[index];
- MOVTrack *src_track = &mov->tracks[src_index];
AVStream *src_st = s->streams[src_index];
+ int src_track_index = ff_mov_get_track_index(mov, src_index);
+ MOVTrack *src_track;
int ret = AVERROR(ENOMEM);
+ av_assert0(src_track_index >= 0);
+ src_track = &mov->tracks[src_track_index];
+
track->tag = MKTAG('r','t','p',' ');
- track->src_track = src_index;
+ track->src_track = src_track_index;
+ track->stream_index = stream_index;
track->enc = avcodec_alloc_context3(NULL);
if (!track->enc)
@@ -445,7 +451,7 @@ int ff_mov_add_hinted_packet(AVFormatContext *s, AVPacket *pkt,
hint_pkt.size = size = avio_close_dyn_buf(hintbuf, &buf);
hint_pkt.data = buf;
hint_pkt.pts = hint_pkt.dts;
- hint_pkt.stream_index = track_index;
+ hint_pkt.stream_index = trk->stream_index;
if (pkt->flags & AV_PKT_FLAG_KEY)
hint_pkt.flags |= AV_PKT_FLAG_KEY;
if (count > 0)
--
1.7.12.4 (Apple Git-37)
More information about the ffmpeg-devel
mailing list