FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
id3v2.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Fabrice Bellard
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * ID3v2 header parser
24  *
25  * Specifications available at:
26  * http://id3.org/Developer_Information
27  */
28 
29 #include "config.h"
30 
31 #if CONFIG_ZLIB
32 #include <zlib.h>
33 #endif
34 
35 #include "libavutil/avstring.h"
36 #include "libavutil/dict.h"
37 #include "libavutil/intreadwrite.h"
38 #include "avio_internal.h"
39 #include "internal.h"
40 #include "id3v1.h"
41 #include "id3v2.h"
42 
44  { "TALB", "album" },
45  { "TCOM", "composer" },
46  { "TCON", "genre" },
47  { "TCOP", "copyright" },
48  { "TENC", "encoded_by" },
49  { "TIT2", "title" },
50  { "TLAN", "language" },
51  { "TPE1", "artist" },
52  { "TPE2", "album_artist" },
53  { "TPE3", "performer" },
54  { "TPOS", "disc" },
55  { "TPUB", "publisher" },
56  { "TRCK", "track" },
57  { "TSSE", "encoder" },
58  { 0 }
59 };
60 
62  { "TCMP", "compilation" },
63  { "TDRC", "date" },
64  { "TDRL", "date" },
65  { "TDEN", "creation_time" },
66  { "TSOA", "album-sort" },
67  { "TSOP", "artist-sort" },
68  { "TSOT", "title-sort" },
69  { 0 }
70 };
71 
73  { "TAL", "album" },
74  { "TCO", "genre" },
75  { "TCP", "compilation" },
76  { "TT2", "title" },
77  { "TEN", "encoded_by" },
78  { "TP1", "artist" },
79  { "TP2", "album_artist" },
80  { "TP3", "performer" },
81  { "TRK", "track" },
82  { 0 }
83 };
84 
/**
 * Four-character text frame identifiers listed for both ID3v2.3 and ID3v2.4.
 * Entries are stored without a terminating NUL; the all-zero entry ends the list.
 */
const char ff_id3v2_tags[][4] = {
    "TALB", "TBPM", "TCOM", "TCON",
    "TCOP", "TDLY", "TENC", "TEXT",
    "TFLT", "TIT1", "TIT2", "TIT3",
    "TKEY", "TLAN", "TLEN", "TMED",
    "TOAL", "TOFN", "TOLY", "TOPE",
    "TOWN", "TPE1", "TPE2", "TPE3",
    "TPE4", "TPOS", "TPUB", "TRCK",
    "TRSN", "TRSO", "TSRC", "TSSE",
    { 0 },
};
92 
/**
 * Four-character text frame identifiers listed for ID3v2.4 only.
 * The all-zero entry ends the list.
 */
const char ff_id3v2_4_tags[][4] = {
    "TDEN", "TDOR", "TDRC", "TDRL",
    "TDTG", "TIPL", "TMCL", "TMOO",
    "TPRO", "TSOA", "TSOP", "TSOT",
    "TSST",
    { 0 },
};
98 
/**
 * Four-character text frame identifiers listed for ID3v2.3 only.
 * The all-zero entry ends the list.
 */
const char ff_id3v2_3_tags[][4] = {
    "TDAT", "TIME", "TORY",
    "TRDA", "TSIZ", "TYER",
    { 0 },
};
103 
/**
 * Human-readable names of attached picture types, indexed by the picture
 * type byte read from the frame.
 */
const char *ff_id3v2_picture_types[21] = {
    [0]  = "Other",
    [1]  = "32x32 pixels 'file icon'",
    [2]  = "Other file icon",
    [3]  = "Cover (front)",
    [4]  = "Cover (back)",
    [5]  = "Leaflet page",
    [6]  = "Media (e.g. label side of CD)",
    [7]  = "Lead artist/lead performer/soloist",
    [8]  = "Artist/performer",
    [9]  = "Conductor",
    [10] = "Band/Orchestra",
    [11] = "Composer",
    [12] = "Lyricist/text writer",
    [13] = "Recording Location",
    [14] = "During recording",
    [15] = "During performance",
    [16] = "Movie/video screen capture",
    [17] = "A bright coloured fish",
    [18] = "Illustration",
    [19] = "Band/artist logotype",
    [20] = "Publisher/Studio logotype",
};
127 
129  { "image/gif", AV_CODEC_ID_GIF },
130  { "image/jpeg", AV_CODEC_ID_MJPEG },
131  { "image/jpg", AV_CODEC_ID_MJPEG },
132  { "image/png", AV_CODEC_ID_PNG },
133  { "image/tiff", AV_CODEC_ID_TIFF },
134  { "image/bmp", AV_CODEC_ID_BMP },
135  { "JPG", AV_CODEC_ID_MJPEG }, /* ID3v2.2 */
136  { "PNG", AV_CODEC_ID_PNG }, /* ID3v2.2 */
137  { "", AV_CODEC_ID_NONE },
138 };
139 
/**
 * Check whether buf starts with something that looks like an ID3v2 header.
 *
 * @param buf   at least 10 readable bytes
 * @param magic three-character identifier expected in buf[0..2]
 * @return 1 if the identifier matches, neither version byte (buf[3], buf[4])
 *         is 0xff and none of the size bytes buf[6..9] has its top bit set;
 *         0 otherwise. The flags byte buf[5] is not examined.
 */
int ff_id3v2_match(const uint8_t *buf, const char *magic)
{
    int i;

    /* identifier */
    for (i = 0; i < 3; i++)
        if (buf[i] != magic[i])
            return 0;

    /* version bytes */
    if (buf[3] == 0xff || buf[4] == 0xff)
        return 0;

    /* size bytes only carry 7 bits each */
    for (i = 6; i <= 9; i++)
        if (buf[i] & 0x80)
            return 0;

    return 1;
}
152 
154 {
155  int len = ((buf[6] & 0x7f) << 21) +
156  ((buf[7] & 0x7f) << 14) +
157  ((buf[8] & 0x7f) << 7) +
158  (buf[9] & 0x7f) +
160  if (buf[5] & 0x10)
161  len += ID3v2_HEADER_SIZE;
162  return len;
163 }
164 
165 static unsigned int get_size(AVIOContext *s, int len)
166 {
167  int v = 0;
168  while (len--)
169  v = (v << 7) + (avio_r8(s) & 0x7F);
170  return v;
171 }
172 
/**
 * Take the low 7 bits of each of the four bytes of size and pack them
 * together into a 28-bit value (bit 7 of every byte is discarded).
 */
static unsigned int size_to_syncsafe(unsigned int size)
{
    unsigned int packed = 0;
    int byte;

    /* walk from the most significant byte down, appending 7 bits each time */
    for (byte = 3; byte >= 0; byte--)
        packed = (packed << 7) | ((size >> (8 * byte)) & 0x7f);

    return packed;
}
180 
/**
 * Loose plausibility check for a frame identifier: every one of the len
 * characters must be an upper-case ASCII letter or a decimal digit.
 *
 * @return 1 if all characters qualify, 0 otherwise (including len == 0)
 */
static int is_tag(const char *buf, unsigned int len)
{
    unsigned int i;

    if (len == 0)
        return 0;

    /* scan from the last character towards the first */
    for (i = len; i-- > 0; ) {
        const char c     = buf[i];
        const int  upper = c >= 'A' && c <= 'Z';
        const int  digit = c >= '0' && c <= '9';

        if (!upper && !digit)
            return 0;
    }

    return 1;
}
197 
198 /**
199  * Return 1 if the tag of length len at the given offset is valid, 0 if not, -1 on error
200  */
201 static int check_tag(AVIOContext *s, int offset, unsigned int len)
202 {
203  char tag[4];
204 
205  if (len > 4 ||
206  avio_seek(s, offset, SEEK_SET) < 0 ||
207  avio_read(s, tag, len) < len)
208  return -1;
209  else if (!AV_RB32(tag) || is_tag(tag, len))
210  return 1;
211 
212  return 0;
213 }
214 
215 /**
216  * Free GEOB type extra metadata.
217  */
218 static void free_geobtag(void *obj)
219 {
220  ID3v2ExtraMetaGEOB *geob = obj;
221  av_free(geob->mime_type);
222  av_free(geob->file_name);
223  av_free(geob->description);
224  av_free(geob->data);
225  av_free(geob);
226 }
227 
228 /**
229  * Decode characters to UTF-8 according to encoding type. The decoded buffer is
230  * always null terminated. Stop reading when either *maxread bytes are read from
231  * pb or U+0000 character is found.
232  *
233  * @param dst Pointer where the address of the buffer with the decoded bytes is
234  * stored. Buffer must be freed by caller.
235  * @param maxread Pointer to maximum number of characters to read from the
236  * AVIOContext. After execution the value is decremented by the number of bytes
237  * actually read.
238  * @returns 0 if no error occurred, dst is uninitialized on error
239  */
240 static int decode_str(AVFormatContext *s, AVIOContext *pb, int encoding,
241  uint8_t **dst, int *maxread)
242 {
243  int ret;
244  uint8_t tmp;
245  uint32_t ch = 1;
246  int left = *maxread;
247  unsigned int (*get)(AVIOContext*) = avio_rb16;
248  AVIOContext *dynbuf;
249 
250  if ((ret = avio_open_dyn_buf(&dynbuf)) < 0) {
251  av_log(s, AV_LOG_ERROR, "Error opening memory stream\n");
252  return ret;
253  }
254 
255  switch (encoding) {
257  while (left && ch) {
258  ch = avio_r8(pb);
259  PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
260  left--;
261  }
262  break;
263 
265  if ((left -= 2) < 0) {
266  av_log(s, AV_LOG_ERROR, "Cannot read BOM value, input too short\n");
267  avio_close_dyn_buf(dynbuf, dst);
268  av_freep(dst);
269  return AVERROR_INVALIDDATA;
270  }
271  switch (avio_rb16(pb)) {
272  case 0xfffe:
273  get = avio_rl16;
274  case 0xfeff:
275  break;
276  default:
277  av_log(s, AV_LOG_ERROR, "Incorrect BOM value\n");
278  avio_close_dyn_buf(dynbuf, dst);
279  av_freep(dst);
280  *maxread = left;
281  return AVERROR_INVALIDDATA;
282  }
283  // fall-through
284 
286  while ((left > 1) && ch) {
287  GET_UTF16(ch, ((left -= 2) >= 0 ? get(pb) : 0), break;)
288  PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
289  }
290  if (left < 0)
291  left += 2; /* did not read last char from pb */
292  break;
293 
294  case ID3v2_ENCODING_UTF8:
295  while (left && ch) {
296  ch = avio_r8(pb);
297  avio_w8(dynbuf, ch);
298  left--;
299  }
300  break;
301  default:
302  av_log(s, AV_LOG_WARNING, "Unknown encoding\n");
303  }
304 
305  if (ch)
306  avio_w8(dynbuf, 0);
307 
308  avio_close_dyn_buf(dynbuf, dst);
309  *maxread = left;
310 
311  return 0;
312 }
313 
314 /**
315  * Parse a text tag.
316  */
317 static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen,
318  AVDictionary **metadata, const char *key)
319 {
320  uint8_t *dst;
321  int encoding, dict_flags = AV_DICT_DONT_OVERWRITE | AV_DICT_DONT_STRDUP_VAL;
322  unsigned genre;
323 
324  if (taglen < 1)
325  return;
326 
327  encoding = avio_r8(pb);
328  taglen--; /* account for encoding type byte */
329 
330  if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
331  av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
332  return;
333  }
334 
335  if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) &&
336  (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) &&
337  genre <= ID3v1_GENRE_MAX) {
338  av_freep(&dst);
339  dst = av_strdup(ff_id3v1_genre_str[genre]);
340  } else if (!(strcmp(key, "TXXX") && strcmp(key, "TXX"))) {
341  /* dst now contains the key, need to get value */
342  key = dst;
343  if (decode_str(s, pb, encoding, &dst, &taglen) < 0) {
344  av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", key);
345  av_freep(&key);
346  return;
347  }
348  dict_flags |= AV_DICT_DONT_STRDUP_KEY;
349  } else if (!*dst)
350  av_freep(&dst);
351 
352  if (dst)
353  av_dict_set(metadata, key, dst, dict_flags);
354 }
355 
356 /**
357  * Parse GEOB tag into a ID3v2ExtraMetaGEOB struct.
358  */
359 static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen,
360  char *tag, ID3v2ExtraMeta **extra_meta, int isv34)
361 {
362  ID3v2ExtraMetaGEOB *geob_data = NULL;
363  ID3v2ExtraMeta *new_extra = NULL;
364  char encoding;
365  unsigned int len;
366 
367  if (taglen < 1)
368  return;
369 
370  geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB));
371  if (!geob_data) {
372  av_log(s, AV_LOG_ERROR, "Failed to alloc %"SIZE_SPECIFIER" bytes\n",
373  sizeof(ID3v2ExtraMetaGEOB));
374  return;
375  }
376 
377  new_extra = av_mallocz(sizeof(ID3v2ExtraMeta));
378  if (!new_extra) {
379  av_log(s, AV_LOG_ERROR, "Failed to alloc %"SIZE_SPECIFIER" bytes\n",
380  sizeof(ID3v2ExtraMeta));
381  goto fail;
382  }
383 
384  /* read encoding type byte */
385  encoding = avio_r8(pb);
386  taglen--;
387 
388  /* read MIME type (always ISO-8859) */
389  if (decode_str(s, pb, ID3v2_ENCODING_ISO8859, &geob_data->mime_type,
390  &taglen) < 0 ||
391  taglen <= 0)
392  goto fail;
393 
394  /* read file name */
395  if (decode_str(s, pb, encoding, &geob_data->file_name, &taglen) < 0 ||
396  taglen <= 0)
397  goto fail;
398 
399  /* read content description */
400  if (decode_str(s, pb, encoding, &geob_data->description, &taglen) < 0 ||
401  taglen < 0)
402  goto fail;
403 
404  if (taglen) {
405  /* save encapsulated binary data */
406  geob_data->data = av_malloc(taglen);
407  if (!geob_data->data) {
408  av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", taglen);
409  goto fail;
410  }
411  if ((len = avio_read(pb, geob_data->data, taglen)) < taglen)
413  "Error reading GEOB frame, data truncated.\n");
414  geob_data->datasize = len;
415  } else {
416  geob_data->data = NULL;
417  geob_data->datasize = 0;
418  }
419 
420  /* add data to the list */
421  new_extra->tag = "GEOB";
422  new_extra->data = geob_data;
423  new_extra->next = *extra_meta;
424  *extra_meta = new_extra;
425 
426  return;
427 
428 fail:
429  av_log(s, AV_LOG_ERROR, "Error reading frame %s, skipped\n", tag);
430  free_geobtag(geob_data);
431  av_free(new_extra);
432  return;
433 }
434 
/**
 * @return 1 if str consists solely of decimal digits (an empty string also
 *         yields 1), 0 as soon as any other character is found
 */
static int is_number(const char *str)
{
    const char *p;

    for (p = str; *p; p++)
        if (*p < '0' || *p > '9')
            return 0;

    return 1;
}
441 
443 {
445  if ((t = av_dict_get(m, tag, NULL, AV_DICT_MATCH_CASE)) &&
446  strlen(t->value) == 4 && is_number(t->value))
447  return t;
448  return NULL;
449 }
450 
451 static void merge_date(AVDictionary **m)
452 {
454  char date[17] = { 0 }; // YYYY-MM-DD hh:mm
455 
456  if (!(t = get_date_tag(*m, "TYER")) &&
457  !(t = get_date_tag(*m, "TYE")))
458  return;
459  av_strlcpy(date, t->value, 5);
460  av_dict_set(m, "TYER", NULL, 0);
461  av_dict_set(m, "TYE", NULL, 0);
462 
463  if (!(t = get_date_tag(*m, "TDAT")) &&
464  !(t = get_date_tag(*m, "TDA")))
465  goto finish;
466  snprintf(date + 4, sizeof(date) - 4, "-%.2s-%.2s", t->value + 2, t->value);
467  av_dict_set(m, "TDAT", NULL, 0);
468  av_dict_set(m, "TDA", NULL, 0);
469 
470  if (!(t = get_date_tag(*m, "TIME")) &&
471  !(t = get_date_tag(*m, "TIM")))
472  goto finish;
473  snprintf(date + 10, sizeof(date) - 10,
474  " %.2s:%.2s", t->value, t->value + 2);
475  av_dict_set(m, "TIME", NULL, 0);
476  av_dict_set(m, "TIM", NULL, 0);
477 
478 finish:
479  if (date[0])
480  av_dict_set(m, "date", date, 0);
481 }
482 
483 static void free_apic(void *obj)
484 {
485  ID3v2ExtraMetaAPIC *apic = obj;
486  av_buffer_unref(&apic->buf);
487  av_freep(&apic->description);
488  av_freep(&apic);
489 }
490 
491 static void read_apic(AVFormatContext *s, AVIOContext *pb, int taglen,
492  char *tag, ID3v2ExtraMeta **extra_meta, int isv34)
493 {
494  int enc, pic_type;
495  char mimetype[64];
496  const CodecMime *mime = ff_id3v2_mime_tags;
497  enum AVCodecID id = AV_CODEC_ID_NONE;
498  ID3v2ExtraMetaAPIC *apic = NULL;
499  ID3v2ExtraMeta *new_extra = NULL;
500  int64_t end = avio_tell(pb) + taglen;
501 
502  if (taglen <= 4)
503  goto fail;
504 
505  new_extra = av_mallocz(sizeof(*new_extra));
506  apic = av_mallocz(sizeof(*apic));
507  if (!new_extra || !apic)
508  goto fail;
509 
510  enc = avio_r8(pb);
511  taglen--;
512 
513  /* mimetype */
514  if (isv34) {
515  taglen -= avio_get_str(pb, taglen, mimetype, sizeof(mimetype));
516  } else {
517  avio_read(pb, mimetype, 3);
518  mimetype[3] = 0;
519  }
520  while (mime->id != AV_CODEC_ID_NONE) {
521  if (!av_strncasecmp(mime->str, mimetype, sizeof(mimetype))) {
522  id = mime->id;
523  break;
524  }
525  mime++;
526  }
527  if (id == AV_CODEC_ID_NONE) {
529  "Unknown attached picture mimetype: %s, skipping.\n", mimetype);
530  goto fail;
531  }
532  apic->id = id;
533 
534  /* picture type */
535  pic_type = avio_r8(pb);
536  taglen--;
537  if (pic_type < 0 || pic_type >= FF_ARRAY_ELEMS(ff_id3v2_picture_types)) {
538  av_log(s, AV_LOG_WARNING, "Unknown attached picture type %d.\n",
539  pic_type);
540  pic_type = 0;
541  }
542  apic->type = ff_id3v2_picture_types[pic_type];
543 
544  /* description and picture data */
545  if (decode_str(s, pb, enc, &apic->description, &taglen) < 0) {
546  av_log(s, AV_LOG_ERROR,
547  "Error decoding attached picture description.\n");
548  goto fail;
549  }
550 
552  if (!apic->buf || !taglen || avio_read(pb, apic->buf->data, taglen) != taglen)
553  goto fail;
554  memset(apic->buf->data + taglen, 0, FF_INPUT_BUFFER_PADDING_SIZE);
555 
556  new_extra->tag = "APIC";
557  new_extra->data = apic;
558  new_extra->next = *extra_meta;
559  *extra_meta = new_extra;
560 
561  return;
562 
563 fail:
564  if (apic)
565  free_apic(apic);
566  av_freep(&new_extra);
567  avio_seek(pb, end, SEEK_SET);
568 }
569 
570 static void read_chapter(AVFormatContext *s, AVIOContext *pb, int len, char *ttag, ID3v2ExtraMeta **extra_meta, int isv34)
571 {
572  AVRational time_base = {1, 1000};
573  uint32_t start, end;
574  AVChapter *chapter;
575  uint8_t *dst = NULL;
576  int taglen;
577  char tag[5];
578 
579  if (!s) {
580  /* We should probably just put the chapter data to extra_meta here
581  * and do the AVFormatContext-needing part in a separate
582  * ff_id3v2_parse_apic()-like function. */
583  av_log(NULL, AV_LOG_DEBUG, "No AVFormatContext, skipped ID3 chapter data\n");
584  return;
585  }
586 
587  if (decode_str(s, pb, 0, &dst, &len) < 0)
588  return;
589  if (len < 16)
590  return;
591 
592  start = avio_rb32(pb);
593  end = avio_rb32(pb);
594  avio_skip(pb, 8);
595 
596  chapter = avpriv_new_chapter(s, s->nb_chapters + 1, time_base, start, end, dst);
597  if (!chapter) {
598  av_free(dst);
599  return;
600  }
601 
602  len -= 16;
603  while (len > 10) {
604  if (avio_read(pb, tag, 4) < 4)
605  goto end;
606  tag[4] = 0;
607  taglen = avio_rb32(pb);
608  avio_skip(pb, 2);
609  len -= 10;
610  if (taglen < 0 || taglen > len)
611  goto end;
612  if (tag[0] == 'T')
613  read_ttag(s, pb, taglen, &chapter->metadata, tag);
614  else
615  avio_skip(pb, taglen);
616  len -= taglen;
617  }
618 
619  ff_metadata_conv(&chapter->metadata, NULL, ff_id3v2_34_metadata_conv);
620  ff_metadata_conv(&chapter->metadata, NULL, ff_id3v2_4_metadata_conv);
621 end:
622  av_free(dst);
623 }
624 
625 static void free_priv(void *obj)
626 {
627  ID3v2ExtraMetaPRIV *priv = obj;
628  av_freep(&priv->owner);
629  av_freep(&priv->data);
630  av_freep(&priv);
631 }
632 
633 static void read_priv(AVFormatContext *s, AVIOContext *pb, int taglen,
634  char *tag, ID3v2ExtraMeta **extra_meta, int isv34)
635 {
636  ID3v2ExtraMeta *meta;
637  ID3v2ExtraMetaPRIV *priv;
638 
639  meta = av_mallocz(sizeof(*meta));
640  priv = av_mallocz(sizeof(*priv));
641 
642  if (!meta || !priv)
643  goto fail;
644 
645  if (decode_str(s, pb, ID3v2_ENCODING_ISO8859, &priv->owner, &taglen) < 0)
646  goto fail;
647 
648  priv->data = av_malloc(taglen);
649  if (!priv->data)
650  goto fail;
651 
652  priv->datasize = taglen;
653 
654  if (avio_read(pb, priv->data, priv->datasize) != priv->datasize)
655  goto fail;
656 
657  meta->tag = "PRIV";
658  meta->data = priv;
659  meta->next = *extra_meta;
660  *extra_meta = meta;
661 
662  return;
663 
664 fail:
665  if (priv)
666  free_priv(priv);
667  av_freep(&meta);
668 }
669 
670 typedef struct ID3v2EMFunc {
671  const char *tag3;
672  const char *tag4;
673  void (*read)(AVFormatContext *, AVIOContext *, int, char *,
674  ID3v2ExtraMeta **, int isv34);
675  void (*free)(void *obj);
676 } ID3v2EMFunc;
677 
679  { "GEO", "GEOB", read_geobtag, free_geobtag },
680  { "PIC", "APIC", read_apic, free_apic },
681  { "CHAP","CHAP", read_chapter, NULL },
682  { "PRIV","PRIV", read_priv, free_priv },
683  { NULL }
684 };
685 
686 /**
687  * Get the corresponding ID3v2EMFunc struct for a tag.
688  * @param isv34 Determines if v2.2 or v2.3/4 strings are used
689  * @return A pointer to the ID3v2EMFunc struct if found, NULL otherwise.
690  */
691 static const ID3v2EMFunc *get_extra_meta_func(const char *tag, int isv34)
692 {
693  int i = 0;
694  while (id3v2_extra_meta_funcs[i].tag3) {
695  if (tag && !memcmp(tag,
696  (isv34 ? id3v2_extra_meta_funcs[i].tag4 :
697  id3v2_extra_meta_funcs[i].tag3),
698  (isv34 ? 4 : 3)))
699  return &id3v2_extra_meta_funcs[i];
700  i++;
701  }
702  return NULL;
703 }
704 
705 static void id3v2_parse(AVIOContext *pb, AVDictionary **metadata,
707  uint8_t flags, ID3v2ExtraMeta **extra_meta)
708 {
709  int isv34, unsync;
710  unsigned tlen;
711  char tag[5];
712  int64_t next, end = avio_tell(pb) + len;
713  int taghdrlen;
714  const char *reason = NULL;
715  AVIOContext pb_local;
716  AVIOContext *pbx;
717  unsigned char *buffer = NULL;
718  int buffer_size = 0;
719  const ID3v2EMFunc *extra_func = NULL;
720  unsigned char *uncompressed_buffer = NULL;
721  av_unused int uncompressed_buffer_size = 0;
722 
723  av_log(s, AV_LOG_DEBUG, "id3v2 ver:%d flags:%02X len:%d\n", version, flags, len);
724 
725  switch (version) {
726  case 2:
727  if (flags & 0x40) {
728  reason = "compression";
729  goto error;
730  }
731  isv34 = 0;
732  taghdrlen = 6;
733  break;
734 
735  case 3:
736  case 4:
737  isv34 = 1;
738  taghdrlen = 10;
739  break;
740 
741  default:
742  reason = "version";
743  goto error;
744  }
745 
746  unsync = flags & 0x80;
747 
748  if (isv34 && flags & 0x40) { /* Extended header present, just skip over it */
749  int extlen = get_size(pb, 4);
750  if (version == 4)
751  /* In v2.4 the length includes the length field we just read. */
752  extlen -= 4;
753 
754  if (extlen < 0) {
755  reason = "invalid extended header length";
756  goto error;
757  }
758  avio_skip(pb, extlen);
759  len -= extlen + 4;
760  if (len < 0) {
761  reason = "extended header too long.";
762  goto error;
763  }
764  }
765 
766  while (len >= taghdrlen) {
767  unsigned int tflags = 0;
768  int tunsync = 0;
769  int tcomp = 0;
770  int tencr = 0;
771  unsigned long av_unused dlen;
772 
773  if (isv34) {
774  if (avio_read(pb, tag, 4) < 4)
775  break;
776  tag[4] = 0;
777  if (version == 3) {
778  tlen = avio_rb32(pb);
779  } else {
780  /* some encoders incorrectly use v3 sizes instead of syncsafe ones,
781  * so check the next tag to see which one to use */
782  tlen = avio_rb32(pb);
783  if (tlen > 0x7f) {
784  if (tlen < len) {
785  int64_t cur = avio_tell(pb);
786 
787  if (ffio_ensure_seekback(pb, 2 /* tflags */ + tlen + 4 /* next tag */))
788  break;
789 
790  if (check_tag(pb, cur + 2 + size_to_syncsafe(tlen), 4) == 1)
791  tlen = size_to_syncsafe(tlen);
792  else if (check_tag(pb, cur + 2 + tlen, 4) != 1)
793  break;
794  avio_seek(pb, cur, SEEK_SET);
795  } else
796  tlen = size_to_syncsafe(tlen);
797  }
798  }
799  tflags = avio_rb16(pb);
800  tunsync = tflags & ID3v2_FLAG_UNSYNCH;
801  } else {
802  if (avio_read(pb, tag, 3) < 3)
803  break;
804  tag[3] = 0;
805  tlen = avio_rb24(pb);
806  }
807  if (tlen > (1<<28))
808  break;
809  len -= taghdrlen + tlen;
810 
811  if (len < 0)
812  break;
813 
814  next = avio_tell(pb) + tlen;
815 
816  if (!tlen) {
817  if (tag[0])
818  av_log(s, AV_LOG_DEBUG, "Invalid empty frame %s, skipping.\n",
819  tag);
820  continue;
821  }
822 
823  if (tflags & ID3v2_FLAG_DATALEN) {
824  if (tlen < 4)
825  break;
826  dlen = avio_rb32(pb);
827  tlen -= 4;
828  } else
829  dlen = tlen;
830 
831  tcomp = tflags & ID3v2_FLAG_COMPRESSION;
832  tencr = tflags & ID3v2_FLAG_ENCRYPTION;
833 
834  /* skip encrypted tags and, if no zlib, compressed tags */
835  if (tencr || (!CONFIG_ZLIB && tcomp)) {
836  const char *type;
837  if (!tcomp)
838  type = "encrypted";
839  else if (!tencr)
840  type = "compressed";
841  else
842  type = "encrypted and compressed";
843 
844  av_log(s, AV_LOG_WARNING, "Skipping %s ID3v2 frame %s.\n", type, tag);
845  avio_skip(pb, tlen);
846  /* check for text tag or supported special meta tag */
847  } else if (tag[0] == 'T' ||
848  (extra_meta &&
849  (extra_func = get_extra_meta_func(tag, isv34)))) {
850  pbx = pb;
851 
852  if (unsync || tunsync || tcomp) {
853  av_fast_malloc(&buffer, &buffer_size, tlen);
854  if (!buffer) {
855  av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", tlen);
856  goto seek;
857  }
858  }
859  if (unsync || tunsync) {
860  int64_t end = avio_tell(pb) + tlen;
861  uint8_t *b;
862 
863  b = buffer;
864  while (avio_tell(pb) < end && b - buffer < tlen && !pb->eof_reached) {
865  *b++ = avio_r8(pb);
866  if (*(b - 1) == 0xff && avio_tell(pb) < end - 1 &&
867  b - buffer < tlen &&
868  !pb->eof_reached ) {
869  uint8_t val = avio_r8(pb);
870  *b++ = val ? val : avio_r8(pb);
871  }
872  }
873  ffio_init_context(&pb_local, buffer, b - buffer, 0, NULL, NULL, NULL,
874  NULL);
875  tlen = b - buffer;
876  pbx = &pb_local; // read from sync buffer
877  }
878 
879 #if CONFIG_ZLIB
880  if (tcomp) {
881  int err;
882 
883  av_log(s, AV_LOG_DEBUG, "Compresssed frame %s tlen=%d dlen=%ld\n", tag, tlen, dlen);
884 
885  av_fast_malloc(&uncompressed_buffer, &uncompressed_buffer_size, dlen);
886  if (!uncompressed_buffer) {
887  av_log(s, AV_LOG_ERROR, "Failed to alloc %ld bytes\n", dlen);
888  goto seek;
889  }
890 
891  if (!(unsync || tunsync)) {
892  err = avio_read(pb, buffer, tlen);
893  if (err < 0) {
894  av_log(s, AV_LOG_ERROR, "Failed to read compressed tag\n");
895  goto seek;
896  }
897  tlen = err;
898  }
899 
900  err = uncompress(uncompressed_buffer, &dlen, buffer, tlen);
901  if (err != Z_OK) {
902  av_log(s, AV_LOG_ERROR, "Failed to uncompress tag: %d\n", err);
903  goto seek;
904  }
905  ffio_init_context(&pb_local, uncompressed_buffer, dlen, 0, NULL, NULL, NULL, NULL);
906  tlen = dlen;
907  pbx = &pb_local; // read from sync buffer
908  }
909 #endif
910  if (tag[0] == 'T')
911  /* parse text tag */
912  read_ttag(s, pbx, tlen, metadata, tag);
913  else
914  /* parse special meta tag */
915  extra_func->read(s, pbx, tlen, tag, extra_meta, isv34);
916  } else if (!tag[0]) {
917  if (tag[1])
918  av_log(s, AV_LOG_WARNING, "invalid frame id, assuming padding\n");
919  avio_skip(pb, tlen);
920  break;
921  }
922  /* Skip to end of tag */
923 seek:
924  avio_seek(pb, next, SEEK_SET);
925  }
926 
927  /* Footer present, always 10 bytes, skip over it */
928  if (version == 4 && flags & 0x10)
929  end += 10;
930 
931 error:
932  if (reason)
933  av_log(s, AV_LOG_INFO, "ID3v2.%d tag skipped, cannot handle %s\n",
934  version, reason);
935  avio_seek(pb, end, SEEK_SET);
936  av_free(buffer);
937  av_free(uncompressed_buffer);
938  return;
939 }
940 
941 static void id3v2_read_internal(AVIOContext *pb, AVDictionary **metadata,
942  AVFormatContext *s, const char *magic,
943  ID3v2ExtraMeta **extra_meta, int64_t max_search_size)
944 {
945  int len, ret;
947  int found_header;
948  int64_t start, off;
949 
950  if (max_search_size && max_search_size < ID3v2_HEADER_SIZE)
951  return;
952 
953  start = avio_tell(pb);
954  do {
955  /* save the current offset in case there's nothing to read/skip */
956  off = avio_tell(pb);
957  if (max_search_size && off - start >= max_search_size - ID3v2_HEADER_SIZE) {
958  avio_seek(pb, off, SEEK_SET);
959  break;
960  }
961 
962  ret = avio_read(pb, buf, ID3v2_HEADER_SIZE);
963  if (ret != ID3v2_HEADER_SIZE) {
964  avio_seek(pb, off, SEEK_SET);
965  break;
966  }
967  found_header = ff_id3v2_match(buf, magic);
968  if (found_header) {
969  /* parse ID3v2 header */
970  len = ((buf[6] & 0x7f) << 21) |
971  ((buf[7] & 0x7f) << 14) |
972  ((buf[8] & 0x7f) << 7) |
973  (buf[9] & 0x7f);
974  id3v2_parse(pb, metadata, s, len, buf[3], buf[5], extra_meta);
975  } else {
976  avio_seek(pb, off, SEEK_SET);
977  }
978  } while (found_header);
979  ff_metadata_conv(metadata, NULL, ff_id3v2_34_metadata_conv);
980  ff_metadata_conv(metadata, NULL, id3v2_2_metadata_conv);
981  ff_metadata_conv(metadata, NULL, ff_id3v2_4_metadata_conv);
982  merge_date(metadata);
983 }
984 
986  const char *magic, ID3v2ExtraMeta **extra_meta)
987 {
988  id3v2_read_internal(pb, metadata, NULL, magic, extra_meta, 0);
989 }
990 
991 void ff_id3v2_read(AVFormatContext *s, const char *magic,
992  ID3v2ExtraMeta **extra_meta, unsigned int max_search_size)
993 {
994  id3v2_read_internal(s->pb, &s->metadata, s, magic, extra_meta, max_search_size);
995 }
996 
998 {
999  ID3v2ExtraMeta *current = *extra_meta, *next;
1000  const ID3v2EMFunc *extra_func;
1001 
1002  while (current) {
1003  if ((extra_func = get_extra_meta_func(current->tag, 1)))
1004  extra_func->free(current->data);
1005  next = current->next;
1006  av_freep(&current);
1007  current = next;
1008  }
1009 
1010  *extra_meta = NULL;
1011 }
1012 
1014 {
1015  ID3v2ExtraMeta *cur;
1016 
1017  for (cur = *extra_meta; cur; cur = cur->next) {
1018  ID3v2ExtraMetaAPIC *apic;
1019  AVStream *st;
1020 
1021  if (strcmp(cur->tag, "APIC"))
1022  continue;
1023  apic = cur->data;
1024 
1025  if (!(st = avformat_new_stream(s, NULL)))
1026  return AVERROR(ENOMEM);
1027 
1030  st->codec->codec_id = apic->id;
1031  av_dict_set(&st->metadata, "title", apic->description, 0);
1032  av_dict_set(&st->metadata, "comment", apic->type, 0);
1033 
1035  st->attached_pic.buf = apic->buf;
1036  st->attached_pic.data = apic->buf->data;
1038  st->attached_pic.stream_index = st->index;
1040 
1041  apic->buf = NULL;
1042  }
1043 
1044  return 0;
1045 }