[Ffmpeg-devel] ID3v2

Michael Niedermayer michaelni
Mon Sep 25 13:28:47 CEST 2006


Hi

On Sun, Sep 24, 2006 at 01:21:40PM +0200, Andreas ?man wrote:
> Hi
> 
> I've written a tentative patch that adds ID3v2.[234] reading
> and ID3v2.4 writing to the mp3 (de)muxer. I've tested it
> together with xmms and itunes.
> I'd say the primary reasons for using ID3v2 is:
> 
> * It's located at the beginning of the file
>   (obviously a benefit when streaming)
> * It supports strings > 30 chars
> 
> Comments welcome.
> 
> While at it, I renamed some stuff to better reflect what
> ID3-version it is used for.
> Perhaps I should submit that as a separate patch?

yes, that has to be a seperate patch

> 
> 

> Index: libavformat/mp3.c
> ===================================================================
> --- libavformat/mp3.c	(revision 6324)
> +++ libavformat/mp3.c	(working copy)
[...]
> @@ -167,7 +167,250 @@
>              (buf[9] & 0x80) == 0);
>  }
>  
> -static void id3_get_string(char *str, int str_size,
> +static inline unsigned int id3v2_size(const char *buf)
> +{
> +    return ((buf[0] & 0x7f) << 21) | ((buf[1] & 0x7f) << 14) |
> +        ((buf[2] & 0x7f) << 7) | (buf[3] & 0x7f);
> +}

no function which is used only for reading/writing a header
should be inline


> +
> +static void id3v2_read_ttag(AVFormatContext *s, int taglen, char *dst, int dstlen)
> +{
> +    int len;
> +    int64_t v;
> +
> +    taglen--; /* account for encoding type byte */
> +    dstlen--; /* Leave space for zero terminator */
> +
> +    switch(get_byte(&s->pb)) { /* encoding type */
> +
> +    case 0:  /* ISO-8859-1 */
> +        len = FFMIN(taglen, dstlen);
> +        get_buffer(&s->pb, dst, len);
> +        dst[len] = 0;
> +        /* Skip over non-read bytes */
> +        len = FFMAX(0, taglen - dstlen);
> +        if(len)
> +            url_fskip(&s->pb, len);
> +        break;
> +
> +    case 3:  /* UTF-8 */
> +        while(dstlen > 0) {
> +            GET_UTF8(v, (taglen--, get_byte(&s->pb)), break;)
> +            *dst++ = v;
> +            dstlen--;
> +        }
> +        *dst = 0;
> +        /* Skip over non-read bytes */
> +        if(taglen > 0)
> +            url_fskip(&s->pb, taglen);
> +        break;

IMHO strings in ffmpeg should either be ASCII or UTF-8 but never something
random like ISO-8859-1


> +
> +    case 1:  /* UTF-16 we cannot handle */
> +    case 2:  /* UTF-16BE we cannot handle */
> +    default:
> +        url_fskip(&s->pb, taglen);
> +    }

i suggest that a offset_t end= url_ftell() + taglen; and url_fseek() be used
to skip over things instead of having several lines of code in every case
to do that


> +}
> +
> +
> +/*
> + * Parser for ID3v2.2
> + */

change to doxygen compatible comment


> +
> +static void id3v2_2_parse(AVFormatContext *s, int len, uint8_t flags)
> +{
> +    char tmp[16];
> +    uint32_t tag;
> +    int tlen, i;
> +    uint16_t tflags;
> +
> +    if(flags & 0xc0) {
> +        /* We cannot deal with unsynchornization nor compression */

comment -> av_log


> +        url_fskip(&s->pb, len);
> +        return;
> +    }
> +
> +    while(len > 0) {
> +        tag = 0;
> +        for(i = 0; i < 3; i++)
> +            tag = tag << 8 | get_byte(&s->pb);

get_be24();


> +
> +        tmp[0] = 0;
> +        if(get_buffer(&s->pb, tmp + 1, 3) != 3)
> +            return;
> +        tlen = id3v2_size(tmp);

a function which converts 3-4 bytes of a ByteIOContext to the size
seeme like a good idea as this is occuring several times


> +
> +        len -= 6 + tlen;
> +
> +        switch(tag) {
> +        case MKBETAG(0, 'T', 'T', '2'):
> +            id3v2_read_ttag(s, tlen, s->title, sizeof(s->title));
> +            break;
> +        case MKBETAG(0, 'T', 'P', '1'):
> +            id3v2_read_ttag(s, tlen, s->author, sizeof(s->author));
> +            break;
> +        case MKBETAG(0, 'T', 'A', 'L'):
> +            id3v2_read_ttag(s, tlen, s->album, sizeof(s->album));
> +            break;
> +        case MKBETAG(0, 'T', 'C', 'O'):
> +            id3v2_read_ttag(s, tlen, s->genre, sizeof(s->genre));
> +            break;
> +        case MKBETAG(0, 'T', 'C', 'R'):
> +            id3v2_read_ttag(s, tlen, s->copyright, sizeof(s->copyright));
> +            break;
> +        case MKBETAG(0, 'T', 'R', 'K'):
> +            id3v2_read_ttag(s, tlen, tmp, sizeof(tmp));
> +            s->track = atoi(tmp);
> +            break;
> +        default:
> +            url_fskip(&s->pb, tlen);
> +            break;
> +        }
> +    }
> +}
> +
> +
> +/*
> + * Parser for ID3v2.[34]
> + */
> +
> +static void id3v2_34_parse(AVFormatContext *s, int len, uint8_t flags)
> +{
> +    char tmp[16];
> +    uint32_t tag;
> +    int tlen;
> +    uint16_t tflags;
> +
> +    if(flags & 0x80) {
> +        /* We cannot deal with unsynchornization yet, skip entire id3v2 tag */
> +        url_fskip(&s->pb, len);
> +        return;
> +    }
> +
> +    if(flags & 0x40) {
> +        /* Extended header present, just skip over it */
> +        if(get_buffer(&s->pb, tmp, 4) != 4)
> +            return;
> +        url_fskip(&s->pb, id3v2_size(tmp));
> +    }
> +
> +    while(len > 0) {
> +
> +        tag = get_be32(&s->pb);
> +
> +        if(get_buffer(&s->pb, tmp, 4) != 4)
> +            return;
> +        tlen = id3v2_size(tmp);
> +
> +        tflags = get_be16(&s->pb);
> +        len -= 10 + tlen;
> +
> +        switch(tag) {
> +        case MKBETAG('T', 'I', 'T', '2'):
> +            id3v2_read_ttag(s, tlen, s->title, sizeof(s->title));
> +            break;
> +        case MKBETAG('T', 'P', 'E', '1'):
> +            id3v2_read_ttag(s, tlen, s->author, sizeof(s->author));
> +            break;
> +        case MKBETAG('T', 'A', 'L', 'B'):
> +            id3v2_read_ttag(s, tlen, s->album, sizeof(s->album));
> +            break;
> +        case MKBETAG('T', 'C', 'O', 'N'):
> +            id3v2_read_ttag(s, tlen, s->genre, sizeof(s->genre));
> +            break;
> +        case MKBETAG('T', 'C', 'O', 'P'):
> +            id3v2_read_ttag(s, tlen, s->copyright, sizeof(s->copyright));
> +            break;
> +        case MKBETAG('T', 'R', 'C', 'K'):
> +            id3v2_read_ttag(s, tlen, tmp, sizeof(tmp));
> +            s->track = atoi(tmp);
> +            break;
> +        default:
> +            url_fskip(&s->pb, tlen);
> +            break;
> +        }

this switch() should be merged with the id3v2_2 one


> +    }
> +
> +    if(flags & 0x10) {
> +        /* Footer preset, always 10 bytes, skip over it */
> +        url_fskip(&s->pb, 10);
> +    }
> +}
> +
> +
> +static void id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags)
> +{
> +    switch(version) {
> +    case 2:
> +        id3v2_2_parse(s, len, flags);
> +        break;
> +    case 3 ... 4:
> +        id3v2_34_parse(s, len, flags);
> +        break;
> +    }
> +
> +}
> +
> +static void id3v2_put_size(AVFormatContext *s, int size)
> +{
> +    unsigned char buf[4];
> +
> +
> +    buf[0] = size >> 21 & 0x7f;
> +    buf[1] = size >> 14 & 0x7f;
> +    buf[2] = size >> 7 & 0x7f;
> +    buf[3] = size & 0x7f;
> +    put_buffer(&s->pb, buf, 4);

4 put_byte() would be cleaner


> +}
> +
> +static void id3v2_put_ttag(AVFormatContext *s, char *string, uint32_t tag)
> +{
> +    int len = strlen(string);
> +    put_be32(&s->pb, tag);
> +    id3v2_put_size(s, len + 1);
> +    put_be16(&s->pb, 0);
> +    put_byte(&s->pb, 0); /* ISO-8859-1 */

UTF8 should be default


> +    put_buffer(&s->pb, string, len);
> +}
> +
> +static void id3v2_write_tag(AVFormatContext *s)
> +{
> +    int totlen = 0;
> +    char tracktxt[10];
> +
> +    if(s->track)
> +        snprintf(tracktxt, sizeof(tracktxt) - 1, "%d", s->track);
> +    else
> +        tracktxt[0] = 0;
> +
> +    if(s->title[0])     totlen += strlen(s->title) + 11;
> +    if(s->author[0])    totlen += strlen(s->author) + 11;
> +    if(s->album[0])     totlen += strlen(s->album) + 11;
> +    if(s->genre[0])     totlen += strlen(s->genre) + 11;
> +    if(s->copyright[0]) totlen += strlen(s->copyright) + 11;
> +    if(tracktxt[0])      totlen += strlen(tracktxt) + 11;

the above is almost aligned ...
and if you want you could also align the ) + 11; ...


> +
> +    totlen += strlen(LIBAVFORMAT_IDENT) + 11;
> +
> +    put_be32(&s->pb, MKBETAG('I', 'D', '3', 0x04)); /* ID3v2.4 */
> +    put_byte(&s->pb, 0);
> +    put_byte(&s->pb, 0); /* flags */
> +
> +    id3v2_put_size(s, totlen);
> +
> +    if(s->title[0])     id3v2_put_ttag(s, s->title, MKBETAG('T', 'I', 'T', '2'));
> +    if(s->author[0])    id3v2_put_ttag(s, s->author, MKBETAG('T', 'P', 'E', '1'));
> +    if(s->album[0])     id3v2_put_ttag(s, s->album, MKBETAG('T', 'A', 'L', 'B'));
> +    if(s->genre[0])     id3v2_put_ttag(s, s->genre, MKBETAG('T', 'C', 'O', 'N'));
> +    if(s->copyright[0]) id3v2_put_ttag(s, s->copyright, MKBETAG('T', 'C', 'O', 'P'));
> +    if(tracktxt[0])     id3v2_put_ttag(s, tracktxt, MKBETAG('T', 'R', 'C', 'K'));
> +    id3v2_put_ttag(s, LIBAVFORMAT_IDENT, MKBETAG('T', 'E', 'N', 'C'));

LIBAVFORMAT_IDENT must only be stored if the AVCodecCOntext.flags doesnt
contain CODEC_FLAG_BITEXACT otherwise regression tests would break if it
changes

also if you like you could nicely align the MKBETAG so they are more readable

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In the past you could go to a library and read, borrow or copy any book
Today you'd get arrested for mere telling someone where the library is




More information about the ffmpeg-devel mailing list