[FFmpeg-devel] [PATCH] support for UTF-16 encoding in id3v2 tags

Michael Niedermayer michaelni
Thu Sep 10 13:07:17 CEST 2009


On Sun, Sep 06, 2009 at 02:00:32PM +0200, Anton Khirnov wrote:
> Hi,
> attached is a patch for $subj. I've uploaded a sample file to incoming
> - utf16_tags.mp3.
> 
> Anton Khirnov

>  libavformat/id3v2.c |   34 ++++++++++++++++++++++++++++++++++
>  libavutil/common.h  |   26 ++++++++++++++++++++++++++
>  2 files changed, 60 insertions(+)
> 3cc4dd4ef107240c2e23a4bdbf0ea569ee0c8e6d  0001-id3v2-add-support-for-UTF-16-encoding.patch
> From a7c0a1fb2cc56531963feb8e4730f0844a462ecf Mon Sep 17 00:00:00 2001
> From: Anton Khirnov <wyskas at gmail.com>
> Date: Sun, 6 Sep 2009 13:34:48 +0200
> Subject: [PATCH] id3v2: add support for UTF-16 encoding.
> 
> ---
>  libavformat/id3v2.c |   34 ++++++++++++++++++++++++++++++++++
>  libavutil/common.h  |   26 ++++++++++++++++++++++++++
>  2 files changed, 60 insertions(+), 0 deletions(-)
> 
> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> index 0cf2cb1..fe79c01 100644
> --- a/libavformat/id3v2.c
> +++ b/libavformat/id3v2.c
> @@ -81,6 +81,7 @@ static void read_ttag(AVFormatContext *s, int taglen, const char *key)
>      char *q, dst[512];
>      int len, dstlen = sizeof(dst) - 1;
>      unsigned genre;
> +    unsigned int (*get)(ByteIOContext*) = NULL;
>  
>      dst[0] = 0;
>      if (taglen < 1)
> @@ -99,11 +100,44 @@ static void read_ttag(AVFormatContext *s, int taglen, const char *key)
>          *q = '\0';
>          break;
>  
> +    case 1:  /* UTF-16 with BOM */
> +        taglen -= 2;
> +        switch (get_be16(s->pb)) {
> +        case 0xfeff:
> +            get = get_be16;
> +            break;
> +        case 0xfffe:
> +            get = get_le16;
> +            break;
> +        default:
> +            av_log(s, AV_LOG_ERROR, "Incorrect BOM value.\n");
> +            return;
> +        }
> +        // fall-through
> +
> +    case 2:  /* UTF-16BE without BOM */

> +        if (!get)
> +            get = get_be16;

get can be set to get_be16 before the switch() as the default, which makes this check unnecessary


> +
> +        q = dst;
> +        while (taglen > 1) {
> +            uint32_t ch;
> +            uint8_t tmp;
> +
> +            GET_UTF16(ch, get(s->pb), break;, len)
> +            PUT_UTF8(ch, tmp, if (q - dst < dstlen -1) *q++ = tmp;)
> +            taglen -= len;
> +        }
> +        *q = '\0';
> +        break;
> +
>      case 3:  /* UTF-8 */
>          len = FFMIN(taglen, dstlen - 1);
>          get_buffer(s->pb, dst, len);
>          dst[len] = 0;
>          break;
> +    default:
> +        av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s\n.", key);
>      }
>  
>      if (!strcmp(key, "genre")
> diff --git a/libavutil/common.h b/libavutil/common.h
> index 0797a79..458e32f 100644
> --- a/libavutil/common.h
> +++ b/libavutil/common.h

> @@ -266,6 +266,32 @@ static inline av_const int av_ceil_log2(int x)
>      }
>  
>  /*!
> + * \def GET_UTF16(val, GET_BYTE, ERROR)

The parameter should be named GET_16BIT, not GET_BYTE, since it yields a 16-bit value rather than a byte.


> + * Converts a UTF-16 character (2 or 4 bytes) to its 32-bit UCS-4 encoded form
> + * \param val is the output and should be of type uint32_t. It holds the converted
> + * UCS-4 character and should be a left value.
> + * \param GET_BYTE gets UTF-16 encoded bytes from any proper source. It can be
> + * a function or a statement whose return value or evaluated value is of type
> + * uint16_t. It will be executed up to 2 times.
> + * \param ERROR action that should be taken when an invalid UTF-16 surrogate is
> + * returned from GET_BYTE. It should be a statement that jumps out of the macro,
> + * like exit(), goto, return, break, or continue.
> + * \param read should be an int and is set to the number of bytes read (2 or 4).
> + */
> +#define GET_UTF16(val, GET_BYTE, ERROR, read)\
> +    {\
> +        val = GET_BYTE;\
> +        read = 2;\
> +        if (val >= 0xD800 && val <= 0xDBFF) {\
> +            val = ((val - 0xD800)<<10) + (get(s->pb) - 0xDC00) + 0x0010000UL;\

Missing error checking: the second 16-bit unit is never verified to be a low surrogate (0xDC00..0xDFFF).


> +            read += 2;\
> +        }\
> +        else if (val > 0xDBFF && val <= 0xDFFF) {\
> +            ERROR\
> +        }\

unsigned int hi= val - 0xD800;
if(hi < 0x800) {
    val= get(s->pb) - 0xDC00;
    if(val > 0x3FFU || hi > 0x3FFU){
        ERROR
    }
    val+= (hi<<10) + 0x10000;
}

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are best at talking realize last or never when they are wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20090910/2fd025a4/attachment.pgp>



More information about the ffmpeg-devel mailing list