[FFmpeg-devel] [PATCH 2/2] Encoding of styles - Bold, Italic, Underlined for timed-text subttiles

Fri Jun 12 05:40:51 CEST 2015

On Thu, 11 Jun 2015 10:18:53 +0530
Niklesh Lalwani <niklesh.lalwani at iitb.ac.in> wrote:

> From: Niklesh <niklesh.lalwani at iitb.ac.in>
> 
> Encoding of bold, Italic, underlined styles for 3gpp timed text
> subtitles. All the formatting information is appended into the buffer
> after the text, unlike other encoders like srt, which can write out
> styling information as it goes. Another tricky part is to take care
> of the Endianness properly while writing into the buffer.
> 
> Signed-off-by: Niklesh <niklesh.lalwani at iitb.ac.in>
> ---
>  libavcodec/movtextenc.c | 104
> +++++++++++++++++++++++++++++++++++------------- 1 file changed, 76
> insertions(+), 28 deletions(-)
> 
> diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
> index 1b8f454..adbe19d 100644
> --- a/libavcodec/movtextenc.c
> +++ b/libavcodec/movtextenc.c
> @@ -24,14 +24,27 @@
>  #include "libavutil/avassert.h"
>  #include "libavutil/avstring.h"
>  #include "libavutil/intreadwrite.h"
> +#include "libavutil/common.h"
>  #include "ass_split.h"
>  #include "ass.h"
>  
> +#define STYLE_FLAG_BOLD         1
> +#define STYLE_FLAG_ITALIC       2
> +#define STYLE_FLAG_UNDERLINE    4
> +
>  typedef struct {
>      ASSSplitContext *ass_ctx;
> -    char buffer[2048];
> -    char *ptr;
> -    char *end;
> +    AVBPrint buffer;
> +    uint32_t tsmb_size;
> +    uint32_t tsmb_type;
> +    uint16_t style_entries;
> +    uint16_t style_start;
> +    uint16_t style_end;
> +    uint16_t style_fontID;
> +    uint8_t style_flag;
> +    uint8_t style_fontsize;
> +    uint32_t style_color;
> +    uint16_t text_pos;
>  } MovTextContext;
>  
>  
> @@ -47,10 +60,10 @@ static av_cold int
> mov_text_encode_init(AVCodecContext *avctx)
> 0xFF,                   // int8_t vertical-justification 0x00, 0x00,
> 0x00, 0x00, // uint8_t background-color-rgba[4] // BoxRecord {
> -        0x00, 0x00,             // int16_t top
> -        0x00, 0x00,             // int16_t left
> -        0x00, 0x00,             // int16_t bottom
> -        0x00, 0x00,             // int16_t right
> +        0x00, 0x01,             // int16_t top
> +        0x00, 0x01,             // int16_t left
> +        0x00, 0x1E,             // int16_t bottom
> +        0x00, 0xC8,             // int16_t right

Leave this part out of this diff. We'll deal with sizing later.

>          // };
>          // StyleRecord {
>          0x00, 0x00,             // uint16_t startChar
> @@ -79,32 +92,54 @@ static av_cold int
> mov_text_encode_init(AVCodecContext *avctx) if (!avctx->extradata)
>          return AVERROR(ENOMEM);
>  
> +    av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
> +
>      memcpy(avctx->extradata, text_sample_entry,
> avctx->extradata_size); 
>      s->ass_ctx = ff_ass_split(avctx->subtitle_header);
>      return s->ass_ctx ? 0 : AVERROR_INVALIDDATA;
>  }
>  
> +static void mov_text_style_cb(void *priv, const char style, int
> close) +{
> +    MovTextContext *s = priv;
> +    s->tsmb_type = MKTAG('s','t','y','l');

You don't need to do this. Use a simple boolean flag to indicate if any
styles were generated.

> +    if (!close) {
> +        s->style_start = AV_RB16(&s->text_pos);
> +        switch (style){
> +        case 'b':
> +            s->style_flag += STYLE_FLAG_BOLD;
> +            break;
> +        case 'i':
> +            s->style_flag += STYLE_FLAG_ITALIC;
> +            break;
> +        case 'u':
> +            s->style_flag += STYLE_FLAG_UNDERLINE;
> +            break;
> +        }
> +    } else {
> +        s->style_end = AV_RB16(&s->text_pos);
> +    }
> +}

You are only storing a single set of style properties. So if multiple
styles are used in the original subtitles, only the last one will take
effect. You need to build up an array of style entries.

>  static void mov_text_text_cb(void *priv, const char *text, int len)
>  {
>      MovTextContext *s = priv;
> -    av_assert0(s->end >= s->ptr);
> -    av_strlcpy(s->ptr, text, FFMIN(s->end - s->ptr, len + 1));
> -    s->ptr += FFMIN(s->end - s->ptr, len);
> +    av_bprint_append_data(&s->buffer, text, len);
> +    s->text_pos += len;
>  }
>  
>  static void mov_text_new_line_cb(void *priv, int forced)
>  {
>      MovTextContext *s = priv;
> -    av_assert0(s->end >= s->ptr);
> -    av_strlcpy(s->ptr, "\n", FFMIN(s->end - s->ptr, 2));
> -    if (s->end > s->ptr)
> -        s->ptr++;
> +    av_bprint_append_data(&s->buffer, "\n", 2);
> +    s->text_pos += 2;
>  }
>  
>  static const ASSCodesCallbacks mov_text_callbacks = {
>      .text     = mov_text_text_cb,
>      .new_line = mov_text_new_line_cb,
> +    .style    = mov_text_style_cb,
>  };
>  
>  static int mov_text_encode_frame(AVCodecContext *avctx, unsigned
> char *buf, @@ -112,10 +147,9 @@ static int
> mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf, {
>      MovTextContext *s = avctx->priv_data;
>      ASSDialog *dialog;
> -    int i, len, num;
> +    int i, num;
>  
> -    s->ptr = s->buffer;
> -    s->end = s->ptr + sizeof(s->buffer);
> +    av_bprint_clear(&s->buffer);
>  
>      for (i = 0; i < sub->num_rects; i++) {
>  
> @@ -123,33 +157,47 @@ static int mov_text_encode_frame(AVCodecContext
> *avctx, unsigned char *buf, av_log(avctx, AV_LOG_ERROR, "Only
> SUBTITLE_ASS type supported.\n"); return AVERROR(ENOSYS);
>          }
> +        s->text_pos = 0;
>  
>          dialog = ff_ass_split_dialog(s->ass_ctx, sub->rects[i]->ass,
> 0, &num); for (; dialog && num--; dialog++) {
>              ff_ass_split_override_codes(&mov_text_callbacks, s,
> dialog->text); }
> +        if (s->tsmb_type == MKTAG('s','t','y','l') && s->style_end) {
> +            s->tsmb_size = MKTAG(0x00, 0x00, 0x00, 0x16);

This can't be hard-coded, it will vary with the number of styles.

> +            s->style_entries = 0x00 | 0x01<<8;
> +            s->style_fontID = 0x00 | 0x01<<8;
> +            s->style_fontsize = 0x12;
> +            s->style_color = MKTAG(0xFF, 0xFF, 0xFF, 0xFF);
> +            av_bprint_append_data(&s->buffer, &s->tsmb_size, 4);
> +            av_bprint_append_data(&s->buffer, &s->tsmb_type, 4);
> +            av_bprint_append_data(&s->buffer, &s->style_entries, 2);
> +            av_bprint_append_data(&s->buffer, &s->style_start, 2);
> +            av_bprint_append_data(&s->buffer, &s->style_end, 2);
> +            av_bprint_append_data(&s->buffer, &s->style_fontID, 2);
> +            av_bprint_append_data(&s->buffer, &s->style_flag, 1);
> +            av_bprint_append_data(&s->buffer, &s->style_fontsize, 1);
> +            av_bprint_append_data(&s->buffer, &s->style_color, 4);
> +        }
>      }

As above, this all assumes a single style.

> -    if (s->ptr == s->buffer)
> -        return 0;
> -
> -    AV_WB16(buf, strlen(s->buffer));
> +    AV_WB16(buf, s->text_pos);
>      buf += 2;
>  
> -    len = av_strlcpy(buf, s->buffer, bufsize - 2);
> -
> -    if (len > bufsize-3) {
> -        av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS
> event.\n");
> -        return AVERROR(EINVAL);
> -    }
> +    if (!av_bprint_is_complete(&s->buffer))
> +        return AVERROR(ENOMEM);
> +    if (!s->buffer.len)
> +        return 0;
>  
> -    return len + 2;
> +    memcpy(buf, s->buffer.str, s->buffer.len);
> +    return s->buffer.len + 2;

I would clear the bprint at the end so that it's not taking up memory
after it's no longer of use.

>  }
>  
>  static int mov_text_encode_close(AVCodecContext *avctx)
>  {
>      MovTextContext *s = avctx->priv_data;
>      ff_ass_split_free(s->ass_ctx);
> +    av_bprint_finalize(&s->buffer, NULL);
>      return 0;
>  }
>  

--phil