[FFmpeg-devel] [PATCH 2/4] avformat/srtdec: UTF-16 support

Wed Sep 3 15:33:30 CEST 2014

On Tue, 2 Sep 2014 23:54:51 +0200
Clément Bœsch <u at pkh.me> wrote:

> On Tue, Sep 02, 2014 at 08:56:10PM +0200, wm4 wrote:
> > ---
> >  libavformat/srtdec.c    | 22 +++++++++++----------
> >  libavformat/subtitles.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++--
> >  libavformat/subtitles.h | 33 +++++++++++++++++++++++++++++---
> >  3 files changed, 91 insertions(+), 15 deletions(-)
> > 
> > diff --git a/libavformat/srtdec.c b/libavformat/srtdec.c
> > index 53182cd..a258cfe 100644
> > --- a/libavformat/srtdec.c
> > +++ b/libavformat/srtdec.c
> > @@ -31,20 +31,20 @@ typedef struct {
> >  
> >  static int srt_probe(AVProbeData *p)
> >  {
> > -    const unsigned char *ptr = p->buf;
> >      int i, v, num = 0;
> > +    FFTextReader tr;
> >  
> > -    if (AV_RB24(ptr) == 0xEFBBBF)
> > -        ptr += 3;  /* skip UTF-8 BOM */
> > +    ff_text_init_buf(&tr, p->buf, p->buf_size);
> >  
> > -    while (*ptr == '\r' || *ptr == '\n')
> > -        ptr++;
> > +    while (ff_text_peek_r8(&tr) == '\r' || ff_text_peek_r8(&tr) == '\n')
> > +        ff_text_r8(&tr);
> >      for (i=0; i<2; i++) {
> > +        char buf[128];
> > +        ff_subtitles_read_line(&tr, buf, sizeof(buf));
> >          if ((num == i || num + 1 == i)
> > -            && sscanf(ptr, "%*d:%*2d:%*2d%*1[,.]%*3d --> %*d:%*2d:%*2d%*1[,.]%3d", &v) == 1)
> > +            && sscanf(buf, "%*d:%*2d:%*2d%*1[,.]%*3d --> %*d:%*2d:%*2d%*1[,.]%3d", &v) == 1)
> >              return AVPROBE_SCORE_MAX;
> > -        num = atoi(ptr);
> > -        ptr += ff_subtitles_next_line(ptr);
> > +        num = atoi(buf);
> 
> So ff_subtitles_read_line() actually makes the probe too slow?
> 
> What if you try to:
>   buf[0] >= '0' && buf[1] <= '9' && strstr(buf, " --> ") && sscanf(...)
> to gain some little time by sometimes avoiding the sscanf call?

Actually, I made a mistake when testing, and with this change, the
performance is more reasonable now, and probably not an issue anymore.

> >      }
> >      return 0;
> >  }
> > @@ -79,6 +79,8 @@ static int srt_read_header(AVFormatContext *s)
> >      AVBPrint buf;
> >      AVStream *st = avformat_new_stream(s, NULL);
> >      int res = 0;
> > +    FFTextReader tr;
> > +    ff_text_init_avio(&tr, s->pb);
> >  
> >      if (!st)
> >          return AVERROR(ENOMEM);
> > @@ -88,8 +90,8 @@ static int srt_read_header(AVFormatContext *s)
> >  
> >      av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
> >  
> > -    while (!avio_feof(s->pb)) {
> > -        ff_subtitles_read_chunk(s->pb, &buf);
> > +    while (!ff_text_eof(&tr)) {
> > +        ff_subtitles_read_text_chunk(&tr, &buf);
> >  
> >          if (buf.len) {
> 
> >              int64_t pos = avio_tell(s->pb);
> 
> ff_text_pos()?
> 
> [...]
> > +size_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size)
> > +{
> > +    size_t cur = 0;
> > +    if (!size)
> > +        return 0;
> > +    while (cur + 1 < size) {
> > +        unsigned char c = ff_text_r8(tr);
> > +        if (!c && ff_text_eof(tr))
> > +            return cur;
> > +        if (c == '\r' || c == '\n')
> > +            break;
> > +        buf[cur++] = c;
> 
> > +        buf[cur] = '\0';
> 
> By reworking the logic, you might be able avoid double writing that end
> character and gain some speed for the probing.
> 
> > +    }
> > +    if (ff_text_peek_r8(tr) == '\r')
> > +        ff_text_r8(tr);
> > +    if (ff_text_peek_r8(tr) == '\n')
> > +        ff_text_r8(tr);
> > +    return cur;
> > +}
> 
> LGTM otherwise
>