[FFmpeg-devel] [PATCH 2/2] refactored semidec

Yayoi Ukai yayoi.ukai at gmail.com
Fri Apr 10 10:39:25 CEST 2015


To answer the question: kind of, yes. It is not the parsing part but the
converting part.  The code is refactored to use the same function that
converts the HTML-style markup that exists in both SRT and SAMI.  So the
original files take care of the parsing up to the point where the refactored
function can be used to convert these subtitle formats to .ass.

I tested as below.

./ffmpeg -i in.srt out.ass

or

./ffmpeg -i inr.smi out.ass

and

./ffplay -f lavfi -i color=s=hd720 -vf ass=out.ass

I hope it is clear now and thank you!

On Fri, Apr 10, 2015 at 1:12 AM, Yayoi Ukai <yayoi.ukai at gmail.com> wrote:

> Sorry, I should have written that out. It adds support for color etc. when
> you convert SAMI format subtitles to ASS format.
> The code was already there for SRT to ASS, so it is refactored to extend
> that support to converting SAMI format subtitles to ASS format.
> I tested with these files.  (Attached..)
>
> Thank you!
> Yayoi
>
>
>
>
> On Fri, Apr 10, 2015 at 12:50 AM, wm4 <nfxjfg at googlemail.com> wrote:
>
>> On Thu,  9 Apr 2015 23:54:23 -0700
>> Yayoi <yayoi.ukai at gmail.com> wrote:
>>
>> > ---
>> >  libavcodec/Makefile        |  2 +-
>> >  libavcodec/htmlsubtitles.c |  6 ++++++
>> >  libavcodec/samidec.c       | 52
>> ++++++++++++++++------------------------------
>> >  3 files changed, 25 insertions(+), 35 deletions(-)
>> >
>> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> > index 8384458..8e780ad 100644
>> > --- a/libavcodec/Makefile
>> > +++ b/libavcodec/Makefile
>> > @@ -422,7 +422,7 @@ OBJS-$(CONFIG_RV20_DECODER)            += rv10.o
>> >  OBJS-$(CONFIG_RV20_ENCODER)            += rv20enc.o
>> >  OBJS-$(CONFIG_RV30_DECODER)            += rv30.o rv34.o rv30dsp.o
>> rv34dsp.o
>> >  OBJS-$(CONFIG_RV40_DECODER)            += rv40.o rv34.o rv34dsp.o
>> rv40dsp.o
>> > -OBJS-$(CONFIG_SAMI_DECODER)            += samidec.o ass.o
>> > +OBJS-$(CONFIG_SAMI_DECODER)            += samidec.o ass.o
>> htmlsubtitles.o
>> >  OBJS-$(CONFIG_S302M_DECODER)           += s302m.o
>> >  OBJS-$(CONFIG_S302M_ENCODER)           += s302menc.o
>> >  OBJS-$(CONFIG_SANM_DECODER)            += sanm.o
>> > diff --git a/libavcodec/htmlsubtitles.c b/libavcodec/htmlsubtitles.c
>> > index 7eeec98..54a9707 100644
>> > --- a/libavcodec/htmlsubtitles.c
>> > +++ b/libavcodec/htmlsubtitles.c
>> > @@ -93,6 +93,11 @@ void ff_htmlmarkup_to_ass(AVCodecContext *avctx,
>> AVBPrint *dst, const char *in)
>> >                  av_bprint_chars(dst, *in, 1);
>> >              break;
>> >          case '<':
>> > +            if (!av_strncasecmp(in, "<BR", 3)){
>> > +                    av_bprintf(dst, "\\N");
>> > +                    in += 4;
>> > +            }
>> > +
>> >              tag_close = in[1] == '/';
>> >              len = 0;
>> >              if (sscanf(in+tag_close+1, "%127[^>]>%n", buffer, &len) >=
>> 1 && len > 0) {
>> > @@ -175,5 +180,6 @@ void ff_htmlmarkup_to_ass(AVCodecContext *avctx,
>> AVBPrint *dst, const char *in)
>> >      while (dst->len >= 2 && !strncmp(&dst->str[dst->len - 2], "\\N",
>> 2))
>> >          dst->len -= 2;
>> >      dst->str[dst->len] = 0;
>> > +
>> >      rstrip_spaces_buf(dst);
>> >  }
>> > \ No newline at end of file
>> > diff --git a/libavcodec/samidec.c b/libavcodec/samidec.c
>> > index 47850e2..df84bd5 100644
>> > --- a/libavcodec/samidec.c
>> > +++ b/libavcodec/samidec.c
>> > @@ -28,6 +28,13 @@
>> >  #include "libavutil/avstring.h"
>> >  #include "libavutil/bprint.h"
>> >
>> > +
>> > +#include "libavutil/common.h"
>> > +#include "libavutil/intreadwrite.h"
>> > +#include "libavutil/parseutils.h"
>> > +#include "avcodec.h"
>> > +#include "htmlsubtitles.h"
>> > +
>> >  typedef struct {
>> >      AVBPrint source;
>> >      AVBPrint content;
>> > @@ -40,28 +47,27 @@ static int sami_paragraph_to_ass(AVCodecContext
>> *avctx, const char *src)
>> >      int ret = 0;
>> >      char *tag = NULL;
>> >      char *dupsrc = av_strdup(src);
>> > -    char *p = dupsrc;
>> > +    char *in = dupsrc;
>> >
>> >      av_bprint_clear(&sami->content);
>> >      for (;;) {
>> >          char *saveptr = NULL;
>> > -        int prev_chr_is_space = 0;
>> >          AVBPrint *dst = &sami->content;
>> >
>> >          /* parse & extract paragraph tag */
>> > -        p = av_stristr(p, "<P");
>> > -        if (!p)
>> > +        in = av_stristr(in, "<P");
>> > +        if (!in)
>> >              break;
>> > -        if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion
>> with tags such as <PRE>
>> > -            p++;
>> > +        if (in[2] != '>' && !av_isspace(in[2])) { // avoid confusion
>> with tags such as <PRE>
>> > +            in++;
>> >              continue;
>> >          }
>> >          if (dst->len) // add a separator with the previous paragraph
>> if there was one
>> >              av_bprintf(dst, "\\N");
>> > -        tag = av_strtok(p, ">", &saveptr);
>> > +        tag = av_strtok(in, ">", &saveptr);
>> >          if (!tag || !saveptr)
>> >              break;
>> > -        p = saveptr;
>> > +        in = saveptr;
>> >
>> >          /* check if the current paragraph is the "source" (speaker
>> name) */
>> >          if (av_stristr(tag, "ID=Source") || av_stristr(tag,
>> "ID=\"Source\"")) {
>> > @@ -70,36 +76,14 @@ static int sami_paragraph_to_ass(AVCodecContext
>> *avctx, const char *src)
>> >          }
>> >
>> >          /* if empty event -> skip subtitle */
>> > -        while (av_isspace(*p))
>> > -            p++;
>> > -        if (!strncmp(p, "&nbsp;", 6)) {
>> > +        while (av_isspace(*in))
>> > +            in++;
>> > +        if (!strncmp(in, "&nbsp;", 6)) {
>> >              ret = -1;
>> >              goto end;
>> >          }
>> >
>> > -        /* extract the text, stripping most of the tags */
>> > -        while (*p) {
>> > -            if (*p == '<') {
>> > -                if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' ||
>> av_isspace(p[2])))
>> > -                    break;
>> > -                if (!av_strncasecmp(p, "<BR", 3))
>> > -                    av_bprintf(dst, "\\N");
>> > -                p++;
>> > -                while (*p && *p != '>')
>> > -                    p++;
>> > -                if (!*p)
>> > -                    break;
>> > -                if (*p == '>')
>> > -                    p++;
>> > -                continue;
>> > -            }
>> > -            if (!av_isspace(*p))
>> > -                av_bprint_chars(dst, *p, 1);
>> > -            else if (!prev_chr_is_space)
>> > -                av_bprint_chars(dst, ' ', 1);
>> > -            prev_chr_is_space = av_isspace(*p);
>> > -            p++;
>> > -        }
>> > +        ff_htmlmarkup_to_ass(avctx, dst, in);
>> >      }
>> >
>> >      av_bprint_clear(&sami->full);
>>
>> So... what exactly does this do? Use the same parser for srt and sami?
>> How does this work out, aren't they different formats? (Such things
>> could be written in the commit message.)
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel at ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: inr.smi
Type: application/smil
Size: 55583 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20150410/0471c6b9/attachment.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: in.srt
Type: application/octet-stream
Size: 5599 bytes
Desc: not available
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20150410/0471c6b9/attachment.obj>


More information about the ffmpeg-devel mailing list