FFmpeg
samidec.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Clément Bœsch
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * SAMI subtitle decoder
24  * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
25  */
26 
27 #include "ass.h"
28 #include "libavutil/avstring.h"
29 #include "libavutil/bprint.h"
30 #include "htmlsubtitles.h"
31 
32 typedef struct {
33  AVBPrint source;
34  AVBPrint content;
35  AVBPrint encoded_source;
36  AVBPrint encoded_content;
37  AVBPrint full;
38  int readorder;
39 } SAMIContext;
40 
41 static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
42 {
43  SAMIContext *sami = avctx->priv_data;
44  int ret = 0;
45  char *tag = NULL;
46  char *dupsrc = av_strdup(src);
47  char *p = dupsrc;
48  AVBPrint *dst_content = &sami->encoded_content;
49  AVBPrint *dst_source = &sami->encoded_source;
50 
51  if (!dupsrc)
52  return AVERROR(ENOMEM);
53 
55  av_bprint_clear(&sami->content);
57  for (;;) {
58  char *saveptr = NULL;
59  int prev_chr_is_space = 0;
60  AVBPrint *dst = &sami->content;
61 
62  /* parse & extract paragraph tag */
63  p = av_stristr(p, "<P");
64  if (!p)
65  break;
66  if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
67  p++;
68  continue;
69  }
70  if (dst->len) // add a separator with the previous paragraph if there was one
71  av_bprintf(dst, "\\N");
72  tag = av_strtok(p, ">", &saveptr);
73  if (!tag || !saveptr)
74  break;
75  p = saveptr;
76 
77  /* check if the current paragraph is the "source" (speaker name) */
78  if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
79  dst = &sami->source;
80  av_bprint_clear(dst);
81  }
82 
83  /* if empty event -> skip subtitle */
84  while (av_isspace(*p))
85  p++;
86  if (!strncmp(p, "&nbsp;", 6)) {
87  ret = -1;
88  goto end;
89  }
90 
91  /* extract the text, stripping most of the tags */
92  while (*p) {
93  if (*p == '<') {
94  if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
95  break;
96  }
97  if (!av_strncasecmp(p, "<BR", 3)) {
98  av_bprintf(dst, "\\N");
99  p++;
100  while (*p && *p != '>')
101  p++;
102  if (!*p)
103  break;
104  if (*p == '>')
105  p++;
106  continue;
107  }
108  if (!av_isspace(*p))
109  av_bprint_chars(dst, *p, 1);
110  else if (!prev_chr_is_space)
111  av_bprint_chars(dst, ' ', 1);
112  prev_chr_is_space = av_isspace(*p);
113  p++;
114  }
115  }
116 
117  av_bprint_clear(&sami->full);
118  if (sami->source.len) {
119  ret = ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
120  if (ret < 0)
121  goto end;
122  av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
123  }
124  ret = ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
125  if (ret < 0)
126  goto end;
127  av_bprintf(&sami->full, "%s", sami->encoded_content.str);
128 
129 end:
130  av_free(dupsrc);
131  return ret;
132 }
133 
135  void *data, int *got_sub_ptr, AVPacket *avpkt)
136 {
137  AVSubtitle *sub = data;
138  const char *ptr = avpkt->data;
139  SAMIContext *sami = avctx->priv_data;
140 
141  if (ptr && avpkt->size > 0) {
142  int ret = sami_paragraph_to_ass(avctx, ptr);
143  if (ret < 0)
144  return ret;
145  // TODO: pass escaped sami->encoded_source.str as source
146  ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
147  if (ret < 0)
148  return ret;
149  }
150  *got_sub_ptr = sub->num_rects > 0;
151  return avpkt->size;
152 }
153 
154 static av_cold int sami_init(AVCodecContext *avctx)
155 {
156  SAMIContext *sami = avctx->priv_data;
157  av_bprint_init(&sami->source, 0, 2048);
158  av_bprint_init(&sami->content, 0, 2048);
159  av_bprint_init(&sami->encoded_source, 0, 2048);
160  av_bprint_init(&sami->encoded_content, 0, 2048);
161  av_bprint_init(&sami->full, 0, 2048);
162  return ff_ass_subtitle_header_default(avctx);
163 }
164 
166 {
167  SAMIContext *sami = avctx->priv_data;
168  av_bprint_finalize(&sami->source, NULL);
172  av_bprint_finalize(&sami->full, NULL);
173  return 0;
174 }
175 
176 static void sami_flush(AVCodecContext *avctx)
177 {
178  SAMIContext *sami = avctx->priv_data;
179  if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
180  sami->readorder = 0;
181 }
182 
184  .name = "sami",
185  .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
186  .type = AVMEDIA_TYPE_SUBTITLE,
187  .id = AV_CODEC_ID_SAMI,
188  .priv_data_size = sizeof(SAMIContext),
189  .init = sami_init,
190  .close = sami_close,
192  .flush = sami_flush,
193 };
#define NULL
Definition: coverity.c:32
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:94
static int sami_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr, AVPacket *avpkt)
Definition: samidec.c:134
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:100
static void flush(AVCodecContext *avctx)
char * av_stristr(const char *s1, const char *s2)
Locate the first case-independent occurrence in the string haystack of the string needle...
Definition: avstring.c:56
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
int size
Definition: avcodec.h:1478
static av_const int av_isspace(int c)
Locale-independent conversion of ASCII isspace.
Definition: avstring.h:222
int av_strncasecmp(const char *a, const char *b, size_t n)
Locale-independent case-insensitive compare.
Definition: avstring.c:223
unsigned num_rects
Definition: avcodec.h:3933
int ff_ass_add_rect(AVSubtitle *sub, const char *dialog, int readorder, int layer, const char *style, const char *speaker)
Add an ASS dialog to a subtitle.
Definition: ass.c:101
AVBPrint full
Definition: samidec.c:37
#define src
Definition: vp8dsp.c:254
AVCodec.
Definition: avcodec.h:3477
static void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame, FILE *outfile)
Definition: decode_audio.c:42
int av_bprint_finalize(AVBPrint *buf, char **ret_str)
Finalize a print buffer.
Definition: bprint.c:235
#define av_cold
Definition: attributes.h:82
static av_cold int sami_init(AVCodecContext *avctx)
Definition: samidec.c:154
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
AVBPrint encoded_content
Definition: samidec.c:36
int ff_ass_subtitle_header_default(AVCodecContext *avctx)
Generate a suitable AVCodecContext.subtitle_header for SUBTITLE_ASS with default style.
Definition: ass.c:80
static av_cold int sami_close(AVCodecContext *avctx)
Definition: samidec.c:165
uint8_t * data
Definition: avcodec.h:1477
static void sami_flush(AVCodecContext *avctx)
Definition: samidec.c:176
uint32_t tag
Definition: movenc.c:1496
#define AV_CODEC_FLAG2_RO_FLUSH_NOOP
Do not reset ASS ReadOrder field on flush (subtitles decoding)
Definition: avcodec.h:963
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
const char * name
Name of the codec implementation.
Definition: avcodec.h:3484
int ff_htmlmarkup_to_ass(void *log_ctx, AVBPrint *dst, const char *in)
char * av_strdup(const char *s)
Duplicate a string.
Definition: mem.c:251
main external API structure.
Definition: avcodec.h:1565
int readorder
Definition: samidec.c:38
AVCodec ff_sami_decoder
Definition: samidec.c:183
AVBPrint source
Definition: samidec.c:33
void av_bprint_clear(AVBPrint *buf)
Reset the string to "" but keep internal allocated data.
Definition: bprint.c:227
char * av_strtok(char *s, const char *delim, char **saveptr)
Split the string into several tokens which can be accessed by successive calls to av_strtok()...
Definition: avstring.c:184
void * priv_data
Definition: avcodec.h:1592
#define av_free(p)
int flags2
AV_CODEC_FLAG2_*.
Definition: avcodec.h:1652
AVBPrint encoded_source
Definition: samidec.c:35
static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
Definition: samidec.c:41
AVBPrint content
Definition: samidec.c:34
Filter design: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: for each input and each output, the list of supported formats is set. For video, that means pixel format. For audio, that means channel layout, sample format and sample rate. The lists are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining ones, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
This structure stores compressed data.
Definition: avcodec.h:1454
void av_bprint_chars(AVBPrint *buf, char c, unsigned n)
Append char c n times to a print buffer.
Definition: bprint.c:140