FFmpeg
samidec.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Clément Bœsch
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * SAMI subtitle decoder
24  * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
25  */
26 
27 #include "ass.h"
28 #include "libavutil/attributes.h"
29 #include "libavutil/avstring.h"
30 #include "libavutil/bprint.h"
31 #include "libavutil/mem.h"
32 #include "codec_internal.h"
33 #include "htmlsubtitles.h"
34 
35 typedef struct {
36  AVBPrint source;
37  AVBPrint content;
38  AVBPrint encoded_source;
39  AVBPrint encoded_content;
40  AVBPrint full;
41  int readorder;
42 } SAMIContext;
43 
44 static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
45 {
46  SAMIContext *sami = avctx->priv_data;
47  int ret = 0;
48  char *tag = NULL;
49  char *dupsrc = av_strdup(src);
50  char *p = dupsrc;
51  AVBPrint *dst_content = &sami->encoded_content;
52  AVBPrint *dst_source = &sami->encoded_source;
53 
54  if (!dupsrc)
55  return AVERROR(ENOMEM);
56 
58  av_bprint_clear(&sami->content);
60  for (;;) {
61  char *saveptr = NULL;
62  int prev_chr_is_space = 0;
63  AVBPrint *dst = &sami->content;
64 
65  /* parse & extract paragraph tag */
66  p = av_stristr(p, "<P");
67  if (!p)
68  break;
69  if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
70  p++;
71  continue;
72  }
73  if (dst->len) // add a separator with the previous paragraph if there was one
74  av_bprintf(dst, "\\N");
75  tag = av_strtok(p, ">", &saveptr);
76  if (!tag || !saveptr)
77  break;
78  p = saveptr;
79 
80  /* check if the current paragraph is the "source" (speaker name) */
81  if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
82  dst = &sami->source;
84  }
85 
86  /* if empty event -> skip subtitle */
87  while (av_isspace(*p))
88  p++;
89  if (!strncmp(p, "&nbsp;", 6)) {
90  ret = -1;
91  goto end;
92  }
93 
94  /* extract the text, stripping most of the tags */
95  while (*p) {
96  if (*p == '<') {
97  if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
98  break;
99  }
100  if (!av_strncasecmp(p, "<BR", 3)) {
101  av_bprintf(dst, "\\N");
102  p++;
103  while (*p && *p != '>')
104  p++;
105  if (!*p)
106  break;
107  if (*p == '>')
108  p++;
109  continue;
110  }
111  if (!av_isspace(*p))
112  av_bprint_chars(dst, *p, 1);
113  else if (!prev_chr_is_space)
114  av_bprint_chars(dst, ' ', 1);
115  prev_chr_is_space = av_isspace(*p);
116  p++;
117  }
118  }
119 
120  av_bprint_clear(&sami->full);
121  if (sami->source.len) {
122  ret = ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
123  if (ret < 0)
124  goto end;
125  av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
126  }
127  ret = ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
128  if (ret < 0)
129  goto end;
130  av_bprintf(&sami->full, "%s", sami->encoded_content.str);
131 
132 end:
133  av_free(dupsrc);
134  return ret;
135 }
136 
138  int *got_sub_ptr, const AVPacket *avpkt)
139 {
140  const char *ptr = avpkt->data;
141  SAMIContext *sami = avctx->priv_data;
142 
143  if (ptr && avpkt->size > 0) {
144  int ret = sami_paragraph_to_ass(avctx, ptr);
145  if (ret < 0)
146  return ret;
147  // TODO: pass escaped sami->encoded_source.str as source
148  ret = ff_ass_add_rect(sub, sami->full.str, sami->readorder++, 0, NULL, NULL);
149  if (ret < 0)
150  return ret;
151  }
152  *got_sub_ptr = sub->num_rects > 0;
153  return avpkt->size;
154 }
155 
156 static av_cold int sami_init(AVCodecContext *avctx)
157 {
158  SAMIContext *sami = avctx->priv_data;
159  av_bprint_init(&sami->source, 0, 2048);
160  av_bprint_init(&sami->content, 0, 2048);
161  av_bprint_init(&sami->encoded_source, 0, 2048);
162  av_bprint_init(&sami->encoded_content, 0, 2048);
163  av_bprint_init(&sami->full, 0, 2048);
164  return ff_ass_subtitle_header_default(avctx);
165 }
166 
168 {
169  SAMIContext *sami = avctx->priv_data;
170  av_bprint_finalize(&sami->source, NULL);
174  av_bprint_finalize(&sami->full, NULL);
175  return 0;
176 }
177 
178 static av_cold void sami_flush(AVCodecContext *avctx)
179 {
180  SAMIContext *sami = avctx->priv_data;
181  if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
182  sami->readorder = 0;
183 }
184 
186  .p.name = "sami",
187  CODEC_LONG_NAME("SAMI subtitle"),
188  .p.type = AVMEDIA_TYPE_SUBTITLE,
189  .p.id = AV_CODEC_ID_SAMI,
190  .priv_data_size = sizeof(SAMIContext),
191  .init = sami_init,
192  .close = sami_close,
194  .flush = sami_flush,
195 };
AVSubtitle
Definition: avcodec.h:2082
AVMEDIA_TYPE_SUBTITLE
@ AVMEDIA_TYPE_SUBTITLE
Definition: avutil.h:203
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
ff_ass_subtitle_header_default
int ff_ass_subtitle_header_default(AVCodecContext *avctx)
Generate a suitable AVCodecContext.subtitle_header for SUBTITLE_ASS with default style.
Definition: ass.c:98
av_bprint_init
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
av_stristr
char * av_stristr(const char *s1, const char *s2)
Locate the first case-independent occurrence in the string haystack of the string needle.
Definition: avstring.c:58
htmlsubtitles.h
AVSubtitle::num_rects
unsigned num_rects
Definition: avcodec.h:2086
av_isspace
static av_const int av_isspace(int c)
Locale-independent conversion of ASCII isspace.
Definition: avstring.h:218
ff_ass_add_rect
int ff_ass_add_rect(AVSubtitle *sub, const char *dialog, int readorder, int layer, const char *style, const char *speaker)
Add an ASS dialog to a subtitle.
Definition: ass.c:159
AVPacket::data
uint8_t * data
Definition: packet.h:558
SAMIContext::encoded_content
AVBPrint encoded_content
Definition: samidec.c:39
FFCodec
Definition: codec_internal.h:127
sami_decode_frame
static int sami_decode_frame(AVCodecContext *avctx, AVSubtitle *sub, int *got_sub_ptr, const AVPacket *avpkt)
Definition: samidec.c:137
SAMIContext::source
AVBPrint source
Definition: samidec.c:36
SAMIContext::encoded_source
AVBPrint encoded_source
Definition: samidec.c:38
close
static av_cold void close(AVCodecParserContext *s)
Definition: apv_parser.c:135
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:131
sami_close
static av_cold int sami_close(AVCodecContext *avctx)
Definition: samidec.c:167
ff_sami_decoder
const FFCodec ff_sami_decoder
Definition: samidec.c:185
ass.h
sami_init
static av_cold int sami_init(AVCodecContext *avctx)
Definition: samidec.c:156
av_cold
#define av_cold
Definition: attributes.h:90
av_strtok
char * av_strtok(char *s, const char *delim, char **saveptr)
Split the string into several tokens which can be accessed by successive calls to av_strtok().
Definition: avstring.c:179
sami_flush
static av_cold void sami_flush(AVCodecContext *avctx)
Definition: samidec.c:178
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:331
SAMIContext::content
AVBPrint content
Definition: samidec.c:37
SAMIContext::full
AVBPrint full
Definition: samidec.c:40
NULL
#define NULL
Definition: coverity.c:32
av_strncasecmp
int av_strncasecmp(const char *a, const char *b, size_t n)
Locale-independent case-insensitive compare.
Definition: avstring.c:218
AVCodecContext::flags2
int flags2
AV_CODEC_FLAG2_*.
Definition: avcodec.h:495
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
AVPacket::size
int size
Definition: packet.h:559
av_bprint_finalize
int av_bprint_finalize(AVBPrint *buf, char **ret_str)
Finalize a print buffer.
Definition: bprint.c:235
codec_internal.h
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
attributes.h
bprint.h
AV_CODEC_ID_SAMI
@ AV_CODEC_ID_SAMI
Definition: codec_id.h:574
ff_htmlmarkup_to_ass
int ff_htmlmarkup_to_ass(void *log_ctx, AVBPrint *dst, const char *in)
Definition: htmlsubtitles.c:129
SAMIContext::readorder
int readorder
Definition: samidec.c:41
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:179
tag
uint32_t tag
Definition: movenc.c:1957
ret
ret
Definition: filter_design.txt:187
SAMIContext
Definition: samidec.c:35
av_bprintf
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:122
sami_paragraph_to_ass
static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
Definition: samidec.c:44
AVCodecContext
main external API structure.
Definition: avcodec.h:431
av_bprint_clear
void av_bprint_clear(AVBPrint *buf)
Reset the string to "" but keep internal allocated data.
Definition: bprint.c:227
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
FF_CODEC_DECODE_SUB_CB
#define FF_CODEC_DECODE_SUB_CB(func)
Definition: codec_internal.h:350
av_strdup
char * av_strdup(const char *s)
Duplicate a string.
Definition: mem.c:272
mem.h
av_free
#define av_free(p)
Definition: tableprint_vlc.h:34
AVPacket
This structure stores compressed data.
Definition: packet.h:535
AVCodecContext::priv_data
void * priv_data
Definition: avcodec.h:458
av_bprint_chars
void av_bprint_chars(AVBPrint *buf, char c, unsigned n)
Append char c n times to a print buffer.
Definition: bprint.c:130
avstring.h
AV_CODEC_FLAG2_RO_FLUSH_NOOP
#define AV_CODEC_FLAG2_RO_FLUSH_NOOP
Do not reset ASS ReadOrder field on flush (subtitles decoding)
Definition: avcodec.h:372
src
#define src
Definition: vp8dsp.c:248