FFmpeg
libtheoraenc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2006 Paul Richards <paul.richards@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * @brief Theora encoder using libtheora.
24  * @author Paul Richards <paul.richards@gmail.com>
25  *
26  * A lot of this is copy / paste from other output codecs in
27  * libavcodec or pure guesswork (or both).
28  *
29  * I have used t_ prefixes on variables which are libtheora types
30  * and o_ prefixes on variables which are libogg types.
31  */
32 
33 /* FFmpeg includes */
34 #include "libavutil/common.h"
35 #include "libavutil/intreadwrite.h"
36 #include "libavutil/pixdesc.h"
37 #include "libavutil/log.h"
38 #include "libavutil/base64.h"
39 #include "avcodec.h"
40 #include "internal.h"
41 
42 /* libtheora includes */
43 #include <theora/theoraenc.h>
44 
45 typedef struct TheoraContext {
46  th_enc_ctx *t_state;
50  int uv_hshift;
51  int uv_vshift;
54 
55 /** Concatenate an ogg_packet into the extradata. */
56 static int concatenate_packet(unsigned int* offset,
57  AVCodecContext* avc_context,
58  const ogg_packet* packet)
59 {
60  const char* message = NULL;
61  int newsize = avc_context->extradata_size + 2 + packet->bytes;
62  int err = AVERROR_INVALIDDATA;
63 
64  if (packet->bytes < 0) {
65  message = "ogg_packet has negative size";
66  } else if (packet->bytes > 0xffff) {
67  message = "ogg_packet is larger than 65535 bytes";
68  } else if (newsize < avc_context->extradata_size) {
69  message = "extradata_size would overflow";
70  } else {
71  if ((err = av_reallocp(&avc_context->extradata, newsize)) < 0) {
72  avc_context->extradata_size = 0;
73  message = "av_realloc failed";
74  }
75  }
76  if (message) {
77  av_log(avc_context, AV_LOG_ERROR, "concatenate_packet failed: %s\n", message);
78  return err;
79  }
80 
81  avc_context->extradata_size = newsize;
82  AV_WB16(avc_context->extradata + (*offset), packet->bytes);
83  *offset += 2;
84  memcpy(avc_context->extradata + (*offset), packet->packet, packet->bytes);
85  (*offset) += packet->bytes;
86  return 0;
87 }
88 
89 static int get_stats(AVCodecContext *avctx, int eos)
90 {
91 #ifdef TH_ENCCTL_2PASS_OUT
92  TheoraContext *h = avctx->priv_data;
93  uint8_t *buf;
94  int bytes;
95 
96  bytes = th_encode_ctl(h->t_state, TH_ENCCTL_2PASS_OUT, &buf, sizeof(buf));
97  if (bytes < 0) {
98  av_log(avctx, AV_LOG_ERROR, "Error getting first pass stats\n");
99  return AVERROR_EXTERNAL;
100  }
101  if (!eos) {
102  void *tmp = av_fast_realloc(h->stats, &h->stats_size,
103  h->stats_offset + bytes);
104  if (!tmp)
105  return AVERROR(ENOMEM);
106  h->stats = tmp;
107  memcpy(h->stats + h->stats_offset, buf, bytes);
108  h->stats_offset += bytes;
109  } else {
110  int b64_size = AV_BASE64_SIZE(h->stats_offset);
111  // libtheora generates a summary header at the end
112  memcpy(h->stats, buf, bytes);
113  avctx->stats_out = av_malloc(b64_size);
114  if (!avctx->stats_out)
115  return AVERROR(ENOMEM);
116  av_base64_encode(avctx->stats_out, b64_size, h->stats, h->stats_offset);
117  }
118  return 0;
119 #else
120  av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n");
121  return AVERROR(ENOSUP);
122 #endif
123 }
124 
125 // libtheora won't read the entire buffer we give it at once, so we have to
126 // repeatedly submit it...
127 static int submit_stats(AVCodecContext *avctx)
128 {
129 #ifdef TH_ENCCTL_2PASS_IN
130  TheoraContext *h = avctx->priv_data;
131  int bytes;
132  if (!h->stats) {
133  if (!avctx->stats_in) {
134  av_log(avctx, AV_LOG_ERROR, "No statsfile for second pass\n");
135  return AVERROR(EINVAL);
136  }
137  h->stats_size = strlen(avctx->stats_in) * 3/4;
138  h->stats = av_malloc(h->stats_size);
139  if (!h->stats) {
140  h->stats_size = 0;
141  return AVERROR(ENOMEM);
142  }
143  h->stats_size = av_base64_decode(h->stats, avctx->stats_in, h->stats_size);
144  }
145  while (h->stats_size - h->stats_offset > 0) {
146  bytes = th_encode_ctl(h->t_state, TH_ENCCTL_2PASS_IN,
147  h->stats + h->stats_offset,
148  h->stats_size - h->stats_offset);
149  if (bytes < 0) {
150  av_log(avctx, AV_LOG_ERROR, "Error submitting stats\n");
151  return AVERROR_EXTERNAL;
152  }
153  if (!bytes)
154  return 0;
155  h->stats_offset += bytes;
156  }
157  return 0;
158 #else
159  av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n");
160  return AVERROR(ENOSUP);
161 #endif
162 }
163 
164 static av_cold int encode_init(AVCodecContext* avc_context)
165 {
166  th_info t_info;
167  th_comment t_comment;
168  ogg_packet o_packet;
169  unsigned int offset;
170  TheoraContext *h = avc_context->priv_data;
171  uint32_t gop_size = avc_context->gop_size;
172  int ret;
173 
174  /* Set up the theora_info struct */
175  th_info_init(&t_info);
176  t_info.frame_width = FFALIGN(avc_context->width, 16);
177  t_info.frame_height = FFALIGN(avc_context->height, 16);
178  t_info.pic_width = avc_context->width;
179  t_info.pic_height = avc_context->height;
180  t_info.pic_x = 0;
181  t_info.pic_y = 0;
182  /* Swap numerator and denominator as time_base in AVCodecContext gives the
183  * time period between frames, but theora_info needs the framerate. */
184  t_info.fps_numerator = avc_context->time_base.den;
185  t_info.fps_denominator = avc_context->time_base.num;
186  if (avc_context->sample_aspect_ratio.num) {
187  t_info.aspect_numerator = avc_context->sample_aspect_ratio.num;
188  t_info.aspect_denominator = avc_context->sample_aspect_ratio.den;
189  } else {
190  t_info.aspect_numerator = 1;
191  t_info.aspect_denominator = 1;
192  }
193 
194  if (avc_context->color_primaries == AVCOL_PRI_BT470M)
195  t_info.colorspace = TH_CS_ITU_REC_470M;
196  else if (avc_context->color_primaries == AVCOL_PRI_BT470BG)
197  t_info.colorspace = TH_CS_ITU_REC_470BG;
198  else
199  t_info.colorspace = TH_CS_UNSPECIFIED;
200 
201  if (avc_context->pix_fmt == AV_PIX_FMT_YUV420P)
202  t_info.pixel_fmt = TH_PF_420;
203  else if (avc_context->pix_fmt == AV_PIX_FMT_YUV422P)
204  t_info.pixel_fmt = TH_PF_422;
205  else if (avc_context->pix_fmt == AV_PIX_FMT_YUV444P)
206  t_info.pixel_fmt = TH_PF_444;
207  else {
208  av_log(avc_context, AV_LOG_ERROR, "Unsupported pix_fmt\n");
209  return AVERROR(EINVAL);
210  }
211  ret = av_pix_fmt_get_chroma_sub_sample(avc_context->pix_fmt, &h->uv_hshift, &h->uv_vshift);
212  if (ret)
213  return ret;
214 
215  if (avc_context->flags & AV_CODEC_FLAG_QSCALE) {
216  /* Clip global_quality in QP units to the [0 - 10] range
217  to be consistent with the libvorbis implementation.
218  Theora accepts a quality parameter which is an int value in
219  the [0 - 63] range.
220  */
221  t_info.quality = av_clipf(avc_context->global_quality / (float)FF_QP2LAMBDA, 0, 10) * 6.3;
222  t_info.target_bitrate = 0;
223  } else {
224  t_info.target_bitrate = avc_context->bit_rate;
225  t_info.quality = 0;
226  }
227 
228  /* Now initialise libtheora */
229  h->t_state = th_encode_alloc(&t_info);
230  if (!h->t_state) {
231  av_log(avc_context, AV_LOG_ERROR, "theora_encode_init failed\n");
232  return AVERROR_EXTERNAL;
233  }
234 
235  h->keyframe_mask = (1 << t_info.keyframe_granule_shift) - 1;
236  /* Clear up theora_info struct */
237  th_info_clear(&t_info);
238 
239  if (th_encode_ctl(h->t_state, TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
240  &gop_size, sizeof(gop_size))) {
241  av_log(avc_context, AV_LOG_ERROR, "Error setting GOP size\n");
242  return AVERROR_EXTERNAL;
243  }
244 
245  // need to enable 2 pass (via TH_ENCCTL_2PASS_) before encoding headers
246  if (avc_context->flags & AV_CODEC_FLAG_PASS1) {
247  if ((ret = get_stats(avc_context, 0)) < 0)
248  return ret;
249  } else if (avc_context->flags & AV_CODEC_FLAG_PASS2) {
250  if ((ret = submit_stats(avc_context)) < 0)
251  return ret;
252  }
253 
254  /*
255  Output first header packet consisting of theora
256  header, comment, and tables.
257 
258  Each one is prefixed with a 16-bit size, then they
259  are concatenated together into libavcodec's extradata.
260  */
261  offset = 0;
262 
263  /* Headers */
264  th_comment_init(&t_comment);
265 
266  while (th_encode_flushheader(h->t_state, &t_comment, &o_packet))
267  if ((ret = concatenate_packet(&offset, avc_context, &o_packet)) < 0)
268  return ret;
269 
270  th_comment_clear(&t_comment);
271 
272  return 0;
273 }
274 
275 static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
276  const AVFrame *frame, int *got_packet)
277 {
278  th_ycbcr_buffer t_yuv_buffer;
279  TheoraContext *h = avc_context->priv_data;
280  ogg_packet o_packet;
281  int result, i, ret;
282 
283  // EOS, finish and get 1st pass stats if applicable
284  if (!frame) {
285  th_encode_packetout(h->t_state, 1, &o_packet);
286  if (avc_context->flags & AV_CODEC_FLAG_PASS1)
287  if ((ret = get_stats(avc_context, 1)) < 0)
288  return ret;
289  return 0;
290  }
291 
292  /* Copy planes to the theora yuv_buffer */
293  for (i = 0; i < 3; i++) {
294  t_yuv_buffer[i].width = FFALIGN(avc_context->width, 16) >> (i && h->uv_hshift);
295  t_yuv_buffer[i].height = FFALIGN(avc_context->height, 16) >> (i && h->uv_vshift);
296  t_yuv_buffer[i].stride = frame->linesize[i];
297  t_yuv_buffer[i].data = frame->data[i];
298  }
299 
300  if (avc_context->flags & AV_CODEC_FLAG_PASS2)
301  if ((ret = submit_stats(avc_context)) < 0)
302  return ret;
303 
304  /* Now call into theora_encode_YUVin */
305  result = th_encode_ycbcr_in(h->t_state, t_yuv_buffer);
306  if (result) {
307  const char* message;
308  switch (result) {
309  case -1:
310  message = "differing frame sizes";
311  break;
312  case TH_EINVAL:
313  message = "encoder is not ready or is finished";
314  break;
315  default:
316  message = "unknown reason";
317  break;
318  }
319  av_log(avc_context, AV_LOG_ERROR, "theora_encode_YUVin failed (%s) [%d]\n", message, result);
320  return AVERROR_EXTERNAL;
321  }
322 
323  if (avc_context->flags & AV_CODEC_FLAG_PASS1)
324  if ((ret = get_stats(avc_context, 0)) < 0)
325  return ret;
326 
327  /* Pick up returned ogg_packet */
328  result = th_encode_packetout(h->t_state, 0, &o_packet);
329  switch (result) {
330  case 0:
331  /* No packet is ready */
332  return 0;
333  case 1:
334  /* Success, we have a packet */
335  break;
336  default:
337  av_log(avc_context, AV_LOG_ERROR, "theora_encode_packetout failed [%d]\n", result);
338  return AVERROR_EXTERNAL;
339  }
340 
341  /* Copy ogg_packet content out to buffer */
342  if ((ret = ff_alloc_packet2(avc_context, pkt, o_packet.bytes, 0)) < 0)
343  return ret;
344  memcpy(pkt->data, o_packet.packet, o_packet.bytes);
345 
346  // HACK: assumes no encoder delay, this is true until libtheora becomes
347  // multithreaded (which will be disabled unless explicitly requested)
348  pkt->pts = pkt->dts = frame->pts;
349 #if FF_API_CODED_FRAME
351  avc_context->coded_frame->key_frame = !(o_packet.granulepos & h->keyframe_mask);
353 #endif
354  if (!(o_packet.granulepos & h->keyframe_mask))
355  pkt->flags |= AV_PKT_FLAG_KEY;
356  *got_packet = 1;
357 
358  return 0;
359 }
360 
361 static av_cold int encode_close(AVCodecContext* avc_context)
362 {
363  TheoraContext *h = avc_context->priv_data;
364 
365  th_encode_free(h->t_state);
366  av_freep(&h->stats);
367  av_freep(&avc_context->stats_out);
368  av_freep(&avc_context->extradata);
369  avc_context->extradata_size = 0;
370 
371  return 0;
372 }
373 
374 /** AVCodec struct exposed to libavcodec */
376  .name = "libtheora",
377  .long_name = NULL_IF_CONFIG_SMALL("libtheora Theora"),
378  .type = AVMEDIA_TYPE_VIDEO,
379  .id = AV_CODEC_ID_THEORA,
380  .priv_data_size = sizeof(TheoraContext),
381  .init = encode_init,
382  .close = encode_close,
383  .encode2 = encode_frame,
384  .capabilities = AV_CODEC_CAP_DELAY, // needed to get the statsfile summary
385  .pix_fmts = (const enum AVPixelFormat[]){
387  },
388  .wrapper_name = "libtheora",
389 };
#define NULL
Definition: coverity.c:32
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:59
uint8_t * stats
Definition: libtheoraenc.c:47
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
int64_t bit_rate
the average bitrate
Definition: avcodec.h:1618
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
static int concatenate_packet(unsigned int *offset, AVCodecContext *avc_context, const ogg_packet *packet)
Concatenate an ogg_packet into the extradata.
Definition: libtheoraenc.c:56
int num
Numerator.
Definition: rational.h:59
AVRational sample_aspect_ratio
sample aspect ratio (0 if unknown) That is the width of a pixel divided by the height of the pixel...
Definition: avcodec.h:1947
enum AVPixelFormat pix_fmt
Pixel format, see AV_PIX_FMT_xxx.
Definition: avcodec.h:1778
char * stats_in
pass2 encoding statistics input buffer Concatenated stuff from stats_out of pass1 should be placed he...
Definition: avcodec.h:2595
static AVPacket pkt
AVCodec.
Definition: avcodec.h:3492
static int submit_stats(AVCodecContext *avctx)
Definition: libtheoraenc.c:127
AVRational time_base
This is the fundamental unit of time (in seconds) in terms of which frame timestamps are represented...
Definition: avcodec.h:1691
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: avcodec.h:1009
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:32
uint8_t
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:388
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:1669
uint8_t * data
Definition: avcodec.h:1480
char * stats_out
pass1 encoding statistics output buffer
Definition: avcodec.h:2587
#define AV_WB16(p, v)
Definition: intreadwrite.h:405
#define FFALIGN(x, a)
Definition: macros.h:48
#define av_log(a,...)
#define AV_PKT_FLAG_KEY
The packet contains a keyframe.
Definition: avcodec.h:1512
also FCC Title 47 Code of Federal Regulations 73.682 (a)(20)
Definition: pixfmt.h:448
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
int av_pix_fmt_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
Utility function to access log2_chroma_w log2_chroma_h from the pixel format AVPixFmtDescriptor.
Definition: pixdesc.c:2550
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:1648
const char * name
Name of the codec implementation.
Definition: avcodec.h:3499
char * av_base64_encode(char *out, int out_size, const uint8_t *in, int in_size)
Encode data to base64 and null-terminate.
Definition: base64.c:138
int flags
A combination of AV_PKT_FLAG values.
Definition: avcodec.h:1486
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
Definition: pixfmt.h:70
int gop_size
Definition: movenc.c:66
static int get_stats(AVCodecContext *avctx, int eos)
Definition: libtheoraenc.c:89
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:853
#define AV_BASE64_SIZE(x)
Calculate the output size needed to base64-encode x bytes to a null-terminated string.
Definition: base64.h:66
int width
picture width / height.
Definition: avcodec.h:1741
also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM
Definition: pixfmt.h:450
static av_cold int encode_init(AVCodecContext *avc_context)
Definition: libtheoraenc.c:164
#define AV_CODEC_FLAG_PASS1
Use internal 2pass ratecontrol in first pass mode.
Definition: avcodec.h:874
enum AVColorPrimaries color_primaries
Chromaticity coordinates of the source primaries.
Definition: avcodec.h:2182
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
void * av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
Reallocate the given buffer if it is not large enough, otherwise do nothing.
Definition: mem.c:476
int av_reallocp(void *ptr, size_t size)
Allocate, reallocate, or free a block of memory through a pointer to a pointer.
Definition: mem.c:163
Libavcodec external API header.
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:326
static int ogg_packet(AVFormatContext *s, int *sid, int *dstart, int *dsize, int64_t *fpos)
find the next Ogg packet
Definition: oggdec.c:477
main external API structure.
Definition: avcodec.h:1568
static av_cold int encode_close(AVCodecContext *avc_context)
Definition: libtheoraenc.c:361
th_enc_ctx * t_state
Definition: libtheoraenc.c:46
void * buf
Definition: avisynth_c.h:766
int extradata_size
Definition: avcodec.h:1670
static enum AVPixelFormat pix_fmts[]
Definition: libkvazaar.c:275
static int encode_frame(AVCodecContext *avc_context, AVPacket *pkt, const AVFrame *frame, int *got_packet)
Definition: libtheoraenc.c:275
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1634
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:309
int gop_size
the number of pictures in a group of pictures, or 0 for intra_only
Definition: avcodec.h:1763
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
#define FF_DISABLE_DEPRECATION_WARNINGS
Definition: internal.h:84
common internal api header.
common internal and external API header
attribute_deprecated AVFrame * coded_frame
the picture in the bitstream
Definition: avcodec.h:2818
int den
Denominator.
Definition: rational.h:60
#define AV_CODEC_FLAG_PASS2
Use internal 2pass ratecontrol in second pass mode.
Definition: avcodec.h:878
void * priv_data
Definition: avcodec.h:1595
#define FF_ENABLE_DEPRECATION_WARNINGS
Definition: internal.h:85
AVCodec ff_libtheora_encoder
AVCodec struct exposed to libavcodec.
Definition: libtheoraenc.c:375
int key_frame
1 -> keyframe, 0-> not
Definition: frame.h:373
#define FF_QP2LAMBDA
factor to convert from H.263 QP to lambda
Definition: avutil.h:227
int av_base64_decode(uint8_t *out, const char *in_str, int out_size)
Decode a base64-encoded string.
Definition: base64.c:79
int64_t dts
Decompression timestamp in AVStream->time_base units; the time at which the packet is decompressed...
Definition: avcodec.h:1479
and forward the result(frame or status change) to the corresponding input.If nothing is possible
#define av_freep(p)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
#define AVERROR_EXTERNAL
Generic error in an external library.
Definition: error.h:57
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
This structure stores compressed data.
Definition: avcodec.h:1457
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: avcodec.h:1473
static uint8_t tmp[11]
Definition: aes_ctr.c:26