FFmpeg
g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * G.722 ADPCM audio encoder
28  */
29 
30 #include "libavutil/avassert.h"
31 #include "avcodec.h"
32 #include "internal.h"
33 #include "g722.h"
34 #include "libavutil/common.h"
35 
/* Number of output bytes after which the trellis search writes out its
 * current best path and restarts its path numbering. The per-band path
 * buffers are sized as (1 << trellis) * FREEZE_INTERVAL entries. */
#define FREEZE_INTERVAL 128

/* Largest accepted avctx->frame_size. The limit itself is arbitrary; it
 * only exists because insanely large frames lead to strange problems. */
#define MAX_FRAME_SIZE 32768

/* avctx->trellis is clipped to this range to prevent data type overflows
 * and undefined behavior. Larger values would be insanely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16
46 
48 {
49  G722Context *c = avctx->priv_data;
50  int i;
51  for (i = 0; i < 2; i++) {
52  av_freep(&c->paths[i]);
53  av_freep(&c->node_buf[i]);
54  av_freep(&c->nodep_buf[i]);
55  }
56  return 0;
57 }
58 
60 {
61  G722Context *c = avctx->priv_data;
62 
63  c->band[0].scale_factor = 8;
64  c->band[1].scale_factor = 2;
65  c->prev_samples_pos = 22;
66 
67  if (avctx->trellis) {
68  int frontier = 1 << avctx->trellis;
69  int max_paths = frontier * FREEZE_INTERVAL;
70  int i;
71  for (i = 0; i < 2; i++) {
72  c->paths[i] = av_mallocz_array(max_paths, sizeof(**c->paths));
73  c->node_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->node_buf));
74  c->nodep_buf[i] = av_mallocz_array(frontier, 2 * sizeof(**c->nodep_buf));
75  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i])
76  return AVERROR(ENOMEM);
77  }
78  }
79 
80  if (avctx->frame_size) {
81  /* validate frame size */
82  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
83  int new_frame_size;
84 
85  if (avctx->frame_size == 1)
86  new_frame_size = 2;
87  else if (avctx->frame_size > MAX_FRAME_SIZE)
88  new_frame_size = MAX_FRAME_SIZE;
89  else
90  new_frame_size = avctx->frame_size - 1;
91 
92  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
93  "allowed. Using %d instead of %d\n", new_frame_size,
94  avctx->frame_size);
95  avctx->frame_size = new_frame_size;
96  }
97  } else {
98  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
99  a common packet size for VoIP applications */
100  avctx->frame_size = 320;
101  }
102  avctx->initial_padding = 22;
103 
104  if (avctx->trellis) {
105  /* validate trellis */
106  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
107  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
108  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
109  "allowed. Using %d instead of %d\n", new_trellis,
110  avctx->trellis);
111  avctx->trellis = new_trellis;
112  }
113  }
114 
115  ff_g722dsp_init(&c->dsp);
116 
117  return 0;
118 }
119 
/* Decision thresholds used by encode_low(), which scales each entry by the
 * band's scale factor before comparing. Only the first 29 of the 33 slots
 * are given explicitly; the remaining ones are implicitly zero. */
static const int16_t low_quant[33] = {
      35,   72,  110,  150,
     190,  233,  276,  323,
     370,  422,  473,  530,
     587,  650,  714,  786,
     858,  940, 1023, 1121,
    1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557,
    2919
};
126 
127 static inline void filter_samples(G722Context *c, const int16_t *samples,
128  int *xlow, int *xhigh)
129 {
130  int xout[2];
131  c->prev_samples[c->prev_samples_pos++] = samples[0];
132  c->prev_samples[c->prev_samples_pos++] = samples[1];
133  c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
134  *xlow = xout[0] + xout[1] >> 14;
135  *xhigh = xout[0] - xout[1] >> 14;
137  memmove(c->prev_samples,
138  c->prev_samples + c->prev_samples_pos - 22,
139  22 * sizeof(c->prev_samples[0]));
140  c->prev_samples_pos = 22;
141  }
142 }
143 
144 static inline int encode_high(const struct G722Band *state, int xhigh)
145 {
146  int diff = av_clip_int16(xhigh - state->s_predictor);
147  int pred = 141 * state->scale_factor >> 8;
148  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
149  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
150 }
151 
152 static inline int encode_low(const struct G722Band* state, int xlow)
153 {
154  int diff = av_clip_int16(xlow - state->s_predictor);
155  /* = diff >= 0 ? diff : -(diff + 1) */
156  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
157  int i = 0;
158  limit = limit + 1 << 10;
159  if (limit > low_quant[8] * state->scale_factor)
160  i = 9;
161  while (i < 29 && limit > low_quant[i] * state->scale_factor)
162  i++;
163  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
164 }
165 
166 static void g722_encode_trellis(G722Context *c, int trellis,
167  uint8_t *dst, int nb_samples,
168  const int16_t *samples)
169 {
170  int i, j, k;
171  int frontier = 1 << trellis;
172  struct TrellisNode **nodes[2];
173  struct TrellisNode **nodes_next[2];
174  int pathn[2] = {0, 0}, froze = -1;
175  struct TrellisPath *p[2];
176 
177  for (i = 0; i < 2; i++) {
178  nodes[i] = c->nodep_buf[i];
179  nodes_next[i] = c->nodep_buf[i] + frontier;
180  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
181  nodes[i][0] = c->node_buf[i] + frontier;
182  nodes[i][0]->ssd = 0;
183  nodes[i][0]->path = 0;
184  nodes[i][0]->state = c->band[i];
185  }
186 
187  for (i = 0; i < nb_samples >> 1; i++) {
188  int xlow, xhigh;
189  struct TrellisNode *next[2];
190  int heap_pos[2] = {0, 0};
191 
192  for (j = 0; j < 2; j++) {
193  next[j] = c->node_buf[j] + frontier*(i & 1);
194  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
195  }
196 
197  filter_samples(c, &samples[2*i], &xlow, &xhigh);
198 
199  for (j = 0; j < frontier && nodes[0][j]; j++) {
200  /* Only k >> 2 affects the future adaptive state, therefore testing
201  * small steps that don't change k >> 2 is useless, the original
202  * value from encode_low is better than them. Since we step k
203  * in steps of 4, make sure range is a multiple of 4, so that
204  * we don't miss the original value from encode_low. */
205  int range = j < frontier/2 ? 4 : 0;
206  struct TrellisNode *cur_node = nodes[0][j];
207 
208  int ilow = encode_low(&cur_node->state, xlow);
209 
210  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
211  int decoded, dec_diff, pos;
212  uint32_t ssd;
213  struct TrellisNode* node;
214 
215  if (k < 0)
216  continue;
217 
218  decoded = av_clip_intp2((cur_node->state.scale_factor *
219  ff_g722_low_inv_quant6[k] >> 10)
220  + cur_node->state.s_predictor, 14);
221  dec_diff = xlow - decoded;
222 
223 #define STORE_NODE(index, UPDATE, VALUE)\
224  ssd = cur_node->ssd + dec_diff*dec_diff;\
225  /* Check for wraparound. Using 64 bit ssd counters would \
226  * be simpler, but is slower on x86 32 bit. */\
227  if (ssd < cur_node->ssd)\
228  continue;\
229  if (heap_pos[index] < frontier) {\
230  pos = heap_pos[index]++;\
231  av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
232  node = nodes_next[index][pos] = next[index]++;\
233  node->path = pathn[index]++;\
234  } else {\
235  /* Try to replace one of the leaf nodes with the new \
236  * one, but not always testing the same leaf position */\
237  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
238  if (ssd >= nodes_next[index][pos]->ssd)\
239  continue;\
240  heap_pos[index]++;\
241  node = nodes_next[index][pos];\
242  }\
243  node->ssd = ssd;\
244  node->state = cur_node->state;\
245  UPDATE;\
246  c->paths[index][node->path].value = VALUE;\
247  c->paths[index][node->path].prev = cur_node->path;\
248  /* Sift the newly inserted node up in the heap to restore \
249  * the heap property */\
250  while (pos > 0) {\
251  int parent = (pos - 1) >> 1;\
252  if (nodes_next[index][parent]->ssd <= ssd)\
253  break;\
254  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
255  nodes_next[index][pos]);\
256  pos = parent;\
257  }
258  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
259  }
260  }
261 
262  for (j = 0; j < frontier && nodes[1][j]; j++) {
263  int ihigh;
264  struct TrellisNode *cur_node = nodes[1][j];
265 
266  /* We don't try to get any initial guess for ihigh via
267  * encode_high - since there's only 4 possible values, test
268  * them all. Testing all of these gives a much, much larger
269  * gain than testing a larger range around ilow. */
270  for (ihigh = 0; ihigh < 4; ihigh++) {
271  int dhigh, decoded, dec_diff, pos;
272  uint32_t ssd;
273  struct TrellisNode* node;
274 
275  dhigh = cur_node->state.scale_factor *
276  ff_g722_high_inv_quant[ihigh] >> 10;
277  decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
278  dec_diff = xhigh - decoded;
279 
280  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
281  }
282  }
283 
284  for (j = 0; j < 2; j++) {
285  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
286 
287  if (nodes[j][0]->ssd > (1 << 16)) {
288  for (k = 1; k < frontier && nodes[j][k]; k++)
289  nodes[j][k]->ssd -= nodes[j][0]->ssd;
290  nodes[j][0]->ssd = 0;
291  }
292  }
293 
294  if (i == froze + FREEZE_INTERVAL) {
295  p[0] = &c->paths[0][nodes[0][0]->path];
296  p[1] = &c->paths[1][nodes[1][0]->path];
297  for (j = i; j > froze; j--) {
298  dst[j] = p[1]->value << 6 | p[0]->value;
299  p[0] = &c->paths[0][p[0]->prev];
300  p[1] = &c->paths[1][p[1]->prev];
301  }
302  froze = i;
303  pathn[0] = pathn[1] = 0;
304  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
305  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
306  }
307  }
308 
309  p[0] = &c->paths[0][nodes[0][0]->path];
310  p[1] = &c->paths[1][nodes[1][0]->path];
311  for (j = i; j > froze; j--) {
312  dst[j] = p[1]->value << 6 | p[0]->value;
313  p[0] = &c->paths[0][p[0]->prev];
314  p[1] = &c->paths[1][p[1]->prev];
315  }
316  c->band[0] = nodes[0][0]->state;
317  c->band[1] = nodes[1][0]->state;
318 }
319 
321  const int16_t *samples)
322 {
323  int xlow, xhigh, ilow, ihigh;
324  filter_samples(c, samples, &xlow, &xhigh);
325  ihigh = encode_high(&c->band[1], xhigh);
326  ilow = encode_low (&c->band[0], xlow);
328  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
329  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
330  *dst = ihigh << 6 | ilow;
331 }
332 
334  uint8_t *dst, int nb_samples,
335  const int16_t *samples)
336 {
337  int i;
338  for (i = 0; i < nb_samples; i += 2)
339  encode_byte(c, dst++, &samples[i]);
340 }
341 
342 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
343  const AVFrame *frame, int *got_packet_ptr)
344 {
345  G722Context *c = avctx->priv_data;
346  const int16_t *samples = (const int16_t *)frame->data[0];
347  int nb_samples, out_size, ret;
348 
349  out_size = (frame->nb_samples + 1) / 2;
350  if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
351  return ret;
352 
353  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
354 
355  if (avctx->trellis)
356  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
357  else
358  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
359 
360  /* handle last frame with odd frame_size */
361  if (nb_samples < frame->nb_samples) {
362  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
363  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
364  }
365 
366  if (frame->pts != AV_NOPTS_VALUE)
367  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
368  *got_packet_ptr = 1;
369  return 0;
370 }
371 
373  .name = "g722",
374  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
375  .type = AVMEDIA_TYPE_AUDIO,
377  .priv_data_size = sizeof(G722Context),
379  .close = g722_encode_close,
380  .encode2 = g722_encode_frame,
381  .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
383  .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
384  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
385 };
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: internal.h:48
struct G722Context::TrellisNode ** nodep_buf[2]
int path
Definition: adpcmenc.c:47
This structure describes decoded (raw) audio or video data.
Definition: frame.h:308
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:200
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
struct G722Context::TrellisPath * paths[2]
int out_size
Definition: movenc.c:55
static void filter_samples(G722Context *c, const int16_t *samples, int *xlow, int *xhigh)
Definition: g722enc.c:127
#define MIN_TRELLIS
Definition: g722enc.c:44
AVCodec.
Definition: codec.h:190
static void g722_encode_no_trellis(G722Context *c, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:333
void(* apply_qmf)(const int16_t *prev_samples, int xout[2])
Definition: g722dsp.h:27
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:33
static int encode_high(const struct G722Band *state, int xhigh)
Definition: g722enc.c:144
uint8_t
#define av_cold
Definition: attributes.h:88
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
#define PREV_SAMPLES_BUF_SIZE
Definition: g722.h:32
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:401
static av_cold int g722_encode_init(AVCodecContext *avctx)
Definition: g722enc.c:59
uint8_t * data
Definition: packet.h:363
#define av_log(a,...)
uint32_t ssd
Definition: adpcmenc.c:46
struct G722Context::TrellisNode * node_buf[2]
const int16_t ff_g722_low_inv_quant6[64]
Definition: g722.c:63
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:153
unsigned int pos
Definition: spdifenc.c:410
int initial_padding
Audio only.
Definition: avcodec.h:2060
int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]
memory of past decoded samples
Definition: g722.h:37
simple assert() macros that are a bit more flexible than ISO C assert().
AVCodec ff_adpcm_g722_encoder
Definition: g722enc.c:372
const char * name
Name of the codec implementation.
Definition: codec.h:197
#define FREEZE_INTERVAL
Definition: g722enc.c:36
struct G722Context::G722Band band[2]
static struct @322 state
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: codec.h:80
static void g722_encode_trellis(G722Context *c, int trellis, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:166
#define MAX_FRAME_SIZE
Definition: g722enc.c:40
static av_cold int g722_encode_close(AVCodecContext *avctx)
Definition: g722enc.c:47
static int encode_low(const struct G722Band *state, int xlow)
Definition: g722enc.c:152
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
void ff_g722_update_low_predictor(struct G722Band *band, const int ilow)
Definition: g722.c:143
static const float pred[4]
Definition: siprdata.h:259
G722DSPContext dsp
Definition: g722.h:66
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1206
static const int16_t low_quant[33]
Definition: g722enc.c:120
Libavcodec external API header.
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
main external API structure.
Definition: avcodec.h:526
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:322
common internal api header.
common internal and external API header
signed 16 bits
Definition: samplefmt.h:61
int prev_samples_pos
the number of values in prev_samples
Definition: g722.h:38
int trellis
trellis RD quantization
Definition: avcodec.h:1475
void * priv_data
Definition: avcodec.h:553
#define STORE_NODE(index, UPDATE, VALUE)
static av_always_inline int diff(const uint32_t a, const uint32_t b)
const int16_t ff_g722_high_inv_quant[4]
Definition: g722.c:51
av_cold void ff_g722dsp_init(G722DSPContext *c)
Definition: g722dsp.c:68
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, const int16_t *samples)
Definition: g722enc.c:320
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:846
Filter the word “frame” indicates either a video frame or a group of audio samples
#define av_freep(p)
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: g722enc.c:342
#define av_always_inline
Definition: attributes.h:45
static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx, int64_t samples)
Rescale from sample rate to AVCodecContext.time_base.
Definition: internal.h:263
#define MAX_TRELLIS
Definition: g722enc.c:45
#define FFSWAP(type, a, b)
Definition: common.h:99
void ff_g722_update_high_predictor(struct G722Band *band, const int dhigh, const int ihigh)
Definition: g722.c:154
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
#define AV_CH_LAYOUT_MONO
This structure stores compressed data.
Definition: packet.h:340
int16_t scale_factor
delayed quantizer scale factor
Definition: g722.h:52
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:374
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: packet.h:356
int i
Definition: input.c:407
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248
void * av_mallocz_array(size_t nmemb, size_t size)
Definition: mem.c:190