g722enc.c
/*
 * Copyright (c) CMU 1993 Computer Science, Speech Group
 *                        Chengxiang Lu and Alex Hauptmann
 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
 * Copyright (c) 2009 Kenan Gillet
 * Copyright (c) 2010 Martin Storsjo
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * G.722 ADPCM audio encoder
 */

#include "libavutil/avassert.h"
#include "avcodec.h"
#include "internal.h"
#include "g722.h"
#include "libavutil/common.h"

#define FREEZE_INTERVAL 128

/* This is an arbitrary value. Allowing insanely large values leads to strange
   problems, so we limit it to a reasonable value */
#define MAX_FRAME_SIZE 32768

/* We clip the value of avctx->trellis to prevent data type overflows and
   undefined behavior. Using larger values is insanely slow anyway. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16

static av_cold int g722_encode_close(AVCodecContext *avctx)
{
    G722Context *c = avctx->priv_data;
    int i;
    for (i = 0; i < 2; i++) {
        av_freep(&c->paths[i]);
        av_freep(&c->node_buf[i]);
        av_freep(&c->nodep_buf[i]);
    }
#if FF_API_OLD_ENCODE_AUDIO
    av_freep(&avctx->coded_frame);
#endif
    return 0;
}

static av_cold int g722_encode_init(AVCodecContext *avctx)
{
    G722Context *c = avctx->priv_data;
    int ret;

    if (avctx->channels != 1) {
        av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
        return AVERROR_INVALIDDATA;
    }

    c->band[0].scale_factor = 8;
    c->band[1].scale_factor = 2;
    c->prev_samples_pos = 22;

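    /* Trellis search buffers: per band, one path entry for every node that can
     * be created between two freeze points, plus two node frontiers (current
     * and next) and the matching arrays of node pointers. */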
    if (avctx->trellis) {
        int frontier = 1 << avctx->trellis;
        int max_paths = frontier * FREEZE_INTERVAL;
        int i;
        for (i = 0; i < 2; i++) {
            c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths));
            c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf));
            c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
            if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
                ret = AVERROR(ENOMEM);
                goto error;
            }
        }
    }

    if (avctx->frame_size) {
        /* validate frame size */
        if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
            int new_frame_size;

            if (avctx->frame_size == 1)
                new_frame_size = 2;
            else if (avctx->frame_size > MAX_FRAME_SIZE)
                new_frame_size = MAX_FRAME_SIZE;
            else
                new_frame_size = avctx->frame_size - 1;

            av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
                   "allowed. Using %d instead of %d\n", new_frame_size,
                   avctx->frame_size);
            avctx->frame_size = new_frame_size;
        }
    } else {
        /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
           a common packet size for VoIP applications */
        avctx->frame_size = 320;
    }
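    /* 22 samples of QMF history are carried between calls (see
     * filter_samples()); report that as the codec delay. */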
    avctx->delay = 22;

    if (avctx->trellis) {
        /* validate trellis */
        if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
            int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
            av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
                   "allowed. Using %d instead of %d\n", new_trellis,
                   avctx->trellis);
            avctx->trellis = new_trellis;
        }
    }

#if FF_API_OLD_ENCODE_AUDIO
    avctx->coded_frame = avcodec_alloc_frame();
    if (!avctx->coded_frame) {
        ret = AVERROR(ENOMEM);
        goto error;
    }
#endif

    return 0;
error:
    g722_encode_close(avctx);
    return ret;
}

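/* Decision thresholds of the 6-bit low-band quantizer; each entry is scaled
 * by the adaptive scale factor before comparison (see encode_low()). */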
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};

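/*
 * Feed one pair of input samples through the analysis QMF, splitting the
 * 16 kHz input into one low-band and one high-band subband sample.
 * The most recent 22 input samples are kept as filter history; the history
 * buffer is compacted once it fills up.
 */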
static inline void filter_samples(G722Context *c, const int16_t *samples,
                                  int *xlow, int *xhigh)
{
    int xout1, xout2;
    c->prev_samples[c->prev_samples_pos++] = samples[0];
    c->prev_samples[c->prev_samples_pos++] = samples[1];
    ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
    *xlow  = xout1 + xout2 >> 14;
    *xhigh = xout1 - xout2 >> 14;
    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
        memmove(c->prev_samples,
                c->prev_samples + c->prev_samples_pos - 22,
                22 * sizeof(c->prev_samples[0]));
        c->prev_samples_pos = 22;
    }
}

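/* Quantize a high-band prediction difference to 2 bits: one sign bit plus one
 * magnitude bit, with the decision threshold at 141/256 of the adaptive
 * scale factor. */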
static inline int encode_high(const struct G722Band *state, int xhigh)
{
    int diff = av_clip_int16(xhigh - state->s_predictor);
    int pred = 141 * state->scale_factor >> 8;
    /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
    return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
}

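/* Quantize a low-band prediction difference to 6 bits by scanning the scaled
 * low_quant[] decision thresholds; the returned index also carries the sign. */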
static inline int encode_low(const struct G722Band* state, int xlow)
{
    int diff  = av_clip_int16(xlow - state->s_predictor);
    /* = diff >= 0 ? diff : -(diff + 1) */
    int limit = diff ^ (diff >> (sizeof(diff)*8-1));
    int i = 0;
    limit = limit + 1 << 10;
    if (limit > low_quant[8] * state->scale_factor)
        i = 9;
    while (i < 29 && limit > low_quant[i] * state->scale_factor)
        i++;
    return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
}

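/*
 * Trellis (Viterbi-like) search: instead of committing to the nearest
 * quantizer index for every sample, keep up to 2^trellis candidate encoder
 * states per band, extend each with several candidate indices, and retain
 * the ones with the smallest accumulated squared decoding error. Every
 * FREEZE_INTERVAL sample pairs the best path is committed to the output.
 */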
static void g722_encode_trellis(G722Context *c, int trellis,
                                uint8_t *dst, int nb_samples,
                                const int16_t *samples)
{
    int i, j, k;
    int frontier = 1 << trellis;
    struct TrellisNode **nodes[2];
    struct TrellisNode **nodes_next[2];
    int pathn[2] = {0, 0}, froze = -1;
    struct TrellisPath *p[2];

    for (i = 0; i < 2; i++) {
        nodes[i] = c->nodep_buf[i];
        nodes_next[i] = c->nodep_buf[i] + frontier;
        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
        nodes[i][0] = c->node_buf[i] + frontier;
        nodes[i][0]->ssd = 0;
        nodes[i][0]->path = 0;
        nodes[i][0]->state = c->band[i];
    }

    for (i = 0; i < nb_samples >> 1; i++) {
        int xlow, xhigh;
        struct TrellisNode *next[2];
        int heap_pos[2] = {0, 0};

        for (j = 0; j < 2; j++) {
            next[j] = c->node_buf[j] + frontier*(i & 1);
            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
        }

        filter_samples(c, &samples[2*i], &xlow, &xhigh);

        for (j = 0; j < frontier && nodes[0][j]; j++) {
            /* Only k >> 2 affects the future adaptive state, therefore testing
             * small steps that don't change k >> 2 is useless, the original
             * value from encode_low is better than them. Since we step k
             * in steps of 4, make sure range is a multiple of 4, so that
             * we don't miss the original value from encode_low. */
            int range = j < frontier/2 ? 4 : 0;
            struct TrellisNode *cur_node = nodes[0][j];

            int ilow = encode_low(&cur_node->state, xlow);

            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
                int decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                if (k < 0)
                    continue;

                decoded = av_clip((cur_node->state.scale_factor *
                                  ff_g722_low_inv_quant6[k] >> 10)
                                + cur_node->state.s_predictor, -16384, 16383);
                dec_diff = xlow - decoded;

#define STORE_NODE(index, UPDATE, VALUE)\
                ssd = cur_node->ssd + dec_diff*dec_diff;\
                /* Check for wraparound. Using 64 bit ssd counters would \
                 * be simpler, but is slower on x86 32 bit. */\
                if (ssd < cur_node->ssd)\
                    continue;\
                if (heap_pos[index] < frontier) {\
                    pos = heap_pos[index]++;\
                    av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
                    node = nodes_next[index][pos] = next[index]++;\
                    node->path = pathn[index]++;\
                } else {\
                    /* Try to replace one of the leaf nodes with the new \
                     * one, but not always testing the same leaf position */\
                    pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
                    if (ssd >= nodes_next[index][pos]->ssd)\
                        continue;\
                    heap_pos[index]++;\
                    node = nodes_next[index][pos];\
                }\
                node->ssd = ssd;\
                node->state = cur_node->state;\
                UPDATE;\
                c->paths[index][node->path].value = VALUE;\
                c->paths[index][node->path].prev = cur_node->path;\
                /* Sift the newly inserted node up in the heap to restore \
                 * the heap property */\
                while (pos > 0) {\
                    int parent = (pos - 1) >> 1;\
                    if (nodes_next[index][parent]->ssd <= ssd)\
                        break;\
                    FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
                                                nodes_next[index][pos]);\
                    pos = parent;\
                }
                STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
            }
        }

        for (j = 0; j < frontier && nodes[1][j]; j++) {
            int ihigh;
            struct TrellisNode *cur_node = nodes[1][j];

            /* We don't try to get any initial guess for ihigh via
             * encode_high - since there's only 4 possible values, test
             * them all. Testing all of these gives a much, much larger
             * gain than testing a larger range around ilow. */
            for (ihigh = 0; ihigh < 4; ihigh++) {
                int dhigh, decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                dhigh = cur_node->state.scale_factor *
                        ff_g722_high_inv_quant[ihigh] >> 10;
                decoded = av_clip(dhigh + cur_node->state.s_predictor,
                                  -16384, 16383);
                dec_diff = xhigh - decoded;

                STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
            }
        }

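        /* Subtract the best node's error from all survivors now and then so
         * the 32-bit SSD accumulators stay well away from overflow. */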
        for (j = 0; j < 2; j++) {
            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);

            if (nodes[j][0]->ssd > (1 << 16)) {
                for (k = 1; k < frontier && nodes[j][k]; k++)
                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
                nodes[j][0]->ssd = 0;
            }
        }

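        /* Periodically commit the decisions along the currently best path:
         * trace it backwards, write the output bytes, then reset the path
         * bookkeeping and drop all but the best node so the buffers can be
         * reused. */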
        if (i == froze + FREEZE_INTERVAL) {
            p[0] = &c->paths[0][nodes[0][0]->path];
            p[1] = &c->paths[1][nodes[1][0]->path];
            for (j = i; j > froze; j--) {
                dst[j] = p[1]->value << 6 | p[0]->value;
                p[0] = &c->paths[0][p[0]->prev];
                p[1] = &c->paths[1][p[1]->prev];
            }
            froze = i;
            pathn[0] = pathn[1] = 0;
            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
        }
    }

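    /* Flush whatever remains after the last freeze point and carry the
     * winning band states over into the context. */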
    p[0] = &c->paths[0][nodes[0][0]->path];
    p[1] = &c->paths[1][nodes[1][0]->path];
    for (j = i; j > froze; j--) {
        dst[j] = p[1]->value << 6 | p[0]->value;
        p[0] = &c->paths[0][p[0]->prev];
        p[1] = &c->paths[1][p[1]->prev];
    }
    c->band[0] = nodes[0][0]->state;
    c->band[1] = nodes[1][0]->state;
}

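/* Encode one pair of input samples into one output byte: 6 bits for the low
 * band and 2 bits for the high band (the 64 kbit/s mode of G.722). */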
static void encode_byte(G722Context *c, uint8_t *dst,
                        const int16_t *samples)
{
    int xlow, xhigh, ilow, ihigh;
    filter_samples(c, samples, &xlow, &xhigh);
    ihigh = encode_high(&c->band[1], xhigh);
    ilow  = encode_low (&c->band[0], xlow);
    ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
                                  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
    ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
    *dst = ihigh << 6 | ilow;
}

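/* Greedy (non-trellis) encoding: quantize each sample pair independently and
 * update the predictors immediately. */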
static void g722_encode_no_trellis(G722Context *c,
                                   uint8_t *dst, int nb_samples,
                                   const int16_t *samples)
{
    int i;
    for (i = 0; i < nb_samples; i += 2)
        encode_byte(c, dst++, &samples[i]);
}

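/* Encode one frame. Each output byte holds two input samples; if the frame
 * has an odd number of samples, the final sample is duplicated to complete
 * the last pair. */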
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
{
    G722Context *c = avctx->priv_data;
    const int16_t *samples = (const int16_t *)frame->data[0];
    int nb_samples, out_size, ret;

    out_size = (frame->nb_samples + 1) / 2;
    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)))
        return ret;

    nb_samples = frame->nb_samples - (frame->nb_samples & 1);

    if (avctx->trellis)
        g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
    else
        g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);

    /* handle last frame with odd frame_size */
    if (nb_samples < frame->nb_samples) {
        int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
        encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
    }

    if (frame->pts != AV_NOPTS_VALUE)
        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
    *got_packet_ptr = 1;
    return 0;
}

AVCodec ff_adpcm_g722_encoder = {
    .name           = "g722",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_ADPCM_G722,
    .priv_data_size = sizeof(G722Context),
    .init           = g722_encode_init,
    .close          = g722_encode_close,
    .encode2        = g722_encode_frame,
    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
    .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                     AV_SAMPLE_FMT_NONE },
};