FFmpeg
speexdec.c
Go to the documentation of this file.
1 /*
2  * Copyright 2002-2008 Xiph.org Foundation
3  * Copyright 2002-2008 Jean-Marc Valin
4  * Copyright 2005-2007 Analog Devices Inc.
5  * Copyright 2005-2008 Commonwealth Scientific and Industrial Research Organisation (CSIRO)
6  * Copyright 1993, 2002, 2006 David Rowe
7  * Copyright 2003 EpicGames
8  * Copyright 1992-1994 Jutta Degener, Carsten Bormann
9 
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13 
14  * - Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16 
17  * - Redistributions in binary form must reproduce the above copyright
18  * notice, this list of conditions and the following disclaimer in the
19  * documentation and/or other materials provided with the distribution.
20 
21  * - Neither the name of the Xiph.org Foundation nor the names of its
22  * contributors may be used to endorse or promote products derived from
23  * this software without specific prior written permission.
24 
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
29  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  *
37  * This file is part of FFmpeg.
38  *
39  * FFmpeg is free software; you can redistribute it and/or
40  * modify it under the terms of the GNU Lesser General Public
41  * License as published by the Free Software Foundation; either
42  * version 2.1 of the License, or (at your option) any later version.
43  *
44  * FFmpeg is distributed in the hope that it will be useful,
45  * but WITHOUT ANY WARRANTY; without even the implied warranty of
46  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
47  * Lesser General Public License for more details.
48  *
49  * You should have received a copy of the GNU Lesser General Public
50  * License along with FFmpeg; if not, write to the Free Software
51  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
52  */
53 
54 #include "libavutil/avassert.h"
55 #include "libavutil/float_dsp.h"
56 #include "avcodec.h"
57 #include "bytestream.h"
58 #include "codec_internal.h"
59 #include "decode.h"
60 #include "get_bits.h"
61 #include "speexdata.h"
62 
/* SPEEX_NB_MODES: presumably the number of entries in the decoder mode table
 * (three modes are defined in this file) -- TODO confirm against its users.
 * SPEEX_INBAND_STEREO: in-band request id that speex_inband_handler() routes
 * to the stereo handler. */
63 #define SPEEX_NB_MODES 3
64 #define SPEEX_INBAND_STEREO 9
65 
/* Sizes and bit widths. NB_ORDER is the length of the narrowband LSP/LPC
 * arrays, NB_SUBMODES the length of SpeexMode.submodes[], and SB_SUBMODE_BITS
 * the width of the wideband sub-mode field read in nb_decode(). */
66 #define QMF_ORDER 64
67 #define NB_ORDER 10
68 #define NB_FRAME_SIZE 160
69 #define NB_SUBMODES 9
70 #define NB_SUBMODE_BITS 4
71 #define SB_SUBMODE_BITS 3
72 
/* A narrowband frame is NB_NB_SUBFRAMES sub-frames of NB_SUBFRAME_SIZE samples
 * (4 * 40 = NB_FRAME_SIZE). NB_PITCH_START / NB_PITCH_END bound the pitch lag
 * range used by nb_decode(). */
73 #define NB_SUBFRAME_SIZE 40
74 #define NB_NB_SUBFRAMES 4
75 #define NB_PITCH_START 17
76 #define NB_PITCH_END 144
77 
/* Number of floats in DecoderState.exc_buf */
78 #define NB_DEC_BUFFER (NB_FRAME_SIZE + 2 * NB_PITCH_END + NB_SUBFRAME_SIZE + 12)
79 
/* memset()/memcpy() wrappers whose count n is in elements of *dst, not bytes */
80 #define SPEEX_MEMSET(dst, c, n) (memset((dst), (c), (n) * sizeof(*(dst))))
81 #define SPEEX_COPY(dst, src, n) (memcpy((dst), (src), (n) * sizeof(*(dst))))
82 
/* LSP_LINEAR / LSP_LINEAR_HIGH: starting LSP value for coefficient index i,
 * used by the lsp_unquant_*() functions before codebook corrections are added.
 * LSP_DIV_N(x): x scaled by 1/N. */
83 #define LSP_LINEAR(i) (.25f * (i) + .25f)
84 #define LSP_LINEAR_HIGH(i) (.3125f * (i) + .75f)
85 #define LSP_DIV_256(x) (0.00390625f * (x))
86 #define LSP_DIV_512(x) (0.001953125f * (x))
87 #define LSP_DIV_1024(x) (0.0009765625f * (x))
88 
89 typedef struct LtpParams {
90  const int8_t *gain_cdbk;
91  int gain_bits;
93 } LtpParam;
94 
95 static const LtpParam ltp_params_vlbr = { gain_cdbk_lbr, 5, 0 };
96 static const LtpParam ltp_params_lbr = { gain_cdbk_lbr, 5, 7 };
97 static const LtpParam ltp_params_med = { gain_cdbk_lbr, 5, 7 };
98 static const LtpParam ltp_params_nb = { gain_cdbk_nb, 7, 7 };
99 
100 typedef struct SplitCodebookParams {
103  const signed char *shape_cb;
107 
108 static const SplitCodebookParams split_cb_nb_ulbr = { 20, 2, exc_20_32_table, 5, 0 };
109 static const SplitCodebookParams split_cb_nb_vlbr = { 10, 4, exc_10_16_table, 4, 0 };
110 static const SplitCodebookParams split_cb_nb_lbr = { 10, 4, exc_10_32_table, 5, 0 };
111 static const SplitCodebookParams split_cb_nb_med = { 8, 5, exc_8_128_table, 7, 0 };
112 static const SplitCodebookParams split_cb_nb = { 5, 8, exc_5_64_table, 6, 0 };
113 static const SplitCodebookParams split_cb_sb = { 5, 8, exc_5_256_table, 8, 0 };
114 static const SplitCodebookParams split_cb_high = { 8, 5, hexc_table, 7, 1 };
116 
117 /** Quantizes LSPs */
118 typedef void (*lsp_quant_func)(float *, float *, int, GetBitContext *);
119 
120 /** Decodes quantized LSPs */
121 typedef void (*lsp_unquant_func)(float *, int, GetBitContext *);
122 
123 /** Long-term predictor quantization */
124 typedef int (*ltp_quant_func)(float *, float *, float *,
125  float *, float *, float *,
126  const void *, int, int, float, int, int,
127  GetBitContext *, char *, float *,
128  float *, int, int, int, float *);
129 
130 /** Long-term un-quantize */
131 typedef void (*ltp_unquant_func)(float *, float *, int, int,
132  float, const void *, int, int *,
133  float *, GetBitContext *, int, int,
134  float, int);
135 
136 /** Innovation quantization function */
137 typedef void (*innovation_quant_func)(float *, float *,
138  float *, float *, const void *,
139  int, int, float *, float *,
140  GetBitContext *, char *, int, int);
141 
142 /** Innovation unquantization function */
143 typedef void (*innovation_unquant_func)(float *, const void *, int,
144  GetBitContext *, uint32_t *);
145 
146 typedef struct SpeexSubmode {
147  int lbr_pitch; /**< Set to -1 for "normal" modes, otherwise encode pitch using
148  a global pitch and allowing a +- lbr_pitch variation (for
149  low not-rates)*/
150  int forced_pitch_gain; /**< Use the same (forced) pitch gain for all
151  sub-frames */
152  int have_subframe_gain; /**< Number of bits to use as sub-frame innovation
153  gain */
154  int double_codebook; /**< Apply innovation quantization twice for higher
155  quality (and higher bit-rate)*/
156  lsp_unquant_func lsp_unquant; /**< LSP unquantization function */
157 
158  ltp_unquant_func ltp_unquant; /**< Long-term predictor (pitch) un-quantizer */
159  const void *LtpParam; /**< Pitch parameters (options) */
160 
161  innovation_unquant_func innovation_unquant; /**< Innovation un-quantization */
162  const void *innovation_params; /**< Innovation quantization parameters*/
163 
164  float comb_gain; /**< Gain of enhancer comb filter */
165 } SpeexSubmode;
166 
167 typedef struct SpeexMode {
168  int modeID; /**< ID of the mode */
169  int (*decode)(AVCodecContext *avctx, void *dec, GetBitContext *gb, float *out);
170  int frame_size; /**< Size of frames used for decoding */
171  int subframe_size; /**< Size of sub-frames used for decoding */
172  int lpc_size; /**< Order of LPC filter */
173  float folding_gain; /**< Folding gain */
174  const SpeexSubmode *submodes[NB_SUBMODES]; /**< Sub-mode data for the mode */
175  int default_submode; /**< Default sub-mode to use when decoding */
176 } SpeexMode;
177 
178 typedef struct DecoderState {
179  const SpeexMode *mode;
180  int modeID; /**< ID of the decoder mode */
181  int first; /**< Is first frame */
182  int full_frame_size; /**< Length of full-band frames */
183  int is_wideband; /**< If wideband is present */
184  int count_lost; /**< Was the last frame lost? */
185  int frame_size; /**< Length of high-band frames */
186  int subframe_size; /**< Length of high-band sub-frames */
187  int nb_subframes; /**< Number of high-band sub-frames */
188  int lpc_size; /**< Order of high-band LPC analysis */
189  float last_ol_gain; /**< Open-loop gain for previous frame */
190  float *innov_save; /**< If non-NULL, innovation is copied here */
191 
192  /* This is used in packet loss concealment */
193  int last_pitch; /**< Pitch of last correctly decoded frame */
194  float last_pitch_gain; /**< Pitch gain of last correctly decoded frame */
195  uint32_t seed; /**< Seed used for random number generation */
196 
198  const SpeexSubmode *const *submodes; /**< Sub-mode data */
199  int submodeID; /**< Activated sub-mode */
200  int lpc_enh_enabled; /**< 1 when LPC enhancer is on, 0 otherwise */
201 
202  /* Vocoder data */
203  float voc_m1;
204  float voc_m2;
205  float voc_mean;
207 
209  int highpass_enabled; /**< Is the input filter enabled */
210 
211  float *exc; /**< Start of excitation frame */
212  float mem_hp[2]; /**< High-pass filter memory */
213  float exc_buf[NB_DEC_BUFFER]; /**< Excitation buffer */
214  float old_qlsp[NB_ORDER]; /**< Quantized LSPs for previous frame */
215  float interp_qlpc[NB_ORDER]; /**< Interpolated quantized LPCs */
216  float mem_sp[NB_ORDER]; /**< Filter memory for synthesis signal */
219  float pi_gain[NB_NB_SUBFRAMES]; /**< Gain of LPC filter at theta=pi (fe/2) */
220  float exc_rms[NB_NB_SUBFRAMES]; /**< RMS of excitation per subframe */
221 } DecoderState;
222 
223 /* Default handler for user callbacks: skip it */
224 static int speex_default_user_handler(GetBitContext *gb, void *state, void *data)
225 {
226  const int req_size = get_bits(gb, 4);
227  skip_bits_long(gb, 5 + 8 * req_size);
228  return 0;
229 }
230 
/** Stereo parameters; balance and e_ratio are written by speex_std_stereo(). */
typedef struct StereoState {
    /** Left/right balance info */
    float balance;
    /** Ratio of energies: E(left+right)/[E(left)+E(right)] */
    float e_ratio;
    /** Smoothed left channel gain */
    float smooth_left;
    /** Smoothed right channel gain */
    float smooth_right;
} StereoState;
237 
238 typedef struct SpeexContext {
239  AVClass *class;
241 
242  int32_t version_id; /**< Version for Speex (for checking compatibility) */
243  int32_t rate; /**< Sampling rate used */
244  int32_t mode; /**< Mode used (0 for narrowband, 1 for wideband) */
245  int32_t bitstream_version; /**< Version ID of the bit-stream */
246  int32_t nb_channels; /**< Number of channels decoded */
247  int32_t bitrate; /**< Bit-rate used */
248  int32_t frame_size; /**< Size of frames */
249  int32_t vbr; /**< 1 for a VBR decoding, 0 otherwise */
250  int32_t frames_per_packet; /**< Number of frames stored per Ogg packet */
251  int32_t extra_headers; /**< Number of additional headers after the comments */
252 
253  int pkt_size;
254 
257 
259 } SpeexContext;
260 
261 static void lsp_unquant_lbr(float *lsp, int order, GetBitContext *gb)
262 {
263  int id;
264 
265  for (int i = 0; i < order; i++)
266  lsp[i] = LSP_LINEAR(i);
267 
268  id = get_bits(gb, 6);
269  for (int i = 0; i < 10; i++)
270  lsp[i] += LSP_DIV_256(cdbk_nb[id * 10 + i]);
271 
272  id = get_bits(gb, 6);
273  for (int i = 0; i < 5; i++)
274  lsp[i] += LSP_DIV_512(cdbk_nb_low1[id * 5 + i]);
275 
276  id = get_bits(gb, 6);
277  for (int i = 0; i < 5; i++)
278  lsp[i + 5] += LSP_DIV_512(cdbk_nb_high1[id * 5 + i]);
279 }
280 
281 static void forced_pitch_unquant(float *exc, float *exc_out, int start, int end,
282  float pitch_coef, const void *par, int nsf,
283  int *pitch_val, float *gain_val, GetBitContext *gb, int count_lost,
284  int subframe_offset, float last_pitch_gain, int cdbk_offset)
285 {
286  av_assert0(!isnan(pitch_coef));
287  pitch_coef = fminf(pitch_coef, .99f);
288  for (int i = 0; i < nsf; i++) {
289  exc_out[i] = exc[i - start] * pitch_coef;
290  exc[i] = exc_out[i];
291  }
292  pitch_val[0] = start;
293  gain_val[0] = gain_val[2] = 0.f;
294  gain_val[1] = pitch_coef;
295 }
296 
/**
 * Pseudo-random float generator.
 *
 * Advances *seed with a linear congruential step, places its low 23 bits in
 * the mantissa of a float whose remaining bits are 0x3f800000, subtracts
 * 1.5 and scales the result by std.
 */
static inline float speex_rand(float std, uint32_t *seed)
{
    const uint32_t exponent_bits = 0x3f800000;
    const uint32_t mantissa_mask = 0x007fffff;
    uint32_t pattern;
    float value;

    *seed = 1664525 * *seed + 1013904223;
    pattern = (*seed & mantissa_mask) | exponent_bits;

    value  = av_int2float(pattern);
    value -= 1.5f;
    value *= std;

    return value;
}
310 
311 static void noise_codebook_unquant(float *exc, const void *par, int nsf,
312  GetBitContext *gb, uint32_t *seed)
313 {
314  for (int i = 0; i < nsf; i++)
315  exc[i] = speex_rand(1.f, seed);
316 }
317 
318 static void split_cb_shape_sign_unquant(float *exc, const void *par, int nsf,
319  GetBitContext *gb, uint32_t *seed)
320 {
321  int subvect_size, nb_subvect, have_sign, shape_bits;
322  const SplitCodebookParams *params;
323  const signed char *shape_cb;
324  int signs[10], ind[10];
325 
326  params = par;
327  subvect_size = params->subvect_size;
328  nb_subvect = params->nb_subvect;
329 
330  shape_cb = params->shape_cb;
331  have_sign = params->have_sign;
332  shape_bits = params->shape_bits;
333 
334  /* Decode codewords and gains */
335  for (int i = 0; i < nb_subvect; i++) {
336  signs[i] = have_sign ? get_bits1(gb) : 0;
337  ind[i] = get_bitsz(gb, shape_bits);
338  }
339  /* Compute decoded excitation */
340  for (int i = 0; i < nb_subvect; i++) {
341  const float s = signs[i] ? -1.f : 1.f;
342 
343  for (int j = 0; j < subvect_size; j++)
344  exc[subvect_size * i + j] += s * 0.03125f * shape_cb[ind[i] * subvect_size + j];
345  }
346 }
347 
/* Field x of the currently active sub-mode. Expects a DecoderState pointer
 * named `st` in the enclosing scope. The expansion is parenthesized so it
 * composes safely inside larger expressions and call expressions. */
#define SUBMODE(x) (st->submodes[st->submodeID]->x)

/* Collapse three tap gains g[0..2] into one value: |g[1]| plus each outer
 * tap, where a negative outer tap contributes half its magnitude. The
 * argument is parenthesized so any pointer expression may be passed; it is
 * evaluated several times and must be free of side effects. */
#define gain_3tap_to_1tap(g) (FFABS((g)[1]) + ((g)[0] > 0.f ? (g)[0] : -.5f * (g)[0]) + ((g)[2] > 0.f ? (g)[2] : -.5f * (g)[2]))
351 
352 static void
353 pitch_unquant_3tap(float *exc, float *exc_out, int start, int end, float pitch_coef,
354  const void *par, int nsf, int *pitch_val, float *gain_val, GetBitContext *gb,
355  int count_lost, int subframe_offset, float last_pitch_gain, int cdbk_offset)
356 {
357  int pitch, gain_index, gain_cdbk_size;
358  const int8_t *gain_cdbk;
359  const LtpParam *params;
360  float gain[3];
361 
362  params = (const LtpParam *)par;
363  gain_cdbk_size = 1 << params->gain_bits;
364  gain_cdbk = params->gain_cdbk + 4 * gain_cdbk_size * cdbk_offset;
365 
366  pitch = get_bitsz(gb, params->pitch_bits);
367  pitch += start;
368  gain_index = get_bitsz(gb, params->gain_bits);
369  gain[0] = 0.015625f * gain_cdbk[gain_index * 4] + .5f;
370  gain[1] = 0.015625f * gain_cdbk[gain_index * 4 + 1] + .5f;
371  gain[2] = 0.015625f * gain_cdbk[gain_index * 4 + 2] + .5f;
372 
373  if (count_lost && pitch > subframe_offset) {
374  float tmp = count_lost < 4 ? last_pitch_gain : 0.5f * last_pitch_gain;
375  float gain_sum;
376 
377  tmp = fminf(tmp, .95f);
378  gain_sum = gain_3tap_to_1tap(gain);
379 
380  if (gain_sum > tmp && gain_sum > 0.f) {
381  float fact = tmp / gain_sum;
382  for (int i = 0; i < 3; i++)
383  gain[i] *= fact;
384  }
385  }
386 
387  pitch_val[0] = pitch;
388  gain_val[0] = gain[0];
389  gain_val[1] = gain[1];
390  gain_val[2] = gain[2];
391  SPEEX_MEMSET(exc_out, 0, nsf);
392 
393  for (int i = 0; i < 3; i++) {
394  int tmp1, tmp3;
395  int pp = pitch + 1 - i;
396  tmp1 = nsf;
397  if (tmp1 > pp)
398  tmp1 = pp;
399  for (int j = 0; j < tmp1; j++)
400  exc_out[j] += gain[2 - i] * exc[j - pp];
401  tmp3 = nsf;
402  if (tmp3 > pp + pitch)
403  tmp3 = pp + pitch;
404  for (int j = tmp1; j < tmp3; j++)
405  exc_out[j] += gain[2 - i] * exc[j - pp - pitch];
406  }
407 }
408 
409 static void lsp_unquant_nb(float *lsp, int order, GetBitContext *gb)
410 {
411  int id;
412 
413  for (int i = 0; i < order; i++)
414  lsp[i] = LSP_LINEAR(i);
415 
416  id = get_bits(gb, 6);
417  for (int i = 0; i < 10; i++)
418  lsp[i] += LSP_DIV_256(cdbk_nb[id * 10 + i]);
419 
420  id = get_bits(gb, 6);
421  for (int i = 0; i < 5; i++)
422  lsp[i] += LSP_DIV_512(cdbk_nb_low1[id * 5 + i]);
423 
424  id = get_bits(gb, 6);
425  for (int i = 0; i < 5; i++)
426  lsp[i] += LSP_DIV_1024(cdbk_nb_low2[id * 5 + i]);
427 
428  id = get_bits(gb, 6);
429  for (int i = 0; i < 5; i++)
430  lsp[i + 5] += LSP_DIV_512(cdbk_nb_high1[id * 5 + i]);
431 
432  id = get_bits(gb, 6);
433  for (int i = 0; i < 5; i++)
434  lsp[i + 5] += LSP_DIV_1024(cdbk_nb_high2[id * 5 + i]);
435 }
436 
437 static void lsp_unquant_high(float *lsp, int order, GetBitContext *gb)
438 {
439  int id;
440 
441  for (int i = 0; i < order; i++)
442  lsp[i] = LSP_LINEAR_HIGH(i);
443 
444  id = get_bits(gb, 6);
445  for (int i = 0; i < order; i++)
446  lsp[i] += LSP_DIV_256(high_lsp_cdbk[id * order + i]);
447 
448  id = get_bits(gb, 6);
449  for (int i = 0; i < order; i++)
450  lsp[i] += LSP_DIV_512(high_lsp_cdbk2[id * order + i]);
451 }
452 
453 /* 2150 bps "vocoder-like" mode for comfort noise */
454 static const SpeexSubmode nb_submode1 = {
457 };
458 
459 /* 5.95 kbps very low bit-rate mode */
460 static const SpeexSubmode nb_submode2 = {
463 };
464 
465 /* 8 kbps low bit-rate mode */
466 static const SpeexSubmode nb_submode3 = {
469 };
470 
471 /* 11 kbps medium bit-rate mode */
472 static const SpeexSubmode nb_submode4 = {
475 };
476 
477 /* 15 kbps high bit-rate mode */
478 static const SpeexSubmode nb_submode5 = {
481 };
482 
483 /* 18.2 kbps high bit-rate mode */
484 static const SpeexSubmode nb_submode6 = {
487 };
488 
489 /* 24.6 kbps high bit-rate mode */
490 static const SpeexSubmode nb_submode7 = {
493 };
494 
495 /* 3.95 kbps very low bit-rate mode */
496 static const SpeexSubmode nb_submode8 = {
499 };
500 
501 static const SpeexSubmode wb_submode1 = {
502  0, 0, 1, 0, lsp_unquant_high, NULL, NULL,
503  NULL, NULL, -1.f
504 };
505 
506 static const SpeexSubmode wb_submode2 = {
507  0, 0, 1, 0, lsp_unquant_high, NULL, NULL,
509 };
510 
511 static const SpeexSubmode wb_submode3 = {
512  0, 0, 1, 0, lsp_unquant_high, NULL, NULL,
514 };
515 
516 static const SpeexSubmode wb_submode4 = {
517  0, 0, 1, 1, lsp_unquant_high, NULL, NULL,
519 };
520 
521 static int nb_decode(AVCodecContext *, void *, GetBitContext *, float *);
522 static int sb_decode(AVCodecContext *, void *, GetBitContext *, float *);
523 
525  {
526  .modeID = 0,
527  .decode = nb_decode,
528  .frame_size = NB_FRAME_SIZE,
529  .subframe_size = NB_SUBFRAME_SIZE,
530  .lpc_size = NB_ORDER,
531  .submodes = {
534  },
535  .default_submode = 5,
536  },
537  {
538  .modeID = 1,
539  .decode = sb_decode,
540  .frame_size = NB_FRAME_SIZE,
541  .subframe_size = NB_SUBFRAME_SIZE,
542  .lpc_size = 8,
543  .folding_gain = 0.9f,
544  .submodes = {
546  },
547  .default_submode = 3,
548  },
549  {
550  .modeID = 2,
551  .decode = sb_decode,
552  .frame_size = 320,
553  .subframe_size = 80,
554  .lpc_size = 8,
555  .folding_gain = 0.7f,
556  .submodes = {
557  NULL, &wb_submode1
558  },
559  .default_submode = 1,
560  },
561 };
562 
/**
 * Return sqrt(0.1 + mean of x[i]^2) over len samples.
 * Asserts that len is positive.
 */
static float compute_rms(const float *x, int len)
{
    float energy = 0.f;
    int n = 0;

    av_assert0(len > 0);

    while (n < len) {
        energy += x[n] * x[n];
        n++;
    }

    return sqrtf(.1f + energy / len);
}
573 
/**
 * Scale LPC coefficients by increasing powers of gamma:
 * lpc_out[i] = gamma^(i+1) * lpc_in[i] for i in [0, order).
 */
static void bw_lpc(float gamma, const float *lpc_in,
                   float *lpc_out, int order)
{
    float weight = gamma;
    int k = 0;

    while (k < order) {
        lpc_out[k] = weight * lpc_in[k];
        weight *= gamma;
        k++;
    }
}
584 
/**
 * All-pole (IIR) filter with persistent memory.
 *
 * For each of the N input samples, the output is x[i] + mem[0]; the ord
 * memory cells are then shifted down by one, with den[j] times the negated
 * output added to cell j. mem must hold ord floats and is updated in place.
 */
static void iir_mem(const float *x, const float *den,
                    float *y, int N, int ord, float *mem)
{
    const int last = ord - 1;
    int n, k;

    for (n = 0; n < N; n++) {
        const float out = x[n] + mem[0];
        const float neg_out = -out;

        for (k = 0; k < last; k++)
            mem[k] = mem[k + 1] + den[k] * neg_out;
        mem[last] = den[last] * neg_out;

        y[n] = out;
    }
}
597 
/**
 * Second-order high-pass filter with two memory cells.
 *
 * wide (0 or 1) selects one of two fixed coefficient sets. len samples are
 * read from x and written to y; mem[0..1] carries the filter state between
 * calls and is updated in place.
 */
static void highpass(const float *x, float *y, int len, float *mem, int wide)
{
    static const float Pcoef[2][3] = {{ 1.00000f, -1.92683f, 0.93071f }, { 1.00000f, -1.97226f, 0.97332f } };
    static const float Zcoef[2][3] = {{ 0.96446f, -1.92879f, 0.96446f }, { 0.98645f, -1.97277f, 0.98645f } };
    const float *poles = Pcoef[wide];
    const float *zeros = Zcoef[wide];
    int n = 0;

    while (n < len) {
        const float out = zeros[0] * x[n] + mem[0];

        mem[0] = mem[1] + zeros[1] * x[n] + -poles[1] * out;
        mem[1] = zeros[2] * x[n] + -poles[2] * out;
        y[n]   = out;
        n++;
    }
}
613 
/* Median of three values. Each argument may be evaluated more than once, so
 * arguments must be free of side effects. */
614 #define median3(a, b, c) \
615  ((a) < (b) ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a))) \
616  : ((c) < (b) ? (b) : ((c) < (a) ? (c) : (a))))
617 
618 static int speex_std_stereo(GetBitContext *gb, void *state, void *data)
619 {
620  StereoState *stereo = data;
621  float sign = get_bits1(gb) ? -1.f : 1.f;
622 
623  stereo->balance = exp(sign * .25f * get_bits(gb, 5));
624  stereo->e_ratio = e_ratio_quant[get_bits(gb, 2)];
625 
626  return 0;
627 }
628 
629 static int speex_inband_handler(GetBitContext *gb, void *state, StereoState *stereo)
630 {
631  int id = get_bits(gb, 4);
632 
633  if (id == SPEEX_INBAND_STEREO) {
634  return speex_std_stereo(gb, state, stereo);
635  } else {
636  int adv;
637 
638  if (id < 2)
639  adv = 1;
640  else if (id < 8)
641  adv = 4;
642  else if (id < 10)
643  adv = 8;
644  else if (id < 12)
645  adv = 16;
646  else if (id < 14)
647  adv = 32;
648  else
649  adv = 64;
650  skip_bits_long(gb, adv);
651  }
652  return 0;
653 }
654 
/**
 * Clean up a float vector in place: entries that are not normal numbers or
 * whose magnitude is below 1e-8 become 0, all others are clipped to
 * [min_val, max_val].
 */
static void sanitize_values(float *vec, float min_val, float max_val, int len)
{
    int n;

    for (n = 0; n < len; n++) {
        const float v = vec[n];

        if (isnormal(v) && fabsf(v) >= 1e-8f)
            vec[n] = av_clipf(v, min_val, max_val);
        else
            vec[n] = 0.f;
    }
}
664 
/** Write scale * x[i] to y[i] for i in [0, len); x and y may be the same buffer. */
static void signal_mul(const float *x, float *y, float scale, int len)
{
    int n = 0;

    while (n < len) {
        y[n] = scale * x[n];
        n++;
    }
}
670 
/**
 * Dot product of x and y over len samples.
 *
 * Products are accumulated in groups of eight, each group summed into a
 * partial result before being added to the total; for len a multiple of 8
 * this is the exact original summation order. The original loop always
 * consumed eight samples per step and therefore read past the end of both
 * arrays when len was not a multiple of 8; the remaining 1..7 samples are now
 * handled by a bounded tail loop. Returns 0 for len <= 0.
 */
static float inner_prod(const float *x, const float *y, int len)
{
    const int full_end = len > 0 ? len & ~7 : 0;
    float sum = 0.f;
    int i;

    for (i = 0; i < full_end; i += 8) {
        float part = 0.f;
        part += x[i + 0] * y[i + 0];
        part += x[i + 1] * y[i + 1];
        part += x[i + 2] * y[i + 2];
        part += x[i + 3] * y[i + 3];
        part += x[i + 4] * y[i + 4];
        part += x[i + 5] * y[i + 5];
        part += x[i + 6] * y[i + 6];
        part += x[i + 7] * y[i + 7];
        sum += part;
    }

    /* Tail: fewer than eight samples left, never read beyond len */
    if (i < len) {
        float part = 0.f;

        for (; i < len; i++)
            part += x[i] * y[i];
        sum += part;
    }

    return sum;
}
690 
691 static int interp_pitch(const float *exc, float *interp, int pitch, int len)
692 {
693  float corr[4][7], maxcorr;
694  int maxi, maxj;
695 
696  for (int i = 0; i < 7; i++)
697  corr[0][i] = inner_prod(exc, exc - pitch - 3 + i, len);
698  for (int i = 0; i < 3; i++) {
699  for (int j = 0; j < 7; j++) {
700  int i1, i2;
701  float tmp = 0.f;
702 
703  i1 = 3 - j;
704  if (i1 < 0)
705  i1 = 0;
706  i2 = 10 - j;
707  if (i2 > 7)
708  i2 = 7;
709  for (int k = i1; k < i2; k++)
710  tmp += shift_filt[i][k] * corr[0][j + k - 3];
711  corr[i + 1][j] = tmp;
712  }
713  }
714  maxi = maxj = 0;
715  maxcorr = corr[0][0];
716  for (int i = 0; i < 4; i++) {
717  for (int j = 0; j < 7; j++) {
718  if (corr[i][j] > maxcorr) {
719  maxcorr = corr[i][j];
720  maxi = i;
721  maxj = j;
722  }
723  }
724  }
725  for (int i = 0; i < len; i++) {
726  float tmp = 0.f;
727  if (maxi > 0.f) {
728  for (int k = 0; k < 7; k++)
729  tmp += exc[i - (pitch - maxj + 3) + k - 3] * shift_filt[maxi - 1][k];
730  } else {
731  tmp = exc[i - (pitch - maxj + 3)];
732  }
733  interp[i] = tmp;
734  }
735  return pitch - maxj + 3;
736 }
737 
738 static void multicomb(const float *exc, float *new_exc, float *ak, int p, int nsf,
739  int pitch, int max_pitch, float comb_gain)
740 {
741  float old_ener, new_ener;
742  float iexc0_mag, iexc1_mag, exc_mag;
743  float iexc[4 * NB_SUBFRAME_SIZE];
744  float corr0, corr1, gain0, gain1;
745  float pgain1, pgain2;
746  float c1, c2, g1, g2;
747  float ngain, gg1, gg2;
748  int corr_pitch = pitch;
749 
750  interp_pitch(exc, iexc, corr_pitch, 80);
751  if (corr_pitch > max_pitch)
752  interp_pitch(exc, iexc + nsf, 2 * corr_pitch, 80);
753  else
754  interp_pitch(exc, iexc + nsf, -corr_pitch, 80);
755 
756  iexc0_mag = sqrtf(1000.f + inner_prod(iexc, iexc, nsf));
757  iexc1_mag = sqrtf(1000.f + inner_prod(iexc + nsf, iexc + nsf, nsf));
758  exc_mag = sqrtf(1.f + inner_prod(exc, exc, nsf));
759  corr0 = inner_prod(iexc, exc, nsf);
760  corr1 = inner_prod(iexc + nsf, exc, nsf);
761  if (corr0 > iexc0_mag * exc_mag)
762  pgain1 = 1.f;
763  else
764  pgain1 = (corr0 / exc_mag) / iexc0_mag;
765  if (corr1 > iexc1_mag * exc_mag)
766  pgain2 = 1.f;
767  else
768  pgain2 = (corr1 / exc_mag) / iexc1_mag;
769  gg1 = exc_mag / iexc0_mag;
770  gg2 = exc_mag / iexc1_mag;
771  if (comb_gain > 0.f) {
772  c1 = .4f * comb_gain + .07f;
773  c2 = .5f + 1.72f * (c1 - .07f);
774  } else {
775  c1 = c2 = 0.f;
776  }
777  g1 = 1.f - c2 * pgain1 * pgain1;
778  g2 = 1.f - c2 * pgain2 * pgain2;
779  g1 = fmaxf(g1, c1);
780  g2 = fmaxf(g2, c1);
781  g1 = c1 / g1;
782  g2 = c1 / g2;
783 
784  if (corr_pitch > max_pitch) {
785  gain0 = .7f * g1 * gg1;
786  gain1 = .3f * g2 * gg2;
787  } else {
788  gain0 = .6f * g1 * gg1;
789  gain1 = .6f * g2 * gg2;
790  }
791  for (int i = 0; i < nsf; i++)
792  new_exc[i] = exc[i] + (gain0 * iexc[i]) + (gain1 * iexc[i + nsf]);
793  new_ener = compute_rms(new_exc, nsf);
794  old_ener = compute_rms(exc, nsf);
795 
796  old_ener = fmaxf(old_ener, 1.f);
797  new_ener = fmaxf(new_ener, 1.f);
798  old_ener = fminf(old_ener, new_ener);
799  ngain = old_ener / new_ener;
800 
801  for (int i = 0; i < nsf; i++)
802  new_exc[i] *= ngain;
803 }
804 
805 static void lsp_interpolate(const float *old_lsp, const float *new_lsp,
806  float *lsp, int len, int subframe,
807  int nb_subframes, float margin)
808 {
809  const float tmp = (1.f + subframe) / nb_subframes;
810 
811  for (int i = 0; i < len; i++) {
812  lsp[i] = (1.f - tmp) * old_lsp[i] + tmp * new_lsp[i];
813  lsp[i] = av_clipf(lsp[i], margin, M_PI - margin);
814  }
815  for (int i = 1; i < len - 1; i++) {
816  lsp[i] = fmaxf(lsp[i], lsp[i - 1] + margin);
817  if (lsp[i] > lsp[i + 1] - margin)
818  lsp[i] = .5f * (lsp[i] + lsp[i + 1] - margin);
819  }
820 }
821 
/*
 * Convert lpcrdr LSP values in freq[] (used as arguments to cosf()) into
 * lpcrdr LPC coefficients written to ak[].
 *
 * The conversion runs a unit impulse (xin = 1 on the first pass, 0 afterwards)
 * through lpcrdr / 2 cascaded second-order sections, one per pair of LSPs,
 * for lpcrdr + 1 passes; each pass after the first yields one coefficient.
 * lpcrdr must not exceed NB_ORDER, the size of the local x_freq[] array.
 * NOTE(review): with lpcrdr < 2 the section loop never runs and n0 is read
 * uninitialized -- confirm callers always pass an order of at least 2.
 */
822 static void lsp_to_lpc(const float *freq, float *ak, int lpcrdr)
823 {
824  float xout1, xout2, xin1, xin2;
825  float *pw, *n0;
/* Zero-initialized filter memory: 4 floats per section plus 2 trailing cells */
826  float Wp[4 * NB_ORDER + 2] = { 0 };
827  float x_freq[NB_ORDER];
/* Number of second-order sections in each of the two chains */
828  const int m = lpcrdr >> 1;
829 
830  pw = Wp;
831 
832  xin1 = xin2 = 1.f;
833 
834  for (int i = 0; i < lpcrdr; i++)
835  x_freq[i] = -cosf(freq[i]);
836 
837  /* reconstruct P(z) and Q(z) by cascading second order
838  * polynomials in form 1 - 2xz(-1) +z(-2), where x is the
839  * LSP coefficient
840  */
841  for (int j = 0; j <= lpcrdr; j++) {
842  int i2 = 0;
/* Even-indexed LSPs drive the first chain, odd-indexed the second */
843  for (int i = 0; i < m; i++, i2 += 2) {
844  n0 = pw + (i * 4);
845  xout1 = xin1 + 2.f * x_freq[i2 ] * n0[0] + n0[1];
846  xout2 = xin2 + 2.f * x_freq[i2 + 1] * n0[2] + n0[3];
847  n0[1] = n0[0];
848  n0[3] = n0[2];
849  n0[0] = xin1;
850  n0[2] = xin2;
851  xin1 = xout1;
852  xin2 = xout2;
853  }
/* n0 still points at the last section, so n0[4] and n0[5] are the two
 * trailing memory cells that follow it in Wp */
854  xout1 = xin1 + n0[4];
855  xout2 = xin2 - n0[5];
856  if (j > 0)
857  ak[j - 1] = (xout1 + xout2) * 0.5f;
858  n0[4] = xin1;
859  n0[5] = xin2;
860 
/* Only the first pass is fed a non-zero input */
861  xin1 = 0.f;
862  xin2 = 0.f;
863  }
864 }
865 
866 static int nb_decode(AVCodecContext *avctx, void *ptr_st,
867  GetBitContext *gb, float *out)
868 {
869  DecoderState *st = ptr_st;
870  float ol_gain = 0, ol_pitch_coef = 0, best_pitch_gain = 0, pitch_average = 0;
871  int m, pitch, wideband, ol_pitch = 0, best_pitch = 40;
872  SpeexContext *s = avctx->priv_data;
873  float innov[NB_SUBFRAME_SIZE];
874  float exc32[NB_SUBFRAME_SIZE];
875  float interp_qlsp[NB_ORDER];
876  float qlsp[NB_ORDER];
877  float ak[NB_ORDER];
878  float pitch_gain[3] = { 0 };
879 
880  st->exc = st->exc_buf + 2 * NB_PITCH_END + NB_SUBFRAME_SIZE + 6;
881 
882  if (st->encode_submode) {
883  do { /* Search for next narrowband block (handle requests, skip wideband blocks) */
884  if (get_bits_left(gb) < 5)
885  return AVERROR_INVALIDDATA;
886  wideband = get_bits1(gb);
887  if (wideband) /* Skip wideband block (for compatibility) */ {
888  int submode, advance;
889 
890  submode = get_bits(gb, SB_SUBMODE_BITS);
891  advance = wb_skip_table[submode];
892  advance -= SB_SUBMODE_BITS + 1;
893  if (advance < 0)
894  return AVERROR_INVALIDDATA;
895  skip_bits_long(gb, advance);
896 
897  if (get_bits_left(gb) < 5)
898  return AVERROR_INVALIDDATA;
899  wideband = get_bits1(gb);
900  if (wideband) {
901  submode = get_bits(gb, SB_SUBMODE_BITS);
902  advance = wb_skip_table[submode];
903  advance -= SB_SUBMODE_BITS + 1;
904  if (advance < 0)
905  return AVERROR_INVALIDDATA;
906  skip_bits_long(gb, advance);
907  wideband = get_bits1(gb);
908  if (wideband) {
909  av_log(avctx, AV_LOG_ERROR, "more than two wideband layers found\n");
910  return AVERROR_INVALIDDATA;
911  }
912  }
913  }
914  if (get_bits_left(gb) < 4)
915  return AVERROR_INVALIDDATA;
916  m = get_bits(gb, 4);
917  if (m == 15) /* We found a terminator */ {
918  return AVERROR_INVALIDDATA;
919  } else if (m == 14) /* Speex in-band request */ {
920  int ret = speex_inband_handler(gb, st, &s->stereo);
921  if (ret)
922  return ret;
923  } else if (m == 13) /* User in-band request */ {
924  int ret = speex_default_user_handler(gb, st, NULL);
925  if (ret)
926  return ret;
927  } else if (m > 8) /* Invalid mode */ {
928  return AVERROR_INVALIDDATA;
929  }
930  } while (m > 8);
931 
932  st->submodeID = m; /* Get the sub-mode that was used */
933  }
934 
935  /* Shift all buffers by one frame */
936  memmove(st->exc_buf, st->exc_buf + NB_FRAME_SIZE, (2 * NB_PITCH_END + NB_SUBFRAME_SIZE + 12) * sizeof(float));
937 
938  /* If null mode (no transmission), just set a couple things to zero */
939  if (st->submodes[st->submodeID] == NULL) {
940  float lpc[NB_ORDER];
941  float innov_gain = 0.f;
942 
943  bw_lpc(0.93f, st->interp_qlpc, lpc, NB_ORDER);
944  innov_gain = compute_rms(st->exc, NB_FRAME_SIZE);
945  for (int i = 0; i < NB_FRAME_SIZE; i++)
946  st->exc[i] = speex_rand(innov_gain, &st->seed);
947 
948  /* Final signal synthesis from excitation */
949  iir_mem(st->exc, lpc, out, NB_FRAME_SIZE, NB_ORDER, st->mem_sp);
950  st->count_lost = 0;
951 
952  return 0;
953  }
954 
955  /* Unquantize LSPs */
956  SUBMODE(lsp_unquant)(qlsp, NB_ORDER, gb);
957 
958  /* Damp memory if a frame was lost and the LSP changed too much */
959  if (st->count_lost) {
960  float fact, lsp_dist = 0;
961 
962  for (int i = 0; i < NB_ORDER; i++)
963  lsp_dist = lsp_dist + FFABS(st->old_qlsp[i] - qlsp[i]);
964  fact = .6f * exp(-.2f * lsp_dist);
965  for (int i = 0; i < NB_ORDER; i++)
966  st->mem_sp[i] = fact * st->mem_sp[i];
967  }
968 
969  /* Handle first frame and lost-packet case */
970  if (st->first || st->count_lost)
971  memcpy(st->old_qlsp, qlsp, sizeof(st->old_qlsp));
972 
973  /* Get open-loop pitch estimation for low bit-rate pitch coding */
974  if (SUBMODE(lbr_pitch) != -1)
975  ol_pitch = NB_PITCH_START + get_bits(gb, 7);
976 
977  if (SUBMODE(forced_pitch_gain))
978  ol_pitch_coef = 0.066667f * get_bits(gb, 4);
979 
980  /* Get global excitation gain */
981  ol_gain = expf(get_bits(gb, 5) / 3.5f);
982 
983  if (st->submodeID == 1)
984  st->dtx_enabled = get_bits(gb, 4) == 15;
985 
986  if (st->submodeID > 1)
987  st->dtx_enabled = 0;
988 
989  for (int sub = 0; sub < NB_NB_SUBFRAMES; sub++) { /* Loop on subframes */
990  float *exc, *innov_save = NULL, tmp, ener;
991  int pit_min, pit_max, offset, q_energy;
992 
993  offset = NB_SUBFRAME_SIZE * sub; /* Offset relative to start of frame */
994  exc = st->exc + offset; /* Excitation */
995  if (st->innov_save) /* Original signal */
996  innov_save = st->innov_save + offset;
997 
998  SPEEX_MEMSET(exc, 0, NB_SUBFRAME_SIZE); /* Reset excitation */
999 
1000  /* Adaptive codebook contribution */
1001  av_assert0(SUBMODE(ltp_unquant));
1002  /* Handle pitch constraints if any */
1003  if (SUBMODE(lbr_pitch) != -1) {
1004  int margin = SUBMODE(lbr_pitch);
1005 
1006  if (margin) {
1007  pit_min = ol_pitch - margin + 1;
1008  pit_min = FFMAX(pit_min, NB_PITCH_START);
1009  pit_max = ol_pitch + margin;
1010  pit_max = FFMIN(pit_max, NB_PITCH_START);
1011  } else {
1012  pit_min = pit_max = ol_pitch;
1013  }
1014  } else {
1015  pit_min = NB_PITCH_START;
1016  pit_max = NB_PITCH_END;
1017  }
1018 
1019  SUBMODE(ltp_unquant)(exc, exc32, pit_min, pit_max, ol_pitch_coef, SUBMODE(LtpParam),
1020  NB_SUBFRAME_SIZE, &pitch, pitch_gain, gb, st->count_lost, offset,
1021  st->last_pitch_gain, 0);
1022 
1023  sanitize_values(exc32, -32000, 32000, NB_SUBFRAME_SIZE);
1024 
1025  tmp = gain_3tap_to_1tap(pitch_gain);
1026 
1027  pitch_average += tmp;
1028  if ((tmp > best_pitch_gain &&
1029  FFABS(2 * best_pitch - pitch) >= 3 &&
1030  FFABS(3 * best_pitch - pitch) >= 4 &&
1031  FFABS(4 * best_pitch - pitch) >= 5) ||
1032  (tmp > .6f * best_pitch_gain &&
1033  (FFABS(best_pitch - 2 * pitch) < 3 ||
1034  FFABS(best_pitch - 3 * pitch) < 4 ||
1035  FFABS(best_pitch - 4 * pitch) < 5)) ||
1036  ((.67f * tmp) > best_pitch_gain &&
1037  (FFABS(2 * best_pitch - pitch) < 3 ||
1038  FFABS(3 * best_pitch - pitch) < 4 ||
1039  FFABS(4 * best_pitch - pitch) < 5))) {
1040  best_pitch = pitch;
1041  if (tmp > best_pitch_gain)
1042  best_pitch_gain = tmp;
1043  }
1044 
1045  memset(innov, 0, sizeof(innov));
1046 
1047  /* Decode sub-frame gain correction */
1048  if (SUBMODE(have_subframe_gain) == 3) {
1049  q_energy = get_bits(gb, 3);
1050  ener = exc_gain_quant_scal3[q_energy] * ol_gain;
1051  } else if (SUBMODE(have_subframe_gain) == 1) {
1052  q_energy = get_bits1(gb);
1053  ener = exc_gain_quant_scal1[q_energy] * ol_gain;
1054  } else {
1055  ener = ol_gain;
1056  }
1057 
1058  av_assert0(SUBMODE(innovation_unquant));
1059  /* Fixed codebook contribution */
1060  SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), NB_SUBFRAME_SIZE, gb, &st->seed);
1061  /* De-normalize innovation and update excitation */
1062 
1063  signal_mul(innov, innov, ener, NB_SUBFRAME_SIZE);
1064 
1065  /* Decode second codebook (only for some modes) */
1066  if (SUBMODE(double_codebook)) {
1067  float innov2[NB_SUBFRAME_SIZE] = { 0 };
1068 
1069  SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), NB_SUBFRAME_SIZE, gb, &st->seed);
1070  signal_mul(innov2, innov2, 0.454545f * ener, NB_SUBFRAME_SIZE);
1071  for (int i = 0; i < NB_SUBFRAME_SIZE; i++)
1072  innov[i] += innov2[i];
1073  }
1074  for (int i = 0; i < NB_SUBFRAME_SIZE; i++)
1075  exc[i] = exc32[i] + innov[i];
1076  if (innov_save)
1077  memcpy(innov_save, innov, sizeof(innov));
1078 
1079  /* Vocoder mode */
1080  if (st->submodeID == 1) {
1081  float g = ol_pitch_coef;
1082 
1083  g = av_clipf(1.5f * (g - .2f), 0.f, 1.f);
1084 
1085  SPEEX_MEMSET(exc, 0, NB_SUBFRAME_SIZE);
1086  while (st->voc_offset < NB_SUBFRAME_SIZE) {
1087  if (st->voc_offset >= 0)
1088  exc[st->voc_offset] = sqrtf(2.f * ol_pitch) * (g * ol_gain);
1089  st->voc_offset += ol_pitch;
1090  }
1092 
1093  for (int i = 0; i < NB_SUBFRAME_SIZE; i++) {
1094  float exci = exc[i];
1095  exc[i] = (.7f * exc[i] + .3f * st->voc_m1) + ((1.f - .85f * g) * innov[i]) - .15f * g * st->voc_m2;
1096  st->voc_m1 = exci;
1097  st->voc_m2 = innov[i];
1098  st->voc_mean = .8f * st->voc_mean + .2f * exc[i];
1099  exc[i] -= st->voc_mean;
1100  }
1101  }
1102  }
1103 
1104  if (st->lpc_enh_enabled && SUBMODE(comb_gain) > 0 && !st->count_lost) {
1106  2 * NB_SUBFRAME_SIZE, best_pitch, 40, SUBMODE(comb_gain));
1108  st->interp_qlpc, NB_ORDER, 2 * NB_SUBFRAME_SIZE, best_pitch, 40,
1109  SUBMODE(comb_gain));
1110  } else {
1112  }
1113 
1114  /* If the last packet was lost, re-scale the excitation to obtain the same
1115  * energy as encoded in ol_gain */
1116  if (st->count_lost) {
1117  float exc_ener, gain;
1118 
1119  exc_ener = compute_rms(st->exc, NB_FRAME_SIZE);
1120  av_assert0(exc_ener + 1.f > 0.f);
1121  gain = fminf(ol_gain / (exc_ener + 1.f), 2.f);
1122  for (int i = 0; i < NB_FRAME_SIZE; i++) {
1123  st->exc[i] *= gain;
1124  out[i] = st->exc[i - NB_SUBFRAME_SIZE];
1125  }
1126  }
1127 
1128  for (int sub = 0; sub < NB_NB_SUBFRAMES; sub++) { /* Loop on subframes */
1129  const int offset = NB_SUBFRAME_SIZE * sub; /* Offset relative to start of frame */
1130  float pi_g = 1.f, *sp = out + offset; /* Original signal */
1131 
1132  lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, NB_ORDER, sub, NB_NB_SUBFRAMES, 0.002f);
1133  lsp_to_lpc(interp_qlsp, ak, NB_ORDER); /* Compute interpolated LPCs (unquantized) */
1134 
1135  for (int i = 0; i < NB_ORDER; i += 2) /* Compute analysis filter at w=pi */
1136  pi_g += ak[i + 1] - ak[i];
1137  st->pi_gain[sub] = pi_g;
1138  st->exc_rms[sub] = compute_rms(st->exc + offset, NB_SUBFRAME_SIZE);
1139 
1141 
1142  memcpy(st->interp_qlpc, ak, sizeof(st->interp_qlpc));
1143  }
1144 
1145  if (st->highpass_enabled)
1147 
1148  /* Store the LSPs for interpolation in the next frame */
1149  memcpy(st->old_qlsp, qlsp, sizeof(st->old_qlsp));
1150 
1151  st->count_lost = 0;
1152  st->last_pitch = best_pitch;
1153  st->last_pitch_gain = .25f * pitch_average;
1154  st->last_ol_gain = ol_gain;
1155  st->first = 0;
1156 
1157  return 0;
1158 }
1159 
/*
 * QMF synthesis: combine two half-rate sub-band signals into one
 * full-rate output.
 *
 * x1, x2      sub-band inputs, N/2 samples each
 * a           filter taps (a[0] .. a[M-1] are read when M/2 is even)
 * y           output, N samples; may alias x1 or x2 because both inputs are
 *             copied into local buffers before the first write to y
 * N           number of output samples
 * M           filter order
 * mem1, mem2  filter history; only the odd slots 1, 3, ..., M-1 are
 *             read and rewritten
 *
 * The local work buffers hold 352 floats, so N/2 + M/2 must not exceed 352.
 */
static void qmf_synth(const float *x1, const float *x2, const float *a, float *y, int N, int M, float *mem1, float *mem2)
{
    const int half_taps = M >> 1;
    const int half_len  = N >> 1;
    float rev1[352], rev2[352];

    /* Time-reversed copies of both inputs ... */
    for (int k = 0; k < half_len; k++) {
        rev1[k] = x1[half_len - 1 - k];
        rev2[k] = x2[half_len - 1 - k];
    }
    /* ... followed by the history kept from the previous call. */
    for (int k = 0; k < half_taps; k++) {
        rev1[half_len + k] = mem1[2 * k + 1];
        rev2[half_len + k] = mem2[2 * k + 1];
    }

    /* Each pass consumes two samples per band and emits four outputs. */
    for (int n = 0; n < half_len; n += 2) {
        const float *h1 = rev1 + half_len - 2 - n;
        const float *h2 = rev2 + half_len - 2 - n;
        float acc[4] = { 0.f, 0.f, 0.f, 0.f };
        float even1 = h1[0];
        float even2 = h2[0];
        float *dst = y + 2 * n;

        for (int k = 0; k < half_taps; k += 2) {
            const float *tap = a + 2 * k;
            const float odd1 = h1[k + 1];
            const float odd2 = h2[k + 1];

            acc[0] += tap[0] * (odd1 - odd2);
            acc[1] += tap[1] * (odd1 + odd2);
            acc[2] += tap[0] * (even1 - even2);
            acc[3] += tap[1] * (even1 + even2);

            even1 = h1[k + 2];
            even2 = h2[k + 2];

            acc[0] += tap[2] * (even1 - even2);
            acc[1] += tap[3] * (even1 + even2);
            acc[2] += tap[2] * (odd1 - odd2);
            acc[3] += tap[3] * (odd1 + odd2);
        }

        for (int c = 0; c < 4; c++)
            dst[c] = 2.f * acc[c];
    }

    /* Keep the newest M/2 reversed samples of each band as history. */
    for (int k = 0; k < half_taps; k++) {
        mem1[2 * k + 1] = rev1[k];
        mem2[2 * k + 1] = rev2[k];
    }
}
1216 
/*
 * Decode one sub-band (wideband) frame.
 *
 * The decode callback of mode st->modeID - 1 is run first on the start of
 * out. The high band is then synthesised at out + st->frame_size, and
 * qmf_synth() merges both halves in place into st->full_frame_size samples.
 *
 * avctx   codec context; its priv_data is the SpeexContext
 * ptr_st  DecoderState of this sub-band layer
 * gb      bit reader for the current packet
 * out     output buffer, also used as working storage for both bands
 *
 * Returns 0 on success, the negative error code of the lower-layer decode,
 * or AVERROR_INVALIDDATA when the stream selects a non-zero submode that has
 * no table entry.
 *
 * NOTE(review): low_innov_alias and the s->st[st->modeID - 1] accesses are
 * only meaningful for st->modeID > 0; presumably this function is never
 * installed for mode 0 - confirm against speex_modes[].
 */
1217 static int sb_decode(AVCodecContext *avctx, void *ptr_st,
1218  GetBitContext *gb, float *out)
1219 {
1220  SpeexContext *s = avctx->priv_data;
1221  DecoderState *st = ptr_st;
1222  float low_pi_gain[NB_NB_SUBFRAMES];
1223  float low_exc_rms[NB_NB_SUBFRAMES];
1224  float interp_qlsp[NB_ORDER];
1225  int ret, wideband;
1226  float *low_innov_alias;
1227  float qlsp[NB_ORDER];
1228  float ak[NB_ORDER];
1229  const SpeexMode *mode;
1230 
1231  mode = st->mode;
1232 
      /* Decode the lower layer first. The region at out + frame_size is lent
       * to it as the place to save its innovation signal. */
1233  if (st->modeID > 0) {
1234  low_innov_alias = out + st->frame_size;
1235  s->st[st->modeID - 1].innov_save = low_innov_alias;
1236  ret = speex_modes[st->modeID - 1].decode(avctx, &s->st[st->modeID - 1], gb, out);
1237  if (ret < 0)
1238  return ret;
1239  }
1240 
1241  if (st->encode_submode) { /* Check "wideband bit" */
      /* Peek only: the bit is consumed below solely when it is set. */
1242  if (get_bits_left(gb) > 0)
1243  wideband = show_bits1(gb);
1244  else
1245  wideband = 0;
1246  if (wideband) { /* Regular wideband frame, read the submode */
      /* Consumes the wideband bit; the value is not used afterwards. */
1247  wideband = get_bits1(gb);
1248  st->submodeID = get_bits(gb, SB_SUBMODE_BITS);
1249  } else { /* Was a narrowband frame, set "null submode" */
1250  st->submodeID = 0;
1251  }
1252  if (st->submodeID != 0 && st->submodes[st->submodeID] == NULL)
1253  return AVERROR_INVALIDDATA;
1254  }
1255 
1256  /* If null mode (no transmission), just set a couple things to zero */
1257  if (st->submodes[st->submodeID] == NULL) {
      /* High band gets a tiny constant excitation instead of decoded data. */
1258  for (int i = 0; i < st->frame_size; i++)
1259  out[st->frame_size + i] = 1e-15f;
1260 
1261  st->first = 1;
1262 
1263  /* Final signal synthesis from excitation */
1264  iir_mem(out + st->frame_size, st->interp_qlpc, out + st->frame_size, st->frame_size, st->lpc_size, st->mem_sp);
1265 
1266  qmf_synth(out, out + st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem);
1267 
1268  return 0;
1269  }
1270 
      /* Snapshot the per-subframe statistics the lower layer just produced. */
1271  memcpy(low_pi_gain, s->st[st->modeID - 1].pi_gain, sizeof(low_pi_gain));
1272  memcpy(low_exc_rms, s->st[st->modeID - 1].exc_rms, sizeof(low_exc_rms));
1273 
1274  SUBMODE(lsp_unquant)(qlsp, st->lpc_size, gb);
1275 
      /* On the first frame there is no history, so interpolate from qlsp itself. */
1276  if (st->first)
1277  memcpy(st->old_qlsp, qlsp, sizeof(st->old_qlsp));
1278 
1279  for (int sub = 0; sub < st->nb_subframes; sub++) {
1280  float filter_ratio, el, rl, rh;
1281  float *innov_save = NULL, *sp;
1282  float exc[80];
1283  int offset;
1284 
1285  offset = st->subframe_size * sub;
1286  sp = out + st->frame_size + offset;
1287  /* Pointer for saving innovation */
1288  if (st->innov_save) {
1289  innov_save = st->innov_save + 2 * offset;
1290  SPEEX_MEMSET(innov_save, 0, 2 * st->subframe_size);
1291  }
1292 
1293  av_assert0(st->nb_subframes > 0);
1294  lsp_interpolate(st->old_qlsp, qlsp, interp_qlsp, st->lpc_size, sub, st->nb_subframes, 0.05f);
1295  lsp_to_lpc(interp_qlsp, ak, st->lpc_size);
1296 
1297  /* Calculate response ratio between the low and high filter in the middle
1298  of the band (4000 Hz) */
1299  st->pi_gain[sub] = 1.f;
1300  rh = 1.f;
1301  for (int i = 0; i < st->lpc_size; i += 2) {
1302  rh += ak[i + 1] - ak[i];
1303  st->pi_gain[sub] += ak[i] + ak[i + 1];
1304  }
1305 
1306  rl = low_pi_gain[sub];
      /* The .01f terms keep the ratio finite when either gain is near zero. */
1307  filter_ratio = (rl + .01f) / (rh + .01f);
1308 
1309  SPEEX_MEMSET(exc, 0, st->subframe_size);
      /* Submodes without an innovation codebook derive the high-band
       * excitation from the saved low-band innovation, with the sign
       * flipped on every odd sample. */
1310  if (!SUBMODE(innovation_unquant)) {
1311  const int x = get_bits(gb, 5);
1312  const float g = expf(.125f * (x - 10)) / filter_ratio;
1313 
1314  for (int i = 0; i < st->subframe_size; i += 2) {
1315  exc[i ] = mode->folding_gain * low_innov_alias[offset + i ] * g;
1316  exc[i + 1] = -mode->folding_gain * low_innov_alias[offset + i + 1] * g;
1317  }
1318  } else {
1319  float gc, scale;
1320 
1321  el = low_exc_rms[sub];
1322  gc = 0.87360f * gc_quant_bound[get_bits(gb, 4)];
1323 
1324  if (st->subframe_size == 80)
1325  gc *= M_SQRT2;
1326 
1327  scale = (gc * el) / filter_ratio;
1328  SUBMODE(innovation_unquant)
1329  (exc, SUBMODE(innovation_params), st->subframe_size,
1330  gb, &st->seed);
1331 
1332  signal_mul(exc, exc, scale, st->subframe_size);
      /* Optional second codebook pass, added at 0.4 times the first pass's scale. */
1333  if (SUBMODE(double_codebook)) {
1334  float innov2[80];
1335 
1336  SPEEX_MEMSET(innov2, 0, st->subframe_size);
1337  SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframe_size, gb, &st->seed);
1338  signal_mul(innov2, innov2, 0.4f * scale, st->subframe_size);
1339  for (int i = 0; i < st->subframe_size; i++)
1340  exc[i] += innov2[i];
1341  }
1342  }
1343 
      /* Only even slots are written; odd slots keep the zeros set above. */
1344  if (st->innov_save) {
1345  for (int i = 0; i < st->subframe_size; i++)
1346  innov_save[2 * i] = exc[i];
1347  }
1348 
      /* Synthesis runs on what st->exc_buf held before this subframe; the
       * new excitation and LPC coefficients are stored only afterwards. */
1349  iir_mem(st->exc_buf, st->interp_qlpc, sp, st->subframe_size, st->lpc_size, st->mem_sp);
1350  memcpy(st->exc_buf, exc, sizeof(exc));
1351  memcpy(st->interp_qlpc, ak, sizeof(st->interp_qlpc));
1352  st->exc_rms[sub] = compute_rms(st->exc_buf, st->subframe_size);
1353  }
1354 
      /* Merge low band (out) and high band (out + frame_size) in place. */
1355  qmf_synth(out, out + st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem);
1356  memcpy(st->old_qlsp, qlsp, sizeof(st->old_qlsp));
1357 
1358  st->first = 0;
1359 
1360  return 0;
1361 }
1362 
/*
 * Reset one DecoderState (st) to its start-of-stream defaults for the given
 * SpeexMode (mode). Always returns 0.
 *
 * NOTE(review): the line carrying this function's signature is missing from
 * this listing; presumably this is decoder_init(), which the codec init code
 * calls with a state and a speex_modes[] entry - confirm.
 */
1364 {
1365  st->mode = mode;
1366  st->modeID = mode->modeID;
1367 
1368  st->first = 1;
1369  st->encode_submode = 1;
1370  st->is_wideband = st->modeID > 0;
1371  st->innov_save = NULL;
1372 
      /* Geometry is copied from the mode description. */
1373  st->submodes = mode->submodes;
1374  st->submodeID = mode->default_submode;
1375  st->subframe_size = mode->subframe_size;
1376  st->lpc_size = mode->lpc_size;
      /* Layers above mode 0 produce twice mode->frame_size output samples. */
1377  st->full_frame_size = (1 + (st->modeID > 0)) * mode->frame_size;
1378  st->nb_subframes = mode->frame_size / mode->subframe_size;
1379  st->frame_size = mode->frame_size;
1380 
1381  st->lpc_enh_enabled = 1;
1382 
1383  st->last_pitch = 40;
1384  st->count_lost = 0;
1385  st->seed = 1000;
1386  st->last_ol_gain = 0;
1387 
1388  st->voc_m1 = st->voc_m2 = st->voc_mean = 0;
1389  st->voc_offset = 0;
1390  st->dtx_enabled = 0;
      /* The high-pass flag is set for mode 0 only. */
1391  st->highpass_enabled = mode->modeID == 0;
1392 
1393  return 0;
1394 }
1395 
1397  const uint8_t *extradata, int extradata_size)
1398 {
1399  SpeexContext *s = avctx->priv_data;
1400  const uint8_t *buf = extradata;
1401 
1402  if (memcmp(buf, "Speex ", 8))
1403  return AVERROR_INVALIDDATA;
1404 
1405  buf += 28;
1406 
1407  s->version_id = bytestream_get_le32(&buf);
1408  buf += 4;
1409  s->rate = bytestream_get_le32(&buf);
1410  if (s->rate <= 0)
1411  return AVERROR_INVALIDDATA;
1412  s->mode = bytestream_get_le32(&buf);
1413  if (s->mode < 0 || s->mode >= SPEEX_NB_MODES)
1414  return AVERROR_INVALIDDATA;
1415  s->bitstream_version = bytestream_get_le32(&buf);
1416  if (s->bitstream_version != 4)
1417  return AVERROR_INVALIDDATA;
1418  s->nb_channels = bytestream_get_le32(&buf);
1419  if (s->nb_channels <= 0 || s->nb_channels > 2)
1420  return AVERROR_INVALIDDATA;
1421  s->bitrate = bytestream_get_le32(&buf);
1422  s->frame_size = bytestream_get_le32(&buf);
1423  if (s->frame_size < NB_FRAME_SIZE << s->mode)
1424  return AVERROR_INVALIDDATA;
1425  s->vbr = bytestream_get_le32(&buf);
1426  s->frames_per_packet = bytestream_get_le32(&buf);
1427  if (s->frames_per_packet <= 0 ||
1428  s->frames_per_packet > 64 ||
1429  s->frames_per_packet >= INT32_MAX / s->nb_channels / s->frame_size)
1430  return AVERROR_INVALIDDATA;
1431  s->extra_headers = bytestream_get_le32(&buf);
1432 
1433  return 0;
1434 }
1435 
/*
 * Decoder initialisation: allocate the float DSP helper, work out the stream
 * parameters (from a Speex header in extradata when one of at least 80 bytes
 * is present, otherwise from the codec context), publish them on avctx and
 * initialise one DecoderState per mode layer up to s->mode.
 *
 * Returns 0 on success or a negative AVERROR code.
 *
 * NOTE(review): the line carrying this function's signature, and two lines
 * between the bit_rate and ch_layout.nb_channels assignments, are missing
 * from this listing.
 */
1437 {
1438  SpeexContext *s = avctx->priv_data;
1439  int ret;
1440 
1441  s->fdsp = avpriv_float_dsp_alloc(0);
1442  if (!s->fdsp)
1443  return AVERROR(ENOMEM);
1444 
1445  if (avctx->extradata && avctx->extradata_size >= 80) {
1446  ret = parse_speex_extradata(avctx, avctx->extradata, avctx->extradata_size);
1447  if (ret < 0)
1448  return ret;
1449  } else {
      /* No usable header: fall back to what the codec context provides. */
1450  s->rate = avctx->sample_rate;
1451  if (s->rate <= 0)
1452  return AVERROR_INVALIDDATA;
1453 
1454  s->nb_channels = avctx->ch_layout.nb_channels;
1455  if (s->nb_channels <= 0 || s->nb_channels > 2)
1456  return AVERROR_INVALIDDATA;
1457 
      /* Any rate other than 8000 or 16000 selects mode 2. */
1458  switch (s->rate) {
1459  case 8000: s->mode = 0; break;
1460  case 16000: s->mode = 1; break;
1461  case 32000: s->mode = 2; break;
1462  default: s->mode = 2;
1463  }
1464 
1465  s->frames_per_packet = 64;
1466  s->frame_size = NB_FRAME_SIZE << s->mode;
1467  }
1468 
      /* 'SPXN'-tagged streams override the settings above: mode 0, one
       * channel, one frame per packet, and a packet size looked up from the
       * quality byte at extradata[37]. */
1469  if (avctx->codec_tag == MKTAG('S', 'P', 'X', 'N')) {
1470  int quality;
1471 
1472  if (!avctx->extradata || avctx->extradata && avctx->extradata_size < 47) {
1473  av_log(avctx, AV_LOG_ERROR, "Missing or invalid extradata.\n");
1474  return AVERROR_INVALIDDATA;
1475  }
1476 
1477  quality = avctx->extradata[37];
1478  if (quality > 10) {
1479  av_log(avctx, AV_LOG_ERROR, "Unsupported quality mode %d.\n", quality);
1480  return AVERROR_PATCHWELCOME;
1481  }
1482 
      /* Table indexed by quality 0..10. */
1483  s->pkt_size = ((const uint8_t[]){ 5, 10, 15, 20, 20, 28, 28, 38, 38, 46, 62 })[quality];
1484 
1485  s->mode = 0;
1486  s->nb_channels = 1;
1487  s->rate = avctx->sample_rate;
1488  if (s->rate <= 0)
1489  return AVERROR_INVALIDDATA;
1490  s->frames_per_packet = 1;
1491  s->frame_size = NB_FRAME_SIZE;
1492  }
1493 
      /* Publish the resolved stream parameters on the codec context. */
1494  if (s->bitrate > 0)
1495  avctx->bit_rate = s->bitrate;
1498  avctx->ch_layout.nb_channels = s->nb_channels;
1499  avctx->sample_rate = s->rate;
1500  avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
1501 
      /* Initialise every layer from mode 0 up to and including s->mode. */
1502  for (int m = 0; m <= s->mode; m++) {
1503  ret = decoder_init(s, &s->st[m], &speex_modes[m]);
1504  if (ret < 0)
1505  return ret;
1506  }
1507 
      /* Starting values for the stereo expansion state. */
1508  s->stereo.balance = 1.f;
1509  s->stereo.e_ratio = .5f;
1510  s->stereo.smooth_left = 1.f;
1511  s->stereo.smooth_right = 1.f;
1512 
1513  return 0;
1514 }
1515 
1516 static void speex_decode_stereo(float *data, int frame_size, StereoState *stereo)
1517 {
1518  float balance, e_left, e_right, e_ratio;
1519 
1520  balance = stereo->balance;
1521  e_ratio = stereo->e_ratio;
1522 
1523  /* These two are Q14, with max value just below 2. */
1524  e_right = 1.f / sqrtf(e_ratio * (1.f + balance));
1525  e_left = sqrtf(balance) * e_right;
1526 
1527  for (int i = frame_size - 1; i >= 0; i--) {
1528  float tmp = data[i];
1529  stereo->smooth_left = stereo->smooth_left * 0.98f + e_left * 0.02f;
1530  stereo->smooth_right = stereo->smooth_right * 0.98f + e_right * 0.02f;
1531  data[2 * i ] = stereo->smooth_left * tmp;
1532  data[2 * i + 1] = stereo->smooth_right * tmp;
1533  }
1534 }
1535 
/*
 * Decode one packet into up to s->frames_per_packet Speex frames of float
 * samples, scaled by 1/32768.
 *
 * Returns the number of bytes consumed (bits read, rounded up to whole
 * bytes) or a negative error code; *got_frame_ptr is set to 1 on success.
 *
 * NOTE(review): the first line of this function's signature is missing from
 * this listing.
 */
1537  int *got_frame_ptr, AVPacket *avpkt)
1538 {
1539  SpeexContext *s = avctx->priv_data;
1540  int frames_per_packet = s->frames_per_packet;
1541  const float scale = 1.f / 32768.f;
1542  int buf_size = avpkt->size;
1543  float *dst;
1544  int ret;
1545 
      /* When a fixed packet size was configured and the packet is exactly
       * 62 bytes, only the first s->pkt_size bytes are parsed. */
1546  if (s->pkt_size && avpkt->size == 62)
1547  buf_size = s->pkt_size;
1548  if ((ret = init_get_bits8(&s->gb, avpkt->data, buf_size)) < 0)
1549  return ret;
1550 
      /* Request room for the maximum frame count, rounded up to a multiple
       * of 4 samples; the real count is written back after decoding. */
1551  frame->nb_samples = FFALIGN(s->frame_size * frames_per_packet, 4);
1552  if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
1553  return ret;
1554 
1555  dst = (float *)frame->extended_data[0];
1556  for (int i = 0; i < frames_per_packet; i++) {
1557  ret = speex_modes[s->mode].decode(avctx, &s->st[s->mode], &s->gb, dst + i * s->frame_size);
1558  if (ret < 0)
1559  return ret;
1560  if (avctx->ch_layout.nb_channels == 2)
1561  speex_decode_stereo(dst + i * s->frame_size, s->frame_size, &s->stereo);
      /* Stop early when fewer than 5 bits remain or the next 5 bits are 15. */
1562  if (get_bits_left(&s->gb) < 5 ||
1563  show_bits(&s->gb, 5) == 15) {
1564  frames_per_packet = i + 1;
1565  break;
1566  }
1567  }
1568 
1569  dst = (float *)frame->extended_data[0];
      /* NOTE(review): the scaling length still uses the aligned maximum
       * nb_samples set above, so it also covers samples past the frames
       * actually decoded when the loop ends early - confirm this is intended. */
1570  s->fdsp->vector_fmul_scalar(dst, dst, scale, frame->nb_samples * frame->ch_layout.nb_channels);
1571  frame->nb_samples = s->frame_size * frames_per_packet;
1572 
1573  *got_frame_ptr = 1;
1574 
1575  return (get_bits_count(&s->gb) + 7) >> 3;
1576 }
1577 
/*
 * Close callback: release the float DSP context held in the SpeexContext.
 * Always returns 0.
 *
 * NOTE(review): the line carrying this function's signature is missing from
 * this listing.
 */
1579 {
1580  SpeexContext *s = avctx->priv_data;
1581  av_freep(&s->fdsp);
1582  return 0;
1583 }
1584 
/*
 * Codec description entry for the Speex audio decoder.
 *
 * NOTE(review): the opening line of this initializer and the line between
 * .init and .close are missing from this listing.
 */
1586  .p.name = "speex",
1587  CODEC_LONG_NAME("Speex"),
1588  .p.type = AVMEDIA_TYPE_AUDIO,
1589  .p.id = AV_CODEC_ID_SPEEX,
1590  .init = speex_decode_init,
1592  .close = speex_decode_close,
1593  .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
1594  .priv_data_size = sizeof(SpeexContext),
      /* With this flag the close callback may be called even when init failed. */
1595  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
1596 };
M
#define M(a, b)
Definition: vp3dsp.c:48
LtpParams::pitch_bits
int pitch_bits
Definition: speexdec.c:92
DecoderState::submodeID
int submodeID
Activated sub-mode.
Definition: speexdec.c:199
split_cb_high
static const SplitCodebookParams split_cb_high
Definition: speexdec.c:114
SB_SUBMODE_BITS
#define SB_SUBMODE_BITS
Definition: speexdec.c:71
nb_submode4
static const SpeexSubmode nb_submode4
Definition: speexdec.c:472
skip_bits_long
static void skip_bits_long(GetBitContext *s, int n)
Skips the specified number of bits.
Definition: get_bits.h:278
DecoderState::seed
uint32_t seed
Seed used for random number generation.
Definition: speexdec.c:195
h0
static const float h0[64]
Definition: speexdata.h:741
SpeexSubmode::have_subframe_gain
int have_subframe_gain
Number of bits to use as sub-frame innovation gain.
Definition: speexdec.c:152
SplitCodebookParams::shape_bits
int shape_bits
Definition: speexdec.c:104
FF_CODEC_CAP_INIT_CLEANUP
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: codec_internal.h:42
show_bits1
static unsigned int show_bits1(GetBitContext *s)
Definition: get_bits.h:408
QMF_ORDER
#define QMF_ORDER
Definition: speexdec.c:66
get_bits_left
static int get_bits_left(GetBitContext *gb)
Definition: get_bits.h:694
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
wb_submode2
static const SpeexSubmode wb_submode2
Definition: speexdec.c:506
hexc_10_32_table
static const int8_t hexc_10_32_table[320]
Definition: speexdata.h:694
nb_submode3
static const SpeexSubmode nb_submode3
Definition: speexdec.c:466
DecoderState::count_lost
int count_lost
Was the last frame lost?
Definition: speexdec.c:184
exc_gain_quant_scal1
static const float exc_gain_quant_scal1[2]
Definition: speexdata.h:778
out
FILE * out
Definition: movenc.c:54
speexdata.h
SpeexContext::vbr
int32_t vbr
1 for a VBR decoding, 0 otherwise
Definition: speexdec.c:249
AVCodecContext::sample_rate
int sample_rate
samples per second
Definition: avcodec.h:1064
DecoderState::exc_buf
float exc_buf[NB_DEC_BUFFER]
Excitation buffer.
Definition: speexdec.c:213
DecoderState::highpass_enabled
int highpass_enabled
Is the input filter enabled.
Definition: speexdec.c:209
DecoderState::dtx_enabled
int dtx_enabled
Definition: speexdec.c:208
DecoderState::mode
const SpeexMode * mode
Definition: speexdec.c:179
hexc_table
static const int8_t hexc_table[1024]
Definition: speexdata.h:612
ltp_quant_func
int(* ltp_quant_func)(float *, float *, float *, float *, float *, float *, const void *, int, int, float, int, int, GetBitContext *, char *, float *, float *, int, int, int, float *)
Long-term predictor quantization.
Definition: speexdec.c:124
DecoderState::mem_hp
float mem_hp[2]
High-pass filter memory.
Definition: speexdec.c:212
DecoderState::voc_m1
float voc_m1
Definition: speexdec.c:203
get_bits_count
static int get_bits_count(const GetBitContext *s)
Definition: get_bits.h:266
exc_8_128_table
static const int8_t exc_8_128_table[1024]
Definition: speexdata.h:295
LSP_DIV_1024
#define LSP_DIV_1024(x)
Definition: speexdec.c:87
SpeexContext::version_id
int32_t version_id
Version for Speex (for checking compatibility)
Definition: speexdec.c:242
cdbk_nb_high1
static const int8_t cdbk_nb_high1[320]
Definition: speexdata.h:562
SpeexMode::modeID
int modeID
ID of the mode.
Definition: speexdec.c:168
DecoderState::lpc_enh_enabled
int lpc_enh_enabled
1 when LPC enhancer is on, 0 otherwise
Definition: speexdec.c:200
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:340
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
AVPacket::data
uint8_t * data
Definition: packet.h:491
LtpParams
Definition: speexdec.c:89
data
const char data[16]
Definition: mxf.c:148
DecoderState::exc
float * exc
Start of excitation frame.
Definition: speexdec.c:211
expf
#define expf(x)
Definition: libm.h:283
FFCodec
Definition: codec_internal.h:127
LSP_DIV_512
#define LSP_DIV_512(x)
Definition: speexdec.c:86
AVChannelLayout::order
enum AVChannelOrder order
Channel order used in this layout.
Definition: channel_layout.h:312
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
N2
#define N2
Definition: vf_pp7.c:72
SpeexMode::lpc_size
int lpc_size
Order of LPC filter.
Definition: speexdec.c:172
c1
static const uint64_t c1
Definition: murmur3.c:52
nb_submode8
static const SpeexSubmode nb_submode8
Definition: speexdec.c:496
AVChannelLayout::nb_channels
int nb_channels
Number of channels in this layout.
Definition: channel_layout.h:317
SpeexSubmode::double_codebook
int double_codebook
Apply innovation quantization twice for higher quality (and higher bit-rate)
Definition: speexdec.c:154
speex_inband_handler
static int speex_inband_handler(GetBitContext *gb, void *state, StereoState *stereo)
Definition: speexdec.c:629
DecoderState::first
int first
Is first frame
Definition: speexdec.c:181
gain_3tap_to_1tap
#define gain_3tap_to_1tap(g)
Definition: speexdec.c:350
quality
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about quality
Definition: rate_distortion.txt:12
wb_submode4
static const SpeexSubmode wb_submode4
Definition: speexdec.c:516
SpeexSubmode
Definition: speexdec.c:146
SpeexMode::subframe_size
int subframe_size
Size of sub-frames used for decoding.
Definition: speexdec.c:171
SpeexSubmode::LtpParam
const void * LtpParam
Pitch parameters (options)
Definition: speexdec.c:159
LSP_DIV_256
#define LSP_DIV_256(x)
Definition: speexdec.c:85
SpeexContext::pkt_size
int pkt_size
Definition: speexdec.c:253
SpeexContext::nb_channels
int32_t nb_channels
Number of channels decoded.
Definition: speexdec.c:246
get_bits
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:335
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:131
exc_5_256_table
static const int8_t exc_5_256_table[1280]
Definition: speexdata.h:137
AV_CODEC_ID_SPEEX
@ AV_CODEC_ID_SPEEX
Definition: codec_id.h:477
LSP_LINEAR_HIGH
#define LSP_LINEAR_HIGH(i)
Definition: speexdec.c:84
cosf
#define cosf(x)
Definition: libm.h:78
interp
interp
Definition: vf_curves.c:61
AVCodecContext::ch_layout
AVChannelLayout ch_layout
Audio channel layout.
Definition: avcodec.h:2107
speex_default_user_handler
static int speex_default_user_handler(GetBitContext *gb, void *state, void *data)
Definition: speexdec.c:224
av_int2float
static av_always_inline float av_int2float(uint32_t i)
Reinterpret a 32-bit integer as a float.
Definition: intfloat.h:40
SpeexSubmode::ltp_unquant
ltp_unquant_func ltp_unquant
Long-term predictor (pitch) un-quantizer.
Definition: speexdec.c:158
GetBitContext
Definition: get_bits.h:108
innovation_quant_func
void(* innovation_quant_func)(float *, float *, float *, float *, const void *, int, int, float *, float *, GetBitContext *, char *, int, int)
Innovation quantization function.
Definition: speexdec.c:137
DecoderState
Definition: speexdec.c:178
AVFrame::ch_layout
AVChannelLayout ch_layout
Channel layout of the audio data.
Definition: frame.h:802
scale
static av_always_inline float scale(float x, float s)
Definition: vf_v360.c:1389
split_cb_nb_lbr
static const SplitCodebookParams split_cb_nb_lbr
Definition: speexdec.c:110
DecoderState::nb_subframes
int nb_subframes
Number of high-band sub-frames.
Definition: speexdec.c:187
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
wb_submode3
static const SpeexSubmode wb_submode3
Definition: speexdec.c:511
SpeexContext::bitrate
int32_t bitrate
Bit-rate used.
Definition: speexdec.c:247
e_ratio_quant
static const float e_ratio_quant[4]
Definition: speexdata.h:766
ff_speex_decoder
const FFCodec ff_speex_decoder
Definition: speexdec.c:1585
a1
#define a1
Definition: regdef.h:47
split_cb_nb_ulbr
static const SplitCodebookParams split_cb_nb_ulbr
Definition: speexdec.c:108
NB_PITCH_START
#define NB_PITCH_START
Definition: speexdec.c:75
avassert.h
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
init_get_bits8
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:545
StereoState::balance
float balance
Left/right balance info.
Definition: speexdec.c:232
lsp_interpolate
static void lsp_interpolate(const float *old_lsp, const float *new_lsp, float *lsp, int len, int subframe, int nb_subframes, float margin)
Definition: speexdec.c:805
float
float
Definition: af_crystalizer.c:121
AVCodecContext::extradata_size
int extradata_size
Definition: avcodec.h:543
FF_CODEC_DECODE_CB
#define FF_CODEC_DECODE_CB(func)
Definition: codec_internal.h:306
s
#define s(width, name)
Definition: cbs_vp9.c:198
split_cb_sb
static const SplitCodebookParams split_cb_sb
Definition: speexdec.c:113
nb_decode
static int nb_decode(AVCodecContext *, void *, GetBitContext *, float *)
Definition: speexdec.c:866
g
const char * g
Definition: vf_curves.c:127
frame_size
int frame_size
Definition: mxfenc.c:2311
speex_std_stereo
static int speex_std_stereo(GetBitContext *gb, void *state, void *data)
Definition: speexdec.c:618
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
AV_CHANNEL_ORDER_UNSPEC
@ AV_CHANNEL_ORDER_UNSPEC
Only the channel count is specified, without any further information about the channel order.
Definition: channel_layout.h:112
gain_cdbk_lbr
static const int8_t gain_cdbk_lbr[128]
Definition: speexdata.h:406
fminf
float fminf(float, float)
NB_SUBFRAME_SIZE
#define NB_SUBFRAME_SIZE
Definition: speexdec.c:73
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
nb_submode7
static const SpeexSubmode nb_submode7
Definition: speexdec.c:490
decode.h
speex_rand
static float speex_rand(float std, uint32_t *seed)
Definition: speexdec.c:297
get_bits.h
cdbk_nb_low2
static const int8_t cdbk_nb_low2[320]
Definition: speexdata.h:537
speex_modes
static const SpeexMode speex_modes[SPEEX_NB_MODES]
Definition: speexdec.c:524
DecoderState::encode_submode
int encode_submode
Definition: speexdec.c:197
NB_FRAME_SIZE
#define NB_FRAME_SIZE
Definition: speexdec.c:68
StereoState
Definition: speexdec.c:231
DecoderState::modeID
int modeID
ID of the decoder mode.
Definition: speexdec.c:180
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:272
frame
static AVFrame * frame
Definition: demux_decode.c:54
SUBMODE
#define SUBMODE(x)
Definition: speexdec.c:348
nb_submode6
static const SpeexSubmode nb_submode6
Definition: speexdec.c:484
SpeexSubmode::innovation_unquant
innovation_unquant_func innovation_unquant
Innovation un-quantization.
Definition: speexdec.c:161
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:65
DecoderState::mem_sp
float mem_sp[NB_ORDER]
Filter memory for synthesis signal.
Definition: speexdec.c:216
SPEEX_MEMSET
#define SPEEX_MEMSET(dst, c, n)
Definition: speexdec.c:80
lsp_to_lpc
static void lsp_to_lpc(const float *freq, float *ak, int lpcrdr)
Definition: speexdec.c:822
speex_decode_frame
static int speex_decode_frame(AVCodecContext *avctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
Definition: speexdec.c:1536
nb_submode1
static const SpeexSubmode nb_submode1
Definition: speexdec.c:454
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
SpeexContext::frames_per_packet
int32_t frames_per_packet
Number of frames stored per Ogg packet.
Definition: speexdec.c:250
NB_SUBMODES
#define NB_SUBMODES
Definition: speexdec.c:69
AVERROR_PATCHWELCOME
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:64
DecoderState::lpc_size
int lpc_size
Order of high-band LPC analysis.
Definition: speexdec.c:188
isnan
#define isnan(x)
Definition: libm.h:340
SpeexMode::default_submode
int default_submode
Default sub-mode to use when decoding.
Definition: speexdec.c:175
AVCodecContext::bit_rate
int64_t bit_rate
the average bitrate
Definition: avcodec.h:491
get_bits1
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:388
DecoderState::exc_rms
float exc_rms[NB_NB_SUBFRAMES]
RMS of excitation per subframe.
Definition: speexdec.c:220
split_cb_nb
static const SplitCodebookParams split_cb_nb
Definition: speexdec.c:112
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
SpeexContext::bitstream_version
int32_t bitstream_version
Version ID of the bit-stream.
Definition: speexdec.c:245
LtpParams::gain_bits
int gain_bits
Definition: speexdec.c:91
exc_10_32_table
static const int8_t exc_10_32_table[320]
Definition: speexdata.h:367
av_clipf
av_clipf
Definition: af_crystalizer.c:121
SpeexContext::extra_headers
int32_t extra_headers
Number of additional headers after the comments.
Definition: speexdec.c:251
ltp_params_nb
static const LtpParam ltp_params_nb
Definition: speexdec.c:98
exp
int8_t exp
Definition: eval.c:72
seed
static unsigned int seed
Definition: videogen.c:78
wb_skip_table
static const uint16_t wb_skip_table[8]
Definition: speexdata.h:765
DecoderState::voc_offset
int voc_offset
Definition: speexdec.c:206
float_dsp.h
SpeexSubmode::comb_gain
float comb_gain
Gain of enhancer comb filter.
Definition: speexdec.c:164
AV_CODEC_CAP_CHANNEL_CONF
#define AV_CODEC_CAP_CHANNEL_CONF
Codec should fill in channel configuration and samplerate instead of container.
Definition: codec.h:106
f
f
Definition: af_crystalizer.c:121
ff_get_buffer
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: decode.c:1617
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:52
AVPacket::size
int size
Definition: packet.h:492
LtpParams::gain_cdbk
const int8_t * gain_cdbk
Definition: speexdec.c:90
lsp_unquant_nb
static void lsp_unquant_nb(float *lsp, int order, GetBitContext *gb)
Definition: speexdec.c:409
exc_5_64_table
static const int8_t exc_5_64_table[320]
Definition: speexdata.h:232
codec_internal.h
SpeexContext::fdsp
AVFloatDSPContext * fdsp
Definition: speexdec.c:258
ltp_params_lbr
static const LtpParam ltp_params_lbr
Definition: speexdec.c:96
ltp_params_med
static const LtpParam ltp_params_med
Definition: speexdec.c:97
sanitize_values
static void sanitize_values(float *vec, float min_val, float max_val, int len)
Definition: speexdec.c:655
SpeexMode::folding_gain
float folding_gain
Folding gain.
Definition: speexdec.c:173
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
ltp_unquant_func
void(* ltp_unquant_func)(float *, float *, int, int, float, const void *, int, int *, float *, GetBitContext *, int, int, float, int)
Long-term un-quantize.
Definition: speexdec.c:131
sp
#define sp
Definition: regdef.h:63
NB_PITCH_END
#define NB_PITCH_END
Definition: speexdec.c:76
fmaxf
float fmaxf(float, float)
AVCodecContext::sample_fmt
enum AVSampleFormat sample_fmt
audio sample format
Definition: avcodec.h:1080
DecoderState::submodes
const SpeexSubmode *const * submodes
Sub-mode data.
Definition: speexdec.c:198
LSP_LINEAR
#define LSP_LINEAR(i)
Definition: speexdec.c:83
signal_mul
static void signal_mul(const float *x, float *y, float scale, int len)
Definition: speexdec.c:665
DecoderState::old_qlsp
float old_qlsp[NB_ORDER]
Quantized LSPs for previous frame.
Definition: speexdec.c:214
DecoderState::frame_size
int frame_size
Length of high-band frames.
Definition: speexdec.c:185
SpeexContext
Definition: speexdec.c:238
AVFloatDSPContext
Definition: float_dsp.h:24
noise_codebook_unquant
static void noise_codebook_unquant(float *exc, const void *par, int nsf, GetBitContext *gb, uint32_t *seed)
Definition: speexdec.c:311
pitch_unquant_3tap
static void pitch_unquant_3tap(float *exc, float *exc_out, int start, int end, float pitch_coef, const void *par, int nsf, int *pitch_val, float *gain_val, GetBitContext *gb, int count_lost, int subframe_offset, float last_pitch_gain, int cdbk_offset)
Definition: speexdec.c:353
gain_cdbk_nb
static const int8_t gain_cdbk_nb[512]
Definition: speexdata.h:257
a
The reader does not expect b to be semantically -5 here, and if the code is changed by maybe adding a cast, a division or other, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type, but it holds a signed int. To use the same example: SUINT a
Definition: undefined.txt:41
SpeexMode::frame_size
int frame_size
Size of frames used for decoding.
Definition: speexdec.c:170
offset
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this, just let it vf offset
Definition: writing_filters.txt:86
cdbk_nb_high2
static const int8_t cdbk_nb_high2[320]
Definition: speexdata.h:587
N
#define N
Definition: af_mcompand.c:53
fact
static double fact(double i)
Definition: af_aiir.c:943
a0
#define a0
Definition: regdef.h:46
SPEEX_COPY
#define SPEEX_COPY(dst, src, n)
Definition: speexdec.c:81
M_PI
#define M_PI
Definition: mathematics.h:67
DecoderState::subframe_size
int subframe_size
Length of high-band sub-frames.
Definition: speexdec.c:186
SpeexSubmode::innovation_params
const void * innovation_params
Innovation quantization parameters.
Definition: speexdec.c:162
ran
static uint32_t ran(void)
Definition: trasher.c:28
exc_20_32_table
static const int8_t exc_20_32_table[640]
Definition: speexdata.h:417
shift_filt
static const float shift_filt[3][7]
Definition: speexdata.h:719
multicomb
static void multicomb(const float *exc, float *new_exc, float *ak, int p, int nsf, int pitch, int max_pitch, float comb_gain)
Definition: speexdec.c:738
SplitCodebookParams::shape_cb
const signed char * shape_cb
Definition: speexdec.c:103
AVFrame::nb_samples
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:420
lsp_quant_func
void(* lsp_quant_func)(float *, float *, int, GetBitContext *)
Quantizes LSPs.
Definition: speexdec.c:118
lsp_unquant_lbr
static void lsp_unquant_lbr(float *lsp, int order, GetBitContext *gb)
Definition: speexdec.c:261
split_cb_nb_med
static const SplitCodebookParams split_cb_nb_med
Definition: speexdec.c:111
forced_pitch_unquant
static void forced_pitch_unquant(float *exc, float *exc_out, int start, int end, float pitch_coef, const void *par, int nsf, int *pitch_val, float *gain_val, GetBitContext *gb, int count_lost, int subframe_offset, float last_pitch_gain, int cdbk_offset)
Definition: speexdec.c:281
nb_submode5
static const SpeexSubmode nb_submode5
Definition: speexdec.c:478
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
AVCodecContext::extradata
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
Definition: avcodec.h:542
show_bits
static unsigned int show_bits(GetBitContext *s, int n)
Show 1-25 bits.
Definition: get_bits.h:371
DecoderState::interp_qlpc
float interp_qlpc[NB_ORDER]
Interpolated quantized LPCs.
Definition: speexdec.c:215
AVFrame::extended_data
uint8_t ** extended_data
pointers to the data planes/channels.
Definition: frame.h:401
lsp_unquant_func
void(* lsp_unquant_func)(float *, int, GetBitContext *)
Decodes quantized LSPs.
Definition: speexdec.c:121
iir_mem
static void iir_mem(const float *x, const float *den, float *y, int N, int ord, float *mem)
Definition: speexdec.c:585
DecoderState::full_frame_size
int full_frame_size
Length of full-band frames.
Definition: speexdec.c:182
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:194
DecoderState::voc_m2
float voc_m2
Definition: speexdec.c:204
len
int len
Definition: vorbis_enc_data.h:426
inner_prod
static float inner_prod(const float *x, const float *y, int len)
Definition: speexdec.c:671
cdbk_nb
static const int8_t cdbk_nb[640]
Definition: speexdata.h:463
avcodec.h
decoder_init
static int decoder_init(SpeexContext *s, DecoderState *st, const SpeexMode *mode)
Definition: speexdec.c:1363
SPEEX_INBAND_STEREO
#define SPEEX_INBAND_STEREO
Definition: speexdec.c:64
SpeexContext::gb
GetBitContext gb
Definition: speexdec.c:240
parse_speex_extradata
static int parse_speex_extradata(AVCodecContext *avctx, const uint8_t *extradata, int extradata_size)
Definition: speexdec.c:1396
ret
ret
Definition: filter_design.txt:187
SplitCodebookParams::subvect_size
int subvect_size
Definition: speexdec.c:101
SpeexSubmode::lsp_unquant
lsp_unquant_func lsp_unquant
LSP unquantization function.
Definition: speexdec.c:156
split_cb_nb_vlbr
static const SplitCodebookParams split_cb_nb_vlbr
Definition: speexdec.c:109
SpeexMode::decode
int(* decode)(AVCodecContext *avctx, void *dec, GetBitContext *gb, float *out)
Definition: speexdec.c:169
StereoState::smooth_right
float smooth_right
Smoothed right channel gain.
Definition: speexdec.c:235
gc_quant_bound
static const float gc_quant_bound[16]
Definition: speexdata.h:760
id
enum AVCodecID id
Definition: dts2pts_bsf.c:364
DecoderState::last_pitch
int last_pitch
Pitch of last correctly decoded frame.
Definition: speexdec.c:193
StereoState::smooth_left
float smooth_left
Smoothed left channel gain.
Definition: speexdec.c:234
AVCodecContext
main external API structure.
Definition: avcodec.h:441
c2
static const uint64_t c2
Definition: murmur3.c:53
SpeexMode::submodes
const SpeexSubmode * submodes[NB_SUBMODES]
Sub-mode data for the mode.
Definition: speexdec.c:174
DecoderState::last_ol_gain
float last_ol_gain
Open-loop gain for previous frame.
Definition: speexdec.c:189
cdbk_nb_low1
static const int8_t cdbk_nb_low1[320]
Definition: speexdata.h:512
mode
mode
Definition: ebur128.h:83
DecoderState::voc_mean
float voc_mean
Definition: speexdec.c:205
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:640
NB_NB_SUBFRAMES
#define NB_NB_SUBFRAMES
Definition: speexdec.c:74
DecoderState::is_wideband
int is_wideband
If wideband is present.
Definition: speexdec.c:183
speex_decode_close
static av_cold int speex_decode_close(AVCodecContext *avctx)
Definition: speexdec.c:1578
SpeexContext::stereo
StereoState stereo
Definition: speexdec.c:255
state
static struct @362 state
DecoderState::g0_mem
float g0_mem[QMF_ORDER]
Definition: speexdec.c:217
get_bitsz
static av_always_inline int get_bitsz(GetBitContext *s, int n)
Read 0-25 bits.
Definition: get_bits.h:351
high_lsp_cdbk2
static const int8_t high_lsp_cdbk2[512]
Definition: speexdata.h:99
SpeexContext::mode
int32_t mode
Mode used (0 for narrowband, 1 for wideband)
Definition: speexdec.c:244
M_SQRT2
#define M_SQRT2
Definition: mathematics.h:109
SpeexContext::st
DecoderState st[SPEEX_NB_MODES]
Definition: speexdec.c:256
nb_submode2
static const SpeexSubmode nb_submode2
Definition: speexdec.c:460
ltp_params_vlbr
static const LtpParam ltp_params_vlbr
Definition: speexdec.c:95
SpeexSubmode::forced_pitch_gain
int forced_pitch_gain
Use the same (forced) pitch gain for all sub-frames.
Definition: speexdec.c:150
AVCodecContext::codec_tag
unsigned int codec_tag
fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
Definition: avcodec.h:466
wb_submode1
static const SpeexSubmode wb_submode1
Definition: speexdec.c:501
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
highpass
static void highpass(const float *x, float *y, int len, float *mem, int wide)
Definition: speexdec.c:598
NB_ORDER
#define NB_ORDER
Definition: speexdec.c:67
AVPacket
This structure stores compressed data.
Definition: packet.h:468
AVCodecContext::priv_data
void * priv_data
Definition: avcodec.h:468
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
avpriv_float_dsp_alloc
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
DecoderState::g1_mem
float g1_mem[QMF_ORDER]
Definition: speexdec.c:218
speex_decode_stereo
static void speex_decode_stereo(float *data, int frame_size, StereoState *stereo)
Definition: speexdec.c:1516
int32_t
int32_t
Definition: audioconvert.c:56
bytestream.h
innovation_unquant_func
void(* innovation_unquant_func)(float *, const void *, int, GetBitContext *, uint32_t *)
Innovation unquantization function.
Definition: speexdec.c:143
SpeexSubmode::lbr_pitch
int lbr_pitch
Set to -1 for "normal" modes, otherwise encode pitch using a global pitch and allowing a +- lbr_pitch...
Definition: speexdec.c:147
speex_decode_init
static av_cold int speex_decode_init(AVCodecContext *avctx)
Definition: speexdec.c:1436
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
SpeexContext::frame_size
int32_t frame_size
Size of frames.
Definition: speexdec.c:248
lsp_unquant_high
static void lsp_unquant_high(float *lsp, int order, GetBitContext *gb)
Definition: speexdec.c:437
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
exc_10_16_table
static const int8_t exc_10_16_table[160]
Definition: speexdata.h:392
qmf_synth
static void qmf_synth(const float *x1, const float *x2, const float *a, float *y, int N, int M, float *mem1, float *mem2)
Definition: speexdec.c:1160
exc_gain_quant_scal3
static const float exc_gain_quant_scal3[8]
Definition: speexdata.h:774
MKTAG
#define MKTAG(a, b, c, d)
Definition: macros.h:55
DecoderState::last_pitch_gain
float last_pitch_gain
Pitch gain of last correctly decoded frame.
Definition: speexdec.c:194
split_cb_high_lbr
static const SplitCodebookParams split_cb_high_lbr
Definition: speexdec.c:115
SplitCodebookParams
Definition: speexdec.c:100
SplitCodebookParams::have_sign
int have_sign
Definition: speexdec.c:105
int
int
Definition: ffmpeg_filter.c:368
DecoderState::pi_gain
float pi_gain[NB_NB_SUBFRAMES]
Gain of LPC filter at theta=pi (fe/2)
Definition: speexdec.c:219
SpeexContext::rate
int32_t rate
Sampling rate used.
Definition: speexdec.c:243
bw_lpc
static void bw_lpc(float gamma, const float *lpc_in, float *lpc_out, int order)
Definition: speexdec.c:574
interp_pitch
static int interp_pitch(const float *exc, float *interp, int pitch, int len)
Definition: speexdec.c:691
AV_SAMPLE_FMT_FLT
@ AV_SAMPLE_FMT_FLT
float
Definition: samplefmt.h:60
compute_rms
static float compute_rms(const float *x, int len)
Definition: speexdec.c:563
NB_DEC_BUFFER
#define NB_DEC_BUFFER
Definition: speexdec.c:78
sb_decode
static int sb_decode(AVCodecContext *, void *, GetBitContext *, float *)
Definition: speexdec.c:1217
DecoderState::innov_save
float * innov_save
If non-NULL, innovation is copied here.
Definition: speexdec.c:190
SplitCodebookParams::nb_subvect
int nb_subvect
Definition: speexdec.c:102
StereoState::e_ratio
float e_ratio
Ratio of energies: E(left+right)/[E(left)+E(right)]
Definition: speexdec.c:233
SpeexMode
Definition: speexdec.c:167
split_cb_shape_sign_unquant
static void split_cb_shape_sign_unquant(float *exc, const void *par, int nsf, GetBitContext *gb, uint32_t *seed)
Definition: speexdec.c:318
high_lsp_cdbk
static const int8_t high_lsp_cdbk[512]
Definition: speexdata.h:59
SPEEX_NB_MODES
#define SPEEX_NB_MODES
Definition: speexdec.c:63