FFmpeg
wmavoice.c
Go to the documentation of this file.
1 /*
2  * Windows Media Audio Voice decoder.
3  * Copyright (c) 2009 Ronald S. Bultje
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * @brief Windows Media Audio Voice compatible decoder
25  * @author Ronald S. Bultje <rsbultje@gmail.com>
26  */
27 
28 #include <math.h>
29 
31 #include "libavutil/float_dsp.h"
32 #include "libavutil/mem_internal.h"
33 #include "libavutil/thread.h"
34 #include "avcodec.h"
35 #include "codec_internal.h"
36 #include "internal.h"
37 #include "get_bits.h"
38 #include "put_bits.h"
39 #include "wmavoice_data.h"
40 #include "celp_filters.h"
41 #include "acelp_vectors.h"
42 #include "acelp_filters.h"
43 #include "lsp.h"
44 #include "dct.h"
45 #include "rdft.h"
46 #include "sinewin.h"
47 
48 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame
49 #define MAX_LSPS 16 ///< maximum filter order
50 #define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be a multiple
51  ///< of 16 for ASM input buffer alignment
52 #define MAX_FRAMES 3 ///< maximum number of frames per superframe
53 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame
54 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history (in samples)
55 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
56  ///< maximum number of samples per superframe (160 * 3 = 480)
57 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that
58  ///< was split over two packets
59 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration
60 
61 /**
62  * Frame type VLC coding.
63  */
65 
66 /**
67  * Adaptive codebook types.
 *
 * These are the values stored in the acb_type member of
 * struct frame_type_desc.
68  */
69 enum {
70  ACB_TYPE_NONE = 0, ///< no adaptive codebook (only hardcoded fixed)
71  ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which
72  ///< we interpolate to get a per-sample pitch.
73  ///< Signal is generated using an asymmetric sinc
74  ///< window function
75  ///< @note see #wmavoice_ipol1_coeffs
76  ACB_TYPE_HAMMING = 2 ///< Per-block pitch with signal generation using
77  ///< a Hamming sinc window function
78  ///< @note see #wmavoice_ipol2_coeffs
79 };
80 
81 /**
82  * Fixed codebook types.
 *
 * These are the values stored in the fcb_type member of
 * struct frame_type_desc.
83  */
84 enum {
85  FCB_TYPE_SILENCE = 0, ///< comfort noise during silence
86  ///< generated from a hardcoded (fixed) codebook
87  ///< with per-frame (low) gain values
88  FCB_TYPE_HARDCODED = 1, ///< hardcoded (fixed) codebook with per-block
89  ///< gain values
90  FCB_TYPE_AW_PULSES = 2, ///< Pitch-adaptive window (AW) pulse signals,
91  ///< used in particular for low-bitrate streams
92  FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in
93  ///< combinations of either single pulses or
94  ///< pulse pairs
95 };
96 
97 /**
98  * Description of frame types.
 *
 * One entry per frame type; the table is declared with 17 entries.
 *
 * NOTE(review): only the first two initializers are visible in this
 * listing. As written, the remaining 15 entries would be zero-initialized;
 * presumably the other rows were lost during extraction -- confirm against
 * the upstream file before relying on this table.
99  */
100 static const struct frame_type_desc {
101  uint8_t n_blocks; ///< amount of blocks per frame (each block
102  ///< contains 160/#n_blocks samples)
103  uint8_t log_n_blocks; ///< log2(#n_blocks)
104  uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)
105  uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)
106  uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs
107  ///< (rather than just one single pulse)
108  ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES
109 } frame_descs[17] = {
110  { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0 },
111  { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0 },
127 };
128 
129 /**
130  * WMA Voice decoding context.
131  */
132 typedef struct WMAVoiceContext {
133  /**
134  * @name Global values specified in the stream header / extradata or used all over.
135  * @{
136  */
137  GetBitContext gb; ///< packet bitreader. During decoder init,
138  ///< it contains the extradata from the
139  ///< demuxer. During decoding, it contains
140  ///< packet data.
141  int8_t vbm_tree[25]; ///< converts VLC codes to frame type
142 
143  int spillover_bitsize; ///< number of bits used to specify
144  ///< #spillover_nbits in the packet header
145  ///< = ceil(log2(ctx->block_align << 3))
146  int history_nsamples; ///< number of samples in history for signal
147  ///< prediction (through ACB)
148 
149  /* postfilter specific values */
150  int do_apf; ///< whether to apply the averaged
151  ///< projection filter (APF)
152  int denoise_strength; ///< strength of denoising in Wiener filter
153  ///< [0-11]
154  int denoise_tilt_corr; ///< Whether to apply tilt correction to the
155  ///< Wiener filter coefficients (postfilter)
156  int dc_level; ///< Predicted amount of DC noise, based
157  ///< on which a DC removal filter is used
158 
159  int lsps; ///< number of LSPs per frame [10 or 16]
160  int lsp_q_mode; ///< defines quantizer defaults [0, 1]
161  int lsp_def_mode; ///< defines different sets of LSP defaults
162  ///< [0, 1]
163 
164  int min_pitch_val; ///< base value for pitch parsing code
165  int max_pitch_val; ///< max value + 1 for pitch parsing
166  int pitch_nbits; ///< number of bits used to specify the
167  ///< pitch value in the frame header
168  int block_pitch_nbits; ///< number of bits used to specify the
169  ///< first block's pitch value
170  int block_pitch_range; ///< range of the block pitch
171  int block_delta_pitch_nbits; ///< number of bits used to specify the
172  ///< delta pitch between this and the last
173  ///< block's pitch value, used in all but
174  ///< first block
175  int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is
176  ///< from -this to +this-1)
177  uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale
178  ///< conversion
179 
180  /**
181  * @}
182  *
183  * @name Packet values specified in the packet header or related to a packet.
184  *
185  * A packet is considered to be a single unit of data provided to this
186  * decoder by the demuxer.
187  * @{
188  */
189  int spillover_nbits; ///< number of bits of the previous packet's
190  ///< last superframe preceding this
191  ///< packet's first full superframe (useful
192  ///< for re-synchronization also)
193  int has_residual_lsps; ///< if set, superframes contain one set of
194  ///< LSPs that cover all frames, encoded as
195  ///< independent and residual LSPs; if not
196  ///< set, each frame contains its own, fully
197  ///< independent, LSPs
198  int skip_bits_next; ///< number of bits to skip at the next call
199  ///< to #wmavoice_decode_packet() (since
200  ///< they're part of the previous superframe)
201 
203  ///< cache for superframe data split over
204  ///< multiple packets
205  int sframe_cache_size; ///< set to >0 if we have data from an
206  ///< (incomplete) superframe from a previous
207  ///< packet that spilled over in the current
208  ///< packet; specifies the amount of bits in
209  ///< #sframe_cache
210  PutBitContext pb; ///< bitstream writer for #sframe_cache
211 
212  /**
213  * @}
214  *
215  * @name Frame and superframe values
216  * Superframe and frame data - these can change from frame to frame,
217  * although some of them do in that case serve as a cache / history for
218  * the next frame or superframe.
219  * @{
220  */
221  double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous
222  ///< superframe
223  int last_pitch_val; ///< pitch value of the previous frame
224  int last_acb_type; ///< frame type [0-2] of the previous frame
225  int pitch_diff_sh16; ///< ((cur_pitch_val - #last_pitch_val)
226  ///< << 16) / #MAX_FRAMESIZE
227  float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE
228 
229  int aw_idx_is_ext; ///< whether the AW index was encoded in
230  ///< 8 bits (instead of 6)
231  int aw_pulse_range; ///< the range over which #aw_pulse_set1()
232  ///< can apply the pulse, relative to the
233  ///< value in aw_first_pulse_off. The exact
234  ///< position of the first AW-pulse is within
235  ///< [pulse_off, pulse_off + this], and
236  ///< depends on bitstream values; [16 or 24]
237  int aw_n_pulses[2]; ///< number of AW-pulses in each block; note
238  ///< that this number can be negative (in
239  ///< which case it basically means "zero")
240  int aw_first_pulse_off[2]; ///< index of first sample to which to
241  ///< apply AW-pulses, or -0xff if unset
242  int aw_next_pulse_off_cache; ///< the position (relative to start of the
243  ///< second block) at which pulses should
244  ///< start to be positioned, serves as a
245  ///< cache for pitch-adaptive window pulses
246  ///< between blocks
247 
248  int frame_cntr; ///< current frame index [0 - 0xFFFE]; is
249  ///< only used for comfort noise in #pRNG()
250  int nb_superframes; ///< number of superframes in current packet
251  float gain_pred_err[6]; ///< cache for gain prediction
253  ///< cache of the signal of previous
254  ///< superframes, used as a history for
255  ///< signal generation
256  float synth_history[MAX_LSPS]; ///< see #excitation_history
257  /**
258  * @}
259  *
260  * @name Postfilter values
261  *
262  * Variables used for postfilter implementation, mostly history for
263  * smoothing and so on, and context variables for FFT/iFFT.
264  * @{
265  */
266  RDFTContext rdft, irdft; ///< contexts for FFT-calculation in the
267  ///< postfilter (for denoise filter)
268  DCTContext dct, dst; ///< contexts for phase shift (in Hilbert
269  ///< transform, part of postfilter)
270  float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
271  ///< range
272  float postfilter_agc; ///< gain control memory, used in
273  ///< #adaptive_gain_control()
274  float dcf_mem[2]; ///< DC filter history
276  ///< zero filter output (i.e. excitation)
277  ///< by postfilter
279  int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
280  DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
281  ///< aligned buffer for LPC tilting
283  ///< aligned buffer for denoise coefficients
285  ///< aligned buffer for postfilter speech
286  ///< synthesis
287  /**
288  * @}
289  */
291 
292 /**
293  * Set up the variable bit mode (VBM) tree from container extradata.
294  * @param gb bit I/O context.
295  * The bit context (s->gb) should be loaded with byte 23-46 of the
296  * container extradata (i.e. the ones containing the VBM tree).
297  * @param vbm_tree pointer to array to which the decoded VBM tree will be
298  * written.
299  * @return 0 on success, <0 on error.
300  */
301 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
302 {
303  int cntr[8] = { 0 }, n, res;
304 
305  memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
306  for (n = 0; n < 17; n++) {
307  res = get_bits(gb, 3);
308  if (cntr[res] > 3) // should be >= 3 + (res == 7))
309  return -1;
310  vbm_tree[res * 3 + cntr[res]++] = n;
311  }
312  return 0;
313 }
314 
316 {
317  static const uint8_t bits[] = {
318  2, 2, 2, 4, 4, 4,
319  6, 6, 6, 8, 8, 8,
320  10, 10, 10, 12, 12, 12,
321  14, 14, 14, 14
322  };
323  static const uint16_t codes[] = {
324  0x0000, 0x0001, 0x0002, // 00/01/10
325  0x000c, 0x000d, 0x000e, // 11+00/01/10
326  0x003c, 0x003d, 0x003e, // 1111+00/01/10
327  0x00fc, 0x00fd, 0x00fe, // 111111+00/01/10
328  0x03fc, 0x03fd, 0x03fe, // 11111111+00/01/10
329  0x0ffc, 0x0ffd, 0x0ffe, // 1111111111+00/01/10
330  0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
331  };
332 
334  bits, 1, 1, codes, 2, 2, 132);
335 }
336 
338 {
340  int n;
341 
342  s->postfilter_agc = 0;
343  s->sframe_cache_size = 0;
344  s->skip_bits_next = 0;
345  for (n = 0; n < s->lsps; n++)
346  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
347  memset(s->excitation_history, 0,
348  sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
349  memset(s->synth_history, 0,
350  sizeof(*s->synth_history) * MAX_LSPS);
351  memset(s->gain_pred_err, 0,
352  sizeof(s->gain_pred_err));
353 
354  if (s->do_apf) {
355  memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
356  sizeof(*s->synth_filter_out_buf) * s->lsps);
357  memset(s->dcf_mem, 0,
358  sizeof(*s->dcf_mem) * 2);
359  memset(s->zero_exc_pf, 0,
360  sizeof(*s->zero_exc_pf) * s->history_nsamples);
361  memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
362  }
363 }
364 
365 /**
366  * Set up decoder with parameters from demuxer (extradata etc.).
367  */
369 {
370  static AVOnce init_static_once = AV_ONCE_INIT;
371  int n, flags, pitch_range, lsp16_flag, ret;
373 
374  ff_thread_once(&init_static_once, wmavoice_init_static_data);
375 
376  /**
377  * Extradata layout:
378  * - byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
379  * - byte 19-22: flags field (annoyingly in LE; see below for known
380  * values),
381  * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,
382  * rest is 0).
383  */
384  if (ctx->extradata_size != 46) {
386  "Invalid extradata size %d (should be 46)\n",
387  ctx->extradata_size);
388  return AVERROR_INVALIDDATA;
389  }
390  if (ctx->block_align <= 0 || ctx->block_align > (1<<22)) {
391  av_log(ctx, AV_LOG_ERROR, "Invalid block alignment %d.\n", ctx->block_align);
392  return AVERROR_INVALIDDATA;
393  }
394 
395  flags = AV_RL32(ctx->extradata + 18);
396  s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
397  s->do_apf = flags & 0x1;
398  if (s->do_apf) {
399  if ((ret = ff_rdft_init(&s->rdft, 7, DFT_R2C)) < 0 ||
400  (ret = ff_rdft_init(&s->irdft, 7, IDFT_C2R)) < 0 ||
401  (ret = ff_dct_init (&s->dct, 6, DCT_I)) < 0 ||
402  (ret = ff_dct_init (&s->dst, 6, DST_I)) < 0)
403  return ret;
404 
405  ff_sine_window_init(s->cos, 256);
406  memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
407  for (n = 0; n < 255; n++) {
408  s->sin[n] = -s->sin[510 - n];
409  s->cos[510 - n] = s->cos[n];
410  }
411  }
412  s->denoise_strength = (flags >> 2) & 0xF;
413  if (s->denoise_strength >= 12) {
415  "Invalid denoise filter strength %d (max=11)\n",
416  s->denoise_strength);
417  return AVERROR_INVALIDDATA;
418  }
419  s->denoise_tilt_corr = !!(flags & 0x40);
420  s->dc_level = (flags >> 7) & 0xF;
421  s->lsp_q_mode = !!(flags & 0x2000);
422  s->lsp_def_mode = !!(flags & 0x4000);
423  lsp16_flag = flags & 0x1000;
424  if (lsp16_flag) {
425  s->lsps = 16;
426  } else {
427  s->lsps = 10;
428  }
429  for (n = 0; n < s->lsps; n++)
430  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
431 
432  init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
433  if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
434  av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
435  return AVERROR_INVALIDDATA;
436  }
437 
438  if (ctx->sample_rate >= INT_MAX / (256 * 37))
439  return AVERROR_INVALIDDATA;
440 
441  s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
442  s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
443  pitch_range = s->max_pitch_val - s->min_pitch_val;
444  if (pitch_range <= 0) {
445  av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
446  return AVERROR_INVALIDDATA;
447  }
448  s->pitch_nbits = av_ceil_log2(pitch_range);
449  s->last_pitch_val = 40;
450  s->last_acb_type = ACB_TYPE_NONE;
451  s->history_nsamples = s->max_pitch_val + 8;
452 
453  if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
454  int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
455  max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
456 
458  "Unsupported samplerate %d (min=%d, max=%d)\n",
459  ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
460 
461  return AVERROR(ENOSYS);
462  }
463 
464  s->block_conv_table[0] = s->min_pitch_val;
465  s->block_conv_table[1] = (pitch_range * 25) >> 6;
466  s->block_conv_table[2] = (pitch_range * 44) >> 6;
467  s->block_conv_table[3] = s->max_pitch_val - 1;
468  s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
469  if (s->block_delta_pitch_hrange <= 0) {
470  av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
471  return AVERROR_INVALIDDATA;
472  }
473  s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
474  s->block_pitch_range = s->block_conv_table[2] +
475  s->block_conv_table[3] + 1 +
476  2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
477  s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
478 
479  av_channel_layout_uninit(&ctx->ch_layout);
481  ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
482 
483  return 0;
484 }
485 
486 /**
487  * @name Postfilter functions
488  * Postfilter functions (gain control, wiener denoise filter, DC filter,
489  * kalman smoothening, plus surrounding code to wrap it)
490  * @{
491  */
/**
 * Adaptive gain control, as used by the postfilter.
 *
 * Same idea as #ff_adaptive_gain_control() in acelp_vectors.c, but the
 * energy is measured here as sum(abs(...)), whereas the other codecs
 * (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).
 *
 * The per-sample gain is an exponentially smoothed value that is pulled
 * towards (1 - alpha) * energy(speech_synth) / energy(in); when the
 * energy of @p in is zero the pull term is zero.
 *
 * @param out          output buffer for the gain-corrected samples
 * @param in           input samples, as they are after the postfilter
 *                     steps applied so far
 * @param speech_synth speech synthesis output before the postfilter
 * @param size         number of samples in each buffer
 * @param alpha        exponential filter factor
 * @param gain_mem     filter memory (single float); read on entry and
 *                     updated on exit
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    float synth_sum = 0.0, filtered_sum = 0.0;
    float target_gain;
    float smoothed = *gain_mem;
    int k;

    /* measure both signals with a sum of absolute values */
    for (k = 0; k < size; k++) {
        synth_sum    += fabsf(speech_synth[k]);
        filtered_sum += fabsf(in[k]);
    }

    if (filtered_sum == 0.0)
        target_gain = 0.0;
    else
        target_gain = (1.0 - alpha) * synth_sum / filtered_sum;

    /* apply the smoothed gain sample by sample */
    for (k = 0; k < size; k++) {
        smoothed = alpha * smoothed + target_gain;
        out[k]   = in[k] * smoothed;
    }

    *gain_mem = smoothed;
}
529 
530 /**
531  * Kalman smoothing function.
532  *
533  * This function looks back pitch +/- 3 samples back into history to find
534  * the best fitting curve (that one giving the optimal gain of the two
535  * signals, i.e. the highest dot product between the two), and then
536  * uses that signal history to smoothen the output of the speech synthesis
537  * filter.
538  *
539  * @param s WMA Voice decoding context
540  * @param pitch pitch of the speech signal
541  * @param in input speech signal
542  * @param out output pointer for smoothened signal
543  * @param size input/output buffer size
544  *
545  * @returns -1 if no smoothening took place, e.g. because no optimal
546  * fit could be found, or 0 on success.
547  */
548 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
549  const float *in, float *out, int size)
550 {
551  int n;
552  float optimal_gain = 0, dot;
553  const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
554  *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
555  *best_hist_ptr = NULL;
556 
557  /* find best fitting point in history */
558  do {
559  dot = avpriv_scalarproduct_float_c(in, ptr, size);
560  if (dot > optimal_gain) {
561  optimal_gain = dot;
562  best_hist_ptr = ptr;
563  }
564  } while (--ptr >= end);
565 
566  if (optimal_gain <= 0)
567  return -1;
568  dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
569  if (dot <= 0) // would be 1.0
570  return -1;
571 
572  if (optimal_gain <= dot) {
573  dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
574  } else
575  dot = 0.625;
576 
577  /* actual smoothing */
578  for (n = 0; n < size; n++)
579  out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
580 
581  return 0;
582 }
583 
/**
 * Get the tilt factor of a formant filter from its transfer function.
 *
 * Computed as r1 / r0, where
 *   r0 = 1 + sum(lpcs[i] * lpcs[i])                for i in [0, n_lpcs)
 *   r1 = lpcs[0] + sum(lpcs[i] * lpcs[i + 1])      for i in [0, n_lpcs - 1)
 *
 * @see #tilt_factor() in amrnbdec.c, which does essentially the same,
 *      but somehow (??) it does a speech synthesis filter in the
 *      middle, which is missing here
 *
 * @param lpcs   LPC coefficients
 * @param n_lpcs Size of LPC buffer
 * @returns the tilt factor
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float r0 = 1.0 +
                     avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
    const float r1 = lpcs[0] +
                     avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);

    return r1 / r0;
}
603 
604 /**
605  * Derive denoise filter coefficients (in real domain) from the LPCs.
 *
 * @param s         WMA Voice decoding context; its rdft/irdft/dct/dst
 *                  contexts, sin/cos tables, denoise_strength and
 *                  denoise_tilt_corr are used here
 * @param lpcs      in/out work buffer: transformed in place and then
 *                  reused as scratch space; indexes up to 64 are written
 *                  directly by this function
 * @param fcb_type  fixed codebook type; only selects the gain multiplier
 *                  (5/13 for #FCB_TYPE_HARDCODED, 5/14.7 otherwise)
 * @param coeffs    output buffer for the filter coefficients; indexes
 *                  up to 127 are written
 * @param remainder number of leading coefficients that are kept; the
 *                  entries from this index up to 127 are zeroed
606  */
607 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
608  int fcb_type, float *coeffs, int remainder)
609 {
610  float last_coeff, min = 15.0, max = -15.0;
611  float irange, angle_mul, gain_mul, range, sq;
612  int n, idx;
613 
614  /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
615  s->rdft.rdft_calc(&s->rdft, lpcs);
 /* log_range(): store log10 of a power value and track the running
  * min/max over all values seen so far */
616 #define log_range(var, assign) do { \
617  float tmp = log10f(assign); var = tmp; \
618  max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
619  } while (0)
 /* lpcs[1] is converted first and kept aside in last_coeff, because
  * the loop below overwrites lpcs[1]; it is stored in lpcs[64] later */
620  log_range(last_coeff, lpcs[1] * lpcs[1]);
 /* in-place: entry n is written from entries 2n and 2n+1, which are
  * always at a higher index and thus not yet overwritten */
621  for (n = 1; n < 64; n++)
622  log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
623  lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
624  log_range(lpcs[0], lpcs[0] * lpcs[0]);
625 #undef log_range
626  range = max - min;
627  lpcs[64] = last_coeff;
628 
629  /* Now, use this spectrum to pick out these frequencies with higher
630  * (relative) power/energy (which we then take to be "not noise"),
631  * and set up a table (still in lpc[]) of (relative) gains per frequency.
632  * These frequencies will be maintained, while others ("noise") will be
633  * decreased in the filter output. */
634  irange = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
635  gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
636  (5.0 / 14.7));
637  angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
638  for (n = 0; n <= 64; n++) {
639  float pwr;
640 
641  idx = lrint((max - lpcs[n]) * irange - 1);
642  idx = FFMAX(0, idx);
643  pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
644  lpcs[n] = angle_mul * pwr;
645 
646  /* 70.57 =~ 1/log10(1.0331663) */
647  idx = av_clipf((pwr * gain_mul - 0.0295) * 70.570526123, 0, INT_MAX / 2);
648 
649  if (idx > 127) { // fall back if index falls outside table range
650  coeffs[n] = wmavoice_energy_table[127] *
651  powf(1.0331663, idx - 127);
652  } else
653  coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
654  }
655 
656  /* calculate the Hilbert transform of the gains, which we do (since this
657  * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
658  * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
659  * "moment" of the LPCs in this filter. */
660  s->dct.dct_calc(&s->dct, lpcs);
661  s->dst.dct_calc(&s->dst, lpcs);
662 
663  /* Split out the coefficient indexes into phase/magnitude pairs */
664  idx = 255 + av_clip(lpcs[64], -255, 255);
665  coeffs[0] = coeffs[0] * s->cos[idx];
666  idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
667  last_coeff = coeffs[64] * s->cos[idx];
 /* Each pass handles two entries: first n (odd) using -lpcs[64], then,
  * after the decrement, n (even) using +lpcs[64]. Entries 2n and 2n+1
  * are written from entry n while walking downwards, so no unread input
  * is overwritten. The loop ends once n reaches 0; coeffs[0] was set
  * above and coeffs[1] is set after the loop. */
668  for (n = 63;; n--) {
669  idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
670  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
671  coeffs[n * 2] = coeffs[n] * s->cos[idx];
672 
673  if (!--n) break;
674 
675  idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
676  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
677  coeffs[n * 2] = coeffs[n] * s->cos[idx];
678  }
679  coeffs[1] = last_coeff;
680 
681  /* move into real domain */
682  s->irdft.rdft_calc(&s->irdft, coeffs);
683 
684  /* tilt correction and normalize scale */
685  memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
686  if (s->denoise_tilt_corr) {
687  float tilt_mem = 0;
688 
689  coeffs[remainder - 1] = 0;
690  ff_tilt_compensation(&tilt_mem,
691  -1.8 * tilt_factor(coeffs, remainder - 1),
692  coeffs, remainder);
693  }
 /* scale the first `remainder` coefficients by 1 / (64 * sqrt(sum of
  * their squares)) */
694  sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs,
695  remainder));
696  for (n = 0; n < remainder; n++)
697  coeffs[n] *= sq;
698 }
699 
700 /**
701  * This function applies a Wiener filter on the (noisy) speech signal as
702  * a means to denoise it.
703  *
704  * - take RDFT of LPCs to get the power spectrum of the noise + speech;
705  * - using this power spectrum, calculate (for each frequency) the Wiener
706  * filter gain, which depends on the frequency power and desired level
707  * of noise subtraction (when set too high, this leads to artifacts)
708  * We can do this symmetrically over the X-axis (so 0-4kHz is the inverse
709  * of 4-8kHz);
710  * - by doing a phase shift, calculate the Hilbert transform of this array
711  * of per-frequency filter-gains to get the filtering coefficients;
712  * - smoothen/normalize/de-tilt these filter coefficients as desired;
713  * - take RDFT of noisy sound, apply the coefficients and take its IRDFT
714  * to get the denoised speech signal;
715  * - the leftover (i.e. output of the IRDFT on denoised speech data beyond
716  * the frame boundary) are saved and applied to subsequent frames by an
717  * overlap-add method (otherwise you get clicking-artifacts).
718  *
719  * @param s WMA Voice decoding context
720  * @param fcb_type Frame (codebook) type
721  * @param synth_pf input: the noisy speech signal, output: denoised speech
722  * data; should be 16-byte aligned (for ASM purposes)
723  * @param size size of the speech data
724  * @param lpcs LPCs used to synthesize this frame's speech data
725  */
726 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
727  float *synth_pf, int size,
728  const float *lpcs)
729 {
730  int remainder, lim, n;
731 
732  if (fcb_type != FCB_TYPE_SILENCE) {
733  float *tilted_lpcs = s->tilted_lpcs_pf,
734  *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
735 
736  tilted_lpcs[0] = 1.0;
737  memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
738  memset(&tilted_lpcs[s->lsps + 1], 0,
739  sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
740  ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
741  tilted_lpcs, s->lsps + 2);
742 
743  /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
744  * size is applied to the next frame. All input beyond this is zero,
745  * and thus all output beyond this will go towards zero, hence we can
746  * limit to min(size-1, 127-size) as a performance consideration. */
747  remainder = FFMIN(127 - size, size - 1);
748  calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
749 
750  /* apply coefficients (in frequency spectrum domain), i.e. complex
751  * number multiplication */
752  memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
753  s->rdft.rdft_calc(&s->rdft, synth_pf);
754  s->rdft.rdft_calc(&s->rdft, coeffs);
755  synth_pf[0] *= coeffs[0];
756  synth_pf[1] *= coeffs[1];
757  for (n = 1; n < 64; n++) {
758  float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
759  synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
760  synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
761  }
762  s->irdft.rdft_calc(&s->irdft, synth_pf);
763  }
764 
765  /* merge filter output with the history of previous runs */
766  if (s->denoise_filter_cache_size) {
767  lim = FFMIN(s->denoise_filter_cache_size, size);
768  for (n = 0; n < lim; n++)
769  synth_pf[n] += s->denoise_filter_cache[n];
770  s->denoise_filter_cache_size -= lim;
771  memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
772  sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
773  }
774 
775  /* move remainder of filter output into a cache for future runs */
776  if (fcb_type != FCB_TYPE_SILENCE) {
777  lim = FFMIN(remainder, s->denoise_filter_cache_size);
778  for (n = 0; n < lim; n++)
779  s->denoise_filter_cache[n] += synth_pf[size + n];
780  if (lim < remainder) {
781  memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
782  sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
783  s->denoise_filter_cache_size = remainder;
784  }
785  }
786 }
787 
788 /**
789  * Averaging projection filter, the postfilter used in WMAVoice.
790  *
791  * This uses the following steps:
792  * - A zero-synthesis filter (generate excitation from synth signal)
793  * - Kalman smoothing on excitation, based on pitch
 *   (only attempted for fcb_type >= #FCB_TYPE_AW_PULSES; if it reports
 *   failure, the unsmoothed excitation is used instead)
794  * - Re-synthesized smoothened output
795  * - Iterative Wiener denoise filter
796  * - Adaptive gain filter
797  * - DC filter
 *   (only when the context's dc_level is greater than 8)
798  *
799  * @param s WMAVoice decoding context
800  * @param synth Speech synthesis output (before postfilter)
801  * @param samples Output buffer for filtered samples
802  * @param size Buffer size of synth & samples
 *             (asserted to be at most #MAX_FRAMESIZE / 2)
803  * @param lpcs Generated LPCs used for speech synthesis
804  * @param zero_exc_pf destination for zero synthesis filter (16-byte aligned)
805  * @param fcb_type Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
806  * @param pitch Pitch of the input signal
807  */
808 static void postfilter(WMAVoiceContext *s, const float *synth,
809  float *samples, int size,
810  const float *lpcs, float *zero_exc_pf,
811  int fcb_type, int pitch)
812 {
813  float synth_filter_in_buf[MAX_FRAMESIZE / 2],
814  *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
815  *synth_filter_in = zero_exc_pf;
816 
 /* synth_filter_in_buf holds MAX_FRAMESIZE / 2 floats, so size must
  * not exceed that */
817  av_assert0(size <= MAX_FRAMESIZE / 2);
818 
819  /* generate excitation from input signal */
820  ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
821 
 /* kalman_smoothen() returns 0 on success; only then is its output
  * used as the synthesis filter input */
822  if (fcb_type >= FCB_TYPE_AW_PULSES &&
823  !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
824  synth_filter_in = synth_filter_in_buf;
825 
826  /* re-synthesize speech after smoothening, and keep history */
827  ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
828  synth_filter_in, size, s->lsps);
 /* copy the last s->lsps output samples in front of synth_pf, where
  * they serve as history */
829  memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
830  sizeof(synth_pf[0]) * s->lsps);
831 
832  wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
833 
834  adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
835  &s->postfilter_agc);
836 
837  if (s->dc_level > 8) {
838  /* remove ultra-low frequency DC noise / highpass filter;
839  * coefficients are identical to those used in SIPR decoding,
840  * and very closely resemble those used in AMR-NB decoding. */
 /* NOTE(review): the line that opens the call taking the arguments
  * below is not present in this listing (it appears to have been
  * dropped during extraction) -- restore it from the upstream file. */
842  (const float[2]) { -1.99997, 1.0 },
843  (const float[2]) { -1.9330735188, 0.93589198496 },
844  0.93980580475, s->dcf_mem, size);
845  }
846 }
847 /**
848  * @}
849  */
850 
/**
 * Dequantize LSPs.
 *
 * The output is cleared first; each stage then adds
 * base_q[stage] + mul_q[stage] * entry[m] to every LSP m, where entry is
 * the table vector selected by values[stage]. After each stage the table
 * pointer advances by sizes[stage] vectors of @p num bytes.
 *
 * @param lsps     output pointer to the array that will hold the LSPs
 * @param num      number of LSPs to be dequantized
 * @param values   quantized values, contains n_stages values
 * @param sizes    range (i.e. max value) of each quantized value
 * @param n_stages number of dequantization runs
 * @param table    dequantization table to be used
 * @param mul_q    LSF multiplier
 * @param base_q   base (lowest) LSF values
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    int stage, i;

    memset(lsps, 0, sizeof(*lsps) * num);

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *entry  = table + values[stage] * num;
        const double   offset = base_q[stage];
        const double   scale  = mul_q[stage];

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * entry[i];

        /* skip over this stage's part of the table */
        table += sizes[stage] * num;
    }
}
882 
883 /**
884  * @name LSP dequantization routines
885  * LSP dequantization routines, for 10/16LSPs and independent/residual coding.
886  * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;
887  * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
888  * @{
889  */
890 /**
891  * Parse 10 independently-coded LSPs.
892  */
893 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
894 {
895  static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
896  static const double mul_lsf[4] = {
897  5.2187144800e-3, 1.4626986422e-3,
898  9.6179549166e-4, 1.1325736225e-3
899  };
900  static const double base_lsf[4] = {
901  M_PI * -2.15522e-1, M_PI * -6.1646e-2,
902  M_PI * -3.3486e-2, M_PI * -5.7408e-2
903  };
904  uint16_t v[4];
905 
906  v[0] = get_bits(gb, 8);
907  v[1] = get_bits(gb, 6);
908  v[2] = get_bits(gb, 5);
909  v[3] = get_bits(gb, 5);
910 
911  dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
912  mul_lsf, base_lsf);
913 }
914 
915 /**
916  * Parse 10 independently-coded LSPs, and then derive the tables to
917  * generate LSPs for the other frames from them (residual coding).
918  */
920  double *i_lsps, const double *old,
921  double *a1, double *a2, int q_mode)
922 {
 /* Codebook size, multiplier and base value of each of the three residual
  * stages, handed to dequant_lsps() at the end of this function. */
923  static const uint16_t vec_sizes[3] = { 128, 64, 64 };
924  static const double mul_lsf[3] = {
925  2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
926  };
927  static const double base_lsf[3] = {
928  M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
929  };
930  const float (*ipol_tab)[2][10] = q_mode ?
932  uint16_t interpol, v[3];
933  int n;
934 
 /* the independently-coded LSPs come first in the bitstream */
935  dequant_lsp10i(gb, i_lsps);
936 
 /* 5 + 7 + 6 + 6 = 24 more bits: the row of the interpolation table,
  * followed by one codebook index per residual stage */
937  interpol = get_bits(gb, 5);
938  v[0] = get_bits(gb, 7);
939  v[1] = get_bits(gb, 6);
940  v[2] = get_bits(gb, 6);
941 
 /* a1[0..9] and a1[10..19] each hold i_lsps plus a weighted share of
  * (old - i_lsps); the weights are the two coefficient sets of the
  * selected interpolation row */
942  for (n = 0; n < 10; n++) {
943  double delta = old[n] - i_lsps[n];
944  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
945  a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
946  }
947 
 /* a2 receives 20 values accumulated over the three residual stages */
948  dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
949  mul_lsf, base_lsf);
950 }
951 
952 /**
953  * Parse 16 independently-coded LSPs.
954  */
955 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
956 {
957  static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
958  static const double mul_lsf[5] = {
959  3.3439586280e-3, 6.9908173703e-4,
960  3.3216608306e-3, 1.0334960326e-3,
961  3.1899104283e-3
962  };
963  static const double base_lsf[5] = {
964  M_PI * -1.27576e-1, M_PI * -2.4292e-2,
965  M_PI * -1.28094e-1, M_PI * -3.2128e-2,
966  M_PI * -1.29816e-1
967  };
968  uint16_t v[5];
969 
970  v[0] = get_bits(gb, 8);
971  v[1] = get_bits(gb, 6);
972  v[2] = get_bits(gb, 7);
973  v[3] = get_bits(gb, 6);
974  v[4] = get_bits(gb, 7);
975 
976  dequant_lsps( lsps, 5, v, vec_sizes, 2,
977  wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
978  dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
979  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
980  dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
981  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
982 }
983 
984 /**
985  * Parse 16 independently-coded LSPs, and then derive the tables to
986  * generate LSPs for the other frames from them (residual coding).
987  */
989  double *i_lsps, const double *old,
990  double *a1, double *a2, int q_mode)
991 {
 /* Codebook size, multiplier and base value of the three residual
  * codebooks; each is used for one slice of a2 below. */
992  static const uint16_t vec_sizes[3] = { 128, 128, 128 };
993  static const double mul_lsf[3] = {
994  1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
995  };
996  static const double base_lsf[3] = {
997  M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
998  };
999  const float (*ipol_tab)[2][16] = q_mode ?
1001  uint16_t interpol, v[3];
1002  int n;
1003 
 /* the independently-coded LSPs come first in the bitstream */
1004  dequant_lsp16i(gb, i_lsps);
1005 
 /* 5 + 7 + 7 + 7 = 26 more bits: the row of the interpolation table,
  * followed by one index per residual codebook */
1006  interpol = get_bits(gb, 5);
1007  v[0] = get_bits(gb, 7);
1008  v[1] = get_bits(gb, 7);
1009  v[2] = get_bits(gb, 7);
1010 
 /* a1[0..15] and a1[16..31] each hold i_lsps plus a weighted share of
  * (old - i_lsps); the weights are the two coefficient sets of the
  * selected interpolation row */
1011  for (n = 0; n < 16; n++) {
1012  double delta = old[n] - i_lsps[n];
1013  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
1014  a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
1015  }
1016 
 /* a2 is filled in three single-stage slices:
  * a2[0..9], a2[10..19] and a2[20..31] */
1017  dequant_lsps( a2, 10, v, vec_sizes, 1,
1018  wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
1019  dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
1020  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
1021  dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
1022  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
1023 }
1024 
1025 /**
1026  * @}
1027  * @name Pitch-adaptive window coding functions
1028  * The next few functions are for pitch-adaptive window coding.
1029  * @{
1030  */
1031 /**
1032  * Parse the offset of the first pitch-adaptive window pulses, and
1033  * the distribution of pulses between the two blocks in this frame.
1034  * @param s WMA Voice decoding context private data
1035  * @param gb bit I/O context
1036  * @param pitch pitch for each block in this frame
1037  */
1039  const int *pitch)
1040 {
 /* Offset of the first pulse, indexed by the (possibly extended) index
  * parsed below; the 94 entries cover the index range [0, 93]. */
1041  static const int16_t start_offset[94] = {
1042  -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1043  13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1044  27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1045  45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1046  69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1047  93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1048  117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1049  141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1050  };
1051  int bits, offset;
1052 
1053  /* position of pulse */
1054  s->aw_idx_is_ext = 0;
 /* 6-bit values of 54 and above are extended with 2 more bits; the
  * resulting index is 54 + 4 * (bits - 54) + extra, i.e. at most 93 */
1055  if ((bits = get_bits(gb, 6)) >= 54) {
1056  s->aw_idx_is_ext = 1;
1057  bits += (bits - 54) * 3 + get_bits(gb, 2);
1058  }
1059 
1060  /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
1061  * the distribution of the pulses in each block contained in this frame. */
 /* the pulse range is 24 only if both block pitches exceed 32, else 16 */
1062  s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
 /* empty-bodied loop: step a negative start offset forward by pitch[0]
  * until it is non-negative.
  * NOTE(review): this would not terminate for pitch[0] <= 0 combined with
  * a negative start offset -- confirm that callers guarantee pitch > 0. */
1063  for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
 /* block 0 covers the first MAX_FRAMESIZE / 2 samples; the pulse count is
  * the remaining distance divided by the pitch, with pitch - 1 added
  * beforehand so that the division rounds up */
1064  s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
1065  s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
 /* continue from the first pulse position past block 0 */
1066  offset += s->aw_n_pulses[0] * pitch[0];
1067  s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
 /* block 1 offsets are relative to the start of block 1, hence the extra
  * MAX_FRAMESIZE / 2 that is subtracted here */
1068  s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
1069 
1070  /* if continuing from a position before the block, reset position to
1071  * start of block (when corrected for the range over which it can be
1072  * spread in aw_pulse_set1()). */
1073  if (start_offset[bits] < MAX_FRAMESIZE / 2) {
1074  while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
1075  s->aw_first_pulse_off[1] -= pitch[1];
1076  if (start_offset[bits] < 0)
1077  while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
1078  s->aw_first_pulse_off[0] -= pitch[0];
1079  }
1080 }
1081 
1082 /**
1083  * Apply second set of pitch-adaptive window pulses.
1084  * @param s WMA Voice decoding context private data
1085  * @param gb bit I/O context
1086  * @param block_idx block index in frame [0, 1]
1087  * @param fcb structure containing fixed codebook vector info
1088  * @return -1 on error, 0 otherwise
1089  */
1091  int block_idx, AMRFixed *fcb)
1092 {
 /* two padding words are kept on either side of the five real mask words,
  * so that use_mask[-2..-1] and use_mask[5..6] are valid locations */
1093  uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
1094  uint16_t *use_mask = use_mask_mem + 2;
1095  /* in this function, idx is the index in the 80-bit (+ padding) use_mask
1096  * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
1097  * of idx are the position of the bit within a particular item in the
1098  * array (0 being the most significant bit, and 15 being the least
1099  * significant bit), and the remainder (>> 4) is the index in the
1100  * use_mask[]-array. This is faster and uses less memory than using a
1101  * 80-byte/80-int array. */
1102  int pulse_off = s->aw_first_pulse_off[block_idx],
1103  pulse_start, n, idx, range, aidx, start_off = 0;
1104 
1105  /* set offset of first pulse to within this block */
1106  if (s->aw_n_pulses[block_idx] > 0)
1107  while (pulse_off + s->aw_pulse_range < 1)
1108  pulse_off += fcb->pitch_lag;
1109 
1110  /* find range per pulse */
 /* range is 32 (block 0) or 8 (block 1) when block 0 has pulses, and 16
  * otherwise; block 1 then also resumes from the offset cached at the end
  * of the previous call */
1111  if (s->aw_n_pulses[0] > 0) {
1112  if (block_idx == 0) {
1113  range = 32;
1114  } else /* block_idx = 1 */ {
1115  range = 8;
1116  if (s->aw_n_pulses[block_idx] > 0)
1117  pulse_off = s->aw_next_pulse_off_cache;
1118  }
1119  } else
1120  range = 16;
1121  pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1122 
1123  /* aw_pulse_set1() already applies pulses around pulse_off (to be exact,
1124  * in the range of [pulse_off, pulse_off + s->aw_pulse_range]), and thus
1125  * we exclude that range from being pulsed again in this function. */
 /* padding words start out cleared (nothing usable), the five real words
  * start out with all 80 positions usable */
1126  memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
1127  memset( use_mask, -1, 5 * sizeof(use_mask[0]));
1128  memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
1129  if (s->aw_n_pulses[block_idx] > 0)
1130  for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
1131  int excl_range = s->aw_pulse_range; // always 16 or 24
1132  uint16_t *use_mask_ptr = &use_mask[idx >> 4];
 /* number of bits from position idx up to the end of its word */
1133  int first_sh = 16 - (idx & 15);
 /* clear the tail of the first word, then whatever remains of the
  * excluded range in the following one or two words */
1134  *use_mask_ptr++ &= 0xFFFFu << first_sh;
1135  excl_range -= first_sh;
1136  if (excl_range >= 16) {
1137  *use_mask_ptr++ = 0;
1138  *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1139  } else
1140  *use_mask_ptr &= 0xFFFF >> excl_range;
1141  }
1142 
1143  /* find the 'aidx'th offset that is not excluded */
 /* aidx takes 5 bits (block 0) or 3 bits (block 1) when block 0 has
  * pulses, and 4 bits otherwise */
1144  aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
1145  for (n = 0; n <= aidx; pulse_start++) {
 /* empty-bodied loop: move a negative candidate position forward by the
  * pitch lag until it is non-negative */
1146  for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
1147  if (idx >= MAX_FRAMESIZE / 2) { // find from zero
 /* pick the first mask word that still has a bit set; fail with -1 if
  * none is left. idx is preset to the last bit position of that word
  * and then lowered by av_log2_16bit() of the word -- presumably the
  * index of its highest set bit, which would make idx the first
  * still-usable position (confirm against av_log2_16bit()). */
1148  if (use_mask[0]) idx = 0x0F;
1149  else if (use_mask[1]) idx = 0x1F;
1150  else if (use_mask[2]) idx = 0x2F;
1151  else if (use_mask[3]) idx = 0x3F;
1152  else if (use_mask[4]) idx = 0x4F;
1153  else return -1;
1154  idx -= av_log2_16bit(use_mask[idx >> 4]);
1155  }
 /* a usable position is consumed (its bit is cleared) and counted;
  * start_off remembers the most recently consumed one */
1156  if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1157  use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1158  n++;
1159  start_off = idx;
1160  }
1161  }
1162 
 /* append one pulse at start_off; one further bit selects its sign */
1163  fcb->x[fcb->n] = start_off;
1164  fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
1165  fcb->n++;
1166 
1167  /* set offset for next block, relative to start of that block */
1168  n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
1169  s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
1170  return 0;
1171 }
1172 
1173 /**
1174  * Apply first set of pitch-adaptive window pulses.
1175  * @param s WMA Voice decoding context private data
1176  * @param gb bit I/O context
1177  * @param block_idx block index in frame [0, 1]
1178  * @param fcb storage location for fixed codebook pulse info
1179  */
1181  int block_idx, AMRFixed *fcb)
1182 {
 /* 12 bits are read, or 10 when the pulse index was extended
  * (s->aw_idx_is_ext) and this is block 0 */
1183  int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
1184  float v;
1185 
1186  if (s->aw_n_pulses[block_idx] > 0) {
1187  int n, v_mask, i_mask, sh, n_pulses;
1188 
 /* v_mask selects the sign bit and i_mask the index bits of one pulse;
  * sh is the total number of bits one pulse occupies in val */
1189  if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
1190  n_pulses = 3;
1191  v_mask = 8;
1192  i_mask = 7;
1193  sh = 4;
1194  } else { // 4 pulses, 1:sign + 2:index each
1195  n_pulses = 4;
1196  v_mask = 4;
1197  i_mask = 3;
1198  sh = 3;
1199  }
1200 
 /* pulses are unpacked from the low bits of val upwards, the last pulse
  * (n = n_pulses - 1) first */
1201  for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1202  fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
1203  fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
1204  s->aw_first_pulse_off[block_idx];
 /* bring a negative position into the block by adding the pitch lag */
1205  while (fcb->x[fcb->n] < 0)
1206  fcb->x[fcb->n] += fcb->pitch_lag;
 /* a pulse at or beyond MAX_FRAMESIZE / 2 is dropped: fcb->n is not
  * advanced, so the slot is reused by the next pulse */
1207  if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
1208  fcb->n++;
1209  }
1210  } else {
 /* no pitch-adaptive pulses in this block: val encodes a pair of pulses
  * at positions idx - delta and idx instead */
1211  int num2 = (val & 0x1FF) >> 1, delta, idx;
1212 
1213  if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
1214  else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
1215  else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
1216  else { delta = 7; idx = num2 + 1 - 3 * 75; }
 /* bit 9 gives the sign of the first pulse; bit 0 (below) tells whether
  * the second pulse has the opposite sign */
1217  v = (val & 0x200) ? -1.0 : 1.0;
1218 
 /* set the no_repeat_mask bits of the two pulses added here */
1219  fcb->no_repeat_mask |= 3 << fcb->n;
1220  fcb->x[fcb->n] = idx - delta;
1221  fcb->y[fcb->n] = v;
1222  fcb->x[fcb->n + 1] = idx;
1223  fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
1224  fcb->n += 2;
1225  }
1226 }
1227 
1228 /**
1229  * @}
1230  *
1231  * Generate a random number from frame_cntr and block_idx, which will live
1232  * in the range [0, 1000 - block_size] (so it can be used as an index in a
1233  * table of size 1000 of which you want to read block_size entries).
1234  *
1235  * @param frame_cntr current frame number
1236  * @param block_num current block index
1237  * @param block_size amount of entries we want to read from a table
1238  * that has 1000 entries
1239  * @return a (non-)random number in the [0, 1000 - block_size] range.
1240  */
1241 static int pRNG(int frame_cntr, int block_num, int block_size)
1242 {
1243  /* array to simplify the calculation of z:
1244  * y = (x % 9) * 5 + 6;
1245  * z = (49995 * x) / y;
1246  * Since y only has 9 values, we can remove the division by using a
1247  * LUT and using FASTDIV-style divisions. For each of the 9 values
1248  * of y, we can rewrite z as:
1249  * z = x * (49995 / y) + x * ((49995 % y) / y)
1250  * In this table, each col represents one possible value of y, the
1251  * first number is 49995 / y, and the second is the FASTDIV variant
1252  * of 49995 % y / y. */
1253  static const unsigned int div_tbl[9][2] = {
1254  { 8332, 3 * 715827883U }, // y = 6
1255  { 4545, 0 * 390451573U }, // y = 11
1256  { 3124, 11 * 268435456U }, // y = 16
1257  { 2380, 15 * 204522253U }, // y = 21
1258  { 1922, 23 * 165191050U }, // y = 26
1259  { 1612, 23 * 138547333U }, // y = 31
1260  { 1388, 27 * 119304648U }, // y = 36
1261  { 1219, 16 * 104755300U }, // y = 41
1262  { 1086, 39 * 93368855U } // y = 46
1263  };
1264  unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
1265  if (x >= 0xFFFF) x -= 0xFFFF; // max value of x is 8*1877+0xFFFE=0x13AA6,
1266  // so this is effectively a modulo (%)
1267  y = x - 9 * MULH(477218589, x); // x % 9
1268  z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
1269  // z = x * 49995 / (y * 5 + 6)
1270  return z % (1000 - block_size);
1271 }
1272 
1273 /**
1274  * Parse hardcoded signal for a single block.
1275  * @note see #synth_block().
1276  */
1278  int block_idx, int size,
1279  const struct frame_type_desc *frame_desc,
1280  float *excitation)
1281 {
1282  float gain;
1283  int n, r_idx;
1284 
1286 
1287  /* Set the offset from which we start reading wmavoice_std_codebook */
 /* silence frames read no bits here: the offset comes from pRNG() and the
  * gain from s->silence_gain; hardcoded frames read an 8-bit offset and
  * a 6-bit index into wmavoice_gain_universal[] */
1288  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1289  r_idx = pRNG(s->frame_cntr, block_idx, size);
1290  gain = s->silence_gain;
1291  } else /* FCB_TYPE_HARDCODED */ {
1292  r_idx = get_bits(gb, 8);
1293  gain = wmavoice_gain_universal[get_bits(gb, 6)];
1294  }
1295 
1296  /* Clear gain prediction parameters */
1297  memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
1298 
1299  /* Apply gain to hardcoded codebook and use that as excitation signal */
1300  for (n = 0; n < size; n++)
1301  excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
1302 }
1303 
1304 /**
1305  * Parse FCB/ACB signal for a single block.
1306  * @note see #synth_block().
1307  */
1309  int block_idx, int size,
1310  int block_pitch_sh2,
1311  const struct frame_type_desc *frame_desc,
1312  float *excitation)
1313 {
 /* weights applied to the six entries of s->gain_pred_err when
  * predicting the fixed codebook gain */
1314  static const float gain_coeff[6] = {
1315  0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1316  };
1317  float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
1318  int n, idx, gain_weight;
1319  AMRFixed fcb;
1320 
 /* pulses[] only has room for MAX_FRAMESIZE / 2 samples */
1321  av_assert0(size <= MAX_FRAMESIZE / 2);
1322  memset(pulses, 0, sizeof(*pulses) * size);
1323 
 /* block_pitch_sh2 is the pitch << 2; only its integer part is used as
  * the pitch lag of the fixed codebook */
1324  fcb.pitch_lag = block_pitch_sh2 >> 2;
1325  fcb.pitch_fac = 1.0;
1326  fcb.no_repeat_mask = 0;
1327  fcb.n = 0;
1328 
1329  /* For the other frame types, this is where we apply the innovation
1330  * (fixed) codebook pulses of the speech signal. */
1331  if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1332  aw_pulse_set1(s, gb, block_idx, &fcb);
1333  if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
1334  /* Conceal the block with silence and return.
1335  * Skip the correct amount of bits to read the next
1336  * block from the correct offset. */
1337  int r_idx = pRNG(s->frame_cntr, block_idx, size);
1338 
1339  for (n = 0; n < size; n++)
1340  excitation[n] =
1341  wmavoice_std_codebook[r_idx + n] * s->silence_gain;
1342  skip_bits(gb, 7 + 1);
1343  return;
1344  }
1345  } else /* FCB_TYPE_EXC_PULSES */ {
1346  int offset_nbits = 5 - frame_desc->log_n_blocks;
1347 
1348  fcb.no_repeat_mask = -1;
1349  /* similar to ff_decode_10_pulses_35bits(), but with single pulses
1350  * (instead of double) for a subset of pulses */
 /* five interleaved tracks (positions n, n + 5, n + 10, ...): each has
  * a sign bit and an offset_nbits-wide position; the first dbl_pulses
  * tracks carry a second pulse whose sign is inverted when pos1 < pos2 */
1351  for (n = 0; n < 5; n++) {
1352  float sign;
1353  int pos1, pos2;
1354 
1355  sign = get_bits1(gb) ? 1.0 : -1.0;
1356  pos1 = get_bits(gb, offset_nbits);
1357  fcb.x[fcb.n] = n + 5 * pos1;
1358  fcb.y[fcb.n++] = sign;
1359  if (n < frame_desc->dbl_pulses) {
1360  pos2 = get_bits(gb, offset_nbits);
1361  fcb.x[fcb.n] = n + 5 * pos2;
1362  fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
1363  }
1364  }
1365  }
1366  ff_set_fixed_vector(pulses, &fcb, 1.0, size);
1367 
1368  /* Calculate gain for adaptive & fixed codebook signal.
1369  * see ff_amr_set_fixed_gain(). */
 /* one 7-bit index selects both gains; the fixed codebook gain is also
  * predicted from the history in s->gain_pred_err */
1370  idx = get_bits(gb, 7);
1371  fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
1372  gain_coeff, 6) -
1373  5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
1374  acb_gain = wmavoice_gain_codebook_acb[idx];
1375  pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
1376  -2.9957322736 /* log(0.05) */,
1377  1.6094379124 /* log(5.0) */);
1378 
 /* shift the prediction history by gain_weight entries and fill the
  * freed slots with the new error.
  * NOTE(review): assumes log_n_blocks >= 1 here, so that gain_weight <= 4
  * and (6 - gain_weight) stays positive -- confirm against frame_descs. */
1379  gain_weight = 8 >> frame_desc->log_n_blocks;
1380  memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
1381  sizeof(*s->gain_pred_err) * (6 - gain_weight));
1382  for (n = 0; n < gain_weight; n++)
1383  s->gain_pred_err[n] = pred_err;
1384 
1385  /* Calculation of adaptive codebook */
1386  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1387  int len;
 /* the pitch changes by s->pitch_diff_sh16 (16.16 fixed point) per
  * sample; the block is processed in runs of len samples */
1388  for (n = 0; n < size; n += len) {
1389  int next_idx_sh16;
1390  int abs_idx = block_idx * size + n;
1391  int pitch_sh16 = (s->last_pitch_val << 16) +
1392  s->pitch_diff_sh16 * abs_idx;
1393  int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1394  int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1395  idx = idx_sh16 >> 16;
 /* len is the number of samples until idx_sh16 reaches the next
  * multiple of 0x10000 in the direction the pitch is moving, clipped
  * to [1, size - n]; with a constant pitch the whole block is done
  * in one run */
1396  if (s->pitch_diff_sh16) {
1397  if (s->pitch_diff_sh16 > 0) {
1398  next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1399  } else
1400  next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1401  len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
1402  1, size - n);
1403  } else
1404  len = size;
1405 
1406  ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
1408  idx, 9, len);
1409  }
1410  } else /* ACB_TYPE_HAMMING */ {
 /* the low 2 bits of block_pitch_sh2 are the fractional pitch: when
  * they are zero the signal is copied from block_pitch samples back in
  * the excitation buffer, otherwise it is interpolated */
1411  int block_pitch = block_pitch_sh2 >> 2;
1412  idx = block_pitch_sh2 & 3;
1413  if (idx) {
1414  ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
1416  idx, 8, size);
1417  } else
1418  av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
1419  sizeof(float) * size);
1420  }
1421 
1422  /* Interpolate ACB/FCB and use as excitation signal */
1423  ff_weighted_vector_sumf(excitation, excitation, pulses,
1424  acb_gain, fcb_gain, size);
1425 }
1426 
1427 /**
1428  * Parse data in a single block.
1429  *
1430  * @param s WMA Voice decoding context private data
1431  * @param gb bit I/O context
1432  * @param block_idx index of the to-be-read block
1433  * @param size amount of samples to be read in this block
1434  * @param block_pitch_sh2 pitch for this block << 2
1435  * @param lsps LSPs for (the end of) this frame
1436  * @param prev_lsps LSPs for the last frame
1437  * @param frame_desc frame type descriptor
1438  * @param excitation target memory for the ACB+FCB interpolated signal
1439  * @param synth target memory for the speech synthesis filter output
1440  * @return 0 on success, <0 on error.
1441  */
1443  int block_idx, int size,
1444  int block_pitch_sh2,
1445  const double *lsps, const double *prev_lsps,
1446  const struct frame_type_desc *frame_desc,
1447  float *excitation, float *synth)
1448 {
1449  double i_lsps[MAX_LSPS];
1450  float lpcs[MAX_LSPS];
1451  float fac;
1452  int n;
1453 
 /* fill excitation[]: frames without adaptive codebook use the hardcoded
  * codebook, all others the FCB/ACB path */
1454  if (frame_desc->acb_type == ACB_TYPE_NONE)
1455  synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
1456  else
1457  synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
1458  frame_desc, excitation);
1459 
1460  /* convert interpolated LSPs to LPCs */
 /* fac is the position of the middle of this block within the frame; the
  * values are interpolated linearly between prev_lsps and lsps before
  * the cosine is taken */
1461  fac = (block_idx + 0.5) / frame_desc->n_blocks;
1462  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1463  i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1464  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1465 
1466  /* Speech synthesis */
1467  ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
1468 }
1469 
1470 /**
1471  * Synthesize output samples for a single frame.
1472  *
1473  * @param ctx WMA Voice decoder context
1474  * @param gb bit I/O context (s->gb or one for cross-packet superframes)
1475  * @param frame_idx Frame number within superframe [0-2]
1476  * @param samples pointer to output sample buffer, has space for at least 160
1477  * samples
1478  * @param lsps LSP array
1479  * @param prev_lsps array of previous frame's LSPs
1480  * @param excitation target buffer for excitation signal
1481  * @param synth target buffer for synthesized speech data
1482  * @return 0 on success, <0 on error.
1483  */
1484 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
1485  float *samples,
1486  const double *lsps, const double *prev_lsps,
1487  float *excitation, float *synth)
1488 {
1490  int n, n_blocks_x2, log_n_blocks_x2, av_uninit(cur_pitch_val);
1491  int pitch[MAX_BLOCKS], av_uninit(last_block_pitch);
1492 
1493  /* Parse frame type ("frame header"), see frame_descs */
 /* the VLC code is mapped through s->vbm_tree to an index into
  * frame_descs[]; a negative entry marks an invalid code */
1494  int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
1495 
1496  if (bd_idx < 0) {
1498  "Invalid frame type VLC code, skipping\n");
1499  return AVERROR_INVALIDDATA;
1500  }
1501 
 /* every block of the frame gets an equal share of its samples */
1502  block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
1503 
1504  /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
1505  if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
1506  /* Pitch is provided per frame, which is interpreted as the pitch of
1507  * the last sample of the last block of this frame. We can interpolate
1508  * the pitch of other blocks (and even pitch-per-sample) by gradually
1509  * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
1510  n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
1511  log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
 /* the coded value is an offset from s->min_pitch_val, capped at
  * s->max_pitch_val - 1 */
1512  cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
1513  cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
 /* no interpolation from the previous pitch if the last frame had no
  * adaptive codebook, or if the two pitches differ by more than 1/20
  * of their sum */
1514  if (s->last_acb_type == ACB_TYPE_NONE ||
1515  20 * abs(cur_pitch_val - s->last_pitch_val) >
1516  (cur_pitch_val + s->last_pitch_val))
1517  s->last_pitch_val = cur_pitch_val;
1518 
1519  /* pitch per block */
 /* weighted mean of the two pitches with weights fac and
  * n_blocks_x2 - fac (fac = 2n + 1), rounded to nearest by adding
  * n_blocks before the shift */
1520  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1521  int fac = n * 2 + 1;
1522 
1523  pitch[n] = (MUL16(fac, cur_pitch_val) +
1524  MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
1525  frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
1526  }
1527 
1528  /* "pitch-diff-per-sample" for calculation of pitch per sample */
1529  s->pitch_diff_sh16 =
1530  (cur_pitch_val - s->last_pitch_val) * (1 << 16) / MAX_FRAMESIZE;
1531  }
1532 
1533  /* Global gain (if silence) and pitch-adaptive window coordinates */
1534  switch (frame_descs[bd_idx].fcb_type) {
1535  case FCB_TYPE_SILENCE:
1536  s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
1537  break;
1538  case FCB_TYPE_AW_PULSES:
 /* NOTE(review): reads pitch[0] and pitch[1], which at this point are
  * only set by the ACB_TYPE_ASYMMETRIC branch above -- confirm that
  * frame_descs pairs FCB_TYPE_AW_PULSES with that ACB type */
1539  aw_parse_coords(s, gb, pitch);
1540  break;
1541  }
1542 
1543  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1544  int bl_pitch_sh2;
1545 
1546  /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
1547  switch (frame_descs[bd_idx].acb_type) {
1548  case ACB_TYPE_HAMMING: {
1549  /* Pitch is given per block. Per-block pitches are encoded as an
1550  * absolute value for the first block, and then delta values
1551  * relative to this value) for all subsequent blocks. The scale of
1552  * this pitch value is semi-logarithmic compared to its use in the
1553  * decoder, so we convert it to normal scale also. */
 /* t1, t2 and t3 are the number of coded values that fall into the
  * quarter-, half- and full-sample resolution segments between the
  * entries of s->block_conv_table[] */
1554  int block_pitch,
1555  t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
1556  t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
1557  t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
1558 
1559  if (n == 0) {
1560  block_pitch = get_bits(gb, s->block_pitch_nbits);
1561  } else
1562  block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
1563  get_bits(gb, s->block_delta_pitch_nbits);
1564  /* Convert last_ so that any next delta is within _range */
1565  last_block_pitch = av_clip(block_pitch,
1566  s->block_delta_pitch_hrange,
1567  s->block_pitch_range -
1568  s->block_delta_pitch_hrange);
1569 
1570  /* Convert semi-log-style scale back to normal scale */
 /* bl_pitch_sh2 is the pitch << 2: the first segment advances it by
  * 1 per coded step, the second by 2, the third by 4; anything
  * beyond the third segment yields s->block_conv_table[3] */
1571  if (block_pitch < t1) {
1572  bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
1573  } else {
1574  block_pitch -= t1;
1575  if (block_pitch < t2) {
1576  bl_pitch_sh2 =
1577  (s->block_conv_table[1] << 2) + (block_pitch << 1);
1578  } else {
1579  block_pitch -= t2;
1580  if (block_pitch < t3) {
1581  bl_pitch_sh2 =
1582  (s->block_conv_table[2] + block_pitch) << 2;
1583  } else
1584  bl_pitch_sh2 = s->block_conv_table[3] << 2;
1585  }
1586  }
1587  pitch[n] = bl_pitch_sh2 >> 2;
1588  break;
1589  }
1590 
1591  case ACB_TYPE_ASYMMETRIC: {
1592  bl_pitch_sh2 = pitch[n] << 2;
1593  break;
1594  }
1595 
1596  default: // ACB_TYPE_NONE has no pitch
1597  bl_pitch_sh2 = 0;
1598  break;
1599  }
1600 
 /* each block reads from and writes to its own slice of the excitation
  * and synthesis buffers */
1601  synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1602  lsps, prev_lsps, &frame_descs[bd_idx],
1603  &excitation[n * block_nsamples],
1604  &synth[n * block_nsamples]);
1605  }
1606 
1607  /* Averaging projection filter, if applicable. Else, just copy samples
1608  * from synthesis buffer */
 /* the postfilter runs on two halves of 80 samples: the first half uses
  * the mean of the previous and current LSFs, the second half the
  * current LSFs.
  * NOTE(review): pitch[0] is passed even for ACB_TYPE_NONE frames, where
  * this function never writes pitch[] -- presumably postfilter() ignores
  * it for those fcb types; confirm. */
1609  if (s->do_apf) {
1610  double i_lsps[MAX_LSPS];
1611  float lpcs[MAX_LSPS];
1612 
1613  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1614  i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1615  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1616  postfilter(s, synth, samples, 80, lpcs,
1617  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
1618  frame_descs[bd_idx].fcb_type, pitch[0]);
1619 
1620  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1621  i_lsps[n] = cos(lsps[n]);
1622  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1623  postfilter(s, &synth[80], &samples[80], 80, lpcs,
1624  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
1625  frame_descs[bd_idx].fcb_type, pitch[0]);
1626  } else
1627  memcpy(samples, synth, 160 * sizeof(synth[0]));
1628 
1629  /* Cache values for next frame */
1630  s->frame_cntr++;
1631  if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
1632  s->last_acb_type = frame_descs[bd_idx].acb_type;
 /* the pitch remembered for the next frame is 0 without adaptive
  * codebook, the per-frame pitch for ASYMMETRIC and the pitch of the last
  * block for HAMMING */
1633  switch (frame_descs[bd_idx].acb_type) {
1634  case ACB_TYPE_NONE:
1635  s->last_pitch_val = 0;
1636  break;
1637  case ACB_TYPE_ASYMMETRIC:
1638  s->last_pitch_val = cur_pitch_val;
1639  break;
1640  case ACB_TYPE_HAMMING:
1641  s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
1642  break;
1643  }
1644 
1645  return 0;
1646 }
1647 
1648 /**
1649  * Ensure minimum value for first item, maximum value for last value,
1650  * proper spacing between each value and proper ordering.
1651  *
1652  * @param lsps array of LSPs
1653  * @param num size of LSP array
1654  *
1655  * @note basically a double version of #ff_acelp_reorder_lsf(), might be
1656  * useful to put in a generic location later on. Parts are also
1657  * present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),
1658  * which is in float.
1659  */
1660 static void stabilize_lsps(double *lsps, int num)
1661 {
1662  int n, m, l;
1663 
1664  /* set minimum value for first, maximum value for last and minimum
1665  * spacing between LSF values.
1666  * Very similar to ff_set_min_dist_lsf(), but in double. */
1667  lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
1668  for (n = 1; n < num; n++)
1669  lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
1670  lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
1671 
1672  /* reorder (looks like one-time / non-recursed bubblesort).
1673  * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
1674  for (n = 1; n < num; n++) {
1675  if (lsps[n] < lsps[n - 1]) {
1676  for (m = 1; m < num; m++) {
1677  double tmp = lsps[m];
1678  for (l = m - 1; l >= 0; l--) {
1679  if (lsps[l] <= tmp) break;
1680  lsps[l + 1] = lsps[l];
1681  }
1682  lsps[l + 1] = tmp;
1683  }
1684  break;
1685  }
1686  }
1687 }
1688 
1689 /**
1690  * Synthesize output samples for a single superframe. If we have any data
1691  * cached in s->sframe_cache, that will be used instead of whatever is loaded
1692  * in s->gb.
1693  *
1694  * WMA Voice superframes contain 3 frames, each containing 160 audio samples,
1695  * to give a total of 480 samples per superframe. See #synth_frame() for frame
1696  * parsing. In addition to 3 frames, superframes can also contain the LSPs
1697  * (if these are globally specified for all frames (residually); they can
1698  * also be specified individually per-frame. See the s->has_residual_lsps
1699  * option), and can specify the number of samples encoded in this superframe
1700  * (if less than 480), usually used to prevent blanks at track boundaries.
1701  *
1702  * @param ctx WMA Voice decoder context
1703  * @return 0 on success, <0 on error or 1 if there was not enough data to
1704  * fully parse the superframe
1705  */
1707  int *got_frame_ptr)
1708 {
1710  GetBitContext *gb = &s->gb, s_gb;
1711  int n, res, n_samples = MAX_SFRAMESIZE;
1712  double lsps[MAX_FRAMES][MAX_LSPS];
1713  const double *mean_lsf = s->lsps == 16 ?
1714  wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
1715  float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
1716  float synth[MAX_LSPS + MAX_SFRAMESIZE];
1717  float *samples;
1718 
1719  memcpy(synth, s->synth_history,
1720  s->lsps * sizeof(*synth));
1721  memcpy(excitation, s->excitation_history,
1722  s->history_nsamples * sizeof(*excitation));
1723 
1724  if (s->sframe_cache_size > 0) {
1725  gb = &s_gb;
1726  init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
1727  s->sframe_cache_size = 0;
1728  }
1729 
1730  /* First bit is speech/music bit, it differentiates between WMAVoice
1731  * speech samples (the actual codec) and WMAVoice music samples, which
1732  * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
1733  * the wild yet. */
1734  if (!get_bits1(gb)) {
1735  avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");
1736  return AVERROR_PATCHWELCOME;
1737  }
1738 
1739  /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
1740  if (get_bits1(gb)) {
1741  if ((n_samples = get_bits(gb, 12)) > MAX_SFRAMESIZE) {
1743  "Superframe encodes > %d samples (%d), not allowed\n",
1744  MAX_SFRAMESIZE, n_samples);
1745  return AVERROR_INVALIDDATA;
1746  }
1747  }
1748 
1749  /* Parse LSPs, if global for the superframe (can also be per-frame). */
1750  if (s->has_residual_lsps) {
1751  double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
1752 
1753  for (n = 0; n < s->lsps; n++)
1754  prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
1755 
1756  if (s->lsps == 10) {
1757  dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1758  } else /* s->lsps == 16 */
1759  dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1760 
1761  for (n = 0; n < s->lsps; n++) {
1762  lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1763  lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
1764  lsps[2][n] += mean_lsf[n];
1765  }
1766  for (n = 0; n < 3; n++)
1767  stabilize_lsps(lsps[n], s->lsps);
1768  }
1769 
1770  /* synth_superframe can run multiple times per packet,
1771  * so free any frame left over from a previous run */
1773 
1774  /* get output buffer */
1775  frame->nb_samples = MAX_SFRAMESIZE;
1776  if ((res = ff_get_buffer(ctx, frame, 0)) < 0)
1777  return res;
1778  frame->nb_samples = n_samples;
1779  samples = (float *)frame->data[0];
1780 
1781  /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
1782  for (n = 0; n < 3; n++) {
1783  if (!s->has_residual_lsps) {
1784  int m;
1785 
1786  if (s->lsps == 10) {
1787  dequant_lsp10i(gb, lsps[n]);
1788  } else /* s->lsps == 16 */
1789  dequant_lsp16i(gb, lsps[n]);
1790 
1791  for (m = 0; m < s->lsps; m++)
1792  lsps[n][m] += mean_lsf[m];
1793  stabilize_lsps(lsps[n], s->lsps);
1794  }
1795 
1796  if ((res = synth_frame(ctx, gb, n,
1797  &samples[n * MAX_FRAMESIZE],
1798  lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
1799  &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
1800  &synth[s->lsps + n * MAX_FRAMESIZE]))) {
1801  *got_frame_ptr = 0;
1802  return res;
1803  }
1804  }
1805 
1806  /* Statistics? FIXME - we don't check for length, a slight overrun
1807  * will be caught by internal buffer padding, and anything else
1808  * will be skipped, not read. */
1809  if (get_bits1(gb)) {
1810  res = get_bits(gb, 4);
1811  skip_bits(gb, 10 * (res + 1));
1812  }
1813 
1814  if (get_bits_left(gb) < 0) {
1816  return AVERROR_INVALIDDATA;
1817  }
1818 
1819  *got_frame_ptr = 1;
1820 
1821  /* Update history */
1822  memcpy(s->prev_lsps, lsps[2],
1823  s->lsps * sizeof(*s->prev_lsps));
1824  memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
1825  s->lsps * sizeof(*synth));
1826  memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
1827  s->history_nsamples * sizeof(*excitation));
1828  if (s->do_apf)
1829  memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
1830  s->history_nsamples * sizeof(*s->zero_exc_pf));
1831 
1832  return 0;
1833 }
1834 
1835 /**
1836  * Parse the packet header at the start of each packet (input data to this
1837  * decoder).
1838  *
1839  * @param s WMA Voice decoding context private data
1840  * @return <0 on error, nb_superframes on success.
1841  */
1843 {
1844  GetBitContext *gb = &s->gb;
1845  unsigned int res, n_superframes = 0;
1846 
1847  skip_bits(gb, 4); // packet sequence number
1848  s->has_residual_lsps = get_bits1(gb);
1849  do {
1850  if (get_bits_left(gb) < 6 + s->spillover_bitsize)
1851  return AVERROR_INVALIDDATA;
1852 
1853  res = get_bits(gb, 6); // number of superframes per packet
1854  // (minus first one if there is spillover)
1855  n_superframes += res;
1856  } while (res == 0x3F);
1857  s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
1858 
1859  return get_bits_left(gb) >= 0 ? n_superframes : AVERROR_INVALIDDATA;
1860 }
1861 
1862 /**
1863  * Copy (unaligned) bits from gb/data/size to pb.
1864  *
1865  * @param pb target buffer to copy bits into
1866  * @param data source buffer to copy bits from
1867  * @param size size of the source data, in bytes
1868  * @param gb bit I/O context specifying the current position in the source.
1869  * data. This function might use this to align the bit position to
1870  * a whole-byte boundary before calling #ff_copy_bits() on aligned
1871  * source data
1872  * @param nbits the amount of bits to copy from source to target
1873  *
1874  * @note after calling this function, the current position in the input bit
1875  * I/O context is undefined.
1876  */
1877 static void copy_bits(PutBitContext *pb,
1878  const uint8_t *data, int size,
1879  GetBitContext *gb, int nbits)
1880 {
1881  int rmn_bytes, rmn_bits;
1882 
1883  rmn_bits = rmn_bytes = get_bits_left(gb);
1884  if (rmn_bits < nbits)
1885  return;
1886  if (nbits > put_bits_left(pb))
1887  return;
1888  rmn_bits &= 7; rmn_bytes >>= 3;
1889  if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
1890  put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
1891  ff_copy_bits(pb, data + size - rmn_bytes,
1892  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1893 }
1894 
1895 /**
1896  * Packet decoding: a packet is anything that the (ASF) demuxer contains,
1897  * and we expect that the demuxer / application provides it to us as such
1898  * (else you'll probably get garbage as output). Every packet has a size of
1899  * ctx->block_align bytes, starts with a packet header (see
1900  * #parse_packet_header()), and then a series of superframes. Superframe
1901  * boundaries may exceed packets, i.e. superframes can split data over
1902  * multiple (two) packets.
1903  *
1904  * For more information about frames, see #synth_superframe().
1905  */
1907  int *got_frame_ptr, AVPacket *avpkt)
1908 {
1910  GetBitContext *gb = &s->gb;
1911  int size, res, pos;
1912 
1913  /* Packets are sometimes a multiple of ctx->block_align, with a packet
1914  * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
1915  * feeds us ASF packets, which may concatenate multiple "codec" packets
1916  * in a single "muxer" packet, so we artificially emulate that by
1917  * capping the packet size at ctx->block_align. */
1918  for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
1919  init_get_bits8(&s->gb, avpkt->data, size);
1920 
1921  /* size == ctx->block_align is used to indicate whether we are dealing with
1922  * a new packet or a packet of which we already read the packet header
1923  * previously. */
1924  if (!(size % ctx->block_align)) { // new packet header
1925  if (!size) {
1926  s->spillover_nbits = 0;
1927  s->nb_superframes = 0;
1928  } else {
1929  if ((res = parse_packet_header(s)) < 0)
1930  return res;
1931  s->nb_superframes = res;
1932  }
1933 
1934  /* If the packet header specifies a s->spillover_nbits, then we want
1935  * to push out all data of the previous packet (+ spillover) before
1936  * continuing to parse new superframes in the current packet. */
1937  if (s->sframe_cache_size > 0) {
1938  int cnt = get_bits_count(gb);
1939  if (cnt + s->spillover_nbits > avpkt->size * 8) {
1940  s->spillover_nbits = avpkt->size * 8 - cnt;
1941  }
1942  copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
1943  flush_put_bits(&s->pb);
1944  s->sframe_cache_size += s->spillover_nbits;
1945  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) == 0 &&
1946  *got_frame_ptr) {
1947  cnt += s->spillover_nbits;
1948  s->skip_bits_next = cnt & 7;
1949  res = cnt >> 3;
1950  return res;
1951  } else
1952  skip_bits_long (gb, s->spillover_nbits - cnt +
1953  get_bits_count(gb)); // resync
1954  } else if (s->spillover_nbits) {
1955  skip_bits_long(gb, s->spillover_nbits); // resync
1956  }
1957  } else if (s->skip_bits_next)
1958  skip_bits(gb, s->skip_bits_next);
1959 
1960  /* Try parsing superframes in current packet */
1961  s->sframe_cache_size = 0;
1962  s->skip_bits_next = 0;
1963  pos = get_bits_left(gb);
1964  if (s->nb_superframes-- == 0) {
1965  *got_frame_ptr = 0;
1966  return size;
1967  } else if (s->nb_superframes > 0) {
1968  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) < 0) {
1969  return res;
1970  } else if (*got_frame_ptr) {
1971  int cnt = get_bits_count(gb);
1972  s->skip_bits_next = cnt & 7;
1973  res = cnt >> 3;
1974  return res;
1975  }
1976  } else if ((s->sframe_cache_size = pos) > 0) {
1977  /* ... cache it for spillover in next packet */
1978  init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
1979  copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
1980  // FIXME bad - just copy bytes as a whole and use the
1981  // skip_bits_next field
1982  }
1983 
1984  return size;
1985 }
1986 
1988 {
1990 
1991  if (s->do_apf) {
1992  ff_rdft_end(&s->rdft);
1993  ff_rdft_end(&s->irdft);
1994  ff_dct_end(&s->dct);
1995  ff_dct_end(&s->dst);
1996  }
1997 
1998  return 0;
1999 }
2000 
2002  .p.name = "wmavoice",
2003  .p.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
2004  .p.type = AVMEDIA_TYPE_AUDIO,
2005  .p.id = AV_CODEC_ID_WMAVOICE,
2006  .priv_data_size = sizeof(WMAVoiceContext),
2008  .close = wmavoice_decode_end,
2011  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
2012  .flush = wmavoice_flush,
2013 };
WMAVoiceContext::has_residual_lsps
int has_residual_lsps
if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...
Definition: wmavoice.c:193
skip_bits_long
static void skip_bits_long(GetBitContext *s, int n)
Skips the specified number of bits.
Definition: get_bits.h:291
AMRFixed::x
int x[10]
Definition: acelp_vectors.h:55
DCT_I
@ DCT_I
Definition: avfft.h:96
wmavoice_std_codebook
static const float wmavoice_std_codebook[1000]
Definition: wmavoice_data.h:2585
interpol
static int interpol(MBContext *s, uint32_t *color, int x, int y, int linesize)
Definition: vsrc_mandelbrot.c:186
MAX_LSPS
#define MAX_LSPS
maximum filter order
Definition: wmavoice.c:49
WMAVoiceContext::aw_next_pulse_off_cache
int aw_next_pulse_off_cache
the position (relative to start of the second block) at which pulses should start to be positioned,...
Definition: wmavoice.c:242
WMAVoiceContext::max_pitch_val
int max_pitch_val
max value + 1 for pitch parsing
Definition: wmavoice.c:165
av_clip
#define av_clip
Definition: common.h:95
aw_pulse_set2
static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply second set of pitch-adaptive window pulses.
Definition: wmavoice.c:1090
FF_CODEC_CAP_INIT_CLEANUP
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: codec_internal.h:41
acelp_vectors.h
get_bits_left
static int get_bits_left(GetBitContext *gb)
Definition: get_bits.h:839
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
FCB_TYPE_HARDCODED
@ FCB_TYPE_HARDCODED
hardcoded (fixed) codebook with per-block gain values
Definition: wmavoice.c:88
wmavoice_dq_lsp10i
static const uint8_t wmavoice_dq_lsp10i[0xf00]
Definition: wmavoice_data.h:33
mem_internal.h
ACB_TYPE_HAMMING
@ ACB_TYPE_HAMMING
Per-block pitch with signal generation using a Hamming sinc window function.
Definition: wmavoice.c:76
out
FILE * out
Definition: movenc.c:54
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:262
thread.h
frame_descs
static const struct frame_type_desc frame_descs[17]
INIT_VLC_STATIC
#define INIT_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size)
Definition: vlc.h:125
rdft.h
wmavoice_dq_lsp16r3
static const uint8_t wmavoice_dq_lsp16r3[0x600]
Definition: wmavoice_data.h:1526
dequant_lsps
static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
Dequantize LSPs.
Definition: wmavoice.c:862
init_put_bits
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:62
WMAVoiceContext::excitation_history
float excitation_history[MAX_SIGNAL_HISTORY]
cache of the signal of previous superframes, used as a history for signal generation
Definition: wmavoice.c:252
get_bits_count
static int get_bits_count(const GetBitContext *s)
Definition: get_bits.h:219
av_log2_16bit
int av_log2_16bit(unsigned v)
Definition: intmath.c:31
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:325
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:221
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
aw_pulse_set1
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply first set of pitch-adaptive window pulses.
Definition: wmavoice.c:1180
ff_acelp_apply_order_2_transfer_function
void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
Definition: acelp_filters.c:121
WMAVoiceContext::irdft
RDFTContext irdft
contexts for FFT-calculation in the postfilter (for denoise filter)
Definition: wmavoice.c:266
internal.h
AVPacket::data
uint8_t * data
Definition: packet.h:374
pRNG
static int pRNG(int frame_cntr, int block_num, int block_size)
Generate a random number from frame_cntr and block_idx, which will live in the range [0,...
Definition: wmavoice.c:1241
ff_wmavoice_decoder
const FFCodec ff_wmavoice_decoder
Definition: wmavoice.c:2001
table
static const uint16_t table[]
Definition: prosumer.c:206
data
const char data[16]
Definition: mxf.c:146
WMAVoiceContext::silence_gain
float silence_gain
set for use in blocks if ACB_TYPE_NONE
Definition: wmavoice.c:227
expf
#define expf(x)
Definition: libm.h:283
WMAVoiceContext::denoise_filter_cache_size
int denoise_filter_cache_size
samples in denoise_filter_cache
Definition: wmavoice.c:279
wmavoice_denoise_power_table
static const float wmavoice_denoise_power_table[12][64]
LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
Definition: wmavoice_data.h:3064
AV_CHANNEL_LAYOUT_MONO
#define AV_CHANNEL_LAYOUT_MONO
Definition: channel_layout.h:353
wmavoice_gain_codebook_acb
static const float wmavoice_gain_codebook_acb[128]
Definition: wmavoice_data.h:2874
FFCodec
Definition: codec_internal.h:118
base
uint8_t base
Definition: vp3data.h:141
t1
#define t1
Definition: regdef.h:29
max
#define max(a, b)
Definition: cuda_runtime.h:33
FCB_TYPE_EXC_PULSES
@ FCB_TYPE_EXC_PULSES
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.
Definition: wmavoice.c:92
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_celp_lp_synthesis_filterf
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
Definition: celp_filters.c:85
WMAVoiceContext::aw_idx_is_ext
int aw_idx_is_ext
whether the AW index was encoded in 8 bits (instead of 6)
Definition: wmavoice.c:229
ACB_TYPE_ASYMMETRIC
@ ACB_TYPE_ASYMMETRIC
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Definition: wmavoice.c:71
init_get_bits
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
Definition: get_bits.h:649
WMAVoiceContext::dc_level
int dc_level
Predicted amount of DC noise, based on which a DC removal filter is used.
Definition: wmavoice.c:156
wmavoice_dq_lsp16i1
static const uint8_t wmavoice_dq_lsp16i1[0x640]
Definition: wmavoice_data.h:420
WMAVoiceContext::block_conv_table
uint16_t block_conv_table[4]
boundaries for block pitch unit/scale conversion
Definition: wmavoice.c:177
frame_type_desc::log_n_blocks
uint8_t log_n_blocks
log2(n_blocks)
Definition: wmavoice.c:103
init
static int init
Definition: av_tx.c:47
skip_bits
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:467
WMAVoiceContext::aw_pulse_range
int aw_pulse_range
the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...
Definition: wmavoice.c:231
get_bits
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:379
ff_copy_bits
void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
Copy the content of src to the bitstream.
Definition: bitstream.c:49
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:122
av_ceil_log2
#define av_ceil_log2
Definition: common.h:92
WMAVoiceContext::tilted_lpcs_pf
float tilted_lpcs_pf[0x80]
aligned buffer for LPC tilting
Definition: wmavoice.c:280
AMRFixed::pitch_fac
float pitch_fac
Definition: acelp_vectors.h:59
GetBitContext
Definition: get_bits.h:61
MULH
#define MULH
Definition: mathops.h:42
wmavoice_flush
static av_cold void wmavoice_flush(AVCodecContext *ctx)
Definition: wmavoice.c:337
put_bits_left
static int put_bits_left(PutBitContext *s)
Definition: put_bits.h:125
IDFT_C2R
@ IDFT_C2R
Definition: avfft.h:73
ff_rdft_end
av_cold void ff_rdft_end(RDFTContext *s)
Definition: rdft.c:117
calc_input_response
static void calc_input_response(WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)
Derive denoise filter coefficients (in real domain) from the LPCs.
Definition: wmavoice.c:607
frame_type_desc::n_blocks
uint8_t n_blocks
amount of blocks per frame (each block contains 160/n_blocks samples)
Definition: wmavoice.c:101
val
static double val(void *priv, double ch)
Definition: aeval.c:77
dequant_lsp10i
static void dequant_lsp10i(GetBitContext *gb, double *lsps)
Parse 10 independently-coded LSPs.
Definition: wmavoice.c:893
synth_block
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
Parse data in a single block.
Definition: wmavoice.c:1442
MAX_SFRAMESIZE
#define MAX_SFRAMESIZE
maximum number of samples per superframe
Definition: wmavoice.c:55
wmavoice_gain_codebook_fcb
static const float wmavoice_gain_codebook_fcb[128]
Definition: wmavoice_data.h:2893
WMAVoiceContext::denoise_filter_cache
float denoise_filter_cache[MAX_FRAMESIZE]
Definition: wmavoice.c:278
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
WMAVoiceContext::sin
float sin[511]
Definition: wmavoice.c:270
a1
#define a1
Definition: regdef.h:47
AV_CODEC_ID_WMAVOICE
@ AV_CODEC_ID_WMAVOICE
Definition: codec_id.h:465
lrint
#define lrint
Definition: tablegen.h:53
MUL16
#define MUL16(ra, rb)
Definition: mathops.h:88
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:179
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
av_cold
#define av_cold
Definition: attributes.h:90
dct.h
init_get_bits8
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:667
MAX_LSPS_ALIGN16
#define MAX_LSPS_ALIGN16
same as MAX_LSPS; needs to be multiple
Definition: wmavoice.c:50
av_memcpy_backptr
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
Overlapping memcpy() implementation.
Definition: mem.c:455
float
float
Definition: af_crystalizer.c:122
wmavoice_dq_lsp10r
static const uint8_t wmavoice_dq_lsp10r[0x1400]
Definition: wmavoice_data.h:749
FF_CODEC_DECODE_CB
#define FF_CODEC_DECODE_CB(func)
Definition: codec_internal.h:260
WMAVoiceContext::sframe_cache_size
int sframe_cache_size
set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...
Definition: wmavoice.c:205
s
#define s(width, name)
Definition: cbs_vp9.c:256
WMAVoiceContext::lsp_q_mode
int lsp_q_mode
defines quantizer defaults [0, 1]
Definition: wmavoice.c:160
frame_type_desc::fcb_type
uint8_t fcb_type
Fixed codebook type (FCB_TYPE_*)
Definition: wmavoice.c:105
log_range
#define log_range(var, assign)
WMAVoiceContext::prev_lsps
double prev_lsps[MAX_LSPS]
LSPs of the last frame of the previous superframe.
Definition: wmavoice.c:221
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
WMAVoiceContext::aw_n_pulses
int aw_n_pulses[2]
number of AW-pulses in each block; note that this number can be negative (in which case it basically ...
Definition: wmavoice.c:237
AMRFixed
Sparse representation for the algebraic codebook (fixed) vector.
Definition: acelp_vectors.h:53
bits
uint8_t bits
Definition: vp3data.h:141
adaptive_gain_control
static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
Adaptive gain control (as used in postfilter).
Definition: wmavoice.c:507
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
wmavoice_lsp16_intercoeff_a
static const float wmavoice_lsp16_intercoeff_a[32][2][16]
Definition: wmavoice_data.h:2047
ctx
AVFormatContext * ctx
Definition: movenc.c:48
get_bits.h
wmavoice_mean_lsf10
static const double wmavoice_mean_lsf10[2][10]
Definition: wmavoice_data.h:2565
WMAVoiceContext::spillover_nbits
int spillover_nbits
number of bits of the previous packet's last superframe preceding this packet's first full superframe...
Definition: wmavoice.c:189
UMULH
static av_always_inline unsigned UMULH(unsigned a, unsigned b)
Definition: mathops.h:68
AMRFixed::y
float y[10]
Definition: acelp_vectors.h:56
wmavoice_gain_silence
static const float wmavoice_gain_silence[256]
Definition: wmavoice_data.h:2788
PutBitContext
Definition: put_bits.h:50
WMAVoiceContext::vbm_tree
int8_t vbm_tree[25]
converts VLC codes to frame type
Definition: wmavoice.c:141
mul
static float mul(float src0, float src1)
Definition: dnn_backend_native_layer_mathbinary.c:39
wmavoice_dq_lsp16i3
static const uint8_t wmavoice_dq_lsp16i3[0x300]
Definition: wmavoice_data.h:682
if
if(ret)
Definition: filter_design.txt:179
AMRFixed::no_repeat_mask
int no_repeat_mask
Definition: acelp_vectors.h:57
postfilter
static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
Averaging projection filter, the postfilter used in WMAVoice.
Definition: wmavoice.c:808
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:177
NULL
#define NULL
Definition: coverity.c:32
sizes
static const int sizes[][2]
Definition: img2dec.c:57
WMAVoiceContext::history_nsamples
int history_nsamples
number of samples in history for signal prediction (through ACB)
Definition: wmavoice.c:146
WMAVoiceContext::synth_history
float synth_history[MAX_LSPS]
see excitation_history
Definition: wmavoice.c:256
AVERROR_PATCHWELCOME
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:64
last_coeff
static const uint8_t last_coeff[3]
Definition: qdm2data.h:187
ACB_TYPE_NONE
@ ACB_TYPE_NONE
no adaptive codebook (only hardcoded fixed)
Definition: wmavoice.c:70
WMAVoiceContext::denoise_strength
int denoise_strength
strength of denoising in Wiener filter [0-11]
Definition: wmavoice.c:152
MAX_SIGNAL_HISTORY
#define MAX_SIGNAL_HISTORY
maximum excitation signal history
Definition: wmavoice.c:54
WMAVoiceContext::sframe_cache
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+AV_INPUT_BUFFER_PADDING_SIZE]
cache for superframe data split over multiple packets
Definition: wmavoice.c:202
get_bits1
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:498
dequant_lsp10r
static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:919
WMAVoiceContext::pitch_nbits
int pitch_nbits
number of bits used to specify the pitch value in the frame header
Definition: wmavoice.c:166
WMAVoiceContext::block_delta_pitch_nbits
int block_delta_pitch_nbits
number of bits used to specify the delta pitch between this and the last block's pitch value,...
Definition: wmavoice.c:171
kalman_smoothen
static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
Kalman smoothing function.
Definition: wmavoice.c:548
WMAVoiceContext::denoise_coeffs_pf
float denoise_coeffs_pf[0x80]
aligned buffer for denoise coefficients
Definition: wmavoice.c:282
WMAVoiceContext::skip_bits_next
int skip_bits_next
number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...
Definition: wmavoice.c:198
DFT_R2C
@ DFT_R2C
Definition: avfft.h:72
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
abs
#define abs(x)
Definition: cuda_runtime.h:35
celp_filters.h
MAX_FRAMESIZE
#define MAX_FRAMESIZE
maximum number of samples per frame
Definition: wmavoice.c:53
av_clipf
av_clipf
Definition: af_crystalizer.c:122
MAX_FRAMES
#define MAX_FRAMES
maximum number of frames per superframe
Definition: wmavoice.c:52
get_vlc2
static av_always_inline int get_vlc2(GetBitContext *s, const VLCElem *table, int bits, int max_depth)
Parse a vlc code.
Definition: get_bits.h:787
decode_vbmtree
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
Set up the variable bit mode (VBM) tree from container extradata.
Definition: wmavoice.c:301
AVOnce
#define AVOnce
Definition: thread.h:176
aw_parse_coords
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...
Definition: wmavoice.c:1038
wmavoice_init_static_data
static av_cold void wmavoice_init_static_data(void)
Definition: wmavoice.c:315
float_dsp.h
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:630
WMAVoiceContext::dcf_mem
float dcf_mem[2]
DC filter history.
Definition: wmavoice.c:274
ff_get_buffer
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: decode.c:1462
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:52
parse_packet_header
static int parse_packet_header(WMAVoiceContext *s)
Parse the packet header at the start of each packet (input data to this decoder).
Definition: wmavoice.c:1842
AVPacket::size
int size
Definition: packet.h:375
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
powf
#define powf(x, y)
Definition: libm.h:50
AVChannelLayout
An AVChannelLayout holds information about the channel layout of audio data.
Definition: channel_layout.h:290
codec_internal.h
frame_type_vlc
static VLC frame_type_vlc
Frame type VLC coding.
Definition: wmavoice.c:64
WMAVoiceContext::spillover_bitsize
int spillover_bitsize
number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...
Definition: wmavoice.c:143
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
WMAVoiceContext::pb
PutBitContext pb
bitstream writer for sframe_cache
Definition: wmavoice.c:210
WMAVoiceContext::last_pitch_val
int last_pitch_val
pitch value of the previous frame
Definition: wmavoice.c:223
size
int size
Definition: twinvq_data.h:10344
wiener_denoise
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.
Definition: wmavoice.c:726
wmavoice_lsp10_intercoeff_b
static const float wmavoice_lsp10_intercoeff_b[32][2][10]
Definition: wmavoice_data.h:1852
WMAVoiceContext::dct
DCTContext dct
Definition: wmavoice.c:268
FCB_TYPE_AW_PULSES
@ FCB_TYPE_AW_PULSES
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
Definition: wmavoice.c:90
dequant_lsp16i
static void dequant_lsp16i(GetBitContext *gb, double *lsps)
Parse 16 independently-coded LSPs.
Definition: wmavoice.c:955
wmavoice_dq_lsp16r1
static const uint8_t wmavoice_dq_lsp16r1[0x500]
Definition: wmavoice_data.h:1264
WMAVoiceContext::aw_first_pulse_off
int aw_first_pulse_off[2]
index of first sample to which to apply AW-pulses, or -0xff if unset
Definition: wmavoice.c:240
WMAVoiceContext::zero_exc_pf
float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]
zero filter output (i.e.
Definition: wmavoice.c:275
sinewin.h
wmavoice_dq_lsp16r2
static const uint8_t wmavoice_dq_lsp16r2[0x500]
Definition: wmavoice_data.h:1395
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
frame_type_desc
Description of frame types.
Definition: wmavoice.c:100
WMAVoiceContext::block_pitch_range
int block_pitch_range
range of the block pitch
Definition: wmavoice.c:170
stabilize_lsps
static void stabilize_lsps(double *lsps, int num)
Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...
Definition: wmavoice.c:1660
ff_dct_end
av_cold void ff_dct_end(DCTContext *s)
Definition: dct.c:224
M_PI
#define M_PI
Definition: mathematics.h:52
ff_tilt_compensation
void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
Apply tilt compensation filter, 1 - tilt * z-1.
Definition: acelp_filters.c:138
wmavoice_energy_table
static const float wmavoice_energy_table[128]
LUT for 1.071575641632 * pow(1.0331663, n - 127)
Definition: wmavoice_data.h:3026
ff_sine_window_init
void ff_sine_window_init(float *window, int n)
Generate a sine window.
Definition: sinewin_tablegen.h:59
wmavoice_decode_init
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
Definition: wmavoice.c:368
WMAVoiceContext::block_delta_pitch_hrange
int block_delta_pitch_hrange
1/2 range of the delta (full range is from -this to +this-1)
Definition: wmavoice.c:175
DST_I
@ DST_I
Definition: avfft.h:97
wmavoice_ipol2_coeffs
static const float wmavoice_ipol2_coeffs[32]
Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...
Definition: wmavoice_data.h:3012
ff_rdft_init
av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
Set up a real FFT.
Definition: rdft.c:89
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:116
WMAVoiceContext::pitch_diff_sh16
int pitch_diff_sh16
((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE
Definition: wmavoice.c:225
WMAVoiceContext::gain_pred_err
float gain_pred_err[6]
cache for gain prediction
Definition: wmavoice.c:251
ff_dct_init
av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
Set up DCT.
Definition: dct.c:179
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:269
WMAVoiceContext::nb_superframes
int nb_superframes
number of superframes in current packet
Definition: wmavoice.c:250
t3
#define t3
Definition: regdef.h:31
WMAVoiceContext::cos
float cos[511]
8-bit cosine/sine windows over [-pi,pi] range
Definition: wmavoice.c:270
RDFTContext
Definition: rdft.h:28
a2
#define a2
Definition: regdef.h:48
WMAVoiceContext::denoise_tilt_corr
int denoise_tilt_corr
Whether to apply tilt correction to the Wiener filter coefficients (postfilter)
Definition: wmavoice.c:154
delta
float delta
Definition: vorbis_enc_data.h:430
wmavoice_lsp16_intercoeff_b
static const float wmavoice_lsp16_intercoeff_b[32][2][16]
Definition: wmavoice_data.h:2306
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:487
acelp_filters.h
ff_weighted_vector_sumf
void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)
float implementation of weighted sum of two vectors.
Definition: acelp_vectors.c:182
DCTContext
Definition: dct.h:32
WMAVoiceContext::lsp_def_mode
int lsp_def_mode
defines different sets of LSP defaults [0, 1]
Definition: wmavoice.c:161
wmavoice_gain_universal
static const float wmavoice_gain_universal[64]
Definition: wmavoice_data.h:2855
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:211
len
int len
Definition: vorbis_enc_data.h:426
WMAVoiceContext::synth_filter_out_buf
float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]
aligned buffer for postfilter speech synthesis
Definition: wmavoice.c:284
tilt_factor
static float tilt_factor(const float *lpcs, int n_lpcs)
Get the tilt factor of a formant filter from its transfer function.
Definition: wmavoice.c:594
WMAVoiceContext::rdft
RDFTContext rdft
Definition: wmavoice.c:266
VLC_NBITS
#define VLC_NBITS
number of bits to read per VLC iteration
Definition: wmavoice.c:59
wmavoice_data.h
Windows Media Voice (WMAVoice) tables.
avcodec.h
WMAVoiceContext::min_pitch_val
int min_pitch_val
base value for pitch parsing code
Definition: wmavoice.c:164
WMAVoiceContext::last_acb_type
int last_acb_type
frame type [0-2] of the previous frame
Definition: wmavoice.c:224
av_uninit
#define av_uninit(x)
Definition: attributes.h:154
ret
ret
Definition: filter_design.txt:187
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
lsp.h
ff_celp_lp_zero_synthesis_filterf
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
Definition: celp_filters.c:200
WMAVoiceContext::do_apf
int do_apf
whether to apply the averaged projection filter (APF)
Definition: wmavoice.c:150
pos
unsigned int pos
Definition: spdifenc.c:412
AMRFixed::n
int n
Definition: acelp_vectors.h:54
wmavoice_dq_lsp16i2
static const uint8_t wmavoice_dq_lsp16i2[0x3c0]
Definition: wmavoice_data.h:583
AV_INPUT_BUFFER_PADDING_SIZE
#define AV_INPUT_BUFFER_PADDING_SIZE
Definition: defs.h:40
wmavoice_mean_lsf16
static const double wmavoice_mean_lsf16[2][16]
Definition: wmavoice_data.h:2574
AV_RL32
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_RL32
Definition: bytestream.h:92
U
#define U(x)
Definition: vpx_arith.h:37
wmavoice_decode_end
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
Definition: wmavoice.c:1987
WMAVoiceContext::lsps
int lsps
number of LSPs per frame [10 or 16]
Definition: wmavoice.c:159
AVCodecContext
main external API structure.
Definition: avcodec.h:398
wmavoice_decode_packet
static int wmavoice_decode_packet(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
Definition: wmavoice.c:1906
channel_layout.h
t2
#define t2
Definition: regdef.h:30
WMAVoiceContext::dst
DCTContext dst
contexts for phase shift (in Hilbert transform, part of postfilter)
Definition: wmavoice.c:268
WMAVoiceContext::block_pitch_nbits
int block_pitch_nbits
number of bits used to specify the first block's pitch value
Definition: wmavoice.c:168
VLC
Definition: vlc.h:31
synth_superframe
static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
Synthesize output samples for a single superframe.
Definition: wmavoice.c:1706
WMAVoiceContext::frame_cntr
int frame_cntr
current frame index [0 - 0xFFFE]; only used for comfort noise in pRNG()
Definition: wmavoice.c:248
wmavoice_ipol1_coeffs
static const float wmavoice_ipol1_coeffs[17 *9]
Definition: wmavoice_data.h:2960
values
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
Definition: filter_design.txt:263
ff_set_fixed_vector
void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
Add fixed vector to an array from a sparse representation.
Definition: acelp_vectors.c:224
mean_lsf
static const float mean_lsf[10]
Definition: siprdata.h:27
AV_CODEC_CAP_DELAY
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: codec.h:82
VLC::table
VLCElem * table
Definition: vlc.h:33
samples
The word “frame” indicates either a video frame or a group of audio samples
Definition: filter_design.txt:8
copy_bits
static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
Copy (unaligned) bits from gb/data/size to pb.
Definition: wmavoice.c:1877
avpriv_scalarproduct_float_c
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
Return the scalar product of two vectors.
Definition: float_dsp.c:124
synth_frame
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
Synthesize output samples for a single frame.
Definition: wmavoice.c:1484
AV_CODEC_CAP_SUBFRAMES
#define AV_CODEC_CAP_SUBFRAMES
Codec can output multiple frames per AVPacket Normally demuxers return one frame at a time,...
Definition: codec.h:100
M_LN10
#define M_LN10
Definition: mathematics.h:43
WMAVoiceContext::gb
GetBitContext gb
packet bitreader.
Definition: wmavoice.c:137
FCB_TYPE_SILENCE
@ FCB_TYPE_SILENCE
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
Definition: wmavoice.c:85
avpriv_request_sample
#define avpriv_request_sample(...)
Definition: tableprint_vlc.h:36
synth_block_fcb_acb
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
Parse FCB/ACB signal for a single block.
Definition: wmavoice.c:1308
flush_put_bits
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:143
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
AVPacket
This structure stores compressed data.
Definition: packet.h:351
synth_block_hardcoded
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
Parse hardcoded signal for a single block.
Definition: wmavoice.c:1277
SFRAME_CACHE_MAXSIZE
#define SFRAME_CACHE_MAXSIZE
maximum cache size for frame data that was split over two packets
Definition: wmavoice.c:57
AMRFixed::pitch_lag
int pitch_lag
Definition: acelp_vectors.h:58
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:561
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
wmavoice_lsp10_intercoeff_a
static const float wmavoice_lsp10_intercoeff_a[32][2][10]
Definition: wmavoice_data.h:1657
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
dequant_lsp16r
static void dequant_lsp16r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:988
frame_type_desc::dbl_pulses
uint8_t dbl_pulses
how many pulse vectors have pulse pairs (rather than just one single pulse) only if fcb_type == FCB_T...
Definition: wmavoice.c:106
frame_type_desc::acb_type
uint8_t acb_type
Adaptive codebook type (ACB_TYPE_*)
Definition: wmavoice.c:104
MAX_BLOCKS
#define MAX_BLOCKS
maximum number of blocks per frame
Definition: wmavoice.c:48
ff_acelp_lspd2lpc
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
Reconstruct LPC coefficients from the line spectral pair frequencies.
Definition: lsp.c:209
WMAVoiceContext::postfilter_agc
float postfilter_agc
gain control memory, used in adaptive_gain_control()
Definition: wmavoice.c:272
put_bits.h
pulses
static const int8_t pulses[4]
Number of non-zero pulses in the MP-MLQ excitation.
Definition: g723_1.h:260
ff_acelp_interpolatef
void ff_acelp_interpolatef(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
Definition: acelp_filters.c:80
AVFormatContext::priv_data
void * priv_data
Format private data.
Definition: avformat.h:1241
AV_SAMPLE_FMT_FLT
@ AV_SAMPLE_FMT_FLT
float
Definition: samplefmt.h:60
min
float min
Definition: vorbis_enc_data.h:429
WMAVoiceContext
WMA Voice decoding context.
Definition: wmavoice.c:132