FFmpeg
wmavoice.c
Go to the documentation of this file.
1 /*
2  * Windows Media Audio Voice decoder.
3  * Copyright (c) 2009 Ronald S. Bultje
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * @brief Windows Media Audio Voice compatible decoder
25  * @author Ronald S. Bultje <rsbultje@gmail.com>
26  */
27 
28 #include <math.h>
29 
31 #include "libavutil/float_dsp.h"
32 #include "libavutil/mem.h"
33 #include "libavutil/mem_internal.h"
34 #include "libavutil/thread.h"
35 #include "libavutil/tx.h"
36 #include "avcodec.h"
37 #include "codec_internal.h"
38 #include "decode.h"
39 #include "get_bits.h"
40 #include "put_bits.h"
41 #include "wmavoice_data.h"
42 #include "celp_filters.h"
43 #include "acelp_vectors.h"
44 #include "acelp_filters.h"
45 #include "lsp.h"
46 #include "sinewin.h"
47 
48 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame
49 #define MAX_LSPS 16 ///< maximum filter order
50 #define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be multiple
51  ///< of 16 for ASM input buffer alignment
52 #define MAX_FRAMES 3 ///< maximum number of frames per superframe
53 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame
54 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history
55 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
56  ///< maximum number of samples per superframe
57 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that
58  ///< was split over two packets
59 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration
60 
61 /**
62  * Frame type VLC coding.
63  */
64 static VLCElem frame_type_vlc[132];
65 
66 /**
67  * Adaptive codebook types.
68  */
69 enum {
70  ACB_TYPE_NONE = 0, ///< no adaptive codebook (only hardcoded fixed)
71  ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which
72  ///< we interpolate to get a per-sample pitch.
73  ///< Signal is generated using an asymmetric sinc
74  ///< window function
75  ///< @note see #wmavoice_ipol1_coeffs
76  ACB_TYPE_HAMMING = 2 ///< Per-block pitch with signal generation using
77  ///< a Hamming sinc window function
78  ///< @note see #wmavoice_ipol2_coeffs
79 };
80 
81 /**
82  * Fixed codebook types.
83  */
84 enum {
85  FCB_TYPE_SILENCE = 0, ///< comfort noise during silence
86  ///< generated from a hardcoded (fixed) codebook
87  ///< with per-frame (low) gain values
88  FCB_TYPE_HARDCODED = 1, ///< hardcoded (fixed) codebook with per-block
89  ///< gain values
90  FCB_TYPE_AW_PULSES = 2, ///< Pitch-adaptive window (AW) pulse signals,
91  ///< used in particular for low-bitrate streams
92  FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in
93  ///< combinations of either single pulses or
94  ///< pulse pairs
95 };
96 
97 /**
98  * Description of frame types.
99  */
100 static const struct frame_type_desc {
101  uint8_t n_blocks; ///< amount of blocks per frame (each block
102  ///< (contains 160/#n_blocks samples)
103  uint8_t log_n_blocks; ///< log2(#n_blocks)
104  uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)
105  uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)
106  uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs
107  ///< (rather than just one single pulse)
108  ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES
109 } frame_descs[17] = {
110  { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0 },
111  { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0 },
127 };
128 
129 /**
130  * WMA Voice decoding context.
131  */
132 typedef struct WMAVoiceContext {
133  /**
134  * @name Global values specified in the stream header / extradata or used all over.
135  * @{
136  */
137  GetBitContext gb; ///< packet bitreader. During decoder init,
138  ///< it contains the extradata from the
139  ///< demuxer. During decoding, it contains
140  ///< packet data.
141  int8_t vbm_tree[25]; ///< converts VLC codes to frame type
142 
143  int spillover_bitsize; ///< number of bits used to specify
144  ///< #spillover_nbits in the packet header
145  ///< = ceil(log2(ctx->block_align << 3))
146  int history_nsamples; ///< number of samples in history for signal
147  ///< prediction (through ACB)
148 
149  /* postfilter specific values */
150  int do_apf; ///< whether to apply the averaged
151  ///< projection filter (APF)
152  int denoise_strength; ///< strength of denoising in Wiener filter
153  ///< [0-11]
154  int denoise_tilt_corr; ///< Whether to apply tilt correction to the
155  ///< Wiener filter coefficients (postfilter)
156  int dc_level; ///< Predicted amount of DC noise, based
157  ///< on which a DC removal filter is used
158 
159  int lsps; ///< number of LSPs per frame [10 or 16]
160  int lsp_q_mode; ///< defines quantizer defaults [0, 1]
161  int lsp_def_mode; ///< defines different sets of LSP defaults
162  ///< [0, 1]
163 
164  int min_pitch_val; ///< base value for pitch parsing code
165  int max_pitch_val; ///< max value + 1 for pitch parsing
166  int pitch_nbits; ///< number of bits used to specify the
167  ///< pitch value in the frame header
168  int block_pitch_nbits; ///< number of bits used to specify the
169  ///< first block's pitch value
170  int block_pitch_range; ///< range of the block pitch
171  int block_delta_pitch_nbits; ///< number of bits used to specify the
172  ///< delta pitch between this and the last
173  ///< block's pitch value, used in all but
174  ///< first block
175  int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is
176  ///< from -this to +this-1)
177  uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale
178  ///< conversion
179 
180  /**
181  * @}
182  *
183  * @name Packet values specified in the packet header or related to a packet.
184  *
185  * A packet is considered to be a single unit of data provided to this
186  * decoder by the demuxer.
187  * @{
188  */
189  int spillover_nbits; ///< number of bits of the previous packet's
190  ///< last superframe preceding this
191  ///< packet's first full superframe (useful
192  ///< for re-synchronization also)
193  int has_residual_lsps; ///< if set, superframes contain one set of
194  ///< LSPs that cover all frames, encoded as
195  ///< independent and residual LSPs; if not
196  ///< set, each frame contains its own, fully
197  ///< independent, LSPs
198  int skip_bits_next; ///< number of bits to skip at the next call
199  ///< to #wmavoice_decode_packet() (since
200  ///< they're part of the previous superframe)
201 
203  ///< cache for superframe data split over
204  ///< multiple packets
205  int sframe_cache_size; ///< set to >0 if we have data from an
206  ///< (incomplete) superframe from a previous
207  ///< packet that spilled over in the current
208  ///< packet; specifies the amount of bits in
209  ///< #sframe_cache
210  PutBitContext pb; ///< bitstream writer for #sframe_cache
211 
212  /**
213  * @}
214  *
215  * @name Frame and superframe values
216  * Superframe and frame data - these can change from frame to frame,
217  * although some of them do in that case serve as a cache / history for
218  * the next frame or superframe.
219  * @{
220  */
221  double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous
222  ///< superframe
223  int last_pitch_val; ///< pitch value of the previous frame
224  int last_acb_type; ///< frame type [0-2] of the previous frame
225  int pitch_diff_sh16; ///< ((cur_pitch_val - #last_pitch_val)
226  ///< << 16) / #MAX_FRAMESIZE
227  float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE
228 
229  int aw_idx_is_ext; ///< whether the AW index was encoded in
230  ///< 8 bits (instead of 6)
231  int aw_pulse_range; ///< the range over which #aw_pulse_set1()
232  ///< can apply the pulse, relative to the
233  ///< value in aw_first_pulse_off. The exact
234  ///< position of the first AW-pulse is within
235  ///< [pulse_off, pulse_off + this], and
236  ///< depends on bitstream values; [16 or 24]
237  int aw_n_pulses[2]; ///< number of AW-pulses in each block; note
238  ///< that this number can be negative (in
239  ///< which case it basically means "zero")
240  int aw_first_pulse_off[2]; ///< index of first sample to which to
241  ///< apply AW-pulses, or -0xff if unset
242  int aw_next_pulse_off_cache; ///< the position (relative to start of the
243  ///< second block) at which pulses should
244  ///< start to be positioned, serves as a
245  ///< cache for pitch-adaptive window pulses
246  ///< between blocks
247 
248  int frame_cntr; ///< current frame index [0 - 0xFFFE]; is
249  ///< only used for comfort noise in #pRNG()
250  int nb_superframes; ///< number of superframes in current packet
251  float gain_pred_err[6]; ///< cache for gain prediction
253  ///< cache of the signal of previous
254  ///< superframes, used as a history for
255  ///< signal generation
256  float synth_history[MAX_LSPS]; ///< see #excitation_history
257  /**
258  * @}
259  *
260  * @name Postfilter values
261  *
262  * Variables used for postfilter implementation, mostly history for
263  * smoothing and so on, and context variables for FFT/iFFT.
264  * @{
265  */
266  AVTXContext *rdft, *irdft; ///< contexts for FFT-calculation in the
267  av_tx_fn rdft_fn, irdft_fn; ///< postfilter (for denoise filter)
268  AVTXContext *dct, *dst; ///< contexts for phase shift (in Hilbert
269  av_tx_fn dct_fn, dst_fn; ///< transform, part of postfilter)
270  float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
271  ///< range
272  float postfilter_agc; ///< gain control memory, used in
273  ///< #adaptive_gain_control()
274  float dcf_mem[2]; ///< DC filter history
276  ///< zero filter output (i.e. excitation)
277  ///< by postfilter
279  int denoise_filter_cache_size; ///< samples in #denoise_filter_cache
280  DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x82];
281  ///< aligned buffer for LPC tilting
283  ///< aligned buffer for denoise coefficients
285  ///< aligned buffer for postfilter speech
286  ///< synthesis
287  /**
288  * @}
289  */
291 
292 /**
293  * Set up the variable bit mode (VBM) tree from container extradata.
294  * @param gb bit I/O context.
295  * The bit context (s->gb) should be loaded with byte 23-46 of the
296  * container extradata (i.e. the ones containing the VBM tree).
297  * @param vbm_tree pointer to array to which the decoded VBM tree will be
298  * written.
299  * @return 0 on success, <0 on error.
300  */
301 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
302 {
303  int cntr[8] = { 0 }, n, res;
304 
305  memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
306  for (n = 0; n < 17; n++) {
307  res = get_bits(gb, 3);
308  if (cntr[res] > 3) // should be >= 3 + (res == 7))
309  return -1;
310  vbm_tree[res * 3 + cntr[res]++] = n;
311  }
312  return 0;
313 }
314 
316 {
317  static const uint8_t bits[] = {
318  2, 2, 2, 4, 4, 4,
319  6, 6, 6, 8, 8, 8,
320  10, 10, 10, 12, 12, 12,
321  14, 14, 14, 14
322  };
323 
326  1, NULL, 0, 0, 0, 0);
327 }
328 
330 {
332  int n;
333 
334  s->postfilter_agc = 0;
335  s->sframe_cache_size = 0;
336  s->skip_bits_next = 0;
337  for (n = 0; n < s->lsps; n++)
338  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
339  memset(s->excitation_history, 0,
340  sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
341  memset(s->synth_history, 0,
342  sizeof(*s->synth_history) * MAX_LSPS);
343  memset(s->gain_pred_err, 0,
344  sizeof(s->gain_pred_err));
345 
346  if (s->do_apf) {
347  memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
348  sizeof(*s->synth_filter_out_buf) * s->lsps);
349  memset(s->dcf_mem, 0,
350  sizeof(*s->dcf_mem) * 2);
351  memset(s->zero_exc_pf, 0,
352  sizeof(*s->zero_exc_pf) * s->history_nsamples);
353  memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
354  }
355 }
356 
357 /**
358  * Set up decoder with parameters from demuxer (extradata etc.).
359  */
361 {
362  static AVOnce init_static_once = AV_ONCE_INIT;
363  int n, flags, pitch_range, lsp16_flag, ret;
365 
366  ff_thread_once(&init_static_once, wmavoice_init_static_data);
367 
368  /**
369  * Extradata layout:
370  * - byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
371  * - byte 19-22: flags field (annoyingly in LE; see below for known
372  * values),
373  * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,
374  * rest is 0).
375  */
376  if (ctx->extradata_size != 46) {
378  "Invalid extradata size %d (should be 46)\n",
379  ctx->extradata_size);
380  return AVERROR_INVALIDDATA;
381  }
382  if (ctx->block_align <= 0 || ctx->block_align > (1<<22)) {
383  av_log(ctx, AV_LOG_ERROR, "Invalid block alignment %d.\n", ctx->block_align);
384  return AVERROR_INVALIDDATA;
385  }
386 
387  flags = AV_RL32(ctx->extradata + 18);
388  s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
389  s->do_apf = flags & 0x1;
390  if (s->do_apf) {
391  float scale = 1.0f;
392 
393  ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, &scale, 0);
394  if (ret < 0)
395  return ret;
396 
397  ret = av_tx_init(&s->irdft, &s->irdft_fn, AV_TX_FLOAT_RDFT, 1, 1 << 7, &scale, 0);
398  if (ret < 0)
399  return ret;
400 
401  scale = 1.0 / (1 << 6);
402  ret = av_tx_init(&s->dct, &s->dct_fn, AV_TX_FLOAT_DCT_I, 0, 1 << 6, &scale, 0);
403  if (ret < 0)
404  return ret;
405 
406  scale = 1.0 / (1 << 6);
407  ret = av_tx_init(&s->dst, &s->dst_fn, AV_TX_FLOAT_DST_I, 0, 1 << 6, &scale, 0);
408  if (ret < 0)
409  return ret;
410 
411  ff_sine_window_init(s->cos, 256);
412  memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
413  for (n = 0; n < 255; n++) {
414  s->sin[n] = -s->sin[510 - n];
415  s->cos[510 - n] = s->cos[n];
416  }
417  }
418  s->denoise_strength = (flags >> 2) & 0xF;
419  if (s->denoise_strength >= 12) {
421  "Invalid denoise filter strength %d (max=11)\n",
422  s->denoise_strength);
423  return AVERROR_INVALIDDATA;
424  }
425  s->denoise_tilt_corr = !!(flags & 0x40);
426  s->dc_level = (flags >> 7) & 0xF;
427  s->lsp_q_mode = !!(flags & 0x2000);
428  s->lsp_def_mode = !!(flags & 0x4000);
429  lsp16_flag = flags & 0x1000;
430  if (lsp16_flag) {
431  s->lsps = 16;
432  } else {
433  s->lsps = 10;
434  }
435  for (n = 0; n < s->lsps; n++)
436  s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
437 
438  init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
439  if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
440  av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
441  return AVERROR_INVALIDDATA;
442  }
443 
444  if (ctx->sample_rate >= INT_MAX / (256 * 37))
445  return AVERROR_INVALIDDATA;
446 
447  s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
448  s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
449  pitch_range = s->max_pitch_val - s->min_pitch_val;
450  if (pitch_range <= 0) {
451  av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
452  return AVERROR_INVALIDDATA;
453  }
454  s->pitch_nbits = av_ceil_log2(pitch_range);
455  s->last_pitch_val = 40;
456  s->last_acb_type = ACB_TYPE_NONE;
457  s->history_nsamples = s->max_pitch_val + 8;
458 
459  if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
460  int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
461  max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
462 
464  "Unsupported samplerate %d (min=%d, max=%d)\n",
465  ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
466 
467  return AVERROR(ENOSYS);
468  }
469 
470  s->block_conv_table[0] = s->min_pitch_val;
471  s->block_conv_table[1] = (pitch_range * 25) >> 6;
472  s->block_conv_table[2] = (pitch_range * 44) >> 6;
473  s->block_conv_table[3] = s->max_pitch_val - 1;
474  s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
475  if (s->block_delta_pitch_hrange <= 0) {
476  av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
477  return AVERROR_INVALIDDATA;
478  }
479  s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
480  s->block_pitch_range = s->block_conv_table[2] +
481  s->block_conv_table[3] + 1 +
482  2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
483  s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
484 
485  av_channel_layout_uninit(&ctx->ch_layout);
487  ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
488 
489  return 0;
490 }
491 
492 /**
493  * @name Postfilter functions
494  * Postfilter functions (gain control, wiener denoise filter, DC filter,
495  * kalman smoothening, plus surrounding code to wrap it)
496  * @{
497  */
498 /**
499  * Adaptive gain control (as used in postfilter).
500  *
501  * Identical to #ff_adaptive_gain_control() in acelp_vectors.c, except
502  * that the energy here is calculated using sum(abs(...)), whereas the
503  * other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).
504  *
505  * @param out output buffer for filtered samples
506  * @param in input buffer containing the samples as they are after the
507  * postfilter steps so far
508  * @param speech_synth input buffer containing speech synth before postfilter
509  * @param size input buffer size
510  * @param alpha exponential filter factor
511  * @param gain_mem pointer to filter memory (single float)
512  */
513 static void adaptive_gain_control(float *out, const float *in,
514  const float *speech_synth,
515  int size, float alpha, float *gain_mem)
516 {
517  int i;
518  float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
519  float mem = *gain_mem;
520 
521  for (i = 0; i < size; i++) {
522  speech_energy += fabsf(speech_synth[i]);
523  postfilter_energy += fabsf(in[i]);
524  }
525  gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
526  (1.0 - alpha) * speech_energy / postfilter_energy;
527 
528  for (i = 0; i < size; i++) {
529  mem = alpha * mem + gain_scale_factor;
530  out[i] = in[i] * mem;
531  }
532 
533  *gain_mem = mem;
534 }
535 
536 /**
537  * Kalman smoothing function.
538  *
539  * This function looks back pitch +/- 3 samples back into history to find
540  * the best fitting curve (that one giving the optimal gain of the two
541  * signals, i.e. the highest dot product between the two), and then
542  * uses that signal history to smoothen the output of the speech synthesis
543  * filter.
544  *
545  * @param s WMA Voice decoding context
546  * @param pitch pitch of the speech signal
547  * @param in input speech signal
548  * @param out output pointer for smoothened signal
549  * @param size input/output buffer size
550  *
551  * @returns -1 if no smoothening took place, e.g. because no optimal
552  * fit could be found, or 0 on success.
553  */
554 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
555  const float *in, float *out, int size)
556 {
557  int n;
558  float optimal_gain = 0, dot;
559  const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
560  *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
561  *best_hist_ptr = NULL;
562 
563  /* find best fitting point in history */
564  do {
565  dot = avpriv_scalarproduct_float_c(in, ptr, size);
566  if (dot > optimal_gain) {
567  optimal_gain = dot;
568  best_hist_ptr = ptr;
569  }
570  } while (--ptr >= end);
571 
572  if (optimal_gain <= 0)
573  return -1;
574  dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
575  if (dot <= 0) // would be 1.0
576  return -1;
577 
578  if (optimal_gain <= dot) {
579  dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
580  } else
581  dot = 0.625;
582 
583  /* actual smoothing */
584  for (n = 0; n < size; n++)
585  out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
586 
587  return 0;
588 }
589 
590 /**
591  * Get the tilt factor of a formant filter from its transfer function
592  * @see #tilt_factor() in amrnbdec.c, which does essentially the same,
593  * but somehow (??) it does a speech synthesis filter in the
594  * middle, which is missing here
595  *
596  * @param lpcs LPC coefficients
597  * @param n_lpcs Size of LPC buffer
598  * @returns the tilt factor
599  */
600 static float tilt_factor(const float *lpcs, int n_lpcs)
601 {
602  float rh0, rh1;
603 
604  rh0 = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
605  rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
606 
607  return rh1 / rh0;
608 }
609 
610 /**
611  * Derive denoise filter coefficients (in real domain) from the LPCs.
612  */
613 static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
614  int fcb_type, float *coeffs_dst, int remainder)
615 {
616  float last_coeff, min = 15.0, max = -15.0;
617  float irange, angle_mul, gain_mul, range, sq;
618  LOCAL_ALIGNED_32(float, coeffs, [0x82]);
619  LOCAL_ALIGNED_32(float, lpcs, [0x82]);
620  LOCAL_ALIGNED_32(float, lpcs_dct, [0x82]);
621  int n, idx;
622 
623  memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));
624 
625  /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
626  s->rdft_fn(s->rdft, lpcs, lpcs_src, sizeof(float));
627 #define log_range(var, assign) do { \
628  float tmp = log10f(assign); var = tmp; \
629  max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
630  } while (0)
631  log_range(last_coeff, lpcs[64] * lpcs[64]);
632  for (n = 1; n < 64; n++)
633  log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
634  lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
635  log_range(lpcs[0], lpcs[0] * lpcs[0]);
636 #undef log_range
637  range = max - min;
638  lpcs[64] = last_coeff;
639 
640  /* Now, use this spectrum to pick out these frequencies with higher
641  * (relative) power/energy (which we then take to be "not noise"),
642  * and set up a table (still in lpc[]) of (relative) gains per frequency.
643  * These frequencies will be maintained, while others ("noise") will be
644  * decreased in the filter output. */
645  irange = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
646  gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
647  (5.0 / 14.7));
648  angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
649  for (n = 0; n <= 64; n++) {
650  float pwr;
651 
652  idx = lrint((max - lpcs[n]) * irange - 1);
653  idx = FFMAX(0, idx);
654  pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
655  lpcs[n] = angle_mul * pwr;
656 
657  /* 70.57 =~ 1/log10(1.0331663) */
658  idx = av_clipf((pwr * gain_mul - 0.0295) * 70.570526123, 0, INT_MAX / 2);
659 
660  if (idx > 127) { // fall back if index falls outside table range
661  coeffs[n] = wmavoice_energy_table[127] *
662  powf(1.0331663, idx - 127);
663  } else
664  coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
665  }
666 
667  /* calculate the Hilbert transform of the gains, which we do (since this
668  * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
669  * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
670  * "moment" of the LPCs in this filter. */
671  s->dct_fn(s->dct, lpcs_dct, lpcs, sizeof(float));
672  s->dst_fn(s->dst, lpcs, lpcs_dct, sizeof(float));
673 
674  /* Split out the coefficient indexes into phase/magnitude pairs */
675  idx = 255 + av_clip(lpcs[64], -255, 255);
676  coeffs[0] = coeffs[0] * s->cos[idx];
677  idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
678  last_coeff = coeffs[64] * s->cos[idx];
679  for (n = 63;; n--) {
680  idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
681  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
682  coeffs[n * 2] = coeffs[n] * s->cos[idx];
683 
684  if (!--n) break;
685 
686  idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
687  coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
688  coeffs[n * 2] = coeffs[n] * s->cos[idx];
689  }
690  coeffs[64] = last_coeff;
691 
692  /* move into real domain */
693  s->irdft_fn(s->irdft, coeffs_dst, coeffs, sizeof(AVComplexFloat));
694 
695  /* tilt correction and normalize scale */
696  memset(&coeffs_dst[remainder], 0, sizeof(coeffs_dst[0]) * (128 - remainder));
697  if (s->denoise_tilt_corr) {
698  float tilt_mem = 0;
699 
700  coeffs_dst[remainder - 1] = 0;
701  ff_tilt_compensation(&tilt_mem,
702  -1.8 * tilt_factor(coeffs_dst, remainder - 1),
703  coeffs_dst, remainder);
704  }
705  sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs_dst, coeffs_dst,
706  remainder));
707  for (n = 0; n < remainder; n++)
708  coeffs_dst[n] *= sq;
709 }
710 
711 /**
712  * This function applies a Wiener filter on the (noisy) speech signal as
713  * a means to denoise it.
714  *
715  * - take RDFT of LPCs to get the power spectrum of the noise + speech;
716  * - using this power spectrum, calculate (for each frequency) the Wiener
717  * filter gain, which depends on the frequency power and desired level
718  * of noise subtraction (when set too high, this leads to artifacts)
719  * We can do this symmetrically over the X-axis (so 0-4kHz is the inverse
720  * of 4-8kHz);
721  * - by doing a phase shift, calculate the Hilbert transform of this array
722  * of per-frequency filter-gains to get the filtering coefficients;
723  * - smoothen/normalize/de-tilt these filter coefficients as desired;
724  * - take RDFT of noisy sound, apply the coefficients and take its IRDFT
725  * to get the denoised speech signal;
726  * - the leftover (i.e. output of the IRDFT on denoised speech data beyond
727  * the frame boundary) are saved and applied to subsequent frames by an
728  * overlap-add method (otherwise you get clicking-artifacts).
729  *
730  * @param s WMA Voice decoding context
731  * @param fcb_type Frame (codebook) type
732  * @param synth_pf input: the noisy speech signal, output: denoised speech
733  * data; should be 16-byte aligned (for ASM purposes)
734  * @param size size of the speech data
735  * @param lpcs LPCs used to synthesize this frame's speech data
736  */
737 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
738  float *synth_pf, int size,
739  const float *lpcs)
740 {
741  int remainder, lim, n;
742 
743  if (fcb_type != FCB_TYPE_SILENCE) {
744  LOCAL_ALIGNED_32(float, coeffs_f, [0x82]);
745  LOCAL_ALIGNED_32(float, synth_f, [0x82]);
746  float *tilted_lpcs = s->tilted_lpcs_pf,
747  *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
748 
749  tilted_lpcs[0] = 1.0;
750  memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
751  memset(&tilted_lpcs[s->lsps + 1], 0,
752  sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
753  ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
754  tilted_lpcs, s->lsps + 2);
755 
756  /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
757  * size is applied to the next frame. All input beyond this is zero,
758  * and thus all output beyond this will go towards zero, hence we can
759  * limit to min(size-1, 127-size) as a performance consideration. */
760  remainder = FFMIN(127 - size, size - 1);
761  calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
762 
763  /* apply coefficients (in frequency spectrum domain), i.e. complex
764  * number multiplication */
765  memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
766  s->rdft_fn(s->rdft, synth_f, synth_pf, sizeof(float));
767  s->rdft_fn(s->rdft, coeffs_f, coeffs, sizeof(float));
768  synth_f[0] *= coeffs_f[0];
769  synth_f[1] *= coeffs_f[1];
770  for (n = 1; n <= 64; n++) {
771  float v1 = synth_f[n * 2], v2 = synth_f[n * 2 + 1];
772  synth_f[n * 2] = v1 * coeffs_f[n * 2] - v2 * coeffs_f[n * 2 + 1];
773  synth_f[n * 2 + 1] = v2 * coeffs_f[n * 2] + v1 * coeffs_f[n * 2 + 1];
774  }
775  s->irdft_fn(s->irdft, synth_pf, synth_f, sizeof(AVComplexFloat));
776  }
777 
778  /* merge filter output with the history of previous runs */
779  if (s->denoise_filter_cache_size) {
780  lim = FFMIN(s->denoise_filter_cache_size, size);
781  for (n = 0; n < lim; n++)
782  synth_pf[n] += s->denoise_filter_cache[n];
783  s->denoise_filter_cache_size -= lim;
784  memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
785  sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
786  }
787 
788  /* move remainder of filter output into a cache for future runs */
789  if (fcb_type != FCB_TYPE_SILENCE) {
790  lim = FFMIN(remainder, s->denoise_filter_cache_size);
791  for (n = 0; n < lim; n++)
792  s->denoise_filter_cache[n] += synth_pf[size + n];
793  if (lim < remainder) {
794  memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
795  sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
796  s->denoise_filter_cache_size = remainder;
797  }
798  }
799 }
800 
801 /**
802  * Averaging projection filter, the postfilter used in WMAVoice.
803  *
804  * This uses the following steps:
805  * - A zero-synthesis filter (generate excitation from synth signal)
806  * - Kalman smoothing on excitation, based on pitch
807  * - Re-synthesized smoothened output
808  * - Iterative Wiener denoise filter
809  * - Adaptive gain filter
810  * - DC filter
811  *
812  * @param s WMAVoice decoding context
813  * @param synth Speech synthesis output (before postfilter)
814  * @param samples Output buffer for filtered samples
815  * @param size Buffer size of synth & samples
816  * @param lpcs Generated LPCs used for speech synthesis
817  * @param zero_exc_pf destination for zero synthesis filter (16-byte aligned)
818  * @param fcb_type Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
819  * @param pitch Pitch of the input signal
820  */
821 static void postfilter(WMAVoiceContext *s, const float *synth,
822  float *samples, int size,
823  const float *lpcs, float *zero_exc_pf,
824  int fcb_type, int pitch)
825 {
826  float synth_filter_in_buf[MAX_FRAMESIZE / 2],
827  *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
828  *synth_filter_in = zero_exc_pf;
829 
830  av_assert0(size <= MAX_FRAMESIZE / 2);
831 
832  /* generate excitation from input signal */
833  ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
834 
835  if (fcb_type >= FCB_TYPE_AW_PULSES &&
836  !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
837  synth_filter_in = synth_filter_in_buf;
838 
839  /* re-synthesize speech after smoothening, and keep history */
840  ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
841  synth_filter_in, size, s->lsps);
842  memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
843  sizeof(synth_pf[0]) * s->lsps);
844 
845  wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
846 
847  adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
848  &s->postfilter_agc);
849 
850  if (s->dc_level > 8) {
851  /* remove ultra-low frequency DC noise / highpass filter;
852  * coefficients are identical to those used in SIPR decoding,
853  * and very closely resemble those used in AMR-NB decoding. */
855  (const float[2]) { -1.99997, 1.0 },
856  (const float[2]) { -1.9330735188, 0.93589198496 },
857  0.93980580475, s->dcf_mem, size);
858  }
859 }
860 /**
861  * @}
862  */
863 
864 /**
865  * Dequantize LSPs
866  * @param lsps output pointer to the array that will hold the LSPs
867  * @param num number of LSPs to be dequantized
868  * @param values quantized values, contains n_stages values
869  * @param sizes range (i.e. max value) of each quantized value
870  * @param n_stages number of dequantization runs
871  * @param table dequantization table to be used
872  * @param mul_q LSF multiplier
873  * @param base_q base (lowest) LSF values
874  */
875 static void dequant_lsps(double *lsps, int num,
876  const uint16_t *values,
877  const uint16_t *sizes,
878  int n_stages, const uint8_t *table,
879  const double *mul_q,
880  const double *base_q)
881 {
882  int n, m;
883 
884  memset(lsps, 0, num * sizeof(*lsps));
885  for (n = 0; n < n_stages; n++) {
886  const uint8_t *t_off = &table[values[n] * num];
887  double base = base_q[n], mul = mul_q[n];
888 
889  for (m = 0; m < num; m++)
890  lsps[m] += base + mul * t_off[m];
891 
892  table += sizes[n] * num;
893  }
894 }
895 
896 /**
897  * @name LSP dequantization routines
898  * LSP dequantization routines, for 10/16LSPs and independent/residual coding.
899  * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;
900  * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
901  * @{
902  */
903 /**
904  * Parse 10 independently-coded LSPs.
905  */
906 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
907 {
908  static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
909  static const double mul_lsf[4] = {
910  5.2187144800e-3, 1.4626986422e-3,
911  9.6179549166e-4, 1.1325736225e-3
912  };
913  static const double base_lsf[4] = {
914  M_PI * -2.15522e-1, M_PI * -6.1646e-2,
915  M_PI * -3.3486e-2, M_PI * -5.7408e-2
916  };
917  uint16_t v[4];
918 
919  v[0] = get_bits(gb, 8);
920  v[1] = get_bits(gb, 6);
921  v[2] = get_bits(gb, 5);
922  v[3] = get_bits(gb, 5);
923 
924  dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
925  mul_lsf, base_lsf);
926 }
927 
928 /**
929  * Parse 10 independently-coded LSPs, and then derive the tables to
930  * generate LSPs for the other frames from them (residual coding).
931  */
933  double *i_lsps, const double *old,
934  double *a1, double *a2, int q_mode)
935 {
936  static const uint16_t vec_sizes[3] = { 128, 64, 64 };
937  static const double mul_lsf[3] = {
938  2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
939  };
940  static const double base_lsf[3] = {
941  M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
942  };
943  const float (*ipol_tab)[2][10] = q_mode ?
945  uint16_t interpol, v[3];
946  int n;
947 
948  dequant_lsp10i(gb, i_lsps);
949 
950  interpol = get_bits(gb, 5);
951  v[0] = get_bits(gb, 7);
952  v[1] = get_bits(gb, 6);
953  v[2] = get_bits(gb, 6);
954 
955  for (n = 0; n < 10; n++) {
956  double delta = old[n] - i_lsps[n];
957  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
958  a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
959  }
960 
961  dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
962  mul_lsf, base_lsf);
963 }
964 
965 /**
966  * Parse 16 independently-coded LSPs.
967  */
968 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
969 {
970  static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
971  static const double mul_lsf[5] = {
972  3.3439586280e-3, 6.9908173703e-4,
973  3.3216608306e-3, 1.0334960326e-3,
974  3.1899104283e-3
975  };
976  static const double base_lsf[5] = {
977  M_PI * -1.27576e-1, M_PI * -2.4292e-2,
978  M_PI * -1.28094e-1, M_PI * -3.2128e-2,
979  M_PI * -1.29816e-1
980  };
981  uint16_t v[5];
982 
983  v[0] = get_bits(gb, 8);
984  v[1] = get_bits(gb, 6);
985  v[2] = get_bits(gb, 7);
986  v[3] = get_bits(gb, 6);
987  v[4] = get_bits(gb, 7);
988 
989  dequant_lsps( lsps, 5, v, vec_sizes, 2,
990  wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
991  dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
992  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
993  dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
994  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
995 }
996 
997 /**
998  * Parse 16 independently-coded LSPs, and then derive the tables to
999  * generate LSPs for the other frames from them (residual coding).
1000  */
1002  double *i_lsps, const double *old,
1003  double *a1, double *a2, int q_mode)
1004 {
1005  static const uint16_t vec_sizes[3] = { 128, 128, 128 };
1006  static const double mul_lsf[3] = {
1007  1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
1008  };
1009  static const double base_lsf[3] = {
1010  M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
1011  };
1012  const float (*ipol_tab)[2][16] = q_mode ?
1014  uint16_t interpol, v[3];
1015  int n;
1016 
1017  dequant_lsp16i(gb, i_lsps);
1018 
1019  interpol = get_bits(gb, 5);
1020  v[0] = get_bits(gb, 7);
1021  v[1] = get_bits(gb, 7);
1022  v[2] = get_bits(gb, 7);
1023 
1024  for (n = 0; n < 16; n++) {
1025  double delta = old[n] - i_lsps[n];
1026  a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
1027  a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
1028  }
1029 
1030  dequant_lsps( a2, 10, v, vec_sizes, 1,
1031  wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
1032  dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
1033  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
1034  dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
1035  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
1036 }
1037 
1038 /**
1039  * @}
1040  * @name Pitch-adaptive window coding functions
1041  * The next few functions are for pitch-adaptive window coding.
1042  * @{
1043  */
1044 /**
1045  * Parse the offset of the first pitch-adaptive window pulses, and
1046  * the distribution of pulses between the two blocks in this frame.
1047  * @param s WMA Voice decoding context private data
1048  * @param gb bit I/O context
1049  * @param pitch pitch for each block in this frame
1050  */
1052  const int *pitch)
1053 {
1054  static const int16_t start_offset[94] = {
1055  -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1056  13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1057  27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1058  45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1059  69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1060  93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1061  117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1062  141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1063  };
1064  int bits, offset;
1065 
1066  /* position of pulse */
1067  s->aw_idx_is_ext = 0;
1068  if ((bits = get_bits(gb, 6)) >= 54) {
1069  s->aw_idx_is_ext = 1;
1070  bits += (bits - 54) * 3 + get_bits(gb, 2);
1071  }
1072 
1073  /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
1074  * the distribution of the pulses in each block contained in this frame. */
1075  s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
1076  for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
1077  s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
1078  s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
1079  offset += s->aw_n_pulses[0] * pitch[0];
1080  s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
1081  s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
1082 
1083  /* if continuing from a position before the block, reset position to
1084  * start of block (when corrected for the range over which it can be
1085  * spread in aw_pulse_set1()). */
1086  if (start_offset[bits] < MAX_FRAMESIZE / 2) {
1087  while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
1088  s->aw_first_pulse_off[1] -= pitch[1];
1089  if (start_offset[bits] < 0)
1090  while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
1091  s->aw_first_pulse_off[0] -= pitch[0];
1092  }
1093 }
1094 
1095 /**
1096  * Apply second set of pitch-adaptive window pulses.
1097  * @param s WMA Voice decoding context private data
1098  * @param gb bit I/O context
1099  * @param block_idx block index in frame [0, 1]
1100  * @param fcb structure containing fixed codebook vector info
1101  * @return -1 on error, 0 otherwise
1102  */
1104  int block_idx, AMRFixed *fcb)
1105 {
1106  uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
1107  uint16_t *use_mask = use_mask_mem + 2;
1108  /* in this function, idx is the index in the 80-bit (+ padding) use_mask
1109  * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
1110  * of idx are the position of the bit within a particular item in the
1111  * array (0 being the most significant bit, and 15 being the least
1112  * significant bit), and the remainder (>> 4) is the index in the
1113  * use_mask[]-array. This is faster and uses less memory than using a
1114  * 80-byte/80-int array. */
1115  int pulse_off = s->aw_first_pulse_off[block_idx],
1116  pulse_start, n, idx, range, aidx, start_off = 0;
1117 
1118  /* set offset of first pulse to within this block */
1119  if (s->aw_n_pulses[block_idx] > 0)
1120  while (pulse_off + s->aw_pulse_range < 1)
1121  pulse_off += fcb->pitch_lag;
1122 
1123  /* find range per pulse */
1124  if (s->aw_n_pulses[0] > 0) {
1125  if (block_idx == 0) {
1126  range = 32;
1127  } else /* block_idx = 1 */ {
1128  range = 8;
1129  if (s->aw_n_pulses[block_idx] > 0)
1130  pulse_off = s->aw_next_pulse_off_cache;
1131  }
1132  } else
1133  range = 16;
1134  pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1135 
1136  /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
1137  * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
1138  * we exclude that range from being pulsed again in this function. */
1139  memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
1140  memset( use_mask, -1, 5 * sizeof(use_mask[0]));
1141  memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
1142  if (s->aw_n_pulses[block_idx] > 0)
1143  for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
1144  int excl_range = s->aw_pulse_range; // always 16 or 24
1145  uint16_t *use_mask_ptr = &use_mask[idx >> 4];
1146  int first_sh = 16 - (idx & 15);
1147  *use_mask_ptr++ &= 0xFFFFu << first_sh;
1148  excl_range -= first_sh;
1149  if (excl_range >= 16) {
1150  *use_mask_ptr++ = 0;
1151  *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1152  } else
1153  *use_mask_ptr &= 0xFFFF >> excl_range;
1154  }
1155 
1156  /* find the 'aidx'th offset that is not excluded */
1157  aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
1158  for (n = 0; n <= aidx; pulse_start++) {
1159  for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
1160  if (idx >= MAX_FRAMESIZE / 2) { // find from zero
1161  if (use_mask[0]) idx = 0x0F;
1162  else if (use_mask[1]) idx = 0x1F;
1163  else if (use_mask[2]) idx = 0x2F;
1164  else if (use_mask[3]) idx = 0x3F;
1165  else if (use_mask[4]) idx = 0x4F;
1166  else return -1;
1167  idx -= av_log2_16bit(use_mask[idx >> 4]);
1168  }
1169  if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1170  use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1171  n++;
1172  start_off = idx;
1173  }
1174  }
1175 
1176  fcb->x[fcb->n] = start_off;
1177  fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
1178  fcb->n++;
1179 
1180  /* set offset for next block, relative to start of that block */
1181  n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
1182  s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
1183  return 0;
1184 }
1185 
1186 /**
1187  * Apply first set of pitch-adaptive window pulses.
1188  * @param s WMA Voice decoding context private data
1189  * @param gb bit I/O context
1190  * @param block_idx block index in frame [0, 1]
1191  * @param fcb storage location for fixed codebook pulse info
1192  */
1194  int block_idx, AMRFixed *fcb)
1195 {
1196  int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
1197  float v;
1198 
1199  if (s->aw_n_pulses[block_idx] > 0) {
1200  int n, v_mask, i_mask, sh, n_pulses;
1201 
1202  if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
1203  n_pulses = 3;
1204  v_mask = 8;
1205  i_mask = 7;
1206  sh = 4;
1207  } else { // 4 pulses, 1:sign + 2:index each
1208  n_pulses = 4;
1209  v_mask = 4;
1210  i_mask = 3;
1211  sh = 3;
1212  }
1213 
1214  for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1215  fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
1216  fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
1217  s->aw_first_pulse_off[block_idx];
1218  while (fcb->x[fcb->n] < 0)
1219  fcb->x[fcb->n] += fcb->pitch_lag;
1220  if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
1221  fcb->n++;
1222  }
1223  } else {
1224  int num2 = (val & 0x1FF) >> 1, delta, idx;
1225 
1226  if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
1227  else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
1228  else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
1229  else { delta = 7; idx = num2 + 1 - 3 * 75; }
1230  v = (val & 0x200) ? -1.0 : 1.0;
1231 
1232  fcb->no_repeat_mask |= 3 << fcb->n;
1233  fcb->x[fcb->n] = idx - delta;
1234  fcb->y[fcb->n] = v;
1235  fcb->x[fcb->n + 1] = idx;
1236  fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
1237  fcb->n += 2;
1238  }
1239 }
1240 
1241 /**
1242  * @}
1243  *
1244  * Generate a random number from frame_cntr and block_idx, which will live
1245  * in the range [0, 1000 - block_size] (so it can be used as an index in a
1246  * table of size 1000 of which you want to read block_size entries).
1247  *
1248  * @param frame_cntr current frame number
1249  * @param block_num current block index
1250  * @param block_size amount of entries we want to read from a table
1251  * that has 1000 entries
1252  * @return a (non-)random number in the [0, 1000 - block_size] range.
1253  */
1254 static int pRNG(int frame_cntr, int block_num, int block_size)
1255 {
1256  /* array to simplify the calculation of z:
1257  * y = (x % 9) * 5 + 6;
1258  * z = (49995 * x) / y;
1259  * Since y only has 9 values, we can remove the division by using a
1260  * LUT and using FASTDIV-style divisions. For each of the 9 values
1261  * of y, we can rewrite z as:
1262  * z = x * (49995 / y) + x * ((49995 % y) / y)
1263  * In this table, each col represents one possible value of y, the
1264  * first number is 49995 / y, and the second is the FASTDIV variant
1265  * of 49995 % y / y. */
1266  static const unsigned int div_tbl[9][2] = {
1267  { 8332, 3 * 715827883U }, // y = 6
1268  { 4545, 0 * 390451573U }, // y = 11
1269  { 3124, 11 * 268435456U }, // y = 16
1270  { 2380, 15 * 204522253U }, // y = 21
1271  { 1922, 23 * 165191050U }, // y = 26
1272  { 1612, 23 * 138547333U }, // y = 31
1273  { 1388, 27 * 119304648U }, // y = 36
1274  { 1219, 16 * 104755300U }, // y = 41
1275  { 1086, 39 * 93368855U } // y = 46
1276  };
1277  unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
1278  if (x >= 0xFFFF) x -= 0xFFFF; // max value of x is 8*1877+0xFFFE=0x13AA6,
1279  // so this is effectively a modulo (%)
1280  y = x - 9 * MULH(477218589, x); // x % 9
1281  z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
1282  // z = x * 49995 / (y * 5 + 6)
1283  return z % (1000 - block_size);
1284 }
1285 
1286 /**
1287  * Parse hardcoded signal for a single block.
1288  * @note see #synth_block().
1289  */
1291  int block_idx, int size,
1292  const struct frame_type_desc *frame_desc,
1293  float *excitation)
1294 {
1295  float gain;
1296  int n, r_idx;
1297 
1299 
1300  /* Set the offset from which we start reading wmavoice_std_codebook */
1301  if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
1302  r_idx = pRNG(s->frame_cntr, block_idx, size);
1303  gain = s->silence_gain;
1304  } else /* FCB_TYPE_HARDCODED */ {
1305  r_idx = get_bits(gb, 8);
1306  gain = wmavoice_gain_universal[get_bits(gb, 6)];
1307  }
1308 
1309  /* Clear gain prediction parameters */
1310  memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
1311 
1312  /* Apply gain to hardcoded codebook and use that as excitation signal */
1313  for (n = 0; n < size; n++)
1314  excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
1315 }
1316 
1317 /**
1318  * Parse FCB/ACB signal for a single block.
1319  * @note see #synth_block().
1320  */
1322  int block_idx, int size,
1323  int block_pitch_sh2,
1324  const struct frame_type_desc *frame_desc,
1325  float *excitation)
1326 {
1327  static const float gain_coeff[6] = {
1328  0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1329  };
1330  float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
1331  int n, idx, gain_weight;
1332  AMRFixed fcb;
1333 
1334  av_assert0(size <= MAX_FRAMESIZE / 2);
1335  memset(pulses, 0, sizeof(*pulses) * size);
1336 
1337  fcb.pitch_lag = block_pitch_sh2 >> 2;
1338  fcb.pitch_fac = 1.0;
1339  fcb.no_repeat_mask = 0;
1340  fcb.n = 0;
1341 
1342  /* For the other frame types, this is where we apply the innovation
1343  * (fixed) codebook pulses of the speech signal. */
1344  if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
1345  aw_pulse_set1(s, gb, block_idx, &fcb);
1346  if (aw_pulse_set2(s, gb, block_idx, &fcb)) {
1347  /* Conceal the block with silence and return.
1348  * Skip the correct amount of bits to read the next
1349  * block from the correct offset. */
1350  int r_idx = pRNG(s->frame_cntr, block_idx, size);
1351 
1352  for (n = 0; n < size; n++)
1353  excitation[n] =
1354  wmavoice_std_codebook[r_idx + n] * s->silence_gain;
1355  skip_bits(gb, 7 + 1);
1356  return;
1357  }
1358  } else /* FCB_TYPE_EXC_PULSES */ {
1359  int offset_nbits = 5 - frame_desc->log_n_blocks;
1360 
1361  fcb.no_repeat_mask = -1;
1362  /* similar to ff_decode_10_pulses_35bits(), but with single pulses
1363  * (instead of double) for a subset of pulses */
1364  for (n = 0; n < 5; n++) {
1365  float sign;
1366  int pos1, pos2;
1367 
1368  sign = get_bits1(gb) ? 1.0 : -1.0;
1369  pos1 = get_bits(gb, offset_nbits);
1370  fcb.x[fcb.n] = n + 5 * pos1;
1371  fcb.y[fcb.n++] = sign;
1372  if (n < frame_desc->dbl_pulses) {
1373  pos2 = get_bits(gb, offset_nbits);
1374  fcb.x[fcb.n] = n + 5 * pos2;
1375  fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
1376  }
1377  }
1378  }
1379  ff_set_fixed_vector(pulses, &fcb, 1.0, size);
1380 
1381  /* Calculate gain for adaptive & fixed codebook signal.
1382  * see ff_amr_set_fixed_gain(). */
1383  idx = get_bits(gb, 7);
1384  fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
1385  gain_coeff, 6) -
1386  5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
1387  acb_gain = wmavoice_gain_codebook_acb[idx];
1388  pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
1389  -2.9957322736 /* log(0.05) */,
1390  1.6094379124 /* log(5.0) */);
1391 
1392  gain_weight = 8 >> frame_desc->log_n_blocks;
1393  memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
1394  sizeof(*s->gain_pred_err) * (6 - gain_weight));
1395  for (n = 0; n < gain_weight; n++)
1396  s->gain_pred_err[n] = pred_err;
1397 
1398  /* Calculation of adaptive codebook */
1399  if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
1400  int len;
1401  for (n = 0; n < size; n += len) {
1402  int next_idx_sh16;
1403  int abs_idx = block_idx * size + n;
1404  int pitch_sh16 = (s->last_pitch_val << 16) +
1405  s->pitch_diff_sh16 * abs_idx;
1406  int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1407  int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1408  idx = idx_sh16 >> 16;
1409  if (s->pitch_diff_sh16) {
1410  if (s->pitch_diff_sh16 > 0) {
1411  next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1412  } else
1413  next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1414  len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
1415  1, size - n);
1416  } else
1417  len = size;
1418 
1419  ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
1421  idx, 9, len);
1422  }
1423  } else /* ACB_TYPE_HAMMING */ {
1424  int block_pitch = block_pitch_sh2 >> 2;
1425  idx = block_pitch_sh2 & 3;
1426  if (idx) {
1427  ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
1429  idx, 8, size);
1430  } else
1431  av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
1432  sizeof(float) * size);
1433  }
1434 
1435  /* Interpolate ACB/FCB and use as excitation signal */
1436  ff_weighted_vector_sumf(excitation, excitation, pulses,
1437  acb_gain, fcb_gain, size);
1438 }
1439 
1440 /**
1441  * Parse data in a single block.
1442  *
1443  * @param s WMA Voice decoding context private data
1444  * @param gb bit I/O context
1445  * @param block_idx index of the to-be-read block
1446  * @param size amount of samples to be read in this block
1447  * @param block_pitch_sh2 pitch for this block << 2
1448  * @param lsps LSPs for (the end of) this frame
1449  * @param prev_lsps LSPs for the last frame
1450  * @param frame_desc frame type descriptor
1451  * @param excitation target memory for the ACB+FCB interpolated signal
1452  * @param synth target memory for the speech synthesis filter output
1453  * @return 0 on success, <0 on error.
1454  */
1456  int block_idx, int size,
1457  int block_pitch_sh2,
1458  const double *lsps, const double *prev_lsps,
1459  const struct frame_type_desc *frame_desc,
1460  float *excitation, float *synth)
1461 {
1462  double i_lsps[MAX_LSPS];
1463  float lpcs[MAX_LSPS];
1464  float fac;
1465  int n;
1466 
1467  if (frame_desc->acb_type == ACB_TYPE_NONE)
1468  synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
1469  else
1470  synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
1471  frame_desc, excitation);
1472 
1473  /* convert interpolated LSPs to LPCs */
1474  fac = (block_idx + 0.5) / frame_desc->n_blocks;
1475  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1476  i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1477  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1478 
1479  /* Speech synthesis */
1480  ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
1481 }
1482 
1483 /**
1484  * Synthesize output samples for a single frame.
1485  *
1486  * @param ctx WMA Voice decoder context
1487  * @param gb bit I/O context (s->gb or one for cross-packet superframes)
1488  * @param frame_idx Frame number within superframe [0-2]
1489  * @param samples pointer to output sample buffer, has space for at least 160
1490  * samples
1491  * @param lsps LSP array
1492  * @param prev_lsps array of previous frame's LSPs
1493  * @param excitation target buffer for excitation signal
1494  * @param synth target buffer for synthesized speech data
1495  * @return 0 on success, <0 on error.
1496  */
1497 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
1498  float *samples,
1499  const double *lsps, const double *prev_lsps,
1500  float *excitation, float *synth)
1501 {
1503  int n, n_blocks_x2, log_n_blocks_x2, av_uninit(cur_pitch_val);
1504  int pitch[MAX_BLOCKS], av_uninit(last_block_pitch);
1505 
1506  /* Parse frame type ("frame header"), see frame_descs */
1507  int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc, 6, 3)], block_nsamples;
1508 
1509  if (bd_idx < 0) {
1511  "Invalid frame type VLC code, skipping\n");
1512  return AVERROR_INVALIDDATA;
1513  }
1514 
1515  block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
1516 
1517  /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
1518  if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
1519  /* Pitch is provided per frame, which is interpreted as the pitch of
1520  * the last sample of the last block of this frame. We can interpolate
1521  * the pitch of other blocks (and even pitch-per-sample) by gradually
1522  * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
1523  n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
1524  log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
1525  cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
1526  cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
1527  if (s->last_acb_type == ACB_TYPE_NONE ||
1528  20 * abs(cur_pitch_val - s->last_pitch_val) >
1529  (cur_pitch_val + s->last_pitch_val))
1530  s->last_pitch_val = cur_pitch_val;
1531 
1532  /* pitch per block */
1533  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1534  int fac = n * 2 + 1;
1535 
1536  pitch[n] = (MUL16(fac, cur_pitch_val) +
1537  MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
1538  frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
1539  }
1540 
1541  /* "pitch-diff-per-sample" for calculation of pitch per sample */
1542  s->pitch_diff_sh16 =
1543  (cur_pitch_val - s->last_pitch_val) * (1 << 16) / MAX_FRAMESIZE;
1544  }
1545 
1546  /* Global gain (if silence) and pitch-adaptive window coordinates */
1547  switch (frame_descs[bd_idx].fcb_type) {
1548  case FCB_TYPE_SILENCE:
1549  s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
1550  break;
1551  case FCB_TYPE_AW_PULSES:
1552  aw_parse_coords(s, gb, pitch);
1553  break;
1554  }
1555 
1556  for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
1557  int bl_pitch_sh2;
1558 
1559  /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
1560  switch (frame_descs[bd_idx].acb_type) {
1561  case ACB_TYPE_HAMMING: {
1562  /* Pitch is given per block. Per-block pitches are encoded as an
1563  * absolute value for the first block, and then delta values
1564  * relative to this value) for all subsequent blocks. The scale of
1565  * this pitch value is semi-logarithmic compared to its use in the
1566  * decoder, so we convert it to normal scale also. */
1567  int block_pitch,
1568  t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
1569  t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
1570  t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
1571 
1572  if (n == 0) {
1573  block_pitch = get_bits(gb, s->block_pitch_nbits);
1574  } else
1575  block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
1576  get_bits(gb, s->block_delta_pitch_nbits);
1577  /* Convert last_ so that any next delta is within _range */
1578  last_block_pitch = av_clip(block_pitch,
1579  s->block_delta_pitch_hrange,
1580  s->block_pitch_range -
1581  s->block_delta_pitch_hrange);
1582 
1583  /* Convert semi-log-style scale back to normal scale */
1584  if (block_pitch < t1) {
1585  bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
1586  } else {
1587  block_pitch -= t1;
1588  if (block_pitch < t2) {
1589  bl_pitch_sh2 =
1590  (s->block_conv_table[1] << 2) + (block_pitch << 1);
1591  } else {
1592  block_pitch -= t2;
1593  if (block_pitch < t3) {
1594  bl_pitch_sh2 =
1595  (s->block_conv_table[2] + block_pitch) << 2;
1596  } else
1597  bl_pitch_sh2 = s->block_conv_table[3] << 2;
1598  }
1599  }
1600  pitch[n] = bl_pitch_sh2 >> 2;
1601  break;
1602  }
1603 
1604  case ACB_TYPE_ASYMMETRIC: {
1605  bl_pitch_sh2 = pitch[n] << 2;
1606  break;
1607  }
1608 
1609  default: // ACB_TYPE_NONE has no pitch
1610  bl_pitch_sh2 = 0;
1611  break;
1612  }
1613 
1614  synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1615  lsps, prev_lsps, &frame_descs[bd_idx],
1616  &excitation[n * block_nsamples],
1617  &synth[n * block_nsamples]);
1618  }
1619 
1620  /* Averaging projection filter, if applicable. Else, just copy samples
1621  * from synthesis buffer */
1622  if (s->do_apf) {
1623  double i_lsps[MAX_LSPS];
1624  float lpcs[MAX_LSPS];
1625 
1626  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1627  i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1628  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1629  postfilter(s, synth, samples, 80, lpcs,
1630  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
1631  frame_descs[bd_idx].fcb_type, pitch[0]);
1632 
1633  for (n = 0; n < s->lsps; n++) // LSF -> LSP
1634  i_lsps[n] = cos(lsps[n]);
1635  ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1636  postfilter(s, &synth[80], &samples[80], 80, lpcs,
1637  &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
1638  frame_descs[bd_idx].fcb_type, pitch[0]);
1639  } else
1640  memcpy(samples, synth, 160 * sizeof(synth[0]));
1641 
1642  /* Cache values for next frame */
1643  s->frame_cntr++;
1644  if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
1645  s->last_acb_type = frame_descs[bd_idx].acb_type;
1646  switch (frame_descs[bd_idx].acb_type) {
1647  case ACB_TYPE_NONE:
1648  s->last_pitch_val = 0;
1649  break;
1650  case ACB_TYPE_ASYMMETRIC:
1651  s->last_pitch_val = cur_pitch_val;
1652  break;
1653  case ACB_TYPE_HAMMING:
1654  s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
1655  break;
1656  }
1657 
1658  return 0;
1659 }
1660 
1661 /**
1662  * Ensure minimum value for first item, maximum value for last value,
1663  * proper spacing between each value and proper ordering.
1664  *
1665  * @param lsps array of LSPs
1666  * @param num size of LSP array
1667  *
1668  * @note basically a double version of #ff_acelp_reorder_lsf(), might be
1669  * useful to put in a generic location later on. Parts are also
1670  * present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),
1671  * which is in float.
1672  */
1673 static void stabilize_lsps(double *lsps, int num)
1674 {
1675  int n, m, l;
1676 
1677  /* set minimum value for first, maximum value for last and minimum
1678  * spacing between LSF values.
1679  * Very similar to ff_set_min_dist_lsf(), but in double. */
1680  lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
1681  for (n = 1; n < num; n++)
1682  lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
1683  lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
1684 
1685  /* reorder (looks like one-time / non-recursed bubblesort).
1686  * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
1687  for (n = 1; n < num; n++) {
1688  if (lsps[n] < lsps[n - 1]) {
1689  for (m = 1; m < num; m++) {
1690  double tmp = lsps[m];
1691  for (l = m - 1; l >= 0; l--) {
1692  if (lsps[l] <= tmp) break;
1693  lsps[l + 1] = lsps[l];
1694  }
1695  lsps[l + 1] = tmp;
1696  }
1697  break;
1698  }
1699  }
1700 }
1701 
1702 /**
1703  * Synthesize output samples for a single superframe. If we have any data
1704  * cached in s->sframe_cache, that will be used instead of whatever is loaded
1705  * in s->gb.
1706  *
1707  * WMA Voice superframes contain 3 frames, each containing 160 audio samples,
1708  * to give a total of 480 samples per frame. See #synth_frame() for frame
1709  * parsing. In addition to 3 frames, superframes can also contain the LSPs
1710  * (if these are globally specified for all frames (residually); they can
1711  * also be specified individually per-frame. See the s->has_residual_lsps
1712  * option), and can specify the number of samples encoded in this superframe
1713  * (if less than 480), usually used to prevent blanks at track boundaries.
1714  *
1715  * @param ctx WMA Voice decoder context
1716  * @return 0 on success, <0 on error or 1 if there was not enough data to
1717  * fully parse the superframe
1718  */
1720  int *got_frame_ptr)
1721 {
1723  GetBitContext *gb = &s->gb, s_gb;
1724  int n, res, n_samples = MAX_SFRAMESIZE;
1725  double lsps[MAX_FRAMES][MAX_LSPS];
1726  const double *mean_lsf = s->lsps == 16 ?
1727  wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
1728  float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
1729  float synth[MAX_LSPS + MAX_SFRAMESIZE];
1730  float *samples;
1731 
1732  memcpy(synth, s->synth_history,
1733  s->lsps * sizeof(*synth));
1734  memcpy(excitation, s->excitation_history,
1735  s->history_nsamples * sizeof(*excitation));
1736 
1737  if (s->sframe_cache_size > 0) {
1738  gb = &s_gb;
1739  init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
1740  s->sframe_cache_size = 0;
1741  }
1742 
1743  /* First bit is speech/music bit, it differentiates between WMAVoice
1744  * speech samples (the actual codec) and WMAVoice music samples, which
1745  * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
1746  * the wild yet. */
1747  if (!get_bits1(gb)) {
1748  avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");
1749  return AVERROR_PATCHWELCOME;
1750  }
1751 
1752  /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
1753  if (get_bits1(gb)) {
1754  if ((n_samples = get_bits(gb, 12)) > MAX_SFRAMESIZE) {
1756  "Superframe encodes > %d samples (%d), not allowed\n",
1757  MAX_SFRAMESIZE, n_samples);
1758  return AVERROR_INVALIDDATA;
1759  }
1760  }
1761 
1762  /* Parse LSPs, if global for the superframe (can also be per-frame). */
1763  if (s->has_residual_lsps) {
1764  double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
1765 
1766  for (n = 0; n < s->lsps; n++)
1767  prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
1768 
1769  if (s->lsps == 10) {
1770  dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1771  } else /* s->lsps == 16 */
1772  dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
1773 
1774  for (n = 0; n < s->lsps; n++) {
1775  lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1776  lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
1777  lsps[2][n] += mean_lsf[n];
1778  }
1779  for (n = 0; n < 3; n++)
1780  stabilize_lsps(lsps[n], s->lsps);
1781  }
1782 
1783  /* synth_superframe can run multiple times per packet
1784  * free potential previous frame */
1786 
1787  /* get output buffer */
1788  frame->nb_samples = MAX_SFRAMESIZE;
1789  if ((res = ff_get_buffer(ctx, frame, 0)) < 0)
1790  return res;
1791  frame->nb_samples = n_samples;
1792  samples = (float *)frame->data[0];
1793 
1794  /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
1795  for (n = 0; n < 3; n++) {
1796  if (!s->has_residual_lsps) {
1797  int m;
1798 
1799  if (s->lsps == 10) {
1800  dequant_lsp10i(gb, lsps[n]);
1801  } else /* s->lsps == 16 */
1802  dequant_lsp16i(gb, lsps[n]);
1803 
1804  for (m = 0; m < s->lsps; m++)
1805  lsps[n][m] += mean_lsf[m];
1806  stabilize_lsps(lsps[n], s->lsps);
1807  }
1808 
1809  if ((res = synth_frame(ctx, gb, n,
1810  &samples[n * MAX_FRAMESIZE],
1811  lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
1812  &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
1813  &synth[s->lsps + n * MAX_FRAMESIZE]))) {
1814  *got_frame_ptr = 0;
1815  return res;
1816  }
1817  }
1818 
1819  /* Statistics? FIXME - we don't check for length, a slight overrun
1820  * will be caught by internal buffer padding, and anything else
1821  * will be skipped, not read. */
1822  if (get_bits1(gb)) {
1823  res = get_bits(gb, 4);
1824  skip_bits(gb, 10 * (res + 1));
1825  }
1826 
1827  if (get_bits_left(gb) < 0) {
1829  return AVERROR_INVALIDDATA;
1830  }
1831 
1832  *got_frame_ptr = 1;
1833 
1834  /* Update history */
1835  memcpy(s->prev_lsps, lsps[2],
1836  s->lsps * sizeof(*s->prev_lsps));
1837  memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
1838  s->lsps * sizeof(*synth));
1839  memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
1840  s->history_nsamples * sizeof(*excitation));
1841  if (s->do_apf)
1842  memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
1843  s->history_nsamples * sizeof(*s->zero_exc_pf));
1844 
1845  return 0;
1846 }
1847 
1848 /**
1849  * Parse the packet header at the start of each packet (input data to this
1850  * decoder).
1851  *
1852  * @param s WMA Voice decoding context private data
1853  * @return <0 on error, nb_superframes on success.
1854  */
1856 {
1857  GetBitContext *gb = &s->gb;
1858  unsigned int res, n_superframes = 0;
1859 
1860  skip_bits(gb, 4); // packet sequence number
1861  s->has_residual_lsps = get_bits1(gb);
1862  do {
1863  if (get_bits_left(gb) < 6 + s->spillover_bitsize)
1864  return AVERROR_INVALIDDATA;
1865 
1866  res = get_bits(gb, 6); // number of superframes per packet
1867  // (minus first one if there is spillover)
1868  n_superframes += res;
1869  } while (res == 0x3F);
1870  s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
1871 
1872  return get_bits_left(gb) >= 0 ? n_superframes : AVERROR_INVALIDDATA;
1873 }
1874 
1875 /**
1876  * Copy (unaligned) bits from gb/data/size to pb.
1877  *
1878  * @param pb target buffer to copy bits into
1879  * @param data source buffer to copy bits from
1880  * @param size size of the source data, in bytes
1881  * @param gb bit I/O context specifying the current position in the source.
1882  * data. This function might use this to align the bit position to
1883  * a whole-byte boundary before calling #ff_copy_bits() on aligned
1884  * source data
1885  * @param nbits the amount of bits to copy from source to target
1886  *
1887  * @note after calling this function, the current position in the input bit
1888  * I/O context is undefined.
1889  */
1890 static void copy_bits(PutBitContext *pb,
1891  const uint8_t *data, int size,
1892  GetBitContext *gb, int nbits)
1893 {
1894  int rmn_bytes, rmn_bits;
1895 
1896  rmn_bits = rmn_bytes = get_bits_left(gb);
1897  if (rmn_bits < nbits)
1898  return;
1899  if (nbits > put_bits_left(pb))
1900  return;
1901  rmn_bits &= 7; rmn_bytes >>= 3;
1902  if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
1903  put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
1904  ff_copy_bits(pb, data + size - rmn_bytes,
1905  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1906 }
1907 
1908 /**
1909  * Packet decoding: a packet is anything that the (ASF) demuxer contains,
1910  * and we expect that the demuxer / application provides it to us as such
1911  * (else you'll probably get garbage as output). Every packet has a size of
1912  * ctx->block_align bytes, starts with a packet header (see
1913  * #parse_packet_header()), and then a series of superframes. Superframe
1914  * boundaries may exceed packets, i.e. superframes can split data over
1915  * multiple (two) packets.
1916  *
1917  * For more information about frames, see #synth_superframe().
1918  */
1920  int *got_frame_ptr, AVPacket *avpkt)
1921 {
1923  GetBitContext *gb = &s->gb;
1924  const uint8_t *buf = avpkt->data;
1925  uint8_t dummy[1];
1926  int size, res, pos;
1927 
1928  /* Packets are sometimes a multiple of ctx->block_align, with a packet
1929  * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
1930  * feeds us ASF packets, which may concatenate multiple "codec" packets
1931  * in a single "muxer" packet, so we artificially emulate that by
1932  * capping the packet size at ctx->block_align. */
1933  for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
1934  buf = size ? buf : dummy;
1935  res = init_get_bits8(&s->gb, buf, size);
1936  if (res < 0)
1937  return res;
1938 
1939  /* size == ctx->block_align is used to indicate whether we are dealing with
1940  * a new packet or a packet of which we already read the packet header
1941  * previously. */
1942  if (!(size % ctx->block_align)) { // new packet header
1943  if (!size) {
1944  s->spillover_nbits = 0;
1945  s->nb_superframes = 0;
1946  } else {
1947  if ((res = parse_packet_header(s)) < 0)
1948  return res;
1949  s->nb_superframes = res;
1950  }
1951 
1952  /* If the packet header specifies a s->spillover_nbits, then we want
1953  * to push out all data of the previous packet (+ spillover) before
1954  * continuing to parse new superframes in the current packet. */
1955  if (s->sframe_cache_size > 0) {
1956  int cnt = get_bits_count(gb);
1957  if (cnt + s->spillover_nbits > avpkt->size * 8) {
1958  s->spillover_nbits = avpkt->size * 8 - cnt;
1959  }
1960  copy_bits(&s->pb, buf, size, gb, s->spillover_nbits);
1961  flush_put_bits(&s->pb);
1962  s->sframe_cache_size += s->spillover_nbits;
1963  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) == 0 &&
1964  *got_frame_ptr) {
1965  cnt += s->spillover_nbits;
1966  s->skip_bits_next = cnt & 7;
1967  res = cnt >> 3;
1968  return res;
1969  } else
1970  skip_bits_long (gb, s->spillover_nbits - cnt +
1971  get_bits_count(gb)); // resync
1972  } else if (s->spillover_nbits) {
1973  skip_bits_long(gb, s->spillover_nbits); // resync
1974  }
1975  } else if (s->skip_bits_next)
1976  skip_bits(gb, s->skip_bits_next);
1977 
1978  /* Try parsing superframes in current packet */
1979  s->sframe_cache_size = 0;
1980  s->skip_bits_next = 0;
1981  pos = get_bits_left(gb);
1982  if (s->nb_superframes-- == 0) {
1983  *got_frame_ptr = 0;
1984  return size;
1985  } else if (s->nb_superframes > 0) {
1986  if ((res = synth_superframe(ctx, frame, got_frame_ptr)) < 0) {
1987  return res;
1988  } else if (*got_frame_ptr) {
1989  int cnt = get_bits_count(gb);
1990  s->skip_bits_next = cnt & 7;
1991  res = cnt >> 3;
1992  return res;
1993  }
1994  } else if ((s->sframe_cache_size = pos) > 0) {
1995  /* ... cache it for spillover in next packet */
1996  init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
1997  copy_bits(&s->pb, buf, size, gb, s->sframe_cache_size);
1998  // FIXME bad - just copy bytes as whole and add use the
1999  // skip_bits_next field
2000  }
2001 
2002  return size;
2003 }
2004 
2006 {
2008 
2009  if (s->do_apf) {
2010  av_tx_uninit(&s->rdft);
2011  av_tx_uninit(&s->irdft);
2012  av_tx_uninit(&s->dct);
2013  av_tx_uninit(&s->dst);
2014  }
2015 
2016  return 0;
2017 }
2018 
2020  .p.name = "wmavoice",
2021  CODEC_LONG_NAME("Windows Media Audio Voice"),
2022  .p.type = AVMEDIA_TYPE_AUDIO,
2023  .p.id = AV_CODEC_ID_WMAVOICE,
2024  .priv_data_size = sizeof(WMAVoiceContext),
2026  .close = wmavoice_decode_end,
2028  .p.capabilities =
2029 #if FF_API_SUBFRAMES
2030  AV_CODEC_CAP_SUBFRAMES |
2031 #endif
2033  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
2034  .flush = wmavoice_flush,
2035 };
WMAVoiceContext::has_residual_lsps
int has_residual_lsps
if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...
Definition: wmavoice.c:193
skip_bits_long
static void skip_bits_long(GetBitContext *s, int n)
Skips the specified number of bits.
Definition: get_bits.h:278
AMRFixed::x
int x[10]
Definition: acelp_vectors.h:55
wmavoice_std_codebook
static const float wmavoice_std_codebook[1000]
Definition: wmavoice_data.h:2585
interpol
static int interpol(MBContext *s, uint32_t *color, int x, int y, int linesize)
Definition: vsrc_mandelbrot.c:185
MAX_LSPS
#define MAX_LSPS
maximum filter order
Definition: wmavoice.c:49
WMAVoiceContext::aw_next_pulse_off_cache
int aw_next_pulse_off_cache
the position (relative to start of the second block) at which pulses should start to be positioned,...
Definition: wmavoice.c:242
WMAVoiceContext::max_pitch_val
int max_pitch_val
max value + 1 for pitch parsing
Definition: wmavoice.c:165
av_clip
#define av_clip
Definition: common.h:99
aw_pulse_set2
static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply second set of pitch-adaptive window pulses.
Definition: wmavoice.c:1103
FF_CODEC_CAP_INIT_CLEANUP
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: codec_internal.h:42
acelp_vectors.h
get_bits_left
static int get_bits_left(GetBitContext *gb)
Definition: get_bits.h:695
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
wmavoice_dq_lsp10i
static const uint8_t wmavoice_dq_lsp10i[0xf00]
Definition: wmavoice_data.h:33
mem_internal.h
FCB_TYPE_SILENCE
@ FCB_TYPE_SILENCE
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
Definition: wmavoice.c:85
WMAVoiceContext::tilted_lpcs_pf
float tilted_lpcs_pf[0x82]
aligned buffer for LPC tilting
Definition: wmavoice.c:280
out
FILE * out
Definition: movenc.c:55
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:251
thread.h
frame_descs
static const struct frame_type_desc frame_descs[17]
AVTXContext
Definition: tx_priv.h:235
wmavoice_dq_lsp16r3
static const uint8_t wmavoice_dq_lsp16r3[0x600]
Definition: wmavoice_data.h:1526
dequant_lsps
static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
Dequantize LSPs.
Definition: wmavoice.c:875
init_put_bits
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:62
WMAVoiceContext::excitation_history
float excitation_history[MAX_SIGNAL_HISTORY]
cache of the signal of previous superframes, used as a history for signal generation
Definition: wmavoice.c:252
get_bits_count
static int get_bits_count(const GetBitContext *s)
Definition: get_bits.h:266
av_log2_16bit
int av_log2_16bit(unsigned v)
Definition: intmath.c:31
FCB_TYPE_EXC_PULSES
@ FCB_TYPE_EXC_PULSES
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.
Definition: wmavoice.c:92
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:374
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:223
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
aw_pulse_set1
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply first set of pitch-adaptive window pulses.
Definition: wmavoice.c:1193
ff_acelp_apply_order_2_transfer_function
void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
Definition: acelp_filters.c:121
AVPacket::data
uint8_t * data
Definition: packet.h:524
pRNG
static int pRNG(int frame_cntr, int block_num, int block_size)
Generate a random number from frame_cntr and block_idx, which will live in the range [0,...
Definition: wmavoice.c:1254
ff_wmavoice_decoder
const FFCodec ff_wmavoice_decoder
Definition: wmavoice.c:2019
table
static const uint16_t table[]
Definition: prosumer.c:203
data
const char data[16]
Definition: mxf.c:148
WMAVoiceContext::silence_gain
float silence_gain
set for use in blocks if ACB_TYPE_NONE
Definition: wmavoice.c:227
expf
#define expf(x)
Definition: libm.h:283
WMAVoiceContext::denoise_filter_cache_size
int denoise_filter_cache_size
samples in denoise_filter_cache
Definition: wmavoice.c:279
wmavoice_denoise_power_table
static const float wmavoice_denoise_power_table[12][64]
LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
Definition: wmavoice_data.h:3064
wmavoice_gain_codebook_acb
static const float wmavoice_gain_codebook_acb[128]
Definition: wmavoice_data.h:2874
FFCodec
Definition: codec_internal.h:126
base
uint8_t base
Definition: vp3data.h:128
AVComplexFloat
Definition: tx.h:27
t1
#define t1
Definition: regdef.h:29
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
ff_celp_lp_synthesis_filterf
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
Definition: celp_filters.c:85
WMAVoiceContext::aw_idx_is_ext
int aw_idx_is_ext
whether the AW index was encoded in 8 bits (instead of 6)
Definition: wmavoice.c:229
init_get_bits
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
Definition: get_bits.h:514
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
Definition: tx.c:903
WMAVoiceContext::dc_level
int dc_level
Predicted amount of DC noise, based on which a DC removal filter is used.
Definition: wmavoice.c:156
wmavoice_dq_lsp16i1
static const uint8_t wmavoice_dq_lsp16i1[0x640]
Definition: wmavoice_data.h:420
WMAVoiceContext::block_conv_table
uint16_t block_conv_table[4]
boundaries for block pitch unit/scale conversion
Definition: wmavoice.c:177
frame_type_desc::log_n_blocks
uint8_t log_n_blocks
log2(n_blocks)
Definition: wmavoice.c:103
skip_bits
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:381
WMAVoiceContext::aw_pulse_range
int aw_pulse_range
the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...
Definition: wmavoice.c:231
get_bits
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:335
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:130
av_ceil_log2
#define av_ceil_log2
Definition: common.h:96
AMRFixed::pitch_fac
float pitch_fac
Definition: acelp_vectors.h:59
dummy
int dummy
Definition: motion.c:66
GetBitContext
Definition: get_bits.h:108
MULH
#define MULH
Definition: mathops.h:42
wmavoice_flush
static av_cold void wmavoice_flush(AVCodecContext *ctx)
Definition: wmavoice.c:329
put_bits_left
static int put_bits_left(PutBitContext *s)
Definition: put_bits.h:125
frame_type_desc::n_blocks
uint8_t n_blocks
amount of blocks per frame (each block (contains 160/n_blocks samples)
Definition: wmavoice.c:101
val
static double val(void *priv, double ch)
Definition: aeval.c:78
WMAVoiceContext::irdft_fn
av_tx_fn irdft_fn
postfilter (for denoise filter)
Definition: wmavoice.c:267
dequant_lsp10i
static void dequant_lsp10i(GetBitContext *gb, double *lsps)
Parse 10 independently-coded LSPs.
Definition: wmavoice.c:906
synth_block
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
Parse data in a single block.
Definition: wmavoice.c:1455
MAX_SFRAMESIZE
#define MAX_SFRAMESIZE
maximum number of samples per superframe
Definition: wmavoice.c:55
wmavoice_gain_codebook_fcb
static const float wmavoice_gain_codebook_fcb[128]
Definition: wmavoice_data.h:2893
WMAVoiceContext::denoise_filter_cache
float denoise_filter_cache[MAX_FRAMESIZE]
Definition: wmavoice.c:278
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
WMAVoiceContext::sin
float sin[511]
Definition: wmavoice.c:270
calc_input_response
static void calc_input_response(WMAVoiceContext *s, float *lpcs_src, int fcb_type, float *coeffs_dst, int remainder)
Derive denoise filter coefficients (in real domain) from the LPCs.
Definition: wmavoice.c:613
a1
#define a1
Definition: regdef.h:47
AV_CODEC_ID_WMAVOICE
@ AV_CODEC_ID_WMAVOICE
Definition: codec_id.h:476
lrint
#define lrint
Definition: tablegen.h:53
MUL16
#define MUL16(ra, rb)
Definition: mathops.h:89
ff_thread_once
static int ff_thread_once(char *control, void(*routine)(void))
Definition: thread.h:205
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
init_get_bits8
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:545
MAX_LSPS_ALIGN16
#define MAX_LSPS_ALIGN16
same as MAX_LSPS; needs to be multiple
Definition: wmavoice.c:50
av_tx_fn
void(* av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride)
Function pointer to a function to perform the transform.
Definition: tx.h:151
av_memcpy_backptr
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
Overlapping memcpy() implementation.
Definition: mem.c:447
float
float
Definition: af_crystalizer.c:121
wmavoice_dq_lsp10r
static const uint8_t wmavoice_dq_lsp10r[0x1400]
Definition: wmavoice_data.h:749
FF_CODEC_DECODE_CB
#define FF_CODEC_DECODE_CB(func)
Definition: codec_internal.h:286
WMAVoiceContext::sframe_cache_size
int sframe_cache_size
set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...
Definition: wmavoice.c:205
WMAVoiceContext::dst
AVTXContext * dst
contexts for phase shift (in Hilbert
Definition: wmavoice.c:268
s
#define s(width, name)
Definition: cbs_vp9.c:198
WMAVoiceContext::lsp_q_mode
int lsp_q_mode
defines quantizer defaults [0, 1]
Definition: wmavoice.c:160
frame_type_desc::fcb_type
uint8_t fcb_type
Fixed codebook type (FCB_TYPE_*)
Definition: wmavoice.c:105
log_range
#define log_range(var, assign)
WMAVoiceContext::prev_lsps
double prev_lsps[MAX_LSPS]
LSPs of the last frame of the previous superframe.
Definition: wmavoice.c:221
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
WMAVoiceContext::aw_n_pulses
int aw_n_pulses[2]
number of AW-pulses in each block; note that this number can be negative (in which case it basically ...
Definition: wmavoice.c:237
AMRFixed
Sparse representation for the algebraic codebook (fixed) vector.
Definition: acelp_vectors.h:53
ACB_TYPE_ASYMMETRIC
@ ACB_TYPE_ASYMMETRIC
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Definition: wmavoice.c:71
bits
uint8_t bits
Definition: vp3data.h:128
adaptive_gain_control
static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
Adaptive gain control (as used in postfilter).
Definition: wmavoice.c:513
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:40
wmavoice_lsp16_intercoeff_a
static const float wmavoice_lsp16_intercoeff_a[32][2][16]
Definition: wmavoice_data.h:2047
ctx
AVFormatContext * ctx
Definition: movenc.c:49
decode.h
get_bits.h
wmavoice_mean_lsf10
static const double wmavoice_mean_lsf10[2][10]
Definition: wmavoice_data.h:2565
WMAVoiceContext::spillover_nbits
int spillover_nbits
number of bits of the previous packet's last superframe preceding this packet's first full superframe...
Definition: wmavoice.c:189
UMULH
static av_always_inline unsigned UMULH(unsigned a, unsigned b)
Definition: mathops.h:69
AMRFixed::y
float y[10]
Definition: acelp_vectors.h:56
WMAVoiceContext::denoise_coeffs_pf
float denoise_coeffs_pf[0x82]
aligned buffer for denoise coefficients
Definition: wmavoice.c:282
wmavoice_gain_silence
static const float wmavoice_gain_silence[256]
Definition: wmavoice_data.h:2788
PutBitContext
Definition: put_bits.h:50
WMAVoiceContext::vbm_tree
int8_t vbm_tree[25]
converts VLC codes to frame type
Definition: wmavoice.c:141
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:271
WMAVoiceContext::dct_fn
av_tx_fn dct_fn
Definition: wmavoice.c:269
wmavoice_dq_lsp16i3
static const uint8_t wmavoice_dq_lsp16i3[0x300]
Definition: wmavoice_data.h:682
if
if(ret)
Definition: filter_design.txt:179
AMRFixed::no_repeat_mask
int no_repeat_mask
Definition: acelp_vectors.h:57
postfilter
static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
Averaging projection filter, the postfilter used in WMAVoice.
Definition: wmavoice.c:821
AV_ONCE_INIT
#define AV_ONCE_INIT
Definition: thread.h:203
NULL
#define NULL
Definition: coverity.c:32
sizes
static const int sizes[][2]
Definition: img2dec.c:59
WMAVoiceContext::history_nsamples
int history_nsamples
number of samples in history for signal prediction (through ACB)
Definition: wmavoice.c:146
WMAVoiceContext::synth_history
float synth_history[MAX_LSPS]
see excitation_history
Definition: wmavoice.c:256
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:156
AVERROR_PATCHWELCOME
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:64
last_coeff
static const uint8_t last_coeff[3]
Definition: qdm2data.h:187
ACB_TYPE_HAMMING
@ ACB_TYPE_HAMMING
Per-block pitch with signal generation using a Hamming sinc window function.
Definition: wmavoice.c:76
WMAVoiceContext::denoise_strength
int denoise_strength
strength of denoising in Wiener filter [0-11]
Definition: wmavoice.c:152
MAX_SIGNAL_HISTORY
#define MAX_SIGNAL_HISTORY
maximum excitation signal history
Definition: wmavoice.c:54
WMAVoiceContext::sframe_cache
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+AV_INPUT_BUFFER_PADDING_SIZE]
cache for superframe data split over multiple packets
Definition: wmavoice.c:202
get_bits1
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:388
dequant_lsp10r
static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:932
WMAVoiceContext::pitch_nbits
int pitch_nbits
number of bits used to specify the pitch value in the frame header
Definition: wmavoice.c:166
WMAVoiceContext::block_delta_pitch_nbits
int block_delta_pitch_nbits
number of bits used to specify the delta pitch between this and the last block's pitch value,...
Definition: wmavoice.c:171
kalman_smoothen
static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
Kalman smoothing function.
Definition: wmavoice.c:554
WMAVoiceContext::skip_bits_next
int skip_bits_next
number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...
Definition: wmavoice.c:198
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
abs
#define abs(x)
Definition: cuda_runtime.h:35
WMAVoiceContext::dst_fn
av_tx_fn dst_fn
transform, part of postfilter)
Definition: wmavoice.c:269
WMAVoiceContext::rdft
AVTXContext * rdft
Definition: wmavoice.c:266
celp_filters.h
MAX_FRAMESIZE
#define MAX_FRAMESIZE
maximum number of samples per frame
Definition: wmavoice.c:53
av_clipf
av_clipf
Definition: af_crystalizer.c:121
MAX_FRAMES
#define MAX_FRAMES
maximum number of frames per superframe
Definition: wmavoice.c:52
get_vlc2
static av_always_inline int get_vlc2(GetBitContext *s, const VLCElem *table, int bits, int max_depth)
Parse a vlc code.
Definition: get_bits.h:652
decode_vbmtree
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
Set up the variable bit mode (VBM) tree from container extradata.
Definition: wmavoice.c:301
AVOnce
#define AVOnce
Definition: thread.h:202
aw_parse_coords
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...
Definition: wmavoice.c:1051
wmavoice_init_static_data
static av_cold void wmavoice_init_static_data(void)
Definition: wmavoice.c:315
float_dsp.h
FCB_TYPE_AW_PULSES
@ FCB_TYPE_AW_PULSES
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
Definition: wmavoice.c:90
WMAVoiceContext::dcf_mem
float dcf_mem[2]
DC filter history.
Definition: wmavoice.c:274
ff_get_buffer
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: decode.c:1575
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:366
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:52
parse_packet_header
static int parse_packet_header(WMAVoiceContext *s)
Parse the packet header at the start of each packet (input data to this decoder).
Definition: wmavoice.c:1855
AV_TX_FLOAT_DCT_I
@ AV_TX_FLOAT_DCT_I
Discrete Cosine Transform I.
Definition: tx.h:116
AVPacket::size
int size
Definition: packet.h:525
powf
#define powf(x, y)
Definition: libm.h:50
AVChannelLayout
An AVChannelLayout holds information about the channel layout of audio data.
Definition: channel_layout.h:303
codec_internal.h
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem_internal.h:109
WMAVoiceContext::spillover_bitsize
int spillover_bitsize
number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...
Definition: wmavoice.c:143
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:425
WMAVoiceContext::pb
PutBitContext pb
bitstream writer for sframe_cache
Definition: wmavoice.c:210
WMAVoiceContext::last_pitch_val
int last_pitch_val
pitch value of the previous frame
Definition: wmavoice.c:223
size
int size
Definition: twinvq_data.h:10344
wiener_denoise
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.
Definition: wmavoice.c:737
VLCElem
Definition: vlc.h:32
wmavoice_lsp10_intercoeff_b
static const float wmavoice_lsp10_intercoeff_b[32][2][10]
Definition: wmavoice_data.h:1852
range
enum AVColorRange range
Definition: mediacodec_wrapper.c:2464
dequant_lsp16i
static void dequant_lsp16i(GetBitContext *gb, double *lsps)
Parse 16 independently-coded LSPs.
Definition: wmavoice.c:968
wmavoice_dq_lsp16r1
static const uint8_t wmavoice_dq_lsp16r1[0x500]
Definition: wmavoice_data.h:1264
WMAVoiceContext::aw_first_pulse_off
int aw_first_pulse_off[2]
index of first sample to which to apply AW-pulses, or -0xff if unset
Definition: wmavoice.c:240
WMAVoiceContext::zero_exc_pf
float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]
zero filter output (i.e.
Definition: wmavoice.c:275
sinewin.h
wmavoice_dq_lsp16r2
static const uint8_t wmavoice_dq_lsp16r2[0x500]
Definition: wmavoice_data.h:1395
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
frame_type_desc
Description of frame types.
Definition: wmavoice.c:100
WMAVoiceContext::block_pitch_range
int block_pitch_range
range of the block pitch
Definition: wmavoice.c:170
stabilize_lsps
static void stabilize_lsps(double *lsps, int num)
Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...
Definition: wmavoice.c:1673
M_PI
#define M_PI
Definition: mathematics.h:67
ff_tilt_compensation
void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
Apply tilt compensation filter, 1 - tilt * z-1.
Definition: acelp_filters.c:138
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:295
wmavoice_energy_table
static const float wmavoice_energy_table[128]
LUT for 1.071575641632 * pow(1.0331663, n - 127)
Definition: wmavoice_data.h:3026
FCB_TYPE_HARDCODED
@ FCB_TYPE_HARDCODED
hardcoded (fixed) codebook with per-block gain values
Definition: wmavoice.c:88
ff_sine_window_init
void ff_sine_window_init(float *window, int n)
Generate a sine window.
Definition: sinewin_tablegen.h:59
wmavoice_decode_init
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
Definition: wmavoice.c:360
WMAVoiceContext::block_delta_pitch_hrange
int block_delta_pitch_hrange
1/2 range of the delta (full range is from -this to +this-1)
Definition: wmavoice.c:175
wmavoice_ipol2_coeffs
static const float wmavoice_ipol2_coeffs[32]
Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...
Definition: wmavoice_data.h:3012
WMAVoiceContext::pitch_diff_sh16
int pitch_diff_sh16
((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE
Definition: wmavoice.c:225
WMAVoiceContext::gain_pred_err
float gain_pred_err[6]
cache for gain prediction
Definition: wmavoice.c:251
WMAVoiceContext::rdft_fn
av_tx_fn rdft_fn
Definition: wmavoice.c:267
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
WMAVoiceContext::nb_superframes
int nb_superframes
number of superframes in current packet
Definition: wmavoice.c:250
t3
#define t3
Definition: regdef.h:31
WMAVoiceContext::cos
float cos[511]
8-bit cosine/sine windows over [-pi,pi] range
Definition: wmavoice.c:270
a2
#define a2
Definition: regdef.h:48
WMAVoiceContext::denoise_tilt_corr
int denoise_tilt_corr
Whether to apply tilt correction to the Wiener filter coefficients (postfilter)
Definition: wmavoice.c:154
delta
float delta
Definition: vorbis_enc_data.h:430
wmavoice_lsp16_intercoeff_b
static const float wmavoice_lsp16_intercoeff_b[32][2][16]
Definition: wmavoice_data.h:2306
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
av_frame_unref
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:606
acelp_filters.h
ff_weighted_vector_sumf
void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)
float implementation of weighted sum of two vectors.
Definition: acelp_vectors.c:182
WMAVoiceContext::lsp_def_mode
int lsp_def_mode
defines different sets of LSP defaults [0, 1]
Definition: wmavoice.c:161
wmavoice_gain_universal
static const float wmavoice_gain_universal[64]
Definition: wmavoice_data.h:2855
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:194
len
int len
Definition: vorbis_enc_data.h:426
WMAVoiceContext::synth_filter_out_buf
float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]
aligned buffer for postfilter speech synthesis
Definition: wmavoice.c:284
tilt_factor
static float tilt_factor(const float *lpcs, int n_lpcs)
Get the tilt factor of a formant filter from its transfer function.
Definition: wmavoice.c:600
VLC_NBITS
#define VLC_NBITS
number of bits to read per VLC iteration
Definition: wmavoice.c:59
wmavoice_data.h
Windows Media Voice (WMAVoice) tables.
avcodec.h
WMAVoiceContext::min_pitch_val
int min_pitch_val
base value for pitch parsing code
Definition: wmavoice.c:164
WMAVoiceContext::last_acb_type
int last_acb_type
frame type [0-2] of the previous frame
Definition: wmavoice.c:224
WMAVoiceContext::dct
AVTXContext * dct
Definition: wmavoice.c:268
av_uninit
#define av_uninit(x)
Definition: attributes.h:154
ret
ret
Definition: filter_design.txt:187
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:264
lsp.h
ff_celp_lp_zero_synthesis_filterf
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
Definition: celp_filters.c:200
WMAVoiceContext::do_apf
int do_apf
whether to apply the averaged projection filter (APF)
Definition: wmavoice.c:150
pos
unsigned int pos
Definition: spdifenc.c:414
AMRFixed::n
int n
Definition: acelp_vectors.h:54
wmavoice_dq_lsp16i2
static const uint8_t wmavoice_dq_lsp16i2[0x3c0]
Definition: wmavoice_data.h:583
AV_INPUT_BUFFER_PADDING_SIZE
#define AV_INPUT_BUFFER_PADDING_SIZE
Definition: defs.h:40
wmavoice_mean_lsf16
static const double wmavoice_mean_lsf16[2][16]
Definition: wmavoice_data.h:2574
ff_copy_bits
void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
Copy the content of src to the bitstream.
Definition: bitstream.c:49
AV_RL32
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_RL32
Definition: bytestream.h:92
U
#define U(x)
Definition: vpx_arith.h:37
AV_TX_FLOAT_RDFT
@ AV_TX_FLOAT_RDFT
Real to complex and complex to real DFTs.
Definition: tx.h:90
wmavoice_decode_end
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
Definition: wmavoice.c:2005
WMAVoiceContext::lsps
int lsps
number of LSPs per frame [10 or 16]
Definition: wmavoice.c:159
AVCodecContext
main external API structure.
Definition: avcodec.h:445
wmavoice_decode_packet
static int wmavoice_decode_packet(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
Definition: wmavoice.c:1919
channel_layout.h
t2
#define t2
Definition: regdef.h:30
WMAVoiceContext::block_pitch_nbits
int block_pitch_nbits
number of bits used to specify the first block's pitch value
Definition: wmavoice.c:168
AV_TX_FLOAT_DST_I
@ AV_TX_FLOAT_DST_I
Discrete Sine Transform I.
Definition: tx.h:128
synth_superframe
static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
Synthesize output samples for a single superframe.
Definition: wmavoice.c:1719
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:433
WMAVoiceContext::frame_cntr
int frame_cntr
current frame index [0 - 0xFFFE]; is only used for comfort noise in pRNG()
Definition: wmavoice.c:248
wmavoice_ipol1_coeffs
static const float wmavoice_ipol1_coeffs[17 *9]
Definition: wmavoice_data.h:2960
values
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
Definition: filter_design.txt:263
ff_set_fixed_vector
void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
Add fixed vector to an array from a sparse representation.
Definition: acelp_vectors.c:224
mean_lsf
static const float mean_lsf[10]
Definition: siprdata.h:27
AV_CODEC_CAP_DELAY
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
Definition: codec.h:76
samples
Filter the word “frame” indicates either a video frame or a group of audio samples
Definition: filter_design.txt:8
copy_bits
static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
Copy (unaligned) bits from gb/data/size to pb.
Definition: wmavoice.c:1890
avpriv_scalarproduct_float_c
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
Return the scalar product of two vectors.
Definition: float_dsp.c:124
synth_frame
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
Synthesize output samples for a single frame.
Definition: wmavoice.c:1497
mem.h
M_LN10
#define M_LN10
Definition: mathematics.h:49
WMAVoiceContext::gb
GetBitContext gb
packet bitreader.
Definition: wmavoice.c:137
avpriv_request_sample
#define avpriv_request_sample(...)
Definition: tableprint_vlc.h:36
synth_block_fcb_acb
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
Parse FCB/ACB signal for a single block.
Definition: wmavoice.c:1321
flush_put_bits
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:143
AV_CHANNEL_LAYOUT_MONO
#define AV_CHANNEL_LAYOUT_MONO
Definition: channel_layout.h:378
VLC_INIT_STATIC_TABLE_FROM_LENGTHS
#define VLC_INIT_STATIC_TABLE_FROM_LENGTHS(vlc_table, nb_bits, nb_codes, lens, lens_wrap, syms, syms_wrap, syms_size, offset, flags)
Definition: vlc.h:280
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:291
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
AVPacket
This structure stores compressed data.
Definition: packet.h:501
synth_block_hardcoded
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
Parse hardcoded signal for a single block.
Definition: wmavoice.c:1290
SFRAME_CACHE_MAXSIZE
#define SFRAME_CACHE_MAXSIZE
maximum cache size for frame data that
Definition: wmavoice.c:57
frame_type_vlc
static VLCElem frame_type_vlc[132]
Frame type VLC coding.
Definition: wmavoice.c:64
AMRFixed::pitch_lag
int pitch_lag
Definition: acelp_vectors.h:58
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:474
WMAVoiceContext::irdft
AVTXContext * irdft
contexts for FFT-calculation in the
Definition: wmavoice.c:266
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
wmavoice_lsp10_intercoeff_a
static const float wmavoice_lsp10_intercoeff_a[32][2][10]
Definition: wmavoice_data.h:1657
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
dequant_lsp16r
static void dequant_lsp16r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
Definition: wmavoice.c:1001
frame_type_desc::dbl_pulses
uint8_t dbl_pulses
how many pulse vectors have pulse pairs (rather than just one single pulse) only if fcb_type == FCB_T...
Definition: wmavoice.c:106
frame_type_desc::acb_type
uint8_t acb_type
Adaptive codebook type (ACB_TYPE_*)
Definition: wmavoice.c:104
MAX_BLOCKS
#define MAX_BLOCKS
maximum number of blocks per frame
Definition: wmavoice.c:48
ff_acelp_lspd2lpc
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
Reconstruct LPC coefficients from the line spectral pair frequencies.
Definition: lsp.c:220
WMAVoiceContext::postfilter_agc
float postfilter_agc
gain control memory, used in adaptive_gain_control()
Definition: wmavoice.c:272
put_bits.h
pulses
static const int8_t pulses[4]
Number of non-zero pulses in the MP-MLQ excitation.
Definition: g723_1.h:260
ff_acelp_interpolatef
void ff_acelp_interpolatef(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
Definition: acelp_filters.c:80
AVFormatContext::priv_data
void * priv_data
Format private data.
Definition: avformat.h:1283
AV_SAMPLE_FMT_FLT
@ AV_SAMPLE_FMT_FLT
float
Definition: samplefmt.h:60
tx.h
ACB_TYPE_NONE
@ ACB_TYPE_NONE
no adaptive codebook (only hardcoded fixed)
Definition: wmavoice.c:70
min
float min
Definition: vorbis_enc_data.h:429
WMAVoiceContext
WMA Voice decoding context.
Definition: wmavoice.c:132