libavcodec/wmavoice.c File Reference

Windows Media Audio Voice compatible decoder. More...

#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
#include "put_bits.h"
#include "wmavoice_data.h"
#include "celp_math.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
#include "libavutil/lzo.h"
#include "avfft.h"
#include "fft.h"

Go to the source code of this file.

Data Structures

struct  frame_type_desc
 Description of frame types. More...
struct  WMAVoiceContext
 WMA Voice decoding context. More...

Defines

#define MAX_BLOCKS   8
 maximum number of blocks per frame
#define MAX_LSPS   16
 maximum filter order
#define MAX_LSPS_ALIGN16   16
 same as MAX_LSPS; needs to be a multiple of 16 for ASM input buffer alignment
#define MAX_FRAMES   3
 maximum number of frames per superframe
#define MAX_FRAMESIZE   160
 maximum number of samples per frame
#define MAX_SIGNAL_HISTORY   416
 maximum excitation signal history
#define MAX_SFRAMESIZE   (MAX_FRAMESIZE * MAX_FRAMES)
 maximum number of samples per superframe
#define SFRAME_CACHE_MAXSIZE   256
 maximum cache size for frame data that was split over two packets
#define VLC_NBITS   6
 number of bits to read per VLC iteration
#define log_range(var, assign)

Enumerations

enum  { ACB_TYPE_NONE = 0, ACB_TYPE_ASYMMETRIC = 1, ACB_TYPE_HAMMING = 2 }
 Adaptive codebook types. More...
enum  { FCB_TYPE_SILENCE = 0, FCB_TYPE_HARDCODED = 1, FCB_TYPE_AW_PULSES = 2, FCB_TYPE_EXC_PULSES = 3 }
 Fixed codebook types. More...

Functions

static av_cold int decode_vbmtree (GetBitContext *gb, int8_t vbm_tree[25])
 Sets up the variable bit mode (VBM) tree from container extradata.
static av_cold int wmavoice_decode_init (AVCodecContext *ctx)
 Set up decoder with parameters from demuxer (extradata etc.).
static void adaptive_gain_control (float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
 Adaptive gain control (as used in postfilter).
static int kalman_smoothen (WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
 Kalman smoothing function.
static float tilt_factor (const float *lpcs, int n_lpcs)
 Get the tilt factor of a formant filter from its transfer function.
static void calc_input_response (WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)
 Derive denoise filter coefficients (in real domain) from the LPCs.
static void wiener_denoise (WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
 This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it.
static void postfilter (WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
 Averaging projection filter, the postfilter used in WMAVoice.
static void dequant_lsps (double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
 Dequantize LSPs.
static void dequant_lsp10i (GetBitContext *gb, double *lsps)
 Parse 10 independently-coded LSPs.
static void dequant_lsp10r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
 Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).
static void dequant_lsp16i (GetBitContext *gb, double *lsps)
 Parse 16 independently-coded LSPs.
static void dequant_lsp16r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
 Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding).
static void aw_parse_coords (WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
 Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame.
static void aw_pulse_set2 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
 Apply second set of pitch-adaptive window pulses.
static void aw_pulse_set1 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
 Apply first set of pitch-adaptive window pulses.
static int pRNG (int frame_cntr, int block_num, int block_size)
 Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).
static void synth_block_hardcoded (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
 Parse hardcoded signal for a single block.
static void synth_block_fcb_acb (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
 Parse FCB/ACB signal for a single block.
static void synth_block (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
 Parse data in a single block.
static int synth_frame (AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
 Synthesize output samples for a single frame.
static void stabilize_lsps (double *lsps, int num)
 Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.
static int check_bits_for_superframe (GetBitContext *orig_gb, WMAVoiceContext *s)
 Test if there's enough bits to read 1 superframe.
static int synth_superframe (AVCodecContext *ctx, float *samples, int *data_size)
 Synthesize output samples for a single superframe.
static int parse_packet_header (WMAVoiceContext *s)
 Parse the packet header at the start of each packet (input data to this decoder).
static void copy_bits (PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
 Copy (unaligned) bits from gb/data/size to pb.
static int wmavoice_decode_packet (AVCodecContext *ctx, void *data, int *data_size, AVPacket *avpkt)
 Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).
static av_cold int wmavoice_decode_end (AVCodecContext *ctx)
static av_cold void wmavoice_flush (AVCodecContext *ctx)

Variables

static VLC frame_type_vlc
 Frame type VLC coding.
static struct frame_type_desc frame_descs [17]
 Description of frame types.
AVCodec wmavoice_decoder


Detailed Description

Windows Media Audio Voice compatible decoder.

Author:
Ronald S. Bultje <rsbultje@gmail.com>

Definition in file wmavoice.c.


Define Documentation

#define log_range ( var,
assign   ) 

Value:

do { \
        float tmp = log10f(assign);  var = tmp; \
        max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
    } while (0)

Referenced by calc_input_response().

#define MAX_BLOCKS   8

maximum number of blocks per frame

Definition at line 42 of file wmavoice.c.

#define MAX_FRAMES   3

maximum number of frames per superframe

Definition at line 46 of file wmavoice.c.

Referenced by check_bits_for_superframe(), and synth_superframe().

#define MAX_FRAMESIZE   160

maximum number of samples per frame

Definition at line 47 of file wmavoice.c.

#define MAX_LSPS   16

maximum filter order

Definition at line 43 of file wmavoice.c.

Referenced by synth_block(), synth_frame(), synth_superframe(), and wmavoice_flush().

#define MAX_LSPS_ALIGN16   16

same as MAX_LSPS; needs to be a multiple of 16 for ASM input buffer alignment

Definition at line 44 of file wmavoice.c.

Referenced by postfilter(), and wmavoice_flush().

#define MAX_SFRAMESIZE   (MAX_FRAMESIZE * MAX_FRAMES)

maximum number of samples per superframe

Definition at line 49 of file wmavoice.c.

Referenced by synth_superframe().

#define MAX_SIGNAL_HISTORY   416

maximum excitation signal history

Definition at line 48 of file wmavoice.c.

Referenced by synth_superframe(), wmavoice_decode_init(), and wmavoice_flush().

#define SFRAME_CACHE_MAXSIZE   256

maximum cache size for frame data that was split over two packets

Definition at line 51 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

#define VLC_NBITS   6

number of bits to read per VLC iteration

Definition at line 53 of file wmavoice.c.

Referenced by decode_vbmtree().


Enumeration Type Documentation

anonymous enum

Adaptive codebook types.

Enumerator:
ACB_TYPE_NONE  no adaptive codebook (only hardcoded fixed)
ACB_TYPE_ASYMMETRIC  adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.

Signal is generated using an asymmetric sinc window function

Note:
see wmavoice_ipol1_coeffs
ACB_TYPE_HAMMING  Per-block pitch with signal generation using a Hamming sinc window function.

Note:
see wmavoice_ipol2_coeffs

Definition at line 63 of file wmavoice.c.

anonymous enum

Fixed codebook types.

Enumerator:
FCB_TYPE_SILENCE  comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain values
FCB_TYPE_HARDCODED  hardcoded (fixed) codebook with per-block gain values
FCB_TYPE_AW_PULSES  Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
FCB_TYPE_EXC_PULSES  Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs.

Definition at line 78 of file wmavoice.c.


Function Documentation

static int check_bits_for_superframe ( GetBitContext *  orig_gb,
WMAVoiceContext *  s 
) [static]

Test if there's enough bits to read 1 superframe.

Parameters:
orig_gb bit I/O context used for reading. This function does not modify the state of the bitreader; it only uses it to copy the current stream position
s WMA Voice decoding context private data
Returns:
-1 if unsupported, 1 on not enough bits or 0 if OK.

Definition at line 1626 of file wmavoice.c.

Referenced by synth_superframe().

static void copy_bits ( PutBitContext *  pb,
const uint8_t *  data,
int  size,
GetBitContext *  gb,
int  nbits 
) [static]

Copy (unaligned) bits from gb/data/size to pb.

Parameters:
pb target buffer to copy bits into
data source buffer to copy bits from
size size of the source data, in bytes
gb bit I/O context specifying the current position in the source data. This function might use this to align the bit position to a whole-byte boundary before calling ff_copy_bits() on aligned source data
nbits the amount of bits to copy from source to target
Note:
after calling this function, the current position in the input bit I/O context is undefined.

Definition at line 1868 of file wmavoice.c.

static av_cold int decode_vbmtree ( GetBitContext *  gb,
int8_t  vbm_tree[25] 
) [static]

Sets up the variable bit mode (VBM) tree from container extradata.

Parameters:
gb bit I/O context. The bit context (s->gb) should be loaded with byte 23-46 of the container extradata (i.e. the ones containing the VBM tree).
vbm_tree pointer to array to which the decoded VBM tree will be written.
Returns:
0 on success, <0 on error.

Definition at line 298 of file wmavoice.c.

Referenced by wmavoice_decode_init().

static void dequant_lsps ( double *  lsps,
int  num,
const uint16_t *  values,
const uint16_t *  sizes,
int  n_stages,
const uint8_t *  table,
const double *  mul_q,
const double *  base_q 
) [static]

Dequantize LSPs.

Parameters:
lsps output pointer to the array that will hold the LSPs
num number of LSPs to be dequantized
values quantized values, contains n_stages values
sizes range (i.e. max value) of each quantized value
n_stages number of dequantization runs
table dequantization table to be used
mul_q LSF multiplier
base_q base (lowest) LSF values

Definition at line 804 of file wmavoice.c.

Referenced by dequant_lsp10i(), dequant_lsp10r(), dequant_lsp16i(), and dequant_lsp16r().

static int parse_packet_header ( WMAVoiceContext *  s  )  [static]

Parse the packet header at the start of each packet (input data to this decoder).

Parameters:
s WMA Voice decoding context private data
Returns:
1 if not enough bits were available, or 0 on success.

Definition at line 1833 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static int pRNG ( int  frame_cntr,
int  block_num,
int  block_size 
) [static]

Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).

Parameters:
frame_cntr current frame number
block_num current block index
block_size amount of entries we want to read from a table that has 1000 entries
Returns:
a (non-)random number in the [0, 1000 - block_size] range.

Definition at line 1180 of file wmavoice.c.

Referenced by synth_block_hardcoded().

static void stabilize_lsps ( double *  lsps,
int  num 
) [static]

Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.

Parameters:
lsps array of LSPs
num size of LSP array
Note:
basically a double version of ff_acelp_reorder_lsf(), might be useful to put in a generic location later on. Parts are also present in ff_set_min_dist_lsf() + ff_sort_nearly_sorted_floats(), which is in float.

Definition at line 1588 of file wmavoice.c.

Referenced by synth_superframe().

static void synth_block ( WMAVoiceContext *  s,
GetBitContext *  gb,
int  block_idx,
int  size,
int  block_pitch_sh2,
const double *  lsps,
const double *  prev_lsps,
const struct frame_type_desc *  frame_desc,
float *  excitation,
float *  synth 
) [static]

Parse data in a single block.

Note:
we assume enough bits are available, caller should check.
Parameters:
s WMA Voice decoding context private data
gb bit I/O context
block_idx index of the to-be-read block
size amount of samples to be read in this block
block_pitch_sh2 pitch for this block << 2
lsps LSPs for (the end of) this frame
prev_lsps LSPs for the last frame
frame_desc frame type descriptor
excitation target memory for the ACB+FCB interpolated signal
synth target memory for the speech synthesis filter output
Returns:
0 on success, <0 on error.

Definition at line 1370 of file wmavoice.c.

Referenced by synth_frame().

static void synth_block_fcb_acb ( WMAVoiceContext *  s,
GetBitContext *  gb,
int  block_idx,
int  size,
int  block_pitch_sh2,
const struct frame_type_desc *  frame_desc,
float *  excitation 
) [static]

Parse FCB/ACB signal for a single block.

Note:
see synth_block().

Definition at line 1247 of file wmavoice.c.

Referenced by synth_block().

static void synth_block_hardcoded ( WMAVoiceContext *  s,
GetBitContext *  gb,
int  block_idx,
int  size,
const struct frame_type_desc *  frame_desc,
float *  excitation 
) [static]

Parse hardcoded signal for a single block.

Note:
see synth_block().

Definition at line 1216 of file wmavoice.c.

Referenced by synth_block().

static int synth_frame ( AVCodecContext *  ctx,
GetBitContext *  gb,
int  frame_idx,
float *  samples,
const double *  lsps,
const double *  prev_lsps,
float *  excitation,
float *  synth 
) [static]

Synthesize output samples for a single frame.

Note:
we assume enough bits are available, caller should check.
Parameters:
ctx WMA Voice decoder context
gb bit I/O context (s->gb or one for cross-packet superframes)
frame_idx Frame number within superframe [0-2]
samples pointer to output sample buffer, has space for at least 160 samples
lsps LSP array
prev_lsps array of previous frame's LSPs
excitation target buffer for excitation signal
synth target buffer for synthesized speech data
Returns:
0 on success, <0 on error.

Definition at line 1413 of file wmavoice.c.

Referenced by synth_superframe().

static int synth_superframe ( AVCodecContext *  ctx,
float *  samples,
int *  data_size 
) [static]

Synthesize output samples for a single superframe.

If we have any data cached in s->sframe_cache, that will be used instead of whatever is loaded in s->gb.

WMA Voice superframes contain 3 frames, each containing 160 audio samples, to give a total of 480 samples per superframe. See synth_frame() for frame parsing. In addition to 3 frames, superframes can also contain the LSPs (if these are globally specified for all frames (residually); they can also be specified individually per-frame. See the s->has_residual_lsps option), and can specify the number of samples encoded in this superframe (if less than 480), usually used to prevent blanks at track boundaries.

Parameters:
ctx WMA Voice decoder context
samples pointer to output buffer for voice samples
data_size pointer containing the size of samples on input, and the amount of samples filled on output
Returns:
0 on success, <0 on error or 1 if there was not enough data to fully parse the superframe

Definition at line 1714 of file wmavoice.c.

Referenced by wmavoice_decode_packet().

static av_cold int wmavoice_decode_end ( AVCodecContext *  ctx  )  [static]

Definition at line 1976 of file wmavoice.c.

static av_cold int wmavoice_decode_init ( AVCodecContext *  ctx  )  [static]

Set up decoder with parameters from demuxer (extradata etc.).

Extradata layout:

  • byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
  • byte 19-22: flags field (annoyingly in LE; see below for known values),
  • byte 23-46: variable bitmode tree (really just 17 * 3 bits, rest is 0).

Definition at line 333 of file wmavoice.c.

static int wmavoice_decode_packet ( AVCodecContext *  ctx,
void *  data,
int *  data_size,
AVPacket *  avpkt 
) [static]

Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).

Every packet has a size of ctx->block_align bytes, starts with a packet header (see parse_packet_header()), and then a series of superframes. Superframe boundaries may exceed packets, i.e. superframes can split data over multiple (two) packets.

For more information about frames, see synth_superframe().

Definition at line 1895 of file wmavoice.c.

static av_cold void wmavoice_flush ( AVCodecContext *  ctx  )  [static]

Definition at line 1990 of file wmavoice.c.


Variable Documentation

struct frame_type_desc frame_descs[17] [static]

Description of frame types.

Referenced by check_bits_for_superframe(), and synth_frame().

VLC frame_type_vlc [static]

Frame type VLC coding.

Definition at line 58 of file wmavoice.c.

AVCodec wmavoice_decoder

Initial value:

Definition at line 2018 of file wmavoice.c.


Generated on Fri Oct 26 02:36:54 2012 for FFmpeg by  doxygen 1.5.8