#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
#include "put_bits.h"
#include "wmavoice_data.h"
#include "celp_math.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
#include "libavutil/lzo.h"
#include "avfft.h"
#include "fft.h"
Go to the source code of this file.
Data Structures | |
| struct | frame_type_desc |
| Description of frame types. More... | |
| struct | WMAVoiceContext |
| WMA Voice decoding context. More... | |
Defines | |
| #define | MAX_BLOCKS 8 |
| maximum number of blocks per frame | |
| #define | MAX_LSPS 16 |
| maximum filter order | |
| #define | MAX_LSPS_ALIGN16 16 |
| same as MAX_LSPS; needs to be multiple of 16 for ASM input buffer alignment | |
| #define | MAX_FRAMES 3 |
| maximum number of frames per superframe | |
| #define | MAX_FRAMESIZE 160 |
| maximum number of samples per frame | |
| #define | MAX_SIGNAL_HISTORY 416 |
| maximum excitation signal history | |
| #define | MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES) |
| maximum number of samples per superframe | |
| #define | SFRAME_CACHE_MAXSIZE 256 |
| maximum cache size for frame data that was split over two packets | |
| #define | VLC_NBITS 6 |
| number of bits to read per VLC iteration | |
| #define | log_range(var, assign) |
Enumerations | |
| enum | { ACB_TYPE_NONE = 0, ACB_TYPE_ASYMMETRIC = 1, ACB_TYPE_HAMMING = 2 } |
| Adaptive codebook types. More... | |
| enum | { FCB_TYPE_SILENCE = 0, FCB_TYPE_HARDCODED = 1, FCB_TYPE_AW_PULSES = 2, FCB_TYPE_EXC_PULSES = 3 } |
| Fixed codebook types. More... | |
Functions | |
| static av_cold int | decode_vbmtree (GetBitContext *gb, int8_t vbm_tree[25]) |
| Sets up the variable bit mode (VBM) tree from container extradata. | |
| static av_cold int | wmavoice_decode_init (AVCodecContext *ctx) |
| Set up decoder with parameters from demuxer (extradata etc.). | |
| static void | adaptive_gain_control (float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem) |
| Adaptive gain control (as used in postfilter). | |
| static int | kalman_smoothen (WMAVoiceContext *s, int pitch, const float *in, float *out, int size) |
| Kalman smoothing function. | |
| static float | tilt_factor (const float *lpcs, int n_lpcs) |
| Get the tilt factor of a formant filter from its transfer function. | |
| static void | calc_input_response (WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder) |
| Derive denoise filter coefficients (in real domain) from the LPCs. | |
| static void | wiener_denoise (WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs) |
| This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it. | |
| static void | postfilter (WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch) |
| Averaging projection filter, the postfilter used in WMAVoice. | |
| static void | dequant_lsps (double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q) |
| Dequantize LSPs. | |
| static void | dequant_lsp10i (GetBitContext *gb, double *lsps) |
| Parse 10 independently-coded LSPs. | |
| static void | dequant_lsp10r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode) |
| Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). | |
| static void | dequant_lsp16i (GetBitContext *gb, double *lsps) |
| Parse 16 independently-coded LSPs. | |
| static void | dequant_lsp16r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode) |
| Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). | |
| static void | aw_parse_coords (WMAVoiceContext *s, GetBitContext *gb, const int *pitch) |
| Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame. | |
| static void | aw_pulse_set2 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb) |
| Apply second set of pitch-adaptive window pulses. | |
| static void | aw_pulse_set1 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb) |
| Apply first set of pitch-adaptive window pulses. | |
| static int | pRNG (int frame_cntr, int block_num, int block_size) |
| Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries). | |
| static void | synth_block_hardcoded (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation) |
| Parse hardcoded signal for a single block. | |
| static void | synth_block_fcb_acb (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation) |
| Parse FCB/ACB signal for a single block. | |
| static void | synth_block (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth) |
| Parse data in a single block. | |
| static int | synth_frame (AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth) |
| Synthesize output samples for a single frame. | |
| static void | stabilize_lsps (double *lsps, int num) |
| Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering. | |
| static int | check_bits_for_superframe (GetBitContext *orig_gb, WMAVoiceContext *s) |
| Test if there are enough bits to read 1 superframe. | |
| static int | synth_superframe (AVCodecContext *ctx, float *samples, int *data_size) |
| Synthesize output samples for a single superframe. | |
| static int | parse_packet_header (WMAVoiceContext *s) |
| Parse the packet header at the start of each packet (input data to this decoder). | |
| static void | copy_bits (PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits) |
| Copy (unaligned) bits from gb/data/size to pb. | |
| static int | wmavoice_decode_packet (AVCodecContext *ctx, void *data, int *data_size, AVPacket *avpkt) |
| Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output). | |
| static av_cold int | wmavoice_decode_end (AVCodecContext *ctx) |
| static av_cold void | wmavoice_flush (AVCodecContext *ctx) |
Variables | |
| static VLC | frame_type_vlc |
| Frame type VLC coding. | |
| static struct frame_type_desc | frame_descs [17] |
| Description of frame types. | |
| AVCodec | wmavoice_decoder |
Definition in file wmavoice.c.
| #define log_range | ( | var, | |||
| assign | ) |
Value:
do { \
    float tmp = log10f(assign); var = tmp; \
    max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
} while (0)
Referenced by calc_input_response().
| #define MAX_BLOCKS 8 |
| #define MAX_FRAMES 3 |
maximum number of frames per superframe
Definition at line 46 of file wmavoice.c.
Referenced by check_bits_for_superframe(), and synth_superframe().
| #define MAX_FRAMESIZE 160 |
| #define MAX_LSPS 16 |
maximum filter order
Definition at line 43 of file wmavoice.c.
Referenced by synth_block(), synth_frame(), synth_superframe(), and wmavoice_flush().
| #define MAX_LSPS_ALIGN16 16 |
same as MAX_LSPS; needs to be multiple
of 16 for ASM input buffer alignment
Definition at line 44 of file wmavoice.c.
Referenced by postfilter(), and wmavoice_flush().
| #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES) |
maximum number of samples per superframe
Definition at line 49 of file wmavoice.c.
Referenced by synth_superframe().
| #define MAX_SIGNAL_HISTORY 416 |
maximum excitation signal history
Definition at line 48 of file wmavoice.c.
Referenced by synth_superframe(), wmavoice_decode_init(), and wmavoice_flush().
| #define SFRAME_CACHE_MAXSIZE 256 |
maximum cache size for frame data that
was split over two packets
Definition at line 51 of file wmavoice.c.
Referenced by wmavoice_decode_packet().
| #define VLC_NBITS 6 |
number of bits to read per VLC iteration
Definition at line 53 of file wmavoice.c.
Referenced by decode_vbmtree().
| anonymous enum |
Adaptive codebook types.
| ACB_TYPE_NONE | no adaptive codebook (only hardcoded fixed) |
| ACB_TYPE_ASYMMETRIC |
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Signal is generated using an asymmetric sinc window function
|
| ACB_TYPE_HAMMING |
Per-block pitch with signal generation using a Hamming sinc window function.
|
Definition at line 63 of file wmavoice.c.
| anonymous enum |
Fixed codebook types.
Definition at line 78 of file wmavoice.c.
| static int check_bits_for_superframe | ( | GetBitContext * | orig_gb, | |
| WMAVoiceContext * | s | |||
| ) | [static] |
Test if there are enough bits to read 1 superframe.
| orig_gb | bit I/O context used for reading. This function does not modify the state of the bitreader; it only uses it to copy the current stream position | |
| s | WMA Voice decoding context private data |
Definition at line 1626 of file wmavoice.c.
Referenced by synth_superframe().
| static void copy_bits | ( | PutBitContext * | pb, | |
| const uint8_t * | data, | |||
| int | size, | |||
| GetBitContext * | gb, | |||
| int | nbits | |||
| ) | [static] |
Copy (unaligned) bits from gb/data/size to pb.
| pb | target buffer to copy bits into | |
| data | source buffer to copy bits from | |
| size | size of the source data, in bytes | |
| gb | bit I/O context specifying the current position in the source data. This function might use this to align the bit position to a whole-byte boundary before calling ff_copy_bits() on aligned source data | |
| nbits | the amount of bits to copy from source to target |
Definition at line 1868 of file wmavoice.c.
| static av_cold int decode_vbmtree | ( | GetBitContext * | gb, | |
| int8_t | vbm_tree[25] | |||
| ) | [static] |
Sets up the variable bit mode (VBM) tree from container extradata.
| gb | bit I/O context. The bit context (s->gb) should be loaded with byte 23-46 of the container extradata (i.e. the ones containing the VBM tree). | |
| vbm_tree | pointer to array to which the decoded VBM tree will be written. |
Definition at line 298 of file wmavoice.c.
Referenced by wmavoice_decode_init().
| static void dequant_lsps | ( | double * | lsps, | |
| int | num, | |||
| const uint16_t * | values, | |||
| const uint16_t * | sizes, | |||
| int | n_stages, | |||
| const uint8_t * | table, | |||
| const double * | mul_q, | |||
| const double * | base_q | |||
| ) | [static] |
Dequantize LSPs.
| lsps | output pointer to the array that will hold the LSPs | |
| num | number of LSPs to be dequantized | |
| values | quantized values, contains n_stages values | |
| sizes | range (i.e. max value) of each quantized value | |
| n_stages | number of dequantization runs | |
| table | dequantization table to be used | |
| mul_q | LSF multiplier | |
| base_q | base (lowest) LSF values |
Definition at line 804 of file wmavoice.c.
Referenced by dequant_lsp10i(), dequant_lsp10r(), dequant_lsp16i(), and dequant_lsp16r().
| static int parse_packet_header | ( | WMAVoiceContext * | s | ) | [static] |
Parse the packet header at the start of each packet (input data to this decoder).
| s | WMA Voice decoding context private data |
Definition at line 1833 of file wmavoice.c.
Referenced by wmavoice_decode_packet().
| static int pRNG | ( | int | frame_cntr, | |
| int | block_num, | |||
| int | block_size | |||
| ) | [static] |
Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).
| frame_cntr | current frame number | |
| block_num | current block index | |
| block_size | amount of entries we want to read from a table that has 1000 entries |
Definition at line 1180 of file wmavoice.c.
Referenced by synth_block_hardcoded().
| static void stabilize_lsps | ( | double * | lsps, | |
| int | num | |||
| ) | [static] |
Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.
| lsps | array of LSPs | |
| num | size of LSP array |
Definition at line 1588 of file wmavoice.c.
Referenced by synth_superframe().
| static void synth_block | ( | WMAVoiceContext * | s, | |
| GetBitContext * | gb, | |||
| int | block_idx, | |||
| int | size, | |||
| int | block_pitch_sh2, | |||
| const double * | lsps, | |||
| const double * | prev_lsps, | |||
| const struct frame_type_desc * | frame_desc, | |||
| float * | excitation, | |||
| float * | synth | |||
| ) | [static] |
Parse data in a single block.
| s | WMA Voice decoding context private data | |
| gb | bit I/O context | |
| block_idx | index of the to-be-read block | |
| size | amount of samples to be read in this block | |
| block_pitch_sh2 | pitch for this block << 2 | |
| lsps | LSPs for (the end of) this frame | |
| prev_lsps | LSPs for the last frame | |
| frame_desc | frame type descriptor | |
| excitation | target memory for the ACB+FCB interpolated signal | |
| synth | target memory for the speech synthesis filter output |
Definition at line 1370 of file wmavoice.c.
Referenced by synth_frame().
| static void synth_block_fcb_acb | ( | WMAVoiceContext * | s, | |
| GetBitContext * | gb, | |||
| int | block_idx, | |||
| int | size, | |||
| int | block_pitch_sh2, | |||
| const struct frame_type_desc * | frame_desc, | |||
| float * | excitation | |||
| ) | [static] |
Parse FCB/ACB signal for a single block.
Definition at line 1247 of file wmavoice.c.
Referenced by synth_block().
| static void synth_block_hardcoded | ( | WMAVoiceContext * | s, | |
| GetBitContext * | gb, | |||
| int | block_idx, | |||
| int | size, | |||
| const struct frame_type_desc * | frame_desc, | |||
| float * | excitation | |||
| ) | [static] |
Parse hardcoded signal for a single block.
Definition at line 1216 of file wmavoice.c.
Referenced by synth_block().
| static int synth_frame | ( | AVCodecContext * | ctx, | |
| GetBitContext * | gb, | |||
| int | frame_idx, | |||
| float * | samples, | |||
| const double * | lsps, | |||
| const double * | prev_lsps, | |||
| float * | excitation, | |||
| float * | synth | |||
| ) | [static] |
Synthesize output samples for a single frame.
| ctx | WMA Voice decoder context | |
| gb | bit I/O context (s->gb or one for cross-packet superframes) | |
| frame_idx | Frame number within superframe [0-2] | |
| samples | pointer to output sample buffer, has space for at least 160 samples | |
| lsps | LSP array | |
| prev_lsps | array of previous frame's LSPs | |
| excitation | target buffer for excitation signal | |
| synth | target buffer for synthesized speech data |
Definition at line 1413 of file wmavoice.c.
Referenced by synth_superframe().
| static int synth_superframe | ( | AVCodecContext * | ctx, | |
| float * | samples, | |||
| int * | data_size | |||
| ) | [static] |
Synthesize output samples for a single superframe.
If we have any data cached in s->sframe_cache, that will be used instead of whatever is loaded in s->gb.
WMA Voice superframes contain 3 frames, each containing 160 audio samples, to give a total of 480 samples per superframe. See synth_frame() for frame parsing. In addition to 3 frames, superframes can also contain the LSPs (if these are globally specified for all frames (residually); they can also be specified individually per-frame. See the s->has_residual_lsps option), and can specify the number of samples encoded in this superframe (if less than 480), usually used to prevent blanks at track boundaries.
| ctx | WMA Voice decoder context | |
| samples | pointer to output buffer for voice samples | |
| data_size | pointer containing the size of samples on input, and the amount of samples filled on output |
Definition at line 1714 of file wmavoice.c.
Referenced by wmavoice_decode_packet().
| static av_cold int wmavoice_decode_end | ( | AVCodecContext * | ctx | ) | [static] |
Definition at line 1976 of file wmavoice.c.
| static av_cold int wmavoice_decode_init | ( | AVCodecContext * | ctx | ) | [static] |
Set up decoder with parameters from demuxer (extradata etc.).
Extradata layout:
Definition at line 333 of file wmavoice.c.
| static int wmavoice_decode_packet | ( | AVCodecContext * | ctx, | |
| void * | data, | |||
| int * | data_size, | |||
| AVPacket * | avpkt | |||
| ) | [static] |
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).
Every packet has a size of ctx->block_align bytes, starts with a packet header (see parse_packet_header()), and then a series of superframes. Superframe boundaries may exceed packets, i.e. superframes can split data over multiple (two) packets.
For more information about frames, see synth_superframe().
Definition at line 1895 of file wmavoice.c.
| static av_cold void wmavoice_flush | ( | AVCodecContext * | ctx | ) | [static] |
Definition at line 1990 of file wmavoice.c.
struct frame_type_desc frame_descs[17] [static] |
VLC frame_type_vlc [static] |
AVCodec wmavoice_decoder |
Initial value:
{
"wmavoice",
AVMEDIA_TYPE_AUDIO,
CODEC_ID_WMAVOICE,
sizeof(WMAVoiceContext),
wmavoice_decode_init,
NULL,
wmavoice_decode_end,
wmavoice_decode_packet,
CODEC_CAP_SUBFRAMES,
.flush = wmavoice_flush,
.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
}
Definition at line 2018 of file wmavoice.c.
1.5.8