49 #define MAX_LSPS_ALIGN16 16
52 #define MAX_FRAMESIZE 160
53 #define MAX_SIGNAL_HISTORY 416
54 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
56 #define SFRAME_CACHE_MAXSIZE 256 176 uint16_t block_conv_table[4];
239 int aw_first_pulse_off[2];
250 float gain_pred_err[6];
269 float sin[511], cos[511];
302 int cntr[8] = { 0 }, n, res;
304 memset(vbm_tree, 0xff,
sizeof(vbm_tree[0]) * 25);
305 for (n = 0; n < 17; n++) {
309 vbm_tree[res * 3 + cntr[res]++] = n;
319 10, 10, 10, 12, 12, 12,
322 static const uint16_t codes[] = {
323 0x0000, 0x0001, 0x0002,
324 0x000c, 0x000d, 0x000e,
325 0x003c, 0x003d, 0x003e,
326 0x00fc, 0x00fd, 0x00fe,
327 0x03fc, 0x03fd, 0x03fe,
328 0x0ffc, 0x0ffd, 0x0ffe,
329 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
333 bits, 1, 1, codes, 2, 2, 132);
344 for (n = 0; n < s->
lsps; n++)
370 int n,
flags, pitch_range, lsp16_flag;
385 "Invalid extradata size %d (should be 46)\n",
404 memcpy(&s->
sin[255], s->
cos, 256 *
sizeof(s->
cos[0]));
405 for (n = 0; n < 255; n++) {
406 s->
sin[n] = -s->
sin[510 - n];
407 s->
cos[510 - n] = s->
cos[n];
413 "Invalid denoise filter strength %d (max=11)\n",
421 lsp16_flag = flags & 0x1000;
427 for (n = 0; n < s->
lsps; n++)
442 if (pitch_range <= 0) {
452 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
456 "Unsupported samplerate %d (min=%d, max=%d)\n",
506 const float *speech_synth,
510 float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
511 float mem = *gain_mem;
513 for (i = 0; i <
size; i++) {
514 speech_energy +=
fabsf(speech_synth[i]);
515 postfilter_energy +=
fabsf(in[i]);
517 gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
518 (1.0 -
alpha) * speech_energy / postfilter_energy;
520 for (i = 0; i <
size; i++) {
521 mem = alpha * mem + gain_scale_factor;
522 out[
i] = in[
i] * mem;
550 float optimal_gain = 0, dot;
553 *best_hist_ptr =
NULL;
558 if (dot > optimal_gain) {
562 }
while (--ptr >= end);
564 if (optimal_gain <= 0)
570 if (optimal_gain <= dot) {
571 dot = dot / (dot + 0.6 * optimal_gain);
576 for (n = 0; n <
size; n++)
577 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
606 int fcb_type,
float *coeffs,
int remainder)
609 float irange, angle_mul, gain_mul, range, sq;
614 #define log_range(var, assign) do { \ 615 float tmp = log10f(assign); var = tmp; \ 616 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \ 618 log_range(last_coeff, lpcs[1] * lpcs[1]);
619 for (n = 1; n < 64; n++)
620 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
621 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
632 irange = 64.0 / range;
636 for (n = 0; n <= 64; n++) {
639 idx =
lrint((
max - lpcs[n]) * irange - 1);
642 lpcs[n] = angle_mul * pwr;
645 idx = av_clipf((pwr * gain_mul - 0.0295) * 70.570526123, 0, INT_MAX / 2);
649 powf(1.0331663, idx - 127);
662 idx = 255 + av_clip(lpcs[64], -255, 255);
663 coeffs[0] = coeffs[0] * s->
cos[idx];
664 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
665 last_coeff = coeffs[64] * s->
cos[idx];
667 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
668 coeffs[n * 2 + 1] = coeffs[n] * s->
sin[idx];
669 coeffs[n * 2] = coeffs[n] * s->
cos[idx];
673 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
674 coeffs[n * 2 + 1] = coeffs[n] * s->
sin[idx];
675 coeffs[n * 2] = coeffs[n] * s->
cos[idx];
683 memset(&coeffs[remainder], 0,
sizeof(coeffs[0]) * (128 - remainder));
687 coeffs[remainder - 1] = 0;
694 for (n = 0; n < remainder; n++)
725 float *synth_pf,
int size,
728 int remainder, lim, n;
734 tilted_lpcs[0] = 1.0;
735 memcpy(&tilted_lpcs[1], lpcs,
sizeof(lpcs[0]) * s->
lsps);
736 memset(&tilted_lpcs[s->
lsps + 1], 0,
737 sizeof(tilted_lpcs[0]) * (128 - s->
lsps - 1));
739 tilted_lpcs, s->
lsps + 2);
745 remainder =
FFMIN(127 - size, size - 1);
750 memset(&synth_pf[size], 0,
sizeof(synth_pf[0]) * (128 - size));
753 synth_pf[0] *= coeffs[0];
754 synth_pf[1] *= coeffs[1];
755 for (n = 1; n < 64; n++) {
756 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
757 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
758 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
766 for (n = 0; n < lim; n++)
776 for (n = 0; n < lim; n++)
778 if (lim < remainder) {
808 const float *lpcs,
float *zero_exc_pf,
813 *synth_filter_in = zero_exc_pf;
822 synth_filter_in = synth_filter_in_buf;
826 synth_filter_in, size, s->
lsps);
827 memcpy(&synth_pf[-s->
lsps], &synth_pf[size - s->
lsps],
828 sizeof(synth_pf[0]) * s->
lsps);
840 (
const float[2]) { -1.99997, 1.0 },
841 (
const float[2]) { -1.9330735188, 0.93589198496 },
862 const uint16_t *
sizes,
865 const double *base_q)
869 memset(lsps, 0, num *
sizeof(*lsps));
870 for (n = 0; n < n_stages; n++) {
871 const uint8_t *t_off = &table[values[n] * num];
872 double base = base_q[n],
mul = mul_q[n];
874 for (m = 0; m < num; m++)
875 lsps[m] += base + mul * t_off[m];
877 table += sizes[n] * num;
893 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
894 static const double mul_lsf[4] = {
895 5.2187144800e-3, 1.4626986422e-3,
896 9.6179549166e-4, 1.1325736225e-3
898 static const double base_lsf[4] = {
899 M_PI * -2.15522e-1,
M_PI * -6.1646e-2,
900 M_PI * -3.3486e-2,
M_PI * -5.7408e-2
918 double *i_lsps,
const double *old,
919 double *
a1,
double *
a2,
int q_mode)
921 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
922 static const double mul_lsf[3] = {
923 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
925 static const double base_lsf[3] = {
926 M_PI * -1.07448e-1,
M_PI * -5.2706e-2,
M_PI * -5.1634e-2
928 const float (*ipol_tab)[2][10] = q_mode ?
940 for (n = 0; n < 10; n++) {
941 double delta = old[n] - i_lsps[n];
942 a1[n] = ipol_tab[
interpol][0][n] * delta + i_lsps[n];
943 a1[10 + n] = ipol_tab[
interpol][1][n] * delta + i_lsps[n];
955 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
956 static const double mul_lsf[5] = {
957 3.3439586280e-3, 6.9908173703e-4,
958 3.3216608306e-3, 1.0334960326e-3,
961 static const double base_lsf[5] = {
962 M_PI * -1.27576e-1,
M_PI * -2.4292e-2,
963 M_PI * -1.28094e-1,
M_PI * -3.2128e-2,
987 double *i_lsps,
const double *old,
988 double *
a1,
double *
a2,
int q_mode)
990 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
991 static const double mul_lsf[3] = {
992 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
994 static const double base_lsf[3] = {
997 const float (*ipol_tab)[2][16] = q_mode ?
1009 for (n = 0; n < 16; n++) {
1010 double delta = old[n] - i_lsps[n];
1011 a1[n] = ipol_tab[
interpol][0][n] * delta + i_lsps[n];
1012 a1[16 + n] = ipol_tab[
interpol][1][n] * delta + i_lsps[n];
1039 static const int16_t start_offset[94] = {
1040 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
1041 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
1042 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
1043 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
1044 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
1045 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
1046 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
1047 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
1053 if ((bits =
get_bits(gb, 6)) >= 54) {
1055 bits += (bits - 54) * 3 +
get_bits(gb, 2);
1061 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
1074 if (start_offset[bits] < 0)
1091 uint16_t use_mask_mem[9];
1092 uint16_t *use_mask = use_mask_mem + 2;
1101 pulse_start, n, idx, range, aidx, start_off = 0;
1110 if (block_idx == 0) {
1119 pulse_start = s->
aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
1124 memset(&use_mask[-2], 0, 2 *
sizeof(use_mask[0]));
1125 memset( use_mask, -1, 5 *
sizeof(use_mask[0]));
1126 memset(&use_mask[5], 0, 2 *
sizeof(use_mask[0]));
1130 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
1131 int first_sh = 16 - (idx & 15);
1132 *use_mask_ptr++ &= 0xFFFF
u << first_sh;
1133 excl_range -= first_sh;
1134 if (excl_range >= 16) {
1135 *use_mask_ptr++ = 0;
1136 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
1138 *use_mask_ptr &= 0xFFFF >> excl_range;
1143 for (n = 0; n <= aidx; pulse_start++) {
1144 for (idx = pulse_start; idx < 0; idx += fcb->
pitch_lag) ;
1146 if (use_mask[0]) idx = 0x0F;
1147 else if (use_mask[1]) idx = 0x1F;
1148 else if (use_mask[2]) idx = 0x2F;
1149 else if (use_mask[3]) idx = 0x3F;
1150 else if (use_mask[4]) idx = 0x4F;
1154 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
1155 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
1161 fcb->
x[fcb->
n] = start_off;
1185 int n, v_mask, i_mask, sh, n_pulses;
1199 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
1200 fcb->
y[fcb->
n] = (val & v_mask) ? -1.0 : 1.0;
1201 fcb->
x[fcb->
n] = (val & i_mask) * n_pulses + n +
1203 while (fcb->
x[fcb->
n] < 0)
1209 int num2 = (val & 0x1FF) >> 1,
delta, idx;
1211 if (num2 < 1 * 79) {
delta = 1; idx = num2 + 1; }
1212 else if (num2 < 2 * 78) {
delta = 3; idx = num2 + 1 - 1 * 77; }
1213 else if (num2 < 3 * 77) {
delta = 5; idx = num2 + 1 - 2 * 76; }
1214 else {
delta = 7; idx = num2 + 1 - 3 * 75; }
1215 v = (val & 0x200) ? -1.0 : 1.0;
1220 fcb->
x[fcb->
n + 1] = idx;
1221 fcb->
y[fcb->
n + 1] = (val & 1) ? -v : v;
1239 static int pRNG(
int frame_cntr,
int block_num,
int block_size)
1251 static const unsigned int div_tbl[9][2] = {
1252 { 8332, 3 * 715827883
U },
1253 { 4545, 0 * 390451573
U },
1254 { 3124, 11 * 268435456
U },
1255 { 2380, 15 * 204522253
U },
1256 { 1922, 23 * 165191050
U },
1257 { 1612, 23 * 138547333
U },
1258 { 1388, 27 * 119304648
U },
1259 { 1219, 16 * 104755300
U },
1260 { 1086, 39 * 93368855
U }
1262 unsigned int z, y, x =
MUL16(block_num, 1877) + frame_cntr;
1263 if (x >= 0xFFFF) x -= 0xFFFF;
1265 y = x - 9 *
MULH(477218589, x);
1266 z = (uint16_t) (x * div_tbl[y][0] +
UMULH(x, div_tbl[y][1]));
1268 return z % (1000 - block_size);
1276 int block_idx,
int size,
1298 for (n = 0; n <
size; n++)
1307 int block_idx,
int size,
1308 int block_pitch_sh2,
1312 static const float gain_coeff[6] = {
1313 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
1316 int n, idx, gain_weight;
1320 memset(pulses, 0,
sizeof(*pulses) * size);
1337 for (n = 0; n <
size; n++)
1349 for (n = 0; n < 5; n++) {
1355 fcb.
x[fcb.
n] = n + 5 * pos1;
1356 fcb.
y[fcb.
n++] = sign;
1359 fcb.
x[fcb.
n] = n + 5 * pos2;
1360 fcb.
y[fcb.
n++] = (pos1 < pos2) ? -sign : sign;
1380 for (n = 0; n < gain_weight; n++)
1386 for (n = 0; n <
size; n +=
len) {
1388 int abs_idx = block_idx * size + n;
1391 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
1392 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
1393 idx = idx_sh16 >> 16;
1396 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
1398 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
1409 int block_pitch = block_pitch_sh2 >> 2;
1410 idx = block_pitch_sh2 & 3;
1417 sizeof(
float) * size);
1422 acb_gain, fcb_gain, size);
1441 int block_idx,
int size,
1442 int block_pitch_sh2,
1443 const double *lsps,
const double *prev_lsps,
1445 float *excitation,
float *synth)
1456 frame_desc, excitation);
1459 fac = (block_idx + 0.5) / frame_desc->
n_blocks;
1460 for (n = 0; n < s->
lsps; n++)
1461 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
1484 const double *lsps,
const double *prev_lsps,
1485 float *excitation,
float *synth)
1488 int n, n_blocks_x2, log_n_blocks_x2,
av_uninit(cur_pitch_val);
1496 "Invalid frame type VLC code, skipping\n");
1519 int fac = n * 2 + 1;
1521 pitch[n] = (
MUL16(fac, cur_pitch_val) +
1563 last_block_pitch = av_clip(block_pitch,
1569 if (block_pitch < t1) {
1573 if (block_pitch <
t2) {
1578 if (block_pitch <
t3) {
1585 pitch[n] = bl_pitch_sh2 >> 2;
1590 bl_pitch_sh2 = pitch[n] << 2;
1599 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
1601 &excitation[n * block_nsamples],
1602 &synth[n * block_nsamples]);
1611 for (n = 0; n < s->
lsps; n++)
1612 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1618 for (n = 0; n < s->
lsps; n++)
1619 i_lsps[n] = cos(lsps[n]);
1621 postfilter(s, &synth[80], &samples[80], 80, lpcs,
1625 memcpy(samples, synth, 160 *
sizeof(synth[0]));
1665 lsps[0] =
FFMAX(lsps[0], 0.0015 *
M_PI);
1666 for (n = 1; n < num; n++)
1667 lsps[n] =
FFMAX(lsps[n], lsps[n - 1] + 0.0125 *
M_PI);
1668 lsps[num - 1] =
FFMIN(lsps[num - 1], 0.9985 *
M_PI);
1672 for (n = 1; n < num; n++) {
1673 if (lsps[n] < lsps[n - 1]) {
1674 for (m = 1; m < num; m++) {
1675 double tmp = lsps[m];
1676 for (l = m - 1; l >= 0; l--) {
1677 if (lsps[l] <= tmp)
break;
1678 lsps[l + 1] = lsps[l];
1718 s->
lsps *
sizeof(*synth));
1741 "Superframe encodes > %d samples (%d), not allowed\n",
1751 for (n = 0; n < s->
lsps; n++)
1752 prev_lsps[n] = s->
prev_lsps[n] - mean_lsf[n];
1759 for (n = 0; n < s->
lsps; n++) {
1760 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
1761 lsps[1][n] = mean_lsf[n] + (a1[s->
lsps + n] - a2[n * 2 + 1]);
1762 lsps[2][n] += mean_lsf[n];
1764 for (n = 0; n < 3; n++)
1777 samples = (
float *)frame->
data[0];
1780 for (n = 0; n < 3; n++) {
1784 if (s->
lsps == 10) {
1789 for (m = 0; m < s->
lsps; m++)
1790 lsps[n][m] += mean_lsf[m];
1796 lsps[n], n == 0 ? s->
prev_lsps : lsps[n - 1],
1798 &synth[s->
lsps + n * MAX_FRAMESIZE]))) {
1823 s->
lsps *
sizeof(*synth));
1843 unsigned int res, n_superframes = 0;
1853 n_superframes += res;
1854 }
while (res == 0x3F);
1879 int rmn_bytes, rmn_bits;
1882 if (rmn_bits < nbits)
1886 rmn_bits &= 7; rmn_bytes >>= 3;
1887 if ((rmn_bits =
FFMIN(rmn_bits, nbits)) > 0)
1890 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
1905 int *got_frame_ptr,
AVPacket *avpkt)
1968 }
else if (*got_frame_ptr) {
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Description of frame types.
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply first set of pitch-adaptive window pulses.
av_cold void ff_rdft_end(RDFTContext *s)
static const uint8_t wmavoice_dq_lsp16r2[0x500]
int do_apf
whether to apply the averaged projection filter (APF)
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
static int pRNG(int frame_cntr, int block_num, int block_size)
Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size - 1].
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
Set up the variable bit mode (VBM) tree from container extradata.
void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP synthesis filter.
float gain_pred_err[6]
cache for gain prediction
This structure describes decoded (raw) audio or video data.
int aw_next_pulse_off_cache
the position (relative to start of the second block) at which pulses should start to be positioned...
int nb_superframes
number of superframes in current packet
ptrdiff_t const GLvoid * data
static void flush(AVCodecContext *avctx)
float postfilter_agc
gain control memory, used in adaptive_gain_control()
void ff_acelp_apply_order_2_transfer_function(float *out, const float *in, const float zero_coeffs[2], const float pole_coeffs[2], float gain, float mem[2], int n)
Apply an order 2 rational transfer function in-place.
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
Write the bit val n times.
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
comfort noise during silence generated from a hardcoded (fixed) codebook with per-frame (low) gain va...
static void postfilter(WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch)
Averaging projection filter, the postfilter used in WMAVoice.
void ff_weighted_vector_sumf(float *out, const float *in_a, const float *in_b, float weight_coeff_a, float weight_coeff_b, int length)
float implementation of weighted sum of two vectors.
static void skip_bits_long(GetBitContext *s, int n)
Skips the specified number of bits.
static av_cold int init(AVCodecContext *avctx)
#define INIT_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size)
#define avpriv_request_sample(...)
float synth_filter_out_buf[0x80+MAX_LSPS_ALIGN16]
aligned buffer for postfilter speech synthesis
no adaptive codebook (only hardcoded fixed)
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, const int *pitch)
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between th...
static const int8_t pulses[4]
Number of non-zero pulses in the MP-MLQ excitation.
int aw_n_pulses[2]
number of AW-pulses in each block; note that this number can be negative (in which case it basically ...
static av_cold void wmavoice_init_static_data(void)
static int interpol(MBContext *s, uint32_t *color, int x, int y, int linesize)
static void stabilize_lsps(double *lsps, int num)
Ensure minimum value for first item, maximum value for last value, proper spacing between each value ...
static const float wmavoice_gain_codebook_fcb[128]
static const uint8_t wmavoice_dq_lsp16i1[0x640]
static const uint8_t wmavoice_dq_lsp16r1[0x500]
int spillover_nbits
number of bits of the previous packet's last superframe preceding this packet's first full superframe...
void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
Add fixed vector to an array from a sparse representation.
int block_pitch_nbits
number of bits used to specify the first block's pitch value
static const uint8_t wmavoice_dq_lsp16i3[0x300]
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth)
Synthesize output samples for a single frame.
static void calc_input_response(WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder)
Derive denoise filter coefficients (in real domain) from the LPCs.
static void dequant_lsp10i(GetBitContext *gb, double *lsps)
Parse 10 independently-coded LSPs.
int av_log2_16bit(unsigned v)
#define MAX_LSPS_ALIGN16
same as MAX_LSPS; needs to be a multiple of 16
int block_align
number of bytes per packet if constant and known, or 0. Used by some WAV based audio codecs...
static void decode(AVCodecContext *dec_ctx, AVPacket *pkt, AVFrame *frame, FILE *outfile)
static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb)
Apply second set of pitch-adaptive window pulses.
static const float wmavoice_ipol1_coeffs[17 *9]
static const uint8_t wmavoice_dq_lsp16i2[0x3c0]
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and cor...
#define av_assert0(cond)
assert() equivalent, that is always enabled.
int spillover_bitsize
number of bits used to specify spillover_nbits in the packet header = ceil(log2(ctx->block_align << 3...
int block_delta_pitch_nbits
number of bits used to specify the delta pitch between this and the last block's pitch value...
enum AVSampleFormat sample_fmt
audio sample format
Sparse representation for the algebraic codebook (fixed) vector.
static const uint8_t wmavoice_dq_lsp16r3[0x600]
static const float wmavoice_gain_codebook_acb[128]
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
uint8_t log_n_blocks
log2(n_blocks)
int aw_first_pulse_off[2]
index of first sample to which to apply AW-pulses, or -0xff if unset
static av_cold int end(AVCodecContext *avctx)
int has_residual_lsps
if set, superframes contain one set of LSPs that cover all frames, encoded as independent and residua...
float tilted_lpcs_pf[0x80]
aligned buffer for LPC tilting
void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
Copy the content of src to the bitstream.
uint8_t * extradata
some codecs need / can use extradata like Huffman tables.
static float tilt_factor(const float *lpcs, int n_lpcs)
Get the tilt factor of a formant filter from its transfer function.
#define u(width, name, range_min, range_max)
static const uint8_t wmavoice_dq_lsp10r[0x1400]
static void dequant_lsps(double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q)
Dequantize LSPs.
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
static const float wmavoice_ipol2_coeffs[32]
Hamming-window sinc function (num = 32, x = [ 0, 31 ]): (0.54 + 0.46 * cos(2 * M_PI * x / (num - 1)))...
static int get_bits_count(const GetBitContext *s)
float dcf_mem[2]
DC filter history.
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
Overlapping memcpy() implementation.
bitstream reader API header.
static av_cold void wmavoice_flush(AVCodecContext *ctx)
float synth_history[MAX_LSPS]
see excitation_history
double prev_lsps[MAX_LSPS]
LSPs of the last frame of the previous superframe.
static void copy_bits(PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits)
Copy (unaligned) bits from gb/data/size to pb.
static __device__ float fabsf(float a)
static const uint16_t table[]
static int get_bits_left(GetBitContext *gb)
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
static const double wmavoice_mean_lsf16[2][16]
int sframe_cache_size
set to >0 if we have data from an (incomplete) superframe from a previous packet that spilled over in...
static const float wmavoice_lsp10_intercoeff_b[32][2][10]
int block_pitch_range
range of the block pitch
static const float wmavoice_std_codebook[1000]
static const int sizes[][2]
int last_acb_type
frame type [0-2] of the previous frame
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
static const float wmavoice_gain_silence[256]
int denoise_filter_cache_size
samples in denoise_filter_cache
int history_nsamples
number of samples in history for signal prediction (through ACB)
static const uint8_t wmavoice_dq_lsp10i[0xf00]
static const float wmavoice_lsp10_intercoeff_a[32][2][10]
static const float wmavoice_energy_table[128]
LUT for 1.071575641632 * pow(1.0331663, n - 127)
Windows Media Voice (WMAVoice) tables.
const char * name
Name of the codec implementation.
int denoise_tilt_corr
Whether to apply tilt correction to the Wiener filter coefficients (postfilter)
int aw_idx_is_ext
whether the AW index was encoded in 8 bits (instead of 6)
uint16_t block_conv_table[4]
boundaries for block pitch unit/scale conversion
DCTContext dst
contexts for phase shift (in Hilbert transform, part of postfilter)
int lsp_def_mode
defines different sets of LSP defaults [0, 1]
static float mul(float src0, float src1)
uint64_t channel_layout
Audio channel layout.
static int put_bits_count(PutBitContext *s)
int skip_bits_next
number of bits to skip at the next call to wmavoice_decode_packet() (since they're part of the previo...
static void dequant_lsp16r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
int min_pitch_val
base value for pitch parsing code
WMA Voice decoding context.
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs)
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it...
int denoise_strength
strength of denoising in Wiener filter [0-11]
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE+AV_INPUT_BUFFER_PADDING_SIZE]
cache for superframe data split over multiple packets
audio channel layout utility functions
#define log_range(var, assign)
#define MAX_LSPS
maximum filter order
static VLC frame_type_vlc
Frame type VLC coding.
int pitch_nbits
number of bits used to specify the pitch value in the frame header
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return values
#define MAX_BLOCKS
maximum number of blocks per frame
float denoise_coeffs_pf[0x80]
aligned buffer for denoise coefficients
void(* dct_calc)(struct DCTContext *s, FFTSample *data)
static void dequant_lsp10r(GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode)
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames f...
static av_always_inline unsigned UMULH(unsigned a, unsigned b)
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE(*table)[2], int bits, int max_depth)
Parse a vlc code.
void(* rdft_calc)(struct RDFTContext *s, FFTSample *z)
static int kalman_smoothen(WMAVoiceContext *s, int pitch, const float *in, float *out, int size)
Kalman smoothing function.
void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
Apply tilt compensation filter, 1 - tilt * z-1.
static const float wmavoice_gain_universal[64]
void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
Reconstruct LPC coefficients from the line spectral pair frequencies.
static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
Set up decoder with parameters from demuxer (extradata etc.).
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
static const uint8_t last_coeff[3]
static const struct frame_type_desc frame_descs[17]
float denoise_filter_cache[MAX_FRAMESIZE]
Libavcodec external API header.
int sample_rate
samples per second
void AAC_RENAME() ff_sine_window_init(INTFLOAT *window, int n)
Generate a sine window.
static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, int *got_frame_ptr, AVPacket *avpkt)
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer...
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
static const int16_t alpha[]
main external API structure.
static int parse_packet_header(WMAVoiceContext *s)
Parse the packet header at the start of each packet (input data to this decoder). ...
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
AVCodec ff_wmavoice_decoder
int8_t vbm_tree[25]
converts VLC codes to frame type
static unsigned int get_bits1(GetBitContext *s)
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth)
Parse data in a single block.
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31))))#define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac){}void ff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, 
int apply_map){AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);return NULL;}return ac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;}int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){int use_generic=1;int len=in->nb_samples;int p;if(ac->dc){av_log(ac->avr, AV_LOG_TRACE,"%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
static void skip_bits(GetBitContext *s, int n)
av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
Set up DCT.
#define AV_CODEC_CAP_SUBFRAMES
Codec can output multiple frames per AVPacket. Normally demuxers return one frame at a time...
int pitch_diff_sh16
((cur_pitch_val - last_pitch_val) << 16) / MAX_FRAMESIZE
static int init_get_bits(GetBitContext *s, const uint8_t *buffer, int bit_size)
Initialize GetBitContext.
#define MAX_SFRAMESIZE
maximum number of samples per superframe
int lsp_q_mode
defines quantizer defaults [0, 1]
int frame_cntr
current frame index [0 - 0xFFFE]; is only used for comfort noise in pRNG()
void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, const float *in, int buffer_length, int filter_length)
LP zero synthesis filter.
float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
Return the scalar product of two vectors.
static void adaptive_gain_control(float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem)
Adaptive gain control (as used in postfilter).
static const float mean_lsf[10]
#define SFRAME_CACHE_MAXSIZE
maximum cache size for frame data that was split over two packets
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
uint8_t fcb_type
Fixed codebook type (FCB_TYPE_*)
#define flags(name, subs,...)
static void dequant_lsp16i(GetBitContext *gb, double *lsps)
Parse 16 independently-coded LSPs.
RDFTContext irdft
contexts for FFT-calculation in the postfilter (for denoise filter)
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, int *got_frame_ptr)
Synthesize output samples for a single superframe.
Per-block pitch with signal generation using a Hamming sinc window function.
Pitch-adaptive window (AW) pulse signals, used in particular for low-bitrate streams.
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation)
Parse hardcoded signal for a single block.
uint8_t n_blocks
number of blocks per frame (each block contains 160/n_blocks samples)
Innovation (fixed) codebook pulse sets in combinations of either single pulses or pulse pairs...
common internal api header.
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
hardcoded (fixed) codebook with per-block gain values
float excitation_history[MAX_SIGNAL_HISTORY]
cache of the signal of previous superframes, used as a history for signal generation ...
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
int last_pitch_val
pitch value of the previous frame
#define AV_INPUT_BUFFER_PADDING_SIZE
Required number of additionally allocated bytes at the end of the input bitstream for decoding...
#define MAX_FRAMESIZE
maximum number of samples per frame
float silence_gain
set for use in blocks if ACB_TYPE_NONE
static const double wmavoice_mean_lsf10[2][10]
static void dct(AudioRNNContext *s, float *out, const float *in)
int channels
number of audio channels
static int ff_thread_once(char *control, void(*routine)(void))
VLC_TYPE(* table)[2]
code, bits
av_cold void ff_dct_end(DCTContext *s)
void ff_acelp_interpolatef(float *out, const float *in, const float *filter_coeffs, int precision, int frac_pos, int filter_length, int length)
Floating point version of ff_acelp_interpolate()
int block_delta_pitch_hrange
1/2 range of the delta (full range is from -this to +this-1)
int max_pitch_val
max value + 1 for pitch parsing
int lsps
number of LSPs per frame [10 or 16]
#define MAX_FRAMES
maximum number of frames per superframe
Filter design: in this document, the word “frame” indicates either a video frame or a group of audio samples
static const float wmavoice_lsp16_intercoeff_b[32][2][16]
PutBitContext pb
bitstream writer for sframe_cache
uint8_t acb_type
Adaptive codebook type (ACB_TYPE_*)
static const float wmavoice_denoise_power_table[12][64]
LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
int dc_level
Predicted amount of DC noise, based on which a DC removal filter is used.
#define VLC_NBITS
number of bits to read per VLC iteration
static const float wmavoice_lsp16_intercoeff_a[32][2][16]
Filter design: in this document, the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: the query_formats method should set, for each input and each output link, the list of supported formats. For video links, that means pixel format. For audio links, that means channel layout, sample format and sample rate. The lists are not just lists, they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
float cos[511]
8-bit cosine/sine windows over [-pi,pi] range
#define AV_CH_LAYOUT_MONO
av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
Set up a real FFT.
int aw_pulse_range
the range over which aw_pulse_set1() can apply the pulse, relative to the value in aw_first_pulse_off...
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_RL32
static double val(void *priv, double ch)
This structure stores compressed data.
int nb_samples
number of audio samples (per channel) described by this frame
float zero_exc_pf[MAX_SIGNAL_HISTORY+MAX_SFRAMESIZE]
zero filter output (i.e. excitation) by postfilter
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() for allocating buffers and supports custom allocators.
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation)
Parse FCB/ACB signal for a single block.
uint8_t dbl_pulses
how many pulse vectors have pulse pairs (rather than just one single pulse) only if fcb_type == FCB_T...
#define MAX_SIGNAL_HISTORY
maximum excitation signal history
GetBitContext gb
packet bitreader.